I'm announcing the release of the 3.18.107 kernel.
All users of the 3.18 kernel series must upgrade.
The updated 3.18.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-3.18.y
and can be browsed at the normal kernel.org git web browser:
http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2
arch/x86/kernel/tsc.c | 2
drivers/cdrom/cdrom.c | 2
drivers/message/fusion/mptsas.c | 1
drivers/net/bonding/bond_main.c | 3 -
drivers/net/ppp/pppoe.c | 4 +
drivers/net/team/team.c | 38 ++++++++++++++---
fs/cifs/dir.c | 9 ++--
fs/ext4/balloc.c | 3 -
fs/ext4/ialloc.c | 43 +------------------
fs/ext4/inline.c | 66 +++++++++++++-----------------
fs/ext4/inode.c | 2
fs/ext4/xattr.c | 30 +++++--------
fs/ext4/xattr.h | 32 ++++++++++++++
fs/jbd2/journal.c | 2
include/net/llc_conn.h | 1
kernel/events/core.c | 4 -
mm/filemap.c | 4 -
net/dns_resolver/dns_key.c | 13 ++---
net/ipv4/tcp.c | 6 +-
net/ipv4/tcp_input.c | 7 ---
net/ipv6/route.c | 2
net/l2tp/l2tp_ppp.c | 7 +++
net/llc/af_llc.c | 14 +++++-
net/llc/llc_c_ac.c | 9 ----
net/llc/llc_conn.c | 22 +++++++++-
net/packet/af_packet.c | 88 ++++++++++++++++++++++++++++------------
net/packet/internal.h | 10 ++--
28 files changed, 253 insertions(+), 173 deletions(-)
Cong Wang (3):
llc: hold llc_sap before release_sock()
llc: fix NULL pointer deref for SOCK_ZAPPED
llc: delete timers synchronously in llc_sk_free()
Dan Carpenter (1):
cdrom: information leak in cdrom_ioctl_media_changed()
Eric Biggers (1):
KEYS: DNS: limit the length of option strings
Eric Dumazet (3):
tcp: md5: reject TCP_MD5SIG or TCP_MD5SIG_EXT on established sockets
net: af_packet: fix race in PACKET_{R|T}X_RING
ipv6: add RTA_TABLE and RTA_PREFSRC to rtm_ipv6_policy
Greg Kroah-Hartman (1):
Linux 3.18.107
Guillaume Nault (2):
l2tp: check sockaddr length in pppol2tp_connect()
pppoe: check sockaddr length in pppoe_connect()
Jann Horn (1):
tcp: don't read out-of-bounds opsize
Jiri Olsa (1):
perf: Return proper values for user stack errors
Martin K. Petersen (1):
scsi: mptsas: Disable WRITE SAME
Matthew Wilcox (1):
mm/filemap.c: fix NULL pointer in page_cache_tree_insert()
Paolo Abeni (1):
team: avoid adding twice the same option to the event list
Sahitya Tummala (1):
jbd2: fix use after free in kjournald2()
Steve French (1):
cifs: do not allow creating sockets except with SMB1 posix exensions
Theodore Ts'o (2):
ext4: fix deadlock between inline_data and ext4_expand_extra_isize_ea()
ext4: don't update checksum of new initialized bitmaps
Willem de Bruijn (1):
packet: fix bitfield update race
Xiaoming Gao (1):
x86/tsc: Prevent 32bit truncation in calc_hpet_ref()
Xin Long (2):
bonding: do not set slave_dev npinfo before slave_enable_netpoll in bond_enslave
team: fix netconsole setup over team
wangguang (1):
ext4: bugfix for mmaped pages in mpage_release_unused_pages()
Jeremy Cline correctly points out in rhbz#1514836 that a device where the
QCA rome chipset needs the USB_QUIRK_RESET_RESUME quirk, may also ship
with a different wifi/bt chipset in some configurations.
If that is the case then we are needlessly penalizing those other chipsets
with a reset-resume quirk, typically causing 0.4W extra power use because
this disables runtime-pm.
This commit moves the DMI table check to a btusb_check_needs_reset_resume()
helper (so that we can easily also call it for other chipsets) and calls
this new helper only for QCA_ROME chipsets for now.
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836
Cc: stable(a)vger.kernel.org
Cc: Jeremy Cline <jcline(a)redhat.com>
Suggested-by: Jeremy Cline <jcline(a)redhat.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/bluetooth/btusb.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index f064984c9ec0..15e7cdca6eb5 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2863,6 +2863,12 @@ static int btusb_config_oob_wake(struct hci_dev *hdev)
}
#endif
+static void btusb_check_needs_reset_resume(struct usb_interface *intf)
+{
+ if (dmi_check_system(btusb_needs_reset_resume_table))
+ interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
+}
+
static int btusb_probe(struct usb_interface *intf,
const struct usb_device_id *id)
{
@@ -2985,9 +2991,6 @@ static int btusb_probe(struct usb_interface *intf,
hdev->send = btusb_send_frame;
hdev->notify = btusb_notify;
- if (dmi_check_system(btusb_needs_reset_resume_table))
- interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
-
#ifdef CONFIG_PM
err = btusb_config_oob_wake(hdev);
if (err)
@@ -3076,6 +3079,7 @@ static int btusb_probe(struct usb_interface *intf,
data->setup_on_usb = btusb_setup_qca;
hdev->set_bdaddr = btusb_set_bdaddr_ath3012;
set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
+ btusb_check_needs_reset_resume(intf);
}
#ifdef CONFIG_BT_HCIBTUSB_RTL
--
2.17.0
From: Ian W MORRISON <ianwmorrison(a)gmail.com>
As the Geminilake firmware is now merged to linux-firmware.git
use MODUE_FIRMWARE to load the firmware.
This removes the error message in the dmesg log:
i915 0000:00:02.0: Direct firmware load for
i915/glk_dmc_ver1_04.bin failed with error -2
i915 0000:00:02.0: Failed to load DMC firmware
i915/glk_dmc_ver1_04.bin. Disabling runtime power management.
i915 0000:00:02.0: DMC firmware homepage:
https://01.org/linuxgraphics/downloads/firmware
and now shows that the firmware has correctly loaded:
[drm] Finished loading DMC firmware i915/glk_dmc_ver1_04.bin (v1.4)
Cc: stable(a)vger.kernel.org
Signed-off-by: Ian W MORRISON <ianwmorrison(a)gmail.com>
---
drivers/gpu/drm/i915/intel_csr.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 41e6c75a7f3c..f9550ea46c26 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -35,6 +35,7 @@
*/
#define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin"
+MODULE_FIRMWARE(I915_CSR_GLK);
#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4)
#define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin"
--
2.11.0
Hi Greg,
These two patches should probably be part of the 4.4 because
ce59e48fdbad ("serial: mctrl_gpio: implement interrupt handling")
backport. 4.9, 4.14 and 4.16 have these patches already in it.
Romain Izard (1):
serial: mctrl_gpio: Add missing module license
Uwe Kleine-König (1):
serial: mctrl_gpio: export mctrl_gpio_disable_ms and mctrl_gpio_init
drivers/tty/serial/serial_mctrl_gpio.c | 5 +++++
1 file changed, 5 insertions(+)
--
2.14.3
On Sun, Apr 29, 2018 at 10:15:03PM +0200, Fredrik Schön wrote:
> 2018-04-29 14:53 GMT+02:00 Greg KH <gregkh(a)linuxfoundation.org>:
> > How about build warnings for gcc 8? Anything we need to make it build
> > "clean" there? I do have a fedora system around here, I guess I could
> > try it out myself later this week...
>
> There are a ton of build warnings of the kind:
>
> mm/vmscan.o: warning: objtool: shrink_slab.part.44()+0x120: sibling call
> from callable instruction with modified stack frame
>
> I don't know how to fix them, unfortunately.
Do they show up in Linus's tree? I thought Arnd was working on gcc 8
warning fixes a while ago...
thanks,
greg k-h
Since the commit "8003c9ae204e: add APIC Timer periodic/oneshot mode VMX
preemption timer support", a Windows 10 guest has some erratic timer
spikes.
Here the results on a 150000 times 1ms timer without any load:
Before 8003c9ae204e | After 8003c9ae204e
Max 1834us | 86000us
Mean 1100us | 1021us
Deviation 59us | 149us
Here the results on a 150000 times 1ms timer with a cpu-z stress test:
Before 8003c9ae204e | After 8003c9ae204e
Max 32000us | 140000us
Mean 1006us | 1997us
Deviation 140us | 11095us
The root cause of the problem is starting hrtimer with an expiry time
already in the past can take more than 20 milliseconds to trigger the
timer function. It can be solved by forward such past timers
immediately, rather than submitting them to hrtimer_start().
In case the timer is periodic, update the target expiration and call
hrtimer_start with it.
v2: Check if the tsc deadline is already expired. Thank you Mika.
v3: Execute the past timers immediately rather than submitting them to
hrtimer_start().
v4: Rearm the periodic timer with advance_periodic_target_expiration() a
simpler version of set_target_expiration(). Thank you Paolo.
Cc: Mika Penttilä <mika.penttila(a)nextfour.com>
Cc: Wanpeng Li <kernellwp(a)gmail.com>
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Anthoine Bourgeois <anthoine.bourgeois(a)blade-group.com>
---
arch/x86/kvm/lapic.c | 37 ++++++++++++++++++++-----------------
1 file changed, 20 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 70dcb5548022..b74c9c1405b9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1463,23 +1463,6 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_restore(flags);
}
-static void start_sw_period(struct kvm_lapic *apic)
-{
- if (!apic->lapic_timer.period)
- return;
-
- if (apic_lvtt_oneshot(apic) &&
- ktime_after(ktime_get(),
- apic->lapic_timer.target_expiration)) {
- apic_timer_expired(apic);
- return;
- }
-
- hrtimer_start(&apic->lapic_timer.timer,
- apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS_PINNED);
-}
-
static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
{
ktime_t now, remaining;
@@ -1546,6 +1529,26 @@ static void advance_periodic_target_expiration(struct kvm_lapic *apic)
apic->lapic_timer.period);
}
+static void start_sw_period(struct kvm_lapic *apic)
+{
+ if (!apic->lapic_timer.period)
+ return;
+
+ if (ktime_after(ktime_get(),
+ apic->lapic_timer.target_expiration)) {
+ apic_timer_expired(apic);
+
+ if (apic_lvtt_oneshot(apic))
+ return;
+
+ advance_periodic_target_expiration(apic);
+ }
+
+ hrtimer_start(&apic->lapic_timer.timer,
+ apic->lapic_timer.target_expiration,
+ HRTIMER_MODE_ABS_PINNED);
+}
+
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
--
2.11.0
When a USB device is connected to the USB host port on the SAM9N12 then
you get "-62" error which seems to indicate USB replies from the device
are timing out. Based on a logic sniffer, I saw the USB bus was running
at half speed.
The PLL code uses cached MUL and DIV values which get set in set_rate()
and applied in prepare(), but the recalc_rate() function instead
queries the hardware instead of using these cached values. Therefore,
if recalc_rate() is called between a set_rate() and prepare(), the
wrong frequency is calculated and later the USB clock divider for the
SAM9N12 SOC will be configured for an incorrect clock.
In my case, the PLL hardware was set to 96 Mhz before the OHCI
driver loads, and therefore the usb clock divider was being set
to /2 even though the OHCI driver set the PLL to 48 Mhz.
As an alternative explanation, I noticed this was fixed in the past by
87e2ed338f1b ("clk: at91: fix recalc_rate implementation of PLL
driver") but the bug was later re-introduced by 1bdf02326b71 ("clk:
at91: make use of syscon/regmap internally").
Fixes: 1bdf02326b71 ("clk: at91: make use of syscon/regmap internally)
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Marcin Ziemianowicz <marcin(a)ziemianowicz.com>
---
Thank you for bearing with me about this Boris.
Changes since V3:
Fix for double returns found by kbluild test robot
> Comments by Boris Brezillon about email formatting issues
Changes since V2:
Removed all logging/debug messages I added
> Comment by Boris Brezillon about my fix being wrong addressed
Changes since V1:
Added patch set cover letter
Shortened lines which were over >80 characters long
> Comment by Greg Kroah-Hartman about "from" field in email addressed
> Comment by Alan Stern about redundant debug lines addressed
drivers/clk/at91/clk-pll.c | 13 +------------
1 file changed, 1 insertion(+), 12 deletions(-)
diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c
index 7d3223fc..72b6091e 100644
--- a/drivers/clk/at91/clk-pll.c
+++ b/drivers/clk/at91/clk-pll.c
@@ -132,19 +132,8 @@ static unsigned long clk_pll_recalc_rate(struct clk_hw *hw,
unsigned long parent_rate)
{
struct clk_pll *pll = to_clk_pll(hw);
- unsigned int pllr;
- u16 mul;
- u8 div;
-
- regmap_read(pll->regmap, PLL_REG(pll->id), &pllr);
-
- div = PLL_DIV(pllr);
- mul = PLL_MUL(pllr, pll->layout);
-
- if (!div || !mul)
- return 0;
- return (parent_rate / div) * (mul + 1);
+ return (parent_rate / pll->div) * (pll->mul + 1);
}
static long clk_pll_get_best_div_mul(struct clk_pll *pll, unsigned long rate,
--
2.17.0
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d973f8535f033fac1599bd5eae6835e6bb304da3 Mon Sep 17 00:00:00 2001
From: "Jerry (Fangzhi) Zuo" <Jerry.Zuo(a)amd.com>
Date: Tue, 17 Apr 2018 13:49:48 -0400
Subject: [PATCH] drm/amd/display: Update MST edid property every time
Extended fix to: "Don't read EDID in atomic_check"
Fix display property not observed in GUI display after hot plug.
Call drm_mode_connector_update_edid_property every time in
.get_modes hook, due to the fact that edid property is getting
removed from usermode ioctl DRM_IOCTL_MODE_GETCONNECTOR each time
in hot unplug.
Signed-off-by: Jerry (Fangzhi) Zuo <Jerry.Zuo(a)amd.com>
Reviewed-by: Harry Wentland <Harry.Wentland(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 305292a9ff80..8c1d084429dc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -253,11 +253,11 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
if (aconnector->dc_sink)
amdgpu_dm_add_sink_to_freesync_module(
connector, edid);
-
- drm_mode_connector_update_edid_property(
- &aconnector->base, edid);
}
+ drm_mode_connector_update_edid_property(
+ &aconnector->base, aconnector->edid);
+
ret = drm_add_edid_modes(connector, aconnector->edid);
return ret;
GCC 8.0.1 as shipped with Fedora 28 beta fails to build Linux-stable
4.16.5 and 4.14.37. Cherry-picking the below commits from mainline
fixes the build. Build and boot tested on x86-64
4.16.5:
854e55ad289e objtool, perf: Fix GCC 8 -Wrestrict error
4.14.37:
854e55ad289e objtool, perf: Fix GCC 8 -Wrestrict error
ad343a98e74e tools/lib/subcmd/pager.c: do not alias select() params
Regards,
Fredrik
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d973f8535f033fac1599bd5eae6835e6bb304da3 Mon Sep 17 00:00:00 2001
From: "Jerry (Fangzhi) Zuo" <Jerry.Zuo(a)amd.com>
Date: Tue, 17 Apr 2018 13:49:48 -0400
Subject: [PATCH] drm/amd/display: Update MST edid property every time
Extended fix to: "Don't read EDID in atomic_check"
Fix display property not observed in GUI display after hot plug.
Call drm_mode_connector_update_edid_property every time in
.get_modes hook, due to the fact that edid property is getting
removed from usermode ioctl DRM_IOCTL_MODE_GETCONNECTOR each time
in hot unplug.
Signed-off-by: Jerry (Fangzhi) Zuo <Jerry.Zuo(a)amd.com>
Reviewed-by: Harry Wentland <Harry.Wentland(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
Cc: stable(a)vger.kernel.org
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 305292a9ff80..8c1d084429dc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -253,11 +253,11 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
if (aconnector->dc_sink)
amdgpu_dm_add_sink_to_freesync_module(
connector, edid);
-
- drm_mode_connector_update_edid_property(
- &aconnector->base, edid);
}
+ drm_mode_connector_update_edid_property(
+ &aconnector->base, aconnector->edid);
+
ret = drm_add_edid_modes(connector, aconnector->edid);
return ret;
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 6de3b1f26d1e8adb53d97835400c541ce50155e5 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Date: Mon, 23 Apr 2018 14:37:53 +0300
Subject: [PATCH] drm/i915: Use ktime on wait_for
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
We use jiffies to determine when wait expires. However
Imre did find out that jiffies can and will do a >1
increments on certain situations [1]. When this happens
in a wait_for loop, we return timeout errorneously
much earlier than what the real wallclock would say.
We can't afford our waits to timeout prematurely.
Discard jiffies and change to ktime to detect timeouts.
v2: added bugzilla entry (Imre), added stable (Chris)
Reported-by: Imre Deak <imre.deak(a)intel.com>
References: https://lkml.org/lkml/2018/4/18/798 [1]
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105771
Cc: Imre Deak <imre.deak(a)intel.com>
Cc: Chris Wilson <chris(a)chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180423113754.28424-1-mika.k…
(cherry picked from commit 3085982c6b45d7d22f76e3aa018affbc143a7370)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index d4368589b355..a80fbad9be0f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -49,12 +49,12 @@
* check the condition before the timeout.
*/
#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
- unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \
+ const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
int ret__; \
might_sleep(); \
for (;;) { \
- bool expired__ = time_after(jiffies, timeout__); \
+ const bool expired__ = ktime_after(ktime_get_raw(), end__); \
OP; \
if (COND) { \
ret__ = 0; \
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 28a5933e8d362766462ea9e5f135e19f41e658ba Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair(a)popple.id.au>
Date: Wed, 11 Apr 2018 16:38:54 +1000
Subject: [PATCH] powerpc/powernv/npu: Add lock to prevent race in concurrent
context init/destroy
The pnv_npu2_init_context() and pnv_npu2_destroy_context() functions
are used to allocate/free contexts to allow address translation and
shootdown by the NPU on a particular GPU. Context initialisation is
implicitly safe as it is protected by the requirement mmap_sem be held
in write mode, however pnv_npu2_destroy_context() does not require
mmap_sem to be held and it is not safe to call with a concurrent
initialisation for a different GPU.
It was assumed the driver would ensure destruction was not called
concurrently with initialisation. However the driver may be simplified
by allowing concurrent initialisation and destruction for different
GPUs. As npu context creation/destruction is not a performance
critical path and the critical section is not large a single spinlock
is used for simplicity.
Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2")
Cc: stable(a)vger.kernel.org # v4.12+
Signed-off-by: Alistair Popple <alistair(a)popple.id.au>
Reviewed-by: Mark Hairgrove <mhairgrove(a)nvidia.com>
Tested-by: Mark Hairgrove <mhairgrove(a)nvidia.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 69a4f9e8bd55..5ff7c6e0e6da 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -33,6 +33,12 @@
#define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
+/*
+ * spinlock to protect initialisation of an npu_context for a particular
+ * mm_struct.
+ */
+static DEFINE_SPINLOCK(npu_context_lock);
+
/*
* Other types of TCE cache invalidation are not functional in the
* hardware.
@@ -696,7 +702,8 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
* Returns an error if there no contexts are currently available or a
* npu_context which should be passed to pnv_npu2_handle_fault().
*
- * mmap_sem must be held in write mode.
+ * mmap_sem must be held in write mode and must not be called from interrupt
+ * context.
*/
struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
unsigned long flags,
@@ -743,7 +750,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/*
* Setup the NPU context table for a particular GPU. These need to be
* per-GPU as we need the tables to filter ATSDs when there are no
- * active contexts on a particular GPU.
+ * active contexts on a particular GPU. It is safe for these to be
+ * called concurrently with destroy as the OPAL call takes appropriate
+ * locks and refcounts on init/destroy.
*/
rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
@@ -754,8 +763,19 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
* We store the npu pci device so we can more easily get at the
* associated npus.
*/
+ spin_lock(&npu_context_lock);
npu_context = mm->context.npu_context;
+ if (npu_context)
+ WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+ spin_unlock(&npu_context_lock);
+
if (!npu_context) {
+ /*
+ * We can set up these fields without holding the
+ * npu_context_lock as the npu_context hasn't been returned to
+ * the caller meaning it can't be destroyed. Parallel allocation
+ * is protected against by mmap_sem.
+ */
rc = -ENOMEM;
npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
if (npu_context) {
@@ -774,8 +794,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
}
mm->context.npu_context = npu_context;
- } else {
- WARN_ON(!kref_get_unless_zero(&npu_context->kref));
}
npu_context->release_cb = cb;
@@ -814,15 +832,16 @@ static void pnv_npu2_release_context(struct kref *kref)
mm_context_remove_copro(npu_context->mm);
npu_context->mm->context.npu_context = NULL;
- mmu_notifier_unregister(&npu_context->mn,
- npu_context->mm);
-
- kfree(npu_context);
}
+/*
+ * Destroy a context on the given GPU. May free the npu_context if it is no
+ * longer active on any GPUs. Must not be called from interrupt context.
+ */
void pnv_npu2_destroy_context(struct npu_context *npu_context,
struct pci_dev *gpdev)
{
+ int removed;
struct pnv_phb *nphb;
struct npu *npu;
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
@@ -844,7 +863,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
- kref_put(&npu_context->kref, pnv_npu2_release_context);
+ spin_lock(&npu_context_lock);
+ removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
+ spin_unlock(&npu_context_lock);
+
+ /*
+ * We need to do this outside of pnv_npu2_release_context so that it is
+ * outside the spinlock as mmu_notifier_destroy uses SRCU.
+ */
+ if (removed) {
+ mmu_notifier_unregister(&npu_context->mn,
+ npu_context->mm);
+
+ kfree(npu_context);
+ }
+
}
EXPORT_SYMBOL(pnv_npu2_destroy_context);
The patch below does not apply to the 4.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 28a5933e8d362766462ea9e5f135e19f41e658ba Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair(a)popple.id.au>
Date: Wed, 11 Apr 2018 16:38:54 +1000
Subject: [PATCH] powerpc/powernv/npu: Add lock to prevent race in concurrent
context init/destroy
The pnv_npu2_init_context() and pnv_npu2_destroy_context() functions
are used to allocate/free contexts to allow address translation and
shootdown by the NPU on a particular GPU. Context initialisation is
implicitly safe as it is protected by the requirement mmap_sem be held
in write mode, however pnv_npu2_destroy_context() does not require
mmap_sem to be held and it is not safe to call with a concurrent
initialisation for a different GPU.
It was assumed the driver would ensure destruction was not called
concurrently with initialisation. However the driver may be simplified
by allowing concurrent initialisation and destruction for different
GPUs. As npu context creation/destruction is not a performance
critical path and the critical section is not large a single spinlock
is used for simplicity.
Fixes: 1ab66d1fbada ("powerpc/powernv: Introduce address translation services for Nvlink2")
Cc: stable(a)vger.kernel.org # v4.12+
Signed-off-by: Alistair Popple <alistair(a)popple.id.au>
Reviewed-by: Mark Hairgrove <mhairgrove(a)nvidia.com>
Tested-by: Mark Hairgrove <mhairgrove(a)nvidia.com>
Reviewed-by: Balbir Singh <bsingharora(a)gmail.com>
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 69a4f9e8bd55..5ff7c6e0e6da 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -33,6 +33,12 @@
#define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
+/*
+ * spinlock to protect initialisation of an npu_context for a particular
+ * mm_struct.
+ */
+static DEFINE_SPINLOCK(npu_context_lock);
+
/*
* Other types of TCE cache invalidation are not functional in the
* hardware.
@@ -696,7 +702,8 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
* Returns an error if there no contexts are currently available or a
* npu_context which should be passed to pnv_npu2_handle_fault().
*
- * mmap_sem must be held in write mode.
+ * mmap_sem must be held in write mode and must not be called from interrupt
+ * context.
*/
struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
unsigned long flags,
@@ -743,7 +750,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
/*
* Setup the NPU context table for a particular GPU. These need to be
* per-GPU as we need the tables to filter ATSDs when there are no
- * active contexts on a particular GPU.
+ * active contexts on a particular GPU. It is safe for these to be
+ * called concurrently with destroy as the OPAL call takes appropriate
+ * locks and refcounts on init/destroy.
*/
rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
@@ -754,8 +763,19 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
* We store the npu pci device so we can more easily get at the
* associated npus.
*/
+ spin_lock(&npu_context_lock);
npu_context = mm->context.npu_context;
+ if (npu_context)
+ WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+ spin_unlock(&npu_context_lock);
+
if (!npu_context) {
+ /*
+ * We can set up these fields without holding the
+ * npu_context_lock as the npu_context hasn't been returned to
+ * the caller meaning it can't be destroyed. Parallel allocation
+ * is protected against by mmap_sem.
+ */
rc = -ENOMEM;
npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
if (npu_context) {
@@ -774,8 +794,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
}
mm->context.npu_context = npu_context;
- } else {
- WARN_ON(!kref_get_unless_zero(&npu_context->kref));
}
npu_context->release_cb = cb;
@@ -814,15 +832,16 @@ static void pnv_npu2_release_context(struct kref *kref)
mm_context_remove_copro(npu_context->mm);
npu_context->mm->context.npu_context = NULL;
- mmu_notifier_unregister(&npu_context->mn,
- npu_context->mm);
-
- kfree(npu_context);
}
+/*
+ * Destroy a context on the given GPU. May free the npu_context if it is no
+ * longer active on any GPUs. Must not be called from interrupt context.
+ */
void pnv_npu2_destroy_context(struct npu_context *npu_context,
struct pci_dev *gpdev)
{
+ int removed;
struct pnv_phb *nphb;
struct npu *npu;
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
@@ -844,7 +863,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
- kref_put(&npu_context->kref, pnv_npu2_release_context);
+ spin_lock(&npu_context_lock);
+ removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
+ spin_unlock(&npu_context_lock);
+
+ /*
+ * We need to do this outside of pnv_npu2_release_context so that it is
+ * outside the spinlock as mmu_notifier_destroy uses SRCU.
+ */
+ if (removed) {
+ mmu_notifier_unregister(&npu_context->mn,
+ npu_context->mm);
+
+ kfree(npu_context);
+ }
+
}
EXPORT_SYMBOL(pnv_npu2_destroy_context);