This is a note to let you know that I've just added the patch titled
xhci: Do warm-reset when both CAS and XDEV_RESUME are set
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 904df64a5f4d5ebd670801d869ca0a6d6a6e8df6 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 21 Aug 2020 12:15:48 +0300
Subject: xhci: Do warm-reset when both CAS and XDEV_RESUME are set
Sometimes re-plugging a USB device during system sleep renders the device
useless:
[ 173.418345] xhci_hcd 0000:00:14.0: Get port status 2-4 read: 0x14203e2, return 0x10262
...
[ 176.496485] usb 2-4: Waited 2000ms for CONNECT
[ 176.496781] usb usb2-port4: status 0000.0262 after resume, -19
[ 176.497103] usb 2-4: can't resume, status -19
[ 176.497438] usb usb2-port4: logical disconnect
Because PLS equals XDEV_RESUME, the xHCI driver reports U3 to usbcore
even though the CAS bit is flagged.
So prioritize CAS over XDEV_RESUME and let usbcore handle the warm-reset
for the port.
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20200821091549.20556-3-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/usb/host/xhci-hub.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index c3554e37e09f..4e14e164cb68 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -740,15 +740,6 @@ static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci,
{
u32 pls = status_reg & PORT_PLS_MASK;
- /* resume state is a xHCI internal state.
- * Do not report it to usb core, instead, pretend to be U3,
- * thus usb core knows it's not ready for transfer
- */
- if (pls == XDEV_RESUME) {
- *status |= USB_SS_PORT_LS_U3;
- return;
- }
-
/* When the CAS bit is set then warm reset
* should be performed on port
*/
@@ -770,6 +761,16 @@ static void xhci_hub_report_usb3_link_state(struct xhci_hcd *xhci,
*/
pls |= USB_PORT_STAT_CONNECTION;
} else {
+ /*
+ * Resume state is an xHCI internal state. Do not report it to
+ * usb core, instead, pretend to be U3, thus usb core knows
+ * it's not ready for transfer.
+ */
+ if (pls == XDEV_RESUME) {
+ *status |= USB_SS_PORT_LS_U3;
+ return;
+ }
+
/*
* If CAS bit isn't set but the Port is already at
* Compliance Mode, fake a connection so the USB core
--
2.28.0
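To make the new ordering concrete, here is a minimal standalone sketch of
the logic after this patch. The register constants are illustrative
stand-ins for the ones in xhci.h/ch9.h, and the elided branches belong to
the real xhci_hub_report_usb3_link_state():

#include <stdint.h>

/* Illustrative values only -- the real definitions live in the kernel. */
#define PORT_PLS_MASK      (0xf << 5)
#define XDEV_RESUME        (0xf << 5)
#define PORT_CAS           (1 << 24)
#define USB_SS_PORT_LS_U3  0x0060

static void report_usb3_link_state(uint32_t status_reg, uint32_t *status)
{
        uint32_t pls = status_reg & PORT_PLS_MASK;

        if (status_reg & PORT_CAS) {
                /*
                 * CAS wins: report a compliance-mode connection so the
                 * hub state machine warm-resets the port (elided here).
                 */
                return;
        }

        /* Only a plain resume is masked from usbcore as U3. */
        if (pls == XDEV_RESUME) {
                *status |= USB_SS_PORT_LS_U3;
                return;
        }

        /* ... report the remaining link states ... */
}

Checking CAS first means a port that needs recovery is never disguised as
a benign U3 suspend, which is exactly what went wrong in the log above.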
This is a note to let you know that I've just added the patch titled
usb: host: xhci: fix ep context print mismatch in debugfs
to my usb git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git
in the usb-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 0077b1b2c8d9ad5f7a08b62fb8524cdb9938388f Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Fri, 21 Aug 2020 12:15:47 +0300
Subject: usb: host: xhci: fix ep context print mismatch in debugfs
The index passed to xhci_get_ep_ctx() is a 0-based endpoint index, and the
function increments it internally to reach the endpoint context, so looping
with a 1-based dci printed each context against the wrong DMA address.
[rename dci to ep_index -Mathias]
Cc: stable <stable@vger.kernel.org> # v4.15+
Fixes: 02b6fdc2a153 ("usb: xhci: Add debugfs interface for xHCI driver")
Signed-off-by: Li Jun <jun.li@nxp.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20200821091549.20556-2-mathias.nyman@linux.intel.…
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/usb/host/xhci-debugfs.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
index 92e25a62fdb5..c88bffd68742 100644
--- a/drivers/usb/host/xhci-debugfs.c
+++ b/drivers/usb/host/xhci-debugfs.c
@@ -274,7 +274,7 @@ static int xhci_slot_context_show(struct seq_file *s, void *unused)
static int xhci_endpoint_context_show(struct seq_file *s, void *unused)
{
- int dci;
+ int ep_index;
dma_addr_t dma;
struct xhci_hcd *xhci;
struct xhci_ep_ctx *ep_ctx;
@@ -283,9 +283,9 @@ static int xhci_endpoint_context_show(struct seq_file *s, void *unused)
xhci = hcd_to_xhci(bus_to_hcd(dev->udev->bus));
- for (dci = 1; dci < 32; dci++) {
- ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, dci);
- dma = dev->out_ctx->dma + dci * CTX_SIZE(xhci->hcc_params);
+ for (ep_index = 0; ep_index < 31; ep_index++) {
+ ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, ep_index);
+ dma = dev->out_ctx->dma + (ep_index + 1) * CTX_SIZE(xhci->hcc_params);
seq_printf(s, "%pad: %s\n", &dma,
xhci_decode_ep_context(le32_to_cpu(ep_ctx->ep_info),
le32_to_cpu(ep_ctx->ep_info2),
--
2.28.0
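The off-by-one is easiest to see in isolation. Below is a small,
self-contained sketch of the DCI/ep_index mapping; the base address and
context size are hypothetical, while in the driver they come from the
device's output context and the controller's HCC parameters:

#include <stdio.h>

#define CTX_SIZE 32 /* hypothetical; the xHC reports 32 or 64 bytes */

int main(void)
{
        unsigned long out_ctx_dma = 0x1000; /* hypothetical base */

        for (int ep_index = 0; ep_index < 31; ep_index++) {
                int dci = ep_index + 1; /* the slot context occupies DCI 0 */
                unsigned long dma = out_ctx_dma + dci * CTX_SIZE;

                printf("ep_index %2d -> DCI %2d at 0x%lx\n",
                       ep_index, dci, dma);
        }
        return 0;
}

The old loop iterated dci from 1 to 31 and passed it straight to
xhci_get_ep_ctx(), which adds 1 again, so every context was printed one
slot ahead of the DMA address shown next to it.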
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a4501bac0e553bed117b7e1b166d49731caf7260 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@codeaurora.org>
Date: Mon, 10 Aug 2020 12:36:19 +0530
Subject: [PATCH] opp: Enable resources again if they were disabled earlier
dev_pm_opp_set_rate() can now be called with freq = 0 in order
to either drop performance or bandwidth votes or to disable
regulators on platforms which support them.
In such cases, a subsequent call to dev_pm_opp_set_rate() with the same
frequency ends up returning early because 'old_freq == freq'.
Instead, make it fall through, restore the dropped performance and
bandwidth votes, and/or re-enable the regulators.
Cc: v5.3+ <stable@vger.kernel.org> # v5.3+
Fixes: cd7ea582866f ("opp: Make dev_pm_opp_set_rate() handle freq = 0 to drop performance votes")
Reported-by: Sajida Bhanu <sbhanu@codeaurora.org>
Reviewed-by: Sibi Sankar <sibis@codeaurora.org>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Tested-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
[ Viresh: Don't skip clk_set_rate() and massaged changelog ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index bdb028c7793d..9668ea04cc80 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -934,10 +934,13 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
/* Return early if nothing to do */
if (old_freq == freq) {
- dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
- __func__, freq);
- ret = 0;
- goto put_opp_table;
+ if (!opp_table->required_opp_tables && !opp_table->regulators &&
+ !opp_table->paths) {
+ dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
+ __func__, freq);
+ ret = 0;
+ goto put_opp_table;
+ }
}
/*
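A hedged consumer-side sketch of the sequence this fix repairs, built on
the real dev_pm_opp_set_rate() API; the device and the 200 MHz rate are
hypothetical and error handling is trimmed:

#include <linux/device.h>
#include <linux/pm_opp.h>

/* Sketch only: 'dev' is assumed to have an OPP table with regulators. */
static int opp_suspend_resume_example(struct device *dev)
{
        int ret;

        /* freq = 0 drops perf/bandwidth votes and disables regulators. */
        ret = dev_pm_opp_set_rate(dev, 0);
        if (ret)
                return ret;

        /*
         * Re-request the previous rate. Before this fix, the
         * 'old_freq == freq' check returned early here and the
         * resources dropped above were never re-enabled.
         */
        return dev_pm_opp_set_rate(dev, 200000000);
}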
The patch below does not apply to the 5.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a4501bac0e553bed117b7e1b166d49731caf7260 Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@codeaurora.org>
Date: Mon, 10 Aug 2020 12:36:19 +0530
Subject: [PATCH] opp: Enable resources again if they were disabled earlier
dev_pm_opp_set_rate() can now be called with freq = 0 in order
to either drop performance or bandwidth votes or to disable
regulators on platforms which support them.
In such cases, a subsequent call to dev_pm_opp_set_rate() with the same
frequency ends up returning early because 'old_freq == freq'.
Instead, make it fall through, restore the dropped performance and
bandwidth votes, and/or re-enable the regulators.
Cc: v5.3+ <stable@vger.kernel.org> # v5.3+
Fixes: cd7ea582866f ("opp: Make dev_pm_opp_set_rate() handle freq = 0 to drop performance votes")
Reported-by: Sajida Bhanu <sbhanu@codeaurora.org>
Reviewed-by: Sibi Sankar <sibis@codeaurora.org>
Reported-by: Matthias Kaehlcke <mka@chromium.org>
Tested-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
[ Viresh: Don't skip clk_set_rate() and massaged changelog ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index bdb028c7793d..9668ea04cc80 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -934,10 +934,13 @@ int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq)
/* Return early if nothing to do */
if (old_freq == freq) {
- dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
- __func__, freq);
- ret = 0;
- goto put_opp_table;
+ if (!opp_table->required_opp_tables && !opp_table->regulators &&
+ !opp_table->paths) {
+ dev_dbg(dev, "%s: old/new frequencies (%lu Hz) are same, nothing to do\n",
+ __func__, freq);
+ ret = 0;
+ goto put_opp_table;
+ }
}
/*
The patch below does not apply to the 5.8-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From df3ab3cb7eae63c6eb7c9aebcc196a75d59f65dd Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 16 Jul 2020 10:46:43 +0100
Subject: [PATCH] drm/i915: Provide the perf pmu.module
Rather than manually implement our own module reference counting for perf
pmu events, finally realise that there is a module member in struct pmu
for this very purpose.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200716094643.31410-1-chris@…
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
(cherry picked from commit 27e897beec1c59861f15d4d3562c39ad1143620f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 28bc5f13ae52..056994224c6b 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -445,8 +445,6 @@ static void i915_pmu_event_destroy(struct perf_event *event)
container_of(event->pmu, typeof(*i915), pmu.base);
drm_WARN_ON(&i915->drm, event->parent);
-
- module_put(THIS_MODULE);
}
static int
@@ -538,10 +536,8 @@ static int i915_pmu_event_init(struct perf_event *event)
if (ret)
return ret;
- if (!event->parent) {
- __module_get(THIS_MODULE);
+ if (!event->parent)
event->destroy = i915_pmu_event_destroy;
- }
return 0;
}
@@ -1130,6 +1126,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
if (!pmu->base.attr_groups)
goto err_attr;
+ pmu->base.module = THIS_MODULE;
pmu->base.task_ctx_nr = perf_invalid_context;
pmu->base.event_init = i915_pmu_event_init;
pmu->base.add = i915_pmu_event_add;
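The general pattern the patch switches to, as a minimal sketch; the PMU
here is hypothetical, and only the module-pinning detail is the point:

#include <linux/module.h>
#include <linux/perf_event.h>

static struct pmu example_pmu;

static int example_pmu_register(void)
{
        /* Let the perf core pin the module for each event's lifetime. */
        example_pmu.module      = THIS_MODULE;
        example_pmu.task_ctx_nr = perf_invalid_context;
        /* .event_init, .add, .del, .start, .stop, .read set elsewhere */

        return perf_pmu_register(&example_pmu, "example_pmu", -1);
}

With pmu.module set, the perf core takes and releases the module reference
around each event, so the driver no longer needs __module_get() and
module_put() in its event_init and event_destroy callbacks.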
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable@vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Message-Id: <20200811102725.7121-3-will@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
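Stripped of the stage-2 page-table details, the pattern looks like this;
the walker, lock, and 2M step are hypothetical placeholders, not the arm64
code:

#include <linux/kernel.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sizes.h>
#include <linux/spinlock.h>

static void unmap_range_example(spinlock_t *lock, unsigned long start,
                                unsigned long end, unsigned int flags)
{
        bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
        unsigned long addr, next;

        for (addr = start; addr != end; addr = next) {
                next = min(addr + SZ_2M, end);
                /* ... tear down mappings in [addr, next) ... */

                /* The OOM reaper runs non-blockable: never sleep there. */
                if (may_block && next != end)
                        cond_resched_lock(lock);
        }
}

The caller derives may_block from the notifier's range flags, which is
exactly what the new kvm_unmap_hva_handler() does with
MMU_NOTIFIER_RANGE_BLOCKABLE above.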
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable@vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Message-Id: <20200811102725.7121-3-will@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable@vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Message-Id: <20200811102725.7121-3-will@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable@vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Message-Id: <20200811102725.7121-3-will@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5331379bc62611d1026173a09c73573384201d9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 11 Aug 2020 11:27:25 +0100
Subject: [PATCH] KVM: arm64: Only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE
is not set
When an MMU notifier call results in unmapping a range that spans multiple
PGDs, we end up calling into cond_resched_lock() when crossing a PGD boundary,
since this avoids running into RCU stalls during VM teardown. Unfortunately,
if the VM is destroyed as a result of OOM, then blocking is not permitted
and the call to the scheduler triggers the following BUG():
| BUG: sleeping function called from invalid context at arch/arm64/kvm/mmu.c:394
| in_atomic(): 1, irqs_disabled(): 0, non_block: 1, pid: 36, name: oom_reaper
| INFO: lockdep is turned off.
| CPU: 3 PID: 36 Comm: oom_reaper Not tainted 5.8.0 #1
| Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
| Call trace:
| dump_backtrace+0x0/0x284
| show_stack+0x1c/0x28
| dump_stack+0xf0/0x1a4
| ___might_sleep+0x2bc/0x2cc
| unmap_stage2_range+0x160/0x1ac
| kvm_unmap_hva_range+0x1a0/0x1c8
| kvm_mmu_notifier_invalidate_range_start+0x8c/0xf8
| __mmu_notifier_invalidate_range_start+0x218/0x31c
| mmu_notifier_invalidate_range_start_nonblock+0x78/0xb0
| __oom_reap_task_mm+0x128/0x268
| oom_reap_task+0xac/0x298
| oom_reaper+0x178/0x17c
| kthread+0x1e4/0x1fc
| ret_from_fork+0x10/0x30
Use the new 'flags' argument to kvm_unmap_hva_range() to ensure that we
only reschedule if MMU_NOTIFIER_RANGE_BLOCKABLE is set in the notifier
flags.
Cc: <stable@vger.kernel.org>
Fixes: 8b3405e345b5 ("kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd")
Cc: Marc Zyngier <maz@kernel.org>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Message-Id: <20200811102725.7121-3-will@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc351802ff18..ba00bcc0c884 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -343,7 +343,8 @@ static void unmap_stage2_p4ds(struct kvm_s2_mmu *mmu, pgd_t *pgd,
* destroying the VM), otherwise another faulting VCPU may come in and mess
* with things behind our backs.
*/
-static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size,
+ bool may_block)
{
struct kvm *kvm = mmu->kvm;
pgd_t *pgd;
@@ -369,11 +370,16 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
- if (next != end)
+ if (may_block && next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
+static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
+{
+ __unmap_stage2_range(mmu, start, size, true);
+}
+
static void stage2_flush_ptes(struct kvm_s2_mmu *mmu, pmd_t *pmd,
phys_addr_t addr, phys_addr_t end)
{
@@ -2208,7 +2214,10 @@ static int handle_hva_to_gpa(struct kvm *kvm,
static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data)
{
- unmap_stage2_range(&kvm->arch.mmu, gpa, size);
+ unsigned flags = *(unsigned *)data;
+ bool may_block = flags & MMU_NOTIFIER_RANGE_BLOCKABLE;
+
+ __unmap_stage2_range(&kvm->arch.mmu, gpa, size, may_block);
return 0;
}
@@ -2219,7 +2228,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
trace_kvm_unmap_hva_range(start, end);
- handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
+ handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, &flags);
return 0;
}