The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From da8e393e23efb60eba8959856c7df88f9859f6eb Mon Sep 17 00:00:00 2001
From: Dave Stevenson <dave.stevenson(a)raspberrypi.com>
Date: Mon, 13 Jun 2022 16:47:28 +0200
Subject: [PATCH] drm/vc4: drv: Adopt the dma configuration from the HVS or V3D
component
vc4_drv isn't necessarily under the /soc node in DT as it is a
virtual device, but it is the one that does the allocations.
The DMA addresses are consumed by primarily the HVS or V3D, and
those require VideoCore cache alias address mapping, and so will be
under /soc.
During probe find the a suitable device node for HVS or V3D,
and adopt the DMA configuration of that node.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Dave Stevenson <dave.stevenson(a)raspberrypi.com>
Link: https://lore.kernel.org/r/20220613144800.326124-2-maxime@cerno.tech
Signed-off-by: Maxime Ripard <maxime(a)cerno.tech>
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 162bc18e7497..14a7d529144d 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -209,6 +209,15 @@ static void vc4_match_add_drivers(struct device *dev,
}
}
+const struct of_device_id vc4_dma_range_matches[] = {
+ { .compatible = "brcm,bcm2711-hvs" },
+ { .compatible = "brcm,bcm2835-hvs" },
+ { .compatible = "brcm,bcm2835-v3d" },
+ { .compatible = "brcm,cygnus-v3d" },
+ { .compatible = "brcm,vc4-v3d" },
+ {}
+};
+
static int vc4_drm_bind(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
@@ -227,6 +236,16 @@ static int vc4_drm_bind(struct device *dev)
vc4_drm_driver.driver_features &= ~DRIVER_RENDER;
of_node_put(node);
+ node = of_find_matching_node_and_match(NULL, vc4_dma_range_matches,
+ NULL);
+ if (node) {
+ ret = of_dma_configure(dev, node, true);
+ of_node_put(node);
+
+ if (ret)
+ return ret;
+ }
+
vc4 = devm_drm_dev_alloc(dev, &vc4_drm_driver, struct vc4_dev, base);
if (IS_ERR(vc4))
return PTR_ERR(vc4);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson(a)intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721a…
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8da3314bb6bf..68c2b0d8f187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson(a)intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721a…
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8da3314bb6bf..68c2b0d8f187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson(a)intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721a…
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8da3314bb6bf..68c2b0d8f187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson(a)intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721a…
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8da3314bb6bf..68c2b0d8f187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson(a)intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721a…
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8da3314bb6bf..68c2b0d8f187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson(a)intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721a…
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8da3314bb6bf..68c2b0d8f187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson(a)intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721a…
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8da3314bb6bf..68c2b0d8f187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33da97894758737895e90c909f16786052680ef4 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris.p.wilson(a)intel.com>
Date: Tue, 12 Jul 2022 16:21:33 +0100
Subject: [PATCH] drm/i915/gt: Serialize TLB invalidates with GT resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Avoid trying to invalidate the TLB in the middle of performing an
engine reset, as this may result in the reset timing out. Currently,
the TLB invalidate is only serialised by its own mutex, forgoing the
uncore lock, but we can take the uncore->lock as well to serialise
the mmio access, thereby serialising with the GDRST.
Tested on a NUC5i7RYB, BIOS RYBDWi35.86A.0380.2019.0517.1530 with
i915 selftest/hangcheck.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store")
Reported-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Tested-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Reviewed-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Chris Wilson <chris.p.wilson(a)intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)linux.intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e59a7c45dd919a530256b9ac721a…
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 8da3314bb6bf..68c2b0d8f187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
@@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d36bdd77b9e6aa7f5cb7b0f11ebbab8e5febf10b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala(a)linux.intel.com>
Date: Mon, 13 Jun 2022 23:14:39 +0300
Subject: [PATCH] drm/i915: Implement w/a 22010492432 for adl-s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
adl-s needs the combo PLL DCO fraction w/a as well.
Gets us slightly more accurate clock out of the PLL.
Cc: stable(a)vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220613201439.23341-1-ville.…
Reviewed-by: Matt Roper <matthew.d.roper(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 64708e874b13..982e5b945680 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2459,7 +2459,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params,
}
/*
- * Display WA #22010492432: ehl, tgl, adl-p
+ * Display WA #22010492432: ehl, tgl, adl-s, adl-p
* Program half of the nominal DCO divider fraction value.
*/
static bool
@@ -2467,7 +2467,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915)
{
return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) &&
IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
- IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) &&
+ IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
i915->dpll.ref_clks.nssc == 38400;
}
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d36bdd77b9e6aa7f5cb7b0f11ebbab8e5febf10b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala(a)linux.intel.com>
Date: Mon, 13 Jun 2022 23:14:39 +0300
Subject: [PATCH] drm/i915: Implement w/a 22010492432 for adl-s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
adl-s needs the combo PLL DCO fraction w/a as well.
Gets us slightly more accurate clock out of the PLL.
Cc: stable(a)vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220613201439.23341-1-ville.…
Reviewed-by: Matt Roper <matthew.d.roper(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 64708e874b13..982e5b945680 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2459,7 +2459,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params,
}
/*
- * Display WA #22010492432: ehl, tgl, adl-p
+ * Display WA #22010492432: ehl, tgl, adl-s, adl-p
* Program half of the nominal DCO divider fraction value.
*/
static bool
@@ -2467,7 +2467,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915)
{
return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) &&
IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
- IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) &&
+ IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
i915->dpll.ref_clks.nssc == 38400;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d36bdd77b9e6aa7f5cb7b0f11ebbab8e5febf10b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala(a)linux.intel.com>
Date: Mon, 13 Jun 2022 23:14:39 +0300
Subject: [PATCH] drm/i915: Implement w/a 22010492432 for adl-s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
adl-s needs the combo PLL DCO fraction w/a as well.
Gets us slightly more accurate clock out of the PLL.
Cc: stable(a)vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220613201439.23341-1-ville.…
Reviewed-by: Matt Roper <matthew.d.roper(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 64708e874b13..982e5b945680 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2459,7 +2459,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params,
}
/*
- * Display WA #22010492432: ehl, tgl, adl-p
+ * Display WA #22010492432: ehl, tgl, adl-s, adl-p
* Program half of the nominal DCO divider fraction value.
*/
static bool
@@ -2467,7 +2467,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915)
{
return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) &&
IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
- IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) &&
+ IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
i915->dpll.ref_clks.nssc == 38400;
}
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d36bdd77b9e6aa7f5cb7b0f11ebbab8e5febf10b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala(a)linux.intel.com>
Date: Mon, 13 Jun 2022 23:14:39 +0300
Subject: [PATCH] drm/i915: Implement w/a 22010492432 for adl-s
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
adl-s needs the combo PLL DCO fraction w/a as well.
Gets us slightly more accurate clock out of the PLL.
Cc: stable(a)vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220613201439.23341-1-ville.…
Reviewed-by: Matt Roper <matthew.d.roper(a)intel.com>
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 64708e874b13..982e5b945680 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -2459,7 +2459,7 @@ static void icl_wrpll_params_populate(struct skl_wrpll_params *params,
}
/*
- * Display WA #22010492432: ehl, tgl, adl-p
+ * Display WA #22010492432: ehl, tgl, adl-s, adl-p
* Program half of the nominal DCO divider fraction value.
*/
static bool
@@ -2467,7 +2467,7 @@ ehl_combo_pll_div_frac_wa_needed(struct drm_i915_private *i915)
{
return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) &&
IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
- IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) &&
+ IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
i915->dpll.ref_clks.nssc == 38400;
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 12 Jul 2022 16:21:32 +0100
Subject: [PATCH] drm/i915/gt: Serialize GRDOM access between multiple engine
resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 12 Jul 2022 16:21:32 +0100
Subject: [PATCH] drm/i915/gt: Serialize GRDOM access between multiple engine
resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 12 Jul 2022 16:21:32 +0100
Subject: [PATCH] drm/i915/gt: Serialize GRDOM access between multiple engine
resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 12 Jul 2022 16:21:32 +0100
Subject: [PATCH] drm/i915/gt: Serialize GRDOM access between multiple engine
resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 12 Jul 2022 16:21:32 +0100
Subject: [PATCH] drm/i915/gt: Serialize GRDOM access between multiple engine
resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 12 Jul 2022 16:21:32 +0100
Subject: [PATCH] drm/i915/gt: Serialize GRDOM access between multiple engine
resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 12 Jul 2022 16:21:32 +0100
Subject: [PATCH] drm/i915/gt: Serialize GRDOM access between multiple engine
resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 336561a914fc0c6f1218228718f633b31b7af1c3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris(a)chris-wilson.co.uk>
Date: Tue, 12 Jul 2022 16:21:32 +0100
Subject: [PATCH] drm/i915/gt: Serialize GRDOM access between multiple engine
resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Don't allow two engines to be reset in parallel, as they would both
try to select a reset bit (and send requests to common registers)
and wait on that register, at the same time. Serialize control of
the reset requests/acks using the uncore->lock, which will also ensure
that no other GT state changes at the same time as the actual reset.
Cc: stable(a)vger.kernel.org # v4.4 and upper
Reported-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Signed-off-by: Chris Wilson <chris(a)chris-wilson.co.uk>
Acked-by: Mika Kuoppala <mika.kuoppala(a)linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti(a)intel.com>
Reviewed-by: Andrzej Hajda <andrzej.hajda(a)intel.com>
Acked-by: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/e0a2d894e77aed7c2e36b0d1abdc7…
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index a5338c3fde7a..c68d36fb5bbd 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
@@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
@@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
@@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
@@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08c59dde71b73a0ac94e3ed2d431345b01f20485 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Fri, 20 May 2022 12:46:00 +0300
Subject: [PATCH] drm/i915/dsi: fix VBT send packet port selection for ICL+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The VBT send packet port selection was never updated for ICL+ where the
2nd link is on port B instead of port C as in VLV+ DSI.
First, single link DSI needs to use the configured port instead of
relying on the VBT sequence block port. Remove the hard-coded port C
check here and make it generic. For reference, see commit f915084edc5a
("drm/i915: Changes related to the sequence port no for") for the
original VLV specific fix.
Second, the sequence block port number is either 0 or 1, where 1
indicates the 2nd link. Remove the hard-coded port C here for 2nd
link. (This could be a "find second set bit" on DSI ports, but just
check the two possible options.)
Third, sanity check the result with a warning to avoid a NULL pointer
dereference.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5984
Cc: stable(a)vger.kernel.org # v4.19+
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220520094600.2066945-1-jani…
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f370e9c4350d..dd24aef925f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -125,9 +125,25 @@ struct i2c_adapter_lookup {
#define ICL_GPIO_DDPA_CTRLCLK_2 8
#define ICL_GPIO_DDPA_CTRLDATA_2 9
-static enum port intel_dsi_seq_port_to_port(u8 port)
+static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
+ u8 seq_port)
{
- return port ? PORT_C : PORT_A;
+ /*
+ * If single link DSI is being used on any port, the VBT sequence block
+ * send packet apparently always has 0 for the port. Just use the port
+ * we have configured, and ignore the sequence block port.
+ */
+ if (hweight8(intel_dsi->ports) == 1)
+ return ffs(intel_dsi->ports) - 1;
+
+ if (seq_port) {
+ if (intel_dsi->ports & PORT_B)
+ return PORT_B;
+ else if (intel_dsi->ports & PORT_C)
+ return PORT_C;
+ }
+
+ return PORT_A;
}
static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
@@ -149,15 +165,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
seq_port = (flags >> MIPI_PORT_SHIFT) & 3;
- /* For DSI single link on Port A & C, the seq_port value which is
- * parsed from Sequence Block#53 of VBT has been set to 0
- * Now, read/write of packets for the DSI single link on Port A and
- * Port C will based on the DVO port from VBT block 2.
- */
- if (intel_dsi->ports == (1 << PORT_C))
- port = PORT_C;
- else
- port = intel_dsi_seq_port_to_port(seq_port);
+ port = intel_dsi_seq_port_to_port(intel_dsi, seq_port);
+
+ if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port]))
+ goto out;
dsi_device = intel_dsi->dsi_hosts[port]->device;
if (!dsi_device) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08c59dde71b73a0ac94e3ed2d431345b01f20485 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Fri, 20 May 2022 12:46:00 +0300
Subject: [PATCH] drm/i915/dsi: fix VBT send packet port selection for ICL+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The VBT send packet port selection was never updated for ICL+ where the
2nd link is on port B instead of port C as in VLV+ DSI.
First, single link DSI needs to use the configured port instead of
relying on the VBT sequence block port. Remove the hard-coded port C
check here and make it generic. For reference, see commit f915084edc5a
("drm/i915: Changes related to the sequence port no for") for the
original VLV specific fix.
Second, the sequence block port number is either 0 or 1, where 1
indicates the 2nd link. Remove the hard-coded port C here for 2nd
link. (This could be a "find second set bit" on DSI ports, but just
check the two possible options.)
Third, sanity check the result with a warning to avoid a NULL pointer
dereference.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5984
Cc: stable(a)vger.kernel.org # v4.19+
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220520094600.2066945-1-jani…
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f370e9c4350d..dd24aef925f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -125,9 +125,25 @@ struct i2c_adapter_lookup {
#define ICL_GPIO_DDPA_CTRLCLK_2 8
#define ICL_GPIO_DDPA_CTRLDATA_2 9
-static enum port intel_dsi_seq_port_to_port(u8 port)
+static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
+ u8 seq_port)
{
- return port ? PORT_C : PORT_A;
+ /*
+ * If single link DSI is being used on any port, the VBT sequence block
+ * send packet apparently always has 0 for the port. Just use the port
+ * we have configured, and ignore the sequence block port.
+ */
+ if (hweight8(intel_dsi->ports) == 1)
+ return ffs(intel_dsi->ports) - 1;
+
+ if (seq_port) {
+ if (intel_dsi->ports & PORT_B)
+ return PORT_B;
+ else if (intel_dsi->ports & PORT_C)
+ return PORT_C;
+ }
+
+ return PORT_A;
}
static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
@@ -149,15 +165,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
seq_port = (flags >> MIPI_PORT_SHIFT) & 3;
- /* For DSI single link on Port A & C, the seq_port value which is
- * parsed from Sequence Block#53 of VBT has been set to 0
- * Now, read/write of packets for the DSI single link on Port A and
- * Port C will based on the DVO port from VBT block 2.
- */
- if (intel_dsi->ports == (1 << PORT_C))
- port = PORT_C;
- else
- port = intel_dsi_seq_port_to_port(seq_port);
+ port = intel_dsi_seq_port_to_port(intel_dsi, seq_port);
+
+ if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port]))
+ goto out;
dsi_device = intel_dsi->dsi_hosts[port]->device;
if (!dsi_device) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08c59dde71b73a0ac94e3ed2d431345b01f20485 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Fri, 20 May 2022 12:46:00 +0300
Subject: [PATCH] drm/i915/dsi: fix VBT send packet port selection for ICL+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The VBT send packet port selection was never updated for ICL+ where the
2nd link is on port B instead of port C as in VLV+ DSI.
First, single link DSI needs to use the configured port instead of
relying on the VBT sequence block port. Remove the hard-coded port C
check here and make it generic. For reference, see commit f915084edc5a
("drm/i915: Changes related to the sequence port no for") for the
original VLV specific fix.
Second, the sequence block port number is either 0 or 1, where 1
indicates the 2nd link. Remove the hard-coded port C here for 2nd
link. (This could be a "find second set bit" on DSI ports, but just
check the two possible options.)
Third, sanity check the result with a warning to avoid a NULL pointer
dereference.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5984
Cc: stable(a)vger.kernel.org # v4.19+
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220520094600.2066945-1-jani…
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f370e9c4350d..dd24aef925f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -125,9 +125,25 @@ struct i2c_adapter_lookup {
#define ICL_GPIO_DDPA_CTRLCLK_2 8
#define ICL_GPIO_DDPA_CTRLDATA_2 9
-static enum port intel_dsi_seq_port_to_port(u8 port)
+static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
+ u8 seq_port)
{
- return port ? PORT_C : PORT_A;
+ /*
+ * If single link DSI is being used on any port, the VBT sequence block
+ * send packet apparently always has 0 for the port. Just use the port
+ * we have configured, and ignore the sequence block port.
+ */
+ if (hweight8(intel_dsi->ports) == 1)
+ return ffs(intel_dsi->ports) - 1;
+
+ if (seq_port) {
+ if (intel_dsi->ports & PORT_B)
+ return PORT_B;
+ else if (intel_dsi->ports & PORT_C)
+ return PORT_C;
+ }
+
+ return PORT_A;
}
static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
@@ -149,15 +165,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
seq_port = (flags >> MIPI_PORT_SHIFT) & 3;
- /* For DSI single link on Port A & C, the seq_port value which is
- * parsed from Sequence Block#53 of VBT has been set to 0
- * Now, read/write of packets for the DSI single link on Port A and
- * Port C will based on the DVO port from VBT block 2.
- */
- if (intel_dsi->ports == (1 << PORT_C))
- port = PORT_C;
- else
- port = intel_dsi_seq_port_to_port(seq_port);
+ port = intel_dsi_seq_port_to_port(intel_dsi, seq_port);
+
+ if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port]))
+ goto out;
dsi_device = intel_dsi->dsi_hosts[port]->device;
if (!dsi_device) {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08c59dde71b73a0ac94e3ed2d431345b01f20485 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Fri, 20 May 2022 12:46:00 +0300
Subject: [PATCH] drm/i915/dsi: fix VBT send packet port selection for ICL+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The VBT send packet port selection was never updated for ICL+ where the
2nd link is on port B instead of port C as in VLV+ DSI.
First, single link DSI needs to use the configured port instead of
relying on the VBT sequence block port. Remove the hard-coded port C
check here and make it generic. For reference, see commit f915084edc5a
("drm/i915: Changes related to the sequence port no for") for the
original VLV specific fix.
Second, the sequence block port number is either 0 or 1, where 1
indicates the 2nd link. Remove the hard-coded port C here for 2nd
link. (This could be a "find second set bit" on DSI ports, but just
check the two possible options.)
Third, sanity check the result with a warning to avoid a NULL pointer
dereference.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5984
Cc: stable(a)vger.kernel.org # v4.19+
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220520094600.2066945-1-jani…
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f370e9c4350d..dd24aef925f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -125,9 +125,25 @@ struct i2c_adapter_lookup {
#define ICL_GPIO_DDPA_CTRLCLK_2 8
#define ICL_GPIO_DDPA_CTRLDATA_2 9
-static enum port intel_dsi_seq_port_to_port(u8 port)
+static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
+ u8 seq_port)
{
- return port ? PORT_C : PORT_A;
+ /*
+ * If single link DSI is being used on any port, the VBT sequence block
+ * send packet apparently always has 0 for the port. Just use the port
+ * we have configured, and ignore the sequence block port.
+ */
+ if (hweight8(intel_dsi->ports) == 1)
+ return ffs(intel_dsi->ports) - 1;
+
+ if (seq_port) {
+ if (intel_dsi->ports & PORT_B)
+ return PORT_B;
+ else if (intel_dsi->ports & PORT_C)
+ return PORT_C;
+ }
+
+ return PORT_A;
}
static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
@@ -149,15 +165,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
seq_port = (flags >> MIPI_PORT_SHIFT) & 3;
- /* For DSI single link on Port A & C, the seq_port value which is
- * parsed from Sequence Block#53 of VBT has been set to 0
- * Now, read/write of packets for the DSI single link on Port A and
- * Port C will based on the DVO port from VBT block 2.
- */
- if (intel_dsi->ports == (1 << PORT_C))
- port = PORT_C;
- else
- port = intel_dsi_seq_port_to_port(seq_port);
+ port = intel_dsi_seq_port_to_port(intel_dsi, seq_port);
+
+ if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port]))
+ goto out;
dsi_device = intel_dsi->dsi_hosts[port]->device;
if (!dsi_device) {
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08c59dde71b73a0ac94e3ed2d431345b01f20485 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Fri, 20 May 2022 12:46:00 +0300
Subject: [PATCH] drm/i915/dsi: fix VBT send packet port selection for ICL+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The VBT send packet port selection was never updated for ICL+ where the
2nd link is on port B instead of port C as in VLV+ DSI.
First, single link DSI needs to use the configured port instead of
relying on the VBT sequence block port. Remove the hard-coded port C
check here and make it generic. For reference, see commit f915084edc5a
("drm/i915: Changes related to the sequence port no for") for the
original VLV specific fix.
Second, the sequence block port number is either 0 or 1, where 1
indicates the 2nd link. Remove the hard-coded port C here for 2nd
link. (This could be a "find second set bit" on DSI ports, but just
check the two possible options.)
Third, sanity check the result with a warning to avoid a NULL pointer
dereference.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5984
Cc: stable(a)vger.kernel.org # v4.19+
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220520094600.2066945-1-jani…
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f370e9c4350d..dd24aef925f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -125,9 +125,25 @@ struct i2c_adapter_lookup {
#define ICL_GPIO_DDPA_CTRLCLK_2 8
#define ICL_GPIO_DDPA_CTRLDATA_2 9
-static enum port intel_dsi_seq_port_to_port(u8 port)
+static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
+ u8 seq_port)
{
- return port ? PORT_C : PORT_A;
+ /*
+ * If single link DSI is being used on any port, the VBT sequence block
+ * send packet apparently always has 0 for the port. Just use the port
+ * we have configured, and ignore the sequence block port.
+ */
+ if (hweight8(intel_dsi->ports) == 1)
+ return ffs(intel_dsi->ports) - 1;
+
+ if (seq_port) {
+ if (intel_dsi->ports & PORT_B)
+ return PORT_B;
+ else if (intel_dsi->ports & PORT_C)
+ return PORT_C;
+ }
+
+ return PORT_A;
}
static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
@@ -149,15 +165,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
seq_port = (flags >> MIPI_PORT_SHIFT) & 3;
- /* For DSI single link on Port A & C, the seq_port value which is
- * parsed from Sequence Block#53 of VBT has been set to 0
- * Now, read/write of packets for the DSI single link on Port A and
- * Port C will based on the DVO port from VBT block 2.
- */
- if (intel_dsi->ports == (1 << PORT_C))
- port = PORT_C;
- else
- port = intel_dsi_seq_port_to_port(seq_port);
+ port = intel_dsi_seq_port_to_port(intel_dsi, seq_port);
+
+ if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port]))
+ goto out;
dsi_device = intel_dsi->dsi_hosts[port]->device;
if (!dsi_device) {
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 08c59dde71b73a0ac94e3ed2d431345b01f20485 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula(a)intel.com>
Date: Fri, 20 May 2022 12:46:00 +0300
Subject: [PATCH] drm/i915/dsi: fix VBT send packet port selection for ICL+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The VBT send packet port selection was never updated for ICL+ where the
2nd link is on port B instead of port C as in VLV+ DSI.
First, single link DSI needs to use the configured port instead of
relying on the VBT sequence block port. Remove the hard-coded port C
check here and make it generic. For reference, see commit f915084edc5a
("drm/i915: Changes related to the sequence port no for") for the
original VLV specific fix.
Second, the sequence block port number is either 0 or 1, where 1
indicates the 2nd link. Remove the hard-coded port C here for 2nd
link. (This could be a "find second set bit" on DSI ports, but just
check the two possible options.)
Third, sanity check the result with a warning to avoid a NULL pointer
dereference.
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5984
Cc: stable(a)vger.kernel.org # v4.19+
Cc: Ville Syrjala <ville.syrjala(a)linux.intel.com>
Signed-off-by: Jani Nikula <jani.nikula(a)intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220520094600.2066945-1-jani…
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index f370e9c4350d..dd24aef925f2 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -125,9 +125,25 @@ struct i2c_adapter_lookup {
#define ICL_GPIO_DDPA_CTRLCLK_2 8
#define ICL_GPIO_DDPA_CTRLDATA_2 9
-static enum port intel_dsi_seq_port_to_port(u8 port)
+static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
+ u8 seq_port)
{
- return port ? PORT_C : PORT_A;
+ /*
+ * If single link DSI is being used on any port, the VBT sequence block
+ * send packet apparently always has 0 for the port. Just use the port
+ * we have configured, and ignore the sequence block port.
+ */
+ if (hweight8(intel_dsi->ports) == 1)
+ return ffs(intel_dsi->ports) - 1;
+
+ if (seq_port) {
+ if (intel_dsi->ports & PORT_B)
+ return PORT_B;
+ else if (intel_dsi->ports & PORT_C)
+ return PORT_C;
+ }
+
+ return PORT_A;
}
static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
@@ -149,15 +165,10 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi,
seq_port = (flags >> MIPI_PORT_SHIFT) & 3;
- /* For DSI single link on Port A & C, the seq_port value which is
- * parsed from Sequence Block#53 of VBT has been set to 0
- * Now, read/write of packets for the DSI single link on Port A and
- * Port C will based on the DVO port from VBT block 2.
- */
- if (intel_dsi->ports == (1 << PORT_C))
- port = PORT_C;
- else
- port = intel_dsi_seq_port_to_port(seq_port);
+ port = intel_dsi_seq_port_to_port(intel_dsi, seq_port);
+
+ if (drm_WARN_ON(&dev_priv->drm, !intel_dsi->dsi_hosts[port]))
+ goto out;
dsi_device = intel_dsi->dsi_hosts[port]->device;
if (!dsi_device) {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c08b4848f596fd95543197463b5162bd7bab2442 Mon Sep 17 00:00:00 2001
From: Chen Lifu <chenlifu(a)huawei.com>
Date: Wed, 15 Jun 2022 09:47:14 +0800
Subject: [PATCH] riscv: lib: uaccess: fix CSR_STATUS SR_SUM bit
Since commit 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
and commit ebcbd75e3962 ("riscv: Fix the bug in memory access fixup code"),
if __clear_user and __copy_user return from an fixup branch,
CSR_STATUS SR_SUM bit will be set, it is a vulnerability, so that
S-mode memory accesses to pages that are accessible by U-mode will success.
Disable S-mode access to U-mode memory should clear SR_SUM bit.
Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
Fixes: ebcbd75e3962 ("riscv: Fix the bug in memory access fixup code")
Signed-off-by: Chen Lifu <chenlifu(a)huawei.com>
Reviewed-by: Ben Dooks <ben.dooks(a)codethink.co.uk>
Link: https://lore.kernel.org/r/20220615014714.1650349-1-chenlifu@huawei.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 8c475f4da308..ec486e5369d9 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -175,7 +175,7 @@ ENTRY(__asm_copy_from_user)
/* Exception fixup code */
10:
/* Disable access to user memory */
- csrs CSR_STATUS, t6
+ csrc CSR_STATUS, t6
mv a0, t5
ret
ENDPROC(__asm_copy_to_user)
@@ -227,7 +227,7 @@ ENTRY(__clear_user)
/* Exception fixup code */
11:
/* Disable access to user memory */
- csrs CSR_STATUS, t6
+ csrc CSR_STATUS, t6
mv a0, a1
ret
ENDPROC(__clear_user)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c08b4848f596fd95543197463b5162bd7bab2442 Mon Sep 17 00:00:00 2001
From: Chen Lifu <chenlifu(a)huawei.com>
Date: Wed, 15 Jun 2022 09:47:14 +0800
Subject: [PATCH] riscv: lib: uaccess: fix CSR_STATUS SR_SUM bit
Since commit 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
and commit ebcbd75e3962 ("riscv: Fix the bug in memory access fixup code"),
if __clear_user and __copy_user return from an fixup branch,
CSR_STATUS SR_SUM bit will be set, it is a vulnerability, so that
S-mode memory accesses to pages that are accessible by U-mode will success.
Disable S-mode access to U-mode memory should clear SR_SUM bit.
Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
Fixes: ebcbd75e3962 ("riscv: Fix the bug in memory access fixup code")
Signed-off-by: Chen Lifu <chenlifu(a)huawei.com>
Reviewed-by: Ben Dooks <ben.dooks(a)codethink.co.uk>
Link: https://lore.kernel.org/r/20220615014714.1650349-1-chenlifu@huawei.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 8c475f4da308..ec486e5369d9 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -175,7 +175,7 @@ ENTRY(__asm_copy_from_user)
/* Exception fixup code */
10:
/* Disable access to user memory */
- csrs CSR_STATUS, t6
+ csrc CSR_STATUS, t6
mv a0, t5
ret
ENDPROC(__asm_copy_to_user)
@@ -227,7 +227,7 @@ ENTRY(__clear_user)
/* Exception fixup code */
11:
/* Disable access to user memory */
- csrs CSR_STATUS, t6
+ csrc CSR_STATUS, t6
mv a0, a1
ret
ENDPROC(__clear_user)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c08b4848f596fd95543197463b5162bd7bab2442 Mon Sep 17 00:00:00 2001
From: Chen Lifu <chenlifu(a)huawei.com>
Date: Wed, 15 Jun 2022 09:47:14 +0800
Subject: [PATCH] riscv: lib: uaccess: fix CSR_STATUS SR_SUM bit
Since commit 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
and commit ebcbd75e3962 ("riscv: Fix the bug in memory access fixup code"),
if __clear_user and __copy_user return from an fixup branch,
CSR_STATUS SR_SUM bit will be set, it is a vulnerability, so that
S-mode memory accesses to pages that are accessible by U-mode will success.
Disable S-mode access to U-mode memory should clear SR_SUM bit.
Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
Fixes: ebcbd75e3962 ("riscv: Fix the bug in memory access fixup code")
Signed-off-by: Chen Lifu <chenlifu(a)huawei.com>
Reviewed-by: Ben Dooks <ben.dooks(a)codethink.co.uk>
Link: https://lore.kernel.org/r/20220615014714.1650349-1-chenlifu@huawei.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 8c475f4da308..ec486e5369d9 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -175,7 +175,7 @@ ENTRY(__asm_copy_from_user)
/* Exception fixup code */
10:
/* Disable access to user memory */
- csrs CSR_STATUS, t6
+ csrc CSR_STATUS, t6
mv a0, t5
ret
ENDPROC(__asm_copy_to_user)
@@ -227,7 +227,7 @@ ENTRY(__clear_user)
/* Exception fixup code */
11:
/* Disable access to user memory */
- csrs CSR_STATUS, t6
+ csrc CSR_STATUS, t6
mv a0, a1
ret
ENDPROC(__clear_user)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c08b4848f596fd95543197463b5162bd7bab2442 Mon Sep 17 00:00:00 2001
From: Chen Lifu <chenlifu(a)huawei.com>
Date: Wed, 15 Jun 2022 09:47:14 +0800
Subject: [PATCH] riscv: lib: uaccess: fix CSR_STATUS SR_SUM bit
Since commit 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
and commit ebcbd75e3962 ("riscv: Fix the bug in memory access fixup code"),
if __clear_user and __copy_user return from an fixup branch,
CSR_STATUS SR_SUM bit will be set, it is a vulnerability, so that
S-mode memory accesses to pages that are accessible by U-mode will success.
Disable S-mode access to U-mode memory should clear SR_SUM bit.
Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
Fixes: ebcbd75e3962 ("riscv: Fix the bug in memory access fixup code")
Signed-off-by: Chen Lifu <chenlifu(a)huawei.com>
Reviewed-by: Ben Dooks <ben.dooks(a)codethink.co.uk>
Link: https://lore.kernel.org/r/20220615014714.1650349-1-chenlifu@huawei.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index 8c475f4da308..ec486e5369d9 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -175,7 +175,7 @@ ENTRY(__asm_copy_from_user)
/* Exception fixup code */
10:
/* Disable access to user memory */
- csrs CSR_STATUS, t6
+ csrc CSR_STATUS, t6
mv a0, t5
ret
ENDPROC(__asm_copy_to_user)
@@ -227,7 +227,7 @@ ENTRY(__clear_user)
/* Exception fixup code */
11:
/* Disable access to user memory */
- csrs CSR_STATUS, t6
+ csrc CSR_STATUS, t6
mv a0, a1
ret
ENDPROC(__clear_user)
--
Hello Dear
Am a dying woman here in the hospital, i was diagnose as a
Coronavirus patient over 2 months ago. I am A business woman who is
dealing with Gold Exportation, I Am 59 year old from USA California i
have a charitable and unfufilling project that am about to handover
to you, if you are interested to know more about this project please reply me.
Hope to hear from you
Best Regard
Mrs. Christopher
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c3497fd009ef2c59eea60d21c3ac22de3585ed7d Mon Sep 17 00:00:00 2001
From: Al Viro <viro(a)zeniv.linux.org.uk>
Date: Sun, 12 Jun 2022 19:50:29 -0400
Subject: [PATCH] fix short copy handling in copy_mc_pipe_to_iter()
Unlike other copying operations on ITER_PIPE, copy_mc_to_iter() can
result in a short copy. In that case we need to trim the unused
buffers, as well as the length of partially filled one - it's not
enough to set ->head, ->iov_offset and ->count to reflect how
much had we copied. Not hard to fix, fortunately...
I'd put a helper (pipe_discard_from(pipe, head)) into pipe_fs_i.h,
rather than iov_iter.c - it has nothing to do with iov_iter and
having it will allow us to avoid an ugly kludge in fs/splice.c.
We could put it into lib/iov_iter.c for now and move it later,
but I don't see the point going that way...
Cc: stable(a)kernel.org # 4.19+
Fixes: ca146f6f091e "lib/iov_iter: Fix pipe handling in _copy_to_iter_mcsafe()"
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Reviewed-by: Christian Brauner (Microsoft) <brauner(a)kernel.org>
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index cb0fd633a610..4ea496924106 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
return buf->ops->try_steal(pipe, buf);
}
+static inline void pipe_discard_from(struct pipe_inode_info *pipe,
+ unsigned int old_head)
+{
+ unsigned int mask = pipe->ring_size - 1;
+
+ while (pipe->head > old_head)
+ pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]);
+}
+
/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0b64695ab632..2bf20b48a04a 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -689,6 +689,7 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct pipe_inode_info *pipe = i->pipe;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head;
+ unsigned int valid = pipe->head;
size_t n, off, xfer = 0;
if (!sanity(i))
@@ -702,11 +703,17 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
chunk -= rem;
kunmap_local(p);
- i->head = i_head;
- i->iov_offset = off + chunk;
- xfer += chunk;
- if (rem)
+ if (chunk) {
+ i->head = i_head;
+ i->iov_offset = off + chunk;
+ xfer += chunk;
+ valid = i_head + 1;
+ }
+ if (rem) {
+ pipe->bufs[i_head & p_mask].len -= rem;
+ pipe_discard_from(pipe, valid);
break;
+ }
n -= chunk;
off = 0;
i_head++;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c3497fd009ef2c59eea60d21c3ac22de3585ed7d Mon Sep 17 00:00:00 2001
From: Al Viro <viro(a)zeniv.linux.org.uk>
Date: Sun, 12 Jun 2022 19:50:29 -0400
Subject: [PATCH] fix short copy handling in copy_mc_pipe_to_iter()
Unlike other copying operations on ITER_PIPE, copy_mc_to_iter() can
result in a short copy. In that case we need to trim the unused
buffers, as well as the length of partially filled one - it's not
enough to set ->head, ->iov_offset and ->count to reflect how
much had we copied. Not hard to fix, fortunately...
I'd put a helper (pipe_discard_from(pipe, head)) into pipe_fs_i.h,
rather than iov_iter.c - it has nothing to do with iov_iter and
having it will allow us to avoid an ugly kludge in fs/splice.c.
We could put it into lib/iov_iter.c for now and move it later,
but I don't see the point going that way...
Cc: stable(a)kernel.org # 4.19+
Fixes: ca146f6f091e "lib/iov_iter: Fix pipe handling in _copy_to_iter_mcsafe()"
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Reviewed-by: Christian Brauner (Microsoft) <brauner(a)kernel.org>
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index cb0fd633a610..4ea496924106 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
return buf->ops->try_steal(pipe, buf);
}
+static inline void pipe_discard_from(struct pipe_inode_info *pipe,
+ unsigned int old_head)
+{
+ unsigned int mask = pipe->ring_size - 1;
+
+ while (pipe->head > old_head)
+ pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]);
+}
+
/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0b64695ab632..2bf20b48a04a 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -689,6 +689,7 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct pipe_inode_info *pipe = i->pipe;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head;
+ unsigned int valid = pipe->head;
size_t n, off, xfer = 0;
if (!sanity(i))
@@ -702,11 +703,17 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
chunk -= rem;
kunmap_local(p);
- i->head = i_head;
- i->iov_offset = off + chunk;
- xfer += chunk;
- if (rem)
+ if (chunk) {
+ i->head = i_head;
+ i->iov_offset = off + chunk;
+ xfer += chunk;
+ valid = i_head + 1;
+ }
+ if (rem) {
+ pipe->bufs[i_head & p_mask].len -= rem;
+ pipe_discard_from(pipe, valid);
break;
+ }
n -= chunk;
off = 0;
i_head++;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c3497fd009ef2c59eea60d21c3ac22de3585ed7d Mon Sep 17 00:00:00 2001
From: Al Viro <viro(a)zeniv.linux.org.uk>
Date: Sun, 12 Jun 2022 19:50:29 -0400
Subject: [PATCH] fix short copy handling in copy_mc_pipe_to_iter()
Unlike other copying operations on ITER_PIPE, copy_mc_to_iter() can
result in a short copy. In that case we need to trim the unused
buffers, as well as the length of partially filled one - it's not
enough to set ->head, ->iov_offset and ->count to reflect how
much had we copied. Not hard to fix, fortunately...
I'd put a helper (pipe_discard_from(pipe, head)) into pipe_fs_i.h,
rather than iov_iter.c - it has nothing to do with iov_iter and
having it will allow us to avoid an ugly kludge in fs/splice.c.
We could put it into lib/iov_iter.c for now and move it later,
but I don't see the point going that way...
Cc: stable(a)kernel.org # 4.19+
Fixes: ca146f6f091e "lib/iov_iter: Fix pipe handling in _copy_to_iter_mcsafe()"
Reviewed-by: Jeff Layton <jlayton(a)kernel.org>
Reviewed-by: Christian Brauner (Microsoft) <brauner(a)kernel.org>
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index cb0fd633a610..4ea496924106 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
return buf->ops->try_steal(pipe, buf);
}
+static inline void pipe_discard_from(struct pipe_inode_info *pipe,
+ unsigned int old_head)
+{
+ unsigned int mask = pipe->ring_size - 1;
+
+ while (pipe->head > old_head)
+ pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]);
+}
+
/* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
memory allocation, whereas PIPE_BUF makes atomicity guarantees. */
#define PIPE_SIZE PAGE_SIZE
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 0b64695ab632..2bf20b48a04a 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -689,6 +689,7 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
struct pipe_inode_info *pipe = i->pipe;
unsigned int p_mask = pipe->ring_size - 1;
unsigned int i_head;
+ unsigned int valid = pipe->head;
size_t n, off, xfer = 0;
if (!sanity(i))
@@ -702,11 +703,17 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
chunk -= rem;
kunmap_local(p);
- i->head = i_head;
- i->iov_offset = off + chunk;
- xfer += chunk;
- if (rem)
+ if (chunk) {
+ i->head = i_head;
+ i->iov_offset = off + chunk;
+ xfer += chunk;
+ valid = i_head + 1;
+ }
+ if (rem) {
+ pipe->bufs[i_head & p_mask].len -= rem;
+ pipe_discard_from(pipe, valid);
break;
+ }
n -= chunk;
off = 0;
i_head++;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Fri, 1 Jul 2022 22:47:51 +0200
Subject: [PATCH] usbnet: smsc95xx: Fix deadlock on runtime resume
Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
smsc95xx_resume() to call phy_init_hw(). That function waits for the
device to runtime resume even though it is placed in the runtime resume
path, causing a deadlock.
The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
which never uses the _nopm variant of usbnet_read_cmd().
Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
reset operation") causes a similar deadlock on resume if the device was
already runtime suspended when entering system sleep:
That's because the commit introduced smsc95xx_reset_resume(), which
calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
Fix by auto-detecting whether a device access is performed by the
suspend/resume task_struct and use the _nopm variant if so. This works
because the PM core guarantees that suspend/resume callbacks are run in
task context.
Stacktrace for posterity:
INFO: task kworker/2:1:49 blocked for more than 122 seconds.
Workqueue: usb_hub_wq hub_event
schedule
rpm_resume
__pm_runtime_resume
usb_autopm_get_interface
usbnet_read_cmd
__smsc95xx_read_reg
__smsc95xx_phy_wait_not_busy
__smsc95xx_mdio_read
smsc95xx_mdiobus_read
__mdiobus_read
mdiobus_read
smsc_phy_reset
phy_init_hw
smsc95xx_resume
usb_resume_interface
usb_resume_both
usb_runtime_resume
__rpm_callback
rpm_callback
rpm_resume
__pm_runtime_resume
usb_autoresume_device
hub_event
process_one_work
Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v3.16+
Cc: Andre Edich <andre.edich(a)microchip.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 35110814ba22..0316c80c3fc4 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -71,6 +71,7 @@ struct smsc95xx_priv {
struct fwnode_handle *irqfwnode;
struct mii_bus *mdiobus;
struct phy_device *phydev;
+ struct task_struct *pm_task;
};
static bool turbo_mode = true;
@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
u32 *data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_read_cmd;
else
fn = usbnet_read_cmd_nopm;
@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
u32 data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_write_cmd;
else
fn = usbnet_write_cmd_nopm;
@@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ pdata->pm_task = current;
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
+ pdata->pm_task = NULL;
return ret;
}
@@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+ pdata->pm_task = NULL;
return ret;
}
@@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
/* do this first to ensure it's cleared even in error case */
pdata->suspend_flags = 0;
+ pdata->pm_task = current;
+
if (suspend_flags & SUSPEND_ALLMODES) {
/* clear wake-up sources */
ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
if (ret < 0)
- return ret;
+ goto done;
/* clear wake-up status */
ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~PM_CTL_WOL_EN_;
val |= PM_CTL_WUPS_;
ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
if (ret < 0)
- return ret;
+ goto done;
}
phy_init_hw(pdata->phydev);
@@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
if (ret < 0)
netdev_warn(dev->net, "usbnet_resume error\n");
+done:
+ pdata->pm_task = NULL;
return ret;
}
static int smsc95xx_reset_resume(struct usb_interface *intf)
{
struct usbnet *dev = usb_get_intfdata(intf);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
+ pdata->pm_task = current;
ret = smsc95xx_reset(dev);
+ pdata->pm_task = NULL;
if (ret < 0)
return ret;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Fri, 1 Jul 2022 22:47:51 +0200
Subject: [PATCH] usbnet: smsc95xx: Fix deadlock on runtime resume
Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
smsc95xx_resume() to call phy_init_hw(). That function waits for the
device to runtime resume even though it is placed in the runtime resume
path, causing a deadlock.
The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
which never uses the _nopm variant of usbnet_read_cmd().
Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
reset operation") causes a similar deadlock on resume if the device was
already runtime suspended when entering system sleep:
That's because the commit introduced smsc95xx_reset_resume(), which
calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
Fix by auto-detecting whether a device access is performed by the
suspend/resume task_struct and use the _nopm variant if so. This works
because the PM core guarantees that suspend/resume callbacks are run in
task context.
Stacktrace for posterity:
INFO: task kworker/2:1:49 blocked for more than 122 seconds.
Workqueue: usb_hub_wq hub_event
schedule
rpm_resume
__pm_runtime_resume
usb_autopm_get_interface
usbnet_read_cmd
__smsc95xx_read_reg
__smsc95xx_phy_wait_not_busy
__smsc95xx_mdio_read
smsc95xx_mdiobus_read
__mdiobus_read
mdiobus_read
smsc_phy_reset
phy_init_hw
smsc95xx_resume
usb_resume_interface
usb_resume_both
usb_runtime_resume
__rpm_callback
rpm_callback
rpm_resume
__pm_runtime_resume
usb_autoresume_device
hub_event
process_one_work
Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v3.16+
Cc: Andre Edich <andre.edich(a)microchip.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 35110814ba22..0316c80c3fc4 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -71,6 +71,7 @@ struct smsc95xx_priv {
struct fwnode_handle *irqfwnode;
struct mii_bus *mdiobus;
struct phy_device *phydev;
+ struct task_struct *pm_task;
};
static bool turbo_mode = true;
@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
u32 *data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_read_cmd;
else
fn = usbnet_read_cmd_nopm;
@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
u32 data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_write_cmd;
else
fn = usbnet_write_cmd_nopm;
@@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ pdata->pm_task = current;
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
+ pdata->pm_task = NULL;
return ret;
}
@@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+ pdata->pm_task = NULL;
return ret;
}
@@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
/* do this first to ensure it's cleared even in error case */
pdata->suspend_flags = 0;
+ pdata->pm_task = current;
+
if (suspend_flags & SUSPEND_ALLMODES) {
/* clear wake-up sources */
ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
if (ret < 0)
- return ret;
+ goto done;
/* clear wake-up status */
ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~PM_CTL_WOL_EN_;
val |= PM_CTL_WUPS_;
ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
if (ret < 0)
- return ret;
+ goto done;
}
phy_init_hw(pdata->phydev);
@@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
if (ret < 0)
netdev_warn(dev->net, "usbnet_resume error\n");
+done:
+ pdata->pm_task = NULL;
return ret;
}
static int smsc95xx_reset_resume(struct usb_interface *intf)
{
struct usbnet *dev = usb_get_intfdata(intf);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
+ pdata->pm_task = current;
ret = smsc95xx_reset(dev);
+ pdata->pm_task = NULL;
if (ret < 0)
return ret;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Fri, 1 Jul 2022 22:47:51 +0200
Subject: [PATCH] usbnet: smsc95xx: Fix deadlock on runtime resume
Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
smsc95xx_resume() to call phy_init_hw(). That function waits for the
device to runtime resume even though it is placed in the runtime resume
path, causing a deadlock.
The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
which never uses the _nopm variant of usbnet_read_cmd().
Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
reset operation") causes a similar deadlock on resume if the device was
already runtime suspended when entering system sleep:
That's because the commit introduced smsc95xx_reset_resume(), which
calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
Fix by auto-detecting whether a device access is performed by the
suspend/resume task_struct and use the _nopm variant if so. This works
because the PM core guarantees that suspend/resume callbacks are run in
task context.
Stacktrace for posterity:
INFO: task kworker/2:1:49 blocked for more than 122 seconds.
Workqueue: usb_hub_wq hub_event
schedule
rpm_resume
__pm_runtime_resume
usb_autopm_get_interface
usbnet_read_cmd
__smsc95xx_read_reg
__smsc95xx_phy_wait_not_busy
__smsc95xx_mdio_read
smsc95xx_mdiobus_read
__mdiobus_read
mdiobus_read
smsc_phy_reset
phy_init_hw
smsc95xx_resume
usb_resume_interface
usb_resume_both
usb_runtime_resume
__rpm_callback
rpm_callback
rpm_resume
__pm_runtime_resume
usb_autoresume_device
hub_event
process_one_work
Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v3.16+
Cc: Andre Edich <andre.edich(a)microchip.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 35110814ba22..0316c80c3fc4 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -71,6 +71,7 @@ struct smsc95xx_priv {
struct fwnode_handle *irqfwnode;
struct mii_bus *mdiobus;
struct phy_device *phydev;
+ struct task_struct *pm_task;
};
static bool turbo_mode = true;
@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
u32 *data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_read_cmd;
else
fn = usbnet_read_cmd_nopm;
@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
u32 data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_write_cmd;
else
fn = usbnet_write_cmd_nopm;
@@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ pdata->pm_task = current;
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
+ pdata->pm_task = NULL;
return ret;
}
@@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+ pdata->pm_task = NULL;
return ret;
}
@@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
/* do this first to ensure it's cleared even in error case */
pdata->suspend_flags = 0;
+ pdata->pm_task = current;
+
if (suspend_flags & SUSPEND_ALLMODES) {
/* clear wake-up sources */
ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
if (ret < 0)
- return ret;
+ goto done;
/* clear wake-up status */
ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~PM_CTL_WOL_EN_;
val |= PM_CTL_WUPS_;
ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
if (ret < 0)
- return ret;
+ goto done;
}
phy_init_hw(pdata->phydev);
@@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
if (ret < 0)
netdev_warn(dev->net, "usbnet_resume error\n");
+done:
+ pdata->pm_task = NULL;
return ret;
}
static int smsc95xx_reset_resume(struct usb_interface *intf)
{
struct usbnet *dev = usb_get_intfdata(intf);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
+ pdata->pm_task = current;
ret = smsc95xx_reset(dev);
+ pdata->pm_task = NULL;
if (ret < 0)
return ret;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Fri, 1 Jul 2022 22:47:51 +0200
Subject: [PATCH] usbnet: smsc95xx: Fix deadlock on runtime resume
Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
smsc95xx_resume() to call phy_init_hw(). That function waits for the
device to runtime resume even though it is placed in the runtime resume
path, causing a deadlock.
The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
which never uses the _nopm variant of usbnet_read_cmd().
Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
reset operation") causes a similar deadlock on resume if the device was
already runtime suspended when entering system sleep:
That's because the commit introduced smsc95xx_reset_resume(), which
calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
Fix by auto-detecting whether a device access is performed by the
suspend/resume task_struct and use the _nopm variant if so. This works
because the PM core guarantees that suspend/resume callbacks are run in
task context.
Stacktrace for posterity:
INFO: task kworker/2:1:49 blocked for more than 122 seconds.
Workqueue: usb_hub_wq hub_event
schedule
rpm_resume
__pm_runtime_resume
usb_autopm_get_interface
usbnet_read_cmd
__smsc95xx_read_reg
__smsc95xx_phy_wait_not_busy
__smsc95xx_mdio_read
smsc95xx_mdiobus_read
__mdiobus_read
mdiobus_read
smsc_phy_reset
phy_init_hw
smsc95xx_resume
usb_resume_interface
usb_resume_both
usb_runtime_resume
__rpm_callback
rpm_callback
rpm_resume
__pm_runtime_resume
usb_autoresume_device
hub_event
process_one_work
Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v3.16+
Cc: Andre Edich <andre.edich(a)microchip.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 35110814ba22..0316c80c3fc4 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -71,6 +71,7 @@ struct smsc95xx_priv {
struct fwnode_handle *irqfwnode;
struct mii_bus *mdiobus;
struct phy_device *phydev;
+ struct task_struct *pm_task;
};
static bool turbo_mode = true;
@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
u32 *data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_read_cmd;
else
fn = usbnet_read_cmd_nopm;
@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
u32 data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_write_cmd;
else
fn = usbnet_write_cmd_nopm;
@@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ pdata->pm_task = current;
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
+ pdata->pm_task = NULL;
return ret;
}
@@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+ pdata->pm_task = NULL;
return ret;
}
@@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
/* do this first to ensure it's cleared even in error case */
pdata->suspend_flags = 0;
+ pdata->pm_task = current;
+
if (suspend_flags & SUSPEND_ALLMODES) {
/* clear wake-up sources */
ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
if (ret < 0)
- return ret;
+ goto done;
/* clear wake-up status */
ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~PM_CTL_WOL_EN_;
val |= PM_CTL_WUPS_;
ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
if (ret < 0)
- return ret;
+ goto done;
}
phy_init_hw(pdata->phydev);
@@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
if (ret < 0)
netdev_warn(dev->net, "usbnet_resume error\n");
+done:
+ pdata->pm_task = NULL;
return ret;
}
static int smsc95xx_reset_resume(struct usb_interface *intf)
{
struct usbnet *dev = usb_get_intfdata(intf);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
+ pdata->pm_task = current;
ret = smsc95xx_reset(dev);
+ pdata->pm_task = NULL;
if (ret < 0)
return ret;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Fri, 1 Jul 2022 22:47:51 +0200
Subject: [PATCH] usbnet: smsc95xx: Fix deadlock on runtime resume
Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
smsc95xx_resume() to call phy_init_hw(). That function waits for the
device to runtime resume even though it is placed in the runtime resume
path, causing a deadlock.
The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
which never uses the _nopm variant of usbnet_read_cmd().
Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
reset operation") causes a similar deadlock on resume if the device was
already runtime suspended when entering system sleep:
That's because the commit introduced smsc95xx_reset_resume(), which
calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
Fix by auto-detecting whether a device access is performed by the
suspend/resume task_struct and use the _nopm variant if so. This works
because the PM core guarantees that suspend/resume callbacks are run in
task context.
Stacktrace for posterity:
INFO: task kworker/2:1:49 blocked for more than 122 seconds.
Workqueue: usb_hub_wq hub_event
schedule
rpm_resume
__pm_runtime_resume
usb_autopm_get_interface
usbnet_read_cmd
__smsc95xx_read_reg
__smsc95xx_phy_wait_not_busy
__smsc95xx_mdio_read
smsc95xx_mdiobus_read
__mdiobus_read
mdiobus_read
smsc_phy_reset
phy_init_hw
smsc95xx_resume
usb_resume_interface
usb_resume_both
usb_runtime_resume
__rpm_callback
rpm_callback
rpm_resume
__pm_runtime_resume
usb_autoresume_device
hub_event
process_one_work
Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v3.16+
Cc: Andre Edich <andre.edich(a)microchip.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 35110814ba22..0316c80c3fc4 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -71,6 +71,7 @@ struct smsc95xx_priv {
struct fwnode_handle *irqfwnode;
struct mii_bus *mdiobus;
struct phy_device *phydev;
+ struct task_struct *pm_task;
};
static bool turbo_mode = true;
@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
u32 *data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_read_cmd;
else
fn = usbnet_read_cmd_nopm;
@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
u32 data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_write_cmd;
else
fn = usbnet_write_cmd_nopm;
@@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ pdata->pm_task = current;
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
+ pdata->pm_task = NULL;
return ret;
}
@@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+ pdata->pm_task = NULL;
return ret;
}
@@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
/* do this first to ensure it's cleared even in error case */
pdata->suspend_flags = 0;
+ pdata->pm_task = current;
+
if (suspend_flags & SUSPEND_ALLMODES) {
/* clear wake-up sources */
ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
if (ret < 0)
- return ret;
+ goto done;
/* clear wake-up status */
ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~PM_CTL_WOL_EN_;
val |= PM_CTL_WUPS_;
ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
if (ret < 0)
- return ret;
+ goto done;
}
phy_init_hw(pdata->phydev);
@@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
if (ret < 0)
netdev_warn(dev->net, "usbnet_resume error\n");
+done:
+ pdata->pm_task = NULL;
return ret;
}
static int smsc95xx_reset_resume(struct usb_interface *intf)
{
struct usbnet *dev = usb_get_intfdata(intf);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
+ pdata->pm_task = current;
ret = smsc95xx_reset(dev);
+ pdata->pm_task = NULL;
if (ret < 0)
return ret;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Fri, 1 Jul 2022 22:47:51 +0200
Subject: [PATCH] usbnet: smsc95xx: Fix deadlock on runtime resume
Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
smsc95xx_resume() to call phy_init_hw(). That function waits for the
device to runtime resume even though it is placed in the runtime resume
path, causing a deadlock.
The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
which never uses the _nopm variant of usbnet_read_cmd().
Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
reset operation") causes a similar deadlock on resume if the device was
already runtime suspended when entering system sleep:
That's because the commit introduced smsc95xx_reset_resume(), which
calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
Fix by auto-detecting whether a device access is performed by the
suspend/resume task_struct and use the _nopm variant if so. This works
because the PM core guarantees that suspend/resume callbacks are run in
task context.
Stacktrace for posterity:
INFO: task kworker/2:1:49 blocked for more than 122 seconds.
Workqueue: usb_hub_wq hub_event
schedule
rpm_resume
__pm_runtime_resume
usb_autopm_get_interface
usbnet_read_cmd
__smsc95xx_read_reg
__smsc95xx_phy_wait_not_busy
__smsc95xx_mdio_read
smsc95xx_mdiobus_read
__mdiobus_read
mdiobus_read
smsc_phy_reset
phy_init_hw
smsc95xx_resume
usb_resume_interface
usb_resume_both
usb_runtime_resume
__rpm_callback
rpm_callback
rpm_resume
__pm_runtime_resume
usb_autoresume_device
hub_event
process_one_work
Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v3.16+
Cc: Andre Edich <andre.edich(a)microchip.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 35110814ba22..0316c80c3fc4 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -71,6 +71,7 @@ struct smsc95xx_priv {
struct fwnode_handle *irqfwnode;
struct mii_bus *mdiobus;
struct phy_device *phydev;
+ struct task_struct *pm_task;
};
static bool turbo_mode = true;
@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
u32 *data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_read_cmd;
else
fn = usbnet_read_cmd_nopm;
@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
u32 data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_write_cmd;
else
fn = usbnet_write_cmd_nopm;
@@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ pdata->pm_task = current;
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
+ pdata->pm_task = NULL;
return ret;
}
@@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+ pdata->pm_task = NULL;
return ret;
}
@@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
/* do this first to ensure it's cleared even in error case */
pdata->suspend_flags = 0;
+ pdata->pm_task = current;
+
if (suspend_flags & SUSPEND_ALLMODES) {
/* clear wake-up sources */
ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
if (ret < 0)
- return ret;
+ goto done;
/* clear wake-up status */
ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~PM_CTL_WOL_EN_;
val |= PM_CTL_WUPS_;
ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
if (ret < 0)
- return ret;
+ goto done;
}
phy_init_hw(pdata->phydev);
@@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
if (ret < 0)
netdev_warn(dev->net, "usbnet_resume error\n");
+done:
+ pdata->pm_task = NULL;
return ret;
}
static int smsc95xx_reset_resume(struct usb_interface *intf)
{
struct usbnet *dev = usb_get_intfdata(intf);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
+ pdata->pm_task = current;
ret = smsc95xx_reset(dev);
+ pdata->pm_task = NULL;
if (ret < 0)
return ret;
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas(a)wunner.de>
Date: Fri, 1 Jul 2022 22:47:51 +0200
Subject: [PATCH] usbnet: smsc95xx: Fix deadlock on runtime resume
Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
smsc95xx_resume() to call phy_init_hw(). That function waits for the
device to runtime resume even though it is placed in the runtime resume
path, causing a deadlock.
The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
which never uses the _nopm variant of usbnet_read_cmd().
Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
reset operation") causes a similar deadlock on resume if the device was
already runtime suspended when entering system sleep:
That's because the commit introduced smsc95xx_reset_resume(), which
calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
Fix by auto-detecting whether a device access is performed by the
suspend/resume task_struct and use the _nopm variant if so. This works
because the PM core guarantees that suspend/resume callbacks are run in
task context.
Stacktrace for posterity:
INFO: task kworker/2:1:49 blocked for more than 122 seconds.
Workqueue: usb_hub_wq hub_event
schedule
rpm_resume
__pm_runtime_resume
usb_autopm_get_interface
usbnet_read_cmd
__smsc95xx_read_reg
__smsc95xx_phy_wait_not_busy
__smsc95xx_mdio_read
smsc95xx_mdiobus_read
__mdiobus_read
mdiobus_read
smsc_phy_reset
phy_init_hw
smsc95xx_resume
usb_resume_interface
usb_resume_both
usb_runtime_resume
__rpm_callback
rpm_callback
rpm_resume
__pm_runtime_resume
usb_autoresume_device
hub_event
process_one_work
Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
Signed-off-by: Lukas Wunner <lukas(a)wunner.de>
Cc: stable(a)vger.kernel.org # v3.16+
Cc: Andre Edich <andre.edich(a)microchip.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 35110814ba22..0316c80c3fc4 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -71,6 +71,7 @@ struct smsc95xx_priv {
struct fwnode_handle *irqfwnode;
struct mii_bus *mdiobus;
struct phy_device *phydev;
+ struct task_struct *pm_task;
};
static bool turbo_mode = true;
@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
u32 *data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_read_cmd;
else
fn = usbnet_read_cmd_nopm;
@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
u32 data, int in_pm)
{
+ struct smsc95xx_priv *pdata = dev->driver_priv;
u32 buf;
int ret;
int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
BUG_ON(!dev);
- if (!in_pm)
+ if (current != pdata->pm_task)
fn = usbnet_write_cmd;
else
fn = usbnet_write_cmd_nopm;
@@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
u32 val, link_up;
int ret;
+ pdata->pm_task = current;
+
ret = usbnet_suspend(intf, message);
if (ret < 0) {
netdev_warn(dev->net, "usbnet_suspend error\n");
+ pdata->pm_task = NULL;
return ret;
}
@@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
if (ret && PMSG_IS_AUTO(message))
usbnet_resume(intf);
+ pdata->pm_task = NULL;
return ret;
}
@@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
/* do this first to ensure it's cleared even in error case */
pdata->suspend_flags = 0;
+ pdata->pm_task = current;
+
if (suspend_flags & SUSPEND_ALLMODES) {
/* clear wake-up sources */
ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
if (ret < 0)
- return ret;
+ goto done;
/* clear wake-up status */
ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
if (ret < 0)
- return ret;
+ goto done;
val &= ~PM_CTL_WOL_EN_;
val |= PM_CTL_WUPS_;
ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
if (ret < 0)
- return ret;
+ goto done;
}
phy_init_hw(pdata->phydev);
@@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
if (ret < 0)
netdev_warn(dev->net, "usbnet_resume error\n");
+done:
+ pdata->pm_task = NULL;
return ret;
}
static int smsc95xx_reset_resume(struct usb_interface *intf)
{
struct usbnet *dev = usb_get_intfdata(intf);
+ struct smsc95xx_priv *pdata = dev->driver_priv;
int ret;
+ pdata->pm_task = current;
ret = smsc95xx_reset(dev);
+ pdata->pm_task = NULL;
if (ret < 0)
return ret;
Linux voidshop 5.15.52_1 #1 SMP Sat Jul 2 15:17:21 UTC 2022 x86_64 GNU/Linux. This kernel runs fine.
Every other later kernel does not.
kern.err: [ 128.868336] amdgpu 0000:29:00.0: [drm] *ERROR* [CRTC:53:crtc-0] flip_done timed out
This is the error i get when coming back from playing a wine game ( Sacred gold ).
Screen freezes, although sometimes i can type sudo reboot and it works.
Trying different kernel options.
GRUB_CMDLINE_LINUX_DEFAULT="amdgpu.noretry=0 pci=noats udev.log_priority=3 idle=nomwait radeon.si_support=0 radeon.audio=0 radeon.cik_support=0 amd_iommu=fullflush amdgpu.si_support=1 amdgpu.cik_support=1 amdgpu.dc=1 amdgpu.dpm=1 amdgpu.modeset=1 ipv6.disable=1 raid=noautodetect nomdadm vt.default_utf8=1"
Can i provide any other information ?.
Please advise
Craig Miles
The patch titled
Subject: kprobes: don't call disarm_kprobe() for disabled kprobes.
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kprobes-dont-call-disarm_kprobe-for-disabled-kprobes.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kuniyuki Iwashima <kuniyu(a)amazon.com>
Subject: kprobes: don't call disarm_kprobe() for disabled kprobes.
Date: Fri, 12 Aug 2022 19:05:09 -0700
The assumption in __disable_kprobe() is wrong, and it could try to disarm
an already disarmed kprobe and fire the WARN_ONCE() below. [0] We can
easily reproduce this issue.
1. Write 0 to /sys/kernel/debug/kprobes/enabled.
# echo 0 > /sys/kernel/debug/kprobes/enabled
2. Run execsnoop. At this time, one kprobe is disabled.
# /usr/share/bcc/tools/execsnoop &
[1] 2460
PCOMM PID PPID RET ARGS
# cat /sys/kernel/debug/kprobes/list
ffffffff91345650 r __x64_sys_execve+0x0 [FTRACE]
ffffffff91345650 k __x64_sys_execve+0x0 [DISABLED][FTRACE]
3. Write 1 to /sys/kernel/debug/kprobes/enabled, which changes
kprobes_all_disarmed to false but does not arm the disabled kprobe.
# echo 1 > /sys/kernel/debug/kprobes/enabled
# cat /sys/kernel/debug/kprobes/list
ffffffff91345650 r __x64_sys_execve+0x0 [FTRACE]
ffffffff91345650 k __x64_sys_execve+0x0 [DISABLED][FTRACE]
4. Kill execsnoop, when __disable_kprobe() calls disarm_kprobe() for the
disabled kprobe and hits the WARN_ONCE() in __disarm_kprobe_ftrace().
# fg
/usr/share/bcc/tools/execsnoop
^C
Actually, WARN_ONCE() is fired twice, and __unregister_kprobe_top() misses
some cleanups and leaves the aggregated kprobe in the hash table. Then,
__unregister_trace_kprobe() initialises tk->rp.kp.list and creates an
infinite loop like this.
aggregated kprobe.list -> kprobe.list -.
^ |
'.__.'
In this situation, these commands fall into the infinite loop and result
in RCU stall or soft lockup.
cat /sys/kernel/debug/kprobes/list : show_kprobe_addr() enters into the
infinite loop with RCU.
/usr/share/bcc/tools/execsnoop : warn_kprobe_rereg() holds kprobe_mutex,
and __get_valid_kprobe() is stuck in
the loop.
To avoid the issue, make sure we don't call disarm_kprobe() for disabled
kprobes.
[0]
Failed to disarm kprobe-ftrace at __x64_sys_execve+0x0/0x40 (error -2)
WARNING: CPU: 6 PID: 2460 at kernel/kprobes.c:1130 __disarm_kprobe_ftrace.isra.19 (kernel/kprobes.c:1129)
Modules linked in: ena
CPU: 6 PID: 2460 Comm: execsnoop Not tainted 5.19.0+ #28
Hardware name: Amazon EC2 c5.2xlarge/, BIOS 1.0 10/16/2017
RIP: 0010:__disarm_kprobe_ftrace.isra.19 (kernel/kprobes.c:1129)
Code: 24 8b 02 eb c1 80 3d c4 83 f2 01 00 75 d4 48 8b 75 00 89 c2 48 c7 c7 90 fa 0f 92 89 04 24 c6 05 ab 83 01 e8 e4 94 f0 ff <0f> 0b 8b 04 24 eb b1 89 c6 48 c7 c7 60 fa 0f 92 89 04 24 e8 cc 94
RSP: 0018:ffff9e6ec154bd98 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffffffff930f7b00 RCX: 0000000000000001
RDX: 0000000080000001 RSI: ffffffff921461c5 RDI: 00000000ffffffff
RBP: ffff89c504286da8 R08: 0000000000000000 R09: c0000000fffeffff
R10: 0000000000000000 R11: ffff9e6ec154bc28 R12: ffff89c502394e40
R13: ffff89c502394c00 R14: ffff9e6ec154bc00 R15: 0000000000000000
FS: 00007fe800398740(0000) GS:ffff89c812d80000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000c00057f010 CR3: 0000000103b54006 CR4: 00000000007706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
<TASK>
__disable_kprobe (kernel/kprobes.c:1716)
disable_kprobe (kernel/kprobes.c:2392)
__disable_trace_kprobe (kernel/trace/trace_kprobe.c:340)
disable_trace_kprobe (kernel/trace/trace_kprobe.c:429)
perf_trace_event_unreg.isra.2 (./include/linux/tracepoint.h:93 kernel/trace/trace_event_perf.c:168)
perf_kprobe_destroy (kernel/trace/trace_event_perf.c:295)
_free_event (kernel/events/core.c:4971)
perf_event_release_kernel (kernel/events/core.c:5176)
perf_release (kernel/events/core.c:5186)
__fput (fs/file_table.c:321)
task_work_run (./include/linux/sched.h:2056 (discriminator 1) kernel/task_work.c:179 (discriminator 1))
exit_to_user_mode_prepare (./include/linux/resume_user_mode.h:49 kernel/entry/common.c:169 kernel/entry/common.c:201)
syscall_exit_to_user_mode (./arch/x86/include/asm/jump_label.h:55 ./arch/x86/include/asm/nospec-branch.h:384 ./arch/x86/include/asm/entry-common.h:94 kernel/entry/common.c:133 kernel/entry/common.c:296)
do_syscall_64 (arch/x86/entry/common.c:87)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
RIP: 0033:0x7fe7ff210654
Code: 15 79 89 20 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb be 0f 1f 00 8b 05 9a cd 20 00 48 63 ff 85 c0 75 11 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 3a f3 c3 48 83 ec 18 48 89 7c 24 08 e8 34 fc
RSP: 002b:00007ffdbd1d3538 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
RAX: 0000000000000000 RBX: 0000000000000008 RCX: 00007fe7ff210654
RDX: 0000000000000000 RSI: 0000000000002401 RDI: 0000000000000008
RBP: 0000000000000000 R08: 94ae31d6fda838a4 R0900007fe8001c9d30
R10: 00007ffdbd1d34b0 R11: 0000000000000246 R12: 00007ffdbd1d3600
R13: 0000000000000000 R14: fffffffffffffffc R15: 00007ffdbd1d3560
</TASK>
Link: https://lkml.kernel.org/r/20220813020509.90805-1-kuniyu@amazon.com
Fixes: 69d54b916d83 ("kprobes: makes kprobes/enabled works correctly for optimized kprobes.")
Signed-off-by: Kuniyuki Iwashima <kuniyu(a)amazon.com>
Reported-by: Ayushman Dutta <ayudutta(a)amazon.com>
Cc: "Naveen N. Rao" <naveen.n.rao(a)linux.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy(a)intel.com>
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Wang Nan <wangnan0(a)huawei.com>
Cc: Kuniyuki Iwashima <kuniyu(a)amazon.com>
Cc: Kuniyuki Iwashima <kuni1840(a)gmail.com>
Cc: Ayushman Dutta <ayudutta(a)amazon.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/kprobes.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
--- a/kernel/kprobes.c~kprobes-dont-call-disarm_kprobe-for-disabled-kprobes
+++ a/kernel/kprobes.c
@@ -1707,11 +1707,12 @@ static struct kprobe *__disable_kprobe(s
/* Try to disarm and disable this/parent probe */
if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
/*
- * If 'kprobes_all_disarmed' is set, 'orig_p'
- * should have already been disarmed, so
- * skip unneed disarming process.
+ * Don't be lazy here. Even if 'kprobes_all_disarmed'
+ * is false, 'orig_p' might not have been armed yet.
+ * Note arm_all_kprobes() __tries__ to arm all kprobes
+ * on the best effort basis.
*/
- if (!kprobes_all_disarmed) {
+ if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
ret = disarm_kprobe(orig_p, true);
if (ret) {
p->flags &= ~KPROBE_FLAG_DISABLED;
_
Patches currently in -mm which might be from kuniyu(a)amazon.com are
kprobes-dont-call-disarm_kprobe-for-disabled-kprobes.patch
Syzkaller reports a number of "BUG: MAX_LOCKDEP_CHAINS too low!" warnings
on 5.10 branch and it means that the fuzz testing is terminated prematurely.
Since upstream patch that allows to increase limits is applied cleanly
and our testing demonstrated that it works well, we suggest to backport
it to the 5.10 branch.
--
Alexey
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b663f0b5f3d665c261256d1f76e98f077c6e56af Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:07 +0000
Subject: [PATCH] KVM: VMX: Add helper to check if the guest PMU has
PERF_GLOBAL_CTRL
Add a helper to check of the guest PMU has PERF_GLOBAL_CTRL, which is
unintuitive _and_ diverges from Intel's architecturally defined behavior.
Even worse, KVM currently implements the check using two different (but
equivalent) checks, _and_ there has been at least one attempt to add a
_third_ flavor.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6e355c5d2f40..78f2800fd850 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -111,7 +111,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (pmu->version < 2)
+ if (!intel_pmu_has_perf_global_ctrl(pmu))
return true;
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
@@ -207,7 +207,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- ret = pmu->version > 1;
+ return intel_pmu_has_perf_global_ctrl(pmu);
break;
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 286c88e285ea..2a0b94e0fda7 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,18 @@ union vmx_exit_reason {
u32 full;
};
+static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
+{
+ /*
+ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
+ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
+ * greater than zero. However, KVM only exposes and emulates the MSR
+ * to/for the guest if the guest PMU supports at least "Architectural
+ * Performance Monitoring Version 2".
+ */
+ return pmu->version > 1;
+}
+
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b663f0b5f3d665c261256d1f76e98f077c6e56af Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:07 +0000
Subject: [PATCH] KVM: VMX: Add helper to check if the guest PMU has
PERF_GLOBAL_CTRL
Add a helper to check of the guest PMU has PERF_GLOBAL_CTRL, which is
unintuitive _and_ diverges from Intel's architecturally defined behavior.
Even worse, KVM currently implements the check using two different (but
equivalent) checks, _and_ there has been at least one attempt to add a
_third_ flavor.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6e355c5d2f40..78f2800fd850 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -111,7 +111,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (pmu->version < 2)
+ if (!intel_pmu_has_perf_global_ctrl(pmu))
return true;
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
@@ -207,7 +207,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- ret = pmu->version > 1;
+ return intel_pmu_has_perf_global_ctrl(pmu);
break;
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 286c88e285ea..2a0b94e0fda7 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,18 @@ union vmx_exit_reason {
u32 full;
};
+static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
+{
+ /*
+ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
+ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
+ * greater than zero. However, KVM only exposes and emulates the MSR
+ * to/for the guest if the guest PMU supports at least "Architectural
+ * Performance Monitoring Version 2".
+ */
+ return pmu->version > 1;
+}
+
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b663f0b5f3d665c261256d1f76e98f077c6e56af Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:07 +0000
Subject: [PATCH] KVM: VMX: Add helper to check if the guest PMU has
PERF_GLOBAL_CTRL
Add a helper to check of the guest PMU has PERF_GLOBAL_CTRL, which is
unintuitive _and_ diverges from Intel's architecturally defined behavior.
Even worse, KVM currently implements the check using two different (but
equivalent) checks, _and_ there has been at least one attempt to add a
_third_ flavor.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6e355c5d2f40..78f2800fd850 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -111,7 +111,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (pmu->version < 2)
+ if (!intel_pmu_has_perf_global_ctrl(pmu))
return true;
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
@@ -207,7 +207,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- ret = pmu->version > 1;
+ return intel_pmu_has_perf_global_ctrl(pmu);
break;
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 286c88e285ea..2a0b94e0fda7 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,18 @@ union vmx_exit_reason {
u32 full;
};
+static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
+{
+ /*
+ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
+ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
+ * greater than zero. However, KVM only exposes and emulates the MSR
+ * to/for the guest if the guest PMU supports at least "Architectural
+ * Performance Monitoring Version 2".
+ */
+ return pmu->version > 1;
+}
+
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b663f0b5f3d665c261256d1f76e98f077c6e56af Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:07 +0000
Subject: [PATCH] KVM: VMX: Add helper to check if the guest PMU has
PERF_GLOBAL_CTRL
Add a helper to check of the guest PMU has PERF_GLOBAL_CTRL, which is
unintuitive _and_ diverges from Intel's architecturally defined behavior.
Even worse, KVM currently implements the check using two different (but
equivalent) checks, _and_ there has been at least one attempt to add a
_third_ flavor.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6e355c5d2f40..78f2800fd850 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -111,7 +111,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (pmu->version < 2)
+ if (!intel_pmu_has_perf_global_ctrl(pmu))
return true;
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
@@ -207,7 +207,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- ret = pmu->version > 1;
+ return intel_pmu_has_perf_global_ctrl(pmu);
break;
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 286c88e285ea..2a0b94e0fda7 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,18 @@ union vmx_exit_reason {
u32 full;
};
+static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
+{
+ /*
+ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
+ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
+ * greater than zero. However, KVM only exposes and emulates the MSR
+ * to/for the guest if the guest PMU supports at least "Architectural
+ * Performance Monitoring Version 2".
+ */
+ return pmu->version > 1;
+}
+
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b663f0b5f3d665c261256d1f76e98f077c6e56af Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:07 +0000
Subject: [PATCH] KVM: VMX: Add helper to check if the guest PMU has
PERF_GLOBAL_CTRL
Add a helper to check of the guest PMU has PERF_GLOBAL_CTRL, which is
unintuitive _and_ diverges from Intel's architecturally defined behavior.
Even worse, KVM currently implements the check using two different (but
equivalent) checks, _and_ there has been at least one attempt to add a
_third_ flavor.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6e355c5d2f40..78f2800fd850 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -111,7 +111,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (pmu->version < 2)
+ if (!intel_pmu_has_perf_global_ctrl(pmu))
return true;
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
@@ -207,7 +207,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- ret = pmu->version > 1;
+ return intel_pmu_has_perf_global_ctrl(pmu);
break;
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 286c88e285ea..2a0b94e0fda7 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,18 @@ union vmx_exit_reason {
u32 full;
};
+static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
+{
+ /*
+ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
+ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
+ * greater than zero. However, KVM only exposes and emulates the MSR
+ * to/for the guest if the guest PMU supports at least "Architectural
+ * Performance Monitoring Version 2".
+ */
+ return pmu->version > 1;
+}
+
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b663f0b5f3d665c261256d1f76e98f077c6e56af Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:07 +0000
Subject: [PATCH] KVM: VMX: Add helper to check if the guest PMU has
PERF_GLOBAL_CTRL
Add a helper to check of the guest PMU has PERF_GLOBAL_CTRL, which is
unintuitive _and_ diverges from Intel's architecturally defined behavior.
Even worse, KVM currently implements the check using two different (but
equivalent) checks, _and_ there has been at least one attempt to add a
_third_ flavor.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6e355c5d2f40..78f2800fd850 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -111,7 +111,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (pmu->version < 2)
+ if (!intel_pmu_has_perf_global_ctrl(pmu))
return true;
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
@@ -207,7 +207,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- ret = pmu->version > 1;
+ return intel_pmu_has_perf_global_ctrl(pmu);
break;
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 286c88e285ea..2a0b94e0fda7 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,18 @@ union vmx_exit_reason {
u32 full;
};
+static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
+{
+ /*
+ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
+ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
+ * greater than zero. However, KVM only exposes and emulates the MSR
+ * to/for the guest if the guest PMU supports at least "Architectural
+ * Performance Monitoring Version 2".
+ */
+ return pmu->version > 1;
+}
+
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b663f0b5f3d665c261256d1f76e98f077c6e56af Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:07 +0000
Subject: [PATCH] KVM: VMX: Add helper to check if the guest PMU has
PERF_GLOBAL_CTRL
Add a helper to check of the guest PMU has PERF_GLOBAL_CTRL, which is
unintuitive _and_ diverges from Intel's architecturally defined behavior.
Even worse, KVM currently implements the check using two different (but
equivalent) checks, _and_ there has been at least one attempt to add a
_third_ flavor.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6e355c5d2f40..78f2800fd850 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -111,7 +111,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (pmu->version < 2)
+ if (!intel_pmu_has_perf_global_ctrl(pmu))
return true;
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
@@ -207,7 +207,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- ret = pmu->version > 1;
+ return intel_pmu_has_perf_global_ctrl(pmu);
break;
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 286c88e285ea..2a0b94e0fda7 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,18 @@ union vmx_exit_reason {
u32 full;
};
+static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
+{
+ /*
+ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
+ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
+ * greater than zero. However, KVM only exposes and emulates the MSR
+ * to/for the guest if the guest PMU supports at least "Architectural
+ * Performance Monitoring Version 2".
+ */
+ return pmu->version > 1;
+}
+
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b663f0b5f3d665c261256d1f76e98f077c6e56af Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:07 +0000
Subject: [PATCH] KVM: VMX: Add helper to check if the guest PMU has
PERF_GLOBAL_CTRL
Add a helper to check of the guest PMU has PERF_GLOBAL_CTRL, which is
unintuitive _and_ diverges from Intel's architecturally defined behavior.
Even worse, KVM currently implements the check using two different (but
equivalent) checks, _and_ there has been at least one attempt to add a
_third_ flavor.
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6e355c5d2f40..78f2800fd850 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -111,7 +111,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
- if (pmu->version < 2)
+ if (!intel_pmu_has_perf_global_ctrl(pmu))
return true;
return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
@@ -207,7 +207,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- ret = pmu->version > 1;
+ return intel_pmu_has_perf_global_ctrl(pmu);
break;
case MSR_IA32_PEBS_ENABLE:
ret = vcpu_get_perf_capabilities(vcpu) & PERF_CAP_PEBS_FORMAT;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 286c88e285ea..2a0b94e0fda7 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -91,6 +91,18 @@ union vmx_exit_reason {
u32 full;
};
+static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
+{
+ /*
+ * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
+ * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
+ * greater than zero. However, KVM only exposes and emulates the MSR
+ * to/for the guest if the guest PMU supports at least "Architectural
+ * Performance Monitoring Version 2".
+ */
+ return pmu->version > 1;
+}
+
#define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
#define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 93255bf92939d948bc86d81c6bb70bb0fecc5db1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:06 +0000
Subject: [PATCH] KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if
there's no vPMU
Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
as reserved if there is no guest vPMU. The nVMX VM-Entry consistency
checks do not check for a valid vPMU prior to consuming the masks via
kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
actual MSR load will be rejected by intel_is_valid_msr()).
Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..6e355c5d2f40 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -527,6 +527,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+ pmu->global_ctrl_mask = ~0ull;
+ pmu->global_ovf_ctrl_mask = ~0ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 93255bf92939d948bc86d81c6bb70bb0fecc5db1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:06 +0000
Subject: [PATCH] KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if
there's no vPMU
Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
as reserved if there is no guest vPMU. The nVMX VM-Entry consistency
checks do not check for a valid vPMU prior to consuming the masks via
kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
actual MSR load will be rejected by intel_is_valid_msr()).
Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..6e355c5d2f40 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -527,6 +527,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+ pmu->global_ctrl_mask = ~0ull;
+ pmu->global_ovf_ctrl_mask = ~0ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 93255bf92939d948bc86d81c6bb70bb0fecc5db1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:06 +0000
Subject: [PATCH] KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if
there's no vPMU
Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
as reserved if there is no guest vPMU. The nVMX VM-Entry consistency
checks do not check for a valid vPMU prior to consuming the masks via
kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
actual MSR load will be rejected by intel_is_valid_msr()).
Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..6e355c5d2f40 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -527,6 +527,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+ pmu->global_ctrl_mask = ~0ull;
+ pmu->global_ovf_ctrl_mask = ~0ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 93255bf92939d948bc86d81c6bb70bb0fecc5db1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:06 +0000
Subject: [PATCH] KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if
there's no vPMU
Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
as reserved if there is no guest vPMU. The nVMX VM-Entry consistency
checks do not check for a valid vPMU prior to consuming the masks via
kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
actual MSR load will be rejected by intel_is_valid_msr()).
Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..6e355c5d2f40 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -527,6 +527,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+ pmu->global_ctrl_mask = ~0ull;
+ pmu->global_ovf_ctrl_mask = ~0ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 93255bf92939d948bc86d81c6bb70bb0fecc5db1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:06 +0000
Subject: [PATCH] KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if
there's no vPMU
Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
as reserved if there is no guest vPMU. The nVMX VM-Entry consistency
checks do not check for a valid vPMU prior to consuming the masks via
kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
actual MSR load will be rejected by intel_is_valid_msr()).
Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..6e355c5d2f40 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -527,6 +527,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+ pmu->global_ctrl_mask = ~0ull;
+ pmu->global_ovf_ctrl_mask = ~0ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 93255bf92939d948bc86d81c6bb70bb0fecc5db1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:06 +0000
Subject: [PATCH] KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if
there's no vPMU
Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
as reserved if there is no guest vPMU. The nVMX VM-Entry consistency
checks do not check for a valid vPMU prior to consuming the masks via
kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
actual MSR load will be rejected by intel_is_valid_msr()).
Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..6e355c5d2f40 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -527,6 +527,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+ pmu->global_ctrl_mask = ~0ull;
+ pmu->global_ovf_ctrl_mask = ~0ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 93255bf92939d948bc86d81c6bb70bb0fecc5db1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:06 +0000
Subject: [PATCH] KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if
there's no vPMU
Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
as reserved if there is no guest vPMU. The nVMX VM-Entry consistency
checks do not check for a valid vPMU prior to consuming the masks via
kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
actual MSR load will be rejected by intel_is_valid_msr()).
Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..6e355c5d2f40 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -527,6 +527,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+ pmu->global_ctrl_mask = ~0ull;
+ pmu->global_ovf_ctrl_mask = ~0ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 93255bf92939d948bc86d81c6bb70bb0fecc5db1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:06 +0000
Subject: [PATCH] KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if
there's no vPMU
Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
as reserved if there is no guest vPMU. The nVMX VM-Entry consistency
checks do not check for a valid vPMU prior to consuming the masks via
kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
actual MSR load will be rejected by intel_is_valid_msr()).
Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 4bc098fbec31..6e355c5d2f40 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -527,6 +527,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->version = 0;
pmu->reserved_bits = 0xffffffff00200000ull;
pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+ pmu->global_ctrl_mask = ~0ull;
+ pmu->global_ovf_ctrl_mask = ~0ull;
pmu->fixed_ctr_ctrl_mask = ~0ull;
pmu->pebs_enable_mask = ~0ull;
pmu->pebs_data_cfg_mask = ~0ull;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2368048bf5c2ec4b604ac3431564071e89a0bc71 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Thu, 12 May 2022 22:27:14 +0000
Subject: [PATCH] KVM: x86: Signal #GP, not -EPERM, on bad
WRMSR(MCi_CTL/STATUS)
Return '1', not '-1', when handling an illegal WRMSR to a MCi_CTL or
MCi_STATUS MSR. The behavior of "all zeros' or "all ones" for CTL MSRs
is architectural, as is the "only zeros" behavior for STATUS MSRs. I.e.
the intent is to inject a #GP, not exit to userspace due to an unhandled
emulation case. Returning '-1' gets interpreted as -EPERM up the stack
and effecitvely kills the guest.
Fixes: 890ca9aefa78 ("KVM: Add MCE support")
Fixes: 9ffd986c6e4e ("KVM: X86: #GP when guest attempts to write MCi_STATUS register w/o 0")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
Link: https://lore.kernel.org/r/20220512222716.4112548-2-seanjc@google.com
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ce0c6fe166d..891ca973c838 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3239,13 +3239,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
if ((offset & 0x3) == 0 &&
data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
- return -1;
+ return 1;
/* MCi_STATUS */
if (!msr_info->host_initiated &&
(offset & 0x3) == 1 && data != 0) {
if (!can_set_mci_status(vcpu))
- return -1;
+ return 1;
}
vcpu->arch.mce_banks[offset] = data;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2368048bf5c2ec4b604ac3431564071e89a0bc71 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Thu, 12 May 2022 22:27:14 +0000
Subject: [PATCH] KVM: x86: Signal #GP, not -EPERM, on bad
WRMSR(MCi_CTL/STATUS)
Return '1', not '-1', when handling an illegal WRMSR to a MCi_CTL or
MCi_STATUS MSR. The behavior of "all zeros' or "all ones" for CTL MSRs
is architectural, as is the "only zeros" behavior for STATUS MSRs. I.e.
the intent is to inject a #GP, not exit to userspace due to an unhandled
emulation case. Returning '-1' gets interpreted as -EPERM up the stack
and effecitvely kills the guest.
Fixes: 890ca9aefa78 ("KVM: Add MCE support")
Fixes: 9ffd986c6e4e ("KVM: X86: #GP when guest attempts to write MCi_STATUS register w/o 0")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
Link: https://lore.kernel.org/r/20220512222716.4112548-2-seanjc@google.com
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ce0c6fe166d..891ca973c838 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3239,13 +3239,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
if ((offset & 0x3) == 0 &&
data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
- return -1;
+ return 1;
/* MCi_STATUS */
if (!msr_info->host_initiated &&
(offset & 0x3) == 1 && data != 0) {
if (!can_set_mci_status(vcpu))
- return -1;
+ return 1;
}
vcpu->arch.mce_banks[offset] = data;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2368048bf5c2ec4b604ac3431564071e89a0bc71 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Thu, 12 May 2022 22:27:14 +0000
Subject: [PATCH] KVM: x86: Signal #GP, not -EPERM, on bad
WRMSR(MCi_CTL/STATUS)
Return '1', not '-1', when handling an illegal WRMSR to a MCi_CTL or
MCi_STATUS MSR. The behavior of "all zeros' or "all ones" for CTL MSRs
is architectural, as is the "only zeros" behavior for STATUS MSRs. I.e.
the intent is to inject a #GP, not exit to userspace due to an unhandled
emulation case. Returning '-1' gets interpreted as -EPERM up the stack
and effecitvely kills the guest.
Fixes: 890ca9aefa78 ("KVM: Add MCE support")
Fixes: 9ffd986c6e4e ("KVM: X86: #GP when guest attempts to write MCi_STATUS register w/o 0")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
Link: https://lore.kernel.org/r/20220512222716.4112548-2-seanjc@google.com
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ce0c6fe166d..891ca973c838 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3239,13 +3239,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
if ((offset & 0x3) == 0 &&
data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
- return -1;
+ return 1;
/* MCi_STATUS */
if (!msr_info->host_initiated &&
(offset & 0x3) == 1 && data != 0) {
if (!can_set_mci_status(vcpu))
- return -1;
+ return 1;
}
vcpu->arch.mce_banks[offset] = data;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2368048bf5c2ec4b604ac3431564071e89a0bc71 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Thu, 12 May 2022 22:27:14 +0000
Subject: [PATCH] KVM: x86: Signal #GP, not -EPERM, on bad
WRMSR(MCi_CTL/STATUS)
Return '1', not '-1', when handling an illegal WRMSR to a MCi_CTL or
MCi_STATUS MSR. The behavior of "all zeros' or "all ones" for CTL MSRs
is architectural, as is the "only zeros" behavior for STATUS MSRs. I.e.
the intent is to inject a #GP, not exit to userspace due to an unhandled
emulation case. Returning '-1' gets interpreted as -EPERM up the stack
and effecitvely kills the guest.
Fixes: 890ca9aefa78 ("KVM: Add MCE support")
Fixes: 9ffd986c6e4e ("KVM: X86: #GP when guest attempts to write MCi_STATUS register w/o 0")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
Link: https://lore.kernel.org/r/20220512222716.4112548-2-seanjc@google.com
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ce0c6fe166d..891ca973c838 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3239,13 +3239,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
if ((offset & 0x3) == 0 &&
data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
- return -1;
+ return 1;
/* MCi_STATUS */
if (!msr_info->host_initiated &&
(offset & 0x3) == 1 && data != 0) {
if (!can_set_mci_status(vcpu))
- return -1;
+ return 1;
}
vcpu->arch.mce_banks[offset] = data;
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2368048bf5c2ec4b604ac3431564071e89a0bc71 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Thu, 12 May 2022 22:27:14 +0000
Subject: [PATCH] KVM: x86: Signal #GP, not -EPERM, on bad
WRMSR(MCi_CTL/STATUS)
Return '1', not '-1', when handling an illegal WRMSR to a MCi_CTL or
MCi_STATUS MSR. The behavior of "all zeros' or "all ones" for CTL MSRs
is architectural, as is the "only zeros" behavior for STATUS MSRs. I.e.
the intent is to inject a #GP, not exit to userspace due to an unhandled
emulation case. Returning '-1' gets interpreted as -EPERM up the stack
and effecitvely kills the guest.
Fixes: 890ca9aefa78 ("KVM: Add MCE support")
Fixes: 9ffd986c6e4e ("KVM: X86: #GP when guest attempts to write MCi_STATUS register w/o 0")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
Link: https://lore.kernel.org/r/20220512222716.4112548-2-seanjc@google.com
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ce0c6fe166d..891ca973c838 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3239,13 +3239,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
if ((offset & 0x3) == 0 &&
data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
- return -1;
+ return 1;
/* MCi_STATUS */
if (!msr_info->host_initiated &&
(offset & 0x3) == 1 && data != 0) {
if (!can_set_mci_status(vcpu))
- return -1;
+ return 1;
}
vcpu->arch.mce_banks[offset] = data;
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2368048bf5c2ec4b604ac3431564071e89a0bc71 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Thu, 12 May 2022 22:27:14 +0000
Subject: [PATCH] KVM: x86: Signal #GP, not -EPERM, on bad
WRMSR(MCi_CTL/STATUS)
Return '1', not '-1', when handling an illegal WRMSR to a MCi_CTL or
MCi_STATUS MSR. The behavior of "all zeros' or "all ones" for CTL MSRs
is architectural, as is the "only zeros" behavior for STATUS MSRs. I.e.
the intent is to inject a #GP, not exit to userspace due to an unhandled
emulation case. Returning '-1' gets interpreted as -EPERM up the stack
and effecitvely kills the guest.
Fixes: 890ca9aefa78 ("KVM: Add MCE support")
Fixes: 9ffd986c6e4e ("KVM: X86: #GP when guest attempts to write MCi_STATUS register w/o 0")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Jim Mattson <jmattson(a)google.com>
Link: https://lore.kernel.org/r/20220512222716.4112548-2-seanjc@google.com
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7ce0c6fe166d..891ca973c838 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3239,13 +3239,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
*/
if ((offset & 0x3) == 0 &&
data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
- return -1;
+ return 1;
/* MCi_STATUS */
if (!msr_info->host_initiated &&
(offset & 0x3) == 1 && data != 0) {
if (!can_set_mci_status(vcpu))
- return -1;
+ return 1;
}
vcpu->arch.mce_banks[offset] = data;
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 156b9d76e8822f2956c15029acf2d4b171502f3a Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Date: Tue, 12 Jul 2022 15:50:09 +0200
Subject: [PATCH] KVM: nVMX: Always enable TSC scaling for L2 when it was
enabled for L1
Windows 10/11 guests with Hyper-V role (WSL2) enabled are observed to
hang upon boot or shortly after when a non-default TSC frequency was
set for L1. The issue is observed on a host where TSC scaling is
supported. The problem appears to be that Windows doesn't use TSC
scaling for its guests, even when the feature is advertised, and KVM
filters SECONDARY_EXEC_TSC_SCALING out when creating L2 controls from
L1's VMCS. This leads to L2 running with the default frequency (matching
host's) while L1 is running with an altered one.
Keep SECONDARY_EXEC_TSC_SCALING in secondary exec controls for L2 when
it was set for L1. TSC_MULTIPLIER is already correctly computed and
written by prepare_vmcs02().
Signed-off-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Fixes: d041b5ea93352b ("KVM: nVMX: Enable nested TSC scaling")
Cc: stable(a)vger.kernel.org
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Link: https://lore.kernel.org/r/20220712135009.952805-1-vkuznets@redhat.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 778f82015f03..bfa366938c49 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2284,7 +2284,6 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_DESC);
if (nested_cpu_has(vmcs12,
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4496a6f9b45e8cd83343ad86a3984d614e22cf54 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:08 +0000
Subject: [PATCH] KVM: nVMX: Attempt to load PERF_GLOBAL_CTRL on nVMX xfer iff
it exists
Attempt to load PERF_GLOBAL_CTRL during nested VM-Enter/VM-Exit if and
only if the MSR exists (according to the guest vCPU model). KVM has very
misguided handling of VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL and
attempts to force the nVMX MSR settings to match the vPMU model, i.e. to
hide/expose the control based on whether or not the MSR exists from the
guest's perspective.
KVM's modifications fail to handle the scenario where the vPMU is hidden
from the guest _after_ being exposed to the guest, e.g. by userspace
doing multiple KVM_SET_CPUID2 calls, which is allowed if done before any
KVM_RUN. nested_vmx_pmu_refresh() is called if and only if there's a
recognized vPMU, i.e. KVM will leave the bits in the allow state and then
ultimately reject the MSR load and WARN.
KVM should not force the VMX MSRs in the first place. KVM taking control
of the MSRs was a misguided attempt at mimicking what commit 5f76f6f5ff96
("KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled",
2018-10-01) did for MPX. However, the MPX commit was a workaround for
another KVM bug and not something that should be imitated (and it should
never been done in the first place).
In other words, KVM's ABI _should_ be that userspace has full control
over the MSRs, at which point triggering the WARN that loading the MSR
must not fail is trivial.
The intent of the WARN is still valid; KVM has consistency checks to
ensure that vmcs12->{guest,host}_ia32_perf_global_ctrl is valid. The
problem is that '0' must be considered a valid value at all times, and so
the simple/obvious solution is to just not actually load the MSR when it
does not exist. It is userspace's responsibility to provide a sane vCPU
model, i.e. KVM is well within its ABI and Intel's VMX architecture to
skip the loads if the MSR does not exist.
Fixes: 03a8871add95 ("KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c1c85fd75d42..819cfd926954 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2623,6 +2623,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -4333,7 +4334,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
- if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl));
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4496a6f9b45e8cd83343ad86a3984d614e22cf54 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:08 +0000
Subject: [PATCH] KVM: nVMX: Attempt to load PERF_GLOBAL_CTRL on nVMX xfer iff
it exists
Attempt to load PERF_GLOBAL_CTRL during nested VM-Enter/VM-Exit if and
only if the MSR exists (according to the guest vCPU model). KVM has very
misguided handling of VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL and
attempts to force the nVMX MSR settings to match the vPMU model, i.e. to
hide/expose the control based on whether or not the MSR exists from the
guest's perspective.
KVM's modifications fail to handle the scenario where the vPMU is hidden
from the guest _after_ being exposed to the guest, e.g. by userspace
doing multiple KVM_SET_CPUID2 calls, which is allowed if done before any
KVM_RUN. nested_vmx_pmu_refresh() is called if and only if there's a
recognized vPMU, i.e. KVM will leave the bits in the allow state and then
ultimately reject the MSR load and WARN.
KVM should not force the VMX MSRs in the first place. KVM taking control
of the MSRs was a misguided attempt at mimicking what commit 5f76f6f5ff96
("KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled",
2018-10-01) did for MPX. However, the MPX commit was a workaround for
another KVM bug and not something that should be imitated (and it should
never been done in the first place).
In other words, KVM's ABI _should_ be that userspace has full control
over the MSRs, at which point triggering the WARN that loading the MSR
must not fail is trivial.
The intent of the WARN is still valid; KVM has consistency checks to
ensure that vmcs12->{guest,host}_ia32_perf_global_ctrl is valid. The
problem is that '0' must be considered a valid value at all times, and so
the simple/obvious solution is to just not actually load the MSR when it
does not exist. It is userspace's responsibility to provide a sane vCPU
model, i.e. KVM is well within its ABI and Intel's VMX architecture to
skip the loads if the MSR does not exist.
Fixes: 03a8871add95 ("KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c1c85fd75d42..819cfd926954 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2623,6 +2623,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -4333,7 +4334,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
- if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl));
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4496a6f9b45e8cd83343ad86a3984d614e22cf54 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:08 +0000
Subject: [PATCH] KVM: nVMX: Attempt to load PERF_GLOBAL_CTRL on nVMX xfer iff
it exists
Attempt to load PERF_GLOBAL_CTRL during nested VM-Enter/VM-Exit if and
only if the MSR exists (according to the guest vCPU model). KVM has very
misguided handling of VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL and
attempts to force the nVMX MSR settings to match the vPMU model, i.e. to
hide/expose the control based on whether or not the MSR exists from the
guest's perspective.
KVM's modifications fail to handle the scenario where the vPMU is hidden
from the guest _after_ being exposed to the guest, e.g. by userspace
doing multiple KVM_SET_CPUID2 calls, which is allowed if done before any
KVM_RUN. nested_vmx_pmu_refresh() is called if and only if there's a
recognized vPMU, i.e. KVM will leave the bits in the allow state and then
ultimately reject the MSR load and WARN.
KVM should not force the VMX MSRs in the first place. KVM taking control
of the MSRs was a misguided attempt at mimicking what commit 5f76f6f5ff96
("KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled",
2018-10-01) did for MPX. However, the MPX commit was a workaround for
another KVM bug and not something that should be imitated (and it should
never been done in the first place).
In other words, KVM's ABI _should_ be that userspace has full control
over the MSRs, at which point triggering the WARN that loading the MSR
must not fail is trivial.
The intent of the WARN is still valid; KVM has consistency checks to
ensure that vmcs12->{guest,host}_ia32_perf_global_ctrl is valid. The
problem is that '0' must be considered a valid value at all times, and so
the simple/obvious solution is to just not actually load the MSR when it
does not exist. It is userspace's responsibility to provide a sane vCPU
model, i.e. KVM is well within its ABI and Intel's VMX architecture to
skip the loads if the MSR does not exist.
Fixes: 03a8871add95 ("KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c1c85fd75d42..819cfd926954 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2623,6 +2623,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -4333,7 +4334,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
- if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl));
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4496a6f9b45e8cd83343ad86a3984d614e22cf54 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 22 Jul 2022 22:44:08 +0000
Subject: [PATCH] KVM: nVMX: Attempt to load PERF_GLOBAL_CTRL on nVMX xfer iff
it exists
Attempt to load PERF_GLOBAL_CTRL during nested VM-Enter/VM-Exit if and
only if the MSR exists (according to the guest vCPU model). KVM has very
misguided handling of VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL and
attempts to force the nVMX MSR settings to match the vPMU model, i.e. to
hide/expose the control based on whether or not the MSR exists from the
guest's perspective.
KVM's modifications fail to handle the scenario where the vPMU is hidden
from the guest _after_ being exposed to the guest, e.g. by userspace
doing multiple KVM_SET_CPUID2 calls, which is allowed if done before any
KVM_RUN. nested_vmx_pmu_refresh() is called if and only if there's a
recognized vPMU, i.e. KVM will leave the bits in the allow state and then
ultimately reject the MSR load and WARN.
KVM should not force the VMX MSRs in the first place. KVM taking control
of the MSRs was a misguided attempt at mimicking what commit 5f76f6f5ff96
("KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled",
2018-10-01) did for MPX. However, the MPX commit was a workaround for
another KVM bug and not something that should be imitated (and it should
never been done in the first place).
In other words, KVM's ABI _should_ be that userspace has full control
over the MSRs, at which point triggering the WARN that loading the MSR
must not fail is trivial.
The intent of the WARN is still valid; KVM has consistency checks to
ensure that vmcs12->{guest,host}_ia32_perf_global_ctrl is valid. The
problem is that '0' must be considered a valid value at all times, and so
the simple/obvious solution is to just not actually load the MSR when it
does not exist. It is userspace's responsibility to provide a sane vCPU
model, i.e. KVM is well within its ABI and Intel's VMX architecture to
skip the loads if the MSR does not exist.
Fixes: 03a8871add95 ("KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220722224409.1336532-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index c1c85fd75d42..819cfd926954 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2623,6 +2623,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->guest_ia32_perf_global_ctrl))) {
*entry_failure_code = ENTRY_FAIL_DEFAULT;
@@ -4333,7 +4334,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
vcpu->arch.pat = vmcs12->host_ia32_pat;
}
- if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
+ intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl));
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ca58f3aa53d165afe4ab74c755bc2f6d168617ac Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:51 +0000
Subject: [PATCH] KVM: nVMX: Account for KVM reserved CR4 bits in consistency
checks
Check that the guest (L2) and host (L1) CR4 values that would be loaded
by nested VM-Enter and VM-Exit respectively are valid with respect to
KVM's (L0 host) allowed CR4 bits. Failure to check KVM reserved bits
would allow L1 to load an illegal CR4 (or trigger hardware VM-Fail or
failed VM-Entry) by massaging guest CPUID to allow features that are not
supported by KVM. Amusingly, KVM itself is an accomplice in its doom, as
KVM adjusts L1's MSR_IA32_VMX_CR4_FIXED1 to allow L1 to enable bits for
L2 based on L1's CPUID model.
Note, although nested_{guest,host}_cr4_valid() are _currently_ used if
and only if the vCPU is post-VMXON (nested.vmxon == true), that may not
be true in the future, e.g. emulating VMXON has a bug where it doesn't
check the allowed/required CR0/CR4 bits.
Cc: stable(a)vger.kernel.org
Fixes: 3899152ccbf4 ("KVM: nVMX: fix checks on CR{0,4} during virtual VMX operation")
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index c92cea0b8ccc..129ae4e01f7c 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -281,7 +281,8 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
- return fixed_bits_valid(val, fixed0, fixed1);
+ return fixed_bits_valid(val, fixed0, fixed1) &&
+ __kvm_is_valid_cr4(vcpu, val);
}
/* No difference in the restrictions on guest and host CR4 in VMX operation. */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ca58f3aa53d165afe4ab74c755bc2f6d168617ac Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:51 +0000
Subject: [PATCH] KVM: nVMX: Account for KVM reserved CR4 bits in consistency
checks
Check that the guest (L2) and host (L1) CR4 values that would be loaded
by nested VM-Enter and VM-Exit respectively are valid with respect to
KVM's (L0 host) allowed CR4 bits. Failure to check KVM reserved bits
would allow L1 to load an illegal CR4 (or trigger hardware VM-Fail or
failed VM-Entry) by massaging guest CPUID to allow features that are not
supported by KVM. Amusingly, KVM itself is an accomplice in its doom, as
KVM adjusts L1's MSR_IA32_VMX_CR4_FIXED1 to allow L1 to enable bits for
L2 based on L1's CPUID model.
Note, although nested_{guest,host}_cr4_valid() are _currently_ used if
and only if the vCPU is post-VMXON (nested.vmxon == true), that may not
be true in the future, e.g. emulating VMXON has a bug where it doesn't
check the allowed/required CR0/CR4 bits.
Cc: stable(a)vger.kernel.org
Fixes: 3899152ccbf4 ("KVM: nVMX: fix checks on CR{0,4} during virtual VMX operation")
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index c92cea0b8ccc..129ae4e01f7c 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -281,7 +281,8 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
- return fixed_bits_valid(val, fixed0, fixed1);
+ return fixed_bits_valid(val, fixed0, fixed1) &&
+ __kvm_is_valid_cr4(vcpu, val);
}
/* No difference in the restrictions on guest and host CR4 in VMX operation. */
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ca58f3aa53d165afe4ab74c755bc2f6d168617ac Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:51 +0000
Subject: [PATCH] KVM: nVMX: Account for KVM reserved CR4 bits in consistency
checks
Check that the guest (L2) and host (L1) CR4 values that would be loaded
by nested VM-Enter and VM-Exit respectively are valid with respect to
KVM's (L0 host) allowed CR4 bits. Failure to check KVM reserved bits
would allow L1 to load an illegal CR4 (or trigger hardware VM-Fail or
failed VM-Entry) by massaging guest CPUID to allow features that are not
supported by KVM. Amusingly, KVM itself is an accomplice in its doom, as
KVM adjusts L1's MSR_IA32_VMX_CR4_FIXED1 to allow L1 to enable bits for
L2 based on L1's CPUID model.
Note, although nested_{guest,host}_cr4_valid() are _currently_ used if
and only if the vCPU is post-VMXON (nested.vmxon == true), that may not
be true in the future, e.g. emulating VMXON has a bug where it doesn't
check the allowed/required CR0/CR4 bits.
Cc: stable(a)vger.kernel.org
Fixes: 3899152ccbf4 ("KVM: nVMX: fix checks on CR{0,4} during virtual VMX operation")
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index c92cea0b8ccc..129ae4e01f7c 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -281,7 +281,8 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
- return fixed_bits_valid(val, fixed0, fixed1);
+ return fixed_bits_valid(val, fixed0, fixed1) &&
+ __kvm_is_valid_cr4(vcpu, val);
}
/* No difference in the restrictions on guest and host CR4 in VMX operation. */
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ca58f3aa53d165afe4ab74c755bc2f6d168617ac Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:51 +0000
Subject: [PATCH] KVM: nVMX: Account for KVM reserved CR4 bits in consistency
checks
Check that the guest (L2) and host (L1) CR4 values that would be loaded
by nested VM-Enter and VM-Exit respectively are valid with respect to
KVM's (L0 host) allowed CR4 bits. Failure to check KVM reserved bits
would allow L1 to load an illegal CR4 (or trigger hardware VM-Fail or
failed VM-Entry) by massaging guest CPUID to allow features that are not
supported by KVM. Amusingly, KVM itself is an accomplice in its doom, as
KVM adjusts L1's MSR_IA32_VMX_CR4_FIXED1 to allow L1 to enable bits for
L2 based on L1's CPUID model.
Note, although nested_{guest,host}_cr4_valid() are _currently_ used if
and only if the vCPU is post-VMXON (nested.vmxon == true), that may not
be true in the future, e.g. emulating VMXON has a bug where it doesn't
check the allowed/required CR0/CR4 bits.
Cc: stable(a)vger.kernel.org
Fixes: 3899152ccbf4 ("KVM: nVMX: fix checks on CR{0,4} during virtual VMX operation")
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index c92cea0b8ccc..129ae4e01f7c 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -281,7 +281,8 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
- return fixed_bits_valid(val, fixed0, fixed1);
+ return fixed_bits_valid(val, fixed0, fixed1) &&
+ __kvm_is_valid_cr4(vcpu, val);
}
/* No difference in the restrictions on guest and host CR4 in VMX operation. */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c6ab524cfae0799e55c82b2c1d61f1af0156f8d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Sat, 23 Jul 2022 01:30:29 +0000
Subject: [PATCH] KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT
Treat the NX bit as valid when using NPT, as KVM will set the NX bit when
the NX huge page mitigation is enabled (mindblowing) and trigger the WARN
that fires on reserved SPTE bits being set.
KVM has required NX support for SVM since commit b26a71a1a5b9 ("KVM: SVM:
Refuse to load kvm_amd if NX support is not available") for exactly this
reason, but apparently it never occurred to anyone to actually test NPT
with the mitigation enabled.
------------[ cut here ]------------
spte = 0x800000018a600ee7, level = 2, rsvd bits = 0x800f0000001fe000
WARNING: CPU: 152 PID: 15966 at arch/x86/kvm/mmu/spte.c:215 make_spte+0x327/0x340 [kvm]
Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 10.48.0 01/27/2022
RIP: 0010:make_spte+0x327/0x340 [kvm]
Call Trace:
<TASK>
tdp_mmu_map_handle_target_level+0xc3/0x230 [kvm]
kvm_tdp_mmu_map+0x343/0x3b0 [kvm]
direct_page_fault+0x1ae/0x2a0 [kvm]
kvm_tdp_page_fault+0x7d/0x90 [kvm]
kvm_mmu_page_fault+0xfb/0x2e0 [kvm]
npf_interception+0x55/0x90 [kvm_amd]
svm_invoke_exit_handler+0x31/0xf0 [kvm_amd]
svm_handle_exit+0xf6/0x1d0 [kvm_amd]
vcpu_enter_guest+0xb6d/0xee0 [kvm]
? kvm_pmu_trigger_event+0x6d/0x230 [kvm]
vcpu_run+0x65/0x2c0 [kvm]
kvm_arch_vcpu_ioctl_run+0x355/0x610 [kvm]
kvm_vcpu_ioctl+0x551/0x610 [kvm]
__se_sys_ioctl+0x77/0xc0
__x64_sys_ioctl+0x1d/0x20
do_syscall_64+0x44/0xa0
entry_SYSCALL_64_after_hwframe+0x46/0xb0
</TASK>
---[ end trace 0000000000000000 ]---
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220723013029.1753623-1-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8e477333a263..3e1317325e1f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4735,7 +4735,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context)
if (boot_cpu_is_amd())
__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
- context->root_role.level, false,
+ context->root_role.level, true,
boot_cpu_has(X86_FEATURE_GBPAGES),
false, true);
else
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6c6ab524cfae0799e55c82b2c1d61f1af0156f8d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Sat, 23 Jul 2022 01:30:29 +0000
Subject: [PATCH] KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT
Treat the NX bit as valid when using NPT, as KVM will set the NX bit when
the NX huge page mitigation is enabled (mindblowing) and trigger the WARN
that fires on reserved SPTE bits being set.
KVM has required NX support for SVM since commit b26a71a1a5b9 ("KVM: SVM:
Refuse to load kvm_amd if NX support is not available") for exactly this
reason, but apparently it never occurred to anyone to actually test NPT
with the mitigation enabled.
------------[ cut here ]------------
spte = 0x800000018a600ee7, level = 2, rsvd bits = 0x800f0000001fe000
WARNING: CPU: 152 PID: 15966 at arch/x86/kvm/mmu/spte.c:215 make_spte+0x327/0x340 [kvm]
Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 10.48.0 01/27/2022
RIP: 0010:make_spte+0x327/0x340 [kvm]
Call Trace:
<TASK>
tdp_mmu_map_handle_target_level+0xc3/0x230 [kvm]
kvm_tdp_mmu_map+0x343/0x3b0 [kvm]
direct_page_fault+0x1ae/0x2a0 [kvm]
kvm_tdp_page_fault+0x7d/0x90 [kvm]
kvm_mmu_page_fault+0xfb/0x2e0 [kvm]
npf_interception+0x55/0x90 [kvm_amd]
svm_invoke_exit_handler+0x31/0xf0 [kvm_amd]
svm_handle_exit+0xf6/0x1d0 [kvm_amd]
vcpu_enter_guest+0xb6d/0xee0 [kvm]
? kvm_pmu_trigger_event+0x6d/0x230 [kvm]
vcpu_run+0x65/0x2c0 [kvm]
kvm_arch_vcpu_ioctl_run+0x355/0x610 [kvm]
kvm_vcpu_ioctl+0x551/0x610 [kvm]
__se_sys_ioctl+0x77/0xc0
__x64_sys_ioctl+0x1d/0x20
do_syscall_64+0x44/0xa0
entry_SYSCALL_64_after_hwframe+0x46/0xb0
</TASK>
---[ end trace 0000000000000000 ]---
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220723013029.1753623-1-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8e477333a263..3e1317325e1f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4735,7 +4735,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context)
if (boot_cpu_is_amd())
__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
- context->root_role.level, false,
+ context->root_role.level, true,
boot_cpu_has(X86_FEATURE_GBPAGES),
false, true);
else
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2626206963ace9e8bf92b6eea5ff78dd674c555c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Mon, 11 Jul 2022 23:27:49 +0000
Subject: [PATCH] KVM: x86: Set error code to segment selector on LLDT/LTR
non-canonical #GP
When injecting a #GP on LLDT/LTR due to a non-canonical LDT/TSS base, set
the error code to the selector. Intel SDM's says nothing about the #GP,
but AMD's APM explicitly states that both LLDT and LTR set the error code
to the selector, not zero.
Note, a non-canonical memory operand on LLDT/LTR does generate a #GP(0),
but the KVM code in question is specific to the base from the descriptor.
Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Link: https://lore.kernel.org/r/20220711232750.1092012-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 09e4b67b881f..bd9e9c5627d0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1736,8 +1736,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (ret != X86EMUL_CONTINUE)
return ret;
if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
- ((u64)base3 << 32), ctxt))
- return emulate_gp(ctxt, 0);
+ ((u64)base3 << 32), ctxt))
+ return emulate_gp(ctxt, err_code);
}
if (seg == VCPU_SREG_TR) {
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7d855c2aff2d511fd60ee2e356134c4fb394799 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:52 +0000
Subject: [PATCH] KVM: nVMX: Inject #UD if VMXON is attempted with incompatible
CR0/CR4
Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed
per the associated nested VMX MSRs' fixed0/1 settings. KVM cannot rely
on hardware to perform the checks, even for the few checks that have
higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in
hardware while running the guest, (b) there may incompatible CR0/CR4 bits
that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may
have further restricted the allowed CR0/CR4 values by manipulating the
guest's nested VMX MSRs.
Note, despite a very strong desire to throw shade at Jim, commit
70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks")
is not to blame for the buggy behavior (though the comment...). That
commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks
(though it did erroneously drop the CPL check, but that has already been
remedied). KVM may force CR0.PE=1, but will do so only when also
forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033
Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF")
Reported-by: Eric Li <ercli(a)ucdavis.edu>
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bfa366938c49..9c3350545b09 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4952,20 +4952,25 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
/*
- * The Intel VMX Instruction Reference lists a bunch of bits that are
- * prerequisite to running VMXON, most notably cr4.VMXE must be set to
- * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
- * Otherwise, we should fail with #UD. But most faulting conditions
- * have already been checked by hardware, prior to the VM-exit for
- * VMXON. We do test guest cr4.VMXE because processor CR4 always has
- * that bit set to 1 in non-root mode.
+ * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
+ * that have higher priority than VM-Exit (see Intel SDM's pseudocode
+ * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
+ * running the guest, i.e. KVM needs to check the _guest_ values.
+ *
+ * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
+ * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real
+ * Mode, but KVM will never take the guest out of those modes.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
+ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
+ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- /* CPL=0 must be checked manually. */
+ /*
+ * CPL=0 and all other checks that are lower priority than VM-Exit must
+ * be checked manually.
+ */
if (vmx_get_cpl(vcpu)) {
kvm_inject_gp(vcpu, 0);
return 1;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7d855c2aff2d511fd60ee2e356134c4fb394799 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:52 +0000
Subject: [PATCH] KVM: nVMX: Inject #UD if VMXON is attempted with incompatible
CR0/CR4
Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed
per the associated nested VMX MSRs' fixed0/1 settings. KVM cannot rely
on hardware to perform the checks, even for the few checks that have
higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in
hardware while running the guest, (b) there may incompatible CR0/CR4 bits
that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may
have further restricted the allowed CR0/CR4 values by manipulating the
guest's nested VMX MSRs.
Note, despite a very strong desire to throw shade at Jim, commit
70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks")
is not to blame for the buggy behavior (though the comment...). That
commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks
(though it did erroneously drop the CPL check, but that has already been
remedied). KVM may force CR0.PE=1, but will do so only when also
forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033
Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF")
Reported-by: Eric Li <ercli(a)ucdavis.edu>
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bfa366938c49..9c3350545b09 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4952,20 +4952,25 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
/*
- * The Intel VMX Instruction Reference lists a bunch of bits that are
- * prerequisite to running VMXON, most notably cr4.VMXE must be set to
- * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
- * Otherwise, we should fail with #UD. But most faulting conditions
- * have already been checked by hardware, prior to the VM-exit for
- * VMXON. We do test guest cr4.VMXE because processor CR4 always has
- * that bit set to 1 in non-root mode.
+ * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
+ * that have higher priority than VM-Exit (see Intel SDM's pseudocode
+ * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
+ * running the guest, i.e. KVM needs to check the _guest_ values.
+ *
+ * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
+ * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real
+ * Mode, but KVM will never take the guest out of those modes.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
+ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
+ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- /* CPL=0 must be checked manually. */
+ /*
+ * CPL=0 and all other checks that are lower priority than VM-Exit must
+ * be checked manually.
+ */
if (vmx_get_cpl(vcpu)) {
kvm_inject_gp(vcpu, 0);
return 1;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7d855c2aff2d511fd60ee2e356134c4fb394799 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:52 +0000
Subject: [PATCH] KVM: nVMX: Inject #UD if VMXON is attempted with incompatible
CR0/CR4
Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed
per the associated nested VMX MSRs' fixed0/1 settings. KVM cannot rely
on hardware to perform the checks, even for the few checks that have
higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in
hardware while running the guest, (b) there may incompatible CR0/CR4 bits
that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may
have further restricted the allowed CR0/CR4 values by manipulating the
guest's nested VMX MSRs.
Note, despite a very strong desire to throw shade at Jim, commit
70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks")
is not to blame for the buggy behavior (though the comment...). That
commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks
(though it did erroneously drop the CPL check, but that has already been
remedied). KVM may force CR0.PE=1, but will do so only when also
forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033
Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF")
Reported-by: Eric Li <ercli(a)ucdavis.edu>
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bfa366938c49..9c3350545b09 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4952,20 +4952,25 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
/*
- * The Intel VMX Instruction Reference lists a bunch of bits that are
- * prerequisite to running VMXON, most notably cr4.VMXE must be set to
- * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
- * Otherwise, we should fail with #UD. But most faulting conditions
- * have already been checked by hardware, prior to the VM-exit for
- * VMXON. We do test guest cr4.VMXE because processor CR4 always has
- * that bit set to 1 in non-root mode.
+ * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
+ * that have higher priority than VM-Exit (see Intel SDM's pseudocode
+ * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
+ * running the guest, i.e. KVM needs to check the _guest_ values.
+ *
+ * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
+ * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real
+ * Mode, but KVM will never take the guest out of those modes.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
+ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
+ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- /* CPL=0 must be checked manually. */
+ /*
+ * CPL=0 and all other checks that are lower priority than VM-Exit must
+ * be checked manually.
+ */
if (vmx_get_cpl(vcpu)) {
kvm_inject_gp(vcpu, 0);
return 1;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7d855c2aff2d511fd60ee2e356134c4fb394799 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:52 +0000
Subject: [PATCH] KVM: nVMX: Inject #UD if VMXON is attempted with incompatible
CR0/CR4
Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed
per the associated nested VMX MSRs' fixed0/1 settings. KVM cannot rely
on hardware to perform the checks, even for the few checks that have
higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in
hardware while running the guest, (b) there may incompatible CR0/CR4 bits
that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may
have further restricted the allowed CR0/CR4 values by manipulating the
guest's nested VMX MSRs.
Note, despite a very strong desire to throw shade at Jim, commit
70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks")
is not to blame for the buggy behavior (though the comment...). That
commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks
(though it did erroneously drop the CPL check, but that has already been
remedied). KVM may force CR0.PE=1, but will do so only when also
forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033
Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF")
Reported-by: Eric Li <ercli(a)ucdavis.edu>
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bfa366938c49..9c3350545b09 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4952,20 +4952,25 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
/*
- * The Intel VMX Instruction Reference lists a bunch of bits that are
- * prerequisite to running VMXON, most notably cr4.VMXE must be set to
- * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
- * Otherwise, we should fail with #UD. But most faulting conditions
- * have already been checked by hardware, prior to the VM-exit for
- * VMXON. We do test guest cr4.VMXE because processor CR4 always has
- * that bit set to 1 in non-root mode.
+ * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
+ * that have higher priority than VM-Exit (see Intel SDM's pseudocode
+ * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
+ * running the guest, i.e. KVM needs to check the _guest_ values.
+ *
+ * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
+ * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real
+ * Mode, but KVM will never take the guest out of those modes.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
+ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
+ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- /* CPL=0 must be checked manually. */
+ /*
+ * CPL=0 and all other checks that are lower priority than VM-Exit must
+ * be checked manually.
+ */
if (vmx_get_cpl(vcpu)) {
kvm_inject_gp(vcpu, 0);
return 1;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c7d855c2aff2d511fd60ee2e356134c4fb394799 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 7 Jun 2022 21:35:52 +0000
Subject: [PATCH] KVM: nVMX: Inject #UD if VMXON is attempted with incompatible
CR0/CR4
Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed
per the associated nested VMX MSRs' fixed0/1 settings. KVM cannot rely
on hardware to perform the checks, even for the few checks that have
higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in
hardware while running the guest, (b) there may incompatible CR0/CR4 bits
that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may
have further restricted the allowed CR0/CR4 values by manipulating the
guest's nested VMX MSRs.
Note, despite a very strong desire to throw shade at Jim, commit
70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks")
is not to blame for the buggy behavior (though the comment...). That
commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks
(though it did erroneously drop the CPL check, but that has already been
remedied). KVM may force CR0.PE=1, but will do so only when also
forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033
Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF")
Reported-by: Eric Li <ercli(a)ucdavis.edu>
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220607213604.3346000-4-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bfa366938c49..9c3350545b09 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4952,20 +4952,25 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
/*
- * The Intel VMX Instruction Reference lists a bunch of bits that are
- * prerequisite to running VMXON, most notably cr4.VMXE must be set to
- * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
- * Otherwise, we should fail with #UD. But most faulting conditions
- * have already been checked by hardware, prior to the VM-exit for
- * VMXON. We do test guest cr4.VMXE because processor CR4 always has
- * that bit set to 1 in non-root mode.
+ * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
+ * that have higher priority than VM-Exit (see Intel SDM's pseudocode
+ * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
+ * running the guest, i.e. KVM needs to check the _guest_ values.
+ *
+ * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
+ * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real
+ * Mode, but KVM will never take the guest out of those modes.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
+ if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
+ !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- /* CPL=0 must be checked manually. */
+ /*
+ * CPL=0 and all other checks that are lower priority than VM-Exit must
+ * be checked manually.
+ */
if (vmx_get_cpl(vcpu)) {
kvm_inject_gp(vcpu, 0);
return 1;
The patch titled
Subject: mm/hugetlb: support write-faults in shared mappings
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-hugetlb-support-write-faults-in-shared-mappings.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/hugetlb: support write-faults in shared mappings
Date: Thu, 11 Aug 2022 12:34:35 +0200
If we ever get a write-fault on a write-protected page in a shared
mapping, we'd be in trouble (again). Instead, we can simply map the page
writable.
And in fact, there is even a way right now to trigger that code via
uffd-wp ever since we stared to support it for shmem in 5.19:
--------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/userfaultfd.h>
#define HUGETLB_SIZE (2 * 1024 * 1024u)
static char *map;
int uffd;
static int temp_setup_uffd(void)
{
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
struct uffdio_writeprotect uffd_writeprotect;
struct uffdio_range uffd_range;
uffd = syscall(__NR_userfaultfd,
O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
if (uffd < 0) {
fprintf(stderr, "syscall() failed: %d\n", errno);
return -errno;
}
uffdio_api.api = UFFD_API;
uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
fprintf(stderr, "UFFDIO_API failed: %d\n", errno);
return -errno;
}
if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
fprintf(stderr, "UFFD_FEATURE_WRITEPROTECT missing\n");
return -ENOSYS;
}
/* Register UFFD-WP */
uffdio_register.range.start = (unsigned long) map;
uffdio_register.range.len = HUGETLB_SIZE;
uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) {
fprintf(stderr, "UFFDIO_REGISTER failed: %d\n", errno);
return -errno;
}
/* Writeprotect a single page. */
uffd_writeprotect.range.start = (unsigned long) map;
uffd_writeprotect.range.len = HUGETLB_SIZE;
uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP;
if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
fprintf(stderr, "UFFDIO_WRITEPROTECT failed: %d\n", errno);
return -errno;
}
/* Unregister UFFD-WP without prior writeunprotection. */
uffd_range.start = (unsigned long) map;
uffd_range.len = HUGETLB_SIZE;
if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_range)) {
fprintf(stderr, "UFFDIO_UNREGISTER failed: %d\n", errno);
return -errno;
}
return 0;
}
int main(int argc, char **argv)
{
int fd;
fd = open("/dev/hugepages/tmp", O_RDWR | O_CREAT);
if (!fd) {
fprintf(stderr, "open() failed\n");
return -errno;
}
if (ftruncate(fd, HUGETLB_SIZE)) {
fprintf(stderr, "ftruncate() failed\n");
return -errno;
}
map = mmap(NULL, HUGETLB_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
fprintf(stderr, "mmap() failed\n");
return -errno;
}
*map = 0;
if (temp_setup_uffd())
return 1;
*map = 0;
return 0;
}
--------------------------------------------------------------------------
Above test fails with SIGBUS when there is only a single free hugetlb page.
# echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# ./test
Bus error (core dumped)
And worse, with sufficient free hugetlb pages it will map an anonymous page
into a shared mapping, for example, messing up accounting during unmap
and breaking MAP_SHARED semantics:
# echo 2 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# ./test
# cat /proc/meminfo | grep HugePages_
HugePages_Total: 2
HugePages_Free: 1
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Reason is that uffd-wp doesn't clear the uffd-wp PTE bit when
unregistering and consequently keeps the PTE writeprotected. Reason for
this is to avoid the additional overhead when unregistering. Note that
this is the case also for !hugetlb and that we will end up with writable
PTEs that still have the uffd-wp PTE bit set once we return from
hugetlb_wp(). I'm not touching the uffd-wp PTE bit for now, because it
seems to be a generic thing -- wp_page_reuse() also doesn't clear it.
VM_MAYSHARE handling in hugetlb_fault() for FAULT_FLAG_WRITE indicates
that MAP_SHARED handling was at least envisioned, but could never have
worked as expected.
While at it, make sure that we never end up in hugetlb_wp() on write
faults without VM_WRITE, because we don't support maybe_mkwrite()
semantics as commonly used in the !hugetlb case -- for example, in
wp_page_reuse().
Note that there is no need to do any kind of reservation in
hugetlb_fault() in this case ... because we already have a hugetlb page
mapped R/O that we will simply map writable and we are not dealing with
COW/unsharing.
Link: https://lkml.kernel.org/r/20220811103435.188481-3-david@redhat.com
Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: <stable(a)vger.kernel.org> [5.19]
Cc: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: Cyrill Gorcunov <gorcunov(a)openvz.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jamie Liu <jamieliu(a)google.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: Pavel Emelyanov <xemul(a)parallels.com>
Cc: Peter Feiner <pfeiner(a)google.com>
Cc: Peter Xu <peterx(a)redhat.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 26 +++++++++++++++++++-------
1 file changed, 19 insertions(+), 7 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlb-support-write-faults-in-shared-mappings
+++ a/mm/hugetlb.c
@@ -5241,6 +5241,21 @@ static vm_fault_t hugetlb_wp(struct mm_s
VM_BUG_ON(unshare && (flags & FOLL_WRITE));
VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
+ /*
+ * hugetlb does not support FOLL_FORCE-style write faults that keep the
+ * PTE mapped R/O such as maybe_mkwrite() would do.
+ */
+ if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE)))
+ return VM_FAULT_SIGSEGV;
+
+ /* Let's take out MAP_SHARED mappings first. */
+ if (vma->vm_flags & VM_MAYSHARE) {
+ if (unlikely(unshare))
+ return 0;
+ set_huge_ptep_writable(vma, haddr, ptep);
+ return 0;
+ }
+
pte = huge_ptep_get(ptep);
old_page = pte_page(pte);
@@ -5781,12 +5796,11 @@ vm_fault_t hugetlb_fault(struct mm_struc
* If we are going to COW/unshare the mapping later, we examine the
* pending reservations for this page now. This will ensure that any
* allocations necessary to record that reservation occur outside the
- * spinlock. For private mappings, we also lookup the pagecache
- * page now as it is used to determine if a reservation has been
- * consumed.
+ * spinlock. Also lookup the pagecache page now as it is used to
+ * determine if a reservation has been consumed.
*/
if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
- !huge_pte_write(entry)) {
+ !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) {
if (vma_needs_reservation(h, vma, haddr) < 0) {
ret = VM_FAULT_OOM;
goto out_mutex;
@@ -5794,9 +5808,7 @@ vm_fault_t hugetlb_fault(struct mm_struc
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, haddr);
- if (!(vma->vm_flags & VM_MAYSHARE))
- pagecache_page = hugetlbfs_pagecache_page(h,
- vma, haddr);
+ pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr);
}
ptl = huge_pte_lock(h, mm, ptep);
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-gup-fix-foll_force-cow-security-issue-and-remove-foll_cow.patch
mm-hugetlb-fix-hugetlb-not-supporting-softdirty-tracking.patch
mm-hugetlb-support-write-faults-in-shared-mappings.patch
The patch titled
Subject: mm/hugetlb: fix hugetlb not supporting softdirty tracking
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-hugetlb-fix-hugetlb-not-supporting-softdirty-tracking.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/hugetlb: fix hugetlb not supporting softdirty tracking
Date: Thu, 11 Aug 2022 12:34:34 +0200
Patch series "mm/hugetlb: fix write-fault handling for shared mappings", v2.
I observed that hugetlb does not support/expect write-faults in shared
mappings that would have to map the R/O-mapped page writable -- and I
found two case where we could currently get such faults and would
erroneously map an anon page into a shared mapping.
Reproducers part of the patches.
I propose to backport both fixes to stable trees. The first fix needs a
small adjustment.
This patch (of 2):
Staring at hugetlb_wp(), one might wonder where all the logic for shared
mappings is when stumbling over a write-protected page in a shared
mapping. In fact, there is none, and so far we thought we could get away
with that because e.g., mprotect() should always do the right thing and
map all pages directly writable.
Looks like we were wrong:
--------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#define HUGETLB_SIZE (2 * 1024 * 1024u)
static void clear_softdirty(void)
{
int fd = open("/proc/self/clear_refs", O_WRONLY);
const char *ctrl = "4";
int ret;
if (fd < 0) {
fprintf(stderr, "open(clear_refs) failed\n");
exit(1);
}
ret = write(fd, ctrl, strlen(ctrl));
if (ret != strlen(ctrl)) {
fprintf(stderr, "write(clear_refs) failed\n");
exit(1);
}
close(fd);
}
int main(int argc, char **argv)
{
char *map;
int fd;
fd = open("/dev/hugepages/tmp", O_RDWR | O_CREAT);
if (!fd) {
fprintf(stderr, "open() failed\n");
return -errno;
}
if (ftruncate(fd, HUGETLB_SIZE)) {
fprintf(stderr, "ftruncate() failed\n");
return -errno;
}
map = mmap(NULL, HUGETLB_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
fprintf(stderr, "mmap() failed\n");
return -errno;
}
*map = 0;
if (mprotect(map, HUGETLB_SIZE, PROT_READ)) {
fprintf(stderr, "mmprotect() failed\n");
return -errno;
}
clear_softdirty();
if (mprotect(map, HUGETLB_SIZE, PROT_READ|PROT_WRITE)) {
fprintf(stderr, "mmprotect() failed\n");
return -errno;
}
*map = 0;
return 0;
}
--------------------------------------------------------------------------
Above test fails with SIGBUS when there is only a single free hugetlb page.
# echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# ./test
Bus error (core dumped)
And worse, with sufficient free hugetlb pages it will map an anonymous page
into a shared mapping, for example, messing up accounting during unmap
and breaking MAP_SHARED semantics:
# echo 2 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# ./test
# cat /proc/meminfo | grep HugePages_
HugePages_Total: 2
HugePages_Free: 1
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Reason in this particular case is that vma_wants_writenotify() will
return "true", removing VM_SHARED in vma_set_page_prot() to map pages
write-protected. Let's teach vma_wants_writenotify() that hugetlb does not
support softdirty tracking.
Link: https://lkml.kernel.org/r/20220811103435.188481-1-david@redhat.com
Link: https://lkml.kernel.org/r/20220811103435.188481-2-david@redhat.com
Fixes: 64e455079e1b ("mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Peter Feiner <pfeiner(a)google.com>
Cc: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: Cyrill Gorcunov <gorcunov(a)openvz.org>
Cc: Pavel Emelyanov <xemul(a)parallels.com>
Cc: Jamie Liu <jamieliu(a)google.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Naoya Horiguchi <n-horiguchi(a)ah.jp.nec.com>
Cc: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: Muchun Song <songmuchun(a)bytedance.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [3.18+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mmap.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
--- a/mm/mmap.c~mm-hugetlb-fix-hugetlb-not-supporting-softdirty-tracking
+++ a/mm/mmap.c
@@ -1646,8 +1646,11 @@ int vma_wants_writenotify(struct vm_area
pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
return 0;
- /* Do we need to track softdirty? */
- if (vma_soft_dirty_enabled(vma))
+ /*
+ * Do we need to track softdirty? hugetlb does not support softdirty
+ * tracking yet.
+ */
+ if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
return 1;
/* Specialty mapping? */
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-gup-fix-foll_force-cow-security-issue-and-remove-foll_cow.patch
mm-hugetlb-fix-hugetlb-not-supporting-softdirty-tracking.patch
mm-hugetlb-support-write-faults-in-shared-mappings.patch
The patch titled
Subject: mm/uffd: reset write protection when unregister with wp-mode
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-uffd-reset-write-protection-when-unregister-with-wp-mode.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/uffd: reset write protection when unregister with wp-mode
Date: Thu, 11 Aug 2022 16:13:40 -0400
The motivation of this patch comes from a recent report and patchfix from
David Hildenbrand on hugetlb shared handling of wr-protected page [1].
With the reproducer provided in commit message of [1], one can leverage
the uffd-wp lazy-reset of ptes to trigger a hugetlb issue which can affect
not only the attacker process, but also the whole system.
The lazy-reset mechanism of uffd-wp was used to make unregister faster,
meanwhile it has an assumption that any leftover pgtable entries should
only affect the process on its own, so not only the user should be aware
of anything it does, but also it should not affect outside of the process.
But it seems that this is not true, and it can also be utilized to make
some exploit easier.
So far there's no clue showing that the lazy-reset is important to any
userfaultfd users because normally the unregister will only happen once
for a specific range of memory of the lifecycle of the process.
Considering all above, what this patch proposes is to do explicit pte
resets when unregister an uffd region with wr-protect mode enabled.
It should be the same as calling ioctl(UFFDIO_WRITEPROTECT, wp=false)
right before ioctl(UFFDIO_UNREGISTER) for the user. So potentially it'll
make the unregister slower. From that pov it's a very slight abi change,
but hopefully nothing should break with this change either.
Regarding to the change itself - core of uffd write [un]protect operation
is moved into a separate function (uffd_wp_range()) and it is reused in
the unregister code path.
Note that the new function will not check for anything, e.g. ranges or
memory types, because they should have been checked during the previous
UFFDIO_REGISTER or it should have failed already. It also doesn't check
mmap_changing because we're with mmap write lock held anyway.
I added a Fixes upon introducing of uffd-wp shmem+hugetlbfs because that's
the only issue reported so far and that's the commit David's reproducer
will start working (v5.19+). But the whole idea actually applies to not
only file memories but also anonymous. It's just that we don't need to
fix anonymous prior to v5.19- because there's no known way to exploit.
IOW, this patch can also fix the issue reported in [1] as the patch 2 does.
[1] https://lore.kernel.org/all/20220811103435.188481-3-david@redhat.com/
Link: https://lkml.kernel.org/r/20220811201340.39342-1-peterx@redhat.com
Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/userfaultfd.c | 4 ++++
include/linux/userfaultfd_k.h | 2 ++
mm/userfaultfd.c | 29 ++++++++++++++++++-----------
3 files changed, 24 insertions(+), 11 deletions(-)
--- a/fs/userfaultfd.c~mm-uffd-reset-write-protection-when-unregister-with-wp-mode
+++ a/fs/userfaultfd.c
@@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct
wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
}
+ /* Reset ptes for the whole vma range if wr-protected */
+ if (userfaultfd_wp(vma))
+ uffd_wp_range(mm, vma, start, vma_end - start, false);
+
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
--- a/include/linux/userfaultfd_k.h~mm-uffd-reset-write-protection-when-unregister-with-wp-mode
+++ a/include/linux/userfaultfd_k.h
@@ -73,6 +73,8 @@ extern ssize_t mcopy_continue(struct mm_
extern int mwriteprotect_range(struct mm_struct *dst_mm,
unsigned long start, unsigned long len,
bool enable_wp, atomic_t *mmap_changing);
+extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma,
+ unsigned long start, unsigned long len, bool enable_wp);
/* mm helpers */
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
--- a/mm/userfaultfd.c~mm-uffd-reset-write-protection-when-unregister-with-wp-mode
+++ a/mm/userfaultfd.c
@@ -703,14 +703,29 @@ ssize_t mcopy_continue(struct mm_struct
mmap_changing, 0);
}
+void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
+ unsigned long start, unsigned long len, bool enable_wp)
+{
+ struct mmu_gather tlb;
+ pgprot_t newprot;
+
+ if (enable_wp)
+ newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
+ else
+ newprot = vm_get_page_prot(dst_vma->vm_flags);
+
+ tlb_gather_mmu(&tlb, dst_mm);
+ change_protection(&tlb, dst_vma, start, start + len, newprot,
+ enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
+ tlb_finish_mmu(&tlb);
+}
+
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool enable_wp,
atomic_t *mmap_changing)
{
struct vm_area_struct *dst_vma;
unsigned long page_mask;
- struct mmu_gather tlb;
- pgprot_t newprot;
int err;
/*
@@ -750,15 +765,7 @@ int mwriteprotect_range(struct mm_struct
goto out_unlock;
}
- if (enable_wp)
- newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
- else
- newprot = vm_get_page_prot(dst_vma->vm_flags);
-
- tlb_gather_mmu(&tlb, dst_mm);
- change_protection(&tlb, dst_vma, start, start + len, newprot,
- enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
- tlb_finish_mmu(&tlb);
+ uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);
err = 0;
out_unlock:
_
Patches currently in -mm which might be from peterx(a)redhat.com are
mm-smaps-dont-access-young-dirty-bit-if-pte-unpresent.patch
mm-uffd-reset-write-protection-when-unregister-with-wp-mode.patch
Staring at hugetlb_wp(), one might wonder where all the logic for shared
mappings is when stumbling over a write-protected page in a shared
mapping. In fact, there is none, and so far we thought we could get
away with that because e.g., mprotect() should always do the right thing
and map all pages directly writable.
Looks like we were wrong:
--------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#define HUGETLB_SIZE (2 * 1024 * 1024u)
static void clear_softdirty(void)
{
int fd = open("/proc/self/clear_refs", O_WRONLY);
const char *ctrl = "4";
int ret;
if (fd < 0) {
fprintf(stderr, "open(clear_refs) failed\n");
exit(1);
}
ret = write(fd, ctrl, strlen(ctrl));
if (ret != strlen(ctrl)) {
fprintf(stderr, "write(clear_refs) failed\n");
exit(1);
}
close(fd);
}
int main(int argc, char **argv)
{
char *map;
int fd;
fd = open("/dev/hugepages/tmp", O_RDWR | O_CREAT);
if (!fd) {
fprintf(stderr, "open() failed\n");
return -errno;
}
if (ftruncate(fd, HUGETLB_SIZE)) {
fprintf(stderr, "ftruncate() failed\n");
return -errno;
}
map = mmap(NULL, HUGETLB_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (map == MAP_FAILED) {
fprintf(stderr, "mmap() failed\n");
return -errno;
}
*map = 0;
if (mprotect(map, HUGETLB_SIZE, PROT_READ)) {
fprintf(stderr, "mmprotect() failed\n");
return -errno;
}
clear_softdirty();
if (mprotect(map, HUGETLB_SIZE, PROT_READ|PROT_WRITE)) {
fprintf(stderr, "mmprotect() failed\n");
return -errno;
}
*map = 0;
return 0;
}
--------------------------------------------------------------------------
Above test fails with SIGBUS when there is only a single free hugetlb page.
# echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# ./test
Bus error (core dumped)
And worse, with sufficient free hugetlb pages it will map an anonymous page
into a shared mapping, for example, messing up accounting during unmap
and breaking MAP_SHARED semantics:
# echo 2 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
# ./test
# cat /proc/meminfo | grep HugePages_
HugePages_Total: 2
HugePages_Free: 1
HugePages_Rsvd: 18446744073709551615
HugePages_Surp: 0
Reason in this particular case is that vma_wants_writenotify() will
return "true", removing VM_SHARED in vma_set_page_prot() to map pages
write-protected. Let's teach vma_wants_writenotify() that hugetlb does not
support softdirty tracking.
Fixes: 64e455079e1b ("mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared")
Cc: <stable(a)vger.kernel.org> # v3.18+
Signed-off-by: David Hildenbrand <david(a)redhat.com>
---
mm/mmap.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index c035020d0c89..9d780f415be3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1646,8 +1646,11 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
return 0;
- /* Do we need to track softdirty? */
- if (vma_soft_dirty_enabled(vma))
+ /*
+ * Do we need to track softdirty? hugetlb does not support softdirty
+ * tracking yet.
+ */
+ if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
return 1;
/* Specialty mapping? */
--
2.35.3
The patch below does not apply to the `gum-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the 5.18-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the 5.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the see-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the options-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the provided,-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
The patch below does not apply to the no-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason(a)zx2c4.com>
Date: Thu, 28 Jul 2022 00:32:18 +1000
Subject: [PATCH] powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
The existing logic in KVM to support guests calling H_RANDOM only works
on Power8, because it looks for an RNG in the device tree, but on Power9
we just use darn.
In addition the existing code needs to work in real mode, so we have the
special cased powernv_get_random_real_mode() to deal with that.
Instead just have KVM call ppc_md.get_random_seed(), and do the real
mode check inside of there, that way we use whatever RNG is available,
including darn on Power9.
Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
Cc: stable(a)vger.kernel.org # v4.1+
Signed-off-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Sachin Sant <sachinp(a)linux.ibm.com>
[mpe: Rebase on previous commit, update change log appropriately]
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index 9a53e29680f4..258174304904 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
-int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
-int powernv_get_random_real_mode(unsigned long *v);
-#else
-static inline int powernv_hwrng_present(void) { return 0; }
-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 88a8f6473c4e..3abaef5f9ac2 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -19,7 +19,7 @@
#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/archrandom.h>
+#include <asm/machdep.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/dbell.h>
@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
- return powernv_hwrng_present();
+ return ppc_md.get_random_seed != NULL;
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
- if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 2287c9cd0cd5..d19305292e1e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -29,15 +29,6 @@ struct powernv_rng {
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-int powernv_hwrng_present(void)
-{
- struct powernv_rng *rng;
-
- rng = get_cpu_var(powernv_rng);
- put_cpu_var(rng);
- return rng != NULL;
-}
-
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
-int powernv_get_random_real_mode(unsigned long *v)
-{
- struct powernv_rng *rng;
-
- rng = raw_cpu_read(powernv_rng);
- if (!rng)
- return 0;
-
- *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
-
- return 1;
-}
-
static int powernv_get_random_darn(unsigned long *v)
{
unsigned long val;
@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
- rng = get_cpu_var(powernv_rng);
-
- *v = rng_whiten(rng, in_be64(rng->regs));
-
- put_cpu_var(rng);
-
+ if (mfmsr() & MSR_DR) {
+ rng = get_cpu_var(powernv_rng);
+ *v = rng_whiten(rng, in_be64(rng->regs));
+ put_cpu_var(rng);
+ } else {
+ rng = raw_cpu_read(powernv_rng);
+ *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+ }
return 1;
}
EXPORT_SYMBOL_GPL(powernv_get_random_long);
From: Javier Martinez Canillas <javierm(a)redhat.com>
[ Upstream commit 3367aa7d74d240261de2543ddb35531ccad9d884 ]
Drivers that want to remove registered conflicting framebuffers prior to
register their own framebuffer, call to remove_conflicting_framebuffers().
This function takes the registration_lock mutex, to prevent a race when
drivers register framebuffer devices. But if a conflicting framebuffer
device is found, the underlaying platform device is unregistered and this
will lead to the platform driver .remove callback to be called. Which in
turn will call to unregister_framebuffer() that takes the same lock.
To prevent this, a struct fb_info.forced_out field was used as indication
to unregister_framebuffer() whether the mutex has to be grabbed or not.
But this could be unsafe, since the fbdev core is making assumptions about
what drivers may or may not do in their .remove callbacks. Allowing to run
these callbacks with the registration_lock held can cause deadlocks, since
the fbdev core has no control over what drivers do in their removal path.
A better solution is to drop the lock before platform_device_unregister(),
so unregister_framebuffer() can take it when called from the fbdev driver.
The lock is acquired again after the device has been unregistered and at
this point the removal loop can be restarted.
Since the conflicting framebuffer device has already been removed, the
loop would just finish when no more conflicting framebuffers are found.
Suggested-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20220511113039.1252432-1-javi…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/video/fbdev/core/fbmem.c | 22 +++++++++++++++-------
include/linux/fb.h | 1 -
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 7ee6eb2fa715..02b0cf2cfafe 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1568,6 +1568,7 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
{
int i;
+restart_removal:
/* check all firmware fbs and kick off if the base addr overlaps */
for_each_registered_fb(i) {
struct apertures_struct *gen_aper;
@@ -1600,12 +1601,23 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
pr_warn("fb%d: no device set\n", i);
do_unregister_framebuffer(registered_fb[i]);
} else if (dev_is_platform(device)) {
- registered_fb[i]->forced_out = true;
+ /*
+ * Drop the lock because if the device is unregistered, its
+ * driver will call to unregister_framebuffer(), that takes
+ * this lock.
+ */
+ mutex_unlock(®istration_lock);
platform_device_unregister(to_platform_device(device));
+ mutex_lock(®istration_lock);
} else {
pr_warn("fb%d: cannot remove device\n", i);
do_unregister_framebuffer(registered_fb[i]);
}
+ /*
+ * Restart the removal loop now that the device has been
+ * unregistered and its associated framebuffer gone.
+ */
+ goto restart_removal;
}
}
}
@@ -1876,13 +1888,9 @@ EXPORT_SYMBOL(register_framebuffer);
void
unregister_framebuffer(struct fb_info *fb_info)
{
- bool forced_out = fb_info->forced_out;
-
- if (!forced_out)
- mutex_lock(®istration_lock);
+ mutex_lock(®istration_lock);
do_unregister_framebuffer(fb_info);
- if (!forced_out)
- mutex_unlock(®istration_lock);
+ mutex_unlock(®istration_lock);
}
EXPORT_SYMBOL(unregister_framebuffer);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 2892145468c9..07fcd0e56682 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -511,7 +511,6 @@ struct fb_info {
} *apertures;
bool skip_vt_switch; /* no VT switch on suspend/resume required */
- bool forced_out; /* set when being removed by another driver */
};
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
--
2.35.1
From: Javier Martinez Canillas <javierm(a)redhat.com>
[ Upstream commit 3367aa7d74d240261de2543ddb35531ccad9d884 ]
Drivers that want to remove registered conflicting framebuffers prior to
register their own framebuffer, call to remove_conflicting_framebuffers().
This function takes the registration_lock mutex, to prevent a race when
drivers register framebuffer devices. But if a conflicting framebuffer
device is found, the underlaying platform device is unregistered and this
will lead to the platform driver .remove callback to be called. Which in
turn will call to unregister_framebuffer() that takes the same lock.
To prevent this, a struct fb_info.forced_out field was used as indication
to unregister_framebuffer() whether the mutex has to be grabbed or not.
But this could be unsafe, since the fbdev core is making assumptions about
what drivers may or may not do in their .remove callbacks. Allowing to run
these callbacks with the registration_lock held can cause deadlocks, since
the fbdev core has no control over what drivers do in their removal path.
A better solution is to drop the lock before platform_device_unregister(),
so unregister_framebuffer() can take it when called from the fbdev driver.
The lock is acquired again after the device has been unregistered and at
this point the removal loop can be restarted.
Since the conflicting framebuffer device has already been removed, the
loop would just finish when no more conflicting framebuffers are found.
Suggested-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20220511113039.1252432-1-javi…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/video/fbdev/core/fbmem.c | 22 +++++++++++++++-------
include/linux/fb.h | 1 -
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 528c87ff14d8..619d82e20d4e 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1568,6 +1568,7 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
{
int i;
+restart_removal:
/* check all firmware fbs and kick off if the base addr overlaps */
for_each_registered_fb(i) {
struct apertures_struct *gen_aper;
@@ -1602,12 +1603,23 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
*/
do_unregister_framebuffer(registered_fb[i]);
} else if (dev_is_platform(device)) {
- registered_fb[i]->forced_out = true;
+ /*
+ * Drop the lock because if the device is unregistered, its
+ * driver will call to unregister_framebuffer(), that takes
+ * this lock.
+ */
+ mutex_unlock(®istration_lock);
platform_device_unregister(to_platform_device(device));
+ mutex_lock(®istration_lock);
} else {
pr_warn("fb%d: cannot remove device\n", i);
do_unregister_framebuffer(registered_fb[i]);
}
+ /*
+ * Restart the removal loop now that the device has been
+ * unregistered and its associated framebuffer gone.
+ */
+ goto restart_removal;
}
}
}
@@ -1945,13 +1957,9 @@ EXPORT_SYMBOL(register_framebuffer);
void
unregister_framebuffer(struct fb_info *fb_info)
{
- bool forced_out = fb_info->forced_out;
-
- if (!forced_out)
- mutex_lock(®istration_lock);
+ mutex_lock(®istration_lock);
do_unregister_framebuffer(fb_info);
- if (!forced_out)
- mutex_unlock(®istration_lock);
+ mutex_unlock(®istration_lock);
}
EXPORT_SYMBOL(unregister_framebuffer);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 3d7306c9a706..02f362c661c8 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -502,7 +502,6 @@ struct fb_info {
} *apertures;
bool skip_vt_switch; /* no VT switch on suspend/resume required */
- bool forced_out; /* set when being removed by another driver */
};
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
--
2.35.1
From: Javier Martinez Canillas <javierm(a)redhat.com>
[ Upstream commit 3367aa7d74d240261de2543ddb35531ccad9d884 ]
Drivers that want to remove registered conflicting framebuffers prior to
register their own framebuffer, call to remove_conflicting_framebuffers().
This function takes the registration_lock mutex, to prevent a race when
drivers register framebuffer devices. But if a conflicting framebuffer
device is found, the underlaying platform device is unregistered and this
will lead to the platform driver .remove callback to be called. Which in
turn will call to unregister_framebuffer() that takes the same lock.
To prevent this, a struct fb_info.forced_out field was used as indication
to unregister_framebuffer() whether the mutex has to be grabbed or not.
But this could be unsafe, since the fbdev core is making assumptions about
what drivers may or may not do in their .remove callbacks. Allowing to run
these callbacks with the registration_lock held can cause deadlocks, since
the fbdev core has no control over what drivers do in their removal path.
A better solution is to drop the lock before platform_device_unregister(),
so unregister_framebuffer() can take it when called from the fbdev driver.
The lock is acquired again after the device has been unregistered and at
this point the removal loop can be restarted.
Since the conflicting framebuffer device has already been removed, the
loop would just finish when no more conflicting framebuffers are found.
Suggested-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm(a)redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter(a)ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20220511113039.1252432-1-javi…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/video/fbdev/core/fbmem.c | 22 +++++++++++++++-------
include/linux/fb.h | 1 -
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 643383d74edc..82880f59ba67 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1566,6 +1566,7 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
{
int i;
+restart_removal:
/* check all firmware fbs and kick off if the base addr overlaps */
for_each_registered_fb(i) {
struct apertures_struct *gen_aper;
@@ -1600,12 +1601,23 @@ static void do_remove_conflicting_framebuffers(struct apertures_struct *a,
*/
do_unregister_framebuffer(registered_fb[i]);
} else if (dev_is_platform(device)) {
- registered_fb[i]->forced_out = true;
+ /*
+ * Drop the lock because if the device is unregistered, its
+ * driver will call to unregister_framebuffer(), that takes
+ * this lock.
+ */
+ mutex_unlock(®istration_lock);
platform_device_unregister(to_platform_device(device));
+ mutex_lock(®istration_lock);
} else {
pr_warn("fb%d: cannot remove device\n", i);
do_unregister_framebuffer(registered_fb[i]);
}
+ /*
+ * Restart the removal loop now that the device has been
+ * unregistered and its associated framebuffer gone.
+ */
+ goto restart_removal;
}
}
}
@@ -1946,13 +1958,9 @@ EXPORT_SYMBOL(register_framebuffer);
void
unregister_framebuffer(struct fb_info *fb_info)
{
- bool forced_out = fb_info->forced_out;
-
- if (!forced_out)
- mutex_lock(®istration_lock);
+ mutex_lock(®istration_lock);
do_unregister_framebuffer(fb_info);
- if (!forced_out)
- mutex_unlock(®istration_lock);
+ mutex_unlock(®istration_lock);
}
EXPORT_SYMBOL(unregister_framebuffer);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 9a77ab615c36..ae399d3ea4f9 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -503,7 +503,6 @@ struct fb_info {
} *apertures;
bool skip_vt_switch; /* no VT switch on suspend/resume required */
- bool forced_out; /* set when being removed by another driver */
};
static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
--
2.35.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 764643a6be07445308e492a528197044c801b3ba Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 14 Jun 2022 21:58:28 +0000
Subject: [PATCH] KVM: nVMX: Snapshot pre-VM-Enter DEBUGCTL for
!nested_run_pending case
If a nested run isn't pending, snapshot vmcs01.GUEST_IA32_DEBUGCTL
irrespective of whether or not VM_ENTRY_LOAD_DEBUG_CONTROLS is set in
vmcs12. When restoring nested state, e.g. after migration, without a
nested run pending, prepare_vmcs02() will propagate
nested.vmcs01_debugctl to vmcs02, i.e. will load garbage/zeros into
vmcs02.GUEST_IA32_DEBUGCTL.
If userspace restores nested state before MSRs, then loading garbage is a
non-issue as loading DEBUGCTL will also update vmcs02. But if usersepace
restores MSRs first, then KVM is responsible for propagating L2's value,
which is actually thrown into vmcs01, into vmcs02.
Restoring L2 MSRs into vmcs01, i.e. loading all MSRs before nested state
is all kinds of bizarre and ideally would not be supported. Sadly, some
VMMs do exactly that and rely on KVM to make things work.
Note, there's still a lurking SMM bug, as propagating vmcs01's DEBUGCTL
to vmcs02 across RSM may corrupt L2's DEBUGCTL. But KVM's entire VMX+SMM
emulation is flawed as SMI+RSM should not toouch _any_ VMCS when use the
"default treatment of SMIs", i.e. when not using an SMI Transfer Monitor.
Link: https://lore.kernel.org/all/Yobt1XwOfb5M6Dfa@google.com
Fixes: 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220614215831.3762138-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 66c25bb56938..4a53e0c73445 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3379,7 +3379,8 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
- if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+ if (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (kvm_mpx_supported() &&
(!vmx->nested.nested_run_pending ||
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From fa578398a0ba2c079fa1170da21fa5baae0cedb2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Tue, 14 Jun 2022 21:58:27 +0000
Subject: [PATCH] KVM: nVMX: Snapshot pre-VM-Enter BNDCFGS for
!nested_run_pending case
If a nested run isn't pending, snapshot vmcs01.GUEST_BNDCFGS irrespective
of whether or not VM_ENTRY_LOAD_BNDCFGS is set in vmcs12. When restoring
nested state, e.g. after migration, without a nested run pending,
prepare_vmcs02() will propagate nested.vmcs01_guest_bndcfgs to vmcs02,
i.e. will load garbage/zeros into vmcs02.GUEST_BNDCFGS.
If userspace restores nested state before MSRs, then loading garbage is a
non-issue as loading BNDCFGS will also update vmcs02. But if usersepace
restores MSRs first, then KVM is responsible for propagating L2's value,
which is actually thrown into vmcs01, into vmcs02.
Restoring L2 MSRs into vmcs01, i.e. loading all MSRs before nested state
is all kinds of bizarre and ideally would not be supported. Sadly, some
VMMs do exactly that and rely on KVM to make things work.
Note, there's still a lurking SMM bug, as propagating vmcs01.GUEST_BNDFGS
to vmcs02 across RSM may corrupt L2's BNDCFGS. But KVM's entire VMX+SMM
emulation is flawed as SMI+RSM should not toouch _any_ VMCS when use the
"default treatment of SMIs", i.e. when not using an SMI Transfer Monitor.
Link: https://lore.kernel.org/all/Yobt1XwOfb5M6Dfa@google.com
Fixes: 62cf9bd8118c ("KVM: nVMX: Fix emulation of VM_ENTRY_LOAD_BNDCFGS")
Cc: stable(a)vger.kernel.org
Cc: Lei Wang <lei4.wang(a)intel.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20220614215831.3762138-2-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7d8cd0ebcc75..66c25bb56938 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3382,7 +3382,8 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (kvm_mpx_supported() &&
- !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
+ (!vmx->nested.nested_run_pending ||
+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
/*
Hello everyone,
I'd like to request a review to merge these patches into stable releases.
These patches fix race conditions in pending packets from
mac80211_hwsim which cause kernel panic after a device is running for
a few hours. It makes test cases in Android(which uses mac80211_hwsim
for test purposes) flaky, and also, makes Android emulator unstable.
It would be great if these patches could be merged after version 4.19.
If you have further questions, please let me know!
commit cc5250cdb43d444061412df7fae72d2b4acbdf97 wifi: mac80211_hwsim:
use 32-bit skb cookie
commit 58b6259d820d63c2adf1c7541b54cce5a2ae6073 wifi: mac80211_hwsim:
add back erroneously removed cast
commit 4ee186fa7e40ae06ebbfbad77e249e3746e14114 wifi: mac80211_hwsim:
fix race condition in pending packet
Thanks
Jeongik
[Public]
Hi,
Some Lenovo laptops need to be added to an ACP6x quirk list for the DMIC on them to work.
Can you please backport this commit to 5.18.y and 5.19.y?
be0aa8d4b0fc ("ASoC: amd: yc: Update DMI table entries")
Thanks,
This backports a commit from upstream which I would consider a cleanup
as well as a (minor) performance fix due to less memory being used and
avoiding an unneccessary pointer dereferencing, i.e. the change
from `sbsec->sb->s_root` to `sb->s_root`.
However as it changes the `superblock_security_struct` please check if
this violates any API/ABI stability requirements which I'm not aware of.
Ondrej Mosnacek (1):
selinux: drop super_block backpointer from superblock_security_struct
security/selinux/hooks.c | 5 ++---
security/selinux/include/objsec.h | 1 -
2 files changed, 2 insertions(+), 4 deletions(-)
--
2.25.1
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 23ba98de6dcec665e15c0ca19244379bb0d30932 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton(a)kernel.org>
Date: Fri, 29 Jul 2022 17:01:07 -0400
Subject: [PATCH] nfsd: eliminate the NFSD_FILE_BREAK_* flags
We had a report from the spring Bake-a-thon of data corruption in some
nfstest_interop tests. Looking at the traces showed the NFS server
allowing a v3 WRITE to proceed while a read delegation was still
outstanding.
Currently, we only set NFSD_FILE_BREAK_* flags if
NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
COMMIT ops, where we need a writeable filehandle but don't need to
break read leases.
It doesn't make any sense to consult that flag when allocating a file
since the file may be used on subsequent calls where we do want to break
the lease (and the usage of it here seems to be reverse from what it
should be anyway).
Also, after calling nfsd_open_break_lease, we don't want to clear the
BREAK_* bits. A lease could end up being set on it later (more than
once) and we need to be able to break those leases as well.
This means that the NFSD_FILE_BREAK_* flags now just mirror
NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
drop those flags and unconditionally call nfsd_open_break_lease every
time.
Reported-by: Olga Kornieskaia <kolga(a)netapp.com>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
Cc: <stable(a)vger.kernel.org> # 5.4.x : bb283ca18d1e NFSD: Clean up the show_nf_flags() macro
Cc: <stable(a)vger.kernel.org> # 5.4.x
Signed-off-by: Jeff Layton <jlayton(a)kernel.org>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 9cb2d590c036..e1f98d32cee1 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -184,12 +184,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
nf->nf_hashval = hashval;
refcount_set(&nf->nf_ref, 1);
nf->nf_may = may & NFSD_FILE_MAY_MASK;
- if (may & NFSD_MAY_NOT_BREAK_LEASE) {
- if (may & NFSD_MAY_WRITE)
- __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
- if (may & NFSD_MAY_READ)
- __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
- }
nf->nf_mark = NULL;
trace_nfsd_file_alloc(nf);
}
@@ -958,21 +952,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
this_cpu_inc(nfsd_file_cache_hits);
- if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
- bool write = (may_flags & NFSD_MAY_WRITE);
-
- if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
- (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
- status = nfserrno(nfsd_open_break_lease(
- file_inode(nf->nf_file), may_flags));
- if (status == nfs_ok) {
- clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
- if (write)
- clear_bit(NFSD_FILE_BREAK_WRITE,
- &nf->nf_flags);
- }
- }
- }
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
if (status == nfs_ok) {
*pnf = nf;
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 1da0c79a5580..c9e3c6eb4776 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -37,9 +37,7 @@ struct nfsd_file {
struct net *nf_net;
#define NFSD_FILE_HASHED (0)
#define NFSD_FILE_PENDING (1)
-#define NFSD_FILE_BREAK_READ (2)
-#define NFSD_FILE_BREAK_WRITE (3)
-#define NFSD_FILE_REFERENCED (4)
+#define NFSD_FILE_REFERENCED (2)
unsigned long nf_flags;
struct inode *nf_inode;
unsigned int nf_hashval;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index a60ead3b227a..081179fb17e8 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -696,8 +696,6 @@ DEFINE_CLID_EVENT(confirmed_r);
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
{ 1 << NFSD_FILE_PENDING, "PENDING" }, \
- { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
- { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
{ 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
DECLARE_EVENT_CLASS(nfsd_file_class,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 23ba98de6dcec665e15c0ca19244379bb0d30932 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton(a)kernel.org>
Date: Fri, 29 Jul 2022 17:01:07 -0400
Subject: [PATCH] nfsd: eliminate the NFSD_FILE_BREAK_* flags
We had a report from the spring Bake-a-thon of data corruption in some
nfstest_interop tests. Looking at the traces showed the NFS server
allowing a v3 WRITE to proceed while a read delegation was still
outstanding.
Currently, we only set NFSD_FILE_BREAK_* flags if
NFSD_MAY_NOT_BREAK_LEASE was set when we call nfsd_file_alloc.
NFSD_MAY_NOT_BREAK_LEASE was intended to be set when finding files for
COMMIT ops, where we need a writeable filehandle but don't need to
break read leases.
It doesn't make any sense to consult that flag when allocating a file
since the file may be used on subsequent calls where we do want to break
the lease (and the usage of it here seems to be reverse from what it
should be anyway).
Also, after calling nfsd_open_break_lease, we don't want to clear the
BREAK_* bits. A lease could end up being set on it later (more than
once) and we need to be able to break those leases as well.
This means that the NFSD_FILE_BREAK_* flags now just mirror
NFSD_MAY_{READ,WRITE} flags, so there's no need for them at all. Just
drop those flags and unconditionally call nfsd_open_break_lease every
time.
Reported-by: Olga Kornieskaia <kolga(a)netapp.com>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2107360
Fixes: 65294c1f2c5e (nfsd: add a new struct file caching facility to nfsd)
Cc: <stable(a)vger.kernel.org> # 5.4.x : bb283ca18d1e NFSD: Clean up the show_nf_flags() macro
Cc: <stable(a)vger.kernel.org> # 5.4.x
Signed-off-by: Jeff Layton <jlayton(a)kernel.org>
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 9cb2d590c036..e1f98d32cee1 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -184,12 +184,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
nf->nf_hashval = hashval;
refcount_set(&nf->nf_ref, 1);
nf->nf_may = may & NFSD_FILE_MAY_MASK;
- if (may & NFSD_MAY_NOT_BREAK_LEASE) {
- if (may & NFSD_MAY_WRITE)
- __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
- if (may & NFSD_MAY_READ)
- __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
- }
nf->nf_mark = NULL;
trace_nfsd_file_alloc(nf);
}
@@ -958,21 +952,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
this_cpu_inc(nfsd_file_cache_hits);
- if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
- bool write = (may_flags & NFSD_MAY_WRITE);
-
- if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
- (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
- status = nfserrno(nfsd_open_break_lease(
- file_inode(nf->nf_file), may_flags));
- if (status == nfs_ok) {
- clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
- if (write)
- clear_bit(NFSD_FILE_BREAK_WRITE,
- &nf->nf_flags);
- }
- }
- }
+ status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
out:
if (status == nfs_ok) {
*pnf = nf;
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 1da0c79a5580..c9e3c6eb4776 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -37,9 +37,7 @@ struct nfsd_file {
struct net *nf_net;
#define NFSD_FILE_HASHED (0)
#define NFSD_FILE_PENDING (1)
-#define NFSD_FILE_BREAK_READ (2)
-#define NFSD_FILE_BREAK_WRITE (3)
-#define NFSD_FILE_REFERENCED (4)
+#define NFSD_FILE_REFERENCED (2)
unsigned long nf_flags;
struct inode *nf_inode;
unsigned int nf_hashval;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index a60ead3b227a..081179fb17e8 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -696,8 +696,6 @@ DEFINE_CLID_EVENT(confirmed_r);
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
{ 1 << NFSD_FILE_PENDING, "PENDING" }, \
- { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \
- { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
{ 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
DECLARE_EVENT_CLASS(nfsd_file_class,
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bb283ca18d1e67c82d22a329c96c9d6036a74790 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Sun, 27 Mar 2022 16:43:03 -0400
Subject: [PATCH] NFSD: Clean up the show_nf_flags() macro
The flags are defined using C macros, so TRACE_DEFINE_ENUM is
unnecessary.
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index feb6e6f834b6..a60ead3b227a 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -692,12 +692,6 @@ DEFINE_CLID_EVENT(confirmed_r);
/*
* from fs/nfsd/filecache.h
*/
-TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
-TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
-TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
-
#define show_nf_flags(val) \
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bb283ca18d1e67c82d22a329c96c9d6036a74790 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever(a)oracle.com>
Date: Sun, 27 Mar 2022 16:43:03 -0400
Subject: [PATCH] NFSD: Clean up the show_nf_flags() macro
The flags are defined using C macros, so TRACE_DEFINE_ENUM is
unnecessary.
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index feb6e6f834b6..a60ead3b227a 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -692,12 +692,6 @@ DEFINE_CLID_EVENT(confirmed_r);
/*
* from fs/nfsd/filecache.h
*/
-TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
-TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
-TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_WRITE);
-TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
-
#define show_nf_flags(val) \
__print_flags(val, "|", \
{ 1 << NFSD_FILE_HASHED, "HASHED" }, \
I'm announcing the release of the 5.19.1 kernel.
All users of the 5.19 kernel series must upgrade.
The updated 5.19.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-5.19.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/admin-guide/hw-vuln/spectre.rst | 8
Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml | 1
Makefile | 2
arch/arm64/crypto/poly1305-glue.c | 2
arch/arm64/include/asm/kernel-pgtable.h | 4
arch/arm64/kernel/head.S | 2
arch/x86/include/asm/cpufeatures.h | 2
arch/x86/include/asm/msr-index.h | 4
arch/x86/include/asm/nospec-branch.h | 21 ++
arch/x86/kernel/cpu/bugs.c | 86 +++++++---
arch/x86/kernel/cpu/common.c | 12 +
arch/x86/kvm/vmx/vmenter.S | 8
block/blk-ioc.c | 2
block/ioprio.c | 4
drivers/acpi/apei/bert.c | 31 ++-
drivers/acpi/video_detect.c | 55 ++++--
drivers/ata/sata_mv.c | 2
drivers/bluetooth/btbcm.c | 2
drivers/bluetooth/btusb.c | 15 +
drivers/bluetooth/hci_bcm.c | 2
drivers/bluetooth/hci_qca.c | 2
drivers/macintosh/adb.c | 2
include/linux/ioprio.h | 2
tools/arch/x86/include/asm/cpufeatures.h | 1
tools/arch/x86/include/asm/msr-index.h | 4
tools/vm/slabinfo.c | 26 ++-
26 files changed, 231 insertions(+), 71 deletions(-)
Aaron Ma (1):
Bluetooth: btusb: Add support of IMC Networks PID 0x3568
Ahmad Fatoum (2):
Bluetooth: hci_bcm: Add BCM4349B1 variant
dt-bindings: bluetooth: broadcom: Add BCM4349B1 DT binding
Andrew Lunn (1):
ata: sata_mv: Fixes expected number of resources now IRQs are gone
Daniel Sneddon (1):
x86/speculation: Add RSB VM Exit protections
GUO Zihua (1):
crypto: arm64/poly1305 - fix a read out-of-bound
Greg Kroah-Hartman (1):
Linux 5.19.1
Hakan Jansson (1):
Bluetooth: hci_bcm: Add DT compatible for CYW55572
Hilda Wu (5):
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04CA:0x4007
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x04C5:0x1675
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x0CB8:0xC558
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3587
Bluetooth: btusb: Add Realtek RTL8852C support ID 0x13D3:0x3586
Jan Kara (1):
block: fix default IO priority handling again
Ning Qiang (1):
macintosh/adb: fix oob read in do_adb_query() function
Pawan Gupta (1):
x86/speculation: Add LFENCE to RSB fill sequence
Peter Collingbourne (1):
arm64: set UXN on swapper page tables
Sai Teja Aluvala (1):
Bluetooth: hci_qca: Return wakeup for qca_wakeup
Stéphane Graber (1):
tools/vm/slabinfo: Handle files in debugfs
Tony Luck (1):
ACPI: APEI: Better fix to avoid spamming the console with old error logs
Werner Sembach (2):
ACPI: video: Force backlight native for some TongFang devices
ACPI: video: Shortening quirk list by identifying Clevo by board_name only
I'm announcing the release of the 4.19.255 kernel.
All users of the 4.19 kernel series must upgrade.
The updated 4.19.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-4.19.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Documentation/admin-guide/hw-vuln/spectre.rst | 8 +++
Documentation/networking/ip-sysctl.txt | 9 +++
Makefile | 2
arch/arm/lib/xor-neon.c | 3 -
arch/s390/include/asm/archrandom.h | 9 ++-
arch/x86/include/asm/cpufeatures.h | 2
arch/x86/include/asm/msr-index.h | 4 +
arch/x86/include/asm/nospec-branch.h | 19 +++++++-
arch/x86/kernel/cpu/bugs.c | 61 +++++++++++++++++++++++++-
arch/x86/kernel/cpu/common.c | 12 ++++-
arch/x86/kvm/vmx.c | 6 +-
drivers/acpi/video_detect.c | 55 ++++++++++++++---------
drivers/macintosh/adb.c | 2
drivers/net/ethernet/intel/i40e/i40e_main.c | 4 +
drivers/net/sungem_phy.c | 1
drivers/net/wireless/mediatek/mt7601u/usb.c | 1
drivers/scsi/scsi_lib.c | 3 -
drivers/scsi/ufs/ufshcd-pltfrm.c | 15 +++++-
fs/ntfs/attrib.c | 8 ++-
include/net/bluetooth/l2cap.h | 1
include/net/tcp.h | 2
net/bluetooth/l2cap_core.c | 61 ++++++++++++++++++++------
net/ipv4/igmp.c | 24 +++++-----
net/ipv4/tcp.c | 2
net/ipv4/tcp_input.c | 20 ++++----
net/ipv4/tcp_metrics.c | 2
net/ipv4/tcp_output.c | 2
net/ipv6/ping.c | 6 ++
net/netfilter/nfnetlink_queue.c | 7 ++
net/sctp/stream_sched.c | 2
tools/perf/util/symbol-elf.c | 45 +++++++++++++++++--
31 files changed, 315 insertions(+), 83 deletions(-)
ChenXiaoSong (1):
ntfs: fix use-after-free in ntfs_ucsncmp()
Daniel Sneddon (1):
x86/speculation: Add RSB VM Exit protections
Duoming Zhou (1):
sctp: fix sleep in atomic context bug in timer handlers
Florian Westphal (1):
netfilter: nf_queue: do not allow packet truncation below transport header offset
Greg Kroah-Hartman (2):
ARM: crypto: comment out gcc warning that breaks clang builds
Linux 4.19.255
Harald Freudenberger (1):
s390/archrandom: prevent CPACF trng invocations in interrupt context
Kuniyuki Iwashima (14):
tcp: Fix data-races around sysctl_tcp_dsack.
tcp: Fix a data-race around sysctl_tcp_app_win.
tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
tcp: Fix a data-race around sysctl_tcp_frto.
tcp: Fix a data-race around sysctl_tcp_nometrics_save.
tcp: Fix a data-race around sysctl_tcp_challenge_ack_limit.
net: ping6: Fix memleak in ipv6_renew_options().
igmp: Fix data-races around sysctl_igmp_qrv.
tcp: Fix a data-race around sysctl_tcp_min_tso_segs.
tcp: Fix a data-race around sysctl_tcp_min_rtt_wlen.
tcp: Fix a data-race around sysctl_tcp_autocorking.
tcp: Fix a data-race around sysctl_tcp_invalid_ratelimit.
tcp: Fix a data-race around sysctl_tcp_comp_sack_delay_ns.
tcp: Fix a data-race around sysctl_tcp_comp_sack_nr.
Leo Yan (1):
perf symbol: Correct address for bss symbols
Liang He (2):
scsi: ufs: host: Hold reference returned by of_parse_phandle()
net: sungem_phy: Add of_node_put() for reference returned by of_get_parent()
Luiz Augusto von Dentz (1):
Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put
Michal Maloszewski (1):
i40e: Fix interface init with MSI interrupts (no MSI-X)
Ming Lei (1):
scsi: core: Fix race between handling STS_RESOURCE and completion
Ning Qiang (1):
macintosh/adb: fix oob read in do_adb_query() function
Pawan Gupta (1):
x86/speculation: Add LFENCE to RSB fill sequence
Wei Mingzhi (1):
mt7601u: add USB device ID for some versions of XiaoDu WiFi Dongle.
Werner Sembach (2):
ACPI: video: Force backlight native for some TongFang devices
ACPI: video: Shortening quirk list by identifying Clevo by board_name only
Xin Long (1):
Documentation: fix sctp_wmem in ip-sysctl.rst
This patch fixes an inconsistency, if not a clear bug, with the extended permissions.
To quote from the original discussion [1]:
> The behavior of dontauditx and auditallowx appears to be broken making them useless.
[1] https://lore.kernel.org/selinux/6a791504-7728-3026-17ee-c22cbff8c3d1@gmail.…
bauen1 (1):
selinux: allow dontauditx and auditallowx rules to take effect without allowx
security/selinux/ss/services.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
--
2.25.1
When a route filter is replaced and the old filter has a 0 handle, the old
one won't be removed from the hashtable, while it will still be freed.
The test was there since before commit 1109c00547fc ("net: sched: RCU
cls_route"), when a new filter was not allocated when there was an old one.
The old filter was reused and the reinserting would only be necessary if an
old filter was replaced. That was still wrong for the same case where the
old handle was 0.
Remove the old filter from the list independently from its handle value.
This fixes CVE-2022-2588, also reported as ZDI-CAN-17440.
Reported-by: Zhenpeng Lin <zplin(a)u.northwestern.edu>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
Reviewed-by: Kamal Mostafa <kamal(a)canonical.com>
Cc: <stable(a)vger.kernel.org>
Cc: Jamal Hadi Salim <jhs(a)mojatatu.com>
---
net/sched/cls_route.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index a35ab8c27866..3f935cbbaff6 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -526,7 +526,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);
- if (fold && fold->handle && f->handle != fold->handle) {
+ if (fold) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);
--
2.34.1
The patch titled
Subject: kernel/sys_ni: add compat entry for fadvise64_64
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kernel-sys_ni-add-compat-entry-for-fadvise64_64.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Randy Dunlap <rdunlap(a)infradead.org>
Subject: kernel/sys_ni: add compat entry for fadvise64_64
Date: Sun, 7 Aug 2022 15:09:34 -0700
When CONFIG_ADVISE_SYSCALLS is not set/enabled and CONFIG_COMPAT is
set/enabled, the riscv compat_syscall_table references
'compat_sys_fadvise64_64', which is not defined:
riscv64-linux-ld: arch/riscv/kernel/compat_syscall_table.o:(.rodata+0x6f8):
undefined reference to `compat_sys_fadvise64_64'
Add 'fadvise64_64' to kernel/sys_ni.c as a conditional COMPAT function so
that when CONFIG_ADVISE_SYSCALLS is not set, there is a fallback function
available.
Link: https://lkml.kernel.org/r/20220807220934.5689-1-rdunlap@infradead.org
Fixes: d3ac21cacc24 ("mm: Support compiling out madvise and fadvise")
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Suggested-by: Arnd Bergmann <arnd(a)arndb.de>
Reviewed-by: Arnd Bergmann <arnd(a)arndb.de>
Cc: Josh Triplett <josh(a)joshtriplett.org>
Cc: Paul Walmsley <paul.walmsley(a)sifive.com>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Albert Ou <aou(a)eecs.berkeley.edu>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/sys_ni.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/sys_ni.c~kernel-sys_ni-add-compat-entry-for-fadvise64_64
+++ a/kernel/sys_ni.c
@@ -277,6 +277,7 @@ COND_SYSCALL(landlock_restrict_self);
/* mm/fadvise.c */
COND_SYSCALL(fadvise64_64);
+COND_SYSCALL_COMPAT(fadvise64_64);
/* mm/, CONFIG_MMU only */
COND_SYSCALL(swapon);
_
Patches currently in -mm which might be from rdunlap(a)infradead.org are
kernel-sys_ni-add-compat-entry-for-fadvise64_64.patch
The patch titled
Subject: mm/gup: fix FOLL_FORCE COW security issue and remove FOLL_COW
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-gup-fix-foll_force-cow-security-issue-and-remove-foll_cow.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Hildenbrand <david(a)redhat.com>
Subject: mm/gup: fix FOLL_FORCE COW security issue and remove FOLL_COW
Date: Tue, 9 Aug 2022 22:56:40 +0200
Ever since the Dirty COW (CVE-2016-5195) security issue happened, we know
that FOLL_FORCE can be possibly dangerous, especially if there are races
that can be exploited by user space.
Right now, it would be sufficient to have some code that sets a PTE of a
R/O-mapped shared page dirty, in order for it to erroneously become
writable by FOLL_FORCE. The implications of setting a write-protected PTE
dirty might not be immediately obvious to everyone.
And in fact ever since commit 9ae0f87d009c ("mm/shmem: unconditionally set
pte dirty in mfill_atomic_install_pte"), we can use UFFDIO_CONTINUE to map
a shmem page R/O while marking the pte dirty. This can be used by
unprivileged user space to modify tmpfs/shmem file content even if the
user does not have write permissions to the file, and to bypass memfd
write sealing -- Dirty COW restricted to tmpfs/shmem (CVE-2022-2590).
To fix such security issues for good, the insight is that we really only
need that fancy retry logic (FOLL_COW) for COW mappings that are not
writable (!VM_WRITE). And in a COW mapping, we really only broke COW if
we have an exclusive anonymous page mapped. If we have something else
mapped, or the mapped anonymous page might be shared (!PageAnonExclusive),
we have to trigger a write fault to break COW. If we don't find an
exclusive anonymous page when we retry, we have to trigger COW breaking
once again because something intervened.
Let's move away from this mandatory-retry + dirty handling and rely on our
PageAnonExclusive() flag for making a similar decision, to use the same
COW logic as in other kernel parts here as well. In case we stumble over
a PTE in a COW mapping that does not map an exclusive anonymous page, COW
was not properly broken and we have to trigger a fake write-fault to break
COW.
Just like we do in can_change_pte_writable() added via commit 64fe24a3e05e
("mm/mprotect: try avoiding write faults for exclusive anonymous pages
when changing protection") and commit 76aefad628aa ("mm/mprotect: fix
soft-dirty check in can_change_pte_writable()"), take care of softdirty
and uffd-wp manually.
For example, a write() via /proc/self/mem to a uffd-wp-protected range has
to fail instead of silently granting write access and bypassing the
userspace fault handler. Note that FOLL_FORCE is not only used for debug
access, but also triggered by applications without debug intentions, for
example, when pinning pages via RDMA.
This fixes CVE-2022-2590. Note that only x86_64 and aarch64 are
affected, because only those support CONFIG_HAVE_ARCH_USERFAULTFD_MINOR.
Fortunately, FOLL_COW is no longer required to handle FOLL_FORCE. So
let's just get rid of it.
Thanks to Nadav Amit for pointing out that the pte_dirty() check in
FOLL_FORCE code is problematic and might be exploitable.
Note 1: We don't check for the PTE being dirty because it doesn't matter
for making a "was COWed" decision anymore, and whoever modifies the
page has to set the page dirty either way.
Note 2: Kernels before extended uffd-wp support and before
PageAnonExclusive (< 5.19) can simply revert the problematic
commit instead and be safe regarding UFFDIO_CONTINUE. A backport to
v5.19 requires minor adjustments due to lack of
vma_soft_dirty_enabled().
Link: https://lkml.kernel.org/r/20220809205640.70916-1-david@redhat.com
Fixes: 9ae0f87d009c ("mm/shmem: unconditionally set pte dirty in mfill_atomic_install_pte")
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: Peter Xu <peterx(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: John Hubbard <jhubbard(a)nvidia.com>
Cc: Jason Gunthorpe <jgg(a)nvidia.com>
Cc: David Laight <David.Laight(a)ACULAB.COM>
Cc: <stable(a)vger.kernel.org> [5.16]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/mm.h | 1
mm/gup.c | 68 +++++++++++++++++++++++++++++--------------
mm/huge_memory.c | 64 +++++++++++++++++++++++++++-------------
3 files changed, 89 insertions(+), 44 deletions(-)
--- a/include/linux/mm.h~mm-gup-fix-foll_force-cow-security-issue-and-remove-foll_cow
+++ a/include/linux/mm.h
@@ -2896,7 +2896,6 @@ struct page *follow_page(struct vm_area_
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
-#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
--- a/mm/gup.c~mm-gup-fix-foll_force-cow-security-issue-and-remove-foll_cow
+++ a/mm/gup.c
@@ -478,14 +478,42 @@ static int follow_pfn_pte(struct vm_area
return -EEXIST;
}
-/*
- * FOLL_FORCE can write to even unwritable pte's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pte_write(pte) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+ /* If the pte is writable, we can write to the page. */
+ if (pte_write(pte))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
+ return false;
+ return !userfaultfd_pte_wp(vma, pte);
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -528,12 +556,19 @@ retry:
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
- pte_unmap_unlock(ptep, ptl);
- return NULL;
- }
page = vm_normal_page(vma, address, pte);
+
+ /*
+ * We only care about anon pages in can_follow_write_pte() and don't
+ * have to worry about pte_devmap() because they are never anon.
+ */
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pte(pte, page, vma, flags)) {
+ page = NULL;
+ goto out;
+ }
+
if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
/*
* Only return device mapping pages in the FOLL_GET or FOLL_PIN
@@ -986,17 +1021,6 @@ static int faultin_page(struct vm_area_s
return -EBUSY;
}
- /*
- * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
- * necessary, even if maybe_mkwrite decided not to set pte_write. We
- * can thus safely do subsequent page lookups as if they were reads.
- * But only do so when looping for pte_write is futile: in some cases
- * userspace may also be wanting to write to the gotten user page,
- * which a read fault here might prevent (a readonly page might get
- * reCOWed by userspace write).
- */
- if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
- *flags |= FOLL_COW;
return 0;
}
--- a/mm/huge_memory.c~mm-gup-fix-foll_force-cow-security-issue-and-remove-foll_cow
+++ a/mm/huge_memory.c
@@ -1040,12 +1040,6 @@ struct page *follow_devmap_pmd(struct vm
assert_spin_locked(pmd_lockptr(mm, pmd));
- /*
- * When we COW a devmap PMD entry, we split it into PTEs, so we should
- * not be in this function with `flags & FOLL_COW` set.
- */
- WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
-
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -1395,14 +1389,42 @@ fallback:
return VM_FAULT_FALLBACK;
}
-/*
- * FOLL_FORCE can write to even unwritable pmd's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
+static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pmd_write(pmd) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+ /* If the pmd is writable, we can write to the page. */
+ if (pmd_write(pmd))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+ return false;
+ return !userfaultfd_huge_pmd_wp(vma, pmd);
}
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
@@ -1411,12 +1433,16 @@ struct page *follow_trans_huge_pmd(struc
unsigned int flags)
{
struct mm_struct *mm = vma->vm_mm;
- struct page *page = NULL;
+ struct page *page;
assert_spin_locked(pmd_lockptr(mm, pmd));
- if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
- goto out;
+ page = pmd_page(*pmd);
+ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pmd(*pmd, page, vma, flags))
+ return NULL;
/* Avoid dumping huge zero page */
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
@@ -1424,10 +1450,7 @@ struct page *follow_trans_huge_pmd(struc
/* Full NUMA hinting faults to serialise migration in fault paths */
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
- goto out;
-
- page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+ return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(flags, page))
return ERR_PTR(-EMLINK);
@@ -1444,7 +1467,6 @@ struct page *follow_trans_huge_pmd(struc
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
-out:
return page;
}
_
Patches currently in -mm which might be from david(a)redhat.com are
mm-gup-fix-foll_force-cow-security-issue-and-remove-foll_cow.patch