The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 6389e616fae8a101ce00068f7690461ab57b29d8
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021837-entree-estrogen-6751@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6389e616fae8a101ce00068f7690461ab57b29d8 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen+renesas(a)ideasonboard.com>
Date: Tue, 17 Dec 2024 07:31:35 +0200
Subject: [PATCH] drm/rcar-du: dsi: Fix PHY lock bit check
The driver checks for bit 16 (using CLOCKSET1_LOCK define) in CLOCKSET1
register when waiting for the PPI clock. However, the right bit to check
is bit 17 (CLOCKSET1_LOCK_PHY define). Not only that, but there's
nothing in the documents for bit 16 for V3U nor V4H.
So, fix the check to use bit 17, and drop the define for bit 16.
Fixes: 155358310f01 ("drm: rcar-du: Add R-Car DSI driver")
Fixes: 11696c5e8924 ("drm: Place Renesas drivers in a separate dir")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tomi Valkeinen <tomi.valkeinen+renesas(a)ideasonboard.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas(a)ideasonboard.com>
Tested-by: Geert Uytterhoeven <geert+renesas(a)glider.be>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241217-rcar-gh-dsi-v5-1-e77…
diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
index 8180625d5866..be4ffc0ab14f 100644
--- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
+++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c
@@ -587,7 +587,7 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi,
for (timeout = 10; timeout > 0; --timeout) {
if ((rcar_mipi_dsi_read(dsi, PPICLSR) & PPICLSR_STPST) &&
(rcar_mipi_dsi_read(dsi, PPIDLSR) & PPIDLSR_STPST) &&
- (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK))
+ (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK_PHY))
break;
usleep_range(1000, 2000);
diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
index f8114d11f2d1..a6b276f1d6ee 100644
--- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
+++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h
@@ -142,7 +142,6 @@
#define CLOCKSET1 0x101c
#define CLOCKSET1_LOCK_PHY (1 << 17)
-#define CLOCKSET1_LOCK (1 << 16)
#define CLOCKSET1_CLKSEL (1 << 8)
#define CLOCKSET1_CLKINSEL_EXTAL (0 << 2)
#define CLOCKSET1_CLKINSEL_DIG (1 << 2)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021816-flail-manger-7c9a@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021814-rhyme-unimpeded-1604@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021813-scorebook-fountain-acda@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021811-coauthor-gosling-ff49@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 3a47f4b439beb98e955d501c609dfd12b7836d61
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021809-sixtyfold-showroom-5f4e@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a47f4b439beb98e955d501c609dfd12b7836d61 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter(a)linaro.org>
Date: Fri, 15 Nov 2024 17:50:08 +0300
Subject: [PATCH] drm/msm/gem: prevent integer overflow in
msm_ioctl_gem_submit()
The "submit->cmd[i].size" and "submit->cmd[i].offset" variables are u32
values that come from the user via the submit_lookup_cmds() function.
This addition could lead to an integer wrapping bug so use size_add()
to prevent that.
Fixes: 198725337ef1 ("drm/msm: fix cmdstream size check")
Cc: stable(a)vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter(a)linaro.org>
Patchwork: https://patchwork.freedesktop.org/patch/624696/
Signed-off-by: Rob Clark <robdclark(a)chromium.org>
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index fba78193127d..f775638d239a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -787,8 +787,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
goto out;
if (!submit->cmd[i].size ||
- ((submit->cmd[i].size + submit->cmd[i].offset) >
- obj->size / 4)) {
+ (size_add(submit->cmd[i].size, submit->cmd[i].offset) > obj->size / 4)) {
SUBMIT_ERROR(submit, "invalid cmdstream size: %u\n", submit->cmd[i].size * 4);
ret = -EINVAL;
goto out;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x a9a73f2661e6f625d306c9b0ef082e4593f45a21
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021816-rotten-viral-2615@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9a73f2661e6f625d306c9b0ef082e4593f45a21 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Mon, 21 Oct 2024 17:07:50 +0300
Subject: [PATCH] drm/tidss: Fix race condition while handling interrupt
registers
The driver has a spinlock for protecting the irq_masks field and irq
enable registers. However, the driver misses protecting the irq status
registers which can lead to races.
Take the spinlock when accessing irqstatus too.
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
[Tomi: updated the desc]
Reviewed-by: Jonathan Cormier <jcormier(a)criticallink.com>
Tested-by: Jonathan Cormier <jcormier(a)criticallink.com>
Reviewed-by: Aradhya Bhatia <aradhya.bhatia(a)linux.dev>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-tidss-irq-fix-v1-6-8…
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 515f82e8a0a5..07f5c26cfa26 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc)
*/
static void dispc_softreset_k2g(struct dispc_device *dispc)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dispc->tidss->wait_lock, flags);
dispc_set_irqenable(dispc, 0);
dispc_read_and_clear_irqstatus(dispc);
+ spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags);
for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 3cc4024ec7ff..8af4682ba56b 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg)
unsigned int id;
dispc_irq_t irqstatus;
+ spin_lock(&tidss->wait_lock);
irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc);
+ spin_unlock(&tidss->wait_lock);
for (id = 0; id < tidss->num_crtcs; id++) {
struct drm_crtc *crtc = tidss->crtcs[id];
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x a9a73f2661e6f625d306c9b0ef082e4593f45a21
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021816-stonework-task-247b@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9a73f2661e6f625d306c9b0ef082e4593f45a21 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Mon, 21 Oct 2024 17:07:50 +0300
Subject: [PATCH] drm/tidss: Fix race condition while handling interrupt
registers
The driver has a spinlock for protecting the irq_masks field and irq
enable registers. However, the driver misses protecting the irq status
registers which can lead to races.
Take the spinlock when accessing irqstatus too.
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
[Tomi: updated the desc]
Reviewed-by: Jonathan Cormier <jcormier(a)criticallink.com>
Tested-by: Jonathan Cormier <jcormier(a)criticallink.com>
Reviewed-by: Aradhya Bhatia <aradhya.bhatia(a)linux.dev>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-tidss-irq-fix-v1-6-8…
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 515f82e8a0a5..07f5c26cfa26 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc)
*/
static void dispc_softreset_k2g(struct dispc_device *dispc)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dispc->tidss->wait_lock, flags);
dispc_set_irqenable(dispc, 0);
dispc_read_and_clear_irqstatus(dispc);
+ spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags);
for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 3cc4024ec7ff..8af4682ba56b 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg)
unsigned int id;
dispc_irq_t irqstatus;
+ spin_lock(&tidss->wait_lock);
irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc);
+ spin_unlock(&tidss->wait_lock);
for (id = 0; id < tidss->num_crtcs; id++) {
struct drm_crtc *crtc = tidss->crtcs[id];
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x a9a73f2661e6f625d306c9b0ef082e4593f45a21
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021815-procreate-false-2b40@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9a73f2661e6f625d306c9b0ef082e4593f45a21 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Mon, 21 Oct 2024 17:07:50 +0300
Subject: [PATCH] drm/tidss: Fix race condition while handling interrupt
registers
The driver has a spinlock for protecting the irq_masks field and irq
enable registers. However, the driver misses protecting the irq status
registers which can lead to races.
Take the spinlock when accessing irqstatus too.
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
[Tomi: updated the desc]
Reviewed-by: Jonathan Cormier <jcormier(a)criticallink.com>
Tested-by: Jonathan Cormier <jcormier(a)criticallink.com>
Reviewed-by: Aradhya Bhatia <aradhya.bhatia(a)linux.dev>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-tidss-irq-fix-v1-6-8…
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 515f82e8a0a5..07f5c26cfa26 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc)
*/
static void dispc_softreset_k2g(struct dispc_device *dispc)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dispc->tidss->wait_lock, flags);
dispc_set_irqenable(dispc, 0);
dispc_read_and_clear_irqstatus(dispc);
+ spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags);
for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 3cc4024ec7ff..8af4682ba56b 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg)
unsigned int id;
dispc_irq_t irqstatus;
+ spin_lock(&tidss->wait_lock);
irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc);
+ spin_unlock(&tidss->wait_lock);
for (id = 0; id < tidss->num_crtcs; id++) {
struct drm_crtc *crtc = tidss->crtcs[id];
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x a9a73f2661e6f625d306c9b0ef082e4593f45a21
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021815-chrome-sycamore-9640@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a9a73f2661e6f625d306c9b0ef082e4593f45a21 Mon Sep 17 00:00:00 2001
From: Devarsh Thakkar <devarsht(a)ti.com>
Date: Mon, 21 Oct 2024 17:07:50 +0300
Subject: [PATCH] drm/tidss: Fix race condition while handling interrupt
registers
The driver has a spinlock for protecting the irq_masks field and irq
enable registers. However, the driver misses protecting the irq status
registers which can lead to races.
Take the spinlock when accessing irqstatus too.
Fixes: 32a1795f57ee ("drm/tidss: New driver for TI Keystone platform Display SubSystem")
Cc: stable(a)vger.kernel.org
Signed-off-by: Devarsh Thakkar <devarsht(a)ti.com>
[Tomi: updated the desc]
Reviewed-by: Jonathan Cormier <jcormier(a)criticallink.com>
Tested-by: Jonathan Cormier <jcormier(a)criticallink.com>
Reviewed-by: Aradhya Bhatia <aradhya.bhatia(a)linux.dev>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen(a)ideasonboard.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241021-tidss-irq-fix-v1-6-8…
diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c
index 515f82e8a0a5..07f5c26cfa26 100644
--- a/drivers/gpu/drm/tidss/tidss_dispc.c
+++ b/drivers/gpu/drm/tidss/tidss_dispc.c
@@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc)
*/
static void dispc_softreset_k2g(struct dispc_device *dispc)
{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dispc->tidss->wait_lock, flags);
dispc_set_irqenable(dispc, 0);
dispc_read_and_clear_irqstatus(dispc);
+ spin_unlock_irqrestore(&dispc->tidss->wait_lock, flags);
for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx)
VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0);
diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c
index 3cc4024ec7ff..8af4682ba56b 100644
--- a/drivers/gpu/drm/tidss/tidss_irq.c
+++ b/drivers/gpu/drm/tidss/tidss_irq.c
@@ -60,7 +60,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg)
unsigned int id;
dispc_irq_t irqstatus;
+ spin_lock(&tidss->wait_lock);
irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc);
+ spin_unlock(&tidss->wait_lock);
for (id = 0; id < tidss->num_crtcs; id++) {
struct drm_crtc *crtc = tidss->crtcs[id];
At the end of a 128b/132b link training sequence, the HW expects the
transcoder training pattern to be set to TPS2 and from that to normal
mode (disabling the training pattern). Transitioning from TPS1 directly
to normal mode leaves the transcoder in a stuck state, resulting in
page-flip timeouts later in the modeset sequence.
Atm, in case of a failure during link training, the transcoder may be
still set to output the TPS1 pattern. Later the transcoder is then set
from TPS1 directly to normal mode in intel_dp_stop_link_train(), leading
to modeset failures later as described above. Fix this by setting the
training patter to TPS2, if the link training failed at any point.
Cc: stable(a)vger.kernel.org # v5.18+
Cc: Jani Nikula <jani.nikula(a)intel.com>
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
.../gpu/drm/i915/display/intel_dp_link_training.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
index 3cc06c916017d..11953b03bb6aa 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c
@@ -1563,7 +1563,7 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp,
if (wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) {
lt_err(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clear\n");
- return false;
+ goto out;
}
if (intel_dp_128b132b_lane_eq(intel_dp, crtc_state) &&
@@ -1575,6 +1575,19 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp,
passed ? "passed" : "failed",
crtc_state->port_clock, crtc_state->lane_count);
+out:
+ /*
+ * Ensure that the training pattern does get set to TPS2 even in case
+ * of a failure, as is the case at the end of a passing link training
+ * and what is expected by the transcoder. Leaving TPS1 set (and
+ * disabling the link train mode in DP_TP_CTL later from TPS1 directly)
+ * would result in a stuck transcoder HW state and flip-done timeouts
+ * later in the modeset sequence.
+ */
+ if (!passed)
+ intel_dp_program_link_training_pattern(intel_dp, crtc_state,
+ DP_PHY_DPRX, DP_TRAINING_PATTERN_2);
+
return passed;
}
--
2.44.2
On Tue, Feb 18, 2025 at 07:48:33AM -0500, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> MIPS: fix mips_get_syscall_arg() for o32
>
> to the 5.4-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> mips-fix-mips_get_syscall_arg-for-o32.patch
> and it can be found in the queue-5.4 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
>
>
> commit de89111213a3fb4751245214dc4dd590ed153d29
> Author: Dmitry V. Levin <ldv(a)strace.io>
> Date: Wed Feb 12 01:02:09 2025 +0200
>
> MIPS: fix mips_get_syscall_arg() for o32
>
> [ Upstream commit 733a90561ad0a4a74035d2d627098da85d43b592 ]
Just in case, the previous commit,
ed975485a13d1f6080218aa71c29425ba2dfb332, has to be applied
along with this one, otherwise the change will not compile.
--
ldv
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x da2dccd7451de62b175fb8f0808d644959e964c7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021859-obligate-simile-7eaf@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From da2dccd7451de62b175fb8f0808d644959e964c7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Wed, 5 Feb 2025 17:36:48 +0000
Subject: [PATCH] btrfs: fix hole expansion when writing at an offset beyond
EOF
At btrfs_write_check() if our file's i_size is not sector size aligned and
we have a write that starts at an offset larger than the i_size that falls
within the same page of the i_size, then we end up not zeroing the file
range [i_size, write_offset).
The code is this:
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, end_pos);
if (ret)
return ret;
}
So if our file's i_size is 90269 bytes and a write at offset 90365 bytes
comes in, we get 'start_pos' set to 90112 bytes, which is less than the
i_size and therefore we don't zero out the range [90269, 90365) by
calling btrfs_cont_expand().
This is an old bug introduced in commit 9036c10208e1 ("Btrfs: update hole
handling v2"), from 2008, and the buggy code got moved around over the
years.
Fix this by discarding 'start_pos' and comparing against the write offset
('pos') without any alignment.
This bug was recently exposed by test case generic/363 which tests this
scenario by polluting ranges beyond EOF with an mmap write and than verify
that after a file increases we get zeroes for the range which is supposed
to be a hole and not what we wrote with the previous mmaped write.
We're only seeing this exposed now because generic/363 used to run only
on xfs until last Sunday's fstests update.
The test was failing like this:
$ ./check generic/363
FSTYP -- btrfs
PLATFORM -- Linux/x86_64 debian0 6.13.0-rc7-btrfs-next-185+ #17 SMP PREEMPT_DYNAMIC Mon Feb 3 12:28:46 WET 2025
MKFS_OPTIONS -- /dev/sdc
MOUNT_OPTIONS -- /dev/sdc /home/fdmanana/btrfs-tests/scratch_1
generic/363 0s ... [failed, exit status 1]- output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad)
--- tests/generic/363.out 2025-02-05 15:31:14.013646509 +0000
+++ /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad 2025-02-05 17:25:33.112630781 +0000
@@ -1 +1,46 @@
QA output created by 363
+READ BAD DATA: offset = 0xdcad, size = 0xd921, fname = /home/fdmanana/btrfs-tests/dev/junk
+OFFSET GOOD BAD RANGE
+0x1609d 0x0000 0x3104 0x0
+operation# (mod 256) for the bad data may be 4
+0x1609e 0x0000 0x0472 0x1
+operation# (mod 256) for the bad data may be 4
...
(Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/363.out /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad' to see the entire diff)
Ran: generic/363
Failures: generic/363
Failed 1 of 1 tests
Fixes: 9036c10208e1 ("Btrfs: update hole handling v2")
CC: stable(a)vger.kernel.org
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 36f51c311bb1..ed3c0d6546c5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1039,7 +1039,6 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
- loff_t start_pos;
/*
* Quickly bail out on NOWAIT writes if we don't have the nodatacow or
@@ -1066,9 +1065,8 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
inode_inc_iversion(inode);
}
- start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
- if (start_pos > oldsize) {
+ if (pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x da2dccd7451de62b175fb8f0808d644959e964c7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021853-wiring-vest-fedd@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From da2dccd7451de62b175fb8f0808d644959e964c7 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana(a)suse.com>
Date: Wed, 5 Feb 2025 17:36:48 +0000
Subject: [PATCH] btrfs: fix hole expansion when writing at an offset beyond
EOF
At btrfs_write_check() if our file's i_size is not sector size aligned and
we have a write that starts at an offset larger than the i_size that falls
within the same page of the i_size, then we end up not zeroing the file
range [i_size, write_offset).
The code is this:
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
ret = btrfs_cont_expand(BTRFS_I(inode), oldsize, end_pos);
if (ret)
return ret;
}
So if our file's i_size is 90269 bytes and a write at offset 90365 bytes
comes in, we get 'start_pos' set to 90112 bytes, which is less than the
i_size and therefore we don't zero out the range [90269, 90365) by
calling btrfs_cont_expand().
This is an old bug introduced in commit 9036c10208e1 ("Btrfs: update hole
handling v2"), from 2008, and the buggy code got moved around over the
years.
Fix this by discarding 'start_pos' and comparing against the write offset
('pos') without any alignment.
This bug was recently exposed by test case generic/363 which tests this
scenario by polluting ranges beyond EOF with an mmap write and than verify
that after a file increases we get zeroes for the range which is supposed
to be a hole and not what we wrote with the previous mmaped write.
We're only seeing this exposed now because generic/363 used to run only
on xfs until last Sunday's fstests update.
The test was failing like this:
$ ./check generic/363
FSTYP -- btrfs
PLATFORM -- Linux/x86_64 debian0 6.13.0-rc7-btrfs-next-185+ #17 SMP PREEMPT_DYNAMIC Mon Feb 3 12:28:46 WET 2025
MKFS_OPTIONS -- /dev/sdc
MOUNT_OPTIONS -- /dev/sdc /home/fdmanana/btrfs-tests/scratch_1
generic/363 0s ... [failed, exit status 1]- output mismatch (see /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad)
--- tests/generic/363.out 2025-02-05 15:31:14.013646509 +0000
+++ /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad 2025-02-05 17:25:33.112630781 +0000
@@ -1 +1,46 @@
QA output created by 363
+READ BAD DATA: offset = 0xdcad, size = 0xd921, fname = /home/fdmanana/btrfs-tests/dev/junk
+OFFSET GOOD BAD RANGE
+0x1609d 0x0000 0x3104 0x0
+operation# (mod 256) for the bad data may be 4
+0x1609e 0x0000 0x0472 0x1
+operation# (mod 256) for the bad data may be 4
...
(Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/generic/363.out /home/fdmanana/git/hub/xfstests/results//generic/363.out.bad' to see the entire diff)
Ran: generic/363
Failures: generic/363
Failed 1 of 1 tests
Fixes: 9036c10208e1 ("Btrfs: update hole handling v2")
CC: stable(a)vger.kernel.org
Reviewed-by: Qu Wenruo <wqu(a)suse.com>
Signed-off-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 36f51c311bb1..ed3c0d6546c5 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1039,7 +1039,6 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
- loff_t start_pos;
/*
* Quickly bail out on NOWAIT writes if we don't have the nodatacow or
@@ -1066,9 +1065,8 @@ int btrfs_write_check(struct kiocb *iocb, size_t count)
inode_inc_iversion(inode);
}
- start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
- if (start_pos > oldsize) {
+ if (pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
loff_t end_pos = round_up(pos + count, fs_info->sectorsize);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 32966821574cd2917bd60f2554f435fe527f4702
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021801-myself-cane-67bf@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 32966821574cd2917bd60f2554f435fe527f4702 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj(a)kernel.org>
Date: Fri, 7 Feb 2025 10:59:06 -1000
Subject: [PATCH] sched_ext: Fix migration disabled handling in targeted
dispatches
A dispatch operation that can target a specific local DSQ -
scx_bpf_dsq_move_to_local() or scx_bpf_dsq_move() - checks whether the task
can be migrated to the target CPU using task_can_run_on_remote_rq(). If the
task can't be migrated to the targeted CPU, it is bounced through a global
DSQ.
task_can_run_on_remote_rq() assumes that the task is on a CPU that's
different from the targeted CPU but the callers doesn't uphold the
assumption and may call the function when the task is already on the target
CPU. When such task has migration disabled, task_can_run_on_remote_rq() ends
up returning %false incorrectly unnecessarily bouncing the task to a global
DSQ.
Fix it by updating the callers to only call task_can_run_on_remote_rq() when
the task is on a different CPU than the target CPU. As this is a bit subtle,
for clarity and documentation:
- Make task_can_run_on_remote_rq() trigger SCHED_WARN_ON() if the task is on
the same CPU as the target CPU.
- is_migration_disabled() test in task_can_run_on_remote_rq() cannot trigger
if the task is on a different CPU than the target CPU as the preceding
task_allowed_on_cpu() test should fail beforehand. Convert the test into
SCHED_WARN_ON().
Signed-off-by: Tejun Heo <tj(a)kernel.org>
Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()")
Fixes: 0366017e0973 ("sched_ext: Use task_can_run_on_remote_rq() test in dispatch_to_local_dsq()")
Cc: stable(a)vger.kernel.org # v6.12+
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index efdbf4d85a21..e01144340d67 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2333,12 +2333,16 @@ static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
*
* - The BPF scheduler is bypassed while the rq is offline and we can always say
* no to the BPF scheduler initiated migrations while offline.
+ *
+ * The caller must ensure that @p and @rq are on different CPUs.
*/
static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq,
bool trigger_error)
{
int cpu = cpu_of(rq);
+ SCHED_WARN_ON(task_cpu(p) == cpu);
+
/*
* We don't require the BPF scheduler to avoid dispatching to offline
* CPUs mostly for convenience but also because CPUs can go offline
@@ -2352,8 +2356,11 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq,
return false;
}
- if (unlikely(is_migration_disabled(p)))
- return false;
+ /*
+ * If @p has migration disabled, @p->cpus_ptr only contains its current
+ * CPU and the above task_allowed_on_cpu() test should have failed.
+ */
+ SCHED_WARN_ON(is_migration_disabled(p));
if (!scx_rq_online(rq))
return false;
@@ -2457,7 +2464,8 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags,
if (dst_dsq->id == SCX_DSQ_LOCAL) {
dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
- if (!task_can_run_on_remote_rq(p, dst_rq, true)) {
+ if (src_rq != dst_rq &&
+ unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
dst_dsq = find_global_dsq(p);
dst_rq = src_rq;
}
@@ -2611,7 +2619,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
}
#ifdef CONFIG_SMP
- if (unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
+ if (src_rq != dst_rq &&
+ unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
dispatch_enqueue(find_global_dsq(p), p,
enq_flags | SCX_ENQ_CLEAR_OPSS);
return;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021832-floss-petticoat-45ad@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021832-vowel-unsheathe-d6d6@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021831-swab-hassle-6824@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021831-fedora-vanquish-5096@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021830-phantom-negligent-416a@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021830-agenda-disgrace-4850@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 011b0335903832facca86cd8ed05d7d8d94c9c76
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021859-upturned-trailside-5b21@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 011b0335903832facca86cd8ed05d7d8d94c9c76 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Thu, 6 Feb 2025 22:28:48 +0100
Subject: [PATCH] Revert "net: skb: introduce and use a single page frag cache"
This reverts commit dbae2b062824 ("net: skb: introduce and use a single
page frag cache"). The intended goal of such change was to counter a
performance regression introduced by commit 3226b158e67c ("net: avoid
32 x truesize under-estimation for tiny skbs").
Unfortunately, the blamed commit introduces another regression for the
virtio_net driver. Such a driver calls napi_alloc_skb() with a tiny
size, so that the whole head frag could fit a 512-byte block.
The single page frag cache uses a 1K fragment for such allocation, and
the additional overhead, under small UDP packets flood, makes the page
allocator a bottleneck.
Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for
typical/small skb->head"), this revert does not re-introduce the
original regression. Actually, in the relevant test on top of this
revert, I measure a small but noticeable positive delta, just above
noise level.
The revert itself required some additional mangling due to the
introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the
affected code.
Suggested-by: Eric Dumazet <edumazet(a)google.com>
Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.173887729…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c0a86afb85da..365f0e2098d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4115,7 +4115,6 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..55e356a68db6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6920,6 +6920,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a441613a1e6c..6a99c453397f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -220,67 +220,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG 1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
- void *va;
- u16 offset;
- bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
- struct page *page;
- int offset;
-
- offset = nc->offset - SZ_1K;
- if (likely(offset >= 0))
- goto use_frag;
-
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
- if (!page)
- return NULL;
-
- nc->va = page_address(page);
- nc->pfmemalloc = page_is_pfmemalloc(page);
- offset = PAGE_SIZE - SZ_1K;
- page_ref_add(page, offset / SZ_1K);
-
-use_frag:
- nc->offset = offset;
- return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG 0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
- return NULL;
-}
-
-#endif
-
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
- struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -290,23 +232,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -813,10 +738,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
- * When the small frag allocator is available, prefer it over kmalloc
- * for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +749,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
+ len = SKB_HEAD_ALIGN(len);
+
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
- /* we are artificially inflating the allocation size, but
- * that is not as bad as it may look like, as:
- * - 'len' less than GRO_MAX_HEAD makes little sense
- * - On most systems, larger 'len' values lead to fragment
- * size above 512 bytes
- * - kmalloc would use the kmalloc-1k slab for such values
- * - Builds with smaller GRO_MAX_HEAD will very likely do
- * little networking, as that implies no WiFi and no
- * tunnels support, and 32 bits arches.
- */
- len = SZ_1K;
- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
- } else {
- len = SKB_HEAD_ALIGN(len);
-
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
- }
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 011b0335903832facca86cd8ed05d7d8d94c9c76
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021800-freebase-womanlike-28b0@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 011b0335903832facca86cd8ed05d7d8d94c9c76 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni(a)redhat.com>
Date: Thu, 6 Feb 2025 22:28:48 +0100
Subject: [PATCH] Revert "net: skb: introduce and use a single page frag cache"
This reverts commit dbae2b062824 ("net: skb: introduce and use a single
page frag cache"). The intended goal of such change was to counter a
performance regression introduced by commit 3226b158e67c ("net: avoid
32 x truesize under-estimation for tiny skbs").
Unfortunately, the blamed commit introduces another regression for the
virtio_net driver. Such a driver calls napi_alloc_skb() with a tiny
size, so that the whole head frag could fit a 512-byte block.
The single page frag cache uses a 1K fragment for such allocation, and
the additional overhead, under small UDP packets flood, makes the page
allocator a bottleneck.
Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for
typical/small skb->head"), this revert does not re-introduce the
original regression. Actually, in the relevant test on top of this
revert, I measure a small but noticeable positive delta, just above
noise level.
The revert itself required some additional mangling due to the
introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the
affected code.
Suggested-by: Eric Dumazet <edumazet(a)google.com>
Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache")
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.173887729…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c0a86afb85da..365f0e2098d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4115,7 +4115,6 @@ void netif_receive_skb_list(struct list_head *head);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
-void napi_get_frags_check(struct napi_struct *napi);
gro_result_t napi_gro_frags(struct napi_struct *napi);
static inline void napi_free_frags(struct napi_struct *napi)
diff --git a/net/core/dev.c b/net/core/dev.c
index b91658e8aedb..55e356a68db6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6920,6 +6920,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
list_add_rcu(&napi->dev_list, higher); /* adds after higher */
}
+/* Double check that napi_get_frags() allocates skbs with
+ * skb->head being backed by slab, not a page fragment.
+ * This is to make sure bug fixed in 3226b158e67c
+ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+ * does not accidentally come back.
+ */
+static void napi_get_frags_check(struct napi_struct *napi)
+{
+ struct sk_buff *skb;
+
+ local_bh_disable();
+ skb = napi_get_frags(napi);
+ WARN_ON_ONCE(skb && skb->head_frag);
+ napi_free_frags(napi);
+ local_bh_enable();
+}
+
void netif_napi_add_weight_locked(struct net_device *dev,
struct napi_struct *napi,
int (*poll)(struct napi_struct *, int),
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a441613a1e6c..6a99c453397f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -220,67 +220,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
#define NAPI_SKB_CACHE_BULK 16
#define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
-#if PAGE_SIZE == SZ_4K
-
-#define NAPI_HAS_SMALL_PAGE_FRAG 1
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
-
-/* specialized page frag allocator using a single order 0 page
- * and slicing it into 1K sized fragment. Constrained to systems
- * with a very limited amount of 1K fragments fitting a single
- * page - to avoid excessive truesize underestimation
- */
-
-struct page_frag_1k {
- void *va;
- u16 offset;
- bool pfmemalloc;
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
-{
- struct page *page;
- int offset;
-
- offset = nc->offset - SZ_1K;
- if (likely(offset >= 0))
- goto use_frag;
-
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
- if (!page)
- return NULL;
-
- nc->va = page_address(page);
- nc->pfmemalloc = page_is_pfmemalloc(page);
- offset = PAGE_SIZE - SZ_1K;
- page_ref_add(page, offset / SZ_1K);
-
-use_frag:
- nc->offset = offset;
- return nc->va + offset;
-}
-#else
-
-/* the small page is actually unused in this build; add dummy helpers
- * to please the compiler and avoid later preprocessor's conditionals
- */
-#define NAPI_HAS_SMALL_PAGE_FRAG 0
-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
-
-struct page_frag_1k {
-};
-
-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
-{
- return NULL;
-}
-
-#endif
-
struct napi_alloc_cache {
local_lock_t bh_lock;
struct page_frag_cache page;
- struct page_frag_1k page_small;
unsigned int skb_count;
void *skb_cache[NAPI_SKB_CACHE_SIZE];
};
@@ -290,23 +232,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
.bh_lock = INIT_LOCAL_LOCK(bh_lock),
};
-/* Double check that napi_get_frags() allocates skbs with
- * skb->head being backed by slab, not a page fragment.
- * This is to make sure bug fixed in 3226b158e67c
- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
- * does not accidentally come back.
- */
-void napi_get_frags_check(struct napi_struct *napi)
-{
- struct sk_buff *skb;
-
- local_bh_disable();
- skb = napi_get_frags(napi);
- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
- napi_free_frags(napi);
- local_bh_enable();
-}
-
void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
{
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
@@ -813,10 +738,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
/* If requested length is either too small or too big,
* we use kmalloc() for skb->head allocation.
- * When the small frag allocator is available, prefer it over kmalloc
- * for small fragments
*/
- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
@@ -826,32 +749,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
goto skb_success;
}
+ len = SKB_HEAD_ALIGN(len);
+
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
local_lock_nested_bh(&napi_alloc_cache.bh_lock);
nc = this_cpu_ptr(&napi_alloc_cache);
- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
- /* we are artificially inflating the allocation size, but
- * that is not as bad as it may look like, as:
- * - 'len' less than GRO_MAX_HEAD makes little sense
- * - On most systems, larger 'len' values lead to fragment
- * size above 512 bytes
- * - kmalloc would use the kmalloc-1k slab for such values
- * - Builds with smaller GRO_MAX_HEAD will very likely do
- * little networking, as that implies no WiFi and no
- * tunnels support, and 32 bits arches.
- */
- len = SZ_1K;
- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
- } else {
- len = SKB_HEAD_ALIGN(len);
-
- data = page_frag_alloc(&nc->page, len, gfp_mask);
- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
- }
+ data = page_frag_alloc(&nc->page, len, gfp_mask);
+ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
if (unlikely(!data))
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x e5644be4079750a0a0a5a7068fd90b97bf6fac55
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021831-mushiness-herbs-d06c@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5644be4079750a0a0a5a7068fd90b97bf6fac55 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic(a)kernel.org>
Date: Wed, 12 Feb 2025 14:55:14 +0100
Subject: [PATCH] usb: gadget: uvc: Fix unstarted kthread worker
The behaviour of kthread_create_worker() was recently changed to align
with the one of kthread_create(). The kthread worker is created but not
awaken by default. This is to allow the use of kthread_affine_preferred()
and kthread_bind[_mask]() with kthread workers. In order to keep the
old behaviour and wake the kthread up, kthread_run_worker() must be
used. All the pre-existing users have been converted, except for UVC
that was introduced in the same merge window as the API change.
This results in hangs:
INFO: task UVCG:82 blocked for more than 491 seconds.
Tainted: G T 6.13.0-rc2-00014-gb04e317b5226 #1
task:UVCG state:D stack:0 pid:82
Call Trace:
__schedule
schedule
schedule_preempt_disabled
kthread
? kthread_flush_work
ret_from_fork
ret_from_fork_asm
entry_INT80_32
Fix this with converting UVCG kworker to the new API.
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202502121025.55bfa801-lkp@intel.com
Fixes: f0bbfbd16b3b ("usb: gadget: uvc: rework to enqueue in pump worker from encoded queue")
Cc: stable <stable(a)kernel.org>
Cc: Michael Grzeschik <m.grzeschik(a)pengutronix.de>
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
Link: https://lore.kernel.org/r/20250212135514.30539-1-frederic@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
index 79e223713d8b..fb77b0b21790 100644
--- a/drivers/usb/gadget/function/uvc_video.c
+++ b/drivers/usb/gadget/function/uvc_video.c
@@ -818,7 +818,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
return -EINVAL;
/* Allocate a kthread for asynchronous hw submit handler. */
- video->kworker = kthread_create_worker(0, "UVCG");
+ video->kworker = kthread_run_worker(0, "UVCG");
if (IS_ERR(video->kworker)) {
uvcg_err(&video->uvc->func, "failed to create UVCG kworker\n");
return PTR_ERR(video->kworker);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 4ab37fcb42832cdd3e9d5e50653285ca84d6686f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021846-penholder-punisher-66e6@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4ab37fcb42832cdd3e9d5e50653285ca84d6686f Mon Sep 17 00:00:00 2001
From: Jill Donahue <jilliandonahue58(a)gmail.com>
Date: Tue, 11 Feb 2025 10:48:05 -0700
Subject: [PATCH] USB: gadget: f_midi: f_midi_complete to call queue_work
When using USB MIDI, a lock is attempted to be acquired twice through a
re-entrant call to f_midi_transmit, causing a deadlock.
Fix it by using queue_work() to schedule the inner f_midi_transmit() via
a high priority work queue from the completion handler.
Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGV…
Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jill Donahue <jilliandonahue58(a)gmail.com>
Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 47260d65066a..da82598fcef8 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
/* Our transmit completed. See if there's more to go.
* f_midi_transmit eats req, don't queue it again. */
req->length = 0;
- f_midi_transmit(midi);
+ queue_work(system_highpri_wq, &midi->work);
return;
}
break;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021848-sixtieth-living-e034@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in gadget->work
workqueue. This is observed, for example, with the dwc3 driver with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021846-antihero-overall-1bfb@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in gadget->work
workqueue. This is observed, for example, with the dwc3 driver with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021844-stimuli-handmade-9628@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in gadget->work
workqueue. This is observed, for example, with the dwc3 driver with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021842-eject-splendor-7c5d@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in gadget->work
workqueue. This is observed, for example, with the dwc3 driver with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 399a45e5237ca14037120b1b895bd38a3b4492ea
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021839-vacancy-doormat-6a57@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 399a45e5237ca14037120b1b895bd38a3b4492ea Mon Sep 17 00:00:00 2001
From: Roy Luo <royluo(a)google.com>
Date: Tue, 4 Feb 2025 23:36:42 +0000
Subject: [PATCH] usb: gadget: core: flush gadget workqueue after device
removal
device_del() can lead to new work being scheduled in gadget->work
workqueue. This is observed, for example, with the dwc3 driver with the
following call stack:
device_del()
gadget_unbind_driver()
usb_gadget_disconnect_locked()
dwc3_gadget_pullup()
dwc3_gadget_soft_disconnect()
usb_gadget_set_state()
schedule_work(&gadget->work)
Move flush_work() after device_del() to ensure the workqueue is cleaned
up.
Fixes: 5702f75375aa9 ("usb: gadget: udc-core: move sysfs_notify() to a workqueue")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Roy Luo <royluo(a)google.com>
Reviewed-by: Alan Stern <stern(a)rowland.harvard.edu>
Reviewed-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250204233642.666991-1-royluo@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index a6f46364be65..4b3d5075621a 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget)
kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE);
sysfs_remove_link(&udc->dev.kobj, "gadget");
- flush_work(&gadget->work);
device_del(&gadget->dev);
+ flush_work(&gadget->work);
ida_free(&gadget_id_numbers, gadget->id_number);
cancel_work_sync(&udc->vbus_work);
device_unregister(&udc->dev);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021852-blemish-humility-00ca@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As PD2.0 spec ("6.5.6.2 PSSourceOffTimer"),the PSSourceOffTimer is
used by the Policy Engine in Dual-Role Power device that is currently
acting as a Sink to timeout on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B Plugs and Error Recovery for Type-C plugs and return to USB
Default Operation.
Therefore, after PSSourceOffTimer timeout, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This can also
solve the test items in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021851-irritable-untracked-3604@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As PD2.0 spec ("6.5.6.2 PSSourceOffTimer"),the PSSourceOffTimer is
used by the Policy Engine in Dual-Role Power device that is currently
acting as a Sink to timeout on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B Plugs and Error Recovery for Type-C plugs and return to USB
Default Operation.
Therefore, after PSSourceOffTimer timeout, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This can also
solve the test items in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021850-starry-ideally-6050@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As PD2.0 spec ("6.5.6.2 PSSourceOffTimer"),the PSSourceOffTimer is
used by the Policy Engine in Dual-Role Power device that is currently
acting as a Sink to timeout on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B Plugs and Error Recovery for Type-C plugs and return to USB
Default Operation.
Therefore, after PSSourceOffTimer timeout, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This can also
solve the test items in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021850-dividers-finalist-b7b7@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As PD2.0 spec ("6.5.6.2 PSSourceOffTimer"),the PSSourceOffTimer is
used by the Policy Engine in Dual-Role Power device that is currently
acting as a Sink to timeout on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B Plugs and Error Recovery for Type-C plugs and return to USB
Default Operation.
Therefore, after PSSourceOffTimer timeout, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This can also
solve the test items in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021849-tree-erased-b30b@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As PD2.0 spec ("6.5.6.2 PSSourceOffTimer"),the PSSourceOffTimer is
used by the Policy Engine in Dual-Role Power device that is currently
acting as a Sink to timeout on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B Plugs and Error Recovery for Type-C plugs and return to USB
Default Operation.
Therefore, after PSSourceOffTimer timeout, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This can also
solve the test items in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 659f5d55feb75782bd46cf130da3c1f240afe9ba
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021848-lend-city-f5bb@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 659f5d55feb75782bd46cf130da3c1f240afe9ba Mon Sep 17 00:00:00 2001
From: Jos Wang <joswang(a)lenovo.com>
Date: Thu, 13 Feb 2025 21:49:21 +0800
Subject: [PATCH] usb: typec: tcpm: PSSourceOffTimer timeout in PR_Swap enters
ERROR_RECOVERY
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
As PD2.0 spec ("6.5.6.2 PSSourceOffTimer"),the PSSourceOffTimer is
used by the Policy Engine in Dual-Role Power device that is currently
acting as a Sink to timeout on a PS_RDY Message during a Power Role
Swap sequence. This condition leads to a Hard Reset for USB Type-A and
Type-B Plugs and Error Recovery for Type-C plugs and return to USB
Default Operation.
Therefore, after PSSourceOffTimer timeout, the tcpm state machine should
switch from PR_SWAP_SNK_SRC_SINK_OFF to ERROR_RECOVERY. This can also
solve the test items in the USB power delivery compliance test:
TEST.PD.PROT.SNK.12 PR_Swap – PSSourceOffTimer Timeout
[1] https://usb.org/document-library/usb-power-delivery-compliance-test-specifi…
Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Jos Wang <joswang(a)lenovo.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus(a)linux.intel.com>
Tested-by: Amit Sunil Dhamne <amitsd(a)google.com>
Link: https://lore.kernel.org/r/20250213134921.3798-1-joswang1221@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 47be450d2be3..6bf1a22c785a 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port)
tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB,
port->pps_data.active, 0);
tcpm_set_charge(port, false);
- tcpm_set_state(port, hard_reset_state(port),
- port->timings.ps_src_off_time);
+ tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time);
break;
case PR_SWAP_SNK_SRC_SOURCE_ON:
tcpm_enable_auto_vbus_discharge(port, true);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x d3a8c28426fc1fb3252753a9f1db0d691ffc21b0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021803-magnesium-breeze-0a67@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d3a8c28426fc1fb3252753a9f1db0d691ffc21b0 Mon Sep 17 00:00:00 2001
From: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
Date: Sat, 1 Feb 2025 22:09:02 +0530
Subject: [PATCH] usb: dwc3: Fix timeout issue during controller enter/exit
from halt state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
There is a frequent timeout during controller enter/exit from halt state
after toggling the run_stop bit by SW. This timeout occurs when
performing frequent role switches between host and device, causing
device enumeration issues due to the timeout. This issue was not present
when USB2 suspend PHY was disabled by passing the SNPS quirks
(snps,dis_u2_susphy_quirk and snps,dis_enblslpm_quirk) from the DTS.
However, there is a requirement to enable USB2 suspend PHY by setting of
GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY bits when controller starts
in gadget or host mode results in the timeout issue.
This commit addresses this timeout issue by ensuring that the bits
GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting
the dwc3_gadget_run_stop sequence and restoring them after the
dwc3_gadget_run_stop sequence is completed.
Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250201163903.459-1-selvarasu.g@samsung.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index d27af65eb08a..ddd6b2ce5710 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2629,10 +2629,38 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
{
u32 reg;
u32 timeout = 2000;
+ u32 saved_config = 0;
if (pm_runtime_suspended(dwc->dev))
return 0;
+ /*
+ * When operating in USB 2.0 speeds (HS/FS), ensure that
+ * GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting
+ * or stopping the controller. This resolves timeout issues that occur
+ * during frequent role switches between host and device modes.
+ *
+ * Save and clear these settings, then restore them after completing the
+ * controller start or stop sequence.
+ *
+ * This solution was discovered through experimentation as it is not
+ * mentioned in the dwc3 programming guide. It has been tested on an
+ * Exynos platforms.
+ */
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ if (reg & DWC3_GUSB2PHYCFG_SUSPHY) {
+ saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
+ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
+ }
+
+ if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) {
+ saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM;
+ reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
+ }
+
+ if (saved_config)
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
if (is_on) {
if (DWC3_VER_IS_WITHIN(DWC3, ANY, 187A)) {
@@ -2660,6 +2688,12 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
reg &= DWC3_DSTS_DEVCTRLHLT;
} while (--timeout && !(!is_on ^ !reg));
+ if (saved_config) {
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ reg |= saved_config;
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+ }
+
if (!timeout)
return -ETIMEDOUT;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d3a8c28426fc1fb3252753a9f1db0d691ffc21b0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021803-frugally-entity-330b@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d3a8c28426fc1fb3252753a9f1db0d691ffc21b0 Mon Sep 17 00:00:00 2001
From: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
Date: Sat, 1 Feb 2025 22:09:02 +0530
Subject: [PATCH] usb: dwc3: Fix timeout issue during controller enter/exit
from halt state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
There is a frequent timeout during controller enter/exit from halt state
after toggling the run_stop bit by SW. This timeout occurs when
performing frequent role switches between host and device, causing
device enumeration issues due to the timeout. This issue was not present
when USB2 suspend PHY was disabled by passing the SNPS quirks
(snps,dis_u2_susphy_quirk and snps,dis_enblslpm_quirk) from the DTS.
However, there is a requirement to enable USB2 suspend PHY by setting of
GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY bits when controller starts
in gadget or host mode results in the timeout issue.
This commit addresses this timeout issue by ensuring that the bits
GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting
the dwc3_gadget_run_stop sequence and restoring them after the
dwc3_gadget_run_stop sequence is completed.
Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250201163903.459-1-selvarasu.g@samsung.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index d27af65eb08a..ddd6b2ce5710 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2629,10 +2629,38 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
{
u32 reg;
u32 timeout = 2000;
+ u32 saved_config = 0;
if (pm_runtime_suspended(dwc->dev))
return 0;
+ /*
+ * When operating in USB 2.0 speeds (HS/FS), ensure that
+ * GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting
+ * or stopping the controller. This resolves timeout issues that occur
+ * during frequent role switches between host and device modes.
+ *
+ * Save and clear these settings, then restore them after completing the
+ * controller start or stop sequence.
+ *
+ * This solution was discovered through experimentation as it is not
+ * mentioned in the dwc3 programming guide. It has been tested on an
+ * Exynos platforms.
+ */
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ if (reg & DWC3_GUSB2PHYCFG_SUSPHY) {
+ saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
+ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
+ }
+
+ if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) {
+ saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM;
+ reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
+ }
+
+ if (saved_config)
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
if (is_on) {
if (DWC3_VER_IS_WITHIN(DWC3, ANY, 187A)) {
@@ -2660,6 +2688,12 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
reg &= DWC3_DSTS_DEVCTRLHLT;
} while (--timeout && !(!is_on ^ !reg));
+ if (saved_config) {
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ reg |= saved_config;
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+ }
+
if (!timeout)
return -ETIMEDOUT;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d3a8c28426fc1fb3252753a9f1db0d691ffc21b0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021802-sharply-case-0286@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d3a8c28426fc1fb3252753a9f1db0d691ffc21b0 Mon Sep 17 00:00:00 2001
From: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
Date: Sat, 1 Feb 2025 22:09:02 +0530
Subject: [PATCH] usb: dwc3: Fix timeout issue during controller enter/exit
from halt state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
There is a frequent timeout during controller enter/exit from halt state
after toggling the run_stop bit by SW. This timeout occurs when
performing frequent role switches between host and device, causing
device enumeration issues due to the timeout. This issue was not present
when USB2 suspend PHY was disabled by passing the SNPS quirks
(snps,dis_u2_susphy_quirk and snps,dis_enblslpm_quirk) from the DTS.
However, there is a requirement to enable USB2 suspend PHY by setting of
GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY bits when controller starts
in gadget or host mode results in the timeout issue.
This commit addresses this timeout issue by ensuring that the bits
GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting
the dwc3_gadget_run_stop sequence and restoring them after the
dwc3_gadget_run_stop sequence is completed.
Fixes: 72246da40f37 ("usb: Introduce DesignWare USB3 DRD Driver")
Cc: stable <stable(a)kernel.org>
Signed-off-by: Selvarasu Ganesan <selvarasu.g(a)samsung.com>
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Link: https://lore.kernel.org/r/20250201163903.459-1-selvarasu.g@samsung.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index d27af65eb08a..ddd6b2ce5710 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2629,10 +2629,38 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
{
u32 reg;
u32 timeout = 2000;
+ u32 saved_config = 0;
if (pm_runtime_suspended(dwc->dev))
return 0;
+ /*
+ * When operating in USB 2.0 speeds (HS/FS), ensure that
+ * GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting
+ * or stopping the controller. This resolves timeout issues that occur
+ * during frequent role switches between host and device modes.
+ *
+ * Save and clear these settings, then restore them after completing the
+ * controller start or stop sequence.
+ *
+ * This solution was discovered through experimentation as it is not
+ * mentioned in the dwc3 programming guide. It has been tested on an
+ * Exynos platforms.
+ */
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ if (reg & DWC3_GUSB2PHYCFG_SUSPHY) {
+ saved_config |= DWC3_GUSB2PHYCFG_SUSPHY;
+ reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
+ }
+
+ if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) {
+ saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM;
+ reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
+ }
+
+ if (saved_config)
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
if (is_on) {
if (DWC3_VER_IS_WITHIN(DWC3, ANY, 187A)) {
@@ -2660,6 +2688,12 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on)
reg &= DWC3_DSTS_DEVCTRLHLT;
} while (--timeout && !(!is_on ^ !reg));
+ if (saved_config) {
+ reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
+ reg |= saved_config;
+ dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
+ }
+
if (!timeout)
return -ETIMEDOUT;
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 60b8f711143de7cd9c0f55be0fe7eb94b19eb5c7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021821-bullseye-travel-f568@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 60b8f711143de7cd9c0f55be0fe7eb94b19eb5c7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Thu, 13 Feb 2025 13:41:32 -0500
Subject: [PATCH] tracing: Have the error of __tracing_resize_ring_buffer()
passed to user
Currently if __tracing_resize_ring_buffer() returns an error, the
tracing_resize_ringbuffer() returns -ENOMEM. But it may not be a memory
issue that caused the function to fail. If the ring buffer is memory
mapped, then the resizing of the ring buffer will be disabled. But if the
user tries to resize the buffer, it will get an -ENOMEM returned, which is
confusing because there is plenty of memory. The actual error returned was
-EBUSY, which would make much more sense to the user.
Cc: stable(a)vger.kernel.org
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Vincent Donnefort <vdonnefort(a)google.com>
Link: https://lore.kernel.org/20250213134132.7e4505d7@gandalf.local.home
Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1496a5ac33ae..25ff37aab00f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5977,8 +5977,6 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
unsigned long size, int cpu_id)
{
- int ret;
-
guard(mutex)(&trace_types_lock);
if (cpu_id != RING_BUFFER_ALL_CPUS) {
@@ -5987,11 +5985,7 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
return -EINVAL;
}
- ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
- if (ret < 0)
- ret = -ENOMEM;
-
- return ret;
+ return __tracing_resize_ring_buffer(tr, size, cpu_id);
}
static void update_last_data(struct trace_array *tr)
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x 60b8f711143de7cd9c0f55be0fe7eb94b19eb5c7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021820-swinger-shopping-6edb@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 60b8f711143de7cd9c0f55be0fe7eb94b19eb5c7 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Thu, 13 Feb 2025 13:41:32 -0500
Subject: [PATCH] tracing: Have the error of __tracing_resize_ring_buffer()
passed to user
Currently if __tracing_resize_ring_buffer() returns an error, the
tracing_resize_ringbuffer() returns -ENOMEM. But it may not be a memory
issue that caused the function to fail. If the ring buffer is memory
mapped, then the resizing of the ring buffer will be disabled. But if the
user tries to resize the buffer, it will get an -ENOMEM returned, which is
confusing because there is plenty of memory. The actual error returned was
-EBUSY, which would make much more sense to the user.
Cc: stable(a)vger.kernel.org
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Vincent Donnefort <vdonnefort(a)google.com>
Link: https://lore.kernel.org/20250213134132.7e4505d7@gandalf.local.home
Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1496a5ac33ae..25ff37aab00f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5977,8 +5977,6 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
unsigned long size, int cpu_id)
{
- int ret;
-
guard(mutex)(&trace_types_lock);
if (cpu_id != RING_BUFFER_ALL_CPUS) {
@@ -5987,11 +5985,7 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
return -EINVAL;
}
- ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
- if (ret < 0)
- ret = -ENOMEM;
-
- return ret;
+ return __tracing_resize_ring_buffer(tr, size, cpu_id);
}
static void update_last_data(struct trace_array *tr)
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 8802766324e1f5d414a81ac43365c20142e85603
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021856-apostle-aggregate-3dc4@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8802766324e1f5d414a81ac43365c20142e85603 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Wed, 12 Feb 2025 13:46:46 +0000
Subject: [PATCH] io_uring/kbuf: reallocate buf lists on upgrade
IORING_REGISTER_PBUF_RING can reuse an old struct io_buffer_list if it
was created for legacy selected buffer and has been emptied. It violates
the requirement that most of the field should stay stable after publish.
Always reallocate it instead.
Cc: stable(a)vger.kernel.org
Reported-by: Pumpkin Chang <pumpkin(a)devco.re>
Fixes: 2fcabce2d7d34 ("io_uring: disallow mixed provided buffer group registrations")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 04bf493eecae..8e72de7712ac 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -415,6 +415,13 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
}
}
+static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+ scoped_guard(mutex, &ctx->mmap_lock)
+ WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl);
+ io_put_bl(ctx, bl);
+}
+
int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
@@ -636,12 +643,13 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
/* if mapped buffer ring OR classic exists, don't allow */
if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list))
return -EEXIST;
- } else {
- free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
- if (!bl)
- return -ENOMEM;
+ io_destroy_bl(ctx, bl);
}
+ free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ if (!bl)
+ return -ENOMEM;
+
mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT;
ring_size = flex_array_size(br, bufs, reg.ring_entries);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 409f45387c937145adeeeebc6d6032c2ec232b35
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021858-default-pledge-8039@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 409f45387c937145adeeeebc6d6032c2ec232b35 Mon Sep 17 00:00:00 2001
From: Ashish Kalra <ashish.kalra(a)amd.com>
Date: Mon, 10 Feb 2025 22:54:18 +0000
Subject: [PATCH] x86/sev: Fix broken SNP support with KVM module built-in
Fix issues with enabling SNP host support and effectively SNP support
which is broken with respect to the KVM module being built-in.
SNP host support is enabled in snp_rmptable_init() which is invoked as
device_initcall(). SNP check on IOMMU is done during IOMMU PCI init
(IOMMU_PCI_INIT stage). And for that reason snp_rmptable_init() is
currently invoked via device_initcall() and cannot be invoked via
subsys_initcall() as core IOMMU subsystem gets initialized via
subsys_initcall().
Now, if kvm_amd module is built-in, it gets initialized before SNP host
support is enabled in snp_rmptable_init() :
[ 10.131811] kvm_amd: TSC scaling supported
[ 10.136384] kvm_amd: Nested Virtualization enabled
[ 10.141734] kvm_amd: Nested Paging enabled
[ 10.146304] kvm_amd: LBR virtualization supported
[ 10.151557] kvm_amd: SEV enabled (ASIDs 100 - 509)
[ 10.156905] kvm_amd: SEV-ES enabled (ASIDs 1 - 99)
[ 10.162256] kvm_amd: SEV-SNP enabled (ASIDs 1 - 99)
[ 10.171508] kvm_amd: Virtual VMLOAD VMSAVE supported
[ 10.177052] kvm_amd: Virtual GIF supported
...
...
[ 10.201648] kvm_amd: in svm_enable_virtualization_cpu
And then svm_x86_ops->enable_virtualization_cpu()
(svm_enable_virtualization_cpu) programs MSR_VM_HSAVE_PA as following:
wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa);
So VM_HSAVE_PA is non-zero before SNP support is enabled on all CPUs.
snp_rmptable_init() gets invoked after svm_enable_virtualization_cpu()
as following :
...
[ 11.256138] kvm_amd: in svm_enable_virtualization_cpu
...
[ 11.264918] SEV-SNP: in snp_rmptable_init
This triggers a #GP exception in snp_rmptable_init() when snp_enable()
is invoked to set SNP_EN in SYSCFG MSR:
[ 11.294289] unchecked MSR access error: WRMSR to 0xc0010010 (tried to write 0x0000000003fc0000) at rIP: 0xffffffffaf5d5c28 (native_write_msr+0x8/0x30)
...
[ 11.294404] Call Trace:
[ 11.294482] <IRQ>
[ 11.294513] ? show_stack_regs+0x26/0x30
[ 11.294522] ? ex_handler_msr+0x10f/0x180
[ 11.294529] ? search_extable+0x2b/0x40
[ 11.294538] ? fixup_exception+0x2dd/0x340
[ 11.294542] ? exc_general_protection+0x14f/0x440
[ 11.294550] ? asm_exc_general_protection+0x2b/0x30
[ 11.294557] ? __pfx_snp_enable+0x10/0x10
[ 11.294567] ? native_write_msr+0x8/0x30
[ 11.294570] ? __snp_enable+0x5d/0x70
[ 11.294575] snp_enable+0x19/0x20
[ 11.294578] __flush_smp_call_function_queue+0x9c/0x3a0
[ 11.294586] generic_smp_call_function_single_interrupt+0x17/0x20
[ 11.294589] __sysvec_call_function+0x20/0x90
[ 11.294596] sysvec_call_function+0x80/0xb0
[ 11.294601] </IRQ>
[ 11.294603] <TASK>
[ 11.294605] asm_sysvec_call_function+0x1f/0x30
...
[ 11.294631] arch_cpu_idle+0xd/0x20
[ 11.294633] default_idle_call+0x34/0xd0
[ 11.294636] do_idle+0x1f1/0x230
[ 11.294643] ? complete+0x71/0x80
[ 11.294649] cpu_startup_entry+0x30/0x40
[ 11.294652] start_secondary+0x12d/0x160
[ 11.294655] common_startup_64+0x13e/0x141
[ 11.294662] </TASK>
This #GP exception is getting triggered due to the following errata for
AMD family 19h Models 10h-1Fh Processors:
Processor may generate spurious #GP(0) Exception on WRMSR instruction:
Description:
The Processor will generate a spurious #GP(0) Exception on a WRMSR
instruction if the following conditions are all met:
- the target of the WRMSR is a SYSCFG register.
- the write changes the value of SYSCFG.SNPEn from 0 to 1.
- One of the threads that share the physical core has a non-zero
value in the VM_HSAVE_PA MSR.
The document being referred to above:
https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/revisi…
To summarize, with kvm_amd module being built-in, KVM/SVM initialization
happens before host SNP is enabled and this SVM initialization
sets VM_HSAVE_PA to non-zero, which then triggers a #GP when
SYSCFG.SNPEn is being set and this will subsequently cause
SNP_INIT(_EX) to fail with INVALID_CONFIG error as SYSCFG[SnpEn] is not
set on all CPUs.
Essentially SNP host enabling code should be invoked before KVM
initialization, which is currently not the case when KVM is built-in.
Add fix to call snp_rmptable_init() early from iommu_snp_enable()
directly and not invoked via device_initcall() which enables SNP host
support before KVM initialization with kvm_amd module built-in.
Add additional handling for `iommu=off` or `amd_iommu=off` options.
Note that IOMMUs need to be enabled for SNP initialization, therefore,
if host SNP support is enabled but late IOMMU initialization fails
then that will cause PSP driver's SNP_INIT to fail as IOMMU SNP sanity
checks in SNP firmware will fail with invalid configuration error as
below:
[ 9.723114] ccp 0000:23:00.1: sev enabled
[ 9.727602] ccp 0000:23:00.1: psp enabled
[ 9.732527] ccp 0000:a2:00.1: enabling device (0000 -> 0002)
[ 9.739098] ccp 0000:a2:00.1: no command queues available
[ 9.745167] ccp 0000:a2:00.1: psp enabled
[ 9.805337] ccp 0000:23:00.1: SEV-SNP: failed to INIT rc -5, error 0x3
[ 9.866426] ccp 0000:23:00.1: SEV API:1.53 build:5
Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature")
Co-developed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Co-developed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Signed-off-by: Vasant Hegde <vasant.hegde(a)amd.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Ashish Kalra <ashish.kalra(a)amd.com>
Acked-by: Joerg Roedel <jroedel(a)suse.de>
Message-ID: <138b520fb83964782303b43ade4369cd181fdd9c.1739226950.git.ashish.kalra(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5d9685f92e5c..1581246491b5 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -531,6 +531,7 @@ static inline void __init snp_secure_tsc_init(void) { }
#ifdef CONFIG_KVM_AMD_SEV
bool snp_probe_rmptable_info(void);
+int snp_rmptable_init(void);
int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
void snp_dump_hva_rmpentry(unsigned long address);
int psmash(u64 pfn);
@@ -541,6 +542,7 @@ void kdump_sev_callback(void);
void snp_fixup_e820_tables(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
+static inline int snp_rmptable_init(void) { return -ENOSYS; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
static inline void snp_dump_hva_rmpentry(unsigned long address) {}
static inline int psmash(u64 pfn) { return -ENODEV; }
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 1dcc027ec77e..42e74a5a7d78 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -505,19 +505,19 @@ static bool __init setup_rmptable(void)
* described in the SNP_INIT_EX firmware command description in the SNP
* firmware ABI spec.
*/
-static int __init snp_rmptable_init(void)
+int __init snp_rmptable_init(void)
{
unsigned int i;
u64 val;
- if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
- return 0;
+ if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP)))
+ return -ENOSYS;
- if (!amd_iommu_snp_en)
- goto nosnp;
+ if (WARN_ON_ONCE(!amd_iommu_snp_en))
+ return -ENOSYS;
if (!setup_rmptable())
- goto nosnp;
+ return -ENOSYS;
/*
* Check if SEV-SNP is already enabled, this can happen in case of
@@ -530,7 +530,7 @@ static int __init snp_rmptable_init(void)
/* Zero out the RMP bookkeeping area */
if (!clear_rmptable_bookkeeping()) {
free_rmp_segment_table();
- goto nosnp;
+ return -ENOSYS;
}
/* Zero out the RMP entries */
@@ -562,17 +562,8 @@ static int __init snp_rmptable_init(void)
crash_kexec_post_notifiers = true;
return 0;
-
-nosnp:
- cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
- return -ENOSYS;
}
-/*
- * This must be called after the IOMMU has been initialized.
- */
-device_initcall(snp_rmptable_init);
-
static void set_rmp_segment_info(unsigned int segment_shift)
{
rmp_segment_shift = segment_shift;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index c5cd92edada0..2fecfed75e54 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3194,7 +3194,7 @@ static bool __init detect_ivrs(void)
return true;
}
-static void iommu_snp_enable(void)
+static __init void iommu_snp_enable(void)
{
#ifdef CONFIG_KVM_AMD_SEV
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
@@ -3219,6 +3219,14 @@ static void iommu_snp_enable(void)
goto disable_snp;
}
+ /*
+ * Enable host SNP support once SNP support is checked on IOMMU.
+ */
+ if (snp_rmptable_init()) {
+ pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n");
+ goto disable_snp;
+ }
+
pr_info("IOMMU SNP support enabled.\n");
return;
@@ -3318,6 +3326,19 @@ static int __init iommu_go_to_state(enum iommu_init_state state)
ret = state_next();
}
+ /*
+ * SNP platform initilazation requires IOMMUs to be fully configured.
+ * If the SNP support on IOMMUs has NOT been checked, simply mark SNP
+ * as unsupported. If the SNP support on IOMMUs has been checked and
+ * host SNP support enabled but RMP enforcement has not been enabled
+ * in IOMMUs, then the system is in a half-baked state, but can limp
+ * along as all memory should be Hypervisor-Owned in the RMP. WARN,
+ * but leave SNP as "supported" to avoid confusing the kernel.
+ */
+ if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
+ !WARN_ON_ONCE(amd_iommu_snp_en))
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+
return ret;
}
@@ -3426,18 +3447,23 @@ void __init amd_iommu_detect(void)
int ret;
if (no_iommu || (iommu_detected && !gart_iommu_aperture))
- return;
+ goto disable_snp;
if (!amd_iommu_sme_check())
- return;
+ goto disable_snp;
ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
if (ret)
- return;
+ goto disable_snp;
amd_iommu_detected = true;
iommu_detected = 1;
x86_init.iommu.iommu_init = amd_iommu_init;
+ return;
+
+disable_snp:
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
}
/****************************************************************************
The patch below does not apply to the 6.13-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.13.y
git checkout FETCH_HEAD
git cherry-pick -x 409f45387c937145adeeeebc6d6032c2ec232b35
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021856-heave-blade-985e@gregkh' --subject-prefix 'PATCH 6.13.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 409f45387c937145adeeeebc6d6032c2ec232b35 Mon Sep 17 00:00:00 2001
From: Ashish Kalra <ashish.kalra(a)amd.com>
Date: Mon, 10 Feb 2025 22:54:18 +0000
Subject: [PATCH] x86/sev: Fix broken SNP support with KVM module built-in
Fix issues with enabling SNP host support and effectively SNP support
which is broken with respect to the KVM module being built-in.
SNP host support is enabled in snp_rmptable_init() which is invoked as
device_initcall(). SNP check on IOMMU is done during IOMMU PCI init
(IOMMU_PCI_INIT stage). And for that reason snp_rmptable_init() is
currently invoked via device_initcall() and cannot be invoked via
subsys_initcall() as core IOMMU subsystem gets initialized via
subsys_initcall().
Now, if kvm_amd module is built-in, it gets initialized before SNP host
support is enabled in snp_rmptable_init() :
[ 10.131811] kvm_amd: TSC scaling supported
[ 10.136384] kvm_amd: Nested Virtualization enabled
[ 10.141734] kvm_amd: Nested Paging enabled
[ 10.146304] kvm_amd: LBR virtualization supported
[ 10.151557] kvm_amd: SEV enabled (ASIDs 100 - 509)
[ 10.156905] kvm_amd: SEV-ES enabled (ASIDs 1 - 99)
[ 10.162256] kvm_amd: SEV-SNP enabled (ASIDs 1 - 99)
[ 10.171508] kvm_amd: Virtual VMLOAD VMSAVE supported
[ 10.177052] kvm_amd: Virtual GIF supported
...
...
[ 10.201648] kvm_amd: in svm_enable_virtualization_cpu
And then svm_x86_ops->enable_virtualization_cpu()
(svm_enable_virtualization_cpu) programs MSR_VM_HSAVE_PA as following:
wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa);
So VM_HSAVE_PA is non-zero before SNP support is enabled on all CPUs.
snp_rmptable_init() gets invoked after svm_enable_virtualization_cpu()
as following :
...
[ 11.256138] kvm_amd: in svm_enable_virtualization_cpu
...
[ 11.264918] SEV-SNP: in snp_rmptable_init
This triggers a #GP exception in snp_rmptable_init() when snp_enable()
is invoked to set SNP_EN in SYSCFG MSR:
[ 11.294289] unchecked MSR access error: WRMSR to 0xc0010010 (tried to write 0x0000000003fc0000) at rIP: 0xffffffffaf5d5c28 (native_write_msr+0x8/0x30)
...
[ 11.294404] Call Trace:
[ 11.294482] <IRQ>
[ 11.294513] ? show_stack_regs+0x26/0x30
[ 11.294522] ? ex_handler_msr+0x10f/0x180
[ 11.294529] ? search_extable+0x2b/0x40
[ 11.294538] ? fixup_exception+0x2dd/0x340
[ 11.294542] ? exc_general_protection+0x14f/0x440
[ 11.294550] ? asm_exc_general_protection+0x2b/0x30
[ 11.294557] ? __pfx_snp_enable+0x10/0x10
[ 11.294567] ? native_write_msr+0x8/0x30
[ 11.294570] ? __snp_enable+0x5d/0x70
[ 11.294575] snp_enable+0x19/0x20
[ 11.294578] __flush_smp_call_function_queue+0x9c/0x3a0
[ 11.294586] generic_smp_call_function_single_interrupt+0x17/0x20
[ 11.294589] __sysvec_call_function+0x20/0x90
[ 11.294596] sysvec_call_function+0x80/0xb0
[ 11.294601] </IRQ>
[ 11.294603] <TASK>
[ 11.294605] asm_sysvec_call_function+0x1f/0x30
...
[ 11.294631] arch_cpu_idle+0xd/0x20
[ 11.294633] default_idle_call+0x34/0xd0
[ 11.294636] do_idle+0x1f1/0x230
[ 11.294643] ? complete+0x71/0x80
[ 11.294649] cpu_startup_entry+0x30/0x40
[ 11.294652] start_secondary+0x12d/0x160
[ 11.294655] common_startup_64+0x13e/0x141
[ 11.294662] </TASK>
This #GP exception is getting triggered due to the following errata for
AMD family 19h Models 10h-1Fh Processors:
Processor may generate spurious #GP(0) Exception on WRMSR instruction:
Description:
The Processor will generate a spurious #GP(0) Exception on a WRMSR
instruction if the following conditions are all met:
- the target of the WRMSR is a SYSCFG register.
- the write changes the value of SYSCFG.SNPEn from 0 to 1.
- One of the threads that share the physical core has a non-zero
value in the VM_HSAVE_PA MSR.
The document being referred to above:
https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/revisi…
To summarize, with kvm_amd module being built-in, KVM/SVM initialization
happens before host SNP is enabled and this SVM initialization
sets VM_HSAVE_PA to non-zero, which then triggers a #GP when
SYSCFG.SNPEn is being set and this will subsequently cause
SNP_INIT(_EX) to fail with INVALID_CONFIG error as SYSCFG[SnpEn] is not
set on all CPUs.
Essentially SNP host enabling code should be invoked before KVM
initialization, which is currently not the case when KVM is built-in.
Add fix to call snp_rmptable_init() early from iommu_snp_enable()
directly and not invoked via device_initcall() which enables SNP host
support before KVM initialization with kvm_amd module built-in.
Add additional handling for `iommu=off` or `amd_iommu=off` options.
Note that IOMMUs need to be enabled for SNP initialization, therefore,
if host SNP support is enabled but late IOMMU initialization fails
then that will cause PSP driver's SNP_INIT to fail as IOMMU SNP sanity
checks in SNP firmware will fail with invalid configuration error as
below:
[ 9.723114] ccp 0000:23:00.1: sev enabled
[ 9.727602] ccp 0000:23:00.1: psp enabled
[ 9.732527] ccp 0000:a2:00.1: enabling device (0000 -> 0002)
[ 9.739098] ccp 0000:a2:00.1: no command queues available
[ 9.745167] ccp 0000:a2:00.1: psp enabled
[ 9.805337] ccp 0000:23:00.1: SEV-SNP: failed to INIT rc -5, error 0x3
[ 9.866426] ccp 0000:23:00.1: SEV API:1.53 build:5
Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature")
Co-developed-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Co-developed-by: Vasant Hegde <vasant.hegde(a)amd.com>
Signed-off-by: Vasant Hegde <vasant.hegde(a)amd.com>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Ashish Kalra <ashish.kalra(a)amd.com>
Acked-by: Joerg Roedel <jroedel(a)suse.de>
Message-ID: <138b520fb83964782303b43ade4369cd181fdd9c.1739226950.git.ashish.kalra(a)amd.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 5d9685f92e5c..1581246491b5 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -531,6 +531,7 @@ static inline void __init snp_secure_tsc_init(void) { }
#ifdef CONFIG_KVM_AMD_SEV
bool snp_probe_rmptable_info(void);
+int snp_rmptable_init(void);
int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
void snp_dump_hva_rmpentry(unsigned long address);
int psmash(u64 pfn);
@@ -541,6 +542,7 @@ void kdump_sev_callback(void);
void snp_fixup_e820_tables(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
+static inline int snp_rmptable_init(void) { return -ENOSYS; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
static inline void snp_dump_hva_rmpentry(unsigned long address) {}
static inline int psmash(u64 pfn) { return -ENODEV; }
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 1dcc027ec77e..42e74a5a7d78 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -505,19 +505,19 @@ static bool __init setup_rmptable(void)
* described in the SNP_INIT_EX firmware command description in the SNP
* firmware ABI spec.
*/
-static int __init snp_rmptable_init(void)
+int __init snp_rmptable_init(void)
{
unsigned int i;
u64 val;
- if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
- return 0;
+ if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP)))
+ return -ENOSYS;
- if (!amd_iommu_snp_en)
- goto nosnp;
+ if (WARN_ON_ONCE(!amd_iommu_snp_en))
+ return -ENOSYS;
if (!setup_rmptable())
- goto nosnp;
+ return -ENOSYS;
/*
* Check if SEV-SNP is already enabled, this can happen in case of
@@ -530,7 +530,7 @@ static int __init snp_rmptable_init(void)
/* Zero out the RMP bookkeeping area */
if (!clear_rmptable_bookkeeping()) {
free_rmp_segment_table();
- goto nosnp;
+ return -ENOSYS;
}
/* Zero out the RMP entries */
@@ -562,17 +562,8 @@ static int __init snp_rmptable_init(void)
crash_kexec_post_notifiers = true;
return 0;
-
-nosnp:
- cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
- return -ENOSYS;
}
-/*
- * This must be called after the IOMMU has been initialized.
- */
-device_initcall(snp_rmptable_init);
-
static void set_rmp_segment_info(unsigned int segment_shift)
{
rmp_segment_shift = segment_shift;
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index c5cd92edada0..2fecfed75e54 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3194,7 +3194,7 @@ static bool __init detect_ivrs(void)
return true;
}
-static void iommu_snp_enable(void)
+static __init void iommu_snp_enable(void)
{
#ifdef CONFIG_KVM_AMD_SEV
if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
@@ -3219,6 +3219,14 @@ static void iommu_snp_enable(void)
goto disable_snp;
}
+ /*
+ * Enable host SNP support once SNP support is checked on IOMMU.
+ */
+ if (snp_rmptable_init()) {
+ pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n");
+ goto disable_snp;
+ }
+
pr_info("IOMMU SNP support enabled.\n");
return;
@@ -3318,6 +3326,19 @@ static int __init iommu_go_to_state(enum iommu_init_state state)
ret = state_next();
}
+ /*
+ * SNP platform initilazation requires IOMMUs to be fully configured.
+ * If the SNP support on IOMMUs has NOT been checked, simply mark SNP
+ * as unsupported. If the SNP support on IOMMUs has been checked and
+ * host SNP support enabled but RMP enforcement has not been enabled
+ * in IOMMUs, then the system is in a half-baked state, but can limp
+ * along as all memory should be Hypervisor-Owned in the RMP. WARN,
+ * but leave SNP as "supported" to avoid confusing the kernel.
+ */
+ if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) &&
+ !WARN_ON_ONCE(amd_iommu_snp_en))
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
+
return ret;
}
@@ -3426,18 +3447,23 @@ void __init amd_iommu_detect(void)
int ret;
if (no_iommu || (iommu_detected && !gart_iommu_aperture))
- return;
+ goto disable_snp;
if (!amd_iommu_sme_check())
- return;
+ goto disable_snp;
ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
if (ret)
- return;
+ goto disable_snp;
amd_iommu_detected = true;
iommu_detected = 1;
x86_init.iommu.iommu_init = amd_iommu_init;
+ return;
+
+disable_snp:
+ if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
+ cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
}
/****************************************************************************
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 46d6c6f3ef0eaff71c2db6d77d4e2ebb7adac34f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021837-volumes-twig-2514@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 46d6c6f3ef0eaff71c2db6d77d4e2ebb7adac34f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 29 Jan 2025 17:08:25 -0800
Subject: [PATCH] KVM: nSVM: Enter guest mode before initializing nested NPT
MMU
When preparing vmcb02 for nested VMRUN (or state restore), "enter" guest
mode prior to initializing the MMU for nested NPT so that guest_mode is
set in the MMU's role. KVM's model is that all L2 MMUs are tagged with
guest_mode, as the behavior of hypervisor MMUs tends to be significantly
different than kernel MMUs.
Practically speaking, the bug is relatively benign, as KVM only directly
queries role.guest_mode in kvm_mmu_free_guest_mode_roots() and
kvm_mmu_page_ad_need_write_protect(), which SVM doesn't use, and in paths
that are optimizations (mmu_page_zap_pte() and
shadow_mmu_try_split_huge_pages()).
And while the role is incorprated into shadow page usage, because nested
NPT requires KVM to be using NPT for L1, reusing shadow pages across L1
and L2 is impossible as L1 MMUs will always have direct=1, while L2 MMUs
will have direct=0.
Hoist the TLB processing and setting of HF_GUEST_MASK to the beginning
of the flow instead of forcing guest_mode in the MMU, as nothing in
nested_vmcb02_prepare_control() between the old and new locations touches
TLB flush requests or HF_GUEST_MASK, i.e. there's no reason to present
inconsistent vCPU state to the MMU.
Fixes: 69cb877487de ("KVM: nSVM: move MMU setup to nested_prepare_vmcb_control")
Cc: stable(a)vger.kernel.org
Reported-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
Reviewed-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
Link: https://lore.kernel.org/r/20250130010825.220346-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 74c20dbb92da..d4ac4a1f8b81 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5540,7 +5540,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
union kvm_mmu_page_role root_role;
/* NPT requires CR0.PG=1. */
- WARN_ON_ONCE(cpu_role.base.direct);
+ WARN_ON_ONCE(cpu_role.base.direct || !cpu_role.base.guest_mode);
root_role = cpu_role.base;
root_role.level = kvm_mmu_get_tdp_level(vcpu);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index d77b094d9a4d..04c375bf1ac2 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -646,6 +646,11 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
u32 pause_count12;
u32 pause_thresh12;
+ nested_svm_transition_tlb_flush(vcpu);
+
+ /* Enter Guest-Mode */
+ enter_guest_mode(vcpu);
+
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
@@ -762,11 +767,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
}
}
- nested_svm_transition_tlb_flush(vcpu);
-
- /* Enter Guest-Mode */
- enter_guest_mode(vcpu);
-
/*
* Merge guest and host intercepts - must be called with vcpu in
* guest-mode to take effect.
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 46d6c6f3ef0eaff71c2db6d77d4e2ebb7adac34f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021824-strict-motivator-41ae@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 46d6c6f3ef0eaff71c2db6d77d4e2ebb7adac34f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 29 Jan 2025 17:08:25 -0800
Subject: [PATCH] KVM: nSVM: Enter guest mode before initializing nested NPT
MMU
When preparing vmcb02 for nested VMRUN (or state restore), "enter" guest
mode prior to initializing the MMU for nested NPT so that guest_mode is
set in the MMU's role. KVM's model is that all L2 MMUs are tagged with
guest_mode, as the behavior of hypervisor MMUs tends to be significantly
different than kernel MMUs.
Practically speaking, the bug is relatively benign, as KVM only directly
queries role.guest_mode in kvm_mmu_free_guest_mode_roots() and
kvm_mmu_page_ad_need_write_protect(), which SVM doesn't use, and in paths
that are optimizations (mmu_page_zap_pte() and
shadow_mmu_try_split_huge_pages()).
And while the role is incorprated into shadow page usage, because nested
NPT requires KVM to be using NPT for L1, reusing shadow pages across L1
and L2 is impossible as L1 MMUs will always have direct=1, while L2 MMUs
will have direct=0.
Hoist the TLB processing and setting of HF_GUEST_MASK to the beginning
of the flow instead of forcing guest_mode in the MMU, as nothing in
nested_vmcb02_prepare_control() between the old and new locations touches
TLB flush requests or HF_GUEST_MASK, i.e. there's no reason to present
inconsistent vCPU state to the MMU.
Fixes: 69cb877487de ("KVM: nSVM: move MMU setup to nested_prepare_vmcb_control")
Cc: stable(a)vger.kernel.org
Reported-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
Reviewed-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
Link: https://lore.kernel.org/r/20250130010825.220346-1-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 74c20dbb92da..d4ac4a1f8b81 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5540,7 +5540,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0,
union kvm_mmu_page_role root_role;
/* NPT requires CR0.PG=1. */
- WARN_ON_ONCE(cpu_role.base.direct);
+ WARN_ON_ONCE(cpu_role.base.direct || !cpu_role.base.guest_mode);
root_role = cpu_role.base;
root_role.level = kvm_mmu_get_tdp_level(vcpu);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index d77b094d9a4d..04c375bf1ac2 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -646,6 +646,11 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
u32 pause_count12;
u32 pause_thresh12;
+ nested_svm_transition_tlb_flush(vcpu);
+
+ /* Enter Guest-Mode */
+ enter_guest_mode(vcpu);
+
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
@@ -762,11 +767,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
}
}
- nested_svm_transition_tlb_flush(vcpu);
-
- /* Enter Guest-Mode */
- enter_guest_mode(vcpu);
-
/*
* Merge guest and host intercepts - must be called with vcpu in
* guest-mode to take effect.
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x a8de7f100bb5989d9c3627d3a223ee1c863f3b69
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021851-nautical-buffoon-d8b1@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a8de7f100bb5989d9c3627d3a223ee1c863f3b69 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 17 Jan 2025 16:34:51 -0800
Subject: [PATCH] KVM: x86: Reject Hyper-V's SEND_IPI hypercalls if local APIC
isn't in-kernel
Advertise support for Hyper-V's SEND_IPI and SEND_IPI_EX hypercalls if and
only if the local API is emulated/virtualized by KVM, and explicitly reject
said hypercalls if the local APIC is emulated in userspace, i.e. don't rely
on userspace to opt-in to KVM_CAP_HYPERV_ENFORCE_CPUID.
Rejecting SEND_IPI and SEND_IPI_EX fixes a NULL-pointer dereference if
Hyper-V enlightenments are exposed to the guest without an in-kernel local
APIC:
dump_stack+0xbe/0xfd
__kasan_report.cold+0x34/0x84
kasan_report+0x3a/0x50
__apic_accept_irq+0x3a/0x5c0
kvm_hv_send_ipi.isra.0+0x34e/0x820
kvm_hv_hypercall+0x8d9/0x9d0
kvm_emulate_hypercall+0x506/0x7e0
__vmx_handle_exit+0x283/0xb60
vmx_handle_exit+0x1d/0xd0
vcpu_enter_guest+0x16b0/0x24c0
vcpu_run+0xc0/0x550
kvm_arch_vcpu_ioctl_run+0x170/0x6d0
kvm_vcpu_ioctl+0x413/0xb20
__se_sys_ioctl+0x111/0x160
do_syscal1_64+0x30/0x40
entry_SYSCALL_64_after_hwframe+0x67/0xd1
Note, checking the sending vCPU is sufficient, as the per-VM irqchip_mode
can't be modified after vCPUs are created, i.e. if one vCPU has an
in-kernel local APIC, then all vCPUs have an in-kernel local APIC.
Reported-by: Dongjie Zou <zoudongjie(a)huawei.com>
Fixes: 214ff83d4473 ("KVM: x86: hyperv: implement PV IPI send hypercalls")
Fixes: 2bc39970e932 ("x86/kvm/hyper-v: Introduce KVM_GET_SUPPORTED_HV_CPUID")
Cc: stable(a)vger.kernel.org
Reviewed-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Link: https://lore.kernel.org/r/20250118003454.2619573-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 6a6dd5a84f22..6ebeb6cea6c0 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2226,6 +2226,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
u32 vector;
bool all_cpus;
+ if (!lapic_in_kernel(vcpu))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
if (hc->code == HVCALL_SEND_IPI) {
if (!hc->fast) {
if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
@@ -2852,7 +2855,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
- ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+ if (!vcpu || lapic_in_kernel(vcpu))
+ ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
if (evmcs_ver)
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x a8de7f100bb5989d9c3627d3a223ee1c863f3b69
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021850-exes-cabana-e868@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From a8de7f100bb5989d9c3627d3a223ee1c863f3b69 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 17 Jan 2025 16:34:51 -0800
Subject: [PATCH] KVM: x86: Reject Hyper-V's SEND_IPI hypercalls if local APIC
isn't in-kernel
Advertise support for Hyper-V's SEND_IPI and SEND_IPI_EX hypercalls if and
only if the local API is emulated/virtualized by KVM, and explicitly reject
said hypercalls if the local APIC is emulated in userspace, i.e. don't rely
on userspace to opt-in to KVM_CAP_HYPERV_ENFORCE_CPUID.
Rejecting SEND_IPI and SEND_IPI_EX fixes a NULL-pointer dereference if
Hyper-V enlightenments are exposed to the guest without an in-kernel local
APIC:
dump_stack+0xbe/0xfd
__kasan_report.cold+0x34/0x84
kasan_report+0x3a/0x50
__apic_accept_irq+0x3a/0x5c0
kvm_hv_send_ipi.isra.0+0x34e/0x820
kvm_hv_hypercall+0x8d9/0x9d0
kvm_emulate_hypercall+0x506/0x7e0
__vmx_handle_exit+0x283/0xb60
vmx_handle_exit+0x1d/0xd0
vcpu_enter_guest+0x16b0/0x24c0
vcpu_run+0xc0/0x550
kvm_arch_vcpu_ioctl_run+0x170/0x6d0
kvm_vcpu_ioctl+0x413/0xb20
__se_sys_ioctl+0x111/0x160
do_syscal1_64+0x30/0x40
entry_SYSCALL_64_after_hwframe+0x67/0xd1
Note, checking the sending vCPU is sufficient, as the per-VM irqchip_mode
can't be modified after vCPUs are created, i.e. if one vCPU has an
in-kernel local APIC, then all vCPUs have an in-kernel local APIC.
Reported-by: Dongjie Zou <zoudongjie(a)huawei.com>
Fixes: 214ff83d4473 ("KVM: x86: hyperv: implement PV IPI send hypercalls")
Fixes: 2bc39970e932 ("x86/kvm/hyper-v: Introduce KVM_GET_SUPPORTED_HV_CPUID")
Cc: stable(a)vger.kernel.org
Reviewed-by: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Link: https://lore.kernel.org/r/20250118003454.2619573-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 6a6dd5a84f22..6ebeb6cea6c0 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2226,6 +2226,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
u32 vector;
bool all_cpus;
+ if (!lapic_in_kernel(vcpu))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
if (hc->code == HVCALL_SEND_IPI) {
if (!hc->fast) {
if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
@@ -2852,7 +2855,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
- ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+ if (!vcpu || lapic_in_kernel(vcpu))
+ ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
if (evmcs_ver)
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x e977499820782ab1c69f354d9f41b6d9ad1f43d9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021815-sublease-unneeded-0077@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e977499820782ab1c69f354d9f41b6d9ad1f43d9 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das(a)intel.com>
Date: Mon, 10 Feb 2025 15:36:54 +0100
Subject: [PATCH] drm/xe: Carve out wopcm portion from the stolen memory
The top of stolen memory is WOPCM, which shouldn't be accessed. Remove
this portion from the stolen memory region for discrete platforms.
This was already done for integrated, but was missing for discrete
platforms.
This also moves get_wopcm_size() so detect_bar2_dgfx() and
detect_bar2_integrated can use the same function.
v2: Improve commit message and suitable stable version tag(Lucas)
Fixes: d8b52a02cb40 ("drm/xe: Implement stolen memory.")
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Matthew Auld <matthew.auld(a)intel.com>
Cc: Lucas De Marchi <lucas.demarchi(a)intel.com>
Cc: stable(a)vger.kernel.org # v6.11+
Reviewed-by: Lucas De Marchi <lucas.demarchi(a)intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250210143654.2076747-1-nirm…
Signed-off-by: Nirmoy Das <nirmoy.das(a)intel.com>
(cherry picked from commit 2c7f45cc7e197a792ce5c693e56ea48f60b312da)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index 423856cc18d4..d414421f8c13 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -57,38 +57,6 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}
-static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
-{
- struct xe_tile *tile = xe_device_get_root_tile(xe);
- struct xe_mmio *mmio = xe_root_tile_mmio(xe);
- struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
- u64 stolen_size;
- u64 tile_offset;
- u64 tile_size;
-
- tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
- tile_size = tile->mem.vram.actual_physical_size;
-
- /* Use DSM base address instead for stolen memory */
- mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
- if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
- return 0;
-
- stolen_size = tile_size - mgr->stolen_base;
-
- /* Verify usage fits in the actual resource available */
- if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
- mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
-
- /*
- * There may be few KB of platform dependent reserved memory at the end
- * of vram which is not part of the DSM. Such reserved memory portion is
- * always less then DSM granularity so align down the stolen_size to DSM
- * granularity to accommodate such reserve vram portion.
- */
- return ALIGN_DOWN(stolen_size, SZ_1M);
-}
-
static u32 get_wopcm_size(struct xe_device *xe)
{
u32 wopcm_size;
@@ -112,6 +80,44 @@ static u32 get_wopcm_size(struct xe_device *xe)
return wopcm_size;
}
+static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+{
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ u64 stolen_size, wopcm_size;
+ u64 tile_offset;
+ u64 tile_size;
+
+ tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
+ tile_size = tile->mem.vram.actual_physical_size;
+
+ /* Use DSM base address instead for stolen memory */
+ mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
+ if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
+ return 0;
+
+ /* Carve out the top of DSM as it contains the reserved WOPCM region */
+ wopcm_size = get_wopcm_size(xe);
+ if (drm_WARN_ON(&xe->drm, !wopcm_size))
+ return 0;
+
+ stolen_size = tile_size - mgr->stolen_base;
+ stolen_size -= wopcm_size;
+
+ /* Verify usage fits in the actual resource available */
+ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
+ mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
+
+ /*
+ * There may be few KB of platform dependent reserved memory at the end
+ * of vram which is not part of the DSM. Such reserved memory portion is
+ * always less then DSM granularity so align down the stolen_size to DSM
+ * granularity to accommodate such reserve vram portion.
+ */
+ return ALIGN_DOWN(stolen_size, SZ_1M);
+}
+
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
There are some issues with the enetc driver, some of which are specific
to the LS1028A platform, and some of which were introduced recently when
i.MX95 ENETC support was added, so this patch set aims to clean up those
issues.
Wei Fang (8):
net: enetc: fix the off-by-one issue in enetc_map_tx_buffs()
net: enetc: correct the tx_swbd statistics
net: enetc: correct the xdp_tx statistics
net: enetc: VFs do not support HWTSTAMP_TX_ONESTEP_SYNC
net: enetc: update UDP checksum when updating originTimestamp field
net: enetc: add missing enetc4_link_deinit()
net: enetc: remove the mm_lock from the ENETC v4 driver
net: enetc: correct the EMDIO base offset for ENETC v4
drivers/net/ethernet/freescale/enetc/enetc.c | 53 +++++++++++++++----
.../net/ethernet/freescale/enetc/enetc4_hw.h | 3 ++
.../net/ethernet/freescale/enetc/enetc4_pf.c | 2 +-
.../ethernet/freescale/enetc/enetc_ethtool.c | 7 ++-
.../freescale/enetc/enetc_pf_common.c | 10 +++-
5 files changed, 60 insertions(+), 15 deletions(-)
--
2.34.1
From: Ge Yang <yangge1116(a)126.com>
Since the introduction of commit b65d4adbc0f0 ("mm: hugetlb: defer freeing
of HugeTLB pages"), which supports deferring the freeing of HugeTLB pages,
the allocation of contiguous memory through cma_alloc() may fail
probabilistically.
In the CMA allocation process, if it is found that the CMA area is occupied
by in-use hugepage folios, these in-use hugepage folios need to be migrated
to another location. When there are no available hugepage folios in the
free HugeTLB pool during the migration of in-use HugeTLB pages, new folios
are allocated from the buddy system. A temporary state is set on the newly
allocated folio. Upon completion of the hugepage folio migration, the
temporary state is transferred from the new folios to the old folios.
Normally, when the old folios with the temporary state are freed, it is
directly released back to the buddy system. However, due to the deferred
freeing of HugeTLB pages, the PageBuddy() check fails, ultimately leading
to the failure of cma_alloc().
Here is a simplified call trace illustrating the process:
cma_alloc()
->__alloc_contig_migrate_range() // Migrate in-use hugepage
->unmap_and_move_huge_page()
->folio_putback_hugetlb() // Free old folios
->test_pages_isolated()
->__test_page_isolated_in_pageblock()
->PageBuddy(page) // Check if the page is in buddy
To resolve this issue, we have implemented a function named
wait_for_hugepage_folios_freed(). This function ensures that the hugepage
folios are properly released back to the buddy system after their migration
is completed. By invoking wait_for_hugepage_folios_freed() following the
migration process, we guarantee that when test_pages_isolated() is
executed, it will successfully pass.
Fixes: b65d4adbc0f0 ("mm: hugetlb: defer freeing of HugeTLB pages")
Signed-off-by: Ge Yang <yangge1116(a)126.com>
---
include/linux/hugetlb.h | 5 +++++
mm/hugetlb.c | 7 +++++++
mm/migrate.c | 16 ++++++++++++++--
3 files changed, 26 insertions(+), 2 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6c6546b..c39e0d5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -697,6 +697,7 @@ bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+void wait_for_hugepage_folios_freed(struct hstate *h);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
@@ -1092,6 +1093,10 @@ static inline int replace_free_hugepage_folios(unsigned long start_pfn,
return 0;
}
+static inline void wait_for_hugepage_folios_freed(struct hstate *h)
+{
+}
+
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
bool cow_from_owner)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 30bc34d..64cae39 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2955,6 +2955,13 @@ int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn)
return ret;
}
+void wait_for_hugepage_folios_freed(struct hstate *h)
+{
+ WARN_ON(!h);
+
+ flush_free_hpage_work(h);
+}
+
typedef enum {
/*
* For either 0/1: we checked the per-vma resv map, and one resv
diff --git a/mm/migrate.c b/mm/migrate.c
index fb19a18..5dd1851 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1448,6 +1448,7 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
int page_was_mapped = 0;
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
+ unsigned long size;
if (folio_ref_count(src) == 1) {
/* page was freed from under us. So we are done. */
@@ -1533,9 +1534,20 @@ static int unmap_and_move_huge_page(new_folio_t get_new_folio,
out_unlock:
folio_unlock(src);
out:
- if (rc == MIGRATEPAGE_SUCCESS)
+ if (rc == MIGRATEPAGE_SUCCESS) {
+ size = folio_size(src);
folio_putback_hugetlb(src);
- else if (rc != -EAGAIN)
+
+ /*
+ * Due to the deferred freeing of HugeTLB folios, the hugepage 'src' may
+ * not immediately release to the buddy system. This can lead to failure
+ * in allocating memory through the cma_alloc() function. To ensure that
+ * the hugepage folios are properly released back to the buddy system,
+ * we invoke the wait_for_hugepage_folios_freed() function to wait for
+ * the release to complete.
+ */
+ wait_for_hugepage_folios_freed(size_to_hstate(size));
+ } else if (rc != -EAGAIN)
list_move_tail(&src->lru, ret);
/*
--
2.7.4
From: Stefan Eichenberger <stefan.eichenberger(a)toradex.com>
The current solution for powering off the Apalis iMX6 is not functioning
as intended. To resolve this, it is necessary to power off the
vgen2_reg, which will also set the POWER_ENABLE_MOCI signal to a low
state. This ensures the carrier board is properly informed to initiate
its power-off sequence.
The new solution uses the regulator-poweroff driver, which will power
off the regulator during a system shutdown.
CC: stable(a)vger.kernel.org
Fixes: 4eb56e26f92e ("ARM: dts: imx6q-apalis: Command pmic to standby for poweroff")
Signed-off-by: Stefan Eichenberger <stefan.eichenberger(a)toradex.com>
---
arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi
index 1c72da417011..614b65821995 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-apalis.dtsi
@@ -108,6 +108,11 @@ lvds_panel_in: endpoint {
};
};
+ poweroff {
+ compatible = "regulator-poweroff";
+ cpu-supply = <&vgen2_reg>;
+ };
+
reg_module_3v3: regulator-module-3v3 {
compatible = "regulator-fixed";
regulator-always-on;
@@ -236,10 +241,6 @@ &can2 {
status = "disabled";
};
-&clks {
- fsl,pmic-stby-poweroff;
-};
-
/* Apalis SPI1 */
&ecspi1 {
cs-gpios = <&gpio5 25 GPIO_ACTIVE_LOW>;
@@ -527,7 +528,6 @@ &i2c2 {
pmic: pmic@8 {
compatible = "fsl,pfuze100";
- fsl,pmic-stby-poweroff;
reg = <0x08>;
regulators {
--
2.45.2
The desired clock frequency was correctly set to 400MHz in the device tree
but was lowered by the driver to 300MHz breaking 4K 60Hz content playback.
Fix the issue by removing the driver call to clk_set_rate(), which reduce
the amount of board specific code.
Fixes: 003afda97c65 ("media: verisilicon: Enable AV1 decoder on rk3588")
Cc: stable(a)vger.kernel.org
Signed-off-by: Nicolas Dufresne <nicolas.dufresne(a)collabora.com>
---
This patch fixes user report of AV1 4K60 decoder not being fast enough
on RK3588 based SoC. This is a break from Hantro original authors
habbit of coding the frequencies in the driver instead of specifying this
frequency in the device tree. The other calls to clk_set_rate() are left
since this would require modifying many dtsi files, which would then be
unsuitable for backport.
---
Changes in v2:
- Completely remove the unused init function, the driver is null-safe
- Link to v1: https://lore.kernel.org/r/20250217-b4-hantro-av1-clock-rate-v1-1-65b91132c5…
---
drivers/media/platform/verisilicon/rockchip_vpu_hw.c | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
index 964122e7c355934cd80eb442219f6ba51bba8b71..842040f713c15e6ff295771bc9fa5a7b66e584b2 100644
--- a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
+++ b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
@@ -17,7 +17,6 @@
#define RK3066_ACLK_MAX_FREQ (300 * 1000 * 1000)
#define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000)
-#define RK3588_ACLK_MAX_FREQ (300 * 1000 * 1000)
#define ROCKCHIP_VPU981_MIN_SIZE 64
@@ -440,13 +439,6 @@ static int rk3066_vpu_hw_init(struct hantro_dev *vpu)
return 0;
}
-static int rk3588_vpu981_hw_init(struct hantro_dev *vpu)
-{
- /* Bump ACLKs to max. possible freq. to improve performance. */
- clk_set_rate(vpu->clocks[0].clk, RK3588_ACLK_MAX_FREQ);
- return 0;
-}
-
static int rockchip_vpu_hw_init(struct hantro_dev *vpu)
{
/* Bump ACLK to max. possible freq. to improve performance. */
@@ -807,7 +799,6 @@ const struct hantro_variant rk3588_vpu981_variant = {
.codec_ops = rk3588_vpu981_codec_ops,
.irqs = rk3588_vpu981_irqs,
.num_irqs = ARRAY_SIZE(rk3588_vpu981_irqs),
- .init = rk3588_vpu981_hw_init,
.clk_names = rk3588_vpu981_vpu_clk_names,
.num_clocks = ARRAY_SIZE(rk3588_vpu981_vpu_clk_names)
};
---
base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
change-id: 20250217-b4-hantro-av1-clock-rate-e5497f1499df
Best regards,
--
Nicolas Dufresne <nicolas.dufresne(a)collabora.com>
Our CI built linux-stable-rc.git queue/6.1 at commit eef4a8a45ba1
("btrfs: output the reason for open_ctree() failure").
(built for arm and arm64, albeit I don't think it matters.)
Building perf produced 2 build errors which I wanted to report
even before the RC1 is out.
| ...tools/perf/util/namespaces.c:247:27: error: invalid type argument of '->' (have 'int')
| 247 | RC_CHK_ACCESS(nsi)->in_pidns = true;
| | ^~
introduced by commit 93520bacf784 ("perf namespaces: Introduce
nsinfo__set_in_pidns()"). The RC_CK_ACCSS macro was introduced in 6.4.
Removing the macro made this go away ( nsi->in_pidns = true; ).
Second perf build error:
| ld: ...tools/perf/util/machine.c:1176: undefined reference to `kallsyms__get_symbol_start'
Introduced by commits:
710c2e913aa9 perf machine: Don't ignore _etext when not a text symbol
69a87a32f5cd perf machine: Include data symbols in the kernel map
These two use the function kallsyms__get_symbol_start added with:
f9dd531c5b82 perf symbols: Add kallsyms__get_symbol_start()
So f9dd531c5b82 would additionally be needed.
The kernel itself built fine, due to the perf error we don't have runtime
testresults.
Best regards
Max
I'm announcing the release of the 6.12.15 kernel.
This is ONLY needed for users of the XFS filesystem. If you do not use
XFS, no need to upgrade. If you do use XFS, please upgrade.
The updated 6.12.y git tree can be found at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git linux-6.12.y
and can be browsed at the normal kernel.org git web browser:
https://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git;a=summary
thanks,
greg k-h
------------
Makefile | 2 +-
fs/xfs/xfs_quota.h | 5 +++--
fs/xfs/xfs_trans.c | 10 +++-------
fs/xfs/xfs_trans_dquot.c | 31 ++++++++++++++++++++++++++-----
4 files changed, 33 insertions(+), 15 deletions(-)
Darrick J. Wong (1):
xfs: don't lose solo dquot update transactions
Greg Kroah-Hartman (1):
Linux 6.12.15
The quilt patch titled
Subject: mm: pgtable: fix incorrect reclaim of non-empty PTE pages
has been removed from the -mm tree. Its filename was
mm-pgtable-fix-incorrect-reclaim-of-non-empty-pte-pages.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Qi Zheng <zhengqi.arch(a)bytedance.com>
Subject: mm: pgtable: fix incorrect reclaim of non-empty PTE pages
Date: Tue, 11 Feb 2025 15:26:25 +0800
In zap_pte_range(), if the pte lock was released midway, the pte entries
may be refilled with physical pages by another thread, which may cause a
non-empty PTE page to be reclaimed and eventually cause the system to
crash.
To fix it, fall back to the slow path in this case to recheck if all pte
entries are still none.
Link: https://lkml.kernel.org/r/20250211072625.89188-1-zhengqi.arch@bytedance.com
Fixes: 6375e95f381e ("mm: pgtable: reclaim empty PTE page in madvise(MADV_DONTNEED)")
Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
Reported-by: Christian Brauner <brauner(a)kernel.org>
Closes: https://lore.kernel.org/all/20250207-anbot-bankfilialen-acce9d79a2c7@braune…
Reported-by: Qu Wenruo <quwenruo.btrfs(a)gmx.com>
Closes: https://lore.kernel.org/all/152296f3-5c81-4a94-97f3-004108fba7be@gmx.com/
Tested-by: Zi Yan <ziy(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Jann Horn <jannh(a)google.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Zi Yan <ziy(a)nvidia.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
--- a/mm/memory.c~mm-pgtable-fix-incorrect-reclaim-of-non-empty-pte-pages
+++ a/mm/memory.c
@@ -1719,7 +1719,7 @@ static unsigned long zap_pte_range(struc
pmd_t pmdval;
unsigned long start = addr;
bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details);
- bool direct_reclaim = false;
+ bool direct_reclaim = true;
int nr;
retry:
@@ -1734,8 +1734,10 @@ retry:
do {
bool any_skipped = false;
- if (need_resched())
+ if (need_resched()) {
+ direct_reclaim = false;
break;
+ }
nr = do_zap_pte_range(tlb, vma, pte, addr, end, details, rss,
&force_flush, &force_break, &any_skipped);
@@ -1743,11 +1745,20 @@ retry:
can_reclaim_pt = false;
if (unlikely(force_break)) {
addr += nr * PAGE_SIZE;
+ direct_reclaim = false;
break;
}
} while (pte += nr, addr += PAGE_SIZE * nr, addr != end);
- if (can_reclaim_pt && addr == end)
+ /*
+ * Fast path: try to hold the pmd lock and unmap the PTE page.
+ *
+ * If the pte lock was released midway (retry case), or if the attempt
+ * to hold the pmd lock failed, then we need to recheck all pte entries
+ * to ensure they are still none, thereby preventing the pte entries
+ * from being repopulated by another thread.
+ */
+ if (can_reclaim_pt && direct_reclaim && addr == end)
direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval);
add_mm_rss_vec(mm, rss);
_
Patches currently in -mm which might be from zhengqi.arch(a)bytedance.com are
arm-pgtable-fix-null-pointer-dereference-issue.patch
The quilt patch titled
Subject: mm,madvise,hugetlb: check for 0-length range after end address adjustment
has been removed from the -mm tree. Its filename was
mmmadvisehugetlb-check-for-0-length-range-after-end-address-adjustment.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ricardo Ca��uelo Navarro <rcn(a)igalia.com>
Subject: mm,madvise,hugetlb: check for 0-length range after end address adjustment
Date: Mon, 3 Feb 2025 08:52:06 +0100
Add a sanity check to madvise_dontneed_free() to address a corner case in
madvise where a race condition causes the current vma being processed to
be backed by a different page size.
During a madvise(MADV_DONTNEED) call on a memory region registered with a
userfaultfd, there's a period of time where the process mm lock is
temporarily released in order to send a UFFD_EVENT_REMOVE and let
userspace handle the event. During this time, the vma covering the
current address range may change due to an explicit mmap done concurrently
by another thread.
If, after that change, the memory region, which was originally backed by
4KB pages, is now backed by hugepages, the end address is rounded down to
a hugepage boundary to avoid data loss (see "Fixes" below). This rounding
may cause the end address to be truncated to the same address as the
start.
Make this corner case follow the same semantics as in other similar cases
where the requested region has zero length (ie. return 0).
This will make madvise_walk_vmas() continue to the next vma in the range
(this time holding the process mm lock) which, due to the prev pointer
becoming stale because of the vma change, will be the same hugepage-backed
vma that was just checked before. The next time madvise_dontneed_free()
runs for this vma, if the start address isn't aligned to a hugepage
boundary, it'll return -EINVAL, which is also in line with the madvise
api.
From userspace perspective, madvise() will return EINVAL because the start
address isn't aligned according to the new vma alignment requirements
(hugepage), even though it was correctly page-aligned when the call was
issued.
Link: https://lkml.kernel.org/r/20250203075206.1452208-1-rcn@igalia.com
Fixes: 8ebe0a5eaaeb ("mm,madvise,hugetlb: fix unexpected data loss with MADV_DONTNEED on hugetlbfs")
Signed-off-by: Ricardo Ca��uelo Navarro <rcn(a)igalia.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Florent Revest <revest(a)google.com>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/madvise.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
--- a/mm/madvise.c~mmmadvisehugetlb-check-for-0-length-range-after-end-address-adjustment
+++ a/mm/madvise.c
@@ -933,7 +933,16 @@ static long madvise_dontneed_free(struct
*/
end = vma->vm_end;
}
- VM_WARN_ON(start >= end);
+ /*
+ * If the memory region between start and end was
+ * originally backed by 4kB pages and then remapped to
+ * be backed by hugepages while mmap_lock was dropped,
+ * the adjustment for hugetlb vma above may have rounded
+ * end down to the start address.
+ */
+ if (start == end)
+ return 0;
+ VM_WARN_ON(start > end);
}
if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED)
_
Patches currently in -mm which might be from rcn(a)igalia.com are
The quilt patch titled
Subject: mm/zswap: fix inconsistency when zswap_store_page() fails
has been removed from the -mm tree. Its filename was
mm-zswap-fix-inconsistency-when-zswap_store_page-fails.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Hyeonggon Yoo <42.hyeyoo(a)gmail.com>
Subject: mm/zswap: fix inconsistency when zswap_store_page() fails
Date: Wed, 29 Jan 2025 19:08:44 +0900
Commit b7c0ccdfbafd ("mm: zswap: support large folios in zswap_store()")
skips charging any zswap entries when it failed to zswap the entire folio.
However, when some base pages are zswapped but it failed to zswap the
entire folio, the zswap operation is rolled back. When freeing zswap
entries for those pages, zswap_entry_free() uncharges the zswap entries
that were not previously charged, causing zswap charging to become
inconsistent.
This inconsistency triggers two warnings with following steps:
# On a machine with 64GiB of RAM and 36GiB of zswap
$ stress-ng --bigheap 2 # wait until the OOM-killer kills stress-ng
$ sudo reboot
The two warnings are:
in mm/memcontrol.c:163, function obj_cgroup_release():
WARN_ON_ONCE(nr_bytes & (PAGE_SIZE - 1));
in mm/page_counter.c:60, function page_counter_cancel():
if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n",
new, nr_pages))
zswap_stored_pages also becomes inconsistent in the same way.
As suggested by Kanchana, increment zswap_stored_pages and charge zswap
entries within zswap_store_page() when it succeeds. This way,
zswap_entry_free() will decrement the counter and uncharge the entries
when it failed to zswap the entire folio.
While this could potentially be optimized by batching objcg charging and
incrementing the counter, let's focus on fixing the bug this time and
leave the optimization for later after some evaluation.
After resolving the inconsistency, the warnings disappear.
[42.hyeyoo(a)gmail.com: refactor zswap_store_page()]
Link: https://lkml.kernel.org/r/20250131082037.2426-1-42.hyeyoo@gmail.com
Link: https://lkml.kernel.org/r/20250129100844.2935-1-42.hyeyoo@gmail.com
Fixes: b7c0ccdfbafd ("mm: zswap: support large folios in zswap_store()")
Co-developed-by: Kanchana P Sridhar <kanchana.p.sridhar(a)intel.com>
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar(a)intel.com>
Signed-off-by: Hyeonggon Yoo <42.hyeyoo(a)gmail.com>
Acked-by: Yosry Ahmed <yosry.ahmed(a)linux.dev>
Acked-by: Nhat Pham <nphamcs(a)gmail.com>
Cc: Chengming Zhou <chengming.zhou(a)linux.dev>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/zswap.c | 35 ++++++++++++++++-------------------
1 file changed, 16 insertions(+), 19 deletions(-)
--- a/mm/zswap.c~mm-zswap-fix-inconsistency-when-zswap_store_page-fails
+++ a/mm/zswap.c
@@ -1445,9 +1445,9 @@ resched:
* main API
**********************************/
-static ssize_t zswap_store_page(struct page *page,
- struct obj_cgroup *objcg,
- struct zswap_pool *pool)
+static bool zswap_store_page(struct page *page,
+ struct obj_cgroup *objcg,
+ struct zswap_pool *pool)
{
swp_entry_t page_swpentry = page_swap_entry(page);
struct zswap_entry *entry, *old;
@@ -1456,7 +1456,7 @@ static ssize_t zswap_store_page(struct p
entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page));
if (!entry) {
zswap_reject_kmemcache_fail++;
- return -EINVAL;
+ return false;
}
if (!zswap_compress(page, entry, pool))
@@ -1483,13 +1483,17 @@ static ssize_t zswap_store_page(struct p
/*
* The entry is successfully compressed and stored in the tree, there is
- * no further possibility of failure. Grab refs to the pool and objcg.
- * These refs will be dropped by zswap_entry_free() when the entry is
- * removed from the tree.
+ * no further possibility of failure. Grab refs to the pool and objcg,
+ * charge zswap memory, and increment zswap_stored_pages.
+ * The opposite actions will be performed by zswap_entry_free()
+ * when the entry is removed from the tree.
*/
zswap_pool_get(pool);
- if (objcg)
+ if (objcg) {
obj_cgroup_get(objcg);
+ obj_cgroup_charge_zswap(objcg, entry->length);
+ }
+ atomic_long_inc(&zswap_stored_pages);
/*
* We finish initializing the entry while it's already in xarray.
@@ -1510,13 +1514,13 @@ static ssize_t zswap_store_page(struct p
zswap_lru_add(&zswap_list_lru, entry);
}
- return entry->length;
+ return true;
store_failed:
zpool_free(pool->zpool, entry->handle);
compress_failed:
zswap_entry_cache_free(entry);
- return -EINVAL;
+ return false;
}
bool zswap_store(struct folio *folio)
@@ -1526,7 +1530,6 @@ bool zswap_store(struct folio *folio)
struct obj_cgroup *objcg = NULL;
struct mem_cgroup *memcg = NULL;
struct zswap_pool *pool;
- size_t compressed_bytes = 0;
bool ret = false;
long index;
@@ -1564,20 +1567,14 @@ bool zswap_store(struct folio *folio)
for (index = 0; index < nr_pages; ++index) {
struct page *page = folio_page(folio, index);
- ssize_t bytes;
- bytes = zswap_store_page(page, objcg, pool);
- if (bytes < 0)
+ if (!zswap_store_page(page, objcg, pool))
goto put_pool;
- compressed_bytes += bytes;
}
- if (objcg) {
- obj_cgroup_charge_zswap(objcg, compressed_bytes);
+ if (objcg)
count_objcg_events(objcg, ZSWPOUT, nr_pages);
- }
- atomic_long_add(nr_pages, &zswap_stored_pages);
count_vm_events(ZSWPOUT, nr_pages);
ret = true;
_
Patches currently in -mm which might be from 42.hyeyoo(a)gmail.com are
The quilt patch titled
Subject: lib/iov_iter: fix import_iovec_ubuf iovec management
has been removed from the -mm tree. Its filename was
lib-iov_iter-fix-import_iovec_ubuf-iovec-management.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Pavel Begunkov <asml.silence(a)gmail.com>
Subject: lib/iov_iter: fix import_iovec_ubuf iovec management
Date: Fri, 31 Jan 2025 14:13:15 +0000
import_iovec() says that it should always be fine to kfree the iovec
returned in @iovp regardless of the error code. __import_iovec_ubuf()
never reallocates it and thus should clear the pointer even in cases when
copy_iovec_*() fail.
Link: https://lkml.kernel.org/r/378ae26923ffc20fd5e41b4360d673bf47b1775b.17383324…
Fixes: 3b2deb0e46da ("iov_iter: import single vector iovecs as ITER_UBUF")
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Reviewed-by: Jens Axboe <axboe(a)kernel.dk>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/iov_iter.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/lib/iov_iter.c~lib-iov_iter-fix-import_iovec_ubuf-iovec-management
+++ a/lib/iov_iter.c
@@ -1428,6 +1428,8 @@ static ssize_t __import_iovec_ubuf(int t
struct iovec *iov = *iovp;
ssize_t ret;
+ *iovp = NULL;
+
if (compat)
ret = copy_compat_iovec_from_user(iov, uvec, 1);
else
@@ -1438,7 +1440,6 @@ static ssize_t __import_iovec_ubuf(int t
ret = import_ubuf(type, iov->iov_base, iov->iov_len, i);
if (unlikely(ret))
return ret;
- *iovp = NULL;
return i->count;
}
_
Patches currently in -mm which might be from asml.silence(a)gmail.com are
The patch titled
Subject: hwpoison, memory_hotplug: lock folio before unmap hwpoisoned folio
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ma Wupeng <mawupeng1(a)huawei.com>
Subject: hwpoison, memory_hotplug: lock folio before unmap hwpoisoned folio
Date: Mon, 17 Feb 2025 09:43:29 +0800
Commit b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to
be offlined) add page poison checks in do_migrate_range in order to make
offline hwpoisoned page possible by introducing isolate_lru_page and
try_to_unmap for hwpoisoned page. However folio lock must be held before
calling try_to_unmap. Add it to fix this problem.
Warning will be produced if folio is not locked during unmap:
------------[ cut here ]------------
kernel BUG at ./include/linux/swapops.h:400!
Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
Modules linked in:
CPU: 4 UID: 0 PID: 411 Comm: bash Tainted: G W 6.13.0-rc1-00016-g3c434c7ee82a-dirty #41
Tainted: [W]=WARN
Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : try_to_unmap_one+0xb08/0xd3c
lr : try_to_unmap_one+0x3dc/0xd3c
Call trace:
try_to_unmap_one+0xb08/0xd3c (P)
try_to_unmap_one+0x3dc/0xd3c (L)
rmap_walk_anon+0xdc/0x1f8
rmap_walk+0x3c/0x58
try_to_unmap+0x88/0x90
unmap_poisoned_folio+0x30/0xa8
do_migrate_range+0x4a0/0x568
offline_pages+0x5a4/0x670
memory_block_action+0x17c/0x374
memory_subsys_offline+0x3c/0x78
device_offline+0xa4/0xd0
state_store+0x8c/0xf0
dev_attr_store+0x18/0x2c
sysfs_kf_write+0x44/0x54
kernfs_fop_write_iter+0x118/0x1a8
vfs_write+0x3a8/0x4bc
ksys_write+0x6c/0xf8
__arm64_sys_write+0x1c/0x28
invoke_syscall+0x44/0x100
el0_svc_common.constprop.0+0x40/0xe0
do_el0_svc+0x1c/0x28
el0_svc+0x30/0xd0
el0t_64_sync_handler+0xc8/0xcc
el0t_64_sync+0x198/0x19c
Code: f9407be0 b5fff320 d4210000 17ffff97 (d4210000)
---[ end trace 0000000000000000 ]---
Link: https://lkml.kernel.org/r/20250217014329.3610326-4-mawupeng1@huawei.com
Fixes: b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined")
Signed-off-by: Ma Wupeng <mawupeng1(a)huawei.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
--- a/mm/memory_hotplug.c~hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio
+++ a/mm/memory_hotplug.c
@@ -1832,8 +1832,11 @@ static void do_migrate_range(unsigned lo
(folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) {
if (WARN_ON(folio_test_lru(folio)))
folio_isolate_lru(folio);
- if (folio_mapped(folio))
+ if (folio_mapped(folio)) {
+ folio_lock(folio);
unmap_poisoned_folio(folio, pfn, false);
+ folio_unlock(folio);
+ }
goto put_folio;
}
_
Patches currently in -mm which might be from mawupeng1(a)huawei.com are
mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch
mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch
hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch
The patch titled
Subject: mm: memory-hotplug: check folio ref count first in do_migrate_range
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ma Wupeng <mawupeng1(a)huawei.com>
Subject: mm: memory-hotplug: check folio ref count first in do_migrate_range
Date: Mon, 17 Feb 2025 09:43:28 +0800
If a folio has an increased reference count, folio_try_get() will acquire
it, perform necessary operations, and then release it. In the case of a
poisoned folio without an elevated reference count (which is unlikely for
memory-failure), folio_try_get() will simply bypass it.
Therefore, relocate the folio_try_get() function, responsible for checking
and acquiring this reference count at first.
Link: https://lkml.kernel.org/r/20250217014329.3610326-3-mawupeng1@huawei.com
Signed-off-by: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory_hotplug.c | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)
--- a/mm/memory_hotplug.c~mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range
+++ a/mm/memory_hotplug.c
@@ -1822,12 +1822,12 @@ static void do_migrate_range(unsigned lo
if (folio_test_large(folio))
pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1;
- /*
- * HWPoison pages have elevated reference counts so the migration would
- * fail on them. It also doesn't make any sense to migrate them in the
- * first place. Still try to unmap such a page in case it is still mapped
- * (keep the unmap as the catch all safety net).
- */
+ if (!folio_try_get(folio))
+ continue;
+
+ if (unlikely(page_folio(page) != folio))
+ goto put_folio;
+
if (folio_test_hwpoison(folio) ||
(folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) {
if (WARN_ON(folio_test_lru(folio)))
@@ -1835,14 +1835,8 @@ static void do_migrate_range(unsigned lo
if (folio_mapped(folio))
unmap_poisoned_folio(folio, pfn, false);
- continue;
- }
-
- if (!folio_try_get(folio))
- continue;
-
- if (unlikely(page_folio(page) != folio))
goto put_folio;
+ }
if (!isolate_folio_to_list(folio, &source)) {
if (__ratelimit(&migrate_rs)) {
_
Patches currently in -mm which might be from mawupeng1(a)huawei.com are
mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch
mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch
hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch
The patch titled
Subject: mm: memory-failure: update ttu flag inside unmap_poisoned_folio
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ma Wupeng <mawupeng1(a)huawei.com>
Subject: mm: memory-failure: update ttu flag inside unmap_poisoned_folio
Date: Mon, 17 Feb 2025 09:43:27 +0800
Patch series "mm: memory_failure: unmap poisoned folio during migrate
properly", v3.
Fix two bugs during folio migration if the folio is poisoned.
This patch (of 3):
Commit 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to
TTU_HWPOISON") introduce TTU_HWPOISON to replace TTU_IGNORE_HWPOISON in
order to stop send SIGBUS signal when accessing an error page after a
memory error on a clean folio. However during page migration, anon folio
must be set with TTU_HWPOISON during unmap_*(). For pagecache we need
some policy just like the one in hwpoison_user_mappings to set this flag.
So move this policy from hwpoison_user_mappings to unmap_poisoned_folio to
handle this warning properly.
Warning will be produced during unamp poison folio with the following log:
------------[ cut here ]------------
WARNING: CPU: 1 PID: 365 at mm/rmap.c:1847 try_to_unmap_one+0x8fc/0xd3c
Modules linked in:
CPU: 1 UID: 0 PID: 365 Comm: bash Tainted: G W 6.13.0-rc1-00018-gacdb4bbda7ab #42
Tainted: [W]=WARN
Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : try_to_unmap_one+0x8fc/0xd3c
lr : try_to_unmap_one+0x3dc/0xd3c
Call trace:
try_to_unmap_one+0x8fc/0xd3c (P)
try_to_unmap_one+0x3dc/0xd3c (L)
rmap_walk_anon+0xdc/0x1f8
rmap_walk+0x3c/0x58
try_to_unmap+0x88/0x90
unmap_poisoned_folio+0x30/0xa8
do_migrate_range+0x4a0/0x568
offline_pages+0x5a4/0x670
memory_block_action+0x17c/0x374
memory_subsys_offline+0x3c/0x78
device_offline+0xa4/0xd0
state_store+0x8c/0xf0
dev_attr_store+0x18/0x2c
sysfs_kf_write+0x44/0x54
kernfs_fop_write_iter+0x118/0x1a8
vfs_write+0x3a8/0x4bc
ksys_write+0x6c/0xf8
__arm64_sys_write+0x1c/0x28
invoke_syscall+0x44/0x100
el0_svc_common.constprop.0+0x40/0xe0
do_el0_svc+0x1c/0x28
el0_svc+0x30/0xd0
el0t_64_sync_handler+0xc8/0xcc
el0t_64_sync+0x198/0x19c
---[ end trace 0000000000000000 ]---
Link: https://lkml.kernel.org/r/20250217014329.3610326-1-mawupeng1@huawei.com
Link: https://lkml.kernel.org/r/20250217014329.3610326-2-mawupeng1@huawei.com
Fixes: 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON")
Signed-off-by: Ma Wupeng <mawupeng1(a)huawei.com>
Suggested-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Ma Wupeng <mawupeng1(a)huawei.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/internal.h | 5 ++-
mm/memory-failure.c | 61 +++++++++++++++++++++---------------------
mm/memory_hotplug.c | 3 +-
3 files changed, 36 insertions(+), 33 deletions(-)
--- a/mm/internal.h~mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio
+++ a/mm/internal.h
@@ -1115,7 +1115,7 @@ static inline int find_next_best_node(in
* mm/memory-failure.c
*/
#ifdef CONFIG_MEMORY_FAILURE
-void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu);
+int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill);
void shake_folio(struct folio *folio);
extern int hwpoison_filter(struct page *p);
@@ -1138,8 +1138,9 @@ unsigned long page_mapped_in_vma(const s
struct vm_area_struct *vma);
#else
-static inline void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
+static inline int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill)
{
+ return -EBUSY;
}
#endif
--- a/mm/memory-failure.c~mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio
+++ a/mm/memory-failure.c
@@ -1556,8 +1556,34 @@ static int get_hwpoison_page(struct page
return ret;
}
-void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu)
+int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill)
{
+ enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
+ struct address_space *mapping;
+
+ if (folio_test_swapcache(folio)) {
+ pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
+ ttu &= ~TTU_HWPOISON;
+ }
+
+ /*
+ * Propagate the dirty bit from PTEs to struct page first, because we
+ * need this to decide if we should kill or just drop the page.
+ * XXX: the dirty test could be racy: set_page_dirty() may not always
+ * be called inside page lock (it's recommended but not enforced).
+ */
+ mapping = folio_mapping(folio);
+ if (!must_kill && !folio_test_dirty(folio) && mapping &&
+ mapping_can_writeback(mapping)) {
+ if (folio_mkclean(folio)) {
+ folio_set_dirty(folio);
+ } else {
+ ttu &= ~TTU_HWPOISON;
+ pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
+ pfn);
+ }
+ }
+
if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {
struct address_space *mapping;
@@ -1572,7 +1598,7 @@ void unmap_poisoned_folio(struct folio *
if (!mapping) {
pr_info("%#lx: could not lock mapping for mapped hugetlb folio\n",
folio_pfn(folio));
- return;
+ return -EBUSY;
}
try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
@@ -1580,6 +1606,8 @@ void unmap_poisoned_folio(struct folio *
} else {
try_to_unmap(folio, ttu);
}
+
+ return folio_mapped(folio) ? -EBUSY : 0;
}
/*
@@ -1589,8 +1617,6 @@ void unmap_poisoned_folio(struct folio *
static bool hwpoison_user_mappings(struct folio *folio, struct page *p,
unsigned long pfn, int flags)
{
- enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
- struct address_space *mapping;
LIST_HEAD(tokill);
bool unmap_success;
int forcekill;
@@ -1613,29 +1639,6 @@ static bool hwpoison_user_mappings(struc
if (!folio_mapped(folio))
return true;
- if (folio_test_swapcache(folio)) {
- pr_err("%#lx: keeping poisoned page in swap cache\n", pfn);
- ttu &= ~TTU_HWPOISON;
- }
-
- /*
- * Propagate the dirty bit from PTEs to struct page first, because we
- * need this to decide if we should kill or just drop the page.
- * XXX: the dirty test could be racy: set_page_dirty() may not always
- * be called inside page lock (it's recommended but not enforced).
- */
- mapping = folio_mapping(folio);
- if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping &&
- mapping_can_writeback(mapping)) {
- if (folio_mkclean(folio)) {
- folio_set_dirty(folio);
- } else {
- ttu &= ~TTU_HWPOISON;
- pr_info("%#lx: corrupted page was clean: dropped without side effects\n",
- pfn);
- }
- }
-
/*
* First collect all the processes that have the page
* mapped in dirty form. This has to be done before try_to_unmap,
@@ -1643,9 +1646,7 @@ static bool hwpoison_user_mappings(struc
*/
collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED);
- unmap_poisoned_folio(folio, ttu);
-
- unmap_success = !folio_mapped(folio);
+ unmap_success = !unmap_poisoned_folio(folio, pfn, flags & MF_MUST_KILL);
if (!unmap_success)
pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n",
pfn, folio_mapcount(folio));
--- a/mm/memory_hotplug.c~mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio
+++ a/mm/memory_hotplug.c
@@ -1833,7 +1833,8 @@ static void do_migrate_range(unsigned lo
if (WARN_ON(folio_test_lru(folio)))
folio_isolate_lru(folio);
if (folio_mapped(folio))
- unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK);
+ unmap_poisoned_folio(folio, pfn, false);
+
continue;
}
_
Patches currently in -mm which might be from mawupeng1(a)huawei.com are
mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch
mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch
hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch
The patch titled
Subject: arm: pgtable: fix NULL pointer dereference issue
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
arm-pgtable-fix-null-pointer-dereference-issue.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Qi Zheng <zhengqi.arch(a)bytedance.com>
Subject: arm: pgtable: fix NULL pointer dereference issue
Date: Mon, 17 Feb 2025 10:49:24 +0800
When update_mmu_cache_range() is called by update_mmu_cache(), the vmf
parameter is NULL, which will cause a NULL pointer dereference issue in
adjust_pte():
Unable to handle kernel NULL pointer dereference at virtual address 00000030 when read
Hardware name: Atmel AT91SAM9
PC is at update_mmu_cache_range+0x1e0/0x278
LR is at pte_offset_map_rw_nolock+0x18/0x2c
Call trace:
update_mmu_cache_range from remove_migration_pte+0x29c/0x2ec
remove_migration_pte from rmap_walk_file+0xcc/0x130
rmap_walk_file from remove_migration_ptes+0x90/0xa4
remove_migration_ptes from migrate_pages_batch+0x6d4/0x858
migrate_pages_batch from migrate_pages+0x188/0x488
migrate_pages from compact_zone+0x56c/0x954
compact_zone from compact_node+0x90/0xf0
compact_node from kcompactd+0x1d4/0x204
kcompactd from kthread+0x120/0x12c
kthread from ret_from_fork+0x14/0x38
Exception stack(0xc0d8bfb0 to 0xc0d8bff8)
To fix it, do not rely on whether 'ptl' is equal to decide whether to hold
the pte lock, but decide it by whether CONFIG_SPLIT_PTE_PTLOCKS is
enabled. In addition, if two vmas map to the same PTE page, there is no
need to hold the pte lock again, otherwise a deadlock will occur. Just
add the need_lock parameter to let adjust_pte() know this information.
Link: https://lkml.kernel.org/r/20250217024924.57996-1-zhengqi.arch@bytedance.com
Fixes: fc9c45b71f43 ("arm: adjust_pte() use pte_offset_map_rw_nolock()")
Signed-off-by: Qi Zheng <zhengqi.arch(a)bytedance.com>
Reported-by: Ezra Buehler <ezra.buehler(a)husqvarnagroup.com>
Closes: https://lore.kernel.org/lkml/CAM1KZSmZ2T_riHvay+7cKEFxoPgeVpHkVFTzVVEQ1BO0c…
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickens <hughd(a)google.com>
Cc: Muchun Song <muchun.song(a)linux.dev>
Cc: Qi Zheng <zhengqi.arch(a)bytedance.com>
Cc: Russel King <linux(a)armlinux.org.uk>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm/mm/fault-armv.c | 37 +++++++++++++++++++++++++------------
1 file changed, 25 insertions(+), 12 deletions(-)
--- a/arch/arm/mm/fault-armv.c~arm-pgtable-fix-null-pointer-dereference-issue
+++ a/arch/arm/mm/fault-armv.c
@@ -62,7 +62,7 @@ static int do_adjust_pte(struct vm_area_
}
static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
- unsigned long pfn, struct vm_fault *vmf)
+ unsigned long pfn, bool need_lock)
{
spinlock_t *ptl;
pgd_t *pgd;
@@ -99,12 +99,11 @@ again:
if (!pte)
return 0;
- /*
- * If we are using split PTE locks, then we need to take the page
- * lock here. Otherwise we are using shared mm->page_table_lock
- * which is already locked, thus cannot take it.
- */
- if (ptl != vmf->ptl) {
+ if (need_lock) {
+ /*
+ * Use nested version here to indicate that we are already
+ * holding one similar spinlock.
+ */
spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) {
pte_unmap_unlock(pte, ptl);
@@ -114,7 +113,7 @@ again:
ret = do_adjust_pte(vma, address, pfn, pte);
- if (ptl != vmf->ptl)
+ if (need_lock)
spin_unlock(ptl);
pte_unmap(pte);
@@ -123,9 +122,10 @@ again:
static void
make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep, unsigned long pfn,
- struct vm_fault *vmf)
+ unsigned long addr, pte_t *ptep, unsigned long pfn)
{
+ const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE);
+ const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE;
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *mpnt;
unsigned long offset;
@@ -142,6 +142,14 @@ make_coherent(struct address_space *mapp
flush_dcache_mmap_lock(mapping);
vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
/*
+ * If we are using split PTE locks, then we need to take the pte
+ * lock. Otherwise we are using shared mm->page_table_lock which
+ * is already locked, thus cannot take it.
+ */
+ bool need_lock = IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS);
+ unsigned long mpnt_addr;
+
+ /*
* If this VMA is not in our MM, we can ignore it.
* Note that we intentionally mask out the VMA
* that we are fixing up.
@@ -151,7 +159,12 @@ make_coherent(struct address_space *mapp
if (!(mpnt->vm_flags & VM_MAYSHARE))
continue;
offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
- aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf);
+ mpnt_addr = mpnt->vm_start + offset;
+
+ /* Avoid deadlocks by not grabbing the same PTE lock again. */
+ if (mpnt_addr >= pmd_start_addr && mpnt_addr < pmd_end_addr)
+ need_lock = false;
+ aliases += adjust_pte(mpnt, mpnt_addr, pfn, need_lock);
}
flush_dcache_mmap_unlock(mapping);
if (aliases)
@@ -194,7 +207,7 @@ void update_mmu_cache_range(struct vm_fa
__flush_dcache_folio(mapping, folio);
if (mapping) {
if (cache_is_vivt())
- make_coherent(mapping, vma, addr, ptep, pfn, vmf);
+ make_coherent(mapping, vma, addr, ptep, pfn);
else if (vma->vm_flags & VM_EXEC)
__flush_icache_all();
}
_
Patches currently in -mm which might be from zhengqi.arch(a)bytedance.com are
mm-pgtable-fix-incorrect-reclaim-of-non-empty-pte-pages.patch
arm-pgtable-fix-null-pointer-dereference-issue.patch
The patch titled
Subject: mm/hwpoison: fix incorrect "not recovered" report for recovered clean pages
has been added to the -mm mm-unstable branch. Its filename is
mm-hwpoison-fix-incorrect-not-recovered-report-for-recovered-clean-pages.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Shuai Xue <xueshuai(a)linux.alibaba.com>
Subject: mm/hwpoison: fix incorrect "not recovered" report for recovered clean pages
Date: Mon, 17 Feb 2025 14:33:34 +0800
When an uncorrected memory error is consumed there is a race between the
CMCI from the memory controller reporting an uncorrected error with a UCNA
signature, and the core reporting and SRAR signature machine check when
the data is about to be consumed.
If the CMCI wins that race, the page is marked poisoned when
uc_decode_notifier() calls memory_failure(). For dirty pages,
memory_failure() invokes try_to_unmap() with the TTU_HWPOISON flag,
converting the PTE to a hwpoison entry. As a result,
kill_accessing_process():
- call walk_page_range() and return 1 regardless of whether
try_to_unmap() succeeds or fails,
- call kill_proc() to make sure a SIGBUS is sent
- return -EHWPOISON to indicate that SIGBUS is already sent to the
process and kill_me_maybe() doesn't have to send it again.
However, for clean pages, the TTU_HWPOISON flag is cleared, leaving the
PTE unchanged and not converted to a hwpoison entry. Conversely, for
clean pages where PTE entries are not marked as hwpoison,
kill_accessing_process() returns -EFAULT, causing kill_me_maybe() to send
a SIGBUS.
Console log looks like this:
Memory failure: 0x827ca68: corrupted page was clean: dropped without side effects
Memory failure: 0x827ca68: recovery action for clean LRU page: Recovered
Memory failure: 0x827ca68: already hardware poisoned
mce: Memory error not recovered
To fix it, return 0 for "corrupted page was clean", preventing an
unnecessary SIGBUS.
Link: https://lkml.kernel.org/r/20250217063335.22257-5-xueshuai@linux.alibaba.com
Fixes: 046545a661af ("mm/hwpoison: fix error page recovered but reported "not recovered"")
Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com>
Cc: <stable(a)vger.kernel.org>
Cc: Acked-by:Thomas Gleixner <tglx(a)linutronix.de>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: linmiaohe <linmiaohe(a)huawei.com>
Cc: "Luck, Tony" <tony.luck(a)intel.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ruidong Tian <tianruidong(a)linux.alibaba.com>
Cc: Yazen Ghannam <yazen.ghannam(a)amd.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
--- a/mm/memory-failure.c~mm-hwpoison-fix-incorrect-not-recovered-report-for-recovered-clean-pages
+++ a/mm/memory-failure.c
@@ -881,12 +881,17 @@ static int kill_accessing_process(struct
mmap_read_lock(p->mm);
ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops,
(void *)&priv);
+ /*
+ * ret = 1 when CMCI wins, regardless of whether try_to_unmap()
+ * succeeds or fails, then kill the process with SIGBUS.
+ * ret = 0 when poison page is a clean page and it's dropped, no
+ * SIGBUS is needed.
+ */
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
- else
- ret = 0;
mmap_read_unlock(p->mm);
- return ret > 0 ? -EHWPOISON : -EFAULT;
+
+ return ret > 0 ? -EHWPOISON : 0;
}
/*
_
Patches currently in -mm which might be from xueshuai(a)linux.alibaba.com are
x86-mce-collect-error-message-for-severities-below-mce_panic_severity.patch
x86-mce-dump-error-msg-from-severities.patch
x86-mce-add-ex_type_efault_reg-as-in-kernel-recovery-context-to-fix-copy-from-user-operations-regression.patch
mm-hwpoison-fix-incorrect-not-recovered-report-for-recovered-clean-pages.patch
mm-memory-failure-move-return-value-documentation-to-function-declaration.patch
The patch titled
Subject: x86/mce: add EX_TYPE_EFAULT_REG as in-kernel recovery context to fix copy-from-user operations regression
has been added to the -mm mm-unstable branch. Its filename is
x86-mce-add-ex_type_efault_reg-as-in-kernel-recovery-context-to-fix-copy-from-user-operations-regression.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Shuai Xue <xueshuai(a)linux.alibaba.com>
Subject: x86/mce: add EX_TYPE_EFAULT_REG as in-kernel recovery context to fix copy-from-user operations regression
Date: Mon, 17 Feb 2025 14:33:33 +0800
Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new
extable fixup type, EX_TYPE_EFAULT_REG, and later patches updated the
extable fixup type for copy-from-user operations, changing it from
EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG.
Specifically, commit 99641e094d6c ("x86/uaccess: Remove .fixup usage")
altered the extable fixup type for the get_user family, while commit
4c132d1d844a ("x86/futex: Remove .fixup usage") addressed the futex
operations. This change inadvertently caused a regression where the error
context for some copy-from-user operations no longer functions as an
in-kernel recovery context, leading to kernel panics with the message:
"Machine check: Data load in unrecoverable area of kernel."
To fix the regression, add EX_TYPE_EFAULT_REG as a in-kernel recovery
context for copy-from-user operations.
Link: https://lkml.kernel.org/r/20250217063335.22257-4-xueshuai@linux.alibaba.com
Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com>
Fixes: 4c132d1d844a ("x86/futex: Remove .fixup usage")
Cc: <stable(a)vger.kernel.org>
Cc: Acked-by:Thomas Gleixner <tglx(a)linutronix.de>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: linmiaohe <linmiaohe(a)huawei.com>
Cc: "Luck, Tony" <tony.luck(a)intel.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ruidong Tian <tianruidong(a)linux.alibaba.com>
Cc: Yazen Ghannam <yazen.ghannam(a)amd.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/x86/kernel/cpu/mce/severity.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
--- a/arch/x86/kernel/cpu/mce/severity.c~x86-mce-add-ex_type_efault_reg-as-in-kernel-recovery-context-to-fix-copy-from-user-operations-regression
+++ a/arch/x86/kernel/cpu/mce/severity.c
@@ -16,6 +16,7 @@
#include <asm/traps.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <linux/extable.h>
#include "internal.h"
@@ -285,7 +286,8 @@ static bool is_copy_from_user(struct pt_
*/
static noinstr int error_context(struct mce *m, struct pt_regs *regs)
{
- int fixup_type;
+ const struct exception_table_entry *e;
+ int fixup_type, imm;
bool copy_user;
if ((m->cs & 3) == 3)
@@ -294,9 +296,14 @@ static noinstr int error_context(struct
if (!mc_recoverable(m->mcgstatus))
return IN_KERNEL;
+ e = search_exception_tables(m->ip);
+ if (!e)
+ return IN_KERNEL;
+
/* Allow instrumentation around external facilities usage. */
instrumentation_begin();
- fixup_type = ex_get_fixup_type(m->ip);
+ fixup_type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
+ imm = FIELD_GET(EX_DATA_IMM_MASK, e->data);
copy_user = is_copy_from_user(regs);
instrumentation_end();
@@ -304,9 +311,13 @@ static noinstr int error_context(struct
case EX_TYPE_UACCESS:
if (!copy_user)
return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
-
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ case EX_TYPE_IMM_REG:
+ if (!copy_user || imm != -EFAULT)
+ return IN_KERNEL;
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
_
Patches currently in -mm which might be from xueshuai(a)linux.alibaba.com are
x86-mce-collect-error-message-for-severities-below-mce_panic_severity.patch
x86-mce-dump-error-msg-from-severities.patch
x86-mce-add-ex_type_efault_reg-as-in-kernel-recovery-context-to-fix-copy-from-user-operations-regression.patch
mm-hwpoison-fix-incorrect-not-recovered-report-for-recovered-clean-pages.patch
mm-memory-failure-move-return-value-documentation-to-function-declaration.patch
The patch titled
Subject: m68k: sun3: add check for __pgd_alloc()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
m68k-sun3-add-check-for-__pgd_alloc.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Haoxiang Li <haoxiang_li2024(a)163.com>
Subject: m68k: sun3: add check for __pgd_alloc()
Date: Tue, 18 Feb 2025 00:00:17 +0800
Add check for the return value of __pgd_alloc() in pgd_alloc() to prevent
null pointer dereference.
Link: https://lkml.kernel.org/r/20250217160017.2375536-1-haoxiang_li2024@163.com
Fixes: a9b3c355c2e6 ("asm-generic: pgalloc: provide generic __pgd_{alloc,free}")
Signed-off-by: Haoxiang Li <haoxiang_li2024(a)163.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Geert Uytterhoeven <geert(a)linux-m68k.org>
Cc: Kevin Brodsky <kevin.brodsky(a)arm.com>
Cc: Qi Zheng <zhengqi.arch(a)bytedance.com>
Cc: Sam Creasey <sammy(a)sammy.net>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/m68k/include/asm/sun3_pgalloc.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/arch/m68k/include/asm/sun3_pgalloc.h~m68k-sun3-add-check-for-__pgd_alloc
+++ a/arch/m68k/include/asm/sun3_pgalloc.h
@@ -44,8 +44,10 @@ static inline pgd_t * pgd_alloc(struct m
pgd_t *new_pgd;
new_pgd = __pgd_alloc(mm, 0);
- memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE);
- memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT));
+ if (likely(new_pgd != NULL)) {
+ memcpy(new_pgd, swapper_pg_dir, PAGE_SIZE);
+ memset(new_pgd, 0, (PAGE_OFFSET >> PGDIR_SHIFT));
+ }
return new_pgd;
}
_
Patches currently in -mm which might be from haoxiang_li2024(a)163.com are
m68k-sun3-add-check-for-__pgd_alloc.patch
The patch titled
Subject: selftests/damon/damos_quota_goal: handle minimum quota that cannot be further reduced
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: selftests/damon/damos_quota_goal: handle minimum quota that cannot be further reduced
Date: Mon, 17 Feb 2025 10:23:04 -0800
damos_quota_goal.py selftest see if DAMOS quota goals tuning feature
increases or reduces the effective size quota for given score as expected.
The tuning feature sets the minimum quota size as one byte, so if the
effective size quota is already one, we cannot expect it further be
reduced. However the test is not aware of the edge case, and fails since
it shown no expected change of the effective quota. Handle the case by
updating the failure logic for no change to see if it was the case, and
simply skips to next test input.
Link: https://lkml.kernel.org/r/20250217182304.45215-1-sj@kernel.org
Fixes: f1c07c0a1662b ("selftests/damon: add a test for DAMOS quota goal")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202502171423.b28a918d-lkp@intel.com
Cc: Shuah Khan (Samsung OSG) <shuah(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [6.10.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/damon/damos_quota_goal.py | 3 +++
1 file changed, 3 insertions(+)
--- a/tools/testing/selftests/damon/damos_quota_goal.py~selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced
+++ a/tools/testing/selftests/damon/damos_quota_goal.py
@@ -63,6 +63,9 @@ def main():
if last_effective_bytes != 0 else -1.0))
if last_effective_bytes == goal.effective_bytes:
+ # effective quota was already minimum that cannot be more reduced
+ if expect_increase is False and last_effective_bytes == 1:
+ continue
print('efective bytes not changed: %d' % goal.effective_bytes)
exit(1)
_
Patches currently in -mm which might be from sj(a)kernel.org are
selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced.patch
mm-madvise-split-out-mmap-locking-operations-for-madvise.patch
mm-madvise-split-out-madvise-input-validity-check.patch
mm-madvise-split-out-madvise-behavior-execution.patch
mm-madvise-remove-redundant-mmap_lock-operations-from-process_madvise.patch
mm-damon-avoid-applying-damos-action-to-same-entity-multiple-times.patch
mm-damon-core-unset-damos-walk_completed-after-confimed-set.patch
mm-damon-core-do-not-call-damos_walk_control-walk-if-walk-is-completed.patch
mm-damon-core-do-damos-walking-in-entire-regions-granularity.patch
In mii_nway_restart() during the line:
bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR);
The code attempts to call mii->mdio_read which is ch9200_mdio_read().
ch9200_mdio_read() utilises a local buffer, which is initialised
with control_read():
unsigned char buff[2];
However buff is conditionally initialised inside control_read():
if (err == size) {
memcpy(data, buf, size);
}
If the condition of "err == size" is not met, then buff remains
uninitialised. Once this happens the uninitialised buff is accessed
and returned during ch9200_mdio_read():
return (buff[0] | buff[1] << 8);
The problem stems from the fact that ch9200_mdio_read() ignores the
return value of control_read(), leading to uinit-access of buff.
To fix this we should check the return value of control_read()
and return early on error.
Signed-off-by: Qasim Ijaz <qasdev00(a)gmail.com>
Reported-by: syzbot <syzbot+3361c2d6f78a3e0892f9(a)syzkaller.appspotmail.com>
Tested-by: syzbot <syzbot+3361c2d6f78a3e0892f9(a)syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=3361c2d6f78a3e0892f9
Fixes: 4a476bd6d1d9 ("usbnet: New driver for QinHeng CH9200 devices")
Cc: stable(a)vger.kernel.org
---
drivers/net/mii.c | 2 ++
drivers/net/usb/ch9200.c | 7 +++++--
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index 37bc3131d31a..e305bf0f1d04 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -464,6 +464,8 @@ int mii_nway_restart (struct mii_if_info *mii)
/* if autoneg is off, it's an error */
bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR);
+ if (bmcr < 0)
+ return bmcr;
if (bmcr & BMCR_ANENABLE) {
bmcr |= BMCR_ANRESTART;
diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c
index f69d9b902da0..e32d3c282dc1 100644
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -178,6 +178,7 @@ static int ch9200_mdio_read(struct net_device *netdev, int phy_id, int loc)
{
struct usbnet *dev = netdev_priv(netdev);
unsigned char buff[2];
+ int ret;
netdev_dbg(netdev, "%s phy_id:%02x loc:%02x\n",
__func__, phy_id, loc);
@@ -185,8 +186,10 @@ static int ch9200_mdio_read(struct net_device *netdev, int phy_id, int loc)
if (phy_id != 0)
return -ENODEV;
- control_read(dev, REQUEST_READ, 0, loc * 2, buff, 0x02,
- CONTROL_TIMEOUT_MS);
+ ret = control_read(dev, REQUEST_READ, 0, loc * 2, buff, 0x02,
+ CONTROL_TIMEOUT_MS);
+ if (ret != 2)
+ return ret < 0 ? ret : -EINVAL;
return (buff[0] | buff[1] << 8);
}
--
2.39.5
Christoph reports that their rk3399 system dies since we merged
773c05f417fa1 ("irqchip/gic-v3: Work around insecure GIC
integrations").
It appears that some rk3399 have some secure payloads, and that
the firmware sets SCR_EL3.FIQ==1. Obivously, disabling security
in that configuration leads to even more problems.
Let's revisit the workaround by:
- making it rk3399 specific
- checking whether Group-0 is available, which is a good proxy
for SCR_EL3.FIQ being 0
- either apply the workaround if Group-0 is available, or disable
pseudo-NMIs if not
Note that this doesn't mean that the secure side is able to receive
interrupts anyway, as we make all interrupts non-secure anyway.
Clearly, nobody ever tested secure interrupts on this platform.
With that, Christoph is able to use their rk3399.
Reported-by: Christoph Fritz <chf.fritz(a)googlemail.com>
Tested-by: Christoph Fritz <chf.fritz(a)googlemail.com>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
Fixes: 773c05f417fa1 ("irqchip/gic-v3: Work around insecure GIC integrations")
Link: https://lore.kernel.org/r/b1266652fb64857246e8babdf268d0df8f0c36d9.camel@go…
---
drivers/irqchip/irq-gic-v3.c | 53 +++++++++++++++++++++++++++---------
1 file changed, 40 insertions(+), 13 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 76dce0aac2465..270d7a4d85a6d 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -44,6 +44,7 @@ static u8 dist_prio_nmi __ro_after_init = GICV3_PRIO_NMI;
#define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0)
#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1)
#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 2)
+#define FLAGS_WORKAROUND_INSECURE (1ULL << 3)
#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1)
@@ -83,6 +84,8 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
#define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
+static bool nmi_support_forbidden;
+
/*
* There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs
* are potentially stolen by the secure side. Some code, especially code dealing
@@ -163,21 +166,27 @@ static void __init gic_prio_init(void)
{
bool ds;
- ds = gic_dist_security_disabled();
- if (!ds) {
- u32 val;
-
- val = readl_relaxed(gic_data.dist_base + GICD_CTLR);
- val |= GICD_CTLR_DS;
- writel_relaxed(val, gic_data.dist_base + GICD_CTLR);
+ cpus_have_group0 = gic_has_group0();
- ds = gic_dist_security_disabled();
- if (ds)
- pr_warn("Broken GIC integration, security disabled");
+ ds = gic_dist_security_disabled();
+ if ((gic_data.flags & FLAGS_WORKAROUND_INSECURE) && !ds) {
+ if (cpus_have_group0) {
+ u32 val;
+
+ val = readl_relaxed(gic_data.dist_base + GICD_CTLR);
+ val |= GICD_CTLR_DS;
+ writel_relaxed(val, gic_data.dist_base + GICD_CTLR);
+
+ ds = gic_dist_security_disabled();
+ if (ds)
+ pr_warn("Broken GIC integration, security disabled\n");
+ } else {
+ pr_warn("Broken GIC integration, pNMI forbidden\n");
+ nmi_support_forbidden = true;
+ }
}
cpus_have_security_disabled = ds;
- cpus_have_group0 = gic_has_group0();
/*
* How priority values are used by the GIC depends on two things:
@@ -209,7 +218,7 @@ static void __init gic_prio_init(void)
* be in the non-secure range, we program the non-secure values into
* the distributor to match the PMR values we want.
*/
- if (cpus_have_group0 & !cpus_have_security_disabled) {
+ if (cpus_have_group0 && !cpus_have_security_disabled) {
dist_prio_irq = __gicv3_prio_to_ns(dist_prio_irq);
dist_prio_nmi = __gicv3_prio_to_ns(dist_prio_nmi);
}
@@ -1922,6 +1931,18 @@ static bool gic_enable_quirk_arm64_2941627(void *data)
return true;
}
+static bool gic_enable_quirk_rk3399(void *data)
+{
+ struct gic_chip_data *d = data;
+
+ if (of_machine_is_compatible("rockchip,rk3399")) {
+ d->flags |= FLAGS_WORKAROUND_INSECURE;
+ return true;
+ }
+
+ return false;
+}
+
static bool rd_set_non_coherent(void *data)
{
struct gic_chip_data *d = data;
@@ -1996,6 +2017,12 @@ static const struct gic_quirk gic_quirks[] = {
.property = "dma-noncoherent",
.init = rd_set_non_coherent,
},
+ {
+ .desc = "GICv3: Insecure RK3399 integration",
+ .iidr = 0x0000043b,
+ .mask = 0xff000fff,
+ .init = gic_enable_quirk_rk3399,
+ },
{
}
};
@@ -2004,7 +2031,7 @@ static void gic_enable_nmi_support(void)
{
int i;
- if (!gic_prio_masking_enabled())
+ if (!gic_prio_masking_enabled() || nmi_support_forbidden)
return;
rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR,
--
2.39.2
The desired clock frequency was correctly set to 400MHz in the device tree
but was lowered by the driver to 300MHz breaking 4K 60Hz content playback.
Fix the issue by removing the driver call to clk_set_rate(), which reduce
the amount of board specific code.
Fixes: 003afda97c65 ("media: verisilicon: Enable AV1 decoder on rk3588")
Signed-off-by: Nicolas Dufresne <nicolas.dufresne(a)collabora.com>
---
This patch fixes user report of AV1 4K60 decoder not being fast enough
on RK3588 based SoC. This is a break from Hantro original authors
habbit of coding the frequencies in the driver instead of specifying this
frequency in the device tree. The other calls to clk_set_rate() are left
since this would require modifying many dtsi files, which would then be
unsuitable for backport.
---
drivers/media/platform/verisilicon/rockchip_vpu_hw.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
index 964122e7c355934cd80eb442219f6ba51bba8b71..9d8eab33556d62733ec7ec6b5e685c86ba7086e4 100644
--- a/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
+++ b/drivers/media/platform/verisilicon/rockchip_vpu_hw.c
@@ -17,7 +17,6 @@
#define RK3066_ACLK_MAX_FREQ (300 * 1000 * 1000)
#define RK3288_ACLK_MAX_FREQ (400 * 1000 * 1000)
-#define RK3588_ACLK_MAX_FREQ (300 * 1000 * 1000)
#define ROCKCHIP_VPU981_MIN_SIZE 64
@@ -440,10 +439,9 @@ static int rk3066_vpu_hw_init(struct hantro_dev *vpu)
return 0;
}
+/* TODO just remove, the CLK are defined correctly in the DTS */
static int rk3588_vpu981_hw_init(struct hantro_dev *vpu)
{
- /* Bump ACLKs to max. possible freq. to improve performance. */
- clk_set_rate(vpu->clocks[0].clk, RK3588_ACLK_MAX_FREQ);
return 0;
}
@@ -807,7 +805,6 @@ const struct hantro_variant rk3588_vpu981_variant = {
.codec_ops = rk3588_vpu981_codec_ops,
.irqs = rk3588_vpu981_irqs,
.num_irqs = ARRAY_SIZE(rk3588_vpu981_irqs),
- .init = rk3588_vpu981_hw_init,
.clk_names = rk3588_vpu981_vpu_clk_names,
.num_clocks = ARRAY_SIZE(rk3588_vpu981_vpu_clk_names)
};
---
base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
change-id: 20250217-b4-hantro-av1-clock-rate-e5497f1499df
Best regards,
--
Nicolas Dufresne <nicolas.dufresne(a)collabora.com>
damos_quota_goal.py selftest see if DAMOS quota goals tuning feature
increases or reduces the effective size quota for given score as
expected. The tuning feature sets the minimum quota size as one byte,
so if the effective size quota is already one, we cannot expect it
further be reduced. However the test is not aware of the edge case, and
fails since it shown no expected change of the effective quota. Handle
the case by updating the failure logic for no change to see if it was
the case, and simply skips to next test input.
Fixes: f1c07c0a1662b ("selftests/damon: add a test for DAMOS quota goal")
Cc: <stable(a)vger.kernel.org> # 6.10.x
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202502171423.b28a918d-lkp@intel.com
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
tools/testing/selftests/damon/damos_quota_goal.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py
index 18246f3b62f7..f76e0412b564 100755
--- a/tools/testing/selftests/damon/damos_quota_goal.py
+++ b/tools/testing/selftests/damon/damos_quota_goal.py
@@ -63,6 +63,9 @@ def main():
if last_effective_bytes != 0 else -1.0))
if last_effective_bytes == goal.effective_bytes:
+ # effective quota was already minimum that cannot be more reduced
+ if expect_increase is False and last_effective_bytes == 1:
+ continue
print('efective bytes not changed: %d' % goal.effective_bytes)
exit(1)
base-commit: 20017459916819f8ae15ca3840e71fbf0ea8354e
--
2.39.5
From: Darrick J. Wong <djwong(a)kernel.org>
commit 07137e925fa951646325762bda6bd2503dfe64c6 upstream
Quota counter updates are tracked via incore objects which hang off the
xfs_trans object. These changes are then turned into dirty log items in
xfs_trans_apply_dquot_deltas just prior to commiting the log items to
the CIL.
However, updating the incore deltas do not cause XFS_TRANS_DIRTY to be
set on the transaction. In other words, a pure quota counter update
will be silently discarded if there are no other dirty log items
attached to the transaction.
This is currently not the case anywhere in the filesystem because quota
updates always dirty at least one other metadata item, but a subsequent
bug fix will add dquot log item precommits, so we actually need a dirty
dquot log item prior to xfs_trans_run_precommits. Also let's not leave
a logic bomb.
Cc: <stable(a)vger.kernel.org> # v2.6.35
Fixes: 0924378a689ccb ("xfs: split out iclog writing from xfs_trans_commit()")
Signed-off-by: "Darrick J. Wong" <djwong(a)kernel.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
---
fs/xfs/xfs_quota.h | 5 +++--
fs/xfs/xfs_trans.c | 10 +++-------
fs/xfs/xfs_trans_dquot.c | 31 ++++++++++++++++++++++++++-----
3 files changed, 32 insertions(+), 14 deletions(-)
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 23d71a55bbc006..032f3a70f21ddd 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -96,7 +96,8 @@ extern void xfs_trans_free_dqinfo(struct xfs_trans *);
extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
uint, int64_t);
extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
-extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *tp,
+ bool already_locked);
int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, struct xfs_inode *ip,
int64_t dblocks, int64_t rblocks, bool force);
extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
@@ -166,7 +167,7 @@ static inline void xfs_trans_mod_dquot_byino(struct xfs_trans *tp,
{
}
#define xfs_trans_apply_dquot_deltas(tp)
-#define xfs_trans_unreserve_and_mod_dquots(tp)
+#define xfs_trans_unreserve_and_mod_dquots(tp, a)
static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
struct xfs_inode *ip, int64_t dblocks, int64_t rblocks,
bool force)
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ee46051db12dde..39cd11cbe21fcb 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -840,6 +840,7 @@ __xfs_trans_commit(
*/
if (tp->t_flags & XFS_TRANS_SB_DIRTY)
xfs_trans_apply_sb_deltas(tp);
+ xfs_trans_apply_dquot_deltas(tp);
error = xfs_trans_run_precommits(tp);
if (error)
@@ -868,11 +869,6 @@ __xfs_trans_commit(
ASSERT(tp->t_ticket != NULL);
- /*
- * If we need to update the superblock, then do it now.
- */
- xfs_trans_apply_dquot_deltas(tp);
-
xlog_cil_commit(log, tp, &commit_seq, regrant);
xfs_trans_free(tp);
@@ -898,7 +894,7 @@ __xfs_trans_commit(
* the dqinfo portion to be. All that means is that we have some
* (non-persistent) quota reservations that need to be unreserved.
*/
- xfs_trans_unreserve_and_mod_dquots(tp);
+ xfs_trans_unreserve_and_mod_dquots(tp, true);
if (tp->t_ticket) {
if (regrant && !xlog_is_shutdown(log))
xfs_log_ticket_regrant(log, tp->t_ticket);
@@ -992,7 +988,7 @@ xfs_trans_cancel(
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
- xfs_trans_unreserve_and_mod_dquots(tp);
+ xfs_trans_unreserve_and_mod_dquots(tp, false);
if (tp->t_ticket) {
xfs_log_ticket_ungrant(log, tp->t_ticket);
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index b368e13424c4f4..b92eeaa1a2a9e7 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -602,6 +602,24 @@ xfs_trans_apply_dquot_deltas(
ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count);
ASSERT(dqp->q_ino.reserved >= dqp->q_ino.count);
ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count);
+
+ /*
+ * We've applied the count changes and given back
+ * whatever reservation we didn't use. Zero out the
+ * dqtrx fields.
+ */
+ qtrx->qt_blk_res = 0;
+ qtrx->qt_bcount_delta = 0;
+ qtrx->qt_delbcnt_delta = 0;
+
+ qtrx->qt_rtblk_res = 0;
+ qtrx->qt_rtblk_res_used = 0;
+ qtrx->qt_rtbcount_delta = 0;
+ qtrx->qt_delrtb_delta = 0;
+
+ qtrx->qt_ino_res = 0;
+ qtrx->qt_ino_res_used = 0;
+ qtrx->qt_icount_delta = 0;
}
}
}
@@ -638,7 +656,8 @@ xfs_trans_unreserve_and_mod_dquots_hook(
*/
void
xfs_trans_unreserve_and_mod_dquots(
- struct xfs_trans *tp)
+ struct xfs_trans *tp,
+ bool already_locked)
{
int i, j;
struct xfs_dquot *dqp;
@@ -667,10 +686,12 @@ xfs_trans_unreserve_and_mod_dquots(
* about the number of blocks used field, or deltas.
* Also we don't bother to zero the fields.
*/
- locked = false;
+ locked = already_locked;
if (qtrx->qt_blk_res) {
- xfs_dqlock(dqp);
- locked = true;
+ if (!locked) {
+ xfs_dqlock(dqp);
+ locked = true;
+ }
dqp->q_blk.reserved -=
(xfs_qcnt_t)qtrx->qt_blk_res;
}
@@ -691,7 +712,7 @@ xfs_trans_unreserve_and_mod_dquots(
dqp->q_rtb.reserved -=
(xfs_qcnt_t)qtrx->qt_rtblk_res;
}
- if (locked)
+ if (locked && !already_locked)
xfs_dqunlock(dqp);
}
Hello,
This bug report was raised on an IOMMU regression found recently:
https://bugzilla.kernel.org/show_bug.cgi?id=219499
This has been fixed by the following commit in v6.14-rc3:
commit ef75966abf95 ("iommu/amd: Expicitly enable CNTRL.EPHEn bit in
resume path")
I confirmed it backports cleanly to the LTS 6.12.y kernel. Can we
please have it backported there and to 6.13.y?
Thanks!
Hi, all.
recently met an issue: tc-flower not worked when configured dst_port
and src_port range in one rule.
detailed like this:
$ tc qdisc add dev ens38 ingress
$ tc filter add dev ens38 ingress protocol ip flower ip_proto udp \
dst_port 5000 src_port 2000-3000 action drop
I try to find the root cause in kernel source code:
1. FLOW_DISSECTOR_KEY_PORTS and FLOW_DISSECTOR_KEY_PORTS_RANGE flag of
mask->dissector were set
in fl_classify from flow_dissector.c.
2. then skb_flow_dissect -> __skb_flow_dissect -> __skb_flow_dissect_ports.
3. FLOW_DISSECTOR_KEY_PORTS handled and FLOW_DISSECTOR_KEY_PORTS_RANGE
not handled
in __skb_flow_dissect_ports, so tp_range.tp.src was 0 here expected
the actual skb source port.
By the way, __skb_flow_bpf_to_target function may has the same issue.
Please help confirm and fix it, thank you.
source code of __skb_flow_dissect_ports in flow_dissector.c as below:
static void
__skb_flow_dissect_ports(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, const void *data,
int nhoff, u8 ip_proto, int hlen)
{
enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
struct flow_dissector_key_ports *key_ports;
if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
dissector_ports = FLOW_DISSECTOR_KEY_PORTS;
else if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS_RANGE))
dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE;
if (dissector_ports == FLOW_DISSECTOR_KEY_MAX)
return;
key_ports = skb_flow_dissector_target(flow_dissector,
dissector_ports,
target_container);
key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
data, hlen);
}
Best regards.
Currently memremap(MEMREMAP_WB) can produce decrypted/shared mapping:
memremap(MEMREMAP_WB)
arch_memremap_wb()
ioremap_cache()
__ioremap_caller(.encrytped = false)
In such cases, the IORES_MAP_ENCRYPTED flag on the memory will determine
if the resulting mapping is encrypted or decrypted.
Creating a decrypted mapping without explicit request from the caller is
risky:
- It can inadvertently expose the guest's data and compromise the
guest.
- Accessing private memory via shared/decrypted mapping on TDX will
either trigger implicit conversion to shared or #VE (depending on
VMM implementation).
Implicit conversion is destructive: subsequent access to the same
memory via private mapping will trigger a hard-to-debug #VE crash.
The kernel already provides a way to request decrypted mapping
explicitly via the MEMREMAP_DEC flag.
Modify memremap(MEMREMAP_WB) to produce encrypted/private mapping by
default unless MEMREMAP_DEC is specified or if the kernel runs on
a machine with SME enabled.
It fixes the crash due to #VE on kexec in TDX guests if CONFIG_EISA is
enabled.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Cc: stable(a)vger.kernel.org # 6.11+
Cc: Tom Lendacky <thomas.lendacky(a)amd.com>
Cc: Ashish Kalra <ashish.kalra(a)amd.com>
Cc: "Maciej W. Rozycki" <macro(a)orcam.me.uk>
---
arch/x86/include/asm/io.h | 3 +++
arch/x86/mm/ioremap.c | 8 ++++++++
2 files changed, 11 insertions(+)
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index ed580c7f9d0a..1a0dc2b2bf5b 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -175,6 +175,9 @@ extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, un
extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size);
#define ioremap_encrypted ioremap_encrypted
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags);
+#define arch_memremap_wb arch_memremap_wb
+
/**
* ioremap - map bus memory into CPU space
* @offset: bus address of the memory
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 8d29163568a7..a4b23d2e92d2 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -503,6 +503,14 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags)
+{
+ if ((flags & MEMREMAP_DEC) || cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ return (void __force *)ioremap_cache(phys_addr, size);
+
+ return (void __force *)ioremap_encrypted(phys_addr, size);
+}
+
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
--
2.47.2
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 1aaf8c122918aa8897605a9aa1e8ed6600d6f930
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021008-recharger-fastball-ffab@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1aaf8c122918aa8897605a9aa1e8ed6600d6f930 Mon Sep 17 00:00:00 2001
From: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
Date: Tue, 21 Jan 2025 10:01:59 +0800
Subject: [PATCH] mm: gup: fix infinite loop within __get_longterm_locked
We can run into an infinite loop in __get_longterm_locked() when
collect_longterm_unpinnable_folios() finds only folios that are isolated
from the LRU or were never added to the LRU. This can happen when all
folios to be pinned are never added to the LRU, for example when
vm_ops->fault allocated pages using cma_alloc() and never added them to
the LRU.
Fix it by simply taking a look at the list in the single caller, to see if
anything was added.
[zhaoyang.huang(a)unisoc.com: move definition of local]
Link: https://lkml.kernel.org/r/20250122012604.3654667-1-zhaoyang.huang@unisoc.com
Link: https://lkml.kernel.org/r/20250121020159.3636477-1-zhaoyang.huang@unisoc.com
Fixes: 67e139b02d99 ("mm/gup.c: refactor check_and_migrate_movable_pages()")
Signed-off-by: Zhaoyang Huang <zhaoyang.huang(a)unisoc.com>
Reviewed-by: John Hubbard <jhubbard(a)nvidia.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Suggested-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Aijun Sun <aijun.sun(a)unisoc.com>
Cc: Alistair Popple <apopple(a)nvidia.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/mm/gup.c b/mm/gup.c
index 9aaf338cc1f4..3883b307780e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2320,13 +2320,13 @@ static void pofs_unpin(struct pages_or_folios *pofs)
/*
* Returns the number of collected folios. Return value is always >= 0.
*/
-static unsigned long collect_longterm_unpinnable_folios(
+static void collect_longterm_unpinnable_folios(
struct list_head *movable_folio_list,
struct pages_or_folios *pofs)
{
- unsigned long i, collected = 0;
struct folio *prev_folio = NULL;
bool drain_allow = true;
+ unsigned long i;
for (i = 0; i < pofs->nr_entries; i++) {
struct folio *folio = pofs_get_folio(pofs, i);
@@ -2338,8 +2338,6 @@ static unsigned long collect_longterm_unpinnable_folios(
if (folio_is_longterm_pinnable(folio))
continue;
- collected++;
-
if (folio_is_device_coherent(folio))
continue;
@@ -2361,8 +2359,6 @@ static unsigned long collect_longterm_unpinnable_folios(
NR_ISOLATED_ANON + folio_is_file_lru(folio),
folio_nr_pages(folio));
}
-
- return collected;
}
/*
@@ -2439,11 +2435,9 @@ static long
check_and_migrate_movable_pages_or_folios(struct pages_or_folios *pofs)
{
LIST_HEAD(movable_folio_list);
- unsigned long collected;
- collected = collect_longterm_unpinnable_folios(&movable_folio_list,
- pofs);
- if (!collected)
+ collect_longterm_unpinnable_folios(&movable_folio_list, pofs);
+ if (list_empty(&movable_folio_list))
return 0;
return migrate_longterm_unpinnable_folios(&movable_folio_list, pofs);
The xHC resources allocated for USB devices are not released in correct order after resuming in case when while suspend device was reconnected.
This issue has been detected during the fallowing scenario:
- connect hub HS to root port
- connect LS/FS device to hub port
- wait for enumeration to finish
- force DUT to suspend
- reconnect hub attached to root port
- wake DUT
For this scenario during enumeration of USB LS/FS device the Cadence xHC reports completion error code for xHCi commands because the devices was not property disconnected and in result the xHC resources has not been correct freed.
XHCI specification doesn't mention that device can be reset in any order so, we should not treat this issue as Cadence xHC controller bug.
Similar as during disconnecting in this case the device should be cleared starting form the last usb device in tree toward the root hub.
To fix this issue usbcore driver should disconnect all USB devices connected to hub which was reconnected while suspending.
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
drivers/usb/core/hub.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 0cd44f1fd56d..2473cbf317a8 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3627,10 +3627,12 @@ static int finish_port_resume(struct usb_device *udev)
* the device will be rediscovered.
*/
retry_reset_resume:
- if (udev->quirks & USB_QUIRK_RESET)
+ if (udev->quirks & USB_QUIRK_RESET) {
status = -ENODEV;
- else
+ } else {
+ hub_disconnect_children(udev);
status = usb_reset_and_verify_device(udev);
+ }
}
/* 10.5.4.5 says be sure devices in the tree are still there.
--
2.43.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 318e8c339c9a0891c389298bb328ed0762a9935e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021722-poplar-spoilage-a69f@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 318e8c339c9a0891c389298bb328ed0762a9935e Mon Sep 17 00:00:00 2001
From: Patrick Bellasi <derkling(a)google.com>
Date: Wed, 5 Feb 2025 14:04:41 +0000
Subject: [PATCH] x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit
In [1] the meaning of the synthetic IBPB flags has been redefined for a
better separation of concerns:
- ENTRY_IBPB -- issue IBPB on entry only
- IBPB_ON_VMEXIT -- issue IBPB on VM-Exit only
and the Retbleed mitigations have been updated to match this new
semantics.
Commit [2] was merged shortly before [1], and their interaction was not
handled properly. This resulted in IBPB not being triggered on VM-Exit
in all SRSO mitigation configs requesting an IBPB there.
Specifically, an IBPB on VM-Exit is triggered only when
X86_FEATURE_IBPB_ON_VMEXIT is set. However:
- X86_FEATURE_IBPB_ON_VMEXIT is not set for "spec_rstack_overflow=ibpb",
because before [1] having X86_FEATURE_ENTRY_IBPB was enough. Hence,
an IBPB is triggered on entry but the expected IBPB on VM-exit is
not.
- X86_FEATURE_IBPB_ON_VMEXIT is not set also when
"spec_rstack_overflow=ibpb-vmexit" if X86_FEATURE_ENTRY_IBPB is
already set.
That's because before [1] this was effectively redundant. Hence, e.g.
a "retbleed=ibpb spec_rstack_overflow=bpb-vmexit" config mistakenly
reports the machine still vulnerable to SRSO, despite an IBPB being
triggered both on entry and VM-Exit, because of the Retbleed selected
mitigation config.
- UNTRAIN_RET_VM won't still actually do anything unless
CONFIG_MITIGATION_IBPB_ENTRY is set.
For "spec_rstack_overflow=ibpb", enable IBPB on both entry and VM-Exit
and clear X86_FEATURE_RSB_VMEXIT which is made superfluous by
X86_FEATURE_IBPB_ON_VMEXIT. This effectively makes this mitigation
option similar to the one for 'retbleed=ibpb', thus re-order the code
for the RETBLEED_MITIGATION_IBPB option to be less confusing by having
all features enabling before the disabling of the not needed ones.
For "spec_rstack_overflow=ibpb-vmexit", guard this mitigation setting
with CONFIG_MITIGATION_IBPB_ENTRY to ensure UNTRAIN_RET_VM sequence is
effectively compiled in. Drop instead the CONFIG_MITIGATION_SRSO guard,
since none of the SRSO compile cruft is required in this configuration.
Also, check only that the required microcode is present to effectively
enabled the IBPB on VM-Exit.
Finally, update the KConfig description for CONFIG_MITIGATION_IBPB_ENTRY
to list also all SRSO config settings enabled by this guard.
Fixes: 864bcaa38ee4 ("x86/cpu/kvm: Provide UNTRAIN_RET_VM") [1]
Fixes: d893832d0e1e ("x86/srso: Add IBPB on VMEXIT") [2]
Reported-by: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Patrick Bellasi <derkling(a)google.com>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)kernel.org
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87198d957e2f..be2c311f5118 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY
depends on CPU_SUP_AMD && X86_64
default y
help
- Compile the kernel with support for the retbleed=ibpb mitigation.
+ Compile the kernel with support for the retbleed=ibpb and
+ spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations.
config MITIGATION_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5a505aa65489..a5d0998d7604 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,6 +1115,8 @@ static void __init retbleed_select_mitigation(void)
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
/*
* IBPB on entry already obviates the need for
@@ -1124,9 +1126,6 @@ static void __init retbleed_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- mitigate_smt = true;
-
/*
* There is no need for RSB filling: entry_ibpb() ensures
* all predictions, including the RSB, are invalidated,
@@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void)
if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB;
/*
@@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void)
*/
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void)
ibpb_on_vmexit:
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
+ if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
@@ -2676,8 +2683,8 @@ static void __init srso_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
- pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
- }
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
+ }
break;
default:
break;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 318e8c339c9a0891c389298bb328ed0762a9935e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021721-brunch-mankind-56c2@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 318e8c339c9a0891c389298bb328ed0762a9935e Mon Sep 17 00:00:00 2001
From: Patrick Bellasi <derkling(a)google.com>
Date: Wed, 5 Feb 2025 14:04:41 +0000
Subject: [PATCH] x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit
In [1] the meaning of the synthetic IBPB flags has been redefined for a
better separation of concerns:
- ENTRY_IBPB -- issue IBPB on entry only
- IBPB_ON_VMEXIT -- issue IBPB on VM-Exit only
and the Retbleed mitigations have been updated to match this new
semantics.
Commit [2] was merged shortly before [1], and their interaction was not
handled properly. This resulted in IBPB not being triggered on VM-Exit
in all SRSO mitigation configs requesting an IBPB there.
Specifically, an IBPB on VM-Exit is triggered only when
X86_FEATURE_IBPB_ON_VMEXIT is set. However:
- X86_FEATURE_IBPB_ON_VMEXIT is not set for "spec_rstack_overflow=ibpb",
because before [1] having X86_FEATURE_ENTRY_IBPB was enough. Hence,
an IBPB is triggered on entry but the expected IBPB on VM-exit is
not.
- X86_FEATURE_IBPB_ON_VMEXIT is not set also when
"spec_rstack_overflow=ibpb-vmexit" if X86_FEATURE_ENTRY_IBPB is
already set.
That's because before [1] this was effectively redundant. Hence, e.g.
a "retbleed=ibpb spec_rstack_overflow=bpb-vmexit" config mistakenly
reports the machine still vulnerable to SRSO, despite an IBPB being
triggered both on entry and VM-Exit, because of the Retbleed selected
mitigation config.
- UNTRAIN_RET_VM won't still actually do anything unless
CONFIG_MITIGATION_IBPB_ENTRY is set.
For "spec_rstack_overflow=ibpb", enable IBPB on both entry and VM-Exit
and clear X86_FEATURE_RSB_VMEXIT which is made superfluous by
X86_FEATURE_IBPB_ON_VMEXIT. This effectively makes this mitigation
option similar to the one for 'retbleed=ibpb', thus re-order the code
for the RETBLEED_MITIGATION_IBPB option to be less confusing by having
all features enabling before the disabling of the not needed ones.
For "spec_rstack_overflow=ibpb-vmexit", guard this mitigation setting
with CONFIG_MITIGATION_IBPB_ENTRY to ensure UNTRAIN_RET_VM sequence is
effectively compiled in. Drop instead the CONFIG_MITIGATION_SRSO guard,
since none of the SRSO compile cruft is required in this configuration.
Also, check only that the required microcode is present to effectively
enabled the IBPB on VM-Exit.
Finally, update the KConfig description for CONFIG_MITIGATION_IBPB_ENTRY
to list also all SRSO config settings enabled by this guard.
Fixes: 864bcaa38ee4 ("x86/cpu/kvm: Provide UNTRAIN_RET_VM") [1]
Fixes: d893832d0e1e ("x86/srso: Add IBPB on VMEXIT") [2]
Reported-by: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Patrick Bellasi <derkling(a)google.com>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)kernel.org
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87198d957e2f..be2c311f5118 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY
depends on CPU_SUP_AMD && X86_64
default y
help
- Compile the kernel with support for the retbleed=ibpb mitigation.
+ Compile the kernel with support for the retbleed=ibpb and
+ spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations.
config MITIGATION_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5a505aa65489..a5d0998d7604 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,6 +1115,8 @@ static void __init retbleed_select_mitigation(void)
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
/*
* IBPB on entry already obviates the need for
@@ -1124,9 +1126,6 @@ static void __init retbleed_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- mitigate_smt = true;
-
/*
* There is no need for RSB filling: entry_ibpb() ensures
* all predictions, including the RSB, are invalidated,
@@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void)
if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB;
/*
@@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void)
*/
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void)
ibpb_on_vmexit:
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
+ if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
@@ -2676,8 +2683,8 @@ static void __init srso_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
- pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
- }
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
+ }
break;
default:
break;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 318e8c339c9a0891c389298bb328ed0762a9935e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021719-composure-nimble-a2a1@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 318e8c339c9a0891c389298bb328ed0762a9935e Mon Sep 17 00:00:00 2001
From: Patrick Bellasi <derkling(a)google.com>
Date: Wed, 5 Feb 2025 14:04:41 +0000
Subject: [PATCH] x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit
In [1] the meaning of the synthetic IBPB flags has been redefined for a
better separation of concerns:
- ENTRY_IBPB -- issue IBPB on entry only
- IBPB_ON_VMEXIT -- issue IBPB on VM-Exit only
and the Retbleed mitigations have been updated to match this new
semantics.
Commit [2] was merged shortly before [1], and their interaction was not
handled properly. This resulted in IBPB not being triggered on VM-Exit
in all SRSO mitigation configs requesting an IBPB there.
Specifically, an IBPB on VM-Exit is triggered only when
X86_FEATURE_IBPB_ON_VMEXIT is set. However:
- X86_FEATURE_IBPB_ON_VMEXIT is not set for "spec_rstack_overflow=ibpb",
because before [1] having X86_FEATURE_ENTRY_IBPB was enough. Hence,
an IBPB is triggered on entry but the expected IBPB on VM-exit is
not.
- X86_FEATURE_IBPB_ON_VMEXIT is not set also when
"spec_rstack_overflow=ibpb-vmexit" if X86_FEATURE_ENTRY_IBPB is
already set.
That's because before [1] this was effectively redundant. Hence, e.g.
a "retbleed=ibpb spec_rstack_overflow=bpb-vmexit" config mistakenly
reports the machine still vulnerable to SRSO, despite an IBPB being
triggered both on entry and VM-Exit, because of the Retbleed selected
mitigation config.
- UNTRAIN_RET_VM won't still actually do anything unless
CONFIG_MITIGATION_IBPB_ENTRY is set.
For "spec_rstack_overflow=ibpb", enable IBPB on both entry and VM-Exit
and clear X86_FEATURE_RSB_VMEXIT which is made superfluous by
X86_FEATURE_IBPB_ON_VMEXIT. This effectively makes this mitigation
option similar to the one for 'retbleed=ibpb', thus re-order the code
for the RETBLEED_MITIGATION_IBPB option to be less confusing by having
all features enabling before the disabling of the not needed ones.
For "spec_rstack_overflow=ibpb-vmexit", guard this mitigation setting
with CONFIG_MITIGATION_IBPB_ENTRY to ensure UNTRAIN_RET_VM sequence is
effectively compiled in. Drop instead the CONFIG_MITIGATION_SRSO guard,
since none of the SRSO compile cruft is required in this configuration.
Also, check only that the required microcode is present to effectively
enabled the IBPB on VM-Exit.
Finally, update the KConfig description for CONFIG_MITIGATION_IBPB_ENTRY
to list also all SRSO config settings enabled by this guard.
Fixes: 864bcaa38ee4 ("x86/cpu/kvm: Provide UNTRAIN_RET_VM") [1]
Fixes: d893832d0e1e ("x86/srso: Add IBPB on VMEXIT") [2]
Reported-by: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Patrick Bellasi <derkling(a)google.com>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)kernel.org
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87198d957e2f..be2c311f5118 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY
depends on CPU_SUP_AMD && X86_64
default y
help
- Compile the kernel with support for the retbleed=ibpb mitigation.
+ Compile the kernel with support for the retbleed=ibpb and
+ spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations.
config MITIGATION_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5a505aa65489..a5d0998d7604 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,6 +1115,8 @@ static void __init retbleed_select_mitigation(void)
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
/*
* IBPB on entry already obviates the need for
@@ -1124,9 +1126,6 @@ static void __init retbleed_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- mitigate_smt = true;
-
/*
* There is no need for RSB filling: entry_ibpb() ensures
* all predictions, including the RSB, are invalidated,
@@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void)
if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB;
/*
@@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void)
*/
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void)
ibpb_on_vmexit:
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
+ if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
@@ -2676,8 +2683,8 @@ static void __init srso_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
- pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
- }
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
+ }
break;
default:
break;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 318e8c339c9a0891c389298bb328ed0762a9935e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021718-puzzle-prenatal-af0e@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 318e8c339c9a0891c389298bb328ed0762a9935e Mon Sep 17 00:00:00 2001
From: Patrick Bellasi <derkling(a)google.com>
Date: Wed, 5 Feb 2025 14:04:41 +0000
Subject: [PATCH] x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit
In [1] the meaning of the synthetic IBPB flags has been redefined for a
better separation of concerns:
- ENTRY_IBPB -- issue IBPB on entry only
- IBPB_ON_VMEXIT -- issue IBPB on VM-Exit only
and the Retbleed mitigations have been updated to match this new
semantics.
Commit [2] was merged shortly before [1], and their interaction was not
handled properly. This resulted in IBPB not being triggered on VM-Exit
in all SRSO mitigation configs requesting an IBPB there.
Specifically, an IBPB on VM-Exit is triggered only when
X86_FEATURE_IBPB_ON_VMEXIT is set. However:
- X86_FEATURE_IBPB_ON_VMEXIT is not set for "spec_rstack_overflow=ibpb",
because before [1] having X86_FEATURE_ENTRY_IBPB was enough. Hence,
an IBPB is triggered on entry but the expected IBPB on VM-exit is
not.
- X86_FEATURE_IBPB_ON_VMEXIT is not set also when
"spec_rstack_overflow=ibpb-vmexit" if X86_FEATURE_ENTRY_IBPB is
already set.
That's because before [1] this was effectively redundant. Hence, e.g.
a "retbleed=ibpb spec_rstack_overflow=bpb-vmexit" config mistakenly
reports the machine still vulnerable to SRSO, despite an IBPB being
triggered both on entry and VM-Exit, because of the Retbleed selected
mitigation config.
- UNTRAIN_RET_VM won't still actually do anything unless
CONFIG_MITIGATION_IBPB_ENTRY is set.
For "spec_rstack_overflow=ibpb", enable IBPB on both entry and VM-Exit
and clear X86_FEATURE_RSB_VMEXIT which is made superfluous by
X86_FEATURE_IBPB_ON_VMEXIT. This effectively makes this mitigation
option similar to the one for 'retbleed=ibpb', thus re-order the code
for the RETBLEED_MITIGATION_IBPB option to be less confusing by having
all features enabling before the disabling of the not needed ones.
For "spec_rstack_overflow=ibpb-vmexit", guard this mitigation setting
with CONFIG_MITIGATION_IBPB_ENTRY to ensure UNTRAIN_RET_VM sequence is
effectively compiled in. Drop instead the CONFIG_MITIGATION_SRSO guard,
since none of the SRSO compile cruft is required in this configuration.
Also, check only that the required microcode is present to effectively
enabled the IBPB on VM-Exit.
Finally, update the KConfig description for CONFIG_MITIGATION_IBPB_ENTRY
to list also all SRSO config settings enabled by this guard.
Fixes: 864bcaa38ee4 ("x86/cpu/kvm: Provide UNTRAIN_RET_VM") [1]
Fixes: d893832d0e1e ("x86/srso: Add IBPB on VMEXIT") [2]
Reported-by: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Patrick Bellasi <derkling(a)google.com>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)kernel.org
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87198d957e2f..be2c311f5118 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY
depends on CPU_SUP_AMD && X86_64
default y
help
- Compile the kernel with support for the retbleed=ibpb mitigation.
+ Compile the kernel with support for the retbleed=ibpb and
+ spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations.
config MITIGATION_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5a505aa65489..a5d0998d7604 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,6 +1115,8 @@ static void __init retbleed_select_mitigation(void)
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
/*
* IBPB on entry already obviates the need for
@@ -1124,9 +1126,6 @@ static void __init retbleed_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- mitigate_smt = true;
-
/*
* There is no need for RSB filling: entry_ibpb() ensures
* all predictions, including the RSB, are invalidated,
@@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void)
if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB;
/*
@@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void)
*/
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void)
ibpb_on_vmexit:
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
+ if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
@@ -2676,8 +2683,8 @@ static void __init srso_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
- pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
- }
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
+ }
break;
default:
break;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 318e8c339c9a0891c389298bb328ed0762a9935e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025021717-pelt-wick-0392@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 318e8c339c9a0891c389298bb328ed0762a9935e Mon Sep 17 00:00:00 2001
From: Patrick Bellasi <derkling(a)google.com>
Date: Wed, 5 Feb 2025 14:04:41 +0000
Subject: [PATCH] x86/cpu/kvm: SRSO: Fix possible missing IBPB on VM-Exit
In [1] the meaning of the synthetic IBPB flags has been redefined for a
better separation of concerns:
- ENTRY_IBPB -- issue IBPB on entry only
- IBPB_ON_VMEXIT -- issue IBPB on VM-Exit only
and the Retbleed mitigations have been updated to match this new
semantics.
Commit [2] was merged shortly before [1], and their interaction was not
handled properly. This resulted in IBPB not being triggered on VM-Exit
in all SRSO mitigation configs requesting an IBPB there.
Specifically, an IBPB on VM-Exit is triggered only when
X86_FEATURE_IBPB_ON_VMEXIT is set. However:
- X86_FEATURE_IBPB_ON_VMEXIT is not set for "spec_rstack_overflow=ibpb",
because before [1] having X86_FEATURE_ENTRY_IBPB was enough. Hence,
an IBPB is triggered on entry but the expected IBPB on VM-exit is
not.
- X86_FEATURE_IBPB_ON_VMEXIT is not set also when
"spec_rstack_overflow=ibpb-vmexit" if X86_FEATURE_ENTRY_IBPB is
already set.
That's because before [1] this was effectively redundant. Hence, e.g.
a "retbleed=ibpb spec_rstack_overflow=bpb-vmexit" config mistakenly
reports the machine still vulnerable to SRSO, despite an IBPB being
triggered both on entry and VM-Exit, because of the Retbleed selected
mitigation config.
- UNTRAIN_RET_VM won't still actually do anything unless
CONFIG_MITIGATION_IBPB_ENTRY is set.
For "spec_rstack_overflow=ibpb", enable IBPB on both entry and VM-Exit
and clear X86_FEATURE_RSB_VMEXIT which is made superfluous by
X86_FEATURE_IBPB_ON_VMEXIT. This effectively makes this mitigation
option similar to the one for 'retbleed=ibpb', thus re-order the code
for the RETBLEED_MITIGATION_IBPB option to be less confusing by having
all features enabling before the disabling of the not needed ones.
For "spec_rstack_overflow=ibpb-vmexit", guard this mitigation setting
with CONFIG_MITIGATION_IBPB_ENTRY to ensure UNTRAIN_RET_VM sequence is
effectively compiled in. Drop instead the CONFIG_MITIGATION_SRSO guard,
since none of the SRSO compile cruft is required in this configuration.
Also, check only that the required microcode is present to effectively
enabled the IBPB on VM-Exit.
Finally, update the KConfig description for CONFIG_MITIGATION_IBPB_ENTRY
to list also all SRSO config settings enabled by this guard.
Fixes: 864bcaa38ee4 ("x86/cpu/kvm: Provide UNTRAIN_RET_VM") [1]
Fixes: d893832d0e1e ("x86/srso: Add IBPB on VMEXIT") [2]
Reported-by: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Patrick Bellasi <derkling(a)google.com>
Reviewed-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Cc: stable(a)kernel.org
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 87198d957e2f..be2c311f5118 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY
depends on CPU_SUP_AMD && X86_64
default y
help
- Compile the kernel with support for the retbleed=ibpb mitigation.
+ Compile the kernel with support for the retbleed=ibpb and
+ spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations.
config MITIGATION_IBRS_ENTRY
bool "Enable IBRS on kernel entry"
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5a505aa65489..a5d0998d7604 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1115,6 +1115,8 @@ static void __init retbleed_select_mitigation(void)
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ mitigate_smt = true;
/*
* IBPB on entry already obviates the need for
@@ -1124,9 +1126,6 @@ static void __init retbleed_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
- setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
- mitigate_smt = true;
-
/*
* There is no need for RSB filling: entry_ibpb() ensures
* all predictions, including the RSB, are invalidated,
@@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void)
if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB;
/*
@@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void)
*/
setup_clear_cpu_cap(X86_FEATURE_UNRET);
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
+ /*
+ * There is no need for RSB filling: entry_ibpb() ensures
+ * all predictions, including the RSB, are invalidated,
+ * regardless of IBPB implementation.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
@@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void)
ibpb_on_vmexit:
case SRSO_CMD_IBPB_ON_VMEXIT:
- if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) {
- if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) {
+ if (has_microcode) {
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
@@ -2676,8 +2683,8 @@ static void __init srso_select_mitigation(void)
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
}
} else {
- pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n");
- }
+ pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n");
+ }
break;
default:
break;