The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182911148202(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
03ebe48e235f ("Merge branches 'iommu/fixes', 'arm/renesas', 'arm/mediatek', 'arm/tegra', 'arm/omap', 'arm/smmu', 'x86/vt-d', 'x86/amd' and 'core' into next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182911197225(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
03ebe48e235f ("Merge branches 'iommu/fixes', 'arm/renesas', 'arm/mediatek', 'arm/tegra', 'arm/omap', 'arm/smmu', 'x86/vt-d', 'x86/amd' and 'core' into next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818291020229(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182909225188(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b6b26d86c61c441144c72f842f7469bb686e1211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182908118156(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
b6b26d86c61c ("iommu/amd: Add a length limitation for the ivrs_acpihid command-line parameter")
1198d2316dc4 ("iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet and ivrs_acpihid options")
bbe3a106580c ("iommu/amd: Add PCI segment support for ivrs_[ioapic/hpet/acpihid] commands")
42bb5aa04338 ("iommu/amd: Increase timeout waiting for GA log enablement")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b6b26d86c61c441144c72f842f7469bb686e1211 Mon Sep 17 00:00:00 2001
From: Gavrilov Ilia <Ilia.Gavrilov(a)infotecs.ru>
Date: Thu, 2 Feb 2023 08:26:56 +0000
Subject: [PATCH] iommu/amd: Add a length limitation for the ivrs_acpihid
command-line parameter
The 'acpiid' buffer in the parse_ivrs_acpihid function may overflow,
because the string specifier in the format string sscanf()
has no width limitation.
Found by InfoTeCS on behalf of Linux Verification Center
(linuxtesting.org) with SVACE.
Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable(a)vger.kernel.org
Signed-off-by: Ilia.Gavrilov <Ilia.Gavrilov(a)infotecs.ru>
Reviewed-by: Kim Phillips <kim.phillips(a)amd.com>
Link: https://lore.kernel.org/r/20230202082719.1513849-1-Ilia.Gavrilov@infotecs.ru
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 467b194975b3..19a46b9f7357 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3475,15 +3475,26 @@ static int __init parse_ivrs_hpet(char *str)
return 1;
}
+#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN)
+
static int __init parse_ivrs_acpihid(char *str)
{
u32 seg = 0, bus, dev, fn;
char *hid, *uid, *p, *addr;
- char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
+ char acpiid[ACPIID_LEN] = {0};
int i;
addr = strchr(str, '@');
if (!addr) {
+ addr = strchr(str, '=');
+ if (!addr)
+ goto not_found;
+
+ ++addr;
+
+ if (strlen(addr) > ACPIID_LEN)
+ goto not_found;
+
if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
@@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str)
/* We have the '@', make it the terminator to get just the acpiid */
*addr++ = 0;
+ if (strlen(str) > ACPIID_LEN + 1)
+ goto not_found;
+
if (sscanf(str, "=%s", acpiid) != 1)
goto not_found;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 4e7d2a8f0b52abf23b1dc13b3d88bc0923383cd5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818283215417(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
4e7d2a8f0b52 ("ktest.pl: Add RUN_TIMEOUT option with default unlimited")
3e1d3678844b ("ktest: Add CONNECT_TIMEOUT to change the connection timeout time")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4e7d2a8f0b52abf23b1dc13b3d88bc0923383cd5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Wed, 18 Jan 2023 16:37:25 -0500
Subject: [PATCH] ktest.pl: Add RUN_TIMEOUT option with default unlimited
There is a disconnect between the run_command function and the
wait_for_input. The wait_for_input has a default timeout of 2 minutes. But
if that happens, the run_command loop will exit out to the waitpid() of
the executing command. This fails in that it no longer monitors the
command, and also, the ssh to the test box can hang when its finished, as
it's waiting for the pipe it's writing to to flush, but the loop that
reads that pipe has already exited, leaving the command stuck, and the
test hangs.
Instead, make the default "wait_for_input" of the run_command infinite,
and allow the user to override it if they want with a default timeout
option "RUN_TIMEOUT".
But this fixes the hang that happens when the pipe is full and the ssh
session never exits.
Cc: stable(a)vger.kernel.org
Fixes: 6e98d1b4415fe ("ktest: Add timeout to ssh command")
Signed-off-by: Steven Rostedt <rostedt(a)goodmis.org>
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 74801811372f..822794ca4029 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -178,6 +178,7 @@ my $store_failures;
my $store_successes;
my $test_name;
my $timeout;
+my $run_timeout;
my $connect_timeout;
my $config_bisect_exec;
my $booted_timeout;
@@ -340,6 +341,7 @@ my %option_map = (
"STORE_SUCCESSES" => \$store_successes,
"TEST_NAME" => \$test_name,
"TIMEOUT" => \$timeout,
+ "RUN_TIMEOUT" => \$run_timeout,
"CONNECT_TIMEOUT" => \$connect_timeout,
"CONFIG_BISECT_EXEC" => \$config_bisect_exec,
"BOOTED_TIMEOUT" => \$booted_timeout,
@@ -1858,6 +1860,14 @@ sub run_command {
$command =~ s/\$SSH_USER/$ssh_user/g;
$command =~ s/\$MACHINE/$machine/g;
+ if (!defined($timeout)) {
+ $timeout = $run_timeout;
+ }
+
+ if (!defined($timeout)) {
+ $timeout = -1; # tell wait_for_input to wait indefinitely
+ }
+
doprint("$command ... ");
$start_time = time;
@@ -1884,13 +1894,10 @@ sub run_command {
while (1) {
my $fp = \*CMD;
- if (defined($timeout)) {
- doprint "timeout = $timeout\n";
- }
my $line = wait_for_input($fp, $timeout);
if (!defined($line)) {
my $now = time;
- if (defined($timeout) && (($now - $start_time) >= $timeout)) {
+ if ($timeout >= 0 && (($now - $start_time) >= $timeout)) {
doprint "Hit timeout of $timeout, killing process\n";
$hit_timeout = 1;
kill 9, $pid;
@@ -2062,6 +2069,11 @@ sub wait_for_input {
$time = $timeout;
}
+ if ($time < 0) {
+ # Negative number means wait indefinitely
+ undef $time;
+ }
+
$rin = '';
vec($rin, fileno($fp), 1) = 1;
vec($rin, fileno(\*STDIN), 1) = 1;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 2d0fe15a096d..f43477a9b857 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -817,6 +817,11 @@
# is issued instead of a reboot.
# CONNECT_TIMEOUT = 25
+# The timeout in seconds for how long to wait for any running command
+# to timeout. If not defined, it will let it go indefinitely.
+# (default undefined)
+#RUN_TIMEOUT = 600
+
# In between tests, a reboot of the box may occur, and this
# is the time to wait for the console after it stops producing
# output. Some machines may not produce a large lag on reboot
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182774218237(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
9840cfcb97fc ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818277111745(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
9840cfcb97fc ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182764221188(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
9840cfcb97fc ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781827635613(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
9840cfcb97fc ("Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 4fbd2f83fda0ca44a2ec6421ca3508b355b31858
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781827629218(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
4fbd2f83fda0 ("kprobes: Fix to handle forcibly unoptimized kprobes on freeing_list")
223a76b268c9 ("kprobes: Fix coding style issues")
9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
02afb8d6048d ("kprobe: Simplify prepare_kprobe() by dropping redundant version")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4fbd2f83fda0ca44a2ec6421ca3508b355b31858 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
Date: Tue, 21 Feb 2023 08:49:16 +0900
Subject: [PATCH] kprobes: Fix to handle forcibly unoptimized kprobes on
freeing_list
Since forcibly unoptimized kprobes will be put on the freeing_list directly
in the unoptimize_kprobe(), do_unoptimize_kprobes() must continue to check
the freeing_list even if unoptimizing_list is empty.
This bug can happen if a kprobe is put in an instruction which is in the
middle of the jump-replaced instruction sequence of an optprobe, *and* the
optprobe is recently unregistered and queued on unoptimizing_list.
In this case, the optprobe will be unoptimized forcibly (means immediately)
and put it into the freeing_list, expecting the optprobe will be handled in
do_unoptimize_kprobe().
But if there is no other optprobes on the unoptimizing_list, current code
returns from the do_unoptimize_kprobe() soon and does not handle the
optprobe which is on the freeing_list. Then the optprobe will hit the
WARN_ON_ONCE() in the do_free_cleaned_kprobes(), because it is not handled
in the latter loop of the do_unoptimize_kprobe().
To solve this issue, do not return from do_unoptimize_kprobes() immediately
even if unoptimizing_list is empty.
Moreover, this change affects another case. kill_optimized_kprobes() expects
kprobe_optimizer() will just free the optprobe on freeing_list.
So I changed it to just do list_move() to freeing_list if optprobes are on
unoptimizing list. And the do_unoptimize_kprobe() will skip
arch_disarm_kprobe() if the probe on freeing_list has gone flag.
Link: https://lore.kernel.org/all/Y8URdIfVr3pq2X8w@xpf.sh.intel.com/
Link: https://lore.kernel.org/all/167448024501.3253718.13037333683110512967.stgit…
Fixes: e4add247789e ("kprobes: Fix optimize_kprobe()/unoptimize_kprobe() cancellation logic")
Reported-by: Pengfei Xu <pengfei.xu(a)intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Cc: stable(a)vger.kernel.org
Acked-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1c18ecf9f98b..6b6aff00b3b6 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -555,17 +555,15 @@ static void do_unoptimize_kprobes(void)
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
- /* Unoptimization must be done anytime */
- if (list_empty(&unoptimizing_list))
- return;
+ if (!list_empty(&unoptimizing_list))
+ arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
- /* Loop on 'freeing_list' for disarming */
+ /* Loop on 'freeing_list' for disarming and removing from kprobe hash list */
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Switching from detour code to origin */
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- /* Disarm probes if marked disabled */
- if (kprobe_disabled(&op->kp))
+ /* Disarm probes if marked disabled and not gone */
+ if (kprobe_disabled(&op->kp) && !kprobe_gone(&op->kp))
arch_disarm_kprobe(&op->kp);
if (kprobe_unused(&op->kp)) {
/*
@@ -797,14 +795,13 @@ static void kill_optimized_kprobe(struct kprobe *p)
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
- /* Enqueue if it is unused */
- list_add(&op->list, &freeing_list);
/*
- * Remove unused probes from the hash list. After waiting
- * for synchronization, this probe is reclaimed.
- * (reclaiming is done by do_free_cleaned_kprobes().)
+ * Unused kprobe is on unoptimizing or freeing list. We move it
+ * to freeing_list and let the kprobe_optimizer() remove it from
+ * the kprobe hash list and free it.
*/
- hlist_del_rcu(&op->kp.hlist);
+ if (optprobe_queued_unopt(op))
+ list_move(&op->list, &freeing_list);
}
/* Don't touch the code, because it is already freed. */
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 83d29d439cd3ef23041570d55841f814af2ecac0
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818274565224(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
83d29d439cd3 ("ktest.pl: Give back console on Ctrt^C on monitor")
c2d84ddb338c ("ktest.pl: Add MAIL_COMMAND option to define how to send email")
59f89eb1e3dd ("ktest.pl: Use run_command to execute sending mail")
8604b0c4bc9a ("ktest.pl: Kill test if mailer is not supported")
be1546b87f3b ("ktest.pl: Add MAIL_PATH option to define where to find the mailer")
f5ef48855733 ("ktest.pl: No need to print no mailer is specified when mailto is not")
eaaa1e283ada ("Ktest: add email options to sample.config")
92db453e7eeb ("Ktest: Add SigInt handling")
2ceb2d85b669 ("Ktest: Add email support")
40667fb5fda0 ("ktest.pl: Make finding config-bisect.pl dynamic")
133087f0623e ("ktest.pl: Have ktest.pl pass -r to config-bisect.pl to reset bisect")
b337f9790a0c ("ktest.pl: Allow for the config-bisect.pl output to display to console")
a9adc261e978 ("ktest: Use config-bisect.pl in ktest.pl")
0f0db065999c ("ktest: Add standalone config-bisect.pl program")
3e1d3678844b ("ktest: Add CONNECT_TIMEOUT to change the connection timeout time")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 83d29d439cd3ef23041570d55841f814af2ecac0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt(a)goodmis.org>
Date: Wed, 18 Jan 2023 16:32:13 -0500
Subject: [PATCH] ktest.pl: Give back console on Ctrt^C on monitor
When monitoring the console output, the stdout is being redirected to do
so. If Ctrl^C is hit during this mode, the stdout is not back to the
console, the user does not see anything they type (no echo).
Add "end_monitor" to the SIGINT interrupt handler to give back the console
on Ctrl^C.
Cc: stable(a)vger.kernel.org
Fixes: 9f2cdcbbb90e7 ("ktest: Give console process a dedicated tty")
Signed-off-by: Steven Rostedt <rostedt(a)goodmis.org>
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 62823a4232ab..74801811372f 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -4201,6 +4201,9 @@ sub send_email {
}
sub cancel_test {
+ if ($monitor_cnt) {
+ end_monitor;
+ }
if ($email_when_canceled) {
my $name = get_test_name;
send_email("KTEST: Your [$name] test was cancelled",
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e5cfefa97bccf956ea0bb6464c1f6c84fd7a8d9f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818241553113(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e5cfefa97bcc ("block: fix scan partition for exclusively open device again")
0f77b29ad14e ("block: Revert "block: Do not reread partition table on exclusively open device"")
36369f46e917 ("block: Do not reread partition table on exclusively open device")
704b914f15fb ("blk-mq: move srcu from blk_mq_hw_ctx to request_queue")
2a904d00855f ("blk-mq: remove hctx_lock and hctx_unlock")
1e9c23034d7b ("blk-mq: move more plug handling from blk_mq_submit_bio into blk_add_rq_to_plug")
0c5bcc92d94a ("blk-mq: simplify the plug handling in blk_mq_submit_bio")
e16e506ccd67 ("block: merge disk_scan_partitions and blkdev_reread_part")
95febeb61bf8 ("block: fix missing queue put in error path")
b637108a4022 ("blk-mq: fix filesystem I/O request allocation")
b131f2011115 ("blk-mq: rename blk_attempt_bio_merge")
9ef4d0209cba ("blk-mq: add one API for waiting until quiesce is done")
900e08075202 ("block: move queue enter logic into blk_mq_submit_bio()")
c98cb5bbdab1 ("block: make bio_queue_enter() fast-path available inline")
71539717c105 ("block: split request allocation components into helpers")
a1cb65377e70 ("blk-mq: only try to run plug merge if request has same queue with incoming bio")
781dd830ec4f ("block: move RQF_ELV setting into allocators")
a2247f19ee1c ("block: Add independent access ranges support")
e94f68527a35 ("block: kill extra rcu lock/unlock in queue enter")
179ae84f7ef5 ("block: clean up blk_mq_submit_bio() merging")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5cfefa97bccf956ea0bb6464c1f6c84fd7a8d9f Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Fri, 17 Feb 2023 10:22:00 +0800
Subject: [PATCH] block: fix scan partition for exclusively open device again
As explained in commit 36369f46e917 ("block: Do not reread partition table
on exclusively open device"), reread partition on the device that is
exclusively opened by someone else is problematic.
This patch will make sure partition scan will only be proceed if current
thread open the device exclusively, or the device is not opened
exclusively, and in the later case, other scanners and exclusive openers
will be blocked temporarily until partition scan is done.
Fixes: 10c70d95c0f2 ("block: remove the bd_openers checks in blk_drop_partitions")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Link: https://lore.kernel.org/r/20230217022200.3092987-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/genhd.c b/block/genhd.c
index 820e27d88cbf..c0a73cd76bb1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -359,6 +359,7 @@ EXPORT_SYMBOL_GPL(disk_uevent);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
{
struct block_device *bdev;
+ int ret = 0;
if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
return -EINVAL;
@@ -368,11 +369,27 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
return -EBUSY;
set_bit(GD_NEED_PART_SCAN, &disk->state);
- bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
+ /*
+ * If the device is opened exclusively by current thread already, it's
+ * safe to scan partitons, otherwise, use bd_prepare_to_claim() to
+ * synchronize with other exclusive openers and other partition
+ * scanners.
+ */
+ if (!(mode & FMODE_EXCL)) {
+ ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions);
+ if (ret)
+ return ret;
+ }
+
+ bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- blkdev_put(bdev, mode);
- return 0;
+ ret = PTR_ERR(bdev);
+ else
+ blkdev_put(bdev, mode);
+
+ if (!(mode & FMODE_EXCL))
+ bd_abort_claiming(disk->part0, disk_scan_partitions);
+ return ret;
}
/**
@@ -494,6 +511,11 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
if (ret)
goto out_unregister_bdi;
+ /* Make sure the first partition scan will be proceed */
+ if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
+ !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+
bdev_add(disk->part0, ddev->devt);
if (get_capacity(disk))
disk_scan_partitions(disk, FMODE_READ);
diff --git a/block/ioctl.c b/block/ioctl.c
index 6dd49d877584..9c5f637ff153 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -528,7 +528,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
return -EACCES;
if (bdev_is_partition(bdev))
return -EINVAL;
- return disk_scan_partitions(bdev->bd_disk, mode & ~FMODE_EXCL);
+ return disk_scan_partitions(bdev->bd_disk, mode);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACETEARDOWN:
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e5cfefa97bccf956ea0bb6464c1f6c84fd7a8d9f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182415126165(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e5cfefa97bcc ("block: fix scan partition for exclusively open device again")
0f77b29ad14e ("block: Revert "block: Do not reread partition table on exclusively open device"")
36369f46e917 ("block: Do not reread partition table on exclusively open device")
704b914f15fb ("blk-mq: move srcu from blk_mq_hw_ctx to request_queue")
2a904d00855f ("blk-mq: remove hctx_lock and hctx_unlock")
1e9c23034d7b ("blk-mq: move more plug handling from blk_mq_submit_bio into blk_add_rq_to_plug")
0c5bcc92d94a ("blk-mq: simplify the plug handling in blk_mq_submit_bio")
e16e506ccd67 ("block: merge disk_scan_partitions and blkdev_reread_part")
95febeb61bf8 ("block: fix missing queue put in error path")
b637108a4022 ("blk-mq: fix filesystem I/O request allocation")
b131f2011115 ("blk-mq: rename blk_attempt_bio_merge")
9ef4d0209cba ("blk-mq: add one API for waiting until quiesce is done")
900e08075202 ("block: move queue enter logic into blk_mq_submit_bio()")
c98cb5bbdab1 ("block: make bio_queue_enter() fast-path available inline")
71539717c105 ("block: split request allocation components into helpers")
a1cb65377e70 ("blk-mq: only try to run plug merge if request has same queue with incoming bio")
781dd830ec4f ("block: move RQF_ELV setting into allocators")
a2247f19ee1c ("block: Add independent access ranges support")
e94f68527a35 ("block: kill extra rcu lock/unlock in queue enter")
179ae84f7ef5 ("block: clean up blk_mq_submit_bio() merging")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e5cfefa97bccf956ea0bb6464c1f6c84fd7a8d9f Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3(a)huawei.com>
Date: Fri, 17 Feb 2023 10:22:00 +0800
Subject: [PATCH] block: fix scan partition for exclusively open device again
As explained in commit 36369f46e917 ("block: Do not reread partition table
on exclusively open device"), reread partition on the device that is
exclusively opened by someone else is problematic.
This patch will make sure partition scan will only be proceed if current
thread open the device exclusively, or the device is not opened
exclusively, and in the later case, other scanners and exclusive openers
will be blocked temporarily until partition scan is done.
Fixes: 10c70d95c0f2 ("block: remove the bd_openers checks in blk_drop_partitions")
Cc: <stable(a)vger.kernel.org>
Suggested-by: Jan Kara <jack(a)suse.cz>
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Link: https://lore.kernel.org/r/20230217022200.3092987-3-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/block/genhd.c b/block/genhd.c
index 820e27d88cbf..c0a73cd76bb1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -359,6 +359,7 @@ EXPORT_SYMBOL_GPL(disk_uevent);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
{
struct block_device *bdev;
+ int ret = 0;
if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
return -EINVAL;
@@ -368,11 +369,27 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
return -EBUSY;
set_bit(GD_NEED_PART_SCAN, &disk->state);
- bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
+ /*
+ * If the device is opened exclusively by current thread already, it's
+ * safe to scan partitons, otherwise, use bd_prepare_to_claim() to
+ * synchronize with other exclusive openers and other partition
+ * scanners.
+ */
+ if (!(mode & FMODE_EXCL)) {
+ ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions);
+ if (ret)
+ return ret;
+ }
+
+ bdev = blkdev_get_by_dev(disk_devt(disk), mode & ~FMODE_EXCL, NULL);
if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- blkdev_put(bdev, mode);
- return 0;
+ ret = PTR_ERR(bdev);
+ else
+ blkdev_put(bdev, mode);
+
+ if (!(mode & FMODE_EXCL))
+ bd_abort_claiming(disk->part0, disk_scan_partitions);
+ return ret;
}
/**
@@ -494,6 +511,11 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
if (ret)
goto out_unregister_bdi;
+ /* Make sure the first partition scan will be proceed */
+ if (get_capacity(disk) && !(disk->flags & GENHD_FL_NO_PART) &&
+ !test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
+ set_bit(GD_NEED_PART_SCAN, &disk->state);
+
bdev_add(disk->part0, ddev->devt);
if (get_capacity(disk))
disk_scan_partitions(disk, FMODE_READ);
diff --git a/block/ioctl.c b/block/ioctl.c
index 6dd49d877584..9c5f637ff153 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -528,7 +528,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
return -EACCES;
if (bdev_is_partition(bdev))
return -EINVAL;
- return disk_scan_partitions(bdev->bd_disk, mode & ~FMODE_EXCL);
+ return disk_scan_partitions(bdev->bd_disk, mode);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACETEARDOWN:
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 80d2c29e09e663761c2778167a625b25ffe01b6f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182329223204(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
80d2c29e09e6 ("regulator: core: Use ktime_get_boottime() to determine how long a regulator was off")
218320fec294 ("regulator: core: Fix off-on-delay-us for always-on/boot-on regulators")
261f06315cf7 ("regulator: Flag uncontrollable regulators as always_on")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 80d2c29e09e663761c2778167a625b25ffe01b6f Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka(a)chromium.org>
Date: Thu, 23 Feb 2023 00:33:30 +0000
Subject: [PATCH] regulator: core: Use ktime_get_boottime() to determine how
long a regulator was off
For regulators with 'off-on-delay-us' the regulator framework currently
uses ktime_get() to determine how long the regulator has been off
before re-enabling it (after a delay if needed). A problem with using
ktime_get() is that it doesn't account for the time the system is
suspended. As a result a regulator with a longer 'off-on-delay' (e.g.
500ms) that was switched off during suspend might still incurr in a
delay on resume before it is re-enabled, even though the regulator
might have been off for hours. ktime_get_boottime() accounts for
suspend time, use it instead of ktime_get().
Fixes: a8ce7bd89689 ("regulator: core: Fix off_on_delay handling")
Cc: stable(a)vger.kernel.org # 5.13+
Signed-off-by: Matthias Kaehlcke <mka(a)chromium.org>
Reviewed-by: Stephen Boyd <swboyd(a)chromium.org>
Link: https://lore.kernel.org/r/20230223003301.v2.1.I9719661b8eb0a73b8c416f9c26cf…
Signed-off-by: Mark Brown <broonie(a)kernel.org>
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index ae69e493913d..4fcd36055b02 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1584,7 +1584,7 @@ static int set_machine_constraints(struct regulator_dev *rdev)
}
if (rdev->desc->off_on_delay)
- rdev->last_off = ktime_get();
+ rdev->last_off = ktime_get_boottime();
/* If the constraints say the regulator should be on at this point
* and we have control then make sure it is enabled.
@@ -2673,7 +2673,7 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
* this regulator was disabled.
*/
ktime_t end = ktime_add_us(rdev->last_off, rdev->desc->off_on_delay);
- s64 remaining = ktime_us_delta(end, ktime_get());
+ s64 remaining = ktime_us_delta(end, ktime_get_boottime());
if (remaining > 0)
_regulator_delay_helper(remaining);
@@ -2912,7 +2912,7 @@ static int _regulator_do_disable(struct regulator_dev *rdev)
}
if (rdev->desc->off_on_delay)
- rdev->last_off = ktime_get();
+ rdev->last_off = ktime_get_boottime();
trace_regulator_disable_complete(rdev_get_name(rdev));
Hi,
This series of two patches fixes the issue introduced in
cf586021642d80 ("drm/i915/gt: Pipelined page migration") where,
as reported by Matt, in a chain of requests an error is reported
only if happens in the last request.
However Chris noticed that without ensuring exclusivity in the
locking we might end up in some deadlock. That's why patch 1
throttles for the ringspace in order to make sure that no one is
holding it.
Version 1 of this patch has been reviewed by matt and this
version is adding Chris exclusive locking.
Thanks Chris for this work.
Andi
Changelog
=========
v1 -> v2
- Add patch 1 for ensuring exclusive locking of the timeline
- Reword git commit of patch 2.
Andi Shyti (1):
drm/i915/gt: Make sure that errors are propagated through request
chains
Chris Wilson (1):
drm/i915: Throttle for ringspace prior to taking the timeline mutex
drivers/gpu/drm/i915/gt/intel_context.c | 41 +++++++++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_context.h | 2 ++
drivers/gpu/drm/i915/gt/intel_migrate.c | 39 +++++++++++++++++------
drivers/gpu/drm/i915/i915_request.c | 3 ++
4 files changed, 75 insertions(+), 10 deletions(-)
--
2.39.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e74a68468062d7ebd8ce17069e12ccc64cc6a58c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781822685041(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e74a68468062 ("arm64: Reset KASAN tag in copy_highpage with HW tags only")
d77e59a8fccd ("arm64: mte: Lock a page for MTE tag initialisation")
e059853d14ca ("arm64: mte: Fix/clarify the PG_mte_tagged semantics")
a8e5e5146ad0 ("arm64: mte: Avoid setting PG_mte_tagged if no tags cleared or restored")
20794545c146 ("arm64: kasan: Revert "arm64: mte: reset the page tag in page->flags"")
70c248aca9e7 ("mm: kasan: Skip unpoisoning of user pages")
da08e9b79323 ("mm/shmem: convert shmem_swapin_page() to shmem_swapin_folio()")
b1d0ec3a9a25 ("mm/shmem: convert shmem_getpage_gfp to use a folio")
72827e5c2bcb ("mm/shmem: convert shmem_alloc_and_acct_page to use a folio")
069d849cde3a ("mm/shmem: turn shmem_should_replace_page into shmem_should_replace_folio")
b7dd44a12cf2 ("mm/shmem: convert shmem_add_to_page_cache to take a folio")
dfe98499ef28 ("shmem: convert shmem_alloc_hugepage() to use vma_alloc_folio()")
e9d0ca922816 ("kasan, page_alloc: rework kasan_unpoison_pages call site")
7e3cbba65de2 ("kasan, page_alloc: move kernel_init_free_pages in post_alloc_hook")
89b271163328 ("kasan, page_alloc: move SetPageSkipKASanPoison in post_alloc_hook")
9294b1281d0a ("kasan, page_alloc: combine tag_clear_highpage calls in post_alloc_hook")
b42090ae6f3a ("kasan, page_alloc: merge kasan_alloc_pages into post_alloc_hook")
b8491b9052fe ("kasan, page_alloc: refactor init checks in post_alloc_hook")
1c0e5b24f117 ("kasan: only apply __GFP_ZEROTAGS when memory is zeroed")
7c13c163e036 ("kasan, page_alloc: merge kasan_free_pages into free_pages_prepare")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e74a68468062d7ebd8ce17069e12ccc64cc6a58c Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc(a)google.com>
Date: Tue, 14 Feb 2023 21:09:11 -0800
Subject: [PATCH] arm64: Reset KASAN tag in copy_highpage with HW tags only
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
During page migration, the copy_highpage function is used to copy the
page data to the target page. If the source page is a userspace page
with MTE tags, the KASAN tag of the target page must have the match-all
tag in order to avoid tag check faults during subsequent accesses to the
page by the kernel. However, the target page may have been allocated in
a number of ways, some of which will use the KASAN allocator and will
therefore end up setting the KASAN tag to a non-match-all tag. Therefore,
update the target page's KASAN tag to match the source page.
We ended up unintentionally fixing this issue as a result of a bad
merge conflict resolution between commit e059853d14ca ("arm64: mte:
Fix/clarify the PG_mte_tagged semantics") and commit 20794545c146 ("arm64:
kasan: Revert "arm64: mte: reset the page tag in page->flags""), which
preserved a tag reset for PG_mte_tagged pages which was considered to be
unnecessary at the time. Because SW tags KASAN uses separate tag storage,
update the code to only reset the tags when HW tags KASAN is enabled.
Signed-off-by: Peter Collingbourne <pcc(a)google.com>
Link: https://linux-review.googlesource.com/id/If303d8a709438d3ff5af5fd8570650583…
Reported-by: "Kuan-Ying Lee (李冠穎)" <Kuan-Ying.Lee(a)mediatek.com>
Cc: <stable(a)vger.kernel.org> # 6.1
Fixes: 20794545c146 ("arm64: kasan: Revert "arm64: mte: reset the page tag in page->flags"")
Reviewed-by: Andrey Konovalov <andreyknvl(a)gmail.com>
Link: https://lore.kernel.org/r/20230215050911.1433132-1-pcc@google.com
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 8dd5a8fe64b4..4aadcfb01754 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -22,7 +22,8 @@ void copy_highpage(struct page *to, struct page *from)
copy_page(kto, kfrom);
if (system_supports_mte() && page_mte_tagged(from)) {
- page_kasan_tag_reset(to);
+ if (kasan_hw_tags_enabled())
+ page_kasan_tag_reset(to);
/* It's a new page, shouldn't have been tagged yet */
WARN_ON_ONCE(!try_page_mte_tagging(to));
mte_copy_page_tags(kto, kfrom);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 33e2c595e2e4016991ead44933a29d1ef93d5f26
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781821949666(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
33e2c595e2e4 ("ARM: dts: exynos: correct TMU phandle in Exynos5250")
7e86ef5cc896 ("ARM: dts: exynos: Override thermal by label in Exynos5250")
be00300147ae ("ARM: dts: exynos: Move pmu and timer nodes out of soc")
670734f55810 ("ARM: dts: exynos: Add all CPUs in cooling maps")
eb9e16d8573e ("ARM: dts: exynos: Convert exynos5250.dtsi to opp-v2 bindings")
672f33198bee ("arm: dts: exynos: Add missing cooling device properties for CPUs")
c769eaf7a85d ("ARM: dts: exynos: Split Trats2 DTS in preparation for Midas boards")
cd6f55457eb4 ("ARM: dts: exynos: Remove "cooling-{min|max}-level" for CPU nodes")
73a901d09a21 ("ARM: dts: exynos: Add soc node to exynos4")
9097b4bd9fce ("ARM: dts: exynos: Use pmu label in exynos4412")
8ce5c46d0230 ("ARM: dts: exynos: Use labels instead of full paths in exynos4412-trats2")
e030be47ac48 ("ARM: dts: exynos: Use labels instead of full paths in exynos4210")
e58864515240 ("ARM: dts: exynos: Use pinctrl labels in exynos4412-pinctrl")
88c166cec136 ("ARM: dts: exynos: Use pinctrl labels in exynos4210-pinctrl")
3be1ecf291df ("ARM: dts: exynos: Use lower case hex addresses in node unit addresses")
6351fe9375f4 ("ARM: dts: exynos: Add G3D power domain to Exynos5250")
c0d40bb3ac71 ("ARM: dts: exynos: Add audio power domain to Exynos5250")
9fbb4c096323 ("ARM: dts: exynos: Fix power domain node names for Exynos5250")
528832d4c01a ("ARM: dts: exynos: Add audio power domain support to Exynos542x SoCs")
cdd745c8c76b ("ARM: dts: exynos: Remove duplicate definitions of SSS nodes for Exynos5")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e2c595e2e4016991ead44933a29d1ef93d5f26 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:38 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos5250
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle.
Cc: <stable(a)vger.kernel.org>
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Link: https://lore.kernel.org/r/20230209105841.779596-3-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 4708dcd575a7..01751706ff96 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -1107,7 +1107,7 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
+ thermal-sensors = <&tmu>;
cooling-maps {
map0 {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 33e2c595e2e4016991ead44933a29d1ef93d5f26
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182193224134(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
33e2c595e2e4 ("ARM: dts: exynos: correct TMU phandle in Exynos5250")
7e86ef5cc896 ("ARM: dts: exynos: Override thermal by label in Exynos5250")
be00300147ae ("ARM: dts: exynos: Move pmu and timer nodes out of soc")
670734f55810 ("ARM: dts: exynos: Add all CPUs in cooling maps")
eb9e16d8573e ("ARM: dts: exynos: Convert exynos5250.dtsi to opp-v2 bindings")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e2c595e2e4016991ead44933a29d1ef93d5f26 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:38 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos5250
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle.
Cc: <stable(a)vger.kernel.org>
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Link: https://lore.kernel.org/r/20230209105841.779596-3-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 4708dcd575a7..01751706ff96 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -1107,7 +1107,7 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
+ thermal-sensors = <&tmu>;
cooling-maps {
map0 {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 33e2c595e2e4016991ead44933a29d1ef93d5f26
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167818219214813(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
33e2c595e2e4 ("ARM: dts: exynos: correct TMU phandle in Exynos5250")
7e86ef5cc896 ("ARM: dts: exynos: Override thermal by label in Exynos5250")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 33e2c595e2e4016991ead44933a29d1ef93d5f26 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:38 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos5250
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle.
Cc: <stable(a)vger.kernel.org>
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Link: https://lore.kernel.org/r/20230209105841.779596-3-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 4708dcd575a7..01751706ff96 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -1107,7 +1107,7 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
+ thermal-sensors = <&tmu>;
cooling-maps {
map0 {
The interconnect framework currently expects that providers are only
removed when there are no users and after all nodes have been removed.
There is currently nothing that guarantees this to be the case and the
framework does not do any reference counting, but refusing to remove the
provider is never correct as that would leave a dangling pointer to a
resource that is about to be released in the global provider list (e.g.
accessible through debugfs).
Replace the current sanity checks with WARN_ON() so that the provider is
always removed.
Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API")
Cc: stable(a)vger.kernel.org # 5.1: 680f8666baf6: interconnect: Make icc_provider_del() return void
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/interconnect/core.c | 14 ++------------
1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 5217f449eeec..cabb6f5df83e 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -1065,18 +1065,8 @@ EXPORT_SYMBOL_GPL(icc_provider_add);
void icc_provider_del(struct icc_provider *provider)
{
mutex_lock(&icc_lock);
- if (provider->users) {
- pr_warn("interconnect provider still has %d users\n",
- provider->users);
- mutex_unlock(&icc_lock);
- return;
- }
-
- if (!list_empty(&provider->nodes)) {
- pr_warn("interconnect provider still has nodes\n");
- mutex_unlock(&icc_lock);
- return;
- }
+ WARN_ON(provider->users);
+ WARN_ON(!list_empty(&provider->nodes));
list_del(&provider->provider_list);
mutex_unlock(&icc_lock);
--
2.39.2
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 408ab6786dbf6dd696488054c9559681112ef994
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182153823(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
408ab6786dbf ("ARM: dts: exynos: correct TMU phandle in Exynos4210")
1708f56081e2 ("ARM: dts: exynos: Override thermal by label in Exynos4210")
c31b11c3eb4d ("ARM: dts: exynos: Fix language typo and indentation")
9a8665ab920b ("ARM: dts: exynos: Add soc node to exynos4210")
e030be47ac48 ("ARM: dts: exynos: Use labels instead of full paths in exynos4210")
88c166cec136 ("ARM: dts: exynos: Use pinctrl labels in exynos4210-pinctrl")
3be1ecf291df ("ARM: dts: exynos: Use lower case hex addresses in node unit addresses")
6351fe9375f4 ("ARM: dts: exynos: Add G3D power domain to Exynos5250")
c0d40bb3ac71 ("ARM: dts: exynos: Add audio power domain to Exynos5250")
9fbb4c096323 ("ARM: dts: exynos: Fix power domain node names for Exynos5250")
528832d4c01a ("ARM: dts: exynos: Add audio power domain support to Exynos542x SoCs")
cdd745c8c76b ("ARM: dts: exynos: Remove duplicate definitions of SSS nodes for Exynos5")
8dccafaa281a ("arm: dts: fix unit-address leading 0s")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 408ab6786dbf6dd696488054c9559681112ef994 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:37 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos4210
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle. Since thermal-sensors property is
already defined in included exynos4-cpu-thermal.dtsi, drop it from
exynos4210.dtsi to fix the error and remoev redundancy.
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20230209105841.779596-2-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 2c25cc37934e..f8c6c5d1906a 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -393,7 +393,6 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
};
&gic {
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 408ab6786dbf6dd696488054c9559681112ef994
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678182152118124(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
408ab6786dbf ("ARM: dts: exynos: correct TMU phandle in Exynos4210")
1708f56081e2 ("ARM: dts: exynos: Override thermal by label in Exynos4210")
c31b11c3eb4d ("ARM: dts: exynos: Fix language typo and indentation")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 408ab6786dbf6dd696488054c9559681112ef994 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:37 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos4210
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle. Since thermal-sensors property is
already defined in included exynos4-cpu-thermal.dtsi, drop it from
exynos4210.dtsi to fix the error and remoev redundancy.
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20230209105841.779596-2-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 2c25cc37934e..f8c6c5d1906a 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -393,7 +393,6 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
};
&gic {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 408ab6786dbf6dd696488054c9559681112ef994
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781821519722(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
408ab6786dbf ("ARM: dts: exynos: correct TMU phandle in Exynos4210")
1708f56081e2 ("ARM: dts: exynos: Override thermal by label in Exynos4210")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 408ab6786dbf6dd696488054c9559681112ef994 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
Date: Thu, 9 Feb 2023 11:58:37 +0100
Subject: [PATCH] ARM: dts: exynos: correct TMU phandle in Exynos4210
TMU node uses 0 as thermal-sensor-cells, thus thermal zone referencing
it must not have an argument to phandle. Since thermal-sensors property is
already defined in included exynos4-cpu-thermal.dtsi, drop it from
exynos4210.dtsi to fix the error and remoev redundancy.
Fixes: 9843a2236003 ("ARM: dts: Provide dt bindings identical for Exynos TMU")
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20230209105841.779596-2-krzysztof.kozlowski@linar…
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org>
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 2c25cc37934e..f8c6c5d1906a 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -393,7 +393,6 @@
&cpu_thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
- thermal-sensors = <&tmu 0>;
};
&gic {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f0f0cfdc3a024e21161714f2e05f0df3b84d42ad
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678181773130199(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f0f0cfdc3a02 ("mtd: spi-nor: Fix shift-out-of-bounds in spi_nor_set_erase_type")
cb481b92d10f ("mtd: spi-nor: Move SFDP logic out of the core")
a0900d0195d2 ("mtd: spi-nor: Prepare core / manufacturer code split")
69228a0224c5 ("Merge tag 'mtk-mtd-spi-move' into spi-nor/next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f0f0cfdc3a024e21161714f2e05f0df3b84d42ad Mon Sep 17 00:00:00 2001
From: Louis Rannou <lrannou(a)baylibre.com>
Date: Fri, 3 Feb 2023 09:07:54 +0200
Subject: [PATCH] mtd: spi-nor: Fix shift-out-of-bounds in
spi_nor_set_erase_type
spi_nor_set_erase_type() was used either to set or to mask out an erase
type. When we used it to mask out an erase type a shift-out-of-bounds
was hit:
UBSAN: shift-out-of-bounds in drivers/mtd/spi-nor/core.c:2237:24
shift exponent 4294967295 is too large for 32-bit type 'int'
The setting of the size_{shift, mask} and of the opcode are unnecessary
when the erase size is zero, as throughout the code just the erase size
is considered to determine whether an erase type is supported or not.
Setting the opcode to 0xFF was wrong too as nobody guarantees that 0xFF
is an unused opcode. Thus when masking out an erase type, just set the
erase size to zero. This will fix the shift-out-of-bounds.
Fixes: 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories")
Cc: stable(a)vger.kernel.org
Reported-by: Alexander Stein <Alexander.Stein(a)tq-group.com>
Signed-off-by: Louis Rannou <lrannou(a)baylibre.com>
Tested-by: Alexander Stein <Alexander.Stein(a)tq-group.com>
Link: https://lore.kernel.org/r/20230203070754.50677-1-tudor.ambarus@linaro.org
[ta: refine changes, new commit message, fix compilation error]
Signed-off-by: Tudor Ambarus <tudor.ambarus(a)linaro.org>
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index b500655f7937..3012758bbafa 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -2026,6 +2026,15 @@ void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
erase->size_mask = (1 << erase->size_shift) - 1;
}
+/**
+ * spi_nor_mask_erase_type() - mask out a SPI NOR erase type
+ * @erase: pointer to a structure that describes a SPI NOR erase type
+ */
+void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase)
+{
+ erase->size = 0;
+}
+
/**
* spi_nor_init_uniform_erase_map() - Initialize uniform erase map
* @map: the erase map of the SPI NOR
diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
index f6d012e1f681..25423225c29d 100644
--- a/drivers/mtd/spi-nor/core.h
+++ b/drivers/mtd/spi-nor/core.h
@@ -681,6 +681,7 @@ void spi_nor_set_pp_settings(struct spi_nor_pp_command *pp, u8 opcode,
void spi_nor_set_erase_type(struct spi_nor_erase_type *erase, u32 size,
u8 opcode);
+void spi_nor_mask_erase_type(struct spi_nor_erase_type *erase);
struct spi_nor_erase_region *
spi_nor_region_next(struct spi_nor_erase_region *region);
void spi_nor_init_uniform_erase_map(struct spi_nor_erase_map *map,
diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c
index 3acc01c3a900..526c5d57b905 100644
--- a/drivers/mtd/spi-nor/sfdp.c
+++ b/drivers/mtd/spi-nor/sfdp.c
@@ -875,7 +875,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
*/
for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++)
if (!(regions_erase_type & BIT(erase[i].idx)))
- spi_nor_set_erase_type(&erase[i], 0, 0xFF);
+ spi_nor_mask_erase_type(&erase[i]);
return 0;
}
@@ -1089,7 +1089,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor,
erase_type[i].opcode = (dwords[SFDP_DWORD(2)] >>
erase_type[i].idx * 8) & 0xFF;
else
- spi_nor_set_erase_type(&erase_type[i], 0u, 0xFF);
+ spi_nor_mask_erase_type(&erase_type[i]);
}
/*
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x b5d3ae202fbfe055aa2a8ae8524531ee1dcab717
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16780996985892(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
b5d3ae202fbf ("io_uring: handle TIF_NOTIFY_RESUME when checking for task_work")
7cfe7a09489c ("io_uring: clear TIF_NOTIFY_SIGNAL if set and task_work not available")
46a525e199e4 ("io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL")
c0e0d6ba25f1 ("io_uring: add IORING_SETUP_DEFER_TASKRUN")
b4c98d59a787 ("io_uring: introduce io_has_work")
78a861b94959 ("io_uring: add sync cancelation API through io_uring_register()")
c34398a8c018 ("io_uring: remove __io_req_task_work_add")
ed5ccb3beeba ("io_uring: remove priority tw list optimisation")
625d38b3fd34 ("io_uring: improve io_run_task_work()")
4a0fef62788b ("io_uring: optimize io_uring_task layout")
253993210bd8 ("io_uring: introduce locking helpers for CQE posting")
305bef988708 ("io_uring: hide eventfd assumptions in eventfd paths")
affa87db9010 ("io_uring: fix multi ctx cancellation")
d9dee4302a7c ("io_uring: remove ->flush_cqes optimisation")
a830ffd28780 ("io_uring: move io_eventfd_signal()")
9046c6415be6 ("io_uring: reshuffle io_uring/io_uring.h")
d142c3ec8d16 ("io_uring: remove extra io_commit_cqring()")
ab1c84d855cf ("io_uring: make io_uring_types.h public")
68494a65d0e2 ("io_uring: introduce io_req_cqe_overflow()")
faf88dde060f ("io_uring: don't inline __io_get_cqe()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b5d3ae202fbfe055aa2a8ae8524531ee1dcab717 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Tue, 24 Jan 2023 08:24:25 -0700
Subject: [PATCH] io_uring: handle TIF_NOTIFY_RESUME when checking for
task_work
If TIF_NOTIFY_RESUME is set, then we need to call resume_user_mode_work()
for PF_IO_WORKER threads. They never return to usermode, hence never get
a chance to process any items that are marked by this flag. Most notably
this includes the final put of files, but also any throttling markers set
by block cgroups.
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index c68edf9872a5..d58cfe062da9 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -3,6 +3,7 @@
#include <linux/errno.h>
#include <linux/lockdep.h>
+#include <linux/resume_user_mode.h>
#include <linux/io_uring_types.h>
#include <uapi/linux/eventpoll.h>
#include "io-wq.h"
@@ -274,6 +275,13 @@ static inline int io_run_task_work(void)
*/
if (test_thread_flag(TIF_NOTIFY_SIGNAL))
clear_notify_signal();
+ /*
+ * PF_IO_WORKER never returns to userspace, so check here if we have
+ * notify work that needs processing.
+ */
+ if (current->flags & PF_IO_WORKER &&
+ test_thread_flag(TIF_NOTIFY_RESUME))
+ resume_user_mode_work(NULL);
if (task_work_pending(current)) {
__set_current_state(TASK_RUNNING);
task_work_run();
When ucsi_init() fails, ucsi->connector is NULL, yet in case of
ucsi_acpi we may still get events which cause the ucs_acpi code to call
ucsi_connector_change(), which then derefs the NULL ucsi->connector
pointer.
Fix this by not setting ucsi->ntfy inside ucsi_init() until ucsi_init()
has succeeded, so that ucsi_connector_change() ignores the events
because UCSI_ENABLE_NTFY_CONNECTOR_CHANGE is not set in the ntfy mask.
Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes in v2:
-Delay setting ucsi->ntfy in ucsi_init() instead of adding a NULL pointer
check to ucsi_connector_change()
---
drivers/usb/typec/ucsi/ucsi.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 1cf8947c6d66..8cbbb002fefe 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -1205,7 +1205,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
static int ucsi_init(struct ucsi *ucsi)
{
struct ucsi_connector *con;
- u64 command;
+ u64 command, ntfy;
int ret;
int i;
@@ -1217,8 +1217,8 @@ static int ucsi_init(struct ucsi *ucsi)
}
/* Enable basic notifications */
- ucsi->ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR;
- command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
+ ntfy = UCSI_ENABLE_NTFY_CMD_COMPLETE | UCSI_ENABLE_NTFY_ERROR;
+ command = UCSI_SET_NOTIFICATION_ENABLE | ntfy;
ret = ucsi_send_command(ucsi, command, NULL, 0);
if (ret < 0)
goto err_reset;
@@ -1250,12 +1250,13 @@ static int ucsi_init(struct ucsi *ucsi)
}
/* Enable all notifications */
- ucsi->ntfy = UCSI_ENABLE_NTFY_ALL;
- command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
+ ntfy = UCSI_ENABLE_NTFY_ALL;
+ command = UCSI_SET_NOTIFICATION_ENABLE | ntfy;
ret = ucsi_send_command(ucsi, command, NULL, 0);
if (ret < 0)
goto err_unregister;
+ ucsi->ntfy = ntfy;
return 0;
err_unregister:
--
2.39.1
Commit 130a96d698d7 ("usb: typec: ucsi: acpi: Increase command
completion timeout value") increased the timeout from 5 seconds
to 60 seconds due to issues related to alternate mode discovery.
After the alternate mode discovery switch to polled mode
the timeout was reduced, but instead of being set back to
5 seconds it was reduced to 1 second.
This is causing problems when using a Lenovo ThinkPad X1 yoga gen7
connected over Type-C to a LG 27UL850-W (charging DP over Type-C).
When the monitor is already connected at boot the following error
is logged: "PPM init failed (-110)", /sys/class/typec is empty and
on unplugging the NULL pointer deref fixed earlier in this series
happens.
When the monitor is connected after boot the following error
is logged instead: "GET_CONNECTOR_STATUS failed (-110)".
Setting the timeout back to 5 seconds fixes both cases.
Fixes: e08065069fc7 ("usb: typec: ucsi: acpi: Reduce the command completion timeout")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/usb/typec/ucsi/ucsi_acpi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index ce0c8ef80c04..62206a6b8ea7 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -78,7 +78,7 @@ static int ucsi_acpi_sync_write(struct ucsi *ucsi, unsigned int offset,
if (ret)
goto out_clear_bit;
- if (!wait_for_completion_timeout(&ua->complete, HZ))
+ if (!wait_for_completion_timeout(&ua->complete, 5 * HZ))
ret = -ETIMEDOUT;
out_clear_bit:
--
2.39.1
ucsi_init() which runs from a workqueue sets ucsi->connector and
on an error will clear it again.
ucsi->connector gets dereferenced by ucsi_resume(), this checks for
ucsi->connector being NULL in case ucsi_init() has not finished yet;
or in case ucsi_init() has failed.
ucsi_init() setting ucsi->connector and then clearing it again on
an error creates a race where the check in ucsi_resume() may pass,
only to have ucsi->connector free-ed underneath it when ucsi_init()
hits an error.
Fix this race by making ucsi_init() store the connector array in
a local variable and only assign it to ucsi->connector on success.
Fixes: bdc62f2bae8f ("usb: typec: ucsi: Simplified registration and I/O API")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/usb/typec/ucsi/ucsi.c | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 8cbbb002fefe..15a2c91581a8 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -1039,9 +1039,8 @@ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con)
return NULL;
}
-static int ucsi_register_port(struct ucsi *ucsi, int index)
+static int ucsi_register_port(struct ucsi *ucsi, int index, struct ucsi_connector *con)
{
- struct ucsi_connector *con = &ucsi->connector[index];
struct typec_capability *cap = &con->typec_cap;
enum typec_accessory *accessory = cap->accessory;
enum usb_role u_role = USB_ROLE_NONE;
@@ -1204,7 +1203,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index)
*/
static int ucsi_init(struct ucsi *ucsi)
{
- struct ucsi_connector *con;
+ struct ucsi_connector *con, *connector;
u64 command, ntfy;
int ret;
int i;
@@ -1235,16 +1234,15 @@ static int ucsi_init(struct ucsi *ucsi)
}
/* Allocate the connectors. Released in ucsi_unregister() */
- ucsi->connector = kcalloc(ucsi->cap.num_connectors + 1,
- sizeof(*ucsi->connector), GFP_KERNEL);
- if (!ucsi->connector) {
+ connector = kcalloc(ucsi->cap.num_connectors + 1, sizeof(*connector), GFP_KERNEL);
+ if (!connector) {
ret = -ENOMEM;
goto err_reset;
}
/* Register all connectors */
for (i = 0; i < ucsi->cap.num_connectors; i++) {
- ret = ucsi_register_port(ucsi, i);
+ ret = ucsi_register_port(ucsi, i, &connector[i]);
if (ret)
goto err_unregister;
}
@@ -1256,11 +1254,12 @@ static int ucsi_init(struct ucsi *ucsi)
if (ret < 0)
goto err_unregister;
+ ucsi->connector = connector;
ucsi->ntfy = ntfy;
return 0;
err_unregister:
- for (con = ucsi->connector; con->port; con++) {
+ for (con = connector; con->port; con++) {
ucsi_unregister_partner(con);
ucsi_unregister_altmodes(con, UCSI_RECIPIENT_CON);
ucsi_unregister_port_psy(con);
@@ -1269,10 +1268,7 @@ static int ucsi_init(struct ucsi *ucsi)
typec_unregister_port(con->port);
con->port = NULL;
}
-
- kfree(ucsi->connector);
- ucsi->connector = NULL;
-
+ kfree(connector);
err_reset:
memset(&ucsi->cap, 0, sizeof(ucsi->cap));
ucsi_reset_ppm(ucsi);
--
2.39.1
If the controller is not marked as cache coherent, then kernel will
try to ensure coherency during dma-ops and that may cause data corruption.
So, mark the PCIe node as dma-coherent as the devices on PCIe bus are
cache coherent.
Cc: stable(a)vger.kernel.org
Fixes: 92e0ee9f83b3 ("arm64: dts: qcom: sc7280: Add PCIe and PHY related node")
Signed-off-by: Krishna chaitanya chundru <quic_krichai(a)quicinc.com>
---
changes since v1:
- Updated the commit text.
---
---
arch/arm64/boot/dts/qcom/sc7280.dtsi | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi
index bdcb749..8f4ab6b 100644
--- a/arch/arm64/boot/dts/qcom/sc7280.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi
@@ -2131,6 +2131,8 @@
pinctrl-names = "default";
pinctrl-0 = <&pcie1_clkreq_n>;
+ dma-coherent;
+
iommus = <&apps_smmu 0x1c80 0x1>;
iommu-map = <0x0 &apps_smmu 0x1c80 0x1>,
--
2.7.4
The slice IDs for CVPFW, CPUSS1 and CPUWHT currently overflow the 32bit
LLCC config registers, which means it is writing beyond the upper limit
of the ATTR0_CFGn and ATTR1_CFGn range of registers. But the most obvious
impact is the fact that the mentioned slices do not get configured at all,
which will result in reduced performance. Fix that by using the slice ID
values taken from the latest LLCC SC table.
Fixes: ec69dfbdc426 ("soc: qcom: llcc: Add sc8180x and sc8280xp configurations")
Cc: stable(a)vger.kernel.org # 5.19+
Signed-off-by: Abel Vesa <abel.vesa(a)linaro.org>
Tested-by: Juerg Haefliger <juerg.haefliger(a)canonical.com>
Reviewed-by: Sai Prakash Ranjan <quic_saipraka(a)quicinc.com>
Acked-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Reviewed-by: Johan Hovold <johan+linaro(a)kernel.org>
---
The v3 is here:
https://lore.kernel.org/all/20230219165701.2557446-1-abel.vesa@linaro.org/
Changes since v3:
* explicitly mentioned in the commit message the fact that some random
registers might get written and the fact that there is an impact
on performance.
* Added Johan's R-b tag
Changes since v2:
* specifically mentioned the 3 slice IDs that are being fixed and
what is happening without this patch
* added stabke Cc line
* added Juerg's T-b tag
* added Sai's R-b tag
* added Konrad's A-b tag
Changes since v1:
* dropped the LLCC_GPU and LLCC_WRCACHE max_cap changes
* took the new values from documentatio this time rather than
downstream kernel
drivers/soc/qcom/llcc-qcom.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
index 23ce2f78c4ed..26efe12012a0 100644
--- a/drivers/soc/qcom/llcc-qcom.c
+++ b/drivers/soc/qcom/llcc-qcom.c
@@ -191,9 +191,9 @@ static const struct llcc_slice_config sc8280xp_data[] = {
{ LLCC_CVP, 28, 512, 3, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
{ LLCC_APTCM, 30, 1024, 3, 1, 0x0, 0x1, 1, 0, 0, 1, 0, 0 },
{ LLCC_WRCACHE, 31, 1024, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 },
- { LLCC_CVPFW, 32, 512, 1, 0, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
- { LLCC_CPUSS1, 33, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
- { LLCC_CPUHWT, 36, 512, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 },
+ { LLCC_CVPFW, 17, 512, 1, 0, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
+ { LLCC_CPUSS1, 3, 2048, 1, 1, 0xfff, 0x0, 0, 0, 0, 1, 0, 0 },
+ { LLCC_CPUHWT, 5, 512, 1, 1, 0xfff, 0x0, 0, 0, 0, 0, 1, 0 },
};
static const struct llcc_slice_config sdm845_data[] = {
--
2.34.1
From: "andrew.yang" <andrew.yang(a)mediatek.com>
commit 3f98c9a62c338bbe06a215c9491e6166ea39bf82 upstream.
damon_get_folio() would always increase folio _refcount and
folio_isolate_lru() would increase folio _refcount if the folio's lru flag
is set.
If an unevictable folio isolated successfully, there will be two more
_refcount. The one from folio_isolate_lru() will be decreased in
folio_puback_lru(), but the other one from damon_get_folio() will be left
behind. This causes a pin page.
Whatever the case, the _refcount from damon_get_folio() should be
decreased.
Link: https://lkml.kernel.org/r/20230222064223.6735-1-andrew.yang@mediatek.com
Fixes: 57223ac29584 ("mm/damon/paddr: support the pageout scheme")
Signed-off-by: andrew.yang <andrew.yang(a)mediatek.com>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> [5.16.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: SeongJae Park <sj(a)kernel.org>
---
This is a backport of the mainline patch for v6.1.y and v6.2.y.
mm/damon/paddr.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index e1a4315c4be6..402d30b37aba 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -219,12 +219,11 @@ static unsigned long damon_pa_pageout(struct damon_region *r)
put_page(page);
continue;
}
- if (PageUnevictable(page)) {
+ if (PageUnevictable(page))
putback_lru_page(page);
- } else {
+ else
list_add(&page->lru, &page_list);
- put_page(page);
- }
+ put_page(page);
}
applied = reclaim_pages(&page_list);
cond_resched();
--
2.25.1
HELLO,
AM PASCAL DONALD,A CITIZEN OF ENGLAND.I WOULD LIKE TO APPLY THROUGH
THIS MEDIUM FOR YOUR MAXIMUM COOPERATION TO SECURE AN OPPORTUNITY TO
TRANSFER A SUBSTANTIAL FUND FOR AN INVESTMENT IN YOUR COUNTRY.
I WILL BE LOOKING FORWARD TO HEAR FROM YOU URGENTLY.
PLEASE SEND EVERY REPLY TO MY DIRECT EMAIL AS STATED BELOW:
REGARDS,
MR.PASCAL DONALD.(ESQ)
dr.pascaldonald(a)outlook.com
The patch titled
Subject: mm/damon/paddr: fix folio_nr_pages() after folio_put() in damon_pa_mark_accessed_or_deactivate()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-damon-paddr-fix-folio_nr_pages-after-folio_put-in-damon_pa_mark_accessed_or_deactivate.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/paddr: fix folio_nr_pages() after folio_put() in damon_pa_mark_accessed_or_deactivate()
Date: Sat, 4 Mar 2023 19:39:49 +0000
damon_pa_mark_accessed_or_deactivate() is accessing a folio via
folio_nr_pages() after folio_put() for the folio has invoked. Fix it.
Link: https://lkml.kernel.org/r/20230304193949.296391-3-sj@kernel.org
Fixes: f70da5ee8fe1 ("mm/damon: convert damon_pa_mark_accessed_or_deactivate() to use folios")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Cc: <stable(a)vger.kernel.org> [6.2.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/mm/damon/paddr.c~mm-damon-paddr-fix-folio_nr_pages-after-folio_put-in-damon_pa_mark_accessed_or_deactivate
+++ a/mm/damon/paddr.c
@@ -280,8 +280,8 @@ static inline unsigned long damon_pa_mar
folio_mark_accessed(folio);
else
folio_deactivate(folio);
- folio_put(folio);
applied += folio_nr_pages(folio);
+ folio_put(folio);
}
return applied * PAGE_SIZE;
}
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-paddr-fix-folio_size-call-after-folio_put-in-damon_pa_young.patch
mm-damon-paddr-fix-folio_nr_pages-after-folio_put-in-damon_pa_mark_accessed_or_deactivate.patch
This patch introduce a new internal flag per lkb value to handle
internal flags which are handled not on wire. The current lkb internal
flags stored as lkb->lkb_flags are split in upper and lower bits, the
lower bits are used to share internal flags over wire for other cluster
wide lkb copies on other nodes.
In commit 61bed0baa4db ("fs: dlm: use a non-static queue for callbacks")
we introduced a new internal flag for pending callbacks for the dlm
callback queue. This flag is protected by the lkb->lkb_cb_lock lock.
This patch overlooked that on dlm receive path and the mentioned upper
and lower bits, that dlm will read the flags, mask it and write it
back. As example receive_flags() in fs/dlm/lock.c. This flag
manipulation is not done atomically and is not protected by
lkb->lkb_cb_lock. This has unknown side effects of the current callback
handling.
In future we should move to set/clear/test bit functionality and avoid
read, mask and writing back flag values. In later patches we will move
the upper parts to the new introduced internal lkb flags which are not
shared between other cluster nodes to the new non shared internal flag
field to avoid similar issues.
Cc: stable(a)vger.kernel.org
Fixes: 61bed0baa4db ("fs: dlm: use a non-static queue for callbacks")
Reported-by: Bob Peterson <rpeterso(a)redhat.com>
Signed-off-by: Alexander Aring <aahringo(a)redhat.com>
---
changes since v2:
- change the name of DLM_IFL_CB_PENDING to DLM_IFL_CB_PENDING_BIT so
the user of this flag knows it has a BIT meaning.
fs/dlm/ast.c | 9 ++++-----
fs/dlm/dlm_internal.h | 5 ++++-
fs/dlm/user.c | 2 +-
3 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 26fef9945cc9..39805aea3336 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -45,7 +45,7 @@ void dlm_purge_lkb_callbacks(struct dlm_lkb *lkb)
kref_put(&cb->ref, dlm_release_callback);
}
- lkb->lkb_flags &= ~DLM_IFL_CB_PENDING;
+ clear_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags);
/* invalidate */
dlm_callback_set_last_ptr(&lkb->lkb_last_cast, NULL);
@@ -103,10 +103,9 @@ int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
cb->sb_status = status;
cb->sb_flags = (sbflags & 0x000000FF);
kref_init(&cb->ref);
- if (!(lkb->lkb_flags & DLM_IFL_CB_PENDING)) {
- lkb->lkb_flags |= DLM_IFL_CB_PENDING;
+ if (!test_and_set_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags))
rv = DLM_ENQUEUE_CALLBACK_NEED_SCHED;
- }
+
list_add_tail(&cb->list, &lkb->lkb_callbacks);
if (flags & DLM_CB_CAST)
@@ -209,7 +208,7 @@ void dlm_callback_work(struct work_struct *work)
spin_lock(&lkb->lkb_cb_lock);
rv = dlm_dequeue_lkb_callback(lkb, &cb);
if (rv == DLM_DEQUEUE_CALLBACK_EMPTY) {
- lkb->lkb_flags &= ~DLM_IFL_CB_PENDING;
+ clear_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags);
spin_unlock(&lkb->lkb_cb_lock);
break;
}
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index ab1a55337a6e..9bf70962bc49 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -211,7 +211,9 @@ struct dlm_args {
#endif
#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */
-#define DLM_IFL_CB_PENDING 0x04000000
+
+#define DLM_IFL_CB_PENDING_BIT 0
+
/* least significant 2 bytes are message changed, they are full transmitted
* but at receive side only the 2 bytes LSB will be set.
*
@@ -246,6 +248,7 @@ struct dlm_lkb {
uint32_t lkb_exflags; /* external flags from caller */
uint32_t lkb_sbflags; /* lksb flags */
uint32_t lkb_flags; /* internal flags */
+ unsigned long lkb_iflags; /* internal flags */
uint32_t lkb_lvbseq; /* lvb sequence number */
int8_t lkb_status; /* granted, waiting, convert */
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 35129505ddda..688a480879e4 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -884,7 +884,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
goto try_another;
case DLM_DEQUEUE_CALLBACK_LAST:
list_del_init(&lkb->lkb_cb_list);
- lkb->lkb_flags &= ~DLM_IFL_CB_PENDING;
+ clear_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags);
break;
case DLM_DEQUEUE_CALLBACK_SUCCESS:
break;
--
2.31.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f58680085478dd292435727210122960d38e8014
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678099737150217(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
f58680085478 ("io_uring: add reschedule point to handle_tw_list()")
c6dd763c2460 ("io_uring: trace task_work_run")
3a0c037b0e16 ("io_uring: batch task_work")
f88262e60bb9 ("io_uring: lockless task list")
ed5ccb3beeba ("io_uring: remove priority tw list optimisation")
4a0fef62788b ("io_uring: optimize io_uring_task layout")
253993210bd8 ("io_uring: introduce locking helpers for CQE posting")
305bef988708 ("io_uring: hide eventfd assumptions in eventfd paths")
d9dee4302a7c ("io_uring: remove ->flush_cqes optimisation")
a830ffd28780 ("io_uring: move io_eventfd_signal()")
9046c6415be6 ("io_uring: reshuffle io_uring/io_uring.h")
d142c3ec8d16 ("io_uring: remove extra io_commit_cqring()")
68494a65d0e2 ("io_uring: introduce io_req_cqe_overflow()")
faf88dde060f ("io_uring: don't inline __io_get_cqe()")
d245bca6375b ("io_uring: don't expose io_fill_cqe_aux()")
6a02e4be8187 ("io_uring: inline ->registered_rings")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
aa1e90f64ee5 ("io_uring: move small helpers to headers")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f58680085478dd292435727210122960d38e8014 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Fri, 27 Jan 2023 09:50:31 -0700
Subject: [PATCH] io_uring: add reschedule point to handle_tw_list()
If CONFIG_PREEMPT_NONE is set and the task_work chains are long, we
could be running into issues blocking others for too long. Add a
reschedule check in handle_tw_list(), and flush the ctx if we need to
reschedule.
Cc: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index fab581a31dc1..acf6d9680d76 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1179,10 +1179,16 @@ static unsigned int handle_tw_list(struct llist_node *node,
/* if not contended, grab and improve batching */
*locked = mutex_trylock(&(*ctx)->uring_lock);
percpu_ref_get(&(*ctx)->refs);
- }
+ } else if (!*locked)
+ *locked = mutex_trylock(&(*ctx)->uring_lock);
req->io_task_work.func(req, locked);
node = next;
count++;
+ if (unlikely(need_resched())) {
+ ctx_flush_and_put(*ctx, locked);
+ *ctx = NULL;
+ cond_resched();
+ }
}
return count;
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 8932c32c3053accd50702b36e944ac2016cd103c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678128552113160(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
8932c32c3053 ("irqdomain: Fix domain registration race")
20c36ce2164f ("irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent")
3a1d24ca9573 ("irq/irqdomain: Fix comment typo")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8932c32c3053accd50702b36e944ac2016cd103c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:49 +0100
Subject: [PATCH] irqdomain: Fix domain registration race
Hierarchical domains created using irq_domain_create_hierarchy() are
currently added to the domain list before having been fully initialised.
This specifically means that a racing allocation request might fail to
allocate irq data for the inner domains of a hierarchy in case the
parent domain pointer has not yet been set up.
Note that this is not really any issue for irqchip drivers that are
registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
but could potentially cause trouble with drivers that are registered
later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
gpiochip drivers, etc.).
Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
Cc: stable(a)vger.kernel.org # 3.19
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
[ johan: add commit message ]
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index df0cbad1b0d7..a6d1b108b8f7 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -126,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+ unsigned int size,
+ irq_hw_number_t hwirq_max,
+ int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irqchip_fwid *fwid;
struct irq_domain *domain;
@@ -230,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ * direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *domain;
+
+ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+ ops, host_data);
+ if (domain)
+ __irq_domain_publish(domain);
+
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -1138,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain;
if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
+ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
+ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
+
if (domain) {
domain->parent = parent;
domain->flags |= flags;
+
+ __irq_domain_publish(domain);
}
return domain;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 8932c32c3053accd50702b36e944ac2016cd103c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678128551129114(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
8932c32c3053 ("irqdomain: Fix domain registration race")
20c36ce2164f ("irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent")
3a1d24ca9573 ("irq/irqdomain: Fix comment typo")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8932c32c3053accd50702b36e944ac2016cd103c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:49 +0100
Subject: [PATCH] irqdomain: Fix domain registration race
Hierarchical domains created using irq_domain_create_hierarchy() are
currently added to the domain list before having been fully initialised.
This specifically means that a racing allocation request might fail to
allocate irq data for the inner domains of a hierarchy in case the
parent domain pointer has not yet been set up.
Note that this is not really any issue for irqchip drivers that are
registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
but could potentially cause trouble with drivers that are registered
later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
gpiochip drivers, etc.).
Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
Cc: stable(a)vger.kernel.org # 3.19
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
[ johan: add commit message ]
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index df0cbad1b0d7..a6d1b108b8f7 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -126,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+ unsigned int size,
+ irq_hw_number_t hwirq_max,
+ int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irqchip_fwid *fwid;
struct irq_domain *domain;
@@ -230,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ * direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *domain;
+
+ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+ ops, host_data);
+ if (domain)
+ __irq_domain_publish(domain);
+
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -1138,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain;
if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
+ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
+ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
+
if (domain) {
domain->parent = parent;
domain->flags |= flags;
+
+ __irq_domain_publish(domain);
}
return domain;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 8932c32c3053accd50702b36e944ac2016cd103c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781285504764(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
8932c32c3053 ("irqdomain: Fix domain registration race")
20c36ce2164f ("irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8932c32c3053accd50702b36e944ac2016cd103c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:49 +0100
Subject: [PATCH] irqdomain: Fix domain registration race
Hierarchical domains created using irq_domain_create_hierarchy() are
currently added to the domain list before having been fully initialised.
This specifically means that a racing allocation request might fail to
allocate irq data for the inner domains of a hierarchy in case the
parent domain pointer has not yet been set up.
Note that this is not really any issue for irqchip drivers that are
registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
but could potentially cause trouble with drivers that are registered
later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
gpiochip drivers, etc.).
Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
Cc: stable(a)vger.kernel.org # 3.19
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
[ johan: add commit message ]
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index df0cbad1b0d7..a6d1b108b8f7 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -126,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+ unsigned int size,
+ irq_hw_number_t hwirq_max,
+ int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irqchip_fwid *fwid;
struct irq_domain *domain;
@@ -230,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ * direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *domain;
+
+ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+ ops, host_data);
+ if (domain)
+ __irq_domain_publish(domain);
+
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -1138,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain;
if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
+ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
+ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
+
if (domain) {
domain->parent = parent;
domain->flags |= flags;
+
+ __irq_domain_publish(domain);
}
return domain;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 8932c32c3053accd50702b36e944ac2016cd103c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781285492232(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
8932c32c3053 ("irqdomain: Fix domain registration race")
20c36ce2164f ("irqdomain: Change the type of 'size' in __irq_domain_add() to be consistent")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 8932c32c3053accd50702b36e944ac2016cd103c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:49 +0100
Subject: [PATCH] irqdomain: Fix domain registration race
Hierarchical domains created using irq_domain_create_hierarchy() are
currently added to the domain list before having been fully initialised.
This specifically means that a racing allocation request might fail to
allocate irq data for the inner domains of a hierarchy in case the
parent domain pointer has not yet been set up.
Note that this is not really any issue for irqchip drivers that are
registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
but could potentially cause trouble with drivers that are registered
later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
gpiochip drivers, etc.).
Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
Cc: stable(a)vger.kernel.org # 3.19
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
[ johan: add commit message ]
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index df0cbad1b0d7..a6d1b108b8f7 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -126,23 +126,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
}
EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
-/**
- * __irq_domain_add() - Allocate a new irq_domain data structure
- * @fwnode: firmware node for the interrupt controller
- * @size: Size of linear map; 0 for radix mapping only
- * @hwirq_max: Maximum number of interrupts supported by controller
- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
- * direct mapping
- * @ops: domain callbacks
- * @host_data: Controller private data pointer
- *
- * Allocates and initializes an irq_domain structure.
- * Returns pointer to IRQ domain, or NULL on failure.
- */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
- irq_hw_number_t hwirq_max, int direct_max,
- const struct irq_domain_ops *ops,
- void *host_data)
+static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
+ unsigned int size,
+ irq_hw_number_t hwirq_max,
+ int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irqchip_fwid *fwid;
struct irq_domain *domain;
@@ -230,12 +219,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
irq_domain_check_hierarchy(domain);
+ return domain;
+}
+
+static void __irq_domain_publish(struct irq_domain *domain)
+{
mutex_lock(&irq_domain_mutex);
debugfs_add_domain_dir(domain);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Added domain %s\n", domain->name);
+}
+
+/**
+ * __irq_domain_add() - Allocate a new irq_domain data structure
+ * @fwnode: firmware node for the interrupt controller
+ * @size: Size of linear map; 0 for radix mapping only
+ * @hwirq_max: Maximum number of interrupts supported by controller
+ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+ * direct mapping
+ * @ops: domain callbacks
+ * @host_data: Controller private data pointer
+ *
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ struct irq_domain *domain;
+
+ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
+ ops, host_data);
+ if (domain)
+ __irq_domain_publish(domain);
+
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);
@@ -1138,12 +1159,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
struct irq_domain *domain;
if (size)
- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
+ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
else
- domain = irq_domain_create_tree(fwnode, ops, host_data);
+ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
+
if (domain) {
domain->parent = parent;
domain->flags |= flags;
+
+ __irq_domain_publish(domain);
}
return domain;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x d55f7f4c58c07beb5050a834bf57ae2ede599c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812851896254(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
d55f7f4c58c0 ("irqdomain: Refactor __irq_domain_alloc_irqs()")
55567976629e ("genirq/irqdomain: Allow partial trimming of irq_data hierarchy")
87f2d1c662fa ("genirq/irqdomain: Check pointer in irq_domain_alloc_irqs_hierarchy()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d55f7f4c58c07beb5050a834bf57ae2ede599c7e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:47 +0100
Subject: [PATCH] irqdomain: Refactor __irq_domain_alloc_irqs()
Refactor __irq_domain_alloc_irqs() so that it can be called internally
while holding the irq_domain_mutex.
This will be used to fix a shared-interrupt mapping race, hence the
Fixes tag.
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: stable(a)vger.kernel.org # 4.8
Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org>
Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f95047e4bc7..78fb4800c0d2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1441,40 +1441,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program the hardware with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1493,24 +1465,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1520,6 +1486,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs);
return ret;
}
+
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&irq_domain_mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
/* The irq_data was moved, fix the revmap to refer to the new location */
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x d55f7f4c58c07beb5050a834bf57ae2ede599c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678128518177232(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
d55f7f4c58c0 ("irqdomain: Refactor __irq_domain_alloc_irqs()")
55567976629e ("genirq/irqdomain: Allow partial trimming of irq_data hierarchy")
87f2d1c662fa ("genirq/irqdomain: Check pointer in irq_domain_alloc_irqs_hierarchy()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d55f7f4c58c07beb5050a834bf57ae2ede599c7e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:47 +0100
Subject: [PATCH] irqdomain: Refactor __irq_domain_alloc_irqs()
Refactor __irq_domain_alloc_irqs() so that it can be called internally
while holding the irq_domain_mutex.
This will be used to fix a shared-interrupt mapping race, hence the
Fixes tag.
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: stable(a)vger.kernel.org # 4.8
Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org>
Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f95047e4bc7..78fb4800c0d2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1441,40 +1441,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program the hardware with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1493,24 +1465,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1520,6 +1486,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs);
return ret;
}
+
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&irq_domain_mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
/* The irq_data was moved, fix the revmap to refer to the new location */
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x d55f7f4c58c07beb5050a834bf57ae2ede599c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812851762200(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
d55f7f4c58c0 ("irqdomain: Refactor __irq_domain_alloc_irqs()")
55567976629e ("genirq/irqdomain: Allow partial trimming of irq_data hierarchy")
87f2d1c662fa ("genirq/irqdomain: Check pointer in irq_domain_alloc_irqs_hierarchy()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d55f7f4c58c07beb5050a834bf57ae2ede599c7e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:47 +0100
Subject: [PATCH] irqdomain: Refactor __irq_domain_alloc_irqs()
Refactor __irq_domain_alloc_irqs() so that it can be called internally
while holding the irq_domain_mutex.
This will be used to fix a shared-interrupt mapping race, hence the
Fixes tag.
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: stable(a)vger.kernel.org # 4.8
Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org>
Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f95047e4bc7..78fb4800c0d2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1441,40 +1441,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program the hardware with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1493,24 +1465,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1520,6 +1486,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs);
return ret;
}
+
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&irq_domain_mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
/* The irq_data was moved, fix the revmap to refer to the new location */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d55f7f4c58c07beb5050a834bf57ae2ede599c7e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781285161731(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
d55f7f4c58c0 ("irqdomain: Refactor __irq_domain_alloc_irqs()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d55f7f4c58c07beb5050a834bf57ae2ede599c7e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Mon, 13 Feb 2023 11:42:47 +0100
Subject: [PATCH] irqdomain: Refactor __irq_domain_alloc_irqs()
Refactor __irq_domain_alloc_irqs() so that it can be called internally
while holding the irq_domain_mutex.
This will be used to fix a shared-interrupt mapping race, hence the
Fixes tag.
Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
Cc: stable(a)vger.kernel.org # 4.8
Tested-by: Hsin-Yi Wang <hsinyi(a)chromium.org>
Tested-by: Mark-PK Tsai <mark-pk.tsai(a)mediatek.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 9f95047e4bc7..78fb4800c0d2 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1441,40 +1441,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
-/**
- * __irq_domain_alloc_irqs - Allocate IRQs from domain
- * @domain: domain to allocate from
- * @irq_base: allocate specified IRQ number if irq_base >= 0
- * @nr_irqs: number of IRQs to allocate
- * @node: NUMA node id for memory allocation
- * @arg: domain specific argument
- * @realloc: IRQ descriptors have already been allocated if true
- * @affinity: Optional irq affinity mask for multiqueue devices
- *
- * Allocate IRQ numbers and initialized all data structures to support
- * hierarchy IRQ domains.
- * Parameter @realloc is mainly to support legacy IRQs.
- * Returns error code or allocated IRQ number
- *
- * The whole process to setup an IRQ has been split into two steps.
- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
- * descriptor and required hardware resources. The second step,
- * irq_domain_activate_irq(), is to program the hardware with preallocated
- * resources. In this way, it's easier to rollback when failing to
- * allocate resources.
- */
-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
- unsigned int nr_irqs, int node, void *arg,
- bool realloc, const struct irq_affinity_desc *affinity)
+static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
- if (domain == NULL) {
- domain = irq_default_domain;
- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
- return -EINVAL;
- }
-
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
@@ -1493,24 +1465,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
goto out_free_desc;
}
- mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
- if (ret < 0) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret < 0)
goto out_free_irq_data;
- }
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
- if (ret) {
- mutex_unlock(&irq_domain_mutex);
+ if (ret)
goto out_free_irq_data;
- }
}
-
+
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
- mutex_unlock(&irq_domain_mutex);
return virq;
@@ -1520,6 +1486,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
irq_free_descs(virq, nr_irqs);
return ret;
}
+
+/**
+ * __irq_domain_alloc_irqs - Allocate IRQs from domain
+ * @domain: domain to allocate from
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
+ * @nr_irqs: number of IRQs to allocate
+ * @node: NUMA node id for memory allocation
+ * @arg: domain specific argument
+ * @realloc: IRQ descriptors have already been allocated if true
+ * @affinity: Optional irq affinity mask for multiqueue devices
+ *
+ * Allocate IRQ numbers and initialized all data structures to support
+ * hierarchy IRQ domains.
+ * Parameter @realloc is mainly to support legacy IRQs.
+ * Returns error code or allocated IRQ number
+ *
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ unsigned int nr_irqs, int node, void *arg,
+ bool realloc, const struct irq_affinity_desc *affinity)
+{
+ int ret;
+
+ if (domain == NULL) {
+ domain = irq_default_domain;
+ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+ return -EINVAL;
+ }
+
+ mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
+ realloc, affinity);
+ mutex_unlock(&irq_domain_mutex);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs);
/* The irq_data was moved, fix the revmap to refer to the new location */
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 6ded703c56c21bfb259725d4f1831a5feb563e9b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812503219822(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
6ded703c56c2 ("brd: check for REQ_NOWAIT and set correct page allocation mask")
db0ccc44a20b ("brd: return 0/-error from brd_insert_page()")
ba91fd01aad2 ("block/brd: Use the enum req_op type")
3e08773c3841 ("block: switch polling to be bio based")
6ce913fe3eee ("block: rename REQ_HIPRI to REQ_POLLED")
d729cf9acb93 ("io_uring: don't sleep when polling for I/O")
ef99b2d37666 ("block: replace the spin argument to blk_iopoll with a flags argument")
28a1ae6b9dab ("blk-mq: remove blk_qc_t_valid")
efbabbe121f9 ("blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal")
c6699d6fe0ff ("blk-mq: factor out a "classic" poll helper")
f70299f0d58e ("blk-mq: factor out a blk_qc_to_hctx helper")
71fc3f5e2c00 ("block: don't try to poll multi-bio I/Os in __blkdev_direct_IO")
0f38d7664615 ("blk-mq: cleanup blk_mq_submit_bio")
47c122e35d7e ("block: pre-allocate requests if plug is started and is a batch")
24b83deb29b7 ("block: move struct request to blk-mq.h")
badf7f643787 ("block: move a few merge helpers out of <linux/blkdev.h>")
2e9bc3465ac5 ("block: move elevator.h to block/")
9778ac77c202 ("block: remove the struct blk_queue_ctx forward declaration")
90138237a562 ("block: remove the unused blk_queue_state enum")
cc9c884dd7f4 ("block: call submit_bio_checks under q_usage_counter")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6ded703c56c21bfb259725d4f1831a5feb563e9b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Thu, 16 Feb 2023 08:01:08 -0700
Subject: [PATCH] brd: check for REQ_NOWAIT and set correct page allocation
mask
If REQ_NOWAIT is set, then do a non-blocking allocation if the operation
is a write and we need to insert a new page. Currently REQ_NOWAIT cannot
be set as the queue isn't marked as supporting nowait, this change is in
preparation for allowing that.
radix_tree_preload() warns on attempting to call it with an allocation
mask that doesn't allow blocking. While that warning could arguably
be removed, we need to handle radix insertion failures anyway as they
are more likely if we cannot block to get memory.
Remove legacy BUG_ON()'s and turn them into proper errors instead, one
for the allocation failure and one for finding a page that doesn't
match the correct index.
Cc: stable(a)vger.kernel.org # 5.10+
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 15a148d5aad9..00f3c5b51a01 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -80,26 +80,21 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
/*
* Insert a new page for a given sector, if one does not already exist.
*/
-static int brd_insert_page(struct brd_device *brd, sector_t sector)
+static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
pgoff_t idx;
struct page *page;
- gfp_t gfp_flags;
+ int ret = 0;
page = brd_lookup_page(brd, sector);
if (page)
return 0;
- /*
- * Must use NOIO because we don't want to recurse back into the
- * block or filesystem layers from page reclaim.
- */
- gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
- page = alloc_page(gfp_flags);
+ page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
if (!page)
return -ENOMEM;
- if (radix_tree_preload(GFP_NOIO)) {
+ if (gfpflags_allow_blocking(gfp) && radix_tree_preload(gfp)) {
__free_page(page);
return -ENOMEM;
}
@@ -110,15 +105,17 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector)
if (radix_tree_insert(&brd->brd_pages, idx, page)) {
__free_page(page);
page = radix_tree_lookup(&brd->brd_pages, idx);
- BUG_ON(!page);
- BUG_ON(page->index != idx);
+ if (!page)
+ ret = -ENOMEM;
+ else if (page->index != idx)
+ ret = -EIO;
} else {
brd->brd_nr_pages++;
}
spin_unlock(&brd->brd_lock);
radix_tree_preload_end();
- return 0;
+ return ret;
}
/*
@@ -167,19 +164,20 @@ static void brd_free_pages(struct brd_device *brd)
/*
* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
*/
-static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
+static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
+ gfp_t gfp)
{
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
size_t copy;
int ret;
copy = min_t(size_t, n, PAGE_SIZE - offset);
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
if (ret)
return ret;
if (copy < n) {
sector += copy >> SECTOR_SHIFT;
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
}
return ret;
}
@@ -254,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
* Process a single bvec of a bio.
*/
static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, enum req_op op,
+ unsigned int len, unsigned int off, blk_opf_t opf,
sector_t sector)
{
void *mem;
int err = 0;
- if (op_is_write(op)) {
- err = copy_to_brd_setup(brd, sector, len);
+ if (op_is_write(opf)) {
+ /*
+ * Must use NOIO because we don't want to recurse back into the
+ * block or filesystem layers from page reclaim.
+ */
+ gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
+
+ err = copy_to_brd_setup(brd, sector, len, gfp);
if (err)
goto out;
}
mem = kmap_atomic(page);
- if (!op_is_write(op)) {
+ if (!op_is_write(opf)) {
copy_from_brd(mem + off, brd, sector, len);
flush_dcache_page(page);
} else {
@@ -296,8 +300,12 @@ static void brd_submit_bio(struct bio *bio)
(len & (SECTOR_SIZE - 1)));
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio_op(bio), sector);
+ bio->bi_opf, sector);
if (err) {
+ if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
+ bio_wouldblock_error(bio);
+ return;
+ }
bio_io_error(bio);
return;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 6ded703c56c21bfb259725d4f1831a5feb563e9b
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678125031200144(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
6ded703c56c2 ("brd: check for REQ_NOWAIT and set correct page allocation mask")
db0ccc44a20b ("brd: return 0/-error from brd_insert_page()")
ba91fd01aad2 ("block/brd: Use the enum req_op type")
3e08773c3841 ("block: switch polling to be bio based")
6ce913fe3eee ("block: rename REQ_HIPRI to REQ_POLLED")
d729cf9acb93 ("io_uring: don't sleep when polling for I/O")
ef99b2d37666 ("block: replace the spin argument to blk_iopoll with a flags argument")
28a1ae6b9dab ("blk-mq: remove blk_qc_t_valid")
efbabbe121f9 ("blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal")
c6699d6fe0ff ("blk-mq: factor out a "classic" poll helper")
f70299f0d58e ("blk-mq: factor out a blk_qc_to_hctx helper")
71fc3f5e2c00 ("block: don't try to poll multi-bio I/Os in __blkdev_direct_IO")
0f38d7664615 ("blk-mq: cleanup blk_mq_submit_bio")
47c122e35d7e ("block: pre-allocate requests if plug is started and is a batch")
24b83deb29b7 ("block: move struct request to blk-mq.h")
badf7f643787 ("block: move a few merge helpers out of <linux/blkdev.h>")
2e9bc3465ac5 ("block: move elevator.h to block/")
9778ac77c202 ("block: remove the struct blk_queue_ctx forward declaration")
90138237a562 ("block: remove the unused blk_queue_state enum")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 6ded703c56c21bfb259725d4f1831a5feb563e9b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Thu, 16 Feb 2023 08:01:08 -0700
Subject: [PATCH] brd: check for REQ_NOWAIT and set correct page allocation
mask
If REQ_NOWAIT is set, then do a non-blocking allocation if the operation
is a write and we need to insert a new page. Currently REQ_NOWAIT cannot
be set as the queue isn't marked as supporting nowait, this change is in
preparation for allowing that.
radix_tree_preload() warns on attempting to call it with an allocation
mask that doesn't allow blocking. While that warning could arguably
be removed, we need to handle radix insertion failures anyway as they
are more likely if we cannot block to get memory.
Remove legacy BUG_ON()'s and turn them into proper errors instead, one
for the allocation failure and one for finding a page that doesn't
match the correct index.
Cc: stable(a)vger.kernel.org # 5.10+
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 15a148d5aad9..00f3c5b51a01 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -80,26 +80,21 @@ static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
/*
* Insert a new page for a given sector, if one does not already exist.
*/
-static int brd_insert_page(struct brd_device *brd, sector_t sector)
+static int brd_insert_page(struct brd_device *brd, sector_t sector, gfp_t gfp)
{
pgoff_t idx;
struct page *page;
- gfp_t gfp_flags;
+ int ret = 0;
page = brd_lookup_page(brd, sector);
if (page)
return 0;
- /*
- * Must use NOIO because we don't want to recurse back into the
- * block or filesystem layers from page reclaim.
- */
- gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
- page = alloc_page(gfp_flags);
+ page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
if (!page)
return -ENOMEM;
- if (radix_tree_preload(GFP_NOIO)) {
+ if (gfpflags_allow_blocking(gfp) && radix_tree_preload(gfp)) {
__free_page(page);
return -ENOMEM;
}
@@ -110,15 +105,17 @@ static int brd_insert_page(struct brd_device *brd, sector_t sector)
if (radix_tree_insert(&brd->brd_pages, idx, page)) {
__free_page(page);
page = radix_tree_lookup(&brd->brd_pages, idx);
- BUG_ON(!page);
- BUG_ON(page->index != idx);
+ if (!page)
+ ret = -ENOMEM;
+ else if (page->index != idx)
+ ret = -EIO;
} else {
brd->brd_nr_pages++;
}
spin_unlock(&brd->brd_lock);
radix_tree_preload_end();
- return 0;
+ return ret;
}
/*
@@ -167,19 +164,20 @@ static void brd_free_pages(struct brd_device *brd)
/*
* copy_to_brd_setup must be called before copy_to_brd. It may sleep.
*/
-static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
+static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n,
+ gfp_t gfp)
{
unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
size_t copy;
int ret;
copy = min_t(size_t, n, PAGE_SIZE - offset);
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
if (ret)
return ret;
if (copy < n) {
sector += copy >> SECTOR_SHIFT;
- ret = brd_insert_page(brd, sector);
+ ret = brd_insert_page(brd, sector, gfp);
}
return ret;
}
@@ -254,20 +252,26 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
* Process a single bvec of a bio.
*/
static int brd_do_bvec(struct brd_device *brd, struct page *page,
- unsigned int len, unsigned int off, enum req_op op,
+ unsigned int len, unsigned int off, blk_opf_t opf,
sector_t sector)
{
void *mem;
int err = 0;
- if (op_is_write(op)) {
- err = copy_to_brd_setup(brd, sector, len);
+ if (op_is_write(opf)) {
+ /*
+ * Must use NOIO because we don't want to recurse back into the
+ * block or filesystem layers from page reclaim.
+ */
+ gfp_t gfp = opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO;
+
+ err = copy_to_brd_setup(brd, sector, len, gfp);
if (err)
goto out;
}
mem = kmap_atomic(page);
- if (!op_is_write(op)) {
+ if (!op_is_write(opf)) {
copy_from_brd(mem + off, brd, sector, len);
flush_dcache_page(page);
} else {
@@ -296,8 +300,12 @@ static void brd_submit_bio(struct bio *bio)
(len & (SECTOR_SIZE - 1)));
err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
- bio_op(bio), sector);
+ bio->bi_opf, sector);
if (err) {
+ if (err == -ENOMEM && bio->bi_opf & REQ_NOWAIT) {
+ bio_wouldblock_error(bio);
+ return;
+ }
bio_io_error(bio);
return;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 67205f80be9910207481406c47f7d85e703fb2e9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812500026148(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
67205f80be99 ("brd: mark as nowait compatible")
e1528830bd4e ("block/brd: add error handling support for add_disk()")
f7bf35862477 ("brd: reduce the brd_devices_mutex scope")
7f9b348cb5e9 ("brd: convert to blk_alloc_disk/blk_cleanup_disk")
f4be591f1436 ("brd: expose number of allocated pages in debugfs")
7cc178a6b994 ("brd: use __register_blkdev to allocate devices on demand")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67205f80be9910207481406c47f7d85e703fb2e9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Wed, 15 Feb 2023 16:43:47 -0700
Subject: [PATCH] brd: mark as nowait compatible
By default, non-mq drivers do not support nowait. This causes io_uring
to use a slower path as the driver cannot be trust not to block. brd
can safely set the nowait flag, as worst case all it does is a NOIO
allocation.
For io_uring, this makes a substantial difference. Before:
submitter=0, tid=453, file=/dev/ram0, node=-1
polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128
Engine=io_uring, sq_ring=128, cq_ring=128
IOPS=440.03K, BW=1718MiB/s, IOS/call=32/31
IOPS=428.96K, BW=1675MiB/s, IOS/call=32/32
IOPS=442.59K, BW=1728MiB/s, IOS/call=32/31
IOPS=419.65K, BW=1639MiB/s, IOS/call=32/32
IOPS=426.82K, BW=1667MiB/s, IOS/call=32/31
and after:
submitter=0, tid=354, file=/dev/ram0, node=-1
polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128
Engine=io_uring, sq_ring=128, cq_ring=128
IOPS=3.37M, BW=13.15GiB/s, IOS/call=32/31
IOPS=3.45M, BW=13.46GiB/s, IOS/call=32/31
IOPS=3.43M, BW=13.42GiB/s, IOS/call=32/32
IOPS=3.43M, BW=13.39GiB/s, IOS/call=32/31
IOPS=3.43M, BW=13.38GiB/s, IOS/call=32/31
or about an 8x in difference. Now that brd is prepared to deal with
REQ_NOWAIT reads/writes, mark it as supporting that.
Cc: stable(a)vger.kernel.org # 5.10+
Link: https://lore.kernel.org/linux-block/20230203103005.31290-1-p.raghav@samsung…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 00f3c5b51a01..740631dcdd0e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,6 +418,7 @@ static int brd_alloc(int i)
/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 67205f80be9910207481406c47f7d85e703fb2e9
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812499941176(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
67205f80be99 ("brd: mark as nowait compatible")
e1528830bd4e ("block/brd: add error handling support for add_disk()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 67205f80be9910207481406c47f7d85e703fb2e9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Wed, 15 Feb 2023 16:43:47 -0700
Subject: [PATCH] brd: mark as nowait compatible
By default, non-mq drivers do not support nowait. This causes io_uring
to use a slower path as the driver cannot be trust not to block. brd
can safely set the nowait flag, as worst case all it does is a NOIO
allocation.
For io_uring, this makes a substantial difference. Before:
submitter=0, tid=453, file=/dev/ram0, node=-1
polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128
Engine=io_uring, sq_ring=128, cq_ring=128
IOPS=440.03K, BW=1718MiB/s, IOS/call=32/31
IOPS=428.96K, BW=1675MiB/s, IOS/call=32/32
IOPS=442.59K, BW=1728MiB/s, IOS/call=32/31
IOPS=419.65K, BW=1639MiB/s, IOS/call=32/32
IOPS=426.82K, BW=1667MiB/s, IOS/call=32/31
and after:
submitter=0, tid=354, file=/dev/ram0, node=-1
polled=0, fixedbufs=1/0, register_files=1, buffered=0, QD=128
Engine=io_uring, sq_ring=128, cq_ring=128
IOPS=3.37M, BW=13.15GiB/s, IOS/call=32/31
IOPS=3.45M, BW=13.46GiB/s, IOS/call=32/31
IOPS=3.43M, BW=13.42GiB/s, IOS/call=32/32
IOPS=3.43M, BW=13.39GiB/s, IOS/call=32/31
IOPS=3.43M, BW=13.38GiB/s, IOS/call=32/31
or about an 8x in difference. Now that brd is prepared to deal with
REQ_NOWAIT reads/writes, mark it as supporting that.
Cc: stable(a)vger.kernel.org # 5.10+
Link: https://lore.kernel.org/linux-block/20230203103005.31290-1-p.raghav@samsung…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 00f3c5b51a01..740631dcdd0e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -418,6 +418,7 @@ static int brd_alloc(int i)
/* Tell the block layer that this is not a rotational device */
blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, disk->queue);
err = add_disk(disk);
if (err)
goto out_cleanup_disk;
Hi -stable team,
please backport the commit f1aa2eb5ea05 ("sysctl: fix proc_dobool() usability")
to the stable kernels containing commit 83efeeeb3d04 ("tty: Allow TIOCSTI to be disabled").
(Which seems only to be the 6.2 branch only at the moment)
Without this backport the sysctl dev.net.legacy_tiocsti to enable
ioctl(TIOCSTI) is not functional. So on kernels that don't enable
CONFIG_LEGACY_TIOCSTI, ioctl(TIOCSTI) is not usable at all.
This ioctl is used for the copy-and-paste functionality of the
screenreader "fenrir".
( https://github.com/chrys87/fenrir )
Reported-by: Storm Dragon <stormdragon2976(a)gmail.com>
Link: https://lore.kernel.org/lkml/ZAOi9hDBTYqoAZuI@hotmail.com/
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 26044aff37a5455b19a91785086914fd33053ef4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781236386157(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
26044aff37a5 ("x86/crash: Disable virt in core NMI crash handler to avoid double shootdown")
ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
4d1d0977a215 ("x86: Fix a handful of typos")
22ca7ee933a3 ("x86/apic: Provide and use helper for send_IPI_allbutself()")
6a1cb5f5c641 ("x86/apic: Add static key to Control IPI shorthands")
bdda3b93e660 ("x86/apic: Move no_ipi_broadcast() out of 32bit")
9c92374b631d ("x86/cpu: Move arch_smt_update() to a neutral place")
c94f0718fb1c ("x86/apic: Consolidate the apic local headers")
ba77b2a02e00 ("x86/apic: Move apic_flat_64 header into apic directory")
8b542da37287 ("x86/apic: Move ipi header into apic directory")
521b82fee98c ("x86/apic: Cleanup the include maze")
cdc86c9d1f82 ("x86/apic: Move IPI inlines into ipi.c")
2591bc4e8d70 ("x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI")
747d5a1bf293 ("x86/reboot: Always use NMI fallback when shutdown via reboot vector IPI fails")
7e300dabb7e7 ("treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 223")
fa4bff165070 ("Merge branch 'x86-mds-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 26044aff37a5455b19a91785086914fd33053ef4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:36:47 +0000
Subject: [PATCH] x86/crash: Disable virt in core NMI crash handler to avoid
double shootdown
Disable virtualization in crash_nmi_callback() and rework the
emergency_vmx_disable_all() path to do an NMI shootdown if and only if a
shootdown has not already occurred. NMI crash shootdown fundamentally
can't support multiple invocations as responding CPUs are deliberately
put into halt state without unblocking NMIs. But, the emergency reboot
path doesn't have any work of its own, it simply cares about disabling
virtualization, i.e. so long as a shootdown occurred, emergency reboot
doesn't care who initiated the shootdown, or when.
If "crash_kexec_post_notifiers" is specified on the kernel command line,
panic() will invoke crash_smp_send_stop() and result in a second call to
nmi_shootdown_cpus() during native_machine_emergency_restart().
Invoke the callback _before_ disabling virtualization, as the current
VMCS needs to be cleared before doing VMXOFF. Note, this results in a
subtle change in ordering between disabling virtualization and stopping
Intel PT on the responding CPUs. While VMX and Intel PT do interact,
VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one
another, which is all that matters when panicking.
Harden nmi_shootdown_cpus() against multiple invocations to try and
capture any such kernel bugs via a WARN instead of hanging the system
during a crash/dump, e.g. prior to the recent hardening of
register_nmi_handler(), re-registering the NMI handler would trigger a
double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y.
list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800.
WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70
Call Trace:
__register_nmi_handler+0xcf/0x130
nmi_shootdown_cpus+0x39/0x90
native_machine_emergency_restart+0x1c9/0x1d0
panic+0x237/0x29b
Extract the disabling logic to a common helper to deduplicate code, and
to prepare for doing the shootdown in the emergency reboot path if SVM
is supported.
Note, prior to commit ed72736183c4 ("x86/reboot: Force all cpus to exit
VMX root if VMX is supported"), nmi_shootdown_cpus() was subtly protected
against a second invocation by a cpu_vmx_enabled() check as the kdump
handler would disable VMX if it ran first.
Fixes: ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 04c17be9b5fd..bc5b4d788c08 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
+void cpu_emergency_disable_virtualization(void);
+
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_panic_self_stop(struct pt_regs *regs);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 305514431f26..cdd92ab43cda 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -37,7 +37,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
/*
* Disable Intel PT to stop its logging
*/
@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c3636ea4aa71..374c14b3a9bf 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -528,10 +528,7 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
- cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
static void emergency_vmx_disable_all(void)
@@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(void)
__cpu_emergency_vmxoff();
/* Halt and exit VMX root operation on the other CPUs. */
- nmi_shootdown_cpus(vmxoff_nmi);
+ nmi_shootdown_cpus_on_restart();
}
}
@@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+}
+
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
- shootdown_callback(cpu, regs);
+ if (shootdown_callback)
+ shootdown_callback(cpu, regs);
+
+ /*
+ * Prepare the CPU for reboot _after_ invoking the callback so that the
+ * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ */
+ cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
}
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback: Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
*
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
+
local_irq_disable();
+ /*
+ * Avoid certain doom if a shootdown already occurred; re-registering
+ * the NMI handler will cause list corruption, modifying the callback
+ * will do who knows what, etc...
+ */
+ if (WARN_ON_ONCE(crash_ipi_issued))
+ return;
+
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
- /* Leave the nmi callback set */
+ /*
+ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
+ * the callback could result in a NULL pointer dereference if a CPU
+ * (finally) responds after the timeout expires.
+ */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+ if (!crash_ipi_issued)
+ nmi_shootdown_cpus(NULL);
}
/*
@@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
void run_crash_ipi_callback(struct pt_regs *regs)
{
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 26044aff37a5455b19a91785086914fd33053ef4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678123636156122(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
26044aff37a5 ("x86/crash: Disable virt in core NMI crash handler to avoid double shootdown")
ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
4d1d0977a215 ("x86: Fix a handful of typos")
22ca7ee933a3 ("x86/apic: Provide and use helper for send_IPI_allbutself()")
6a1cb5f5c641 ("x86/apic: Add static key to Control IPI shorthands")
bdda3b93e660 ("x86/apic: Move no_ipi_broadcast() out of 32bit")
9c92374b631d ("x86/cpu: Move arch_smt_update() to a neutral place")
c94f0718fb1c ("x86/apic: Consolidate the apic local headers")
ba77b2a02e00 ("x86/apic: Move apic_flat_64 header into apic directory")
8b542da37287 ("x86/apic: Move ipi header into apic directory")
521b82fee98c ("x86/apic: Cleanup the include maze")
cdc86c9d1f82 ("x86/apic: Move IPI inlines into ipi.c")
2591bc4e8d70 ("x86/kgbd: Use NMI_VECTOR not APIC_DM_NMI")
747d5a1bf293 ("x86/reboot: Always use NMI fallback when shutdown via reboot vector IPI fails")
7e300dabb7e7 ("treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 223")
fa4bff165070 ("Merge branch 'x86-mds-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 26044aff37a5455b19a91785086914fd33053ef4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:36:47 +0000
Subject: [PATCH] x86/crash: Disable virt in core NMI crash handler to avoid
double shootdown
Disable virtualization in crash_nmi_callback() and rework the
emergency_vmx_disable_all() path to do an NMI shootdown if and only if a
shootdown has not already occurred. NMI crash shootdown fundamentally
can't support multiple invocations as responding CPUs are deliberately
put into halt state without unblocking NMIs. But, the emergency reboot
path doesn't have any work of its own, it simply cares about disabling
virtualization, i.e. so long as a shootdown occurred, emergency reboot
doesn't care who initiated the shootdown, or when.
If "crash_kexec_post_notifiers" is specified on the kernel command line,
panic() will invoke crash_smp_send_stop() and result in a second call to
nmi_shootdown_cpus() during native_machine_emergency_restart().
Invoke the callback _before_ disabling virtualization, as the current
VMCS needs to be cleared before doing VMXOFF. Note, this results in a
subtle change in ordering between disabling virtualization and stopping
Intel PT on the responding CPUs. While VMX and Intel PT do interact,
VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one
another, which is all that matters when panicking.
Harden nmi_shootdown_cpus() against multiple invocations to try and
capture any such kernel bugs via a WARN instead of hanging the system
during a crash/dump, e.g. prior to the recent hardening of
register_nmi_handler(), re-registering the NMI handler would trigger a
double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y.
list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800.
WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70
Call Trace:
__register_nmi_handler+0xcf/0x130
nmi_shootdown_cpus+0x39/0x90
native_machine_emergency_restart+0x1c9/0x1d0
panic+0x237/0x29b
Extract the disabling logic to a common helper to deduplicate code, and
to prepare for doing the shootdown in the emergency reboot path if SVM
is supported.
Note, prior to commit ed72736183c4 ("x86/reboot: Force all cpus to exit
VMX root if VMX is supported"), nmi_shootdown_cpus() was subtly protected
against a second invocation by a cpu_vmx_enabled() check as the kdump
handler would disable VMX if it ran first.
Fixes: ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 04c17be9b5fd..bc5b4d788c08 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
+void cpu_emergency_disable_virtualization(void);
+
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_panic_self_stop(struct pt_regs *regs);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 305514431f26..cdd92ab43cda 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -37,7 +37,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
/*
* Disable Intel PT to stop its logging
*/
@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c3636ea4aa71..374c14b3a9bf 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -528,10 +528,7 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
- cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
static void emergency_vmx_disable_all(void)
@@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(void)
__cpu_emergency_vmxoff();
/* Halt and exit VMX root operation on the other CPUs. */
- nmi_shootdown_cpus(vmxoff_nmi);
+ nmi_shootdown_cpus_on_restart();
}
}
@@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+}
+
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
- shootdown_callback(cpu, regs);
+ if (shootdown_callback)
+ shootdown_callback(cpu, regs);
+
+ /*
+ * Prepare the CPU for reboot _after_ invoking the callback so that the
+ * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ */
+ cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
}
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback: Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
*
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
+
local_irq_disable();
+ /*
+ * Avoid certain doom if a shootdown already occurred; re-registering
+ * the NMI handler will cause list corruption, modifying the callback
+ * will do who knows what, etc...
+ */
+ if (WARN_ON_ONCE(crash_ipi_issued))
+ return;
+
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
- /* Leave the nmi callback set */
+ /*
+ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
+ * the callback could result in a NULL pointer dereference if a CPU
+ * (finally) responds after the timeout expires.
+ */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+ if (!crash_ipi_issued)
+ nmi_shootdown_cpus(NULL);
}
/*
@@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
void run_crash_ipi_callback(struct pt_regs *regs)
{
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 26044aff37a5455b19a91785086914fd33053ef4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678123634056(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
26044aff37a5 ("x86/crash: Disable virt in core NMI crash handler to avoid double shootdown")
ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
4d1d0977a215 ("x86: Fix a handful of typos")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 26044aff37a5455b19a91785086914fd33053ef4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:36:47 +0000
Subject: [PATCH] x86/crash: Disable virt in core NMI crash handler to avoid
double shootdown
Disable virtualization in crash_nmi_callback() and rework the
emergency_vmx_disable_all() path to do an NMI shootdown if and only if a
shootdown has not already occurred. NMI crash shootdown fundamentally
can't support multiple invocations as responding CPUs are deliberately
put into halt state without unblocking NMIs. But, the emergency reboot
path doesn't have any work of its own, it simply cares about disabling
virtualization, i.e. so long as a shootdown occurred, emergency reboot
doesn't care who initiated the shootdown, or when.
If "crash_kexec_post_notifiers" is specified on the kernel command line,
panic() will invoke crash_smp_send_stop() and result in a second call to
nmi_shootdown_cpus() during native_machine_emergency_restart().
Invoke the callback _before_ disabling virtualization, as the current
VMCS needs to be cleared before doing VMXOFF. Note, this results in a
subtle change in ordering between disabling virtualization and stopping
Intel PT on the responding CPUs. While VMX and Intel PT do interact,
VMXOFF and writes to MSR_IA32_RTIT_CTL do not induce faults between one
another, which is all that matters when panicking.
Harden nmi_shootdown_cpus() against multiple invocations to try and
capture any such kernel bugs via a WARN instead of hanging the system
during a crash/dump, e.g. prior to the recent hardening of
register_nmi_handler(), re-registering the NMI handler would trigger a
double list_add() and hang the system if CONFIG_BUG_ON_DATA_CORRUPTION=y.
list_add double add: new=ffffffff82220800, prev=ffffffff8221cfe8, next=ffffffff82220800.
WARNING: CPU: 2 PID: 1319 at lib/list_debug.c:29 __list_add_valid+0x67/0x70
Call Trace:
__register_nmi_handler+0xcf/0x130
nmi_shootdown_cpus+0x39/0x90
native_machine_emergency_restart+0x1c9/0x1d0
panic+0x237/0x29b
Extract the disabling logic to a common helper to deduplicate code, and
to prepare for doing the shootdown in the emergency reboot path if SVM
is supported.
Note, prior to commit ed72736183c4 ("x86/reboot: Force all cpus to exit
VMX root if VMX is supported"), nmi_shootdown_cpus() was subtly protected
against a second invocation by a cpu_vmx_enabled() check as the kdump
handler would disable VMX if it ran first.
Fixes: ed72736183c4 ("x86/reboot: Force all cpus to exit VMX root if VMX is supported")
Cc: stable(a)vger.kernel.org
Reported-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Cc: Vitaly Kuznetsov <vkuznets(a)redhat.com>
Cc: Paolo Bonzini <pbonzini(a)redhat.com>
Link: https://lore.kernel.org/all/20220427224924.592546-2-gpiccoli@igalia.com
Tested-by: Guilherme G. Piccoli <gpiccoli(a)igalia.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Link: https://lore.kernel.org/r/20221130233650.1404148-2-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 04c17be9b5fd..bc5b4d788c08 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,6 +25,8 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
+void cpu_emergency_disable_virtualization(void);
+
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_panic_self_stop(struct pt_regs *regs);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 305514431f26..cdd92ab43cda 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -37,7 +37,6 @@
#include <linux/kdebug.h>
#include <asm/cpu.h>
#include <asm/reboot.h>
-#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
#include <asm/cmdline.h>
@@ -81,15 +80,6 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Disable VMX or SVM if needed.
- *
- * We need to disable virtualization on all CPUs.
- * Having VMX or SVM enabled on any CPU may break rebooting
- * after the kdump kernel has finished its task.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-
/*
* Disable Intel PT to stop its logging
*/
@@ -148,12 +138,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
*/
cpu_crash_vmclear_loaded_vmcss();
- /* Booting kdump kernel with VMX or SVM enabled won't work,
- * because (among other limitations) we can't disable paging
- * with the virt flags.
- */
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
+ cpu_emergency_disable_virtualization();
/*
* Disable Intel PT to stop its logging
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c3636ea4aa71..374c14b3a9bf 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -528,10 +528,7 @@ static inline void kb_wait(void)
}
}
-static void vmxoff_nmi(int cpu, struct pt_regs *regs)
-{
- cpu_emergency_vmxoff();
-}
+static inline void nmi_shootdown_cpus_on_restart(void);
/* Use NMIs as IPIs to tell all CPUs to disable virtualization */
static void emergency_vmx_disable_all(void)
@@ -554,7 +551,7 @@ static void emergency_vmx_disable_all(void)
__cpu_emergency_vmxoff();
/* Halt and exit VMX root operation on the other CPUs. */
- nmi_shootdown_cpus(vmxoff_nmi);
+ nmi_shootdown_cpus_on_restart();
}
}
@@ -795,6 +792,17 @@ void machine_crash_shutdown(struct pt_regs *regs)
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_vmxoff();
+ cpu_emergency_svm_disable();
+}
+
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
@@ -817,7 +825,14 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
local_irq_disable();
- shootdown_callback(cpu, regs);
+ if (shootdown_callback)
+ shootdown_callback(cpu, regs);
+
+ /*
+ * Prepare the CPU for reboot _after_ invoking the callback so that the
+ * callback can safely use virtualization instructions, e.g. VMCLEAR.
+ */
+ cpu_emergency_disable_virtualization();
atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
@@ -828,18 +843,32 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
return NMI_HANDLED;
}
-/*
- * Halt all other CPUs, calling the specified function on each of them
+/**
+ * nmi_shootdown_cpus - Stop other CPUs via NMI
+ * @callback: Optional callback to be invoked from the NMI handler
+ *
+ * The NMI handler on the remote CPUs invokes @callback, if not
+ * NULL, first and then disables virtualization to ensure that
+ * INIT is recognized during reboot.
*
- * This function can be used to halt all other CPUs on crash
- * or emergency reboot time. The function passed as parameter
- * will be called inside a NMI handler on all CPUs.
+ * nmi_shootdown_cpus() can only be invoked once. After the first
+ * invocation all other CPUs are stuck in crash_nmi_callback() and
+ * cannot respond to a second NMI.
*/
void nmi_shootdown_cpus(nmi_shootdown_cb callback)
{
unsigned long msecs;
+
local_irq_disable();
+ /*
+ * Avoid certain doom if a shootdown already occurred; re-registering
+ * the NMI handler will cause list corruption, modifying the callback
+ * will do who knows what, etc...
+ */
+ if (WARN_ON_ONCE(crash_ipi_issued))
+ return;
+
/* Make a note of crashing cpu. Will be used in NMI callback. */
crashing_cpu = safe_smp_processor_id();
@@ -867,7 +896,17 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
msecs--;
}
- /* Leave the nmi callback set */
+ /*
+ * Leave the nmi callback set, shootdown is a one-time thing. Clearing
+ * the callback could result in a NULL pointer dereference if a CPU
+ * (finally) responds after the timeout expires.
+ */
+}
+
+static inline void nmi_shootdown_cpus_on_restart(void)
+{
+ if (!crash_ipi_issued)
+ nmi_shootdown_cpus(NULL);
}
/*
@@ -897,6 +936,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
/* No other CPUs to shoot down */
}
+static inline void nmi_shootdown_cpus_on_restart(void) { }
+
void run_crash_ipi_callback(struct pt_regs *regs)
{
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f2d3155e2a6bac44d16f04415a321e8707d895c6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167810000636131(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
f2d3155e2a6b ("KVM: s390: disable migration mode when dirty tracking is disabled")
ec5c86976674 ("KVM: s390: Skip gfn/size sanity checks on memslot DELETE or FLAGS_ONLY")
cf5b486922dc ("KVM: s390: Use "new" memslot instead of userspace memory region")
106ee47dc633 ("docs: kvm: Convert api.txt to ReST format")
6c972ba685d5 ("docs: kvm: convert devices/vm.txt to ReST")
aff7aeea5483 ("docs: kvm: convert devices/vfio.txt to ReST")
e777a5bd98c6 ("docs: kvm: convert devices/vcpu.txt to ReST")
e94474300361 ("docs: kvm: convert devices/s390_flic.txt to ReST")
05c47036c62e ("docs: kvm: convert devices/mpic.txt to ReST")
c0d1c8a0af59 ("docs: kvm: devices/arm-vgit-v3.txt to ReST")
d371c011fc5e ("docs: kvm: devices/arm-vgic-its.txt to ReST format")
7bd460fc1dfa ("docs: kvm: add arm/pvtime.rst to index.rst")
290a6bb06de9 ("arm64: KVM: Add UAPI notes for swapped registers")
22945688acd4 ("KVM: PPC: Book3S HV: Support reset of secure guest")
008e359c76d8 ("KVM: PPC: Book3S HV: Radix changes for secure guest")
60f0a643aa44 ("KVM: PPC: Book3S HV: Shared pages support for secure guests")
ca9f4942670c ("KVM: PPC: Book3S HV: Support for running secure guests")
a4b28f5c6798 ("Merge remote-tracking branch 'kvmarm/kvm-arm64/stolen-time' into kvmarm-master/next")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2d3155e2a6bac44d16f04415a321e8707d895c6 Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb(a)linux.ibm.com>
Date: Fri, 27 Jan 2023 15:05:32 +0100
Subject: [PATCH] KVM: s390: disable migration mode when dirty tracking is
disabled
Migration mode is a VM attribute which enables tracking of changes in
storage attributes (PGSTE). It assumes dirty tracking is enabled on all
memslots to keep a dirty bitmap of pages with changed storage attributes.
When enabling migration mode, we currently check that dirty tracking is
enabled for all memslots. However, userspace can disable dirty tracking
without disabling migration mode.
Since migration mode is pointless with dirty tracking disabled, disable
migration mode whenever userspace disables dirty tracking on any slot.
Also update the documentation to clarify that dirty tracking must be
enabled when enabling migration mode, which is already enforced by the
code in kvm_s390_vm_start_migration().
Also highlight in the documentation for KVM_S390_GET_CMMA_BITS that it
can now fail with -EINVAL when dirty tracking is disabled while
migration mode is on. Move all the error codes to a table so this stays
readable.
To disable migration mode, slots_lock should be held, which is taken
in kvm_set_memory_region() and thus held in
kvm_arch_prepare_memory_region().
Restructure the prepare code a bit so all the sanity checking is done
before disabling migration mode. This ensures migration mode isn't
disabled when some sanity check fails.
Cc: stable(a)vger.kernel.org
Fixes: 190df4a212a7 ("KVM: s390: CMMA tracking, ESSA emulation, migration mode")
Signed-off-by: Nico Boehr <nrb(a)linux.ibm.com>
Reviewed-by: Janosch Frank <frankja(a)linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20230127140532.230651-2-nrb@linux.ibm.com
Message-Id: <20230127140532.230651-2-nrb(a)linux.ibm.com>
[frankja(a)linux.ibm.com: fixed commit message typo, moved api.rst error table upwards]
Signed-off-by: Janosch Frank <frankja(a)linux.ibm.com>
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index deb494f759ed..8cd7fd05d53b 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4449,6 +4449,18 @@ not holding a previously reported uncorrected error).
:Parameters: struct kvm_s390_cmma_log (in, out)
:Returns: 0 on success, a negative value on error
+Errors:
+
+ ====== =============================================================
+ ENOMEM not enough memory can be allocated to complete the task
+ ENXIO if CMMA is not enabled
+ EINVAL if KVM_S390_CMMA_PEEK is not set but migration mode was not enabled
+ EINVAL if KVM_S390_CMMA_PEEK is not set but dirty tracking has been
+ disabled (and thus migration mode was automatically disabled)
+ EFAULT if the userspace address is invalid or if no page table is
+ present for the addresses (e.g. when using hugepages).
+ ====== =============================================================
+
This ioctl is used to get the values of the CMMA bits on the s390
architecture. It is meant to be used in two scenarios:
@@ -4529,12 +4541,6 @@ mask is unused.
values points to the userspace buffer where the result will be stored.
-This ioctl can fail with -ENOMEM if not enough memory can be allocated to
-complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
-KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with
--EFAULT if the userspace address is invalid or if no page table is
-present for the addresses (e.g. when using hugepages).
-
4.108 KVM_S390_SET_CMMA_BITS
----------------------------
diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst
index 60acc39e0e93..147efec626e5 100644
--- a/Documentation/virt/kvm/devices/vm.rst
+++ b/Documentation/virt/kvm/devices/vm.rst
@@ -302,6 +302,10 @@ Allows userspace to start migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is already active will have
no effects.
+Dirty tracking must be enabled on all memslots, else -EINVAL is returned. When
+dirty tracking is disabled on any memslot, migration mode is automatically
+stopped.
+
:Parameters: none
:Returns: -ENOMEM if there is not enough free memory to start migration mode;
-EINVAL if the state of the VM is invalid (e.g. no memory defined);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e4890e04b210..cb72f9a09fb3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5633,23 +5633,40 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (kvm_s390_pv_get_handle(kvm))
return -EINVAL;
- if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
- return 0;
+ if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
+ /*
+ * A few sanity checks. We can have memory slots which have to be
+ * located/ended at a segment boundary (1MB). The memory in userland is
+ * ok to be fragmented into various different vmas. It is okay to mmap()
+ * and munmap() stuff in this slot after doing this call at any time
+ */
- /* A few sanity checks. We can have memory slots which have to be
- located/ended at a segment boundary (1MB). The memory in userland is
- ok to be fragmented into various different vmas. It is okay to mmap()
- and munmap() stuff in this slot after doing this call at any time */
+ if (new->userspace_addr & 0xffffful)
+ return -EINVAL;
- if (new->userspace_addr & 0xffffful)
- return -EINVAL;
+ size = new->npages * PAGE_SIZE;
+ if (size & 0xffffful)
+ return -EINVAL;
- size = new->npages * PAGE_SIZE;
- if (size & 0xffffful)
- return -EINVAL;
+ if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
+ return -EINVAL;
+ }
- if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
- return -EINVAL;
+ if (!kvm->arch.migration_mode)
+ return 0;
+
+ /*
+ * Turn off migration mode when:
+ * - userspace creates a new memslot with dirty logging off,
+ * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
+ * dirty logging is turned off.
+ * Migration mode expects dirty page logging being enabled to store
+ * its dirty bitmap.
+ */
+ if (change != KVM_MR_DELETE &&
+ !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+ WARN(kvm_s390_vm_stop_migration(kvm),
+ "Failed to stop migration mode");
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x ab52be1b310bcb39e6745d34a8f0e8475d67381a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812345579100(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
ab52be1b310b ("KVM: x86: Inject #GP on x2APIC WRMSR that sets reserved bits 63:32")
a57a31684d7b ("KVM: x86: Treat x2APIC's ICR as a 64-bit register, not two 32-bit regs")
5429478d038f ("KVM: x86: Add helpers to handle 64-bit APIC MSR read/writes")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ab52be1b310bcb39e6745d34a8f0e8475d67381a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Sat, 7 Jan 2023 01:10:21 +0000
Subject: [PATCH] KVM: x86: Inject #GP on x2APIC WRMSR that sets reserved bits
63:32
Reject attempts to set bits 63:32 for 32-bit x2APIC registers, i.e. all
x2APIC registers except ICR. Per Intel's SDM:
Non-zero writes (by WRMSR instruction) to reserved bits to these
registers will raise a general protection fault exception
Opportunistically fix a typo in a nearby comment.
Reported-by: Marc Orr <marcorr(a)google.com>
Cc: stable(a)vger.kernel.org
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Link: https://lore.kernel.org/r/20230107011025.565472-3-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9aca006b2d22..814b65106057 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -3114,13 +3114,17 @@ static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
{
/*
- * ICR is a 64-bit register in x2APIC mode (and Hyper'v PV vAPIC) and
+ * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
* can be written as such, all other registers remain accessible only
* through 32-bit reads/writes.
*/
if (reg == APIC_ICR)
return kvm_x2apic_icr_write(apic, data);
+ /* Bits 63:32 are reserved in all other registers. */
+ if (data >> 32)
+ return 1;
+
return kvm_lapic_reg_write(apic, reg, (u32)data);
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 5aede752a839904059c2b5d68be0dc4501c6c15f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678123417242209(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
5aede752a839 ("KVM: SVM: Process ICR on AVIC IPI delivery failure due to invalid target")
b51818afdc1d ("KVM: SVM: Don't rewrite guest ICR on AVIC IPI virtualization failure")
dd4589eee99d ("Revert "svm: Add warning message for AVIC IPI invalid target"")
63129754178c ("KVM: SVM: Pass struct kvm_vcpu to exit handlers (and many, many other places)")
2a32a77cefa6 ("KVM: SVM: merge update_cr0_intercept into svm_set_cr0")
11f0cbf0c605 ("KVM: nSVM: Trace VM-Enter consistency check failures")
6906e06db9b0 ("KVM: nSVM: Add missing checks for reserved bits to svm_set_nested_state()")
c08f390a75c1 ("KVM: nSVM: only copy L1 non-VMLOAD/VMSAVE data in svm_set_nested_state()")
9e8f0fbfff1a ("KVM: nSVM: rename functions and variables according to vmcbXY nomenclature")
193015adf40d ("KVM: nSVM: Track the ASID generation of the vmcb vmrun through the vmcb")
af18fa775d07 ("KVM: nSVM: Track the physical cpu of the vmcb vmrun through the vmcb")
4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest")
6d1b867d0456 ("KVM: SVM: Don't strip the C-bit from CR2 on #PF interception")
43c11d91fb1e ("KVM: x86: to track if L1 is running L2 VM")
9e46f6c6c959 ("KVM: SVM: Clear the CR4 register on reset")
2df8d3807ce7 ("KVM: SVM: Fix nested VM-Exit on #GP interception handling")
d2df592fd8c6 ("KVM: nSVM: prepare guest save area while is_guest_mode is true")
a04aead144fd ("KVM: nSVM: fix running nested guests when npt=0")
996ff5429e98 ("KVM: x86: move kvm_inject_gp up from kvm_set_dr to callers")
e6c804a848d6 ("KVM: SVM: Move AVIC vCPU kicking snippet to helper function")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aede752a839904059c2b5d68be0dc4501c6c15f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 6 Jan 2023 01:12:37 +0000
Subject: [PATCH] KVM: SVM: Process ICR on AVIC IPI delivery failure due to
invalid target
Emulate ICR writes on AVIC IPI failures due to invalid targets using the
same logic as failures due to invalid types. AVIC acceleration fails if
_any_ of the targets are invalid, and crucially VM-Exits before sending
IPIs to targets that _are_ valid. In logical mode, the destination is a
bitmap, i.e. a single IPI can target multiple logical IDs. Doing nothing
causes KVM to drop IPIs if at least one target is valid and at least one
target is invalid.
Fixes: 18f40c53e10f ("svm: Add VMEXIT handlers for AVIC")
Cc: stable(a)vger.kernel.org
Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20230106011306.85230-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 712330b80891..3b2c88b168ba 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -502,14 +502,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
switch (id) {
+ case AVIC_IPI_FAILURE_INVALID_TARGET:
case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
/*
* Emulate IPIs that are not handled by AVIC hardware, which
- * only virtualizes Fixed, Edge-Triggered INTRs. The exit is
- * a trap, e.g. ICR holds the correct value and RIP has been
- * advanced, KVM is responsible only for emulating the IPI.
- * Sadly, hardware may sometimes leave the BUSY flag set, in
- * which case KVM needs to emulate the ICR write as well in
+ * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
+ * if _any_ targets are invalid, e.g. if the logical mode mask
+ * is a superset of running vCPUs.
+ *
+ * The exit is a trap, e.g. ICR holds the correct value and RIP
+ * has been advanced, KVM is responsible only for emulating the
+ * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
+ * in which case KVM needs to emulate the ICR write as well in
* order to clear the BUSY flag.
*/
if (icrl & APIC_ICR_BUSY)
@@ -525,8 +529,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
*/
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
break;
- case AVIC_IPI_FAILURE_INVALID_TARGET:
- break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 5aede752a839904059c2b5d68be0dc4501c6c15f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812341610168(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
5aede752a839 ("KVM: SVM: Process ICR on AVIC IPI delivery failure due to invalid target")
b51818afdc1d ("KVM: SVM: Don't rewrite guest ICR on AVIC IPI virtualization failure")
dd4589eee99d ("Revert "svm: Add warning message for AVIC IPI invalid target"")
63129754178c ("KVM: SVM: Pass struct kvm_vcpu to exit handlers (and many, many other places)")
2a32a77cefa6 ("KVM: SVM: merge update_cr0_intercept into svm_set_cr0")
11f0cbf0c605 ("KVM: nSVM: Trace VM-Enter consistency check failures")
6906e06db9b0 ("KVM: nSVM: Add missing checks for reserved bits to svm_set_nested_state()")
c08f390a75c1 ("KVM: nSVM: only copy L1 non-VMLOAD/VMSAVE data in svm_set_nested_state()")
9e8f0fbfff1a ("KVM: nSVM: rename functions and variables according to vmcbXY nomenclature")
193015adf40d ("KVM: nSVM: Track the ASID generation of the vmcb vmrun through the vmcb")
af18fa775d07 ("KVM: nSVM: Track the physical cpu of the vmcb vmrun through the vmcb")
4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest")
6d1b867d0456 ("KVM: SVM: Don't strip the C-bit from CR2 on #PF interception")
43c11d91fb1e ("KVM: x86: to track if L1 is running L2 VM")
9e46f6c6c959 ("KVM: SVM: Clear the CR4 register on reset")
2df8d3807ce7 ("KVM: SVM: Fix nested VM-Exit on #GP interception handling")
d2df592fd8c6 ("KVM: nSVM: prepare guest save area while is_guest_mode is true")
a04aead144fd ("KVM: nSVM: fix running nested guests when npt=0")
996ff5429e98 ("KVM: x86: move kvm_inject_gp up from kvm_set_dr to callers")
e6c804a848d6 ("KVM: SVM: Move AVIC vCPU kicking snippet to helper function")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aede752a839904059c2b5d68be0dc4501c6c15f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 6 Jan 2023 01:12:37 +0000
Subject: [PATCH] KVM: SVM: Process ICR on AVIC IPI delivery failure due to
invalid target
Emulate ICR writes on AVIC IPI failures due to invalid targets using the
same logic as failures due to invalid types. AVIC acceleration fails if
_any_ of the targets are invalid, and crucially VM-Exits before sending
IPIs to targets that _are_ valid. In logical mode, the destination is a
bitmap, i.e. a single IPI can target multiple logical IDs. Doing nothing
causes KVM to drop IPIs if at least one target is valid and at least one
target is invalid.
Fixes: 18f40c53e10f ("svm: Add VMEXIT handlers for AVIC")
Cc: stable(a)vger.kernel.org
Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20230106011306.85230-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 712330b80891..3b2c88b168ba 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -502,14 +502,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
switch (id) {
+ case AVIC_IPI_FAILURE_INVALID_TARGET:
case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
/*
* Emulate IPIs that are not handled by AVIC hardware, which
- * only virtualizes Fixed, Edge-Triggered INTRs. The exit is
- * a trap, e.g. ICR holds the correct value and RIP has been
- * advanced, KVM is responsible only for emulating the IPI.
- * Sadly, hardware may sometimes leave the BUSY flag set, in
- * which case KVM needs to emulate the ICR write as well in
+ * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
+ * if _any_ targets are invalid, e.g. if the logical mode mask
+ * is a superset of running vCPUs.
+ *
+ * The exit is a trap, e.g. ICR holds the correct value and RIP
+ * has been advanced, KVM is responsible only for emulating the
+ * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
+ * in which case KVM needs to emulate the ICR write as well in
* order to clear the BUSY flag.
*/
if (icrl & APIC_ICR_BUSY)
@@ -525,8 +529,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
*/
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
break;
- case AVIC_IPI_FAILURE_INVALID_TARGET:
- break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5aede752a839904059c2b5d68be0dc4501c6c15f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678123415238203(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
5aede752a839 ("KVM: SVM: Process ICR on AVIC IPI delivery failure due to invalid target")
b51818afdc1d ("KVM: SVM: Don't rewrite guest ICR on AVIC IPI virtualization failure")
dd4589eee99d ("Revert "svm: Add warning message for AVIC IPI invalid target"")
63129754178c ("KVM: SVM: Pass struct kvm_vcpu to exit handlers (and many, many other places)")
2a32a77cefa6 ("KVM: SVM: merge update_cr0_intercept into svm_set_cr0")
11f0cbf0c605 ("KVM: nSVM: Trace VM-Enter consistency check failures")
6906e06db9b0 ("KVM: nSVM: Add missing checks for reserved bits to svm_set_nested_state()")
c08f390a75c1 ("KVM: nSVM: only copy L1 non-VMLOAD/VMSAVE data in svm_set_nested_state()")
9e8f0fbfff1a ("KVM: nSVM: rename functions and variables according to vmcbXY nomenclature")
193015adf40d ("KVM: nSVM: Track the ASID generation of the vmcb vmrun through the vmcb")
af18fa775d07 ("KVM: nSVM: Track the physical cpu of the vmcb vmrun through the vmcb")
4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest")
6d1b867d0456 ("KVM: SVM: Don't strip the C-bit from CR2 on #PF interception")
43c11d91fb1e ("KVM: x86: to track if L1 is running L2 VM")
9e46f6c6c959 ("KVM: SVM: Clear the CR4 register on reset")
2df8d3807ce7 ("KVM: SVM: Fix nested VM-Exit on #GP interception handling")
d2df592fd8c6 ("KVM: nSVM: prepare guest save area while is_guest_mode is true")
a04aead144fd ("KVM: nSVM: fix running nested guests when npt=0")
996ff5429e98 ("KVM: x86: move kvm_inject_gp up from kvm_set_dr to callers")
e6c804a848d6 ("KVM: SVM: Move AVIC vCPU kicking snippet to helper function")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aede752a839904059c2b5d68be0dc4501c6c15f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 6 Jan 2023 01:12:37 +0000
Subject: [PATCH] KVM: SVM: Process ICR on AVIC IPI delivery failure due to
invalid target
Emulate ICR writes on AVIC IPI failures due to invalid targets using the
same logic as failures due to invalid types. AVIC acceleration fails if
_any_ of the targets are invalid, and crucially VM-Exits before sending
IPIs to targets that _are_ valid. In logical mode, the destination is a
bitmap, i.e. a single IPI can target multiple logical IDs. Doing nothing
causes KVM to drop IPIs if at least one target is valid and at least one
target is invalid.
Fixes: 18f40c53e10f ("svm: Add VMEXIT handlers for AVIC")
Cc: stable(a)vger.kernel.org
Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20230106011306.85230-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 712330b80891..3b2c88b168ba 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -502,14 +502,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
switch (id) {
+ case AVIC_IPI_FAILURE_INVALID_TARGET:
case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
/*
* Emulate IPIs that are not handled by AVIC hardware, which
- * only virtualizes Fixed, Edge-Triggered INTRs. The exit is
- * a trap, e.g. ICR holds the correct value and RIP has been
- * advanced, KVM is responsible only for emulating the IPI.
- * Sadly, hardware may sometimes leave the BUSY flag set, in
- * which case KVM needs to emulate the ICR write as well in
+ * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
+ * if _any_ targets are invalid, e.g. if the logical mode mask
+ * is a superset of running vCPUs.
+ *
+ * The exit is a trap, e.g. ICR holds the correct value and RIP
+ * has been advanced, KVM is responsible only for emulating the
+ * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
+ * in which case KVM needs to emulate the ICR write as well in
* order to clear the BUSY flag.
*/
if (icrl & APIC_ICR_BUSY)
@@ -525,8 +529,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
*/
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
break;
- case AVIC_IPI_FAILURE_INVALID_TARGET:
- break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5aede752a839904059c2b5d68be0dc4501c6c15f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812341440188(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
5aede752a839 ("KVM: SVM: Process ICR on AVIC IPI delivery failure due to invalid target")
b51818afdc1d ("KVM: SVM: Don't rewrite guest ICR on AVIC IPI virtualization failure")
dd4589eee99d ("Revert "svm: Add warning message for AVIC IPI invalid target"")
63129754178c ("KVM: SVM: Pass struct kvm_vcpu to exit handlers (and many, many other places)")
2a32a77cefa6 ("KVM: SVM: merge update_cr0_intercept into svm_set_cr0")
11f0cbf0c605 ("KVM: nSVM: Trace VM-Enter consistency check failures")
6906e06db9b0 ("KVM: nSVM: Add missing checks for reserved bits to svm_set_nested_state()")
c08f390a75c1 ("KVM: nSVM: only copy L1 non-VMLOAD/VMSAVE data in svm_set_nested_state()")
9e8f0fbfff1a ("KVM: nSVM: rename functions and variables according to vmcbXY nomenclature")
193015adf40d ("KVM: nSVM: Track the ASID generation of the vmcb vmrun through the vmcb")
af18fa775d07 ("KVM: nSVM: Track the physical cpu of the vmcb vmrun through the vmcb")
4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest")
6d1b867d0456 ("KVM: SVM: Don't strip the C-bit from CR2 on #PF interception")
43c11d91fb1e ("KVM: x86: to track if L1 is running L2 VM")
9e46f6c6c959 ("KVM: SVM: Clear the CR4 register on reset")
2df8d3807ce7 ("KVM: SVM: Fix nested VM-Exit on #GP interception handling")
d2df592fd8c6 ("KVM: nSVM: prepare guest save area while is_guest_mode is true")
a04aead144fd ("KVM: nSVM: fix running nested guests when npt=0")
996ff5429e98 ("KVM: x86: move kvm_inject_gp up from kvm_set_dr to callers")
e6c804a848d6 ("KVM: SVM: Move AVIC vCPU kicking snippet to helper function")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aede752a839904059c2b5d68be0dc4501c6c15f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 6 Jan 2023 01:12:37 +0000
Subject: [PATCH] KVM: SVM: Process ICR on AVIC IPI delivery failure due to
invalid target
Emulate ICR writes on AVIC IPI failures due to invalid targets using the
same logic as failures due to invalid types. AVIC acceleration fails if
_any_ of the targets are invalid, and crucially VM-Exits before sending
IPIs to targets that _are_ valid. In logical mode, the destination is a
bitmap, i.e. a single IPI can target multiple logical IDs. Doing nothing
causes KVM to drop IPIs if at least one target is valid and at least one
target is invalid.
Fixes: 18f40c53e10f ("svm: Add VMEXIT handlers for AVIC")
Cc: stable(a)vger.kernel.org
Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20230106011306.85230-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 712330b80891..3b2c88b168ba 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -502,14 +502,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
switch (id) {
+ case AVIC_IPI_FAILURE_INVALID_TARGET:
case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
/*
* Emulate IPIs that are not handled by AVIC hardware, which
- * only virtualizes Fixed, Edge-Triggered INTRs. The exit is
- * a trap, e.g. ICR holds the correct value and RIP has been
- * advanced, KVM is responsible only for emulating the IPI.
- * Sadly, hardware may sometimes leave the BUSY flag set, in
- * which case KVM needs to emulate the ICR write as well in
+ * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
+ * if _any_ targets are invalid, e.g. if the logical mode mask
+ * is a superset of running vCPUs.
+ *
+ * The exit is a trap, e.g. ICR holds the correct value and RIP
+ * has been advanced, KVM is responsible only for emulating the
+ * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
+ * in which case KVM needs to emulate the ICR write as well in
* order to clear the BUSY flag.
*/
if (icrl & APIC_ICR_BUSY)
@@ -525,8 +529,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
*/
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
break;
- case AVIC_IPI_FAILURE_INVALID_TARGET:
- break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5aede752a839904059c2b5d68be0dc4501c6c15f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167812341322173(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
5aede752a839 ("KVM: SVM: Process ICR on AVIC IPI delivery failure due to invalid target")
b51818afdc1d ("KVM: SVM: Don't rewrite guest ICR on AVIC IPI virtualization failure")
dd4589eee99d ("Revert "svm: Add warning message for AVIC IPI invalid target"")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5aede752a839904059c2b5d68be0dc4501c6c15f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 6 Jan 2023 01:12:37 +0000
Subject: [PATCH] KVM: SVM: Process ICR on AVIC IPI delivery failure due to
invalid target
Emulate ICR writes on AVIC IPI failures due to invalid targets using the
same logic as failures due to invalid types. AVIC acceleration fails if
_any_ of the targets are invalid, and crucially VM-Exits before sending
IPIs to targets that _are_ valid. In logical mode, the destination is a
bitmap, i.e. a single IPI can target multiple logical IDs. Doing nothing
causes KVM to drop IPIs if at least one target is valid and at least one
target is invalid.
Fixes: 18f40c53e10f ("svm: Add VMEXIT handlers for AVIC")
Cc: stable(a)vger.kernel.org
Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20230106011306.85230-5-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 712330b80891..3b2c88b168ba 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -502,14 +502,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
switch (id) {
+ case AVIC_IPI_FAILURE_INVALID_TARGET:
case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
/*
* Emulate IPIs that are not handled by AVIC hardware, which
- * only virtualizes Fixed, Edge-Triggered INTRs. The exit is
- * a trap, e.g. ICR holds the correct value and RIP has been
- * advanced, KVM is responsible only for emulating the IPI.
- * Sadly, hardware may sometimes leave the BUSY flag set, in
- * which case KVM needs to emulate the ICR write as well in
+ * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
+ * if _any_ targets are invalid, e.g. if the logical mode mask
+ * is a superset of running vCPUs.
+ *
+ * The exit is a trap, e.g. ICR holds the correct value and RIP
+ * has been advanced, KVM is responsible only for emulating the
+ * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
+ * in which case KVM needs to emulate the ICR write as well in
* order to clear the BUSY flag.
*/
if (icrl & APIC_ICR_BUSY)
@@ -525,8 +529,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
*/
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
break;
- case AVIC_IPI_FAILURE_INVALID_TARGET:
- break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 97a71c444a147ae41c7d0ab5b3d855d7f762f3ed
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678123340223133(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
97a71c444a14 ("KVM: x86: Purge "highest ISR" cache when updating APICv state")
ce0a58f4756c ("KVM: x86: Move "apicv_active" into "struct kvm_lapic"")
d39850f57d21 ("KVM: x86: Drop @vcpu parameter from kvm_x86_ops.hwapic_isr_update()")
47e8eec83262 ("Merge tag 'kvmarm-5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 97a71c444a147ae41c7d0ab5b3d855d7f762f3ed Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Fri, 6 Jan 2023 01:12:35 +0000
Subject: [PATCH] KVM: x86: Purge "highest ISR" cache when updating APICv state
Purge the "highest ISR" cache when updating APICv state on a vCPU. The
cache must not be used when APICv is active as hardware may emulate EOIs
(and other operations) without exiting to KVM.
This fixes a bug where KVM will effectively block IRQs in perpetuity due
to the "highest ISR" never getting reset if APICv is activated on a vCPU
while an IRQ is in-service. Hardware emulates the EOI and KVM never gets
a chance to update its cache.
Fixes: b26a695a1d78 ("kvm: lapic: Introduce APICv update helper function")
Cc: stable(a)vger.kernel.org
Cc: Suravee Suthikulpanit <suravee.suthikulpanit(a)amd.com>
Cc: Maxim Levitsky <mlevitsk(a)redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini(a)redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk(a)redhat.com>
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20230106011306.85230-3-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5c0f93fc073a..33a661d82da7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2424,6 +2424,7 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
*/
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
}
+ apic->highest_isr_cache = -1;
}
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -2479,7 +2480,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
kvm_apic_update_apicv(vcpu);
- apic->highest_isr_cache = -1;
update_divide_count(apic);
atomic_set(&apic->lapic_timer.pending, 0);
@@ -2767,7 +2767,6 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
__start_apic_timer(apic, APIC_TMCCT);
kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
kvm_apic_update_apicv(vcpu);
- apic->highest_isr_cache = -1;
if (apic->apicv_active) {
static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x e32b120071ea114efc0b4ddd439547750b85f618
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167811895425359(a)kroah.com' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
e32b120071ea ("KVM: VMX: Do _all_ initialization before exposing /dev/kvm to userspace")
4f8396b96a9f ("KVM: x86: Move guts of kvm_arch_init() to standalone helper")
da66de44b01e ("KVM: VMX: Don't bother disabling eVMCS static key on module exit")
2916b70fc342 ("KVM: VMX: Reset eVMCS controls in VP assist page during hardware disabling")
94bda2f4cd86 ("KVM: x86: Reject loading KVM if host.PAT[0] != WB")
fdc298da8661 ("KVM: x86: Move kvm_ops_static_call_update() to x86.c")
5e17b2ee45b9 ("kvm: x86: Require const tsc for RT")
925088781eed ("KVM: x86: Fix pointer mistmatch warning when patching RET0 static calls")
5be2226f417d ("KVM: x86: allow defining return-0 static calls")
abb6d479e226 ("KVM: x86: make several APIC virtualization callbacks optional")
dd2319c61888 ("KVM: x86: warn on incorrectly NULL members of kvm_x86_ops")
e4fc23bad813 ("KVM: x86: remove KVM_X86_OP_NULL and mark optional kvm_x86_ops")
8a2897853c53 ("KVM: x86: return 1 unconditionally for availability of KVM_CAP_VAPIC")
db6e7adf8de9 ("KVM: SVM: Rename AVIC helpers to use "avic" prefix instead of "svm"")
4e71cad31c62 ("Merge remote-tracking branch 'kvm/master' into HEAD")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e32b120071ea114efc0b4ddd439547750b85f618 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:08:58 +0000
Subject: [PATCH] KVM: VMX: Do _all_ initialization before exposing /dev/kvm to
userspace
Call kvm_init() only after _all_ setup is complete, as kvm_init() exposes
/dev/kvm to userspace and thus allows userspace to create VMs (and call
other ioctls). E.g. KVM will encounter a NULL pointer when attempting to
add a vCPU to the per-CPU loaded_vmcss_on_cpu list if userspace is able to
create a VM before vmx_init() configures said list.
BUG: kernel NULL pointer dereference, address: 0000000000000008
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP
CPU: 6 PID: 1143 Comm: stable Not tainted 6.0.0-rc7+ #988
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:vmx_vcpu_load_vmcs+0x68/0x230 [kvm_intel]
<TASK>
vmx_vcpu_load+0x16/0x60 [kvm_intel]
kvm_arch_vcpu_load+0x32/0x1f0 [kvm]
vcpu_load+0x2f/0x40 [kvm]
kvm_arch_vcpu_create+0x231/0x310 [kvm]
kvm_vm_ioctl+0x79f/0xe10 [kvm]
? handle_mm_fault+0xb1/0x220
__x64_sys_ioctl+0x80/0xb0
do_syscall_64+0x2b/0x50
entry_SYSCALL_64_after_hwframe+0x46/0xb0
RIP: 0033:0x7f5a6b05743b
</TASK>
Modules linked in: vhost_net vhost vhost_iotlb tap kvm_intel(+) kvm irqbypass
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20221130230934.1014142-15-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1e2eab6bda16..e0e3f2c1f681 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8564,19 +8564,23 @@ static void vmx_cleanup_l1d_flush(void)
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}
-static void vmx_exit(void)
+static void __vmx_exit(void)
{
+ allow_smaller_maxphyaddr = false;
+
#ifdef CONFIG_KEXEC_CORE
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
+ vmx_cleanup_l1d_flush();
+}
+static void vmx_exit(void)
+{
kvm_exit();
kvm_x86_vendor_exit();
- vmx_cleanup_l1d_flush();
-
- allow_smaller_maxphyaddr = false;
+ __vmx_exit();
}
module_exit(vmx_exit);
@@ -8594,11 +8598,6 @@ static int __init vmx_init(void)
if (r)
return r;
- r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
- if (r)
- goto err_kvm_init;
-
/*
* Must be called after common x86 init so enable_ept is properly set
* up. Hand the parameter mitigation value in which was stored in
@@ -8632,11 +8631,20 @@ static int __init vmx_init(void)
if (!enable_ept)
allow_smaller_maxphyaddr = true;
+ /*
+ * Common KVM initialization _must_ come last, after this, /dev/kvm is
+ * exposed to userspace!
+ */
+ r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
+ __alignof__(struct vcpu_vmx), THIS_MODULE);
+ if (r)
+ goto err_kvm_init;
+
return 0;
-err_l1d_flush:
- vmx_exit();
err_kvm_init:
+ __vmx_exit();
+err_l1d_flush:
kvm_x86_vendor_exit();
return r;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x e32b120071ea114efc0b4ddd439547750b85f618
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '16781189523792(a)kroah.com' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
e32b120071ea ("KVM: VMX: Do _all_ initialization before exposing /dev/kvm to userspace")
4f8396b96a9f ("KVM: x86: Move guts of kvm_arch_init() to standalone helper")
da66de44b01e ("KVM: VMX: Don't bother disabling eVMCS static key on module exit")
2916b70fc342 ("KVM: VMX: Reset eVMCS controls in VP assist page during hardware disabling")
94bda2f4cd86 ("KVM: x86: Reject loading KVM if host.PAT[0] != WB")
fdc298da8661 ("KVM: x86: Move kvm_ops_static_call_update() to x86.c")
5e17b2ee45b9 ("kvm: x86: Require const tsc for RT")
925088781eed ("KVM: x86: Fix pointer mistmatch warning when patching RET0 static calls")
5be2226f417d ("KVM: x86: allow defining return-0 static calls")
abb6d479e226 ("KVM: x86: make several APIC virtualization callbacks optional")
dd2319c61888 ("KVM: x86: warn on incorrectly NULL members of kvm_x86_ops")
e4fc23bad813 ("KVM: x86: remove KVM_X86_OP_NULL and mark optional kvm_x86_ops")
8a2897853c53 ("KVM: x86: return 1 unconditionally for availability of KVM_CAP_VAPIC")
db6e7adf8de9 ("KVM: SVM: Rename AVIC helpers to use "avic" prefix instead of "svm"")
4e71cad31c62 ("Merge remote-tracking branch 'kvm/master' into HEAD")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e32b120071ea114efc0b4ddd439547750b85f618 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:08:58 +0000
Subject: [PATCH] KVM: VMX: Do _all_ initialization before exposing /dev/kvm to
userspace
Call kvm_init() only after _all_ setup is complete, as kvm_init() exposes
/dev/kvm to userspace and thus allows userspace to create VMs (and call
other ioctls). E.g. KVM will encounter a NULL pointer when attempting to
add a vCPU to the per-CPU loaded_vmcss_on_cpu list if userspace is able to
create a VM before vmx_init() configures said list.
BUG: kernel NULL pointer dereference, address: 0000000000000008
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP
CPU: 6 PID: 1143 Comm: stable Not tainted 6.0.0-rc7+ #988
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:vmx_vcpu_load_vmcs+0x68/0x230 [kvm_intel]
<TASK>
vmx_vcpu_load+0x16/0x60 [kvm_intel]
kvm_arch_vcpu_load+0x32/0x1f0 [kvm]
vcpu_load+0x2f/0x40 [kvm]
kvm_arch_vcpu_create+0x231/0x310 [kvm]
kvm_vm_ioctl+0x79f/0xe10 [kvm]
? handle_mm_fault+0xb1/0x220
__x64_sys_ioctl+0x80/0xb0
do_syscall_64+0x2b/0x50
entry_SYSCALL_64_after_hwframe+0x46/0xb0
RIP: 0033:0x7f5a6b05743b
</TASK>
Modules linked in: vhost_net vhost vhost_iotlb tap kvm_intel(+) kvm irqbypass
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20221130230934.1014142-15-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1e2eab6bda16..e0e3f2c1f681 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8564,19 +8564,23 @@ static void vmx_cleanup_l1d_flush(void)
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}
-static void vmx_exit(void)
+static void __vmx_exit(void)
{
+ allow_smaller_maxphyaddr = false;
+
#ifdef CONFIG_KEXEC_CORE
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
+ vmx_cleanup_l1d_flush();
+}
+static void vmx_exit(void)
+{
kvm_exit();
kvm_x86_vendor_exit();
- vmx_cleanup_l1d_flush();
-
- allow_smaller_maxphyaddr = false;
+ __vmx_exit();
}
module_exit(vmx_exit);
@@ -8594,11 +8598,6 @@ static int __init vmx_init(void)
if (r)
return r;
- r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
- if (r)
- goto err_kvm_init;
-
/*
* Must be called after common x86 init so enable_ept is properly set
* up. Hand the parameter mitigation value in which was stored in
@@ -8632,11 +8631,20 @@ static int __init vmx_init(void)
if (!enable_ept)
allow_smaller_maxphyaddr = true;
+ /*
+ * Common KVM initialization _must_ come last, after this, /dev/kvm is
+ * exposed to userspace!
+ */
+ r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
+ __alignof__(struct vcpu_vmx), THIS_MODULE);
+ if (r)
+ goto err_kvm_init;
+
return 0;
-err_l1d_flush:
- vmx_exit();
err_kvm_init:
+ __vmx_exit();
+err_l1d_flush:
kvm_x86_vendor_exit();
return r;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x e32b120071ea114efc0b4ddd439547750b85f618
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167811895112883(a)kroah.com' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
e32b120071ea ("KVM: VMX: Do _all_ initialization before exposing /dev/kvm to userspace")
4f8396b96a9f ("KVM: x86: Move guts of kvm_arch_init() to standalone helper")
da66de44b01e ("KVM: VMX: Don't bother disabling eVMCS static key on module exit")
2916b70fc342 ("KVM: VMX: Reset eVMCS controls in VP assist page during hardware disabling")
94bda2f4cd86 ("KVM: x86: Reject loading KVM if host.PAT[0] != WB")
fdc298da8661 ("KVM: x86: Move kvm_ops_static_call_update() to x86.c")
5e17b2ee45b9 ("kvm: x86: Require const tsc for RT")
925088781eed ("KVM: x86: Fix pointer mistmatch warning when patching RET0 static calls")
5be2226f417d ("KVM: x86: allow defining return-0 static calls")
abb6d479e226 ("KVM: x86: make several APIC virtualization callbacks optional")
dd2319c61888 ("KVM: x86: warn on incorrectly NULL members of kvm_x86_ops")
e4fc23bad813 ("KVM: x86: remove KVM_X86_OP_NULL and mark optional kvm_x86_ops")
8a2897853c53 ("KVM: x86: return 1 unconditionally for availability of KVM_CAP_VAPIC")
db6e7adf8de9 ("KVM: SVM: Rename AVIC helpers to use "avic" prefix instead of "svm"")
4e71cad31c62 ("Merge remote-tracking branch 'kvm/master' into HEAD")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e32b120071ea114efc0b4ddd439547750b85f618 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:08:58 +0000
Subject: [PATCH] KVM: VMX: Do _all_ initialization before exposing /dev/kvm to
userspace
Call kvm_init() only after _all_ setup is complete, as kvm_init() exposes
/dev/kvm to userspace and thus allows userspace to create VMs (and call
other ioctls). E.g. KVM will encounter a NULL pointer when attempting to
add a vCPU to the per-CPU loaded_vmcss_on_cpu list if userspace is able to
create a VM before vmx_init() configures said list.
BUG: kernel NULL pointer dereference, address: 0000000000000008
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP
CPU: 6 PID: 1143 Comm: stable Not tainted 6.0.0-rc7+ #988
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:vmx_vcpu_load_vmcs+0x68/0x230 [kvm_intel]
<TASK>
vmx_vcpu_load+0x16/0x60 [kvm_intel]
kvm_arch_vcpu_load+0x32/0x1f0 [kvm]
vcpu_load+0x2f/0x40 [kvm]
kvm_arch_vcpu_create+0x231/0x310 [kvm]
kvm_vm_ioctl+0x79f/0xe10 [kvm]
? handle_mm_fault+0xb1/0x220
__x64_sys_ioctl+0x80/0xb0
do_syscall_64+0x2b/0x50
entry_SYSCALL_64_after_hwframe+0x46/0xb0
RIP: 0033:0x7f5a6b05743b
</TASK>
Modules linked in: vhost_net vhost vhost_iotlb tap kvm_intel(+) kvm irqbypass
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20221130230934.1014142-15-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1e2eab6bda16..e0e3f2c1f681 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8564,19 +8564,23 @@ static void vmx_cleanup_l1d_flush(void)
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}
-static void vmx_exit(void)
+static void __vmx_exit(void)
{
+ allow_smaller_maxphyaddr = false;
+
#ifdef CONFIG_KEXEC_CORE
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
+ vmx_cleanup_l1d_flush();
+}
+static void vmx_exit(void)
+{
kvm_exit();
kvm_x86_vendor_exit();
- vmx_cleanup_l1d_flush();
-
- allow_smaller_maxphyaddr = false;
+ __vmx_exit();
}
module_exit(vmx_exit);
@@ -8594,11 +8598,6 @@ static int __init vmx_init(void)
if (r)
return r;
- r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
- if (r)
- goto err_kvm_init;
-
/*
* Must be called after common x86 init so enable_ept is properly set
* up. Hand the parameter mitigation value in which was stored in
@@ -8632,11 +8631,20 @@ static int __init vmx_init(void)
if (!enable_ept)
allow_smaller_maxphyaddr = true;
+ /*
+ * Common KVM initialization _must_ come last, after this, /dev/kvm is
+ * exposed to userspace!
+ */
+ r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
+ __alignof__(struct vcpu_vmx), THIS_MODULE);
+ if (r)
+ goto err_kvm_init;
+
return 0;
-err_l1d_flush:
- vmx_exit();
err_kvm_init:
+ __vmx_exit();
+err_l1d_flush:
kvm_x86_vendor_exit();
return r;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x e32b120071ea114efc0b4ddd439547750b85f618
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '1678118950130149(a)kroah.com' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
e32b120071ea ("KVM: VMX: Do _all_ initialization before exposing /dev/kvm to userspace")
4f8396b96a9f ("KVM: x86: Move guts of kvm_arch_init() to standalone helper")
da66de44b01e ("KVM: VMX: Don't bother disabling eVMCS static key on module exit")
2916b70fc342 ("KVM: VMX: Reset eVMCS controls in VP assist page during hardware disabling")
94bda2f4cd86 ("KVM: x86: Reject loading KVM if host.PAT[0] != WB")
fdc298da8661 ("KVM: x86: Move kvm_ops_static_call_update() to x86.c")
5e17b2ee45b9 ("kvm: x86: Require const tsc for RT")
925088781eed ("KVM: x86: Fix pointer mistmatch warning when patching RET0 static calls")
5be2226f417d ("KVM: x86: allow defining return-0 static calls")
abb6d479e226 ("KVM: x86: make several APIC virtualization callbacks optional")
dd2319c61888 ("KVM: x86: warn on incorrectly NULL members of kvm_x86_ops")
e4fc23bad813 ("KVM: x86: remove KVM_X86_OP_NULL and mark optional kvm_x86_ops")
8a2897853c53 ("KVM: x86: return 1 unconditionally for availability of KVM_CAP_VAPIC")
db6e7adf8de9 ("KVM: SVM: Rename AVIC helpers to use "avic" prefix instead of "svm"")
4e71cad31c62 ("Merge remote-tracking branch 'kvm/master' into HEAD")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e32b120071ea114efc0b4ddd439547750b85f618 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:08:58 +0000
Subject: [PATCH] KVM: VMX: Do _all_ initialization before exposing /dev/kvm to
userspace
Call kvm_init() only after _all_ setup is complete, as kvm_init() exposes
/dev/kvm to userspace and thus allows userspace to create VMs (and call
other ioctls). E.g. KVM will encounter a NULL pointer when attempting to
add a vCPU to the per-CPU loaded_vmcss_on_cpu list if userspace is able to
create a VM before vmx_init() configures said list.
BUG: kernel NULL pointer dereference, address: 0000000000000008
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP
CPU: 6 PID: 1143 Comm: stable Not tainted 6.0.0-rc7+ #988
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:vmx_vcpu_load_vmcs+0x68/0x230 [kvm_intel]
<TASK>
vmx_vcpu_load+0x16/0x60 [kvm_intel]
kvm_arch_vcpu_load+0x32/0x1f0 [kvm]
vcpu_load+0x2f/0x40 [kvm]
kvm_arch_vcpu_create+0x231/0x310 [kvm]
kvm_vm_ioctl+0x79f/0xe10 [kvm]
? handle_mm_fault+0xb1/0x220
__x64_sys_ioctl+0x80/0xb0
do_syscall_64+0x2b/0x50
entry_SYSCALL_64_after_hwframe+0x46/0xb0
RIP: 0033:0x7f5a6b05743b
</TASK>
Modules linked in: vhost_net vhost vhost_iotlb tap kvm_intel(+) kvm irqbypass
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20221130230934.1014142-15-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1e2eab6bda16..e0e3f2c1f681 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8564,19 +8564,23 @@ static void vmx_cleanup_l1d_flush(void)
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}
-static void vmx_exit(void)
+static void __vmx_exit(void)
{
+ allow_smaller_maxphyaddr = false;
+
#ifdef CONFIG_KEXEC_CORE
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
+ vmx_cleanup_l1d_flush();
+}
+static void vmx_exit(void)
+{
kvm_exit();
kvm_x86_vendor_exit();
- vmx_cleanup_l1d_flush();
-
- allow_smaller_maxphyaddr = false;
+ __vmx_exit();
}
module_exit(vmx_exit);
@@ -8594,11 +8598,6 @@ static int __init vmx_init(void)
if (r)
return r;
- r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
- if (r)
- goto err_kvm_init;
-
/*
* Must be called after common x86 init so enable_ept is properly set
* up. Hand the parameter mitigation value in which was stored in
@@ -8632,11 +8631,20 @@ static int __init vmx_init(void)
if (!enable_ept)
allow_smaller_maxphyaddr = true;
+ /*
+ * Common KVM initialization _must_ come last, after this, /dev/kvm is
+ * exposed to userspace!
+ */
+ r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
+ __alignof__(struct vcpu_vmx), THIS_MODULE);
+ if (r)
+ goto err_kvm_init;
+
return 0;
-err_l1d_flush:
- vmx_exit();
err_kvm_init:
+ __vmx_exit();
+err_l1d_flush:
kvm_x86_vendor_exit();
return r;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e32b120071ea114efc0b4ddd439547750b85f618
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '167811894915491(a)kroah.com' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e32b120071ea ("KVM: VMX: Do _all_ initialization before exposing /dev/kvm to userspace")
4f8396b96a9f ("KVM: x86: Move guts of kvm_arch_init() to standalone helper")
da66de44b01e ("KVM: VMX: Don't bother disabling eVMCS static key on module exit")
2916b70fc342 ("KVM: VMX: Reset eVMCS controls in VP assist page during hardware disabling")
94bda2f4cd86 ("KVM: x86: Reject loading KVM if host.PAT[0] != WB")
fdc298da8661 ("KVM: x86: Move kvm_ops_static_call_update() to x86.c")
5e17b2ee45b9 ("kvm: x86: Require const tsc for RT")
925088781eed ("KVM: x86: Fix pointer mistmatch warning when patching RET0 static calls")
5be2226f417d ("KVM: x86: allow defining return-0 static calls")
abb6d479e226 ("KVM: x86: make several APIC virtualization callbacks optional")
dd2319c61888 ("KVM: x86: warn on incorrectly NULL members of kvm_x86_ops")
e4fc23bad813 ("KVM: x86: remove KVM_X86_OP_NULL and mark optional kvm_x86_ops")
8a2897853c53 ("KVM: x86: return 1 unconditionally for availability of KVM_CAP_VAPIC")
db6e7adf8de9 ("KVM: SVM: Rename AVIC helpers to use "avic" prefix instead of "svm"")
4e71cad31c62 ("Merge remote-tracking branch 'kvm/master' into HEAD")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e32b120071ea114efc0b4ddd439547750b85f618 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc(a)google.com>
Date: Wed, 30 Nov 2022 23:08:58 +0000
Subject: [PATCH] KVM: VMX: Do _all_ initialization before exposing /dev/kvm to
userspace
Call kvm_init() only after _all_ setup is complete, as kvm_init() exposes
/dev/kvm to userspace and thus allows userspace to create VMs (and call
other ioctls). E.g. KVM will encounter a NULL pointer when attempting to
add a vCPU to the per-CPU loaded_vmcss_on_cpu list if userspace is able to
create a VM before vmx_init() configures said list.
BUG: kernel NULL pointer dereference, address: 0000000000000008
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP
CPU: 6 PID: 1143 Comm: stable Not tainted 6.0.0-rc7+ #988
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:vmx_vcpu_load_vmcs+0x68/0x230 [kvm_intel]
<TASK>
vmx_vcpu_load+0x16/0x60 [kvm_intel]
kvm_arch_vcpu_load+0x32/0x1f0 [kvm]
vcpu_load+0x2f/0x40 [kvm]
kvm_arch_vcpu_create+0x231/0x310 [kvm]
kvm_vm_ioctl+0x79f/0xe10 [kvm]
? handle_mm_fault+0xb1/0x220
__x64_sys_ioctl+0x80/0xb0
do_syscall_64+0x2b/0x50
entry_SYSCALL_64_after_hwframe+0x46/0xb0
RIP: 0033:0x7f5a6b05743b
</TASK>
Modules linked in: vhost_net vhost vhost_iotlb tap kvm_intel(+) kvm irqbypass
Cc: stable(a)vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc(a)google.com>
Message-Id: <20221130230934.1014142-15-seanjc(a)google.com>
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1e2eab6bda16..e0e3f2c1f681 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8564,19 +8564,23 @@ static void vmx_cleanup_l1d_flush(void)
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_AUTO;
}
-static void vmx_exit(void)
+static void __vmx_exit(void)
{
+ allow_smaller_maxphyaddr = false;
+
#ifdef CONFIG_KEXEC_CORE
RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
synchronize_rcu();
#endif
+ vmx_cleanup_l1d_flush();
+}
+static void vmx_exit(void)
+{
kvm_exit();
kvm_x86_vendor_exit();
- vmx_cleanup_l1d_flush();
-
- allow_smaller_maxphyaddr = false;
+ __vmx_exit();
}
module_exit(vmx_exit);
@@ -8594,11 +8598,6 @@ static int __init vmx_init(void)
if (r)
return r;
- r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
- __alignof__(struct vcpu_vmx), THIS_MODULE);
- if (r)
- goto err_kvm_init;
-
/*
* Must be called after common x86 init so enable_ept is properly set
* up. Hand the parameter mitigation value in which was stored in
@@ -8632,11 +8631,20 @@ static int __init vmx_init(void)
if (!enable_ept)
allow_smaller_maxphyaddr = true;
+ /*
+ * Common KVM initialization _must_ come last, after this, /dev/kvm is
+ * exposed to userspace!
+ */
+ r = kvm_init(&vmx_init_ops, sizeof(struct vcpu_vmx),
+ __alignof__(struct vcpu_vmx), THIS_MODULE);
+ if (r)
+ goto err_kvm_init;
+
return 0;
-err_l1d_flush:
- vmx_exit();
err_kvm_init:
+ __vmx_exit();
+err_l1d_flush:
kvm_x86_vendor_exit();
return r;
}