The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x dd4f730b557ce701a2cd4f604bf1e57667bd8b6e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040805-untaken-baggage-498c@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dd4f730b557ce701a2cd4f604bf1e57667bd8b6e Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Mon, 10 Feb 2025 21:28:25 -0500
Subject: [PATCH] ACPI: platform-profile: Fix CFI violation when accessing
sysfs files
When an attribute group is created with sysfs_create_group(), the
->sysfs_ops() callback is set to kobj_sysfs_ops, which sets the ->show()
and ->store() callbacks to kobj_attr_show() and kobj_attr_store()
respectively. These functions use container_of() to get the respective
callback from the passed attribute, meaning that these callbacks need to
be of the same type as the callbacks in 'struct kobj_attribute'.
However, ->show() and ->store() in the platform_profile driver are
defined for struct device_attribute with the help of DEVICE_ATTR_RO()
and DEVICE_ATTR_RW(), which results in a CFI violation when accessing
platform_profile or platform_profile_choices under /sys/firmware/acpi
because the types do not match:
CFI failure at kobj_attr_show+0x19/0x30 (target: platform_profile_choices_show+0x0/0x140; expected type: 0x7a69590c)
There is no functional issue from the type mismatch because the layout
of 'struct kobj_attribute' and 'struct device_attribute' are the same,
so the container_of() cast does not break anything aside from CFI.
Change the type of platform_profile_choices_show() and
platform_profile_{show,store}() to match the callbacks in
'struct kobj_attribute' and update the attribute variables to
match, which resolves the CFI violation.
Cc: All applicable <stable(a)vger.kernel.org>
Fixes: a2ff95e018f1 ("ACPI: platform: Add platform profile support")
Reported-by: John Rowley <lkml(a)johnrowley.me>
Closes: https://github.com/ClangBuiltLinux/linux/issues/2047
Tested-by: John Rowley <lkml(a)johnrowley.me>
Reviewed-by: Sami Tolvanen <samitolvanen(a)google.com>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Reviewed-by: Mark Pearson <mpearson-lenovo(a)squebb.ca>
Tested-by: Mark Pearson <mpearson-lenovo(a)squebb.ca>
Link: https://patch.msgid.link/20250210-acpi-platform_profile-fix-cfi-violation-v…
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c
index fc92e43d0fe9..1b6317f759f9 100644
--- a/drivers/acpi/platform_profile.c
+++ b/drivers/acpi/platform_profile.c
@@ -260,14 +260,14 @@ static int _aggregate_choices(struct device *dev, void *data)
/**
* platform_profile_choices_show - Show the available profile choices for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to write to
*
* Return: The number of bytes written
*/
-static ssize_t platform_profile_choices_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_choices_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)];
@@ -333,14 +333,14 @@ static int _store_and_notify(struct device *dev, void *data)
/**
* platform_profile_show - Show the current profile for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to write to
*
* Return: The number of bytes written
*/
-static ssize_t platform_profile_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
enum platform_profile_option profile = PLATFORM_PROFILE_LAST;
@@ -362,15 +362,15 @@ static ssize_t platform_profile_show(struct device *dev,
/**
* platform_profile_store - Set the profile for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to read from
* @count: The number of bytes to read
*
* Return: The number of bytes read
*/
-static ssize_t platform_profile_store(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
const char *buf, size_t count)
{
unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)];
@@ -401,12 +401,12 @@ static ssize_t platform_profile_store(struct device *dev,
return count;
}
-static DEVICE_ATTR_RO(platform_profile_choices);
-static DEVICE_ATTR_RW(platform_profile);
+static struct kobj_attribute attr_platform_profile_choices = __ATTR_RO(platform_profile_choices);
+static struct kobj_attribute attr_platform_profile = __ATTR_RW(platform_profile);
static struct attribute *platform_profile_attrs[] = {
- &dev_attr_platform_profile_choices.attr,
- &dev_attr_platform_profile.attr,
+ &attr_platform_profile_choices.attr,
+ &attr_platform_profile.attr,
NULL
};
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x dd4f730b557ce701a2cd4f604bf1e57667bd8b6e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040804-submitter-spur-cf1a@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dd4f730b557ce701a2cd4f604bf1e57667bd8b6e Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Mon, 10 Feb 2025 21:28:25 -0500
Subject: [PATCH] ACPI: platform-profile: Fix CFI violation when accessing
sysfs files
When an attribute group is created with sysfs_create_group(), the
->sysfs_ops() callback is set to kobj_sysfs_ops, which sets the ->show()
and ->store() callbacks to kobj_attr_show() and kobj_attr_store()
respectively. These functions use container_of() to get the respective
callback from the passed attribute, meaning that these callbacks need to
be of the same type as the callbacks in 'struct kobj_attribute'.
However, ->show() and ->store() in the platform_profile driver are
defined for struct device_attribute with the help of DEVICE_ATTR_RO()
and DEVICE_ATTR_RW(), which results in a CFI violation when accessing
platform_profile or platform_profile_choices under /sys/firmware/acpi
because the types do not match:
CFI failure at kobj_attr_show+0x19/0x30 (target: platform_profile_choices_show+0x0/0x140; expected type: 0x7a69590c)
There is no functional issue from the type mismatch because the layout
of 'struct kobj_attribute' and 'struct device_attribute' are the same,
so the container_of() cast does not break anything aside from CFI.
Change the type of platform_profile_choices_show() and
platform_profile_{show,store}() to match the callbacks in
'struct kobj_attribute' and update the attribute variables to
match, which resolves the CFI violation.
Cc: All applicable <stable(a)vger.kernel.org>
Fixes: a2ff95e018f1 ("ACPI: platform: Add platform profile support")
Reported-by: John Rowley <lkml(a)johnrowley.me>
Closes: https://github.com/ClangBuiltLinux/linux/issues/2047
Tested-by: John Rowley <lkml(a)johnrowley.me>
Reviewed-by: Sami Tolvanen <samitolvanen(a)google.com>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Reviewed-by: Mark Pearson <mpearson-lenovo(a)squebb.ca>
Tested-by: Mark Pearson <mpearson-lenovo(a)squebb.ca>
Link: https://patch.msgid.link/20250210-acpi-platform_profile-fix-cfi-violation-v…
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c
index fc92e43d0fe9..1b6317f759f9 100644
--- a/drivers/acpi/platform_profile.c
+++ b/drivers/acpi/platform_profile.c
@@ -260,14 +260,14 @@ static int _aggregate_choices(struct device *dev, void *data)
/**
* platform_profile_choices_show - Show the available profile choices for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to write to
*
* Return: The number of bytes written
*/
-static ssize_t platform_profile_choices_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_choices_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)];
@@ -333,14 +333,14 @@ static int _store_and_notify(struct device *dev, void *data)
/**
* platform_profile_show - Show the current profile for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to write to
*
* Return: The number of bytes written
*/
-static ssize_t platform_profile_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
enum platform_profile_option profile = PLATFORM_PROFILE_LAST;
@@ -362,15 +362,15 @@ static ssize_t platform_profile_show(struct device *dev,
/**
* platform_profile_store - Set the profile for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to read from
* @count: The number of bytes to read
*
* Return: The number of bytes read
*/
-static ssize_t platform_profile_store(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
const char *buf, size_t count)
{
unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)];
@@ -401,12 +401,12 @@ static ssize_t platform_profile_store(struct device *dev,
return count;
}
-static DEVICE_ATTR_RO(platform_profile_choices);
-static DEVICE_ATTR_RW(platform_profile);
+static struct kobj_attribute attr_platform_profile_choices = __ATTR_RO(platform_profile_choices);
+static struct kobj_attribute attr_platform_profile = __ATTR_RW(platform_profile);
static struct attribute *platform_profile_attrs[] = {
- &dev_attr_platform_profile_choices.attr,
- &dev_attr_platform_profile.attr,
+ &attr_platform_profile_choices.attr,
+ &attr_platform_profile.attr,
NULL
};
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x dd4f730b557ce701a2cd4f604bf1e57667bd8b6e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040804-level-crystal-ca73@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dd4f730b557ce701a2cd4f604bf1e57667bd8b6e Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Mon, 10 Feb 2025 21:28:25 -0500
Subject: [PATCH] ACPI: platform-profile: Fix CFI violation when accessing
sysfs files
When an attribute group is created with sysfs_create_group(), the
->sysfs_ops() callback is set to kobj_sysfs_ops, which sets the ->show()
and ->store() callbacks to kobj_attr_show() and kobj_attr_store()
respectively. These functions use container_of() to get the respective
callback from the passed attribute, meaning that these callbacks need to
be of the same type as the callbacks in 'struct kobj_attribute'.
However, ->show() and ->store() in the platform_profile driver are
defined for struct device_attribute with the help of DEVICE_ATTR_RO()
and DEVICE_ATTR_RW(), which results in a CFI violation when accessing
platform_profile or platform_profile_choices under /sys/firmware/acpi
because the types do not match:
CFI failure at kobj_attr_show+0x19/0x30 (target: platform_profile_choices_show+0x0/0x140; expected type: 0x7a69590c)
There is no functional issue from the type mismatch because the layout
of 'struct kobj_attribute' and 'struct device_attribute' are the same,
so the container_of() cast does not break anything aside from CFI.
Change the type of platform_profile_choices_show() and
platform_profile_{show,store}() to match the callbacks in
'struct kobj_attribute' and update the attribute variables to
match, which resolves the CFI violation.
Cc: All applicable <stable(a)vger.kernel.org>
Fixes: a2ff95e018f1 ("ACPI: platform: Add platform profile support")
Reported-by: John Rowley <lkml(a)johnrowley.me>
Closes: https://github.com/ClangBuiltLinux/linux/issues/2047
Tested-by: John Rowley <lkml(a)johnrowley.me>
Reviewed-by: Sami Tolvanen <samitolvanen(a)google.com>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Reviewed-by: Mark Pearson <mpearson-lenovo(a)squebb.ca>
Tested-by: Mark Pearson <mpearson-lenovo(a)squebb.ca>
Link: https://patch.msgid.link/20250210-acpi-platform_profile-fix-cfi-violation-v…
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c
index fc92e43d0fe9..1b6317f759f9 100644
--- a/drivers/acpi/platform_profile.c
+++ b/drivers/acpi/platform_profile.c
@@ -260,14 +260,14 @@ static int _aggregate_choices(struct device *dev, void *data)
/**
* platform_profile_choices_show - Show the available profile choices for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to write to
*
* Return: The number of bytes written
*/
-static ssize_t platform_profile_choices_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_choices_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
unsigned long aggregate[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)];
@@ -333,14 +333,14 @@ static int _store_and_notify(struct device *dev, void *data)
/**
* platform_profile_show - Show the current profile for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to write to
*
* Return: The number of bytes written
*/
-static ssize_t platform_profile_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
enum platform_profile_option profile = PLATFORM_PROFILE_LAST;
@@ -362,15 +362,15 @@ static ssize_t platform_profile_show(struct device *dev,
/**
* platform_profile_store - Set the profile for legacy sysfs interface
- * @dev: The device
+ * @kobj: The kobject
* @attr: The attribute
* @buf: The buffer to read from
* @count: The number of bytes to read
*
* Return: The number of bytes read
*/
-static ssize_t platform_profile_store(struct device *dev,
- struct device_attribute *attr,
+static ssize_t platform_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
const char *buf, size_t count)
{
unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)];
@@ -401,12 +401,12 @@ static ssize_t platform_profile_store(struct device *dev,
return count;
}
-static DEVICE_ATTR_RO(platform_profile_choices);
-static DEVICE_ATTR_RW(platform_profile);
+static struct kobj_attribute attr_platform_profile_choices = __ATTR_RO(platform_profile_choices);
+static struct kobj_attribute attr_platform_profile = __ATTR_RW(platform_profile);
static struct attribute *platform_profile_attrs[] = {
- &dev_attr_platform_profile_choices.attr,
- &dev_attr_platform_profile.attr,
+ &attr_platform_profile_choices.attr,
+ &attr_platform_profile.attr,
NULL
};
On 07/04/2025 15:57, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> sfc: rip out MDIO support
>
> to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
...
> Stable-dep-of: 8241ecec1cdc ("sfc: fix NULL dereferences in ef100_process_design_param()")
I wouldn't say it's really a dependency; it's completely unrelated, just
happens to create a textual diff conflict which should be pretty trivial
to resolve (it's only the contexts that overlap, not the actual changes).
Obviously you can take this in if you want but I would've said this was a
bit big for -stable.
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 2fa87c71d2adb4b82c105f9191e6120340feff00
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040812-alumni-tightness-cb66@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2fa87c71d2adb4b82c105f9191e6120340feff00 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede(a)redhat.com>
Date: Tue, 25 Mar 2025 22:04:50 +0100
Subject: [PATCH] ACPI: x86: Extend Lenovo Yoga Tab 3 quirk with skip GPIO
event-handlers
Depending on the secureboot signature on EFI\BOOT\BOOTX86.EFI the
Lenovo Yoga Tab 3 UEFI will switch its OSID ACPI variable between
1 (Windows) and 4 (Android(GMIN)).
In Windows mode a GPIO event handler gets installed for GPO1 pin 5,
causing Linux' x86-android-tables code which deals with the general
brokenness of this device's ACPI tables to fail to probe with:
[ 17.853705] x86_android_tablets: error -16 getting GPIO INT33FF:01 5
[ 17.859623] x86_android_tablets x86_android_tablets: probe with driver
which renders sound, the touchscreen, charging-management,
battery-monitoring and more non functional.
Add ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS to the existing quirks for this
device to fix this.
Reported-by: Agoston Lorincz <pipacsba(a)gmail.com>
Closes: https://lore.kernel.org/platform-driver-x86/CAMEzqD+DNXrAvUOHviB2O2bjtcbmo3…
Cc: All applicable <stable(a)kernel.org>
Fixes: fe820db35275 ("ACPI: x86: Add skip i2c clients quirk for Lenovo Yoga Tab 3 Pro (YT3-X90F)")
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Link: https://patch.msgid.link/20250325210450.358506-1-hdegoede@redhat.com
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index 068c1612660b..4ee30c2897a2 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -374,7 +374,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
DMI_MATCH(DMI_PRODUCT_VERSION, "Blade3-10A-001"),
},
.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
- ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+ ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+ ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
},
{
/* Medion Lifetab S10346 */
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f9bdf1f953392c9edd69a7f884f78c0390127029
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040839-deputize-undertook-8a56@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f9bdf1f953392c9edd69a7f884f78c0390127029 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang(a)linux.intel.com>
Date: Tue, 21 Jan 2025 07:23:01 -0800
Subject: [PATCH] perf/x86/intel: Avoid disable PMU if !cpuc->enabled in sample
read
The WARN_ON(this_cpu_read(cpu_hw_events.enabled)) in the
intel_pmu_save_and_restart_reload() is triggered, when sampling read
topdown events.
In a NMI handler, the cpu_hw_events.enabled is set and used to indicate
the status of core PMU. The generic pmu->pmu_disable_count, updated in
the perf_pmu_disable/enable pair, is not touched.
However, the perf_pmu_disable/enable pair is invoked when sampling read
in a NMI handler. The cpuc->enabled is mistakenly set by the
perf_pmu_enable().
Avoid disabling PMU if the core PMU is already disabled.
Merge the logic together.
Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics")
Suggested-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/r/20250121152303.3128733-2-kan.liang@linux.intel.com
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2acea83526c6..1ccc961f8182 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2785,28 +2785,33 @@ static u64 icl_update_topdown_event(struct perf_event *event)
DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update);
-static void intel_pmu_read_topdown_event(struct perf_event *event)
-{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-
- /* Only need to call update_topdown_event() once for group read. */
- if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
- !is_slots_event(event))
- return;
-
- perf_pmu_disable(event->pmu);
- static_call(intel_pmu_update_topdown_event)(event);
- perf_pmu_enable(event->pmu);
-}
-
static void intel_pmu_read_event(struct perf_event *event)
{
- if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
- intel_pmu_auto_reload_read(event);
- else if (is_topdown_count(event))
- intel_pmu_read_topdown_event(event);
- else
- x86_perf_event_update(event);
+ if (event->hw.flags & (PERF_X86_EVENT_AUTO_RELOAD | PERF_X86_EVENT_TOPDOWN)) {
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ bool pmu_enabled = cpuc->enabled;
+
+ /* Only need to call update_topdown_event() once for group read. */
+ if (is_metric_event(event) && (cpuc->txn_flags & PERF_PMU_TXN_READ))
+ return;
+
+ cpuc->enabled = 0;
+ if (pmu_enabled)
+ intel_pmu_disable_all();
+
+ if (is_topdown_event(event))
+ static_call(intel_pmu_update_topdown_event)(event);
+ else
+ intel_pmu_drain_pebs_buffer();
+
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
+ intel_pmu_enable_all(0);
+
+ return;
+ }
+
+ x86_perf_event_update(event);
}
static void intel_pmu_enable_fixed(struct perf_event *event)
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 322963b02a91..eb14b46423e5 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -953,7 +953,7 @@ int intel_pmu_drain_bts_buffer(void)
return 1;
}
-static inline void intel_pmu_drain_pebs_buffer(void)
+void intel_pmu_drain_pebs_buffer(void)
{
struct perf_sample_data data;
@@ -2094,15 +2094,6 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
return NULL;
}
-void intel_pmu_auto_reload_read(struct perf_event *event)
-{
- WARN_ON(!(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD));
-
- perf_pmu_disable(event->pmu);
- intel_pmu_drain_pebs_buffer();
- perf_pmu_enable(event->pmu);
-}
-
/*
* Special variant of intel_pmu_save_and_restart() for auto-reload.
*/
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 084e9196b458..536a112f6353 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1644,7 +1644,7 @@ void intel_pmu_pebs_disable_all(void);
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
-void intel_pmu_auto_reload_read(struct perf_event *event);
+void intel_pmu_drain_pebs_buffer(void);
void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 9462e74c5c983cce34019bfb27f734552bebe59f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040821-pleading-cone-6de8@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9462e74c5c983cce34019bfb27f734552bebe59f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Date: Fri, 28 Mar 2025 15:47:49 -0700
Subject: [PATCH] platform/x86: ISST: Correct command storage data length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After resume/online turbo limit ratio (TRL) is restored partially if
the admin explicitly changed TRL from user space.
A hash table is used to store SST mail box and MSR settings when modified
to restore those settings after resume or online. This uses a struct
isst_cmd field "data" to store these settings. This is a 64 bit field.
But isst_store_new_cmd() is only assigning as u32. This results in
truncation of 32 bits.
Change the argument to u64 from u32.
Fixes: f607874f35cb ("platform/x86: ISST: Restore state on resume")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250328224749.2691272-1-srinivas.pandruvada@linu…
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index dbcd3087aaa4..31239a93dd71 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -84,7 +84,7 @@ static DECLARE_HASHTABLE(isst_hash, 8);
static DEFINE_MUTEX(isst_hash_lock);
static int isst_store_new_cmd(int cmd, u32 cpu, int mbox_cmd_type, u32 param,
- u32 data)
+ u64 data)
{
struct isst_cmd *sst_cmd;
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 9462e74c5c983cce34019bfb27f734552bebe59f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040821-underdone-luster-6e41@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9462e74c5c983cce34019bfb27f734552bebe59f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Date: Fri, 28 Mar 2025 15:47:49 -0700
Subject: [PATCH] platform/x86: ISST: Correct command storage data length
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After resume/online turbo limit ratio (TRL) is restored partially if
the admin explicitly changed TRL from user space.
A hash table is used to store SST mail box and MSR settings when modified
to restore those settings after resume or online. This uses a struct
isst_cmd field "data" to store these settings. This is a 64 bit field.
But isst_store_new_cmd() is only assigning as u32. This results in
truncation of 32 bits.
Change the argument to u64 from u32.
Fixes: f607874f35cb ("platform/x86: ISST: Restore state on resume")
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250328224749.2691272-1-srinivas.pandruvada@linu…
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen(a)linux.intel.com>
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index dbcd3087aaa4..31239a93dd71 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -84,7 +84,7 @@ static DECLARE_HASHTABLE(isst_hash, 8);
static DEFINE_MUTEX(isst_hash_lock);
static int isst_store_new_cmd(int cmd, u32 cpu, int mbox_cmd_type, u32 param,
- u32 data)
+ u64 data)
{
struct isst_cmd *sst_cmd;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 9f98a4f4e7216dbe366010b4cdcab6b220f229c4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040846-deafening-unmanaged-f966@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 Mon Sep 17 00:00:00 2001
From: Vishal Annapurve <vannapurve(a)google.com>
Date: Fri, 28 Feb 2025 01:44:15 +0000
Subject: [PATCH] x86/tdx: Fix arch_safe_halt() execution for TDX VMs
Direct HLT instruction execution causes #VEs for TDX VMs which is routed
to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE
handler will enable interrupts before TDCALL is routed to hypervisor
leading to missed wakeup events, as current TDX spec doesn't expose
interruptibility state information to allow #VE handler to selectively
enable interrupts.
Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
prevented the idle routines from executing HLT instruction in STI-shadow.
But it missed the paravirt routine which can be reached via this path
as an example:
kvm_wait() =>
safe_halt() =>
raw_safe_halt() =>
arch_safe_halt() =>
irq.safe_halt() =>
pv_native_safe_halt()
To reliably handle arch_safe_halt() for TDX VMs, introduce explicit
dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt()
routines with TDX-safe versions that execute direct TDCALL and needed
interrupt flag updates. Executing direct TDCALL brings in additional
benefit of avoiding HLT related #VEs altogether.
As tested by Ryan Afranji:
"Tested with the specjbb2015 benchmark. It has heavy lock contention which leads
to many halt calls. TDX VMs suffered a poor score before this patchset.
Verified the major performance improvement with this patchset applied."
Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
Signed-off-by: Vishal Annapurve <vannapurve(a)google.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Tested-by: Ryan Afranji <afranji(a)google.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 05b4eca156cf..f614c0522a0b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -878,6 +878,7 @@ config INTEL_TDX_GUEST
depends on X86_64 && CPU_SUP_INTEL
depends on X86_X2APIC
depends on EFI_STUB
+ depends on PARAVIRT
select ARCH_HAS_CC_PLATFORM
select X86_MEM_ENCRYPT
select X86_MCE
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 7772b01ab738..aa0eb4057226 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -14,6 +14,7 @@
#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <asm/paravirt_types.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/traps.h>
@@ -398,7 +399,7 @@ static int handle_halt(struct ve_info *ve)
return ve_instr_len(ve);
}
-void __cpuidle tdx_safe_halt(void)
+void __cpuidle tdx_halt(void)
{
const bool irq_disabled = false;
@@ -409,6 +410,16 @@ void __cpuidle tdx_safe_halt(void)
WARN_ONCE(1, "HLT instruction emulation failed\n");
}
+static void __cpuidle tdx_safe_halt(void)
+{
+ tdx_halt();
+ /*
+ * "__cpuidle" section doesn't support instrumentation, so stick
+ * with raw_* variant that avoids tracing hooks.
+ */
+ raw_local_irq_enable();
+}
+
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_module_args args = {
@@ -1109,6 +1120,19 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
+ /*
+ * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
+ * will enable interrupts before HLT TDCALL invocation if executed
+ * in STI-shadow, possibly resulting in missed wakeup events.
+ *
+ * Modify all possible HLT execution paths to use TDX specific routines
+ * that directly execute TDCALL and toggle the interrupt state as
+ * needed after TDCALL completion. This also reduces HLT related #VEs
+ * in addition to having a reliable halt logic execution.
+ */
+ pv_ops.irq.safe_halt = tdx_safe_halt;
+ pv_ops.irq.halt = tdx_halt;
+
/*
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
* bringup low level code. That raises #VE which cannot be handled
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 65394aa9b49f..4a1922ec80cf 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve);
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
-void tdx_safe_halt(void);
+void tdx_halt(void);
bool tdx_early_handle_ve(struct pt_regs *regs);
@@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls);
#else
static inline void tdx_early_init(void) { };
-static inline void tdx_safe_halt(void) { };
+static inline void tdx_halt(void) { };
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 91f6ff618852..962c3ce39323 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -939,7 +939,7 @@ void __init select_idle_routine(void)
static_call_update(x86_idle, mwait_idle);
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
pr_info("using TDX aware idle routine\n");
- static_call_update(x86_idle, tdx_safe_halt);
+ static_call_update(x86_idle, tdx_halt);
} else {
static_call_update(x86_idle, default_idle);
}
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 9f98a4f4e7216dbe366010b4cdcab6b220f229c4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040845-quantum-situation-f9b0@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 9f98a4f4e7216dbe366010b4cdcab6b220f229c4 Mon Sep 17 00:00:00 2001
From: Vishal Annapurve <vannapurve(a)google.com>
Date: Fri, 28 Feb 2025 01:44:15 +0000
Subject: [PATCH] x86/tdx: Fix arch_safe_halt() execution for TDX VMs
Direct HLT instruction execution causes #VEs for TDX VMs which is routed
to hypervisor via TDCALL. If HLT is executed in STI-shadow, resulting #VE
handler will enable interrupts before TDCALL is routed to hypervisor
leading to missed wakeup events, as current TDX spec doesn't expose
interruptibility state information to allow #VE handler to selectively
enable interrupts.
Commit bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
prevented the idle routines from executing HLT instruction in STI-shadow.
But it missed the paravirt routine which can be reached via this path
as an example:
kvm_wait() =>
safe_halt() =>
raw_safe_halt() =>
arch_safe_halt() =>
irq.safe_halt() =>
pv_native_safe_halt()
To reliably handle arch_safe_halt() for TDX VMs, introduce explicit
dependency on CONFIG_PARAVIRT and override paravirt halt()/safe_halt()
routines with TDX-safe versions that execute direct TDCALL and needed
interrupt flag updates. Executing direct TDCALL brings in additional
benefit of avoiding HLT related #VEs altogether.
As tested by Ryan Afranji:
"Tested with the specjbb2015 benchmark. It has heavy lock contention which leads
to many halt calls. TDX VMs suffered a poor score before this patchset.
Verified the major performance improvement with this patchset applied."
Fixes: bfe6ed0c6727 ("x86/tdx: Add HLT support for TDX guests")
Signed-off-by: Vishal Annapurve <vannapurve(a)google.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Tested-by: Ryan Afranji <afranji(a)google.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Brian Gerst <brgerst(a)gmail.com>
Cc: Juergen Gross <jgross(a)suse.com>
Cc: H. Peter Anvin <hpa(a)zytor.com>
Cc: Linus Torvalds <torvalds(a)linux-foundation.org>
Cc: Josh Poimboeuf <jpoimboe(a)redhat.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250228014416.3925664-3-vannapurve@google.com
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 05b4eca156cf..f614c0522a0b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -878,6 +878,7 @@ config INTEL_TDX_GUEST
depends on X86_64 && CPU_SUP_INTEL
depends on X86_X2APIC
depends on EFI_STUB
+ depends on PARAVIRT
select ARCH_HAS_CC_PLATFORM
select X86_MEM_ENCRYPT
select X86_MCE
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 7772b01ab738..aa0eb4057226 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -14,6 +14,7 @@
#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <asm/paravirt_types.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/traps.h>
@@ -398,7 +399,7 @@ static int handle_halt(struct ve_info *ve)
return ve_instr_len(ve);
}
-void __cpuidle tdx_safe_halt(void)
+void __cpuidle tdx_halt(void)
{
const bool irq_disabled = false;
@@ -409,6 +410,16 @@ void __cpuidle tdx_safe_halt(void)
WARN_ONCE(1, "HLT instruction emulation failed\n");
}
+static void __cpuidle tdx_safe_halt(void)
+{
+ tdx_halt();
+ /*
+ * "__cpuidle" section doesn't support instrumentation, so stick
+ * with raw_* variant that avoids tracing hooks.
+ */
+ raw_local_irq_enable();
+}
+
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_module_args args = {
@@ -1109,6 +1120,19 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
+ /*
+ * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
+ * will enable interrupts before HLT TDCALL invocation if executed
+ * in STI-shadow, possibly resulting in missed wakeup events.
+ *
+ * Modify all possible HLT execution paths to use TDX specific routines
+ * that directly execute TDCALL and toggle the interrupt state as
+ * needed after TDCALL completion. This also reduces HLT related #VEs
+ * in addition to having a reliable halt logic execution.
+ */
+ pv_ops.irq.safe_halt = tdx_safe_halt;
+ pv_ops.irq.halt = tdx_halt;
+
/*
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
* bringup low level code. That raises #VE which cannot be handled
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 65394aa9b49f..4a1922ec80cf 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve);
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
-void tdx_safe_halt(void);
+void tdx_halt(void);
bool tdx_early_handle_ve(struct pt_regs *regs);
@@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls);
#else
static inline void tdx_early_init(void) { };
-static inline void tdx_safe_halt(void) { };
+static inline void tdx_halt(void) { };
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 91f6ff618852..962c3ce39323 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -939,7 +939,7 @@ void __init select_idle_routine(void)
static_call_update(x86_idle, mwait_idle);
} else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) {
pr_info("using TDX aware idle routine\n");
- static_call_update(x86_idle, tdx_safe_halt);
+ static_call_update(x86_idle, tdx_halt);
} else {
static_call_update(x86_idle, default_idle);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1a15bb8303b6b104e78028b6c68f76a0d4562134
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040811-capable-unblock-8997@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1a15bb8303b6b104e78028b6c68f76a0d4562134 Mon Sep 17 00:00:00 2001
From: Shuai Xue <xueshuai(a)linux.alibaba.com>
Date: Wed, 12 Mar 2025 19:28:50 +0800
Subject: [PATCH] x86/mce: use is_copy_from_user() to determine copy-from-user
context
Patch series "mm/hwpoison: Fix regressions in memory failure handling",
v4.
## 1. What am I trying to do:
This patchset resolves two critical regressions related to memory failure
handling that have appeared in the upstream kernel since version 5.17, as
compared to 5.10 LTS.
- copyin case: poison found in user page while kernel copying from user space
- instr case: poison found while instruction fetching in user space
## 2. What is the expected outcome and why
- For copyin case:
Kernel can recover from poison found where kernel is doing get_user() or
copy_from_user() if those places get an error return and the kernel return
-EFAULT to the process instead of crashing. More specifily, MCE handler
checks the fixup handler type to decide whether an in kernel #MC can be
recovered. When EX_TYPE_UACCESS is found, the PC jumps to recovery code
specified in _ASM_EXTABLE_FAULT() and return a -EFAULT to user space.
- For instr case:
If a poison found while instruction fetching in user space, full recovery
is possible. User process takes #PF, Linux allocates a new page and fills
by reading from storage.
## 3. What actually happens and why
- For copyin case: kernel panic since v5.17
Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new
extable fixup type, EX_TYPE_EFAULT_REG, and later patches updated the
extable fixup type for copy-from-user operations, changing it from
EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG. It breaks previous EX_TYPE_UACCESS
handling when posion found in get_user() or copy_from_user().
- For instr case: user process is killed by a SIGBUS signal due to #CMCI
and #MCE race
When an uncorrected memory error is consumed there is a race between the
CMCI from the memory controller reporting an uncorrected error with a UCNA
signature, and the core reporting and SRAR signature machine check when
the data is about to be consumed.
### Background: why *UN*corrected errors tied to *C*MCI in Intel platform [1]
Prior to Icelake memory controllers reported patrol scrub events that
detected a previously unseen uncorrected error in memory by signaling a
broadcast machine check with an SRAO (Software Recoverable Action
Optional) signature in the machine check bank. This was overkill because
it's not an urgent problem that no core is on the verge of consuming that
bad data. It's also found that multi SRAO UCE may cause nested MCE
interrupts and finally become an IERR.
Hence, Intel downgrades the machine check bank signature of patrol scrub
from SRAO to UCNA (Uncorrected, No Action required), and signal changed to
#CMCI. Just to add to the confusion, Linux does take an action (in
uc_decode_notifier()) to try to offline the page despite the UC*NA*
signature name.
### Background: why #CMCI and #MCE race when poison is consuming in
Intel platform [1]
Having decided that CMCI/UCNA is the best action for patrol scrub errors,
the memory controller uses it for reads too. But the memory controller is
executing asynchronously from the core, and can't tell the difference
between a "real" read and a speculative read. So it will do CMCI/UCNA if
an error is found in any read.
Thus:
1) Core is clever and thinks address A is needed soon, issues a
speculative read.
2) Core finds it is going to use address A soon after sending the read
request
3) The CMCI from the memory controller is in a race with MCE from the
core that will soon try to retire the load from address A.
Quite often (because speculation has got better) the CMCI from the memory
controller is delivered before the core is committed to the instruction
reading address A, so the interrupt is taken, and Linux offlines the page
(marking it as poison).
## Why user process is killed for instr case
Commit 046545a661af ("mm/hwpoison: fix error page recovered but reported
"not recovered"") tries to fix noise message "Memory error not recovered"
and skips duplicate SIGBUSs due to the race. But it also introduced a bug
that kill_accessing_process() return -EHWPOISON for instr case, as result,
kill_me_maybe() send a SIGBUS to user process.
# 4. The fix, in my opinion, should be:
- For copyin case:
The key point is whether the error context is in a read from user memory.
We do not care about the ex-type if we know its a MOV reading from
userspace.
is_copy_from_user() return true when both of the following two checks are
true:
- the current instruction is copy
- source address is user memory
If copy_user is true, we set
m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
Then do_machine_check() will try fixup_exception() first.
- For instr case: let kill_accessing_process() return 0 to prevent a SIGBUS.
- For patch 3:
The return value of memory_failure() is quite important while discussed
instr case regression with Tony and Miaohe for patch 2, so add comment
about the return value.
This patch (of 3):
Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new
extable fixup type, EX_TYPE_EFAULT_REG, and commit 4c132d1d844a
("x86/futex: Remove .fixup usage") updated the extable fixup type for
copy-from-user operations, changing it from EX_TYPE_UACCESS to
EX_TYPE_EFAULT_REG. The error context for copy-from-user operations no
longer functions as an in-kernel recovery context. Consequently, the
error context for copy-from-user operations no longer functions as an
in-kernel recovery context, resulting in kernel panics with the message:
"Machine check: Data load in unrecoverable area of kernel."
To address this, it is crucial to identify if an error context involves a
read operation from user memory. The function is_copy_from_user() can be
utilized to determine:
- the current operation is copy
- when reading user memory
When these conditions are met, is_copy_from_user() will return true,
confirming that it is indeed a direct copy from user memory. This check
is essential for correctly handling the context of errors in these
operations without relying on the extable fixup types that previously
allowed for in-kernel recovery.
So, use is_copy_from_user() to determine if a context is copy user directly.
Link: https://lkml.kernel.org/r/20250312112852.82415-1-xueshuai@linux.alibaba.com
Link: https://lkml.kernel.org/r/20250312112852.82415-2-xueshuai@linux.alibaba.com
Fixes: 4c132d1d844a ("x86/futex: Remove .fixup usage")
Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com>
Suggested-by: Peter Zijlstra <peterz(a)infradead.org>
Acked-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Tested-by: Tony Luck <tony.luck(a)intel.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Ruidong Tian <tianruidong(a)linux.alibaba.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yazen Ghannam <yazen.ghannam(a)amd.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dac4d64dfb2a..2235a7477436 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
copy_user = is_copy_from_user(regs);
instrumentation_end();
- switch (fixup_type) {
- case EX_TYPE_UACCESS:
- if (!copy_user)
- return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
+ if (copy_user) {
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ }
+ switch (fixup_type) {
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 1a15bb8303b6b104e78028b6c68f76a0d4562134
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040808-ammonia-petal-1583@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1a15bb8303b6b104e78028b6c68f76a0d4562134 Mon Sep 17 00:00:00 2001
From: Shuai Xue <xueshuai(a)linux.alibaba.com>
Date: Wed, 12 Mar 2025 19:28:50 +0800
Subject: [PATCH] x86/mce: use is_copy_from_user() to determine copy-from-user
context
Patch series "mm/hwpoison: Fix regressions in memory failure handling",
v4.
## 1. What am I trying to do:
This patchset resolves two critical regressions related to memory failure
handling that have appeared in the upstream kernel since version 5.17, as
compared to 5.10 LTS.
- copyin case: poison found in user page while kernel copying from user space
- instr case: poison found while instruction fetching in user space
## 2. What is the expected outcome and why
- For copyin case:
Kernel can recover from poison found where kernel is doing get_user() or
copy_from_user() if those places get an error return and the kernel return
-EFAULT to the process instead of crashing. More specifily, MCE handler
checks the fixup handler type to decide whether an in kernel #MC can be
recovered. When EX_TYPE_UACCESS is found, the PC jumps to recovery code
specified in _ASM_EXTABLE_FAULT() and return a -EFAULT to user space.
- For instr case:
If a poison found while instruction fetching in user space, full recovery
is possible. User process takes #PF, Linux allocates a new page and fills
by reading from storage.
## 3. What actually happens and why
- For copyin case: kernel panic since v5.17
Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new
extable fixup type, EX_TYPE_EFAULT_REG, and later patches updated the
extable fixup type for copy-from-user operations, changing it from
EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG. It breaks previous EX_TYPE_UACCESS
handling when posion found in get_user() or copy_from_user().
- For instr case: user process is killed by a SIGBUS signal due to #CMCI
and #MCE race
When an uncorrected memory error is consumed there is a race between the
CMCI from the memory controller reporting an uncorrected error with a UCNA
signature, and the core reporting and SRAR signature machine check when
the data is about to be consumed.
### Background: why *UN*corrected errors tied to *C*MCI in Intel platform [1]
Prior to Icelake memory controllers reported patrol scrub events that
detected a previously unseen uncorrected error in memory by signaling a
broadcast machine check with an SRAO (Software Recoverable Action
Optional) signature in the machine check bank. This was overkill because
it's not an urgent problem that no core is on the verge of consuming that
bad data. It's also found that multi SRAO UCE may cause nested MCE
interrupts and finally become an IERR.
Hence, Intel downgrades the machine check bank signature of patrol scrub
from SRAO to UCNA (Uncorrected, No Action required), and signal changed to
#CMCI. Just to add to the confusion, Linux does take an action (in
uc_decode_notifier()) to try to offline the page despite the UC*NA*
signature name.
### Background: why #CMCI and #MCE race when poison is consuming in
Intel platform [1]
Having decided that CMCI/UCNA is the best action for patrol scrub errors,
the memory controller uses it for reads too. But the memory controller is
executing asynchronously from the core, and can't tell the difference
between a "real" read and a speculative read. So it will do CMCI/UCNA if
an error is found in any read.
Thus:
1) Core is clever and thinks address A is needed soon, issues a
speculative read.
2) Core finds it is going to use address A soon after sending the read
request
3) The CMCI from the memory controller is in a race with MCE from the
core that will soon try to retire the load from address A.
Quite often (because speculation has got better) the CMCI from the memory
controller is delivered before the core is committed to the instruction
reading address A, so the interrupt is taken, and Linux offlines the page
(marking it as poison).
## Why user process is killed for instr case
Commit 046545a661af ("mm/hwpoison: fix error page recovered but reported
"not recovered"") tries to fix noise message "Memory error not recovered"
and skips duplicate SIGBUSs due to the race. But it also introduced a bug
that kill_accessing_process() return -EHWPOISON for instr case, as result,
kill_me_maybe() send a SIGBUS to user process.
# 4. The fix, in my opinion, should be:
- For copyin case:
The key point is whether the error context is in a read from user memory.
We do not care about the ex-type if we know its a MOV reading from
userspace.
is_copy_from_user() return true when both of the following two checks are
true:
- the current instruction is copy
- source address is user memory
If copy_user is true, we set
m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
Then do_machine_check() will try fixup_exception() first.
- For instr case: let kill_accessing_process() return 0 to prevent a SIGBUS.
- For patch 3:
The return value of memory_failure() is quite important while discussed
instr case regression with Tony and Miaohe for patch 2, so add comment
about the return value.
This patch (of 3):
Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new
extable fixup type, EX_TYPE_EFAULT_REG, and commit 4c132d1d844a
("x86/futex: Remove .fixup usage") updated the extable fixup type for
copy-from-user operations, changing it from EX_TYPE_UACCESS to
EX_TYPE_EFAULT_REG. The error context for copy-from-user operations no
longer functions as an in-kernel recovery context. Consequently, the
error context for copy-from-user operations no longer functions as an
in-kernel recovery context, resulting in kernel panics with the message:
"Machine check: Data load in unrecoverable area of kernel."
To address this, it is crucial to identify if an error context involves a
read operation from user memory. The function is_copy_from_user() can be
utilized to determine:
- the current operation is copy
- when reading user memory
When these conditions are met, is_copy_from_user() will return true,
confirming that it is indeed a direct copy from user memory. This check
is essential for correctly handling the context of errors in these
operations without relying on the extable fixup types that previously
allowed for in-kernel recovery.
So, use is_copy_from_user() to determine if a context is copy user directly.
Link: https://lkml.kernel.org/r/20250312112852.82415-1-xueshuai@linux.alibaba.com
Link: https://lkml.kernel.org/r/20250312112852.82415-2-xueshuai@linux.alibaba.com
Fixes: 4c132d1d844a ("x86/futex: Remove .fixup usage")
Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com>
Suggested-by: Peter Zijlstra <peterz(a)infradead.org>
Acked-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Tested-by: Tony Luck <tony.luck(a)intel.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Ruidong Tian <tianruidong(a)linux.alibaba.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yazen Ghannam <yazen.ghannam(a)amd.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dac4d64dfb2a..2235a7477436 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
copy_user = is_copy_from_user(regs);
instrumentation_end();
- switch (fixup_type) {
- case EX_TYPE_UACCESS:
- if (!copy_user)
- return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
+ if (copy_user) {
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ }
+ switch (fixup_type) {
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 1a15bb8303b6b104e78028b6c68f76a0d4562134
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040805-enlisted-regular-7bac@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1a15bb8303b6b104e78028b6c68f76a0d4562134 Mon Sep 17 00:00:00 2001
From: Shuai Xue <xueshuai(a)linux.alibaba.com>
Date: Wed, 12 Mar 2025 19:28:50 +0800
Subject: [PATCH] x86/mce: use is_copy_from_user() to determine copy-from-user
context
Patch series "mm/hwpoison: Fix regressions in memory failure handling",
v4.
## 1. What am I trying to do:
This patchset resolves two critical regressions related to memory failure
handling that have appeared in the upstream kernel since version 5.17, as
compared to 5.10 LTS.
- copyin case: poison found in user page while kernel copying from user space
- instr case: poison found while instruction fetching in user space
## 2. What is the expected outcome and why
- For copyin case:
Kernel can recover from poison found where kernel is doing get_user() or
copy_from_user() if those places get an error return and the kernel return
-EFAULT to the process instead of crashing. More specifily, MCE handler
checks the fixup handler type to decide whether an in kernel #MC can be
recovered. When EX_TYPE_UACCESS is found, the PC jumps to recovery code
specified in _ASM_EXTABLE_FAULT() and return a -EFAULT to user space.
- For instr case:
If a poison found while instruction fetching in user space, full recovery
is possible. User process takes #PF, Linux allocates a new page and fills
by reading from storage.
## 3. What actually happens and why
- For copyin case: kernel panic since v5.17
Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new
extable fixup type, EX_TYPE_EFAULT_REG, and later patches updated the
extable fixup type for copy-from-user operations, changing it from
EX_TYPE_UACCESS to EX_TYPE_EFAULT_REG. It breaks previous EX_TYPE_UACCESS
handling when posion found in get_user() or copy_from_user().
- For instr case: user process is killed by a SIGBUS signal due to #CMCI
and #MCE race
When an uncorrected memory error is consumed there is a race between the
CMCI from the memory controller reporting an uncorrected error with a UCNA
signature, and the core reporting and SRAR signature machine check when
the data is about to be consumed.
### Background: why *UN*corrected errors tied to *C*MCI in Intel platform [1]
Prior to Icelake memory controllers reported patrol scrub events that
detected a previously unseen uncorrected error in memory by signaling a
broadcast machine check with an SRAO (Software Recoverable Action
Optional) signature in the machine check bank. This was overkill because
it's not an urgent problem that no core is on the verge of consuming that
bad data. It's also found that multi SRAO UCE may cause nested MCE
interrupts and finally become an IERR.
Hence, Intel downgrades the machine check bank signature of patrol scrub
from SRAO to UCNA (Uncorrected, No Action required), and signal changed to
#CMCI. Just to add to the confusion, Linux does take an action (in
uc_decode_notifier()) to try to offline the page despite the UC*NA*
signature name.
### Background: why #CMCI and #MCE race when poison is consuming in
Intel platform [1]
Having decided that CMCI/UCNA is the best action for patrol scrub errors,
the memory controller uses it for reads too. But the memory controller is
executing asynchronously from the core, and can't tell the difference
between a "real" read and a speculative read. So it will do CMCI/UCNA if
an error is found in any read.
Thus:
1) Core is clever and thinks address A is needed soon, issues a
speculative read.
2) Core finds it is going to use address A soon after sending the read
request
3) The CMCI from the memory controller is in a race with MCE from the
core that will soon try to retire the load from address A.
Quite often (because speculation has got better) the CMCI from the memory
controller is delivered before the core is committed to the instruction
reading address A, so the interrupt is taken, and Linux offlines the page
(marking it as poison).
## Why user process is killed for instr case
Commit 046545a661af ("mm/hwpoison: fix error page recovered but reported
"not recovered"") tries to fix noise message "Memory error not recovered"
and skips duplicate SIGBUSs due to the race. But it also introduced a bug
that kill_accessing_process() return -EHWPOISON for instr case, as result,
kill_me_maybe() send a SIGBUS to user process.
# 4. The fix, in my opinion, should be:
- For copyin case:
The key point is whether the error context is in a read from user memory.
We do not care about the ex-type if we know its a MOV reading from
userspace.
is_copy_from_user() return true when both of the following two checks are
true:
- the current instruction is copy
- source address is user memory
If copy_user is true, we set
m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
Then do_machine_check() will try fixup_exception() first.
- For instr case: let kill_accessing_process() return 0 to prevent a SIGBUS.
- For patch 3:
The return value of memory_failure() is quite important while discussed
instr case regression with Tony and Miaohe for patch 2, so add comment
about the return value.
This patch (of 3):
Commit 4c132d1d844a ("x86/futex: Remove .fixup usage") introduced a new
extable fixup type, EX_TYPE_EFAULT_REG, and commit 4c132d1d844a
("x86/futex: Remove .fixup usage") updated the extable fixup type for
copy-from-user operations, changing it from EX_TYPE_UACCESS to
EX_TYPE_EFAULT_REG. The error context for copy-from-user operations no
longer functions as an in-kernel recovery context. Consequently, the
error context for copy-from-user operations no longer functions as an
in-kernel recovery context, resulting in kernel panics with the message:
"Machine check: Data load in unrecoverable area of kernel."
To address this, it is crucial to identify if an error context involves a
read operation from user memory. The function is_copy_from_user() can be
utilized to determine:
- the current operation is copy
- when reading user memory
When these conditions are met, is_copy_from_user() will return true,
confirming that it is indeed a direct copy from user memory. This check
is essential for correctly handling the context of errors in these
operations without relying on the extable fixup types that previously
allowed for in-kernel recovery.
So, use is_copy_from_user() to determine if a context is copy user directly.
Link: https://lkml.kernel.org/r/20250312112852.82415-1-xueshuai@linux.alibaba.com
Link: https://lkml.kernel.org/r/20250312112852.82415-2-xueshuai@linux.alibaba.com
Fixes: 4c132d1d844a ("x86/futex: Remove .fixup usage")
Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com>
Suggested-by: Peter Zijlstra <peterz(a)infradead.org>
Acked-by: Borislav Petkov (AMD) <bp(a)alien8.de>
Tested-by: Tony Luck <tony.luck(a)intel.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Borislav Betkov <bp(a)alien8.de>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Josh Poimboeuf <jpoimboe(a)kernel.org>
Cc: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Ruidong Tian <tianruidong(a)linux.alibaba.com>
Cc: Thomas Gleinxer <tglx(a)linutronix.de>
Cc: Yazen Ghannam <yazen.ghannam(a)amd.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dac4d64dfb2a..2235a7477436 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -300,13 +300,12 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
copy_user = is_copy_from_user(regs);
instrumentation_end();
- switch (fixup_type) {
- case EX_TYPE_UACCESS:
- if (!copy_user)
- return IN_KERNEL;
- m->kflags |= MCE_IN_KERNEL_COPYIN;
- fallthrough;
+ if (copy_user) {
+ m->kflags |= MCE_IN_KERNEL_COPYIN | MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ }
+ switch (fixup_type) {
case EX_TYPE_FAULT_MCE_SAFE:
case EX_TYPE_DEFAULT_MCE_SAFE:
m->kflags |= MCE_IN_KERNEL_RECOV;
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 60f3caff1492e5b8616b9578c4bedb5c0a88ed14
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025040845-repeater-uninvited-9d3b@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 60f3caff1492e5b8616b9578c4bedb5c0a88ed14 Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen(a)gmail.com>
Date: Sun, 30 Mar 2025 16:31:09 +0800
Subject: [PATCH] LoongArch: BPF: Don't override subprog's return value
The verifier test `calls: div by 0 in subprog` triggers a panic at the
ld.bu instruction. The ld.bu insn is trying to load byte from memory
address returned by the subprog. The subprog actually set the correct
address at the a5 register (dedicated register for BPF return values).
But at commit 73c359d1d356 ("LoongArch: BPF: Sign-extend return values")
we also sign extended a5 to the a0 register (return value in LoongArch).
For function call insn, we later propagate the a0 register back to a5
register. This is right for native calls but wrong for bpf2bpf calls
which expect zero-extended return value in a5 register. So only move a0
to a5 for native calls (i.e. non-BPF_PSEUDO_CALL).
Cc: stable(a)vger.kernel.org
Fixes: 73c359d1d356 ("LoongArch: BPF: Sign-extend return values")
Signed-off-by: Hengqi Chen <hengqi.chen(a)gmail.com>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index a06bf89fed67..fa1500d4aa3e 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -907,7 +907,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
move_addr(ctx, t1, func_addr);
emit_insn(ctx, jirl, LOONGARCH_GPR_RA, t1, 0);
- move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
+
+ if (insn->src_reg != BPF_PSEUDO_CALL)
+ move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
+
break;
/* tail call */
On 4/7/25 17:25, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> wifi: mac80211: remove debugfs dir for virtual monitor
>
> to the 6.13-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> wifi-mac80211-remove-debugfs-dir-for-virtual-monitor.patch
> and it can be found in the queue-6.13 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
The commit here is causing a sparse warning. Please always add commit
861d0445e72e ("wifi: mac80211: Fix sparse warning for monitor_sdata")
when you backport this patch to avoid that. (So far I got the backport
notification for 6.12 and 6.13.)
It's also no big deal when you decide to not backport this patch.
>
>
> commit 193bafe31a2a08b5631a98ecf148fa0d18e94b0d
> Author: Alexander Wetzel <Alexander(a)wetzel-home.de>
> Date: Tue Feb 4 17:42:40 2025 +0100
>
> wifi: mac80211: remove debugfs dir for virtual monitor
>
> [ Upstream commit 646262c71aca87bb66945933abe4e620796d6c5a ]
>
> Don't call ieee80211_debugfs_recreate_netdev() for virtual monitor
> interface when deleting it.
>
> The virtual monitor interface shouldn't have debugfs entries and trying
> to update them will *create* them on deletion.
>
> And when the virtual monitor interface is created/destroyed multiple
> times we'll get warnings about debugfs name conflicts.
>
> Signed-off-by: Alexander Wetzel <Alexander(a)wetzel-home.de>
> Link: https://patch.msgid.link/20250204164240.370153-1-Alexander@wetzel-home.de
> Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
> index 299d38e9e8630..2fc60e1e77a55 100644
> --- a/net/mac80211/driver-ops.c
> +++ b/net/mac80211/driver-ops.c
> @@ -116,8 +116,14 @@ void drv_remove_interface(struct ieee80211_local *local,
>
> sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
>
> - /* Remove driver debugfs entries */
> - ieee80211_debugfs_recreate_netdev(sdata, sdata->vif.valid_links);
> + /*
> + * Remove driver debugfs entries.
> + * The virtual monitor interface doesn't get a debugfs
> + * entry, so it's exempt here.
> + */
> + if (sdata != local->monitor_sdata)
> + ieee80211_debugfs_recreate_netdev(sdata,
> + sdata->vif.valid_links);
>
> trace_drv_remove_interface(local, sdata);
> local->ops->remove_interface(&local->hw, &sdata->vif);
> diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
> index 806dffa48ef92..04b3626387309 100644
> --- a/net/mac80211/iface.c
> +++ b/net/mac80211/iface.c
> @@ -1212,16 +1212,17 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
> return;
> }
>
> - RCU_INIT_POINTER(local->monitor_sdata, NULL);
> - mutex_unlock(&local->iflist_mtx);
> -
> - synchronize_net();
> -
> + clear_bit(SDATA_STATE_RUNNING, &sdata->state);
> ieee80211_link_release_channel(&sdata->deflink);
>
> if (ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF))
> drv_remove_interface(local, sdata);
>
> + RCU_INIT_POINTER(local->monitor_sdata, NULL);
> + mutex_unlock(&local->iflist_mtx);
> +
> + synchronize_net();
> +
> kfree(sdata);
> }
>
The sdio_read32() calls sd_read(), but does not handle the error if
sd_read() fails. This could lead to subsequent operations processing
invalid data. A proper implementation can be found in sdio_readN(),
which has an error handling for the sd_read().
Add error handling for the sd_read() to free tmpbuf and return error
code if sd_read() fails. This ensure that the memcpy() is only performed
when the read operation is successful.
Since none of the callers check for the errors, there is no need to
return the error code propagated from sd_read(). Returning SDIO_ERR_VAL32
might be a better choice, which is a specialized error code for SDIO.
Another problem of returning propagated error code is that the error
code is a s32 type value, which is not fit with the u32 type return value
of the sdio_read32().
An practical option would be to go through all the callers and add error
handling, which need to pass a pointer to u32 *val and return zero on
success or negative on failure. It is not a better choice since will cost
unnecessary effort on the error code.
The other opion is to replace sd_read() by sd_read32(), which return an
u32 type error code that can be directly used as the return value of
sdio_read32(). But, it is also a bad choice to use sd_read32() in a
alignment failed branch.
Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver")
Cc: stable(a)vger.kernel.org # v4.12+
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
v7: Fix error code and add patch explanation
v6: Fix improper code to propagate error code
v5: Fix error code
v4: Add change log and fix error code
v3: Add Cc flag
v2: Change code to initialize val
drivers/staging/rtl8723bs/hal/sdio_ops.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/rtl8723bs/hal/sdio_ops.c b/drivers/staging/rtl8723bs/hal/sdio_ops.c
index 21e9f1858745..d79d41727042 100644
--- a/drivers/staging/rtl8723bs/hal/sdio_ops.c
+++ b/drivers/staging/rtl8723bs/hal/sdio_ops.c
@@ -185,7 +185,12 @@ static u32 sdio_read32(struct intf_hdl *intfhdl, u32 addr)
return SDIO_ERR_VAL32;
ftaddr &= ~(u16)0x3;
- sd_read(intfhdl, ftaddr, 8, tmpbuf);
+ err = sd_read(intfhdl, ftaddr, 8, tmpbuf);
+ if (err) {
+ kfree(tmpbuf);
+ return SDIO_ERR_VAL32;
+ }
+
memcpy(&le_tmp, tmpbuf + shift, 4);
val = le32_to_cpu(le_tmp);
--
2.42.0.windows.2
In tpacpi_battery_init(), the return value of tpacpi_check_quirks() needs
to be checked. The battery should not be hooked if there is no matched
battery information in quirk table.
Add an error check and return -ENODEV immediately if the device fail
the check.
Fixes: 1a32ebb26ba9 ("platform/x86: thinkpad_acpi: Support battery quirk")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
v3: Fix error code
v2: Fix double assignment error
drivers/platform/x86/thinkpad_acpi.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 2cfb2ac3f465..ab538bf53716 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -9974,6 +9974,8 @@ static int __init tpacpi_battery_init(struct ibm_init_struct *ibm)
tp_features.battery_force_primary = tpacpi_check_quirks(
battery_quirk_table,
ARRAY_SIZE(battery_quirk_table));
+ if (!tp_features.battery_force_primary)
+ return -ENODEV;
battery_hook_register(&battery_hook);
return 0;
--
2.42.0.windows.2
From: Alex Hung <alex.hung(a)amd.com>
[ Upstream commit 8aa2864044b9d13e95fe224f32e808afbf79ecdf ]
[WHY & HOW]
dc->links[] has max size of MAX_LINKS and NULL is return when trying to
access with out-of-bound index.
This fixes 3 OVERRUN and 1 RESOURCE_LEAK issues reported by Coverity.
Reviewed-by: Harry Wentland <harry.wentland(a)amd.com>
Acked-by: Tom Chung <chiahsuan.chung(a)amd.com>
Signed-off-by: Alex Hung <alex.hung(a)amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
[The macro MAX_LINKS is introduced by Commit 60df5628144b ("drm/amd/display:
handle invalid connector indices") after 6.10. So here we still use the
original array length MAX_PIPES * 2]
Signed-off-by: Jianqi Ren <jianqi.ren.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Verified the build test
---
drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
index f365773d5714..e9b3c1c7a931 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_exports.c
@@ -37,6 +37,9 @@
#include "dce/dce_i2c.h"
struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_index)
{
+ if (link_index >= (MAX_PIPES * 2))
+ return NULL;
+
return dc->links[link_index];
}
--
2.34.1
On destroy, we should set each node dead. But current code miss this
when the maple tree has only the root node.
The reason is mt_destroy_walk() leverage mte_destroy_descend() to set
node dead, but this is skipped since the only root node is a leaf.
Fixes this by setting the node dead if it is a leaf.
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Wei Yang <richard.weiyang(a)gmail.com>
CC: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
Cc: <stable(a)vger.kernel.org>
---
v2:
* move the operation into mt_destroy_walk()
* adjust the title accordingly
---
lib/maple_tree.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 4bd5a5be1440..0696e8d1c4e9 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -5284,6 +5284,7 @@ static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
struct maple_enode *start;
if (mte_is_leaf(enode)) {
+ mte_set_node_dead(enode);
node->type = mte_node_type(enode);
goto free_leaf;
}
--
2.34.1
Replace kzalloc with kvzalloc for the exit_dump buffer allocation, which
can require large contiguous memory (up to order=9) depending on the
implementation. This change prevents allocation failures by allowing the
system to fall back to vmalloc when contiguous memory allocation fails.
Since this buffer is only used for debugging purposes, physical memory
contiguity is not required, making vmalloc a suitable alternative.
Cc: stable(a)vger.kernel.org
Fixes: 07814a9439a3b0 ("sched_ext: Print debug dump after an error exit")
Suggested-by: Rik van Riel <riel(a)surriel.com>
Signed-off-by: Breno Leitao <leitao(a)debian.org>
---
kernel/sched/ext.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 66bcd40a28ca1..c82725f9b0559 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4639,7 +4639,7 @@ static struct scx_exit_info *alloc_exit_info(size_t exit_dump_len)
ei->bt = kcalloc(SCX_EXIT_BT_LEN, sizeof(ei->bt[0]), GFP_KERNEL);
ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL);
- ei->dump = kzalloc(exit_dump_len, GFP_KERNEL);
+ ei->dump = kvzalloc(exit_dump_len, GFP_KERNEL);
if (!ei->bt || !ei->msg || !ei->dump) {
free_exit_info(ei);
---
base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
change-id: 20250407-scx-11dbf94803c3
Best regards,
--
Breno Leitao <leitao(a)debian.org>
On Mon, Apr 07 2025 at 10:07, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> timekeeping: Fix possible inconsistencies in _COARSE clockids
>
> to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> timekeeping-fix-possible-inconsistencies-in-_coarse-.patch
> and it can be found in the queue-6.14 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
As I asked on the stable list already, please do not add that to any
stable tree. It has been reverted in Linus tree and the problem will be
fixed differently.
Thanks,
tglx
From: Oleg Nesterov <oleg(a)redhat.com>
[ Upstream commit 975776841e689dd8ba36df9fa72ac3eca3c2957a ]
kernel/sched/isolation.c obviously makes no sense without CONFIG_SMP, but
the Kconfig entry we have right now:
config CPU_ISOLATION
bool "CPU isolation"
depends on SMP || COMPILE_TEST
allows the creation of pointless .config's which cause
build failures.
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/20250330134955.GA7910@redhat.com
Closes: https://lore.kernel.org/oe-kbuild-all/202503260646.lrUqD3j5-lkp@intel.com/
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
init/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Kconfig b/init/Kconfig
index f641518f4ac5c..01beb047aff2f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -559,7 +559,7 @@ endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
bool "CPU isolation"
- depends on SMP || COMPILE_TEST
+ depends on SMP
default y
help
Make sure that CPUs running critical tasks are not disturbed by
--
2.39.5
From: Oleg Nesterov <oleg(a)redhat.com>
[ Upstream commit 975776841e689dd8ba36df9fa72ac3eca3c2957a ]
kernel/sched/isolation.c obviously makes no sense without CONFIG_SMP, but
the Kconfig entry we have right now:
config CPU_ISOLATION
bool "CPU isolation"
depends on SMP || COMPILE_TEST
allows the creation of pointless .config's which cause
build failures.
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/20250330134955.GA7910@redhat.com
Closes: https://lore.kernel.org/oe-kbuild-all/202503260646.lrUqD3j5-lkp@intel.com/
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
init/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Kconfig b/init/Kconfig
index 9807c66b24bb6..d9dd8cbe99b11 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -656,7 +656,7 @@ endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
bool "CPU isolation"
- depends on SMP || COMPILE_TEST
+ depends on SMP
default y
help
Make sure that CPUs running critical tasks are not disturbed by
--
2.39.5
From: Oleg Nesterov <oleg(a)redhat.com>
[ Upstream commit 975776841e689dd8ba36df9fa72ac3eca3c2957a ]
kernel/sched/isolation.c obviously makes no sense without CONFIG_SMP, but
the Kconfig entry we have right now:
config CPU_ISOLATION
bool "CPU isolation"
depends on SMP || COMPILE_TEST
allows the creation of pointless .config's which cause
build failures.
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/20250330134955.GA7910@redhat.com
Closes: https://lore.kernel.org/oe-kbuild-all/202503260646.lrUqD3j5-lkp@intel.com/
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
init/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Kconfig b/init/Kconfig
index dafc3ba6fa7a1..c7d81fc823964 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -674,7 +674,7 @@ endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
bool "CPU isolation"
- depends on SMP || COMPILE_TEST
+ depends on SMP
default y
help
Make sure that CPUs running critical tasks are not disturbed by
--
2.39.5
From: Oleg Nesterov <oleg(a)redhat.com>
[ Upstream commit 975776841e689dd8ba36df9fa72ac3eca3c2957a ]
kernel/sched/isolation.c obviously makes no sense without CONFIG_SMP, but
the Kconfig entry we have right now:
config CPU_ISOLATION
bool "CPU isolation"
depends on SMP || COMPILE_TEST
allows the creation of pointless .config's which cause
build failures.
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/20250330134955.GA7910@redhat.com
Closes: https://lore.kernel.org/oe-kbuild-all/202503260646.lrUqD3j5-lkp@intel.com/
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
init/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Kconfig b/init/Kconfig
index b6786ddc88a80..8b6a2848da4a5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -678,7 +678,7 @@ endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
bool "CPU isolation"
- depends on SMP || COMPILE_TEST
+ depends on SMP
default y
help
Make sure that CPUs running critical tasks are not disturbed by
--
2.39.5
From: Oleg Nesterov <oleg(a)redhat.com>
[ Upstream commit 975776841e689dd8ba36df9fa72ac3eca3c2957a ]
kernel/sched/isolation.c obviously makes no sense without CONFIG_SMP, but
the Kconfig entry we have right now:
config CPU_ISOLATION
bool "CPU isolation"
depends on SMP || COMPILE_TEST
allows the creation of pointless .config's which cause
build failures.
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/20250330134955.GA7910@redhat.com
Closes: https://lore.kernel.org/oe-kbuild-all/202503260646.lrUqD3j5-lkp@intel.com/
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
init/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/init/Kconfig b/init/Kconfig
index 1105cb53f391a..8b630143c720f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -689,7 +689,7 @@ endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
bool "CPU isolation"
- depends on SMP || COMPILE_TEST
+ depends on SMP
default y
help
Make sure that CPUs running critical tasks are not disturbed by
--
2.39.5
From: Thomas Weißschuh <linux(a)weissschuh.net>
[ Upstream commit dbdffaf50ff9cee3259a7cef8a7bd9e0f0ba9f13 ]
Remap source path prefixes in all output, including compiler
diagnostics, debug information, macro expansions, etc.
This removes a few absolute paths from the binary and also makes it
possible to use core::panic::Location properly.
Equivalent to the same configuration done for C sources in
commit 1d3730f0012f ("kbuild: support -fmacro-prefix-map for external
modules") and commit a73619a845d5 ("kbuild: use -fmacro-prefix-map to
make __FILE__ a relative path").
Link: https://doc.rust-lang.org/rustc/command-line-arguments.html#--remap-path-pr…
Acked-by: Miguel Ojeda <ojeda(a)kernel.org>
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
Tested-by: Gary Guo <gary(a)garyguo.net>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
Makefile | 1 +
1 file changed, 1 insertion(+)
diff --git a/Makefile b/Makefile
index 21a34e8991ac6..9c1b66bf4bf6e 100644
--- a/Makefile
+++ b/Makefile
@@ -1067,6 +1067,7 @@ endif
# change __FILE__ to the relative path to the source directory
ifdef building_out_of_srctree
KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=)
+KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/=
endif
# include additional Makefiles when needed
--
2.39.5
From: Thomas Weißschuh <linux(a)weissschuh.net>
[ Upstream commit dbdffaf50ff9cee3259a7cef8a7bd9e0f0ba9f13 ]
Remap source path prefixes in all output, including compiler
diagnostics, debug information, macro expansions, etc.
This removes a few absolute paths from the binary and also makes it
possible to use core::panic::Location properly.
Equivalent to the same configuration done for C sources in
commit 1d3730f0012f ("kbuild: support -fmacro-prefix-map for external
modules") and commit a73619a845d5 ("kbuild: use -fmacro-prefix-map to
make __FILE__ a relative path").
Link: https://doc.rust-lang.org/rustc/command-line-arguments.html#--remap-path-pr…
Acked-by: Miguel Ojeda <ojeda(a)kernel.org>
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
Tested-by: Gary Guo <gary(a)garyguo.net>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
Makefile | 1 +
1 file changed, 1 insertion(+)
diff --git a/Makefile b/Makefile
index 8b6764d44a610..c4eeb97ab3df2 100644
--- a/Makefile
+++ b/Makefile
@@ -1068,6 +1068,7 @@ endif
# change __FILE__ to the relative path to the source directory
ifdef building_out_of_srctree
KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srcroot)/=)
+KBUILD_RUSTFLAGS += --remap-path-prefix=$(srcroot)/=
endif
# include additional Makefiles when needed
--
2.39.5
Asus laptops with sound PCI subsystem ID 1043:1f43 have the DMICs
connected to the host instead of the CS42L43 so need the
SOC_SDW_CODEC_MIC quirk.
Link: https://github.com/thesofproject/sof/issues/9930
Fixes: 084344970808 ("ASoC: Intel: sof_sdw: Add quirk for Asus Zenbook S14")
Signed-off-by: Peter Ujfalusi <peter.ujfalusi(a)linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao(a)linux.intel.com>
Reviewed-by: Simon Trimmer <simont(a)opensource.cirrus.com>
Cc: stable(a)vger.kernel.org
---
Hi,
The S16 variant of the Zenbook has 4 PCH connected DMICs and the new
topology needed for it is released alongside with the -2ch variant for
the S14.
Add a Fixes tag so this is backported to 6.14 since we have at least one
user with this laptop without audio support.
Regards,
Peter
sound/soc/intel/boards/sof_sdw.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index 90dafa810b2e..095d08b3fc82 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -764,6 +764,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = {
SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC),
+ SND_PCI_QUIRK(0x1043, 0x1f43, "ASUS Zenbook S16", SOC_SDW_CODEC_MIC),
{}
};
--
2.49.0
Case values introduced in commit
5f78e1fb7a3e ("ASoC: qcom: Add driver support for audioreach solution")
cause out of bounds access in arrays of sc7280 driver data (e.g. in case
of RX_CODEC_DMA_RX_0 in sc7280_snd_hw_params()).
Redefine LPASS_MAX_PORTS to consider the maximum possible port id for
q6dsp as sc7280 driver utilizes some of those values.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: 77d0ffef793d ("ASoC: qcom: Add macro for lpass DAI id's max limit")
Cc: stable(a)vger.kernel.org # v6.0+
Suggested-by: Mikhail Kobuk <m.kobuk(a)ispras.ru>
Suggested-by: Alexey Khoroshilov <khoroshilov(a)ispras.ru>
Signed-off-by: Evgeny Pimenov <pimenoveu12(a)gmail.com>
---
sound/soc/qcom/lpass.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h
index 27a2bf9a6613..de3ec6f594c1 100644
--- a/sound/soc/qcom/lpass.h
+++ b/sound/soc/qcom/lpass.h
@@ -13,10 +13,11 @@
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <dt-bindings/sound/qcom,lpass.h>
+#include <dt-bindings/sound/qcom,q6afe.h>
#include "lpass-hdmi.h"
#define LPASS_AHBIX_CLOCK_FREQUENCY 131072000
-#define LPASS_MAX_PORTS (LPASS_CDC_DMA_VA_TX8 + 1)
+#define LPASS_MAX_PORTS (DISPLAY_PORT_RX_7 + 1)
#define LPASS_MAX_MI2S_PORTS (8)
#define LPASS_MAX_DMA_CHANNELS (8)
#define LPASS_MAX_HDMI_DMA_CHANNELS (4)
--
2.39.5
Commit a8091f039c1e ("maple_tree: add MAS_UNDERFLOW and MAS_OVERFLOW
states") adds more status during maple tree walk. But it introduce a
typo on the status check during walk.
It expects to mean neither active nor start, we would restart the walk,
while current code means we would always restart the walk.
Fixes: a8091f039c1e ("maple_tree: add MAS_UNDERFLOW and MAS_OVERFLOW states")
Signed-off-by: Wei Yang <richard.weiyang(a)gmail.com>
CC: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
CC: <stable(a)vger.kernel.org>
Reviewed-by: Liam R. Howlett <Liam.Howlett(a)Oracle.com>
---
lib/maple_tree.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 0696e8d1c4e9..81970b3a6af7 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4895,7 +4895,7 @@ void *mas_walk(struct ma_state *mas)
{
void *entry;
- if (!mas_is_active(mas) || !mas_is_start(mas))
+ if (!mas_is_active(mas) && !mas_is_start(mas))
mas->status = ma_start;
retry:
entry = mas_state_walk(mas);
--
2.34.1
The patch titled
Subject: mm: fix filemap_get_folios_contig returning batches of identical folios
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-fix-filemap_get_folios_contig-returning-batches-of-identical-folios.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: "Vishal Moola (Oracle)" <vishal.moola(a)gmail.com>
Subject: mm: fix filemap_get_folios_contig returning batches of identical folios
Date: Thu, 3 Apr 2025 16:54:17 -0700
filemap_get_folios_contig() is supposed to return distinct folios found
within [start, end]. Large folios in the Xarray become multi-index
entries. xas_next() can iterate through the sub-indexes before finding a
sibling entry and breaking out of the loop.
This can result in a returned folio_batch containing an indeterminate
number of duplicate folios, which forces the callers to skeptically handle
the returned batch. This is inefficient and incurs a large maintenance
overhead.
We can fix this by calling xas_advance() after we have successfully adding
a folio to the batch to ensure our Xarray is positioned such that it will
correctly find the next folio - similar to filemap_get_read_batch().
Link: https://lkml.kernel.org/r/Z-8s1-kiIDkzgRbc@fedora
Fixes: 35b471467f88 ("filemap: add filemap_get_folios_contig()")
Signed-off-by: Vishal Moola (Oracle) <vishal.moola(a)gmail.com>
Reported-by: Qu Wenruo <quwenruo.btrfs(a)gmx.com>
Closes: https://lkml.kernel.org/r/b714e4de-2583-4035-b829-72cfb5eb6fc6@gmx.com
Tested-by: Qu Wenruo <quwenruo.btrfs(a)gmx.com>
Cc: Matthew Wilcow (Oracle) <willy(a)infradead.org>
Cc: Vivek Kasireddy <vivek.kasireddy(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/filemap.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/filemap.c~mm-fix-filemap_get_folios_contig-returning-batches-of-identical-folios
+++ a/mm/filemap.c
@@ -2244,6 +2244,7 @@ unsigned filemap_get_folios_contig(struc
*start = folio->index + nr;
goto out;
}
+ xas_advance(&xas, folio_next_index(folio) - 1);
continue;
put_folio:
folio_put(folio);
_
Patches currently in -mm which might be from vishal.moola(a)gmail.com are
mm-compaction-fix-bug-in-hugetlb-handling-pathway.patch
mm-fix-filemap_get_folios_contig-returning-batches-of-identical-folios.patch
mm-compaction-use-folio-in-hugetlb-pathway.patch
From: Miguel Ojeda <ojeda(a)kernel.org>
Similar to what was done for `Zeroable<NonNull<T>>` in commit
df27cef15360 ("rust: init: fix `Zeroable` implementation for
`Option<NonNull<T>>` and `Option<KBox<T>>`"), the latest Rust
documentation [1] says it guarantees that `transmute::<_,
Option<T>>([0u8; size_of::<T>()])` is sound and produces
`Option::<T>::None` only in some cases. In particular, it says:
`Box<U>` (specifically, only `Box<U, Global>`) when `U: Sized`
Thus restrict the `impl` to `Sized`, and use similar wording as in that
commit too.
Link: https://doc.rust-lang.org/stable/std/option/index.html#representation [1]
Signed-off-by: Miguel Ojeda <ojeda(a)kernel.org>
Link: https://github.com/Rust-for-Linux/pin-init/pull/32/commits/a6007cf555e5946b…
Fixes: 9b2299af3b92 ("rust: pin-init: add `std` and `alloc` support from the user-space version")
Cc: stable(a)vger.kernel.org
[ Adjust mentioned commit to the one from the kernel. - Benno ]
Signed-off-by: Benno Lossin <benno.lossin(a)proton.me>
---
rust/pin-init/src/alloc.rs | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/rust/pin-init/src/alloc.rs b/rust/pin-init/src/alloc.rs
index e16baa3b434e..5017f57442d8 100644
--- a/rust/pin-init/src/alloc.rs
+++ b/rust/pin-init/src/alloc.rs
@@ -17,11 +17,9 @@
pub extern crate alloc;
-// SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee).
-//
-// In this case we are allowed to use `T: ?Sized`, since all zeros is the `None` variant and there
-// is no problem with a VTABLE pointer being null.
-unsafe impl<T: ?Sized> ZeroableOption for Box<T> {}
+// SAFETY: All zeros is equivalent to `None` (option layout optimization guarantee:
+// <https://doc.rust-lang.org/stable/std/option/index.html#representation>).
+unsafe impl<T> ZeroableOption for Box<T> {}
/// Smart pointer that can initialize memory in-place.
pub trait InPlaceInit<T>: Sized {
--
2.48.1
From: Al Viro <viro(a)zeniv.linux.org.uk>
[ Upstream commit bdb43af4fdb39f844ede401bdb1258f67a580a27 ]
failure to allocate inode => leaked dentry...
this one had been there since the initial merge; to be fair,
if we are that far OOM, the odds of failing at that particular
allocation are low...
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/qib/qib_fs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index e336d778e076e..5ec67e3c2d03c 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -56,6 +56,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
--
2.39.5
From: Al Viro <viro(a)zeniv.linux.org.uk>
[ Upstream commit bdb43af4fdb39f844ede401bdb1258f67a580a27 ]
failure to allocate inode => leaked dentry...
this one had been there since the initial merge; to be fair,
if we are that far OOM, the odds of failing at that particular
allocation are low...
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/qib/qib_fs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index e336d778e076e..5ec67e3c2d03c 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -56,6 +56,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
--
2.39.5
From: Al Viro <viro(a)zeniv.linux.org.uk>
[ Upstream commit bdb43af4fdb39f844ede401bdb1258f67a580a27 ]
failure to allocate inode => leaked dentry...
this one had been there since the initial merge; to be fair,
if we are that far OOM, the odds of failing at that particular
allocation are low...
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/qib/qib_fs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 8665e506404f9..774037ea44a92 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -56,6 +56,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
--
2.39.5
From: Al Viro <viro(a)zeniv.linux.org.uk>
[ Upstream commit bdb43af4fdb39f844ede401bdb1258f67a580a27 ]
failure to allocate inode => leaked dentry...
this one had been there since the initial merge; to be fair,
if we are that far OOM, the odds of failing at that particular
allocation are low...
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/qib/qib_fs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 182a89bb24ef4..caade796bc3cb 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
--
2.39.5
From: Al Viro <viro(a)zeniv.linux.org.uk>
[ Upstream commit bdb43af4fdb39f844ede401bdb1258f67a580a27 ]
failure to allocate inode => leaked dentry...
this one had been there since the initial merge; to be fair,
if we are that far OOM, the odds of failing at that particular
allocation are low...
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/qib/qib_fs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 11155e0fb8395..35d777976c295 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
--
2.39.5
From: Al Viro <viro(a)zeniv.linux.org.uk>
[ Upstream commit bdb43af4fdb39f844ede401bdb1258f67a580a27 ]
failure to allocate inode => leaked dentry...
this one had been there since the initial merge; to be fair,
if we are that far OOM, the odds of failing at that particular
allocation are low...
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/qib/qib_fs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index b27791029fa93..b9f4a2937c3ac 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
--
2.39.5
From: Al Viro <viro(a)zeniv.linux.org.uk>
[ Upstream commit bdb43af4fdb39f844ede401bdb1258f67a580a27 ]
failure to allocate inode => leaked dentry...
this one had been there since the initial merge; to be fair,
if we are that far OOM, the odds of failing at that particular
allocation are low...
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/qib/qib_fs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index b27791029fa93..b9f4a2937c3ac 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
--
2.39.5
From: Al Viro <viro(a)zeniv.linux.org.uk>
[ Upstream commit bdb43af4fdb39f844ede401bdb1258f67a580a27 ]
failure to allocate inode => leaked dentry...
this one had been there since the initial merge; to be fair,
if we are that far OOM, the odds of failing at that particular
allocation are low...
Signed-off-by: Al Viro <viro(a)zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/infiniband/hw/qib/qib_fs.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index b27791029fa93..b9f4a2937c3ac 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
--
2.39.5
The patch titled
Subject: mm: page_alloc: speed up fallbacks in rmqueue_bulk()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-page_alloc-speed-up-fallbacks-in-rmqueue_bulk.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Johannes Weiner <hannes(a)cmpxchg.org>
Subject: mm: page_alloc: speed up fallbacks in rmqueue_bulk()
Date: Mon, 7 Apr 2025 14:01:53 -0400
The test robot identified c2f6ea38fc1b ("mm: page_alloc: don't steal
single pages from biggest buddy") as the root cause of a 56.4% regression
in vm-scalability::lru-file-mmap-read.
Carlos reports an earlier patch, c0cd6f557b90 ("mm: page_alloc: fix
freelist movement during block conversion"), as the root cause for a
regression in worst-case zone->lock+irqoff hold times.
Both of these patches modify the page allocator's fallback path to be less
greedy in an effort to stave off fragmentation. The flip side of this is
that fallbacks are also less productive each time around, which means the
fallback search can run much more frequently.
Carlos' traces point to rmqueue_bulk() specifically, which tries to refill
the percpu cache by allocating a large batch of pages in a loop. It
highlights how once the native freelists are exhausted, the fallback code
first scans orders top-down for whole blocks to claim, then falls back to
a bottom-up search for the smallest buddy to steal. For the next batch
page, it goes through the same thing again.
This can be made more efficient. Since rmqueue_bulk() holds the
zone->lock over the entire batch, the freelists are not subject to outside
changes; when the search for a block to claim has already failed, there is
no point in trying again for the next page.
Modify __rmqueue() to remember the last successful fallback mode, and
restart directly from there on the next rmqueue_bulk() iteration.
Oliver confirms that this improves beyond the regression that the test
robot reported against c2f6ea38fc1b:
commit:
f3b92176f4 ("tools/selftests: add guard region test for /proc/$pid/pagemap")
c2f6ea38fc ("mm: page_alloc: don't steal single pages from biggest buddy")
acc4d5ff0b ("Merge tag 'net-6.15-rc0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
2c847f27c3 ("mm: page_alloc: speed up fallbacks in rmqueue_bulk()") <--- your patch
f3b92176f4f7100f c2f6ea38fc1b640aa7a2e155cc1 acc4d5ff0b61eb1715c498b6536 2c847f27c37da65a93d23c237c5
---------------- --------------------------- --------------------------- ---------------------------
%stddev %change %stddev %change %stddev %change %stddev
\ | \ | \ | \
25525364 �� 3% -56.4% 11135467 -57.8% 10779336 +31.6% 33581409 vm-scalability.throughput
Carlos confirms that worst-case times are almost fully recovered
compared to before the earlier culprit patch:
2dd482ba627d (before freelist hygiene): 1ms
c0cd6f557b90 (after freelist hygiene): 90ms
next-20250319 (steal smallest buddy): 280ms
this patch : 8ms
Link: https://lkml.kernel.org/r/20250407180154.63348-1-hannes@cmpxchg.org
Fixes: c0cd6f557b90 ("mm: page_alloc: fix freelist movement during block conversion")
Fixes: c2f6ea38fc1b ("mm: page_alloc: don't steal single pages from biggest buddy")
Signed-off-by: Johannes Weiner <hannes(a)cmpxchg.org>
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Reported-by: Carlos Song <carlos.song(a)nxp.com>
Tested-by: kernel test robot <oliver.sang(a)intel.com>
Closes: https://lore.kernel.org/oe-lkp/202503271547.fc08b188-lkp@intel.com
Cc: <stable(a)vger.kernel.org> [6.10+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 100 ++++++++++++++++++++++++++++++++++------------
1 file changed, 74 insertions(+), 26 deletions(-)
--- a/mm/page_alloc.c~mm-page_alloc-speed-up-fallbacks-in-rmqueue_bulk
+++ a/mm/page_alloc.c
@@ -2189,11 +2189,11 @@ try_to_claim_block(struct zone *zone, st
* The use of signed ints for order and current_order is a deliberate
* deviation from the rest of this file, to make the for loop
* condition simpler.
- *
- * Return the stolen page, or NULL if none can be found.
*/
+
+/* Try to claim a whole foreign block, take a page, expand the remainder */
static __always_inline struct page *
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
+__rmqueue_claim(struct zone *zone, int order, int start_migratetype,
unsigned int alloc_flags)
{
struct free_area *area;
@@ -2231,14 +2231,26 @@ __rmqueue_fallback(struct zone *zone, in
page = try_to_claim_block(zone, page, current_order, order,
start_migratetype, fallback_mt,
alloc_flags);
- if (page)
- goto got_one;
+ if (page) {
+ trace_mm_page_alloc_extfrag(page, order, current_order,
+ start_migratetype, fallback_mt);
+ return page;
+ }
}
- if (alloc_flags & ALLOC_NOFRAGMENT)
- return NULL;
+ return NULL;
+}
+
+/* Try to steal a single page from a foreign block */
+static __always_inline struct page *
+__rmqueue_steal(struct zone *zone, int order, int start_migratetype)
+{
+ struct free_area *area;
+ int current_order;
+ struct page *page;
+ int fallback_mt;
+ bool claim_block;
- /* No luck claiming pageblock. Find the smallest fallback page */
for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
area = &(zone->free_area[current_order]);
fallback_mt = find_suitable_fallback(area, current_order,
@@ -2248,25 +2260,28 @@ __rmqueue_fallback(struct zone *zone, in
page = get_page_from_free_area(area, fallback_mt);
page_del_and_expand(zone, page, order, current_order, fallback_mt);
- goto got_one;
+ trace_mm_page_alloc_extfrag(page, order, current_order,
+ start_migratetype, fallback_mt);
+ return page;
}
return NULL;
-
-got_one:
- trace_mm_page_alloc_extfrag(page, order, current_order,
- start_migratetype, fallback_mt);
-
- return page;
}
+enum rmqueue_mode {
+ RMQUEUE_NORMAL,
+ RMQUEUE_CMA,
+ RMQUEUE_CLAIM,
+ RMQUEUE_STEAL,
+};
+
/*
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
*/
static __always_inline struct page *
__rmqueue(struct zone *zone, unsigned int order, int migratetype,
- unsigned int alloc_flags)
+ unsigned int alloc_flags, enum rmqueue_mode *mode)
{
struct page *page;
@@ -2285,16 +2300,47 @@ __rmqueue(struct zone *zone, unsigned in
}
}
- page = __rmqueue_smallest(zone, order, migratetype);
- if (unlikely(!page)) {
- if (alloc_flags & ALLOC_CMA)
+ /*
+ * Try the different freelists, native then foreign.
+ *
+ * The fallback logic is expensive and rmqueue_bulk() calls in
+ * a loop with the zone->lock held, meaning the freelists are
+ * not subject to any outside changes. Remember in *mode where
+ * we found pay dirt, to save us the search on the next call.
+ */
+ switch (*mode) {
+ case RMQUEUE_NORMAL:
+ page = __rmqueue_smallest(zone, order, migratetype);
+ if (page)
+ return page;
+ fallthrough;
+ case RMQUEUE_CMA:
+ if (alloc_flags & ALLOC_CMA) {
page = __rmqueue_cma_fallback(zone, order);
-
- if (!page)
- page = __rmqueue_fallback(zone, order, migratetype,
- alloc_flags);
+ if (page) {
+ *mode = RMQUEUE_CMA;
+ return page;
+ }
+ }
+ fallthrough;
+ case RMQUEUE_CLAIM:
+ page = __rmqueue_claim(zone, order, migratetype, alloc_flags);
+ if (page) {
+ /* Replenished native freelist, back to normal mode */
+ *mode = RMQUEUE_NORMAL;
+ return page;
+ }
+ fallthrough;
+ case RMQUEUE_STEAL:
+ if (!(alloc_flags & ALLOC_NOFRAGMENT)) {
+ page = __rmqueue_steal(zone, order, migratetype);
+ if (page) {
+ *mode = RMQUEUE_STEAL;
+ return page;
+ }
+ }
}
- return page;
+ return NULL;
}
/*
@@ -2306,6 +2352,7 @@ static int rmqueue_bulk(struct zone *zon
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
+ enum rmqueue_mode rmqm = RMQUEUE_NORMAL;
unsigned long flags;
int i;
@@ -2317,7 +2364,7 @@ static int rmqueue_bulk(struct zone *zon
}
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype,
- alloc_flags);
+ alloc_flags, &rmqm);
if (unlikely(page == NULL))
break;
@@ -2930,6 +2977,7 @@ struct page *rmqueue_buddy(struct zone *
{
struct page *page;
unsigned long flags;
+ enum rmqueue_mode rmqm = RMQUEUE_NORMAL;
do {
page = NULL;
@@ -2942,7 +2990,7 @@ struct page *rmqueue_buddy(struct zone *
if (alloc_flags & ALLOC_HIGHATOMIC)
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (!page) {
- page = __rmqueue(zone, order, migratetype, alloc_flags);
+ page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm);
/*
* If the allocation fails, allow OOM handling and
_
Patches currently in -mm which might be from hannes(a)cmpxchg.org are
mm-page_alloc-speed-up-fallbacks-in-rmqueue_bulk.patch
The current implementation of e820__register_nosave_regions suffers from
multiple serious issues:
- The end of last region is tracked by PFN, causing it to find holes
that aren't there if two consecutive subpage regions are present
- The nosave PFN ranges derived from holes are rounded out (instead of
rounded in) which makes it inconsistent with how explicitly reserved
regions are handled
Fix this by:
- Treating reserved regions as if they were holes, to ensure consistent
handling (rounding out nosave PFN ranges is more correct as the
kernel does not use partial pages)
- Tracking the end of the last RAM region by address instead of pages
to detect holes more precisely
Cc: stable(a)vger.kernel.org
Fixes: e5540f875404 ("x86/boot/e820: Consolidate 'struct e820_entry *entry' local variable names")
Reported-by: Roberto Ricci <io(a)r-ricci.it>
Closes: https://lore.kernel.org/all/Z4WFjBVHpndct7br@desktop0a/
Signed-off-by: Myrrh Periwinkle <myrrhperiwinkle(a)qtmlabs.xyz>
---
The issue of the kernel failing to resume from hibernation after
kexec_load() is used is likely due to kexec-tools passing in a different
e820 memory map from the one provided by system firmware, causing the
e820 consistency check to fail. That issue is not addressed in this
patch and will need to be fixed in kexec-tools instead.
Changes in v3:
- Round out nosave PFN ranges since the kernel does not use partial
pages
- Link to v2: https://lore.kernel.org/r/20250405-fix-e820-nosave-v2-1-d40dbe457c95@qtmlab…
Changes in v2:
- Updated author details
- Rewrote commit message
- Link to v1: https://lore.kernel.org/r/20250405-fix-e820-nosave-v1-1-162633199548@qtmlab…
---
arch/x86/kernel/e820.c | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 57120f0749cc3c23844eeb36820705687e08bbf7..9d8dd8deb2a702bd34b961ca4f5eba8a8d9643d0 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -753,22 +753,21 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len)
void __init e820__register_nosave_regions(unsigned long limit_pfn)
{
int i;
- unsigned long pfn = 0;
+ u64 last_addr = 0;
for (i = 0; i < e820_table->nr_entries; i++) {
struct e820_entry *entry = &e820_table->entries[i];
- if (pfn < PFN_UP(entry->addr))
- register_nosave_region(pfn, PFN_UP(entry->addr));
-
- pfn = PFN_DOWN(entry->addr + entry->size);
-
if (entry->type != E820_TYPE_RAM)
- register_nosave_region(PFN_UP(entry->addr), pfn);
+ continue;
- if (pfn >= limit_pfn)
- break;
+ if (last_addr < entry->addr)
+ register_nosave_region(PFN_DOWN(last_addr), PFN_UP(entry->addr));
+
+ last_addr = entry->addr + entry->size;
}
+
+ register_nosave_region(PFN_DOWN(last_addr), limit_pfn);
}
#ifdef CONFIG_ACPI
---
base-commit: a8662bcd2ff152bfbc751cab20f33053d74d0963
change-id: 20250405-fix-e820-nosave-f5cb985a9a61
Best regards,
--
Myrrh Periwinkle <myrrhperiwinkle(a)qtmlabs.xyz>
From: Roman Smirnov <r.smirnov(a)omp.ru>
[ Upstream commit 2510859475d7f46ed7940db0853f3342bf1b65ee ]
The echo_interval is not limited in any way during mounting,
which makes it possible to write a large number to it. This can
cause an overflow when multiplying ctx->echo_interval by HZ in
match_server().
Add constraints for echo_interval to smb3_fs_context_parse_param().
Found by Linux Verification Center (linuxtesting.org) with Svace.
Fixes: adfeb3e00e8e1 ("cifs: Make echo interval tunable")
Cc: stable(a)vger.kernel.org
Signed-off-by: Roman Smirnov <r.smirnov(a)omp.ru>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Roman Smirnov <r.smirnov(a)omp.ru>
---
fs/cifs/connect.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a3c0e6a4e484..322b8be73bb8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1915,6 +1915,12 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
__func__);
goto cifs_parse_mount_err;
}
+ if (option < SMB_ECHO_INTERVAL_MIN ||
+ option > SMB_ECHO_INTERVAL_MAX) {
+ cifs_dbg(VFS, "%s: Echo interval is out of bounds\n",
+ __func__);
+ goto cifs_parse_mount_err;
+ }
vol->echo_interval = option;
break;
case Opt_snapshot:
--
2.43.0
From: Roman Smirnov <r.smirnov(a)omp.ru>
[ Upstream commit 2510859475d7f46ed7940db0853f3342bf1b65ee ]
The echo_interval is not limited in any way during mounting,
which makes it possible to write a large number to it. This can
cause an overflow when multiplying ctx->echo_interval by HZ in
match_server().
Add constraints for echo_interval to smb3_fs_context_parse_param().
Found by Linux Verification Center (linuxtesting.org) with Svace.
Fixes: adfeb3e00e8e1 ("cifs: Make echo interval tunable")
Cc: stable(a)vger.kernel.org
Signed-off-by: Roman Smirnov <r.smirnov(a)omp.ru>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
Signed-off-by: Roman Smirnov <r.smirnov(a)omp.ru>
---
fs/cifs/fs_context.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index fb3651513f83..3479f0072cf6 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -1088,6 +1088,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
}
break;
case Opt_echo_interval:
+ if (result.uint_32 < SMB_ECHO_INTERVAL_MIN ||
+ result.uint_32 > SMB_ECHO_INTERVAL_MAX) {
+ cifs_errorf(fc, "echo interval is out of bounds\n");
+ goto cifs_parse_mount_err;
+ }
ctx->echo_interval = result.uint_32;
break;
case Opt_snapshot:
--
2.43.0
In tpacpi_battery_init(), the return value of tpacpi_check_quirks() needs
to be checked. The battery should not be hooked if there is no matched
battery information in quirk table.
Add an error check and return -ENODEV immediately if the device fail
the check.
Fixes: 1a32ebb26ba9 ("platform/x86: thinkpad_acpi: Support battery quirk")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
v2: Fix double assignment error.
drivers/platform/x86/thinkpad_acpi.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 2cfb2ac3f465..93eaca3bd9d1 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -9973,7 +9973,9 @@ static int __init tpacpi_battery_init(struct ibm_init_struct *ibm)
tp_features.battery_force_primary = tpacpi_check_quirks(
battery_quirk_table,
- ARRAY_SIZE(battery_quirk_table));
+ ARRAY_SIZE(battery_quirk_table))
+ if (!tp_features.battery_force_primary)
+ return -ENODEV;
battery_hook_register(&battery_hook);
return 0;
--
2.42.0.windows.2
From: Mario Limonciello <mario.limonciello(a)amd.com>
On systems that only have an SRA sensor connected to SFH the sensor
doesn't get enabled due to a bad optimization condition of breaking
the sensor walk loop.
This optimization is unnecessary in the first place because if there
is only one device then the loop only runs once. Drop the condition
and explicitly mark sensor as enabled.
Reported-by: Yijun Shen <Yijun.Shen(a)dell.com>
Tested-By: Yijun Shen <Yijun_Shen(a)Dell.com>
Fixes: d1c444b47100d ("HID: amd_sfh: Add support to export device operating states")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
v2:
* Add tag
---
drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
index 25f0ebfcbd5f5..c1bdf1e0d44af 100644
--- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
+++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
@@ -134,9 +134,6 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
for (i = 0; i < cl_data->num_hid_devices; i++) {
cl_data->sensor_sts[i] = SENSOR_DISABLED;
- if (cl_data->num_hid_devices == 1 && cl_data->sensor_idx[0] == SRA_IDX)
- break;
-
if (cl_data->sensor_idx[i] == SRA_IDX) {
info.sensor_idx = cl_data->sensor_idx[i];
writel(0, privdata->mmio + amd_get_p2c_val(privdata, 0));
@@ -145,8 +142,10 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
(privdata, cl_data->sensor_idx[i], ENABLE_SENSOR);
cl_data->sensor_sts[i] = (status == 0) ? SENSOR_ENABLED : SENSOR_DISABLED;
- if (cl_data->sensor_sts[i] == SENSOR_ENABLED)
+ if (cl_data->sensor_sts[i] == SENSOR_ENABLED) {
+ cl_data->is_any_sensor_enabled = true;
privdata->dev_en.is_sra_present = true;
+ }
continue;
}
--
2.43.0
The e1000_suspend_workarounds_ich8lan() calls e1e_rphy_locked to disable
the SMB release, but does not check its return value. A proper
implementation can be found in e1000_resume_workarounds_pchlan() from
/source/drivers/net/ethernet/intel/e1000e/ich8lan.c.
Add an error check for e1e_rphy_locked(). Log the error message and jump
to 'release' label if the e1e_rphy_locked() fails.
Fixes: 2fbe4526e5aa ("e1000e: initial support for i217")
Cc: stable(a)vger.kernel.org # v3.5+
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/net/ethernet/intel/e1000e/ich8lan.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 2f9655cf5dd9..d16e3aa50809 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -5497,7 +5497,11 @@ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw)
e1e_wphy_locked(hw, I217_SxCTRL, phy_reg);
/* Disable the SMB release on LCD reset. */
- e1e_rphy_locked(hw, I217_MEMPWR, &phy_reg);
+ ret_val = e1e_rphy_locked(hw, I217_MEMPWR, &phy_reg);
+ if (ret_val) {
+ e_dbg("Fail to Disable the SMB release on LCD reset.");
+ goto release;
+ }
phy_reg &= ~I217_MEMPWR_DISABLE_SMB_RELEASE;
e1e_wphy_locked(hw, I217_MEMPWR, phy_reg);
}
--
2.42.0.windows.2
The is_vram() is checking the current placement, however if we consider
exported VRAM with dynamic dma-buf, it looks possible for the xe driver
to async evict the memory, notifying the importer, however importer does
not have to call unmap_attachment() immediately, but rather just as
"soon as possible", like when the dma-resv idles. Following from this we
would then pipeline the move, attaching the fence to the manager, and
then update the current placement. But when the unmap_attachment() runs
at some later point we might see that is_vram() is now false, and take
the complete wrong path when dma-unmapping the sg, leading to
explosions.
To fix this check if the sgl was mapping a struct page.
v2:
- The attachment can be mapped multiple times it seems, so we can't
really rely on encoding something in the attachment->priv. Instead
see if the page_link has an encoded struct page. For vram we expect
this to be NULL.
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4563
Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Matthew Brost <matthew.brost(a)intel.com>
Cc: <stable(a)vger.kernel.org> # v6.8+
---
drivers/gpu/drm/xe/xe_dma_buf.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index f67803e15a0e..f7a20264ea33 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -145,10 +145,7 @@ static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
- struct dma_buf *dma_buf = attach->dmabuf;
- struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
-
- if (!xe_bo_is_vram(bo)) {
+ if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);
--
2.49.0
Dear Linux Stable Maintainers,
while maintaining downstream Linux releases, we noticed that we have to
backport some patches manually, because they are not picked up by your
automated backporting. Some of these backports can be done with
improved cherry-pick tooling. We have implemented a script/tool "git-
fuzzy-pick" which we would like to share. Besides picking more commits,
the tool also supports executing a validation script right after
picking, e.g. compiling the modified source file. Picking stats and
details are presented below.
We would like to discuss whether you can integrate this improved tool
into into your daily workflows. We already found the stable-tools
repository [1] with some scripts that help automate backporting. To
contribute git-fuzzy-pick there, we would need you to declare a license
for the current state of this repository.
To test backporting performance, we tried to backport stable-candidate
patches from 6.12 to 6.1. Specifically, on tag 6.1.125 we executed the
command stable show-missing-stable v6.12.12..v6.12.17 to collect
patches considered for backporting. This results in 431 backport
candidates. When using git-fuzzy-pick, we can pick 9 patches more than
with default cherry-picking. All modifications have been validated by
attempting to build the
object files of the modified C source files with make using the kernels
“allyesconfig” configuration.
196 Cherry-picked with --strategy=recursive --Xpatience -x
1 Applied with patch -p1 ... --fuzz=1
8 Applied with patch -p1 ... --fuzz=2
Please let us know how to best share the tool with you! Long term, we
would like to integrate it into your backporting workflow, so that more
kernel commits can be applied automatically.
Best,
Norbert
[1]
https://kernel.googlesource.com/pub/scm/linux/kernel/git/sashal/stable-tool…
[ resent due to a wrong address for regression reporting, sorry! ]
Hi,
we received a bug report showing the regression on 6.13.1 kernel
against 6.13.0. The symptom is that Chrome and VSCode stopped working
with Gnome Scaling, as reported on openSUSE Tumbleweed bug tracker
https://bugzilla.suse.com/show_bug.cgi?id=1236943
Quoting from there:
"""
I use the latest TW on Gnome with a 4K display and 150%
scaling. Everything has been working fine, but recently both Chrome
and VSCode (installed from official non-openSUSE channels) stopped
working with Scaling.
....
I am using VSCode with:
`--enable-features=UseOzonePlatform --enable-features=WaylandWindowDecorations --ozone-platform-hint=auto` and for Chrome, I select `Preferred Ozone platform` == `Wayland`.
"""
Surprisingly, the bisection pointed to the backport of the commit
b9b588f22a0c049a14885399e27625635ae6ef91 ("libfs: Use d_children list
to iterate simple_offset directories").
Indeed, the revert of this patch on the latest 6.13.4 was confirmed to
fix the issue. Also, the reporter verified that the latest 6.14-rc
release is still affected, too.
For now I have no concrete idea how the patch could break the behavior
of a graphical application like the above. Let us know if you need
something for debugging. (Or at easiest, join to the bugzilla entry
and ask there; or open another bug report at whatever you like.)
BTW, I'll be traveling tomorrow, so my reply will be delayed.
thanks,
Takashi
#regzbot introduced: b9b588f22a0c049a14885399e27625635ae6ef91
#regzbot monitor: https://bugzilla.suse.com/show_bug.cgi?id=1236943
Hi,
Ran into an issue testing 6.1, which I discovered was introduced by
a backport that was done. Here's the fix for it, please add it to
the 6.1-stable mix. Thanks!
--
Jens Axboe
From: Mario Limonciello <mario.limonciello(a)amd.com>
On systems that only have an SRA sensor connected to SFH the sensor
doesn't get enabled due to a bad optimization condition of breaking
the sensor walk loop.
This optimization is unnecessary in the first place because if there
is only one device then the loop only runs once. Drop the condition
and explicitly mark sensor as enabled.
Reported-by: Yijun Shen <Yijun.Shen(a)dell.com>
Tested-By: Yijun Shen <Yijun_Shen(a)Dell.com>
Fixes: d1c444b47100d ("HID: amd_sfh: Add support to export device operating states")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
v2:
* Add tag
---
drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
index 25f0ebfcbd5f5..c1bdf1e0d44af 100644
--- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
+++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
@@ -134,9 +134,6 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
for (i = 0; i < cl_data->num_hid_devices; i++) {
cl_data->sensor_sts[i] = SENSOR_DISABLED;
- if (cl_data->num_hid_devices == 1 && cl_data->sensor_idx[0] == SRA_IDX)
- break;
-
if (cl_data->sensor_idx[i] == SRA_IDX) {
info.sensor_idx = cl_data->sensor_idx[i];
writel(0, privdata->mmio + amd_get_p2c_val(privdata, 0));
@@ -145,8 +142,10 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
(privdata, cl_data->sensor_idx[i], ENABLE_SENSOR);
cl_data->sensor_sts[i] = (status == 0) ? SENSOR_ENABLED : SENSOR_DISABLED;
- if (cl_data->sensor_sts[i] == SENSOR_ENABLED)
+ if (cl_data->sensor_sts[i] == SENSOR_ENABLED) {
+ cl_data->is_any_sensor_enabled = true;
privdata->dev_en.is_sra_present = true;
+ }
continue;
}
--
2.43.0
Xen PV guests in DomU have APIC disabled by design, which causes
topology_apply_cmdline_limits_early() to limit the number of possible
CPUs to 1, regardless of the configured number of vCPUs.
This is a regression introduced in version 6.9 in commit 7c0edad3643f
("x86/cpu/topology: Rework possible CPU management") which added an
early check that limits CPUs if apic_is_disabled, without accounting for
the fact that Xen PV guests always disable APIC even when SMP is
supported.
This patch fixes the issue by skipping the apic_is_disabled check for
Xen PV guests, allowing them to boot with the full set of configured vCPUs.
Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management")
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: x86(a)kernel.org
Cc: xen-devel(a)lists.xenproject.org
Cc: stable(a)vger.kernel.org # 6.9+
Signed-off-by: Petr Vaněk <arkamar(a)atlas.cz>
---
arch/x86/kernel/cpu/topology.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 01456236a6dd..10aa7f471ec9 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -428,8 +428,13 @@ void __init topology_apply_cmdline_limits_early(void)
{
unsigned int possible = nr_cpu_ids;
- /* 'maxcpus=0' 'nosmp' 'nolapic' */
- if (!setup_max_cpus || apic_is_disabled)
+ /* 'maxcpus=0' 'nosmp' 'nolapic'
+ *
+ * The apic_is_disabled check is ignored for Xen PV domains because Xen
+ * disables ACPI in unprivileged PV DomU guests, which would otherwise limit
+ * CPUs to 1, even if multiple vCPUs were configured.
+ */
+ if (!setup_max_cpus || (!xen_pv_domain() && apic_is_disabled))
possible = 1;
/* 'possible_cpus=N' */
--
2.48.1
From: Jarkko Sakkinen <jarkko.sakkinen(a)opinsys.com>
Add an isolated list of unreferenced keys to be queued for deletion, and
try to pin the keys in the garbage collector before processing anything.
Skip unpinnable keys.
Use this list for blocking the reaping process during the teardown:
1. First off, the keys added to `keys_graveyard` are snapshotted, and the
list is flushed. This the very last step in `key_put()`.
2. `key_put()` reaches zero. This will mark key as busy for the garbage
collector.
3. `key_garbage_collector()` will try to increase refcount, which won't go
above zero. Whenever this happens, the key will be skipped.
Cc: stable(a)vger.kernel.org # v6.1+
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen(a)opinsys.com>
---
v7:
- Fixed multiple definitions (from rebasing, sorry).
v6:
- Rebase went wrong in v5.
v5:
- Rebased on top of v6.15-rc
- Updated commit message to explain how spin lock and refcount
isolate the time window in key_put().
v4:
- Pin the key while processing key type teardown. Skip dead keys.
- Revert key_gc_graveyard back key_gc_unused_keys.
- Rewrote the commit message.
- "unsigned long flags" declaration somehow did make to the previous
patch (sorry).
v3:
- Using spin_lock() fails since key_put() is executed inside IRQs.
Using spin_lock_irqsave() would neither work given the lock is
acquired for /proc/keys. Therefore, separate the lock for
graveyard and key_graveyard before reaping key_serial_tree.
v2:
- Rename key_gc_unused_keys as key_gc_graveyard, and re-document the
function.
---
include/linux/key.h | 7 ++-----
security/keys/gc.c | 40 ++++++++++++++++++++++++----------------
security/keys/internal.h | 5 +++++
security/keys/key.c | 7 +++++--
4 files changed, 36 insertions(+), 23 deletions(-)
diff --git a/include/linux/key.h b/include/linux/key.h
index ba05de8579ec..c50659184bdf 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -195,10 +195,8 @@ enum key_state {
struct key {
refcount_t usage; /* number of references */
key_serial_t serial; /* key serial number */
- union {
- struct list_head graveyard_link;
- struct rb_node serial_node;
- };
+ struct list_head graveyard_link; /* key->usage == 0 */
+ struct rb_node serial_node;
#ifdef CONFIG_KEY_NOTIFICATIONS
struct watch_list *watchers; /* Entities watching this key for changes */
#endif
@@ -236,7 +234,6 @@ struct key {
#define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */
#define KEY_FLAG_KEEP 8 /* set if key should not be removed */
#define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */
-#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */
/* the key type and key description string
* - the desc is used to match a key against search criteria
diff --git a/security/keys/gc.c b/security/keys/gc.c
index f27223ea4578..0a3beb68633c 100644
--- a/security/keys/gc.c
+++ b/security/keys/gc.c
@@ -189,6 +189,7 @@ static void key_garbage_collector(struct work_struct *work)
struct rb_node *cursor;
struct key *key;
time64_t new_timer, limit, expiry;
+ unsigned long flags;
kenter("[%lx,%x]", key_gc_flags, gc_state);
@@ -206,21 +207,35 @@ static void key_garbage_collector(struct work_struct *work)
new_timer = TIME64_MAX;
+ spin_lock_irqsave(&key_graveyard_lock, flags);
+ list_splice_init(&key_graveyard, &graveyard);
+ spin_unlock_irqrestore(&key_graveyard_lock, flags);
+
+ list_for_each_entry(key, &graveyard, graveyard_link) {
+ spin_lock(&key_serial_lock);
+ kdebug("unrefd key %d", key->serial);
+ rb_erase(&key->serial_node, &key_serial_tree);
+ spin_unlock(&key_serial_lock);
+ }
+
/* As only this function is permitted to remove things from the key
* serial tree, if cursor is non-NULL then it will always point to a
* valid node in the tree - even if lock got dropped.
*/
spin_lock(&key_serial_lock);
+ key = NULL;
cursor = rb_first(&key_serial_tree);
continue_scanning:
+ key_put(key);
while (cursor) {
key = rb_entry(cursor, struct key, serial_node);
cursor = rb_next(cursor);
-
- if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) {
- smp_mb(); /* Clobber key->user after FINAL_PUT seen. */
- goto found_unreferenced_key;
+ /* key_get(), unless zero: */
+ if (!refcount_inc_not_zero(&key->usage)) {
+ key = NULL;
+ gc_state |= KEY_GC_REAP_AGAIN;
+ goto skip_dead_key;
}
if (unlikely(gc_state & KEY_GC_REAPING_DEAD_1)) {
@@ -274,6 +289,7 @@ static void key_garbage_collector(struct work_struct *work)
spin_lock(&key_serial_lock);
goto continue_scanning;
}
+ key_put(key);
/* We've completed the pass. Set the timer if we need to and queue a
* new cycle if necessary. We keep executing cycles until we find one
@@ -286,6 +302,10 @@ static void key_garbage_collector(struct work_struct *work)
key_schedule_gc(new_timer);
}
+ spin_lock(&key_graveyard_lock);
+ list_splice_init(&key_graveyard, &graveyard);
+ spin_unlock(&key_graveyard_lock);
+
if (unlikely(gc_state & KEY_GC_REAPING_DEAD_2) ||
!list_empty(&graveyard)) {
/* Make sure that all pending keyring payload destructions are
@@ -328,18 +348,6 @@ static void key_garbage_collector(struct work_struct *work)
kleave(" [end %x]", gc_state);
return;
- /* We found an unreferenced key - once we've removed it from the tree,
- * we can safely drop the lock.
- */
-found_unreferenced_key:
- kdebug("unrefd key %d", key->serial);
- rb_erase(&key->serial_node, &key_serial_tree);
- spin_unlock(&key_serial_lock);
-
- list_add_tail(&key->graveyard_link, &graveyard);
- gc_state |= KEY_GC_REAP_AGAIN;
- goto maybe_resched;
-
/* We found a restricted keyring and need to update the restriction if
* it is associated with the dead key type.
*/
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 2cffa6dc8255..4e3d9b322390 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -63,9 +63,14 @@ struct key_user {
int qnbytes; /* number of bytes allocated to this user */
};
+extern struct list_head key_graveyard;
+extern spinlock_t key_graveyard_lock;
+
extern struct rb_root key_user_tree;
extern spinlock_t key_user_lock;
extern struct key_user root_key_user;
+extern struct list_head key_graveyard;
+extern spinlock_t key_graveyard_lock;
extern struct key_user *key_user_lookup(kuid_t uid);
extern void key_user_put(struct key_user *user);
diff --git a/security/keys/key.c b/security/keys/key.c
index 7198cd2ac3a3..7511f2017b6b 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -22,6 +22,8 @@ DEFINE_SPINLOCK(key_serial_lock);
struct rb_root key_user_tree; /* tree of quota records indexed by UID */
DEFINE_SPINLOCK(key_user_lock);
+LIST_HEAD(key_graveyard);
+DEFINE_SPINLOCK(key_graveyard_lock);
unsigned int key_quota_root_maxkeys = 1000000; /* root's key count quota */
unsigned int key_quota_root_maxbytes = 25000000; /* root's key space quota */
@@ -658,8 +660,9 @@ void key_put(struct key *key)
key->user->qnbytes -= key->quotalen;
spin_unlock_irqrestore(&key->user->lock, flags);
}
- smp_mb(); /* key->user before FINAL_PUT set. */
- set_bit(KEY_FLAG_FINAL_PUT, &key->flags);
+ spin_lock_irqsave(&key_graveyard_lock, flags);
+ list_add_tail(&key->graveyard_link, &key_graveyard);
+ spin_unlock_irqrestore(&key_graveyard_lock, flags);
schedule_work(&key_gc_work);
}
}
--
2.39.5
The sdio_read32() calls sd_read(), but does not handle the error if
sd_read() fails. This could lead to subsequent operations processing
invalid data. A proper implementation can be found in sdio_readN().
Add error handling for the sd_read() to free tmpbuf and return error
code if sd_read() fails. This ensure that the memcpy() is only performed
when the read operation is successful.
Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver")
Cc: stable(a)vger.kernel.org # v4.12+
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
v6: Fix improper code to propagate error code
v5: Fix error code
v4: Add change log and fix error code
v3: Add Cc flag
v2: Change code to initialize val
drivers/staging/rtl8723bs/hal/sdio_ops.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/rtl8723bs/hal/sdio_ops.c b/drivers/staging/rtl8723bs/hal/sdio_ops.c
index 21e9f1858745..eb21c7e55949 100644
--- a/drivers/staging/rtl8723bs/hal/sdio_ops.c
+++ b/drivers/staging/rtl8723bs/hal/sdio_ops.c
@@ -185,7 +185,12 @@ static u32 sdio_read32(struct intf_hdl *intfhdl, u32 addr)
return SDIO_ERR_VAL32;
ftaddr &= ~(u16)0x3;
- sd_read(intfhdl, ftaddr, 8, tmpbuf);
+ err = sd_read(intfhdl, ftaddr, 8, tmpbuf);
+ if (err) {
+ kfree(tmpbuf);
+ return (u32)err;
+ }
+
memcpy(&le_tmp, tmpbuf + shift, 4);
val = le32_to_cpu(le_tmp);
--
2.42.0.windows.2
Disabling HPD polling from i915_hpd_poll_init_work() involves probing
all display connectors explicitly to account for lost hotplug
interrupts. On some platforms (mostly pre-ICL) with HDMI connectors the
I2C EDID bit-banging using udelay() triggers in turn the
workqueue: i915_hpd_poll_init_work [i915] hogged CPU for >10000us 4 times, consider switching to WQ_UNBOUND
warning.
Fix the above by scheduling i915_hpd_poll_init_work() on a WQ_UNBOUND
workqueue. It's ok to use a system WQ, since i915_hpd_poll_init_work()
is properly flushed in intel_hpd_cancel_work().
The connector probing from drm_mode_config::output_poll_work resulting
in the same warning is fixed by the next patch.
Cc: Tejun Heo <tj(a)kernel.org>
Cc: Heiner Kallweit <hkallweit1(a)gmail.com>
CC: stable(a)vger.kernel.org # 6.5
Suggested-by: Tejun Heo <tj(a)kernel.org>
Suggested-by: Heiner Kallweit <hkallweit1(a)gmail.com>
Reported-by: Heiner Kallweit <hkallweit1(a)gmail.com>
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/9245
Link: https://lore.kernel.org/all/f7e21caa-e98d-e5b5-932a-fe12d27fde9b@gmail.com
Signed-off-by: Imre Deak <imre.deak(a)intel.com>
---
drivers/gpu/drm/i915/display/intel_hotplug.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index e8562f6f8bb44..accc2fec562a0 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -774,7 +774,7 @@ void intel_hpd_poll_enable(struct drm_i915_private *dev_priv)
* As well, there's no issue if we race here since we always reschedule
* this worker anyway
*/
- queue_work(dev_priv->unordered_wq,
+ queue_work(system_unbound_wq,
&dev_priv->display.hotplug.poll_init_work);
}
@@ -803,7 +803,7 @@ void intel_hpd_poll_disable(struct drm_i915_private *dev_priv)
return;
WRITE_ONCE(dev_priv->display.hotplug.poll_enabled, false);
- queue_work(dev_priv->unordered_wq,
+ queue_work(system_unbound_wq,
&dev_priv->display.hotplug.poll_init_work);
}
--
2.37.2
In tpacpi_battery_init(), the return value of tpacpi_check_quirks() needs
to be checked. The battery should not be hooked if there is no matched
battery information in quirk table.
Add an error check and return -ENODEV immediately if the device fail
the check.
Fixes: 1a32ebb26ba9 ("platform/x86: thinkpad_acpi: Support battery quirk")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/platform/x86/thinkpad_acpi.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 2cfb2ac3f465..a3227f60aa43 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -9969,11 +9969,15 @@ static const struct tpacpi_quirk battery_quirk_table[] __initconst = {
static int __init tpacpi_battery_init(struct ibm_init_struct *ibm)
{
+ unsigned long r;
+
memset(&battery_info, 0, sizeof(battery_info));
- tp_features.battery_force_primary = tpacpi_check_quirks(
+ r = tp_features.battery_force_primary = tpacpi_check_quirks(
battery_quirk_table,
ARRAY_SIZE(battery_quirk_table));
+ if (!r)
+ return -ENODEV;
battery_hook_register(&battery_hook);
return 0;
--
2.42.0.windows.2
The patch 099606a7b2d5 didn't cleanly apply to 5.15 due to the
significant difference in codebases.
I've tried to manually bring it back to 5.15 via some minor conflict
resolution but also invoking the newly introduced API using inverted
logic as the conditional statements present in 5.15 are the opposite of
those in 6.1 xen/swiotlib.
Harshvardhan Jha (1):
xen/swiotlb: relax alignment requirements
drivers/xen/swiotlb-xen.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
--
2.47.1
The current implementation uses bias_pad_enable as a reference count to
manage the shared bias pad for all UTMI PHYs. However, during system
suspension with connected USB devices, multiple power-down requests for
the UTMI pad result in a mismatch in the reference count, which in turn
produces warnings such as:
[ 237.762967] WARNING: CPU: 10 PID: 1618 at tegra186_utmi_pad_power_down+0x160/0x170
[ 237.763103] Call trace:
[ 237.763104] tegra186_utmi_pad_power_down+0x160/0x170
[ 237.763107] tegra186_utmi_phy_power_off+0x10/0x30
[ 237.763110] phy_power_off+0x48/0x100
[ 237.763113] tegra_xusb_enter_elpg+0x204/0x500
[ 237.763119] tegra_xusb_suspend+0x48/0x140
[ 237.763122] platform_pm_suspend+0x2c/0xb0
[ 237.763125] dpm_run_callback.isra.0+0x20/0xa0
[ 237.763127] __device_suspend+0x118/0x330
[ 237.763129] dpm_suspend+0x10c/0x1f0
[ 237.763130] dpm_suspend_start+0x88/0xb0
[ 237.763132] suspend_devices_and_enter+0x120/0x500
[ 237.763135] pm_suspend+0x1ec/0x270
The root cause was traced back to the dynamic power-down changes
introduced in commit a30951d31b25 ("xhci: tegra: USB2 pad power controls"),
where the UTMI pad was being powered down without verifying its current
state. This unbalanced behavior led to discrepancies in the reference
count.
To rectify this issue, this patch replaces the single reference counter
with a bitmask, renamed to utmi_pad_enabled. Each bit in the mask
corresponds to one of the four USB2 PHYs, allowing us to track each pad's
enablement status individually.
With this change:
- The bias pad is powered on only when the mask is clear.
- Each UTMI pad is powered on or down based on its corresponding bit
in the mask, preventing redundant operations.
- The overall power state of the shared bias pad is maintained
correctly during suspend/resume cycles.
Cc: stable(a)vger.kernel.org
Fixes: a30951d31b25 ("xhci: tegra: USB2 pad power controls")
Signed-off-by: Wayne Chang <waynec(a)nvidia.com>
---
V1 -> V2: holding the padctl->lock to protect shared bitmask
drivers/phy/tegra/xusb-tegra186.c | 44 +++++++++++++++++++------------
1 file changed, 27 insertions(+), 17 deletions(-)
diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c
index fae6242aa730..cc7b8a6a999f 100644
--- a/drivers/phy/tegra/xusb-tegra186.c
+++ b/drivers/phy/tegra/xusb-tegra186.c
@@ -237,6 +237,8 @@
#define DATA0_VAL_PD BIT(1)
#define USE_XUSB_AO BIT(4)
+#define TEGRA_UTMI_PAD_MAX 4
+
#define TEGRA186_LANE(_name, _offset, _shift, _mask, _type) \
{ \
.name = _name, \
@@ -269,7 +271,7 @@ struct tegra186_xusb_padctl {
/* UTMI bias and tracking */
struct clk *usb2_trk_clk;
- unsigned int bias_pad_enable;
+ DECLARE_BITMAP(utmi_pad_enabled, TEGRA_UTMI_PAD_MAX);
/* padctl context */
struct tegra186_xusb_padctl_context context;
@@ -603,12 +605,8 @@ static void tegra186_utmi_bias_pad_power_on(struct tegra_xusb_padctl *padctl)
u32 value;
int err;
- mutex_lock(&padctl->lock);
-
- if (priv->bias_pad_enable++ > 0) {
- mutex_unlock(&padctl->lock);
+ if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX))
return;
- }
err = clk_prepare_enable(priv->usb2_trk_clk);
if (err < 0)
@@ -667,17 +665,8 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl)
struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl);
u32 value;
- mutex_lock(&padctl->lock);
-
- if (WARN_ON(priv->bias_pad_enable == 0)) {
- mutex_unlock(&padctl->lock);
- return;
- }
-
- if (--priv->bias_pad_enable > 0) {
- mutex_unlock(&padctl->lock);
+ if (!bitmap_empty(priv->utmi_pad_enabled, TEGRA_UTMI_PAD_MAX))
return;
- }
value = padctl_readl(padctl, XUSB_PADCTL_USB2_BIAS_PAD_CTL1);
value |= USB2_PD_TRK;
@@ -690,13 +679,13 @@ static void tegra186_utmi_bias_pad_power_off(struct tegra_xusb_padctl *padctl)
clk_disable_unprepare(priv->usb2_trk_clk);
}
- mutex_unlock(&padctl->lock);
}
static void tegra186_utmi_pad_power_on(struct phy *phy)
{
struct tegra_xusb_lane *lane = phy_get_drvdata(phy);
struct tegra_xusb_padctl *padctl = lane->pad->padctl;
+ struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl);
struct tegra_xusb_usb2_port *port;
struct device *dev = padctl->dev;
unsigned int index = lane->index;
@@ -705,9 +694,16 @@ static void tegra186_utmi_pad_power_on(struct phy *phy)
if (!phy)
return;
+ mutex_lock(&padctl->lock);
+ if (test_bit(index, priv->utmi_pad_enabled)) {
+ mutex_unlock(&padctl->lock);
+ return;
+ }
+
port = tegra_xusb_find_usb2_port(padctl, index);
if (!port) {
dev_err(dev, "no port found for USB2 lane %u\n", index);
+ mutex_unlock(&padctl->lock);
return;
}
@@ -724,18 +720,28 @@ static void tegra186_utmi_pad_power_on(struct phy *phy)
value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index));
value &= ~USB2_OTG_PD_DR;
padctl_writel(padctl, value, XUSB_PADCTL_USB2_OTG_PADX_CTL1(index));
+
+ set_bit(index, priv->utmi_pad_enabled);
+ mutex_unlock(&padctl->lock);
}
static void tegra186_utmi_pad_power_down(struct phy *phy)
{
struct tegra_xusb_lane *lane = phy_get_drvdata(phy);
struct tegra_xusb_padctl *padctl = lane->pad->padctl;
+ struct tegra186_xusb_padctl *priv = to_tegra186_xusb_padctl(padctl);
unsigned int index = lane->index;
u32 value;
if (!phy)
return;
+ mutex_lock(&padctl->lock);
+ if (!test_bit(index, priv->utmi_pad_enabled)) {
+ mutex_unlock(&padctl->lock);
+ return;
+ }
+
dev_dbg(padctl->dev, "power down UTMI pad %u\n", index);
value = padctl_readl(padctl, XUSB_PADCTL_USB2_OTG_PADX_CTL0(index));
@@ -748,7 +754,11 @@ static void tegra186_utmi_pad_power_down(struct phy *phy)
udelay(2);
+ clear_bit(index, priv->utmi_pad_enabled);
+
tegra186_utmi_bias_pad_power_off(padctl);
+
+ mutex_unlock(&padctl->lock);
}
static int tegra186_xusb_padctl_vbus_override(struct tegra_xusb_padctl *padctl,
--
2.25.1
From: Jakub Kicinski <kuba(a)kernel.org>
[ Upstream commit 166c2c8a6a4dc2e4ceba9e10cfe81c3e469e3210 ]
If we're redirecting the skb, and haven't called tcf_mirred_forward(),
yet, we need to tell the core to drop the skb by setting the retcode
to SHOT. If we have called tcf_mirred_forward(), however, the skb
is out of our hands and returning SHOT will lead to UaF.
Move the retval override to the error path which actually need it.
Reviewed-by: Michal Swiatkowski <michal.swiatkowski(a)linux.intel.com>
Fixes: e5cf1baf92cb ("act_mirred: use TC_ACT_REINSERT when possible")
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
[Minor conflict resolved due to code context change.]
Signed-off-by: Jianqi Ren <jianqi.ren.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Verified the build test
---
net/sched/act_mirred.c | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 36395e5db3b4..df64acc8acde 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -259,13 +259,13 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
dev = rcu_dereference_bh(m->tcfm_dev);
if (unlikely(!dev)) {
pr_notice_once("tc mirred: target device is gone\n");
- goto out;
+ goto err_cant_do;
}
if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
- goto out;
+ goto err_cant_do;
}
/* we could easily avoid the clone only if called by ingress and clsact;
@@ -279,7 +279,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (!use_reinsert) {
skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
- goto out;
+ goto err_cant_do;
}
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
@@ -321,12 +321,16 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
}
err = tcf_mirred_forward(want_ingress, skb2);
- if (err) {
-out:
+ if (err)
tcf_action_inc_overlimit_qstats(&m->common);
- if (tcf_mirred_is_act_redirect(m_eaction))
- retval = TC_ACT_SHOT;
- }
+ __this_cpu_dec(mirred_nest_level);
+
+ return retval;
+
+err_cant_do:
+ if (is_redirect)
+ retval = TC_ACT_SHOT;
+ tcf_action_inc_overlimit_qstats(&m->common);
__this_cpu_dec(mirred_nest_level);
return retval;
--
2.34.1
In efx_devlink_info_board_cfg(), the return value of
devlink_info_serial_number_put() needs to be checked.
This could result in silent failures if the function failed.
Add error checking for efx_devlink_info_board_cfg() and
propagate any errors immediately to ensure proper
error handling and prevents silent failures.
Fixes: 14743ddd2495 ("sfc: add devlink info support for ef100")
Cc: stable(a)vger.kernel.org # v6.3+
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/net/ethernet/sfc/efx_devlink.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/sfc/efx_devlink.c b/drivers/net/ethernet/sfc/efx_devlink.c
index 3cd750820fdd..17279bbd81d5 100644
--- a/drivers/net/ethernet/sfc/efx_devlink.c
+++ b/drivers/net/ethernet/sfc/efx_devlink.c
@@ -581,12 +581,14 @@ static int efx_devlink_info_board_cfg(struct efx_nic *efx,
{
char sn[EFX_MAX_SERIALNUM_LEN];
u8 mac_address[ETH_ALEN];
- int rc;
+ int rc, err;
rc = efx_mcdi_get_board_cfg(efx, (u8 *)mac_address, NULL, NULL);
if (!rc) {
snprintf(sn, EFX_MAX_SERIALNUM_LEN, "%pm", mac_address);
- devlink_info_serial_number_put(req, sn);
+ err = devlink_info_serial_number_put(req, sn);
+ if (err)
+ return err;
}
return rc;
}
--
2.42.0.windows.2
PVH dom0 re-uses logic from PV dom0, in which RAM ranges not assigned to
dom0 are re-used as scratch memory to map foreign and grant pages. Such
logic relies on reporting those unpopulated ranges as RAM to Linux, and
mark them as reserved. This way Linux creates the underlying page
structures required for metadata management.
Such approach works fine on PV because the initial balloon target is
calculated using specific Xen data, that doesn't take into account the
memory type changes described above. However on HVM and PVH the initial
balloon target is calculated using get_num_physpages(), and that function
does take into account the unpopulated RAM regions used as scratch space
for remote domain mappings.
This leads to PVH dom0 having an incorrect initial balloon target, which
causes malfunction (excessive memory freeing) of the balloon driver if the
dom0 memory target is later adjusted from the toolstack.
Fix this by using xen_released_pages to account for any pages that are part
of the memory map, but are already unpopulated when the balloon driver is
initialized. This accounts for any regions used for scratch remote
mappings. Note on x86 xen_released_pages definition is moved to
enlighten.c so it's uniformly available for all Xen-enabled builds.
Take the opportunity to unify PV with PVH/HVM guests regarding the usage of
get_num_physpages(), as that avoids having to add different logic for PV vs
PVH in both balloon_add_regions() and arch_xen_unpopulated_init().
Much like a6aa4eb994ee, the code in this changeset should have been part of
38620fc4e893.
Fixes: a6aa4eb994ee ('xen/x86: add extra pages to unpopulated-alloc if available')
Signed-off-by: Roger Pau Monné <roger.pau(a)citrix.com>
Reviewed-by: Juergen Gross <jgross(a)suse.com>
Cc: stable(a)vger.kernel.org
---
Changes since v2:
- For x86: Move xen_released_pages definition from setup.c (PV specific)
to enlighten.c (shared between all guest modes).
Changes since v1:
- Replace BUG_ON() with a WARN and failure to initialize the balloon
driver.
---
arch/x86/xen/enlighten.c | 10 ++++++++++
arch/x86/xen/setup.c | 3 ---
drivers/xen/balloon.c | 34 ++++++++++++++++++++++++----------
3 files changed, 34 insertions(+), 13 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 43dcd8c7badc..1b7710bd0d05 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -70,6 +70,9 @@ EXPORT_SYMBOL(xen_start_flags);
*/
struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
+
static __ref void xen_get_vendor(void)
{
init_cpu_devs();
@@ -466,6 +469,13 @@ int __init arch_xen_unpopulated_init(struct resource **res)
xen_free_unpopulated_pages(1, &pg);
}
+ /*
+ * Account for the region being in the physmap but unpopulated.
+ * The value in xen_released_pages is used by the balloon
+ * driver to know how much of the physmap is unpopulated and
+ * set an accurate initial memory target.
+ */
+ xen_released_pages += xen_extra_mem[i].n_pfns;
/* Zero so region is not also added to the balloon driver. */
xen_extra_mem[i].n_pfns = 0;
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index c3db71d96c43..3823e52aef52 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,9 +37,6 @@
#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)
-/* Number of pages released from the initial allocation. */
-unsigned long xen_released_pages;
-
/* Memory map would allow PCI passthrough. */
bool xen_pv_pci_possible;
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 163f7f1d70f1..ee165f4f7fe6 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -675,7 +675,7 @@ void xen_free_ballooned_pages(unsigned int nr_pages, struct page **pages)
}
EXPORT_SYMBOL(xen_free_ballooned_pages);
-static void __init balloon_add_regions(void)
+static int __init balloon_add_regions(void)
{
unsigned long start_pfn, pages;
unsigned long pfn, extra_pfn_end;
@@ -698,26 +698,38 @@ static void __init balloon_add_regions(void)
for (pfn = start_pfn; pfn < extra_pfn_end; pfn++)
balloon_append(pfn_to_page(pfn));
- balloon_stats.total_pages += extra_pfn_end - start_pfn;
+ /*
+ * Extra regions are accounted for in the physmap, but need
+ * decreasing from current_pages to balloon down the initial
+ * allocation, because they are already accounted for in
+ * total_pages.
+ */
+ if (extra_pfn_end - start_pfn >= balloon_stats.current_pages) {
+ WARN(1, "Extra pages underflow current target");
+ return -ERANGE;
+ }
+ balloon_stats.current_pages -= extra_pfn_end - start_pfn;
}
+
+ return 0;
}
static int __init balloon_init(void)
{
struct task_struct *task;
+ int rc;
if (!xen_domain())
return -ENODEV;
pr_info("Initialising balloon driver\n");
-#ifdef CONFIG_XEN_PV
- balloon_stats.current_pages = xen_pv_domain()
- ? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
- : get_num_physpages();
-#else
- balloon_stats.current_pages = get_num_physpages();
-#endif
+ if (xen_released_pages >= get_num_physpages()) {
+ WARN(1, "Released pages underflow current target");
+ return -ERANGE;
+ }
+
+ balloon_stats.current_pages = get_num_physpages() - xen_released_pages;
balloon_stats.target_pages = balloon_stats.current_pages;
balloon_stats.balloon_low = 0;
balloon_stats.balloon_high = 0;
@@ -734,7 +746,9 @@ static int __init balloon_init(void)
register_sysctl_init("xen/balloon", balloon_table);
#endif
- balloon_add_regions();
+ rc = balloon_add_regions();
+ if (rc)
+ return rc;
task = kthread_run(balloon_thread, NULL, "xen-balloon");
if (IS_ERR(task)) {
--
2.48.1
Once device_register() failed, we should call put_device() to
decrement reference count for cleanup. Or it could cause memory leak.
As comment of device_register() says, 'NOTE: _Never_ directly free
@dev after calling this function, even if it returned an error! Always
use put_device() to give up the reference initialized in this function
instead.'
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
Changes in v5:
- modified the bug description as suggestions;
Changes in v4:
- deleted the redundant initialization;
Changes in v3:
- modified the patch as suggestions;
Changes in v2:
- modified the patch as suggestions.
---
arch/arm/common/locomo.c | 13 +++++--------
1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index cb6ef449b987..45106066a17f 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -223,10 +223,8 @@ locomo_init_one_child(struct locomo *lchip, struct locomo_dev_info *info)
int ret;
dev = kzalloc(sizeof(struct locomo_dev), GFP_KERNEL);
- if (!dev) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!dev)
+ return -ENOMEM;
/*
* If the parent device has a DMA mask associated with it,
@@ -254,10 +252,9 @@ locomo_init_one_child(struct locomo *lchip, struct locomo_dev_info *info)
NO_IRQ : lchip->irq_base + info->irq[0];
ret = device_register(&dev->dev);
- if (ret) {
- out:
- kfree(dev);
- }
+ if (ret)
+ put_device(&dev->dev);
+
return ret;
}
--
2.25.1
Once of_device_register() failed, we should call put_device() to
decrement reference count for cleanup. Or it could cause memory leak.
So fix this by calling put_device(), then the name can be freed in
kobject_cleanup().
As comment of device_add() says, 'if device_add() succeeds, you should
call device_del() when you want to get rid of it. If device_add() has
not succeeded, use only put_device() to drop the reference count'.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: cf44bbc26cf1 ("[SPARC]: Beginnings of generic of_device framework.")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
arch/sparc/kernel/of_device_64.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index f98c2901f335..4272746d7166 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -677,7 +677,7 @@ static struct platform_device * __init scan_one_device(struct device_node *dp,
if (of_device_register(op)) {
printk("%pOF: Could not register of device.\n", dp);
- kfree(op);
+ put_device(&op->dev);
op = NULL;
}
--
2.25.1
sctp_sendmsg() re-uses associations and transports when possible by
doing a lookup based on the socket endpoint and the message destination
address, and then sctp_sendmsg_to_asoc() sets the selected transport in
all the message chunks to be sent.
There's a possible race condition if another thread triggers the removal
of that selected transport, for instance, by explicitly unbinding an
address with setsockopt(SCTP_SOCKOPT_BINDX_REM), after the chunks have
been set up and before the message is sent. This causes the access to
the transport data in sctp_outq_select_transport(), when the association
outqueue is flushed, to do a use-after-free read.
This patch addresses this scenario by checking if the transport still
exists right after the chunks to be sent are set up to use it and before
proceeding to sending them. If the transport was freed since it was
found, the send is aborted. The reason to add the check here is that
once the transport is assigned to the chunks, deleting that transport
is safe, since it will also set chunk->transport to NULL in the affected
chunks. This scenario is correctly handled already, see Fixes below.
The bug was found by a private syzbot instance (see the error report [1]
and the C reproducer that triggers it [2]).
Link: https://people.igalia.com/rcn/kernel_logs/20250402__KASAN_slab-use-after-fr… [1]
Link: https://people.igalia.com/rcn/kernel_logs/20250402__KASAN_slab-use-after-fr… [2]
Cc: stable(a)vger.kernel.org
Fixes: df132eff4638 ("sctp: clear the transport of some out_chunk_list chunks in sctp_assoc_rm_peer")
Signed-off-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
---
net/sctp/socket.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 36ee34f483d703ffcfe5ca9e6cc554fba24c75ef..9c5ff44fa73cae6a6a04790800cc33dfa08a8da9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1787,17 +1787,24 @@ static int sctp_sendmsg_check_sflags(struct sctp_association *asoc,
return 1;
}
+static union sctp_addr *sctp_sendmsg_get_daddr(struct sock *sk,
+ const struct msghdr *msg,
+ struct sctp_cmsgs *cmsgs);
+
static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
struct msghdr *msg, size_t msg_len,
struct sctp_transport *transport,
struct sctp_sndrcvinfo *sinfo)
{
+ struct sctp_transport *aux_transport = NULL;
struct sock *sk = asoc->base.sk;
+ struct sctp_endpoint *ep = sctp_sk(sk)->ep;
struct sctp_sock *sp = sctp_sk(sk);
struct net *net = sock_net(sk);
struct sctp_datamsg *datamsg;
bool wait_connect = false;
struct sctp_chunk *chunk;
+ union sctp_addr *daddr;
long timeo;
int err;
@@ -1869,6 +1876,15 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
sctp_set_owner_w(chunk);
chunk->transport = transport;
}
+ /* Fail if transport was deleted after lookup in sctp_sendmsg() */
+ daddr = sctp_sendmsg_get_daddr(sk, msg, NULL);
+ if (daddr) {
+ sctp_endpoint_lookup_assoc(ep, daddr, &aux_transport);
+ if (!aux_transport || aux_transport != transport) {
+ sctp_datamsg_free(datamsg);
+ goto err;
+ }
+ }
err = sctp_primitive_SEND(net, asoc, datamsg);
if (err) {
---
base-commit: 38fec10eb60d687e30c8c6b5420d86e8149f7557
change-id: 20250402-kasan_slab-use-after-free_read_in_sctp_outq_select_transport-46c9c30bcb7d
Once cdev_device_add() failed, we should use put_device() to decrement
reference count for cleanup. Or it could cause memory leak. Although
operations in err_free_ida are similar to the operations in callback
function fsi_slave_release(), put_device() is a correct handling
operation as comments require when cdev_device_add() fails.
As comment of device_add() says, 'if device_add() succeeds, you should
call device_del() when you want to get rid of it. If device_add() has
not succeeded, use only put_device() to drop the reference count'.
Found by code review.
Cc: stable(a)vger.kernel.org
Fixes: 371975b0b075 ("fsi/core: Fix error paths on CFAM init")
Signed-off-by: Ma Ke <make24(a)iscas.ac.cn>
---
drivers/fsi/fsi-core.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c
index 50e8736039fe..c494fc0bd747 100644
--- a/drivers/fsi/fsi-core.c
+++ b/drivers/fsi/fsi-core.c
@@ -1084,7 +1084,8 @@ static int fsi_slave_init(struct fsi_master *master, int link, uint8_t id)
rc = cdev_device_add(&slave->cdev, &slave->dev);
if (rc) {
dev_err(&slave->dev, "Error %d creating slave device\n", rc);
- goto err_free_ida;
+ put_device(&slave->dev);
+ return rc;
}
/* Now that we have the cdev registered with the core, any fatal
@@ -1110,8 +1111,6 @@ static int fsi_slave_init(struct fsi_master *master, int link, uint8_t id)
return 0;
-err_free_ida:
- fsi_free_minor(slave->dev.devt);
err_free:
of_node_put(slave->dev.of_node);
kfree(slave);
--
2.25.1
The differences in the vendor-approved CPU and GPU OPPs for the standard
Rockchip RK3588 variant [1] and the industrial Rockchip RK3588J variant [2]
come from the latter, presumably, supporting an extended temperature range
that's usually associated with industrial applications, despite the two SoC
variant datasheets specifying the same upper limit for the allowed ambient
temperature for both variants. However, the lower temperature limit is
specified much lower for the RK3588J variant. [1][2]
To be on the safe side and to ensure maximum longevity of the RK3588J SoCs,
only the CPU and GPU OPPs that are declared by the vendor to be always safe
for this SoC variant may be provided. As explained by the vendor [3] and
according to the RK3588J datasheet, [2] higher-frequency/higher-voltage
CPU and GPU OPPs can be used as well, but at the risk of reducing the SoC
lifetime expectancy. Presumably, using the higher OPPs may be safe only
when not enjoying the assumed extended temperature range that the RK3588J,
as an SoC variant targeted specifically at higher-temperature, industrial
applications, is made (or binned) for.
Anyone able to keep their RK3588J-based board outside the above-presumed
extended temperature range at all times, and willing to take the associated
risk of possibly reducing the SoC lifetime expectancy, is free to apply
a DT overlay that adds the higher CPU and GPU OPPs.
With all this and the downstream RK3588(J) DT definitions [4][5] in mind,
let's delete the RK3588J CPU and GPU OPPs that are not considered belonging
to the normal operation mode for this SoC variant. To quote the RK3588J
datasheet [2], "normal mode means the chipset works under safety voltage
and frequency; for the industrial environment, highly recommend to keep in
normal mode, the lifetime is reasonably guaranteed", while "overdrive mode
brings higher frequency, and the voltage will increase accordingly; under
the overdrive mode for a long time, the chipset may shorten the lifetime,
especially in high-temperature condition".
To sum the RK3588J datasheet [2] and the vendor-provided DTs up, [4][5]
the maximum allowed CPU core, GPU and NPU frequencies are as follows:
IP core | Normal mode | Overdrive mode
------------+-------------+----------------
Cortex-A55 | 1,296 MHz | 1,704 MHz
Cortex-A76 | 1,608 MHz | 2,016 MHz
GPU | 700 MHz | 850 MHz
NPU | 800 MHz | 950 MHz
Unfortunately, when it comes to the actual voltages for the RK3588J CPU and
GPU OPPs, there's a discrepancy between the RK3588J datasheet [2] and the
downstream kernel code. [4][5] The RK3588J datasheet states that "the max.
working voltage of CPU/GPU/NPU is 0.75 V under the normal mode", while the
downstream kernel code actually allows voltage ranges that go up to 0.95 V,
which is still within the voltage range allowed by the datasheet. However,
the RK3588J datasheet also tells us to "strictly refer to the software
configuration of SDK and the hardware reference design", so let's embrace
the voltage ranges provided by the downstream kernel code, which also
prevents the undesirable theoretical outcome of ending up with no usable
OPPs on a particular board, as a result of the board's voltage regulator(s)
being unable to deliver the exact voltages, for whatever reason.
The above-described voltage ranges for the RK3588J CPU OPPs remain taken
from the downstream kernel code [4][5] by picking the highest, worst-bin
values, which ensure that all RK3588J bins will work reliably. Yes, with
some power inevitably wasted as unnecessarily generated heat, but the
reliability is paramount, together with the longevity. This deficiency
may be revisited separately at some point in the future.
The provided RK3588J CPU OPPs follow the slightly debatable "provide only
the highest-frequency OPP from the same-voltage group" approach that's been
established earlier, [6] as a result of the "same-voltage, lower-frequency"
OPPs being considered inefficient from the IPA governor's standpoint, which
may also be revisited separately at some point in the future.
[1] https://wiki.friendlyelec.com/wiki/images/e/ee/Rockchip_RK3588_Datasheet_V1…
[2] https://wmsc.lcsc.com/wmsc/upload/file/pdf/v2/lcsc/2403201054_Rockchip-RK35…
[3] https://lore.kernel.org/linux-rockchip/e55125ed-64fb-455e-b1e4-cebe2cf006e4…
[4] https://raw.githubusercontent.com/rockchip-linux/kernel/604cec4004abe5a96c7…
[5] https://raw.githubusercontent.com/rockchip-linux/kernel/604cec4004abe5a96c7…
[6] https://lore.kernel.org/all/20240229-rk-dts-additions-v3-5-6afe8473a631@gma…
Fixes: 667885a68658 ("arm64: dts: rockchip: Add OPP data for CPU cores on RK3588j")
Fixes: a7b2070505a2 ("arm64: dts: rockchip: Split GPU OPPs of RK3588 and RK3588j")
Cc: stable(a)vger.kernel.org
Cc: Heiko Stuebner <heiko(a)sntech.de>
Cc: Alexey Charkov <alchark(a)gmail.com>
Helped-by: Quentin Schulz <quentin.schulz(a)cherry.de>
Reviewed-by: Quentin Schulz <quentin.schulz(a)cherry.de>
Signed-off-by: Dragan Simic <dsimic(a)manjaro.org>
---
Notes:
Changes in v2:
- Reworded and expanded the patch description a bit, to include some
more information and to make it more clear what are the implied
speculations and assumptions, and what are the available official
statements from Rockchip, as suggested by Quentin [7]
- Collected Reviewed-by tag from Quentin [7]
Link to v1: https://lore.kernel.org/linux-rockchip/f929da061de35925ea591c969f985430e23c…
[7] https://lore.kernel.org/linux-rockchip/71b7c81b-6a4e-442b-a661-04d63639962a…
arch/arm64/boot/dts/rockchip/rk3588j.dtsi | 53 ++++++++---------------
1 file changed, 17 insertions(+), 36 deletions(-)
diff --git a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi
index bce72bac4503..3045cb3bd68c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588j.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588j.dtsi
@@ -11,74 +11,59 @@ cluster0_opp_table: opp-table-cluster0 {
compatible = "operating-points-v2";
opp-shared;
- opp-1416000000 {
- opp-hz = /bits/ 64 <1416000000>;
+ opp-1200000000 {
+ opp-hz = /bits/ 64 <1200000000>;
opp-microvolt = <750000 750000 950000>;
clock-latency-ns = <40000>;
opp-suspend;
};
- opp-1608000000 {
- opp-hz = /bits/ 64 <1608000000>;
- opp-microvolt = <887500 887500 950000>;
- clock-latency-ns = <40000>;
- };
- opp-1704000000 {
- opp-hz = /bits/ 64 <1704000000>;
- opp-microvolt = <937500 937500 950000>;
+ opp-1296000000 {
+ opp-hz = /bits/ 64 <1296000000>;
+ opp-microvolt = <775000 775000 950000>;
clock-latency-ns = <40000>;
};
};
cluster1_opp_table: opp-table-cluster1 {
compatible = "operating-points-v2";
opp-shared;
+ opp-1200000000{
+ opp-hz = /bits/ 64 <1200000000>;
+ opp-microvolt = <750000 750000 950000>;
+ clock-latency-ns = <40000>;
+ };
opp-1416000000 {
opp-hz = /bits/ 64 <1416000000>;
- opp-microvolt = <750000 750000 950000>;
+ opp-microvolt = <762500 762500 950000>;
clock-latency-ns = <40000>;
};
opp-1608000000 {
opp-hz = /bits/ 64 <1608000000>;
opp-microvolt = <787500 787500 950000>;
clock-latency-ns = <40000>;
};
- opp-1800000000 {
- opp-hz = /bits/ 64 <1800000000>;
- opp-microvolt = <875000 875000 950000>;
- clock-latency-ns = <40000>;
- };
- opp-2016000000 {
- opp-hz = /bits/ 64 <2016000000>;
- opp-microvolt = <950000 950000 950000>;
- clock-latency-ns = <40000>;
- };
};
cluster2_opp_table: opp-table-cluster2 {
compatible = "operating-points-v2";
opp-shared;
+ opp-1200000000{
+ opp-hz = /bits/ 64 <1200000000>;
+ opp-microvolt = <750000 750000 950000>;
+ clock-latency-ns = <40000>;
+ };
opp-1416000000 {
opp-hz = /bits/ 64 <1416000000>;
- opp-microvolt = <750000 750000 950000>;
+ opp-microvolt = <762500 762500 950000>;
clock-latency-ns = <40000>;
};
opp-1608000000 {
opp-hz = /bits/ 64 <1608000000>;
opp-microvolt = <787500 787500 950000>;
clock-latency-ns = <40000>;
};
- opp-1800000000 {
- opp-hz = /bits/ 64 <1800000000>;
- opp-microvolt = <875000 875000 950000>;
- clock-latency-ns = <40000>;
- };
- opp-2016000000 {
- opp-hz = /bits/ 64 <2016000000>;
- opp-microvolt = <950000 950000 950000>;
- clock-latency-ns = <40000>;
- };
};
gpu_opp_table: opp-table {
@@ -104,10 +89,6 @@ opp-700000000 {
opp-hz = /bits/ 64 <700000000>;
opp-microvolt = <750000 750000 850000>;
};
- opp-850000000 {
- opp-hz = /bits/ 64 <800000000>;
- opp-microvolt = <787500 787500 850000>;
- };
};
};
__VA_OPT__ is a macro that is useful when some arguments can be present
or not to entirely skip some part of a definition. Unfortunately, it
is a too recent addition that some of the still supported old GCC
versions do not know about, and is anyway not part of C11 that is the
version used in the kernel.
Find a trick to remove this macro, typically '__VA_ARGS__ + 0' is a
workaround used in netlink.h which works very well here, as we either
expect:
- 0
- A positive value
- No value, which means the field should be 0.
Reported-by: kernel test robot <lkp(a)intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202503181330.YcDXGy7F-lkp@intel.com/
Fixes: 7ce0d16d5802 ("mtd: spinand: Add an optional frequency to read from cache macros")
Cc: stable(a)vger.kernel.org
Signed-off-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
---
include/linux/mtd/spinand.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h
index 83301ef11aa9..0bb06aeffa62 100644
--- a/include/linux/mtd/spinand.h
+++ b/include/linux/mtd/spinand.h
@@ -67,7 +67,7 @@
SPI_MEM_OP_ADDR(2, addr, 1), \
SPI_MEM_OP_DUMMY(ndummy, 1), \
SPI_MEM_OP_DATA_IN(len, buf, 1), \
- __VA_OPT__(SPI_MEM_OP_MAX_FREQ(__VA_ARGS__)))
+ SPI_MEM_OP_MAX_FREQ(__VA_ARGS__ + 0))
#define SPINAND_PAGE_READ_FROM_CACHE_FAST_1S_1S_1S_OP(addr, ndummy, buf, len) \
SPI_MEM_OP(SPI_MEM_OP_CMD(0x0b, 1), \
--
2.48.1
In r852_ready(), the dev get from r852_get_dev() need to be checked.
An unstable device should not be ready. A proper implementation can
be found in r852_read_byte(). Add a status check and return 0 when it is
unstable.
Fixes: 50a487e7719c ("mtd: rawnand: Pass a nand_chip object to chip->dev_ready()")
Cc: stable(a)vger.kernel.org # v4.20+
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/mtd/nand/raw/r852.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index b07c2f8b4035..918974d088cf 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -387,6 +387,9 @@ static int r852_wait(struct nand_chip *chip)
static int r852_ready(struct nand_chip *chip)
{
struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
+ if (dev->card_unstable)
+ return 0;
+
return !(r852_read_reg(dev, R852_CARD_STA) & R852_CARD_STA_BUSY);
}
--
2.42.0.windows.2
In INFTL_findwriteunit(), the return value of inftl_read_oob()
need to be checked. A proper implementation can be
found in INFTL_deleteblock(). The status will be set as
SECTOR_IGNORE to break from the while-loop correctly
if the inftl_read_oob() fails.
Fixes: 8593fbc68b0d ("[MTD] Rework the out of band handling completely")
Cc: stable(a)vger.kernel.org # v2.6+
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
drivers/mtd/inftlcore.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 9739387cff8c..58c6e1743f5c 100644
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -482,10 +482,11 @@ static inline u16 INFTL_findwriteunit(struct INFTLrecord *inftl, unsigned block)
silly = MAX_LOOPS;
while (thisEUN <= inftl->lastEUN) {
- inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) +
- blockofs, 8, &retlen, (char *)&bci);
-
- status = bci.Status | bci.Status1;
+ if (inftl_read_oob(mtd, (thisEUN * inftl->EraseSize) +
+ blockofs, 8, &retlen, (char *)&bci) < 0)
+ status = SECTOR_IGNORE;
+ else
+ status = bci.Status | bci.Status1;
pr_debug("INFTL: status of block %d in EUN %d is %x\n",
block , writeEUN, status);
--
2.42.0.windows.2
The sdio_read32() calls sd_read(), but does not handle the error if
sd_read() fails. This could lead to subsequent operations processing
invalid data. A proper implementation can be found in sdio_readN().
Add error handling for the sd_read() to free tmpbuf and return error
code if sd_read() fails. This ensure that the memcpy() is only performed
when the read operation is successful.
Fixes: 554c0a3abf21 ("staging: Add rtl8723bs sdio wifi driver")
Cc: stable(a)vger.kernel.org # v4.12+
Signed-off-by: Wentao Liang <vulab(a)iscas.ac.cn>
---
v5: Fix error code
v4: Add change log and fix error code
v3: Add Cc flag
v2: Change code to initialize val
drivers/staging/rtl8723bs/hal/sdio_ops.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/staging/rtl8723bs/hal/sdio_ops.c b/drivers/staging/rtl8723bs/hal/sdio_ops.c
index 21e9f1858745..d79d41727042 100644
--- a/drivers/staging/rtl8723bs/hal/sdio_ops.c
+++ b/drivers/staging/rtl8723bs/hal/sdio_ops.c
@@ -185,7 +185,12 @@ static u32 sdio_read32(struct intf_hdl *intfhdl, u32 addr)
return SDIO_ERR_VAL32;
ftaddr &= ~(u16)0x3;
- sd_read(intfhdl, ftaddr, 8, tmpbuf);
+ err = sd_read(intfhdl, ftaddr, 8, tmpbuf);
+ if (err) {
+ kfree(tmpbuf);
+ return SDIO_ERR_VAL32;
+ }
+
memcpy(&le_tmp, tmpbuf + shift, 4);
val = le32_to_cpu(le_tmp);
--
2.42.0.windows.2
The JIT compile of ldimm instructions can be anywhere between 1-5
instructions long depending on the value being loaded.
arch_bpf_trampoline_size() provides JIT size of the BPF trampoline
before the buffer for JIT'ing it is allocated. BPF trampoline JIT
code has ldimm instructions that need to load the value of pointer
to struct bpf_tramp_image. But this pointer value is not same while
calling arch_bpf_trampoline_size() & arch_prepare_bpf_trampoline().
So, the size arrived at using arch_bpf_trampoline_size() can vary
from the size needed in arch_prepare_bpf_trampoline(). When the
number of ldimm instructions emitted in arch_bpf_trampoline_size()
is less than the number of ldimm instructions emitted during the
actual JIT compile of trampoline, the below warning is produced:
WARNING: CPU: 8 PID: 204190 at arch/powerpc/net/bpf_jit_comp.c:981 __arch_prepare_bpf_trampoline.isra.0+0xd2c/0xdcc
which is:
/* Make sure the trampoline generation logic doesn't overflow */
if (image && WARN_ON_ONCE(&image[ctx->idx] >
(u32 *)rw_image_end - BPF_INSN_SAFETY)) {
Pass NULL as the first argument to __arch_prepare_bpf_trampoline()
call from arch_bpf_trampoline_size() function, to differentiate it
from how arch_prepare_bpf_trampoline() calls it and ensure maximum
possible instructions are emitted in arch_bpf_trampoline_size() for
ldimm instructions that load a different value during the actual JIT
compile of BPF trampoline.
Fixes: d243b62b7bd3 ("powerpc64/bpf: Add support for bpf trampolines")
Reported-by: Venkat Rao Bagalkote <venkat88(a)linux.ibm.com>
Closes: https://lore.kernel.org/all/6168bfc8-659f-4b5a-a6fb-90a916dde3b3@linux.ibm.…
Cc: stable(a)vger.kernel.org # v6.13+
Signed-off-by: Hari Bathini <hbathini(a)linux.ibm.com>
---
* Removed a redundant '/' accidently added in a comment and resending.
arch/powerpc/net/bpf_jit_comp.c | 29 +++++++++++++++++++++++------
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 2991bb171a9b..c94717ccb2bd 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -833,7 +833,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
EMIT(PPC_RAW_STL(_R26, _R1, nvr_off + SZL));
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- PPC_LI_ADDR(_R3, (unsigned long)im);
+ /*
+ * Emit maximum possible instructions while getting the size of
+ * bpf trampoline to ensure trampoline JIT code doesn't overflow.
+ */
+ PPC_LI_ADDR(_R3, im ? (unsigned long)im :
+ (unsigned long)(~(1UL << (BITS_PER_LONG - 1))));
ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
(unsigned long)__bpf_tramp_enter);
if (ret)
@@ -889,7 +894,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
/* Reserve space to patch branch instruction to skip fexit progs */
- im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
+ if (im)
+ im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
EMIT(PPC_RAW_NOP());
}
@@ -912,8 +918,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
if (flags & BPF_TRAMP_F_CALL_ORIG) {
- im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
- PPC_LI_ADDR(_R3, im);
+ if (im)
+ im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
+ /*
+ * Emit maximum possible instructions while getting the size of
+ * bpf trampoline to ensure trampoline JIT code doesn't overflow.
+ */
+ PPC_LI_ADDR(_R3, im ? (unsigned long)im :
+ (unsigned long)(~(1UL << (BITS_PER_LONG - 1))));
ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
(unsigned long)__bpf_tramp_exit);
if (ret)
@@ -972,7 +984,6 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
struct bpf_tramp_links *tlinks, void *func_addr)
{
- struct bpf_tramp_image im;
void *image;
int ret;
@@ -988,7 +999,13 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
if (!image)
return -ENOMEM;
- ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
+ /*
+ * Pass NULL as bpf_tramp_image pointer to differentiate the intent to get the
+ * buffer size for trampoline here. This differentiation helps in accounting for
+ * maximum possible instructions if the JIT code size is likely to vary during
+ * the actual JIT compile of the trampoline.
+ */
+ ret = __arch_prepare_bpf_trampoline(NULL, image, image + PAGE_SIZE, image,
m, flags, tlinks, func_addr);
bpf_jit_free_exec(image);
--
2.48.1
From: Mario Limonciello <mario.limonciello(a)amd.com>
On some platforms it has been observed that STT limits are not being applied
properly causing poor performance as power limits are set too low.
STT limits that are sent to the platform are supposed to be in Q8.8
format. Convert them before sending.
Reported-by: Yijun Shen <Yijun.Shen(a)dell.com>
Fixes: 7c45534afa443 ("platform/x86/amd/pmf: Add support for PMF Policy Binary")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
drivers/platform/x86/amd/pmf/tee-if.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/platform/x86/amd/pmf/tee-if.c b/drivers/platform/x86/amd/pmf/tee-if.c
index 5d513161d7302..9a51258df0564 100644
--- a/drivers/platform/x86/amd/pmf/tee-if.c
+++ b/drivers/platform/x86/amd/pmf/tee-if.c
@@ -123,7 +123,7 @@ static void amd_pmf_apply_policies(struct amd_pmf_dev *dev, struct ta_pmf_enact_
case PMF_POLICY_STT_SKINTEMP_APU:
if (dev->prev_data->stt_skintemp_apu != val) {
- amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, val, NULL);
+ amd_pmf_send_cmd(dev, SET_STT_LIMIT_APU, false, val << 8, NULL);
dev_dbg(dev->dev, "update STT_SKINTEMP_APU: %u\n", val);
dev->prev_data->stt_skintemp_apu = val;
}
@@ -131,7 +131,7 @@ static void amd_pmf_apply_policies(struct amd_pmf_dev *dev, struct ta_pmf_enact_
case PMF_POLICY_STT_SKINTEMP_HS2:
if (dev->prev_data->stt_skintemp_hs2 != val) {
- amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, val, NULL);
+ amd_pmf_send_cmd(dev, SET_STT_LIMIT_HS2, false, val << 8, NULL);
dev_dbg(dev->dev, "update STT_SKINTEMP_HS2: %u\n", val);
dev->prev_data->stt_skintemp_hs2 = val;
}
--
2.43.0
Hi Greg,
My apologies! I'll test-build it before submitting again. Thanks for
pointing out my mistake.
build is failing and there is a need for a dependent patch.
Hardik
On Mon, Apr 7, 2025 at 10:43 AM Hardik Gohil <hgohil(a)mvista.com> wrote:
>
> Hi Greg,
>
> My apologies! I'll test-build it before submitting again. Thanks for pointing out my mistake.
>
> build is failing and there is a need for a dependent patch.
>
> Hardik
>
>
> On Thu, Apr 3, 2025 at 7:30 PM Greg KH <gregkh(a)linuxfoundation.org> wrote:
>>
>> On Thu, Apr 03, 2025 at 03:36:05PM +0530, Hardik Gohil wrote:
>> > Hello Greg,
>> >
>> > This patch applies cleanly to the v5.4 kernel.
>> >
>> > dmaengine: ti: edma: Add some null pointer checks to the edma_probe
>> >
>> > [upstream commit 6e2276203ac9ff10fc76917ec9813c660f627369]
>> >
>>
>> You obviously did not test-build this change :(
>>
>> Please always do so when asking for patches to be backported, otherwise
>> we get grumpy as it breaks our workflow...
>>
>> thanks,
>>
>> greg k-h
The patch titled
Subject: mm/vma: add give_up_on_oom option on modify/merge, use in uffd release
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-vma-add-give_up_on_oom-option-on-modify-merge-use-in-uffd-release.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Subject: mm/vma: add give_up_on_oom option on modify/merge, use in uffd release
Date: Fri, 21 Mar 2025 10:09:37 +0000
Currently, if a VMA merge fails due to an OOM condition arising on commit
merge or a failure to duplicate anon_vma's, we report this so the caller
can handle it.
However there are cases where the caller is only ostensibly trying a
merge, and doesn't mind if it fails due to this condition.
Since we do not want to introduce an implicit assumption that we only
actually modify VMAs after OOM conditions might arise, add a 'give up on
oom' option and make an explicit contract that, should this flag be set, we
absolutely will not modify any VMAs should OOM arise and just bail out.
Since it'd be very unusual for a user to try to vma_modify() with this flag
set but be specifying a range within a VMA which ends up being split (which
can fail due to rlimit issues, not only OOM), we add a debug warning for
this condition.
The motivating reason for this is uffd release - syzkaller (and Pedro
Falcato's VERY astute analysis) found a way in which an injected fault on
allocation, triggering an OOM condition on commit merge, would result in
uffd code becoming confused and treating an error value as if it were a VMA
pointer.
To avoid this, we make use of this new VMG flag to ensure that this never
occurs, utilising the fact that, should we be clearing entire VMAs, we do
not wish an OOM event to be reported to us.
Many thanks to Pedro Falcato for his excellent analysis and Jann Horn for
his insightful and intelligent analysis of the situation, both of whom were
instrumental in this fix.
Link: https://lkml.kernel.org/r/20250321100937.46634-1-lorenzo.stoakes@oracle.com
Reported-by: syzbot+20ed41006cf9d842c2b5(a)syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/67dc67f0.050a0220.25ae54.001e.GAE@google.com/
Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure")
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Suggested-by: Pedro Falcato <pfalcato(a)suse.de>
Suggested-by: Jann Horn <jannh(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/userfaultfd.c | 13 +++++++++--
mm/vma.c | 51 +++++++++++++++++++++++++++++++++++++++++----
mm/vma.h | 9 +++++++
3 files changed, 66 insertions(+), 7 deletions(-)
--- a/mm/userfaultfd.c~mm-vma-add-give_up_on_oom-option-on-modify-merge-use-in-uffd-release
+++ a/mm/userfaultfd.c
@@ -1902,6 +1902,14 @@ struct vm_area_struct *userfaultfd_clear
unsigned long end)
{
struct vm_area_struct *ret;
+ bool give_up_on_oom = false;
+
+ /*
+ * If we are modifying only and not splitting, just give up on the merge
+ * if OOM prevents us from merging successfully.
+ */
+ if (start == vma->vm_start && end == vma->vm_end)
+ give_up_on_oom = true;
/* Reset ptes for the whole vma range if wr-protected */
if (userfaultfd_wp(vma))
@@ -1909,7 +1917,7 @@ struct vm_area_struct *userfaultfd_clear
ret = vma_modify_flags_uffd(vmi, prev, vma, start, end,
vma->vm_flags & ~__VM_UFFD_FLAGS,
- NULL_VM_UFFD_CTX);
+ NULL_VM_UFFD_CTX, give_up_on_oom);
/*
* In the vma_merge() successful mprotect-like case 8:
@@ -1960,7 +1968,8 @@ int userfaultfd_register_range(struct us
new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
vma = vma_modify_flags_uffd(&vmi, prev, vma, start, vma_end,
new_flags,
- (struct vm_userfaultfd_ctx){ctx});
+ (struct vm_userfaultfd_ctx){ctx},
+ /* give_up_on_oom = */false);
if (IS_ERR(vma))
return PTR_ERR(vma);
--- a/mm/vma.c~mm-vma-add-give_up_on_oom-option-on-modify-merge-use-in-uffd-release
+++ a/mm/vma.c
@@ -666,6 +666,9 @@ static void vmg_adjust_set_range(struct
/*
* Actually perform the VMA merge operation.
*
+ * IMPORTANT: We guarantee that, should vmg->give_up_on_oom is set, to not
+ * modify any VMAs or cause inconsistent state should an OOM condition arise.
+ *
* Returns 0 on success, or an error value on failure.
*/
static int commit_merge(struct vma_merge_struct *vmg)
@@ -685,6 +688,12 @@ static int commit_merge(struct vma_merge
init_multi_vma_prep(&vp, vma, vmg);
+ /*
+ * If vmg->give_up_on_oom is set, we're safe, because we don't actually
+ * manipulate any VMAs until we succeed at preallocation.
+ *
+ * Past this point, we will not return an error.
+ */
if (vma_iter_prealloc(vmg->vmi, vma))
return -ENOMEM;
@@ -915,7 +924,13 @@ static __must_check struct vm_area_struc
if (anon_dup)
unlink_anon_vmas(anon_dup);
- vmg->state = VMA_MERGE_ERROR_NOMEM;
+ /*
+ * We've cleaned up any cloned anon_vma's, no VMAs have been
+ * modified, no harm no foul if the user requests that we not
+ * report this and just give up, leaving the VMAs unmerged.
+ */
+ if (!vmg->give_up_on_oom)
+ vmg->state = VMA_MERGE_ERROR_NOMEM;
return NULL;
}
@@ -926,7 +941,15 @@ static __must_check struct vm_area_struc
abort:
vma_iter_set(vmg->vmi, start);
vma_iter_load(vmg->vmi);
- vmg->state = VMA_MERGE_ERROR_NOMEM;
+
+ /*
+ * This means we have failed to clone anon_vma's correctly, but no
+ * actual changes to VMAs have occurred, so no harm no foul - if the
+ * user doesn't want this reported and instead just wants to give up on
+ * the merge, allow it.
+ */
+ if (!vmg->give_up_on_oom)
+ vmg->state = VMA_MERGE_ERROR_NOMEM;
return NULL;
}
@@ -1068,6 +1091,10 @@ int vma_expand(struct vma_merge_struct *
/* This should already have been checked by this point. */
VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg);
vma_start_write(next);
+ /*
+ * In this case we don't report OOM, so vmg->give_up_on_mm is
+ * safe.
+ */
ret = dup_anon_vma(middle, next, &anon_dup);
if (ret)
return ret;
@@ -1090,9 +1117,15 @@ int vma_expand(struct vma_merge_struct *
return 0;
nomem:
- vmg->state = VMA_MERGE_ERROR_NOMEM;
if (anon_dup)
unlink_anon_vmas(anon_dup);
+ /*
+ * If the user requests that we just give upon OOM, we are safe to do so
+ * here, as commit merge provides this contract to us. Nothing has been
+ * changed - no harm no foul, just don't report it.
+ */
+ if (!vmg->give_up_on_oom)
+ vmg->state = VMA_MERGE_ERROR_NOMEM;
return -ENOMEM;
}
@@ -1534,6 +1567,13 @@ static struct vm_area_struct *vma_modify
if (vmg_nomem(vmg))
return ERR_PTR(-ENOMEM);
+ /*
+ * Split can fail for reasons other than OOM, so if the user requests
+ * this it's probably a mistake.
+ */
+ VM_WARN_ON(vmg->give_up_on_oom &&
+ (vma->vm_start != start || vma->vm_end != end));
+
/* Split any preceding portion of the VMA. */
if (vma->vm_start < start) {
int err = split_vma(vmg->vmi, vma, start, 1);
@@ -1602,12 +1642,15 @@ struct vm_area_struct
struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long new_flags,
- struct vm_userfaultfd_ctx new_ctx)
+ struct vm_userfaultfd_ctx new_ctx,
+ bool give_up_on_oom)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
vmg.flags = new_flags;
vmg.uffd_ctx = new_ctx;
+ if (give_up_on_oom)
+ vmg.give_up_on_oom = true;
return vma_modify(&vmg);
}
--- a/mm/vma.h~mm-vma-add-give_up_on_oom-option-on-modify-merge-use-in-uffd-release
+++ a/mm/vma.h
@@ -114,6 +114,12 @@ struct vma_merge_struct {
*/
bool just_expand :1;
+ /*
+ * If a merge is possible, but an OOM error occurs, give up and don't
+ * execute the merge, returning NULL.
+ */
+ bool give_up_on_oom :1;
+
/* Internal flags set during merge process: */
/*
@@ -255,7 +261,8 @@ __must_check struct vm_area_struct
struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long new_flags,
- struct vm_userfaultfd_ctx new_ctx);
+ struct vm_userfaultfd_ctx new_ctx,
+ bool give_up_on_oom);
__must_check struct vm_area_struct
*vma_merge_new_range(struct vma_merge_struct *vmg);
_
Patches currently in -mm which might be from lorenzo.stoakes(a)oracle.com are
mm-vma-add-give_up_on_oom-option-on-modify-merge-use-in-uffd-release.patch