Am 26.05.24 um 22:14 schrieb Sasha Levin:
> This is a note to let you know that I've just added the patch titled
>
> platform/x86: xiaomi-wmi: Fix race condition when reporting key events
>
> to the 5.10-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> platform-x86-xiaomi-wmi-fix-race-condition-when-repo.patch
> and it can be found in the queue-5.10 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
Hi,
the underlying race condition can only be triggered since
commit e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on all CPUs"), which
afaik was introduced with kernel 6.8.
Because of this, i do not think that we have to backport this commit to kernels before 6.8.
Thanks,
Armin Wolf
>
> commit 6f4e7901c3ed3c0bd3da7af5854dbb765fad2e00
> Author: Armin Wolf <W_Armin(a)gmx.de>
> Date: Tue Apr 2 16:30:57 2024 +0200
>
> platform/x86: xiaomi-wmi: Fix race condition when reporting key events
>
> [ Upstream commit 290680c2da8061e410bcaec4b21584ed951479af ]
>
> Multiple WMI events can be received concurrently, so multiple instances
> of xiaomi_wmi_notify() can be active at the same time. Since the input
> device is shared between those handlers, the key input sequence can be
> disturbed.
>
> Fix this by protecting the key input sequence with a mutex.
>
> Compile-tested only.
>
> Fixes: edb73f4f0247 ("platform/x86: wmi: add Xiaomi WMI key driver")
> Signed-off-by: Armin Wolf <W_Armin(a)gmx.de>
> Link: https://lore.kernel.org/r/20240402143059.8456-2-W_Armin@gmx.de
> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
> Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/platform/x86/xiaomi-wmi.c b/drivers/platform/x86/xiaomi-wmi.c
> index 54a2546bb93bf..be80f0bda9484 100644
> --- a/drivers/platform/x86/xiaomi-wmi.c
> +++ b/drivers/platform/x86/xiaomi-wmi.c
> @@ -2,8 +2,10 @@
> /* WMI driver for Xiaomi Laptops */
>
> #include <linux/acpi.h>
> +#include <linux/device.h>
> #include <linux/input.h>
> #include <linux/module.h>
> +#include <linux/mutex.h>
> #include <linux/wmi.h>
>
> #include <uapi/linux/input-event-codes.h>
> @@ -20,12 +22,21 @@
>
> struct xiaomi_wmi {
> struct input_dev *input_dev;
> + struct mutex key_lock; /* Protects the key event sequence */
> unsigned int key_code;
> };
>
> +static void xiaomi_mutex_destroy(void *data)
> +{
> + struct mutex *lock = data;
> +
> + mutex_destroy(lock);
> +}
> +
> static int xiaomi_wmi_probe(struct wmi_device *wdev, const void *context)
> {
> struct xiaomi_wmi *data;
> + int ret;
>
> if (wdev == NULL || context == NULL)
> return -EINVAL;
> @@ -35,6 +46,11 @@ static int xiaomi_wmi_probe(struct wmi_device *wdev, const void *context)
> return -ENOMEM;
> dev_set_drvdata(&wdev->dev, data);
>
> + mutex_init(&data->key_lock);
> + ret = devm_add_action_or_reset(&wdev->dev, xiaomi_mutex_destroy, &data->key_lock);
> + if (ret < 0)
> + return ret;
> +
> data->input_dev = devm_input_allocate_device(&wdev->dev);
> if (data->input_dev == NULL)
> return -ENOMEM;
> @@ -59,10 +75,12 @@ static void xiaomi_wmi_notify(struct wmi_device *wdev, union acpi_object *dummy)
> if (data == NULL)
> return;
>
> + mutex_lock(&data->key_lock);
> input_report_key(data->input_dev, data->key_code, 1);
> input_sync(data->input_dev);
> input_report_key(data->input_dev, data->key_code, 0);
> input_sync(data->input_dev);
> + mutex_unlock(&data->key_lock);
> }
>
> static const struct wmi_device_id xiaomi_wmi_id_table[] = {
Am 26.05.24 um 22:07 schrieb Sasha Levin:
> This is a note to let you know that I've just added the patch titled
>
> platform/x86: xiaomi-wmi: Fix race condition when reporting key events
>
> to the 5.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> platform-x86-xiaomi-wmi-fix-race-condition-when-repo.patch
> and it can be found in the queue-5.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
Hi,
the underlying race condition can only be triggered since
commit e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on all CPUs"), which
afaik was introduced with kernel 6.8.
Because of this, i do not think that we have to backport this commit to kernels before 6.8.
Thanks,
Armin Wolf
>
> commit 1f436551dd453c28c23f800e7273136e526197cb
> Author: Armin Wolf <W_Armin(a)gmx.de>
> Date: Tue Apr 2 16:30:57 2024 +0200
>
> platform/x86: xiaomi-wmi: Fix race condition when reporting key events
>
> [ Upstream commit 290680c2da8061e410bcaec4b21584ed951479af ]
>
> Multiple WMI events can be received concurrently, so multiple instances
> of xiaomi_wmi_notify() can be active at the same time. Since the input
> device is shared between those handlers, the key input sequence can be
> disturbed.
>
> Fix this by protecting the key input sequence with a mutex.
>
> Compile-tested only.
>
> Fixes: edb73f4f0247 ("platform/x86: wmi: add Xiaomi WMI key driver")
> Signed-off-by: Armin Wolf <W_Armin(a)gmx.de>
> Link: https://lore.kernel.org/r/20240402143059.8456-2-W_Armin@gmx.de
> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
> Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/platform/x86/xiaomi-wmi.c b/drivers/platform/x86/xiaomi-wmi.c
> index 54a2546bb93bf..be80f0bda9484 100644
> --- a/drivers/platform/x86/xiaomi-wmi.c
> +++ b/drivers/platform/x86/xiaomi-wmi.c
> @@ -2,8 +2,10 @@
> /* WMI driver for Xiaomi Laptops */
>
> #include <linux/acpi.h>
> +#include <linux/device.h>
> #include <linux/input.h>
> #include <linux/module.h>
> +#include <linux/mutex.h>
> #include <linux/wmi.h>
>
> #include <uapi/linux/input-event-codes.h>
> @@ -20,12 +22,21 @@
>
> struct xiaomi_wmi {
> struct input_dev *input_dev;
> + struct mutex key_lock; /* Protects the key event sequence */
> unsigned int key_code;
> };
>
> +static void xiaomi_mutex_destroy(void *data)
> +{
> + struct mutex *lock = data;
> +
> + mutex_destroy(lock);
> +}
> +
> static int xiaomi_wmi_probe(struct wmi_device *wdev, const void *context)
> {
> struct xiaomi_wmi *data;
> + int ret;
>
> if (wdev == NULL || context == NULL)
> return -EINVAL;
> @@ -35,6 +46,11 @@ static int xiaomi_wmi_probe(struct wmi_device *wdev, const void *context)
> return -ENOMEM;
> dev_set_drvdata(&wdev->dev, data);
>
> + mutex_init(&data->key_lock);
> + ret = devm_add_action_or_reset(&wdev->dev, xiaomi_mutex_destroy, &data->key_lock);
> + if (ret < 0)
> + return ret;
> +
> data->input_dev = devm_input_allocate_device(&wdev->dev);
> if (data->input_dev == NULL)
> return -ENOMEM;
> @@ -59,10 +75,12 @@ static void xiaomi_wmi_notify(struct wmi_device *wdev, union acpi_object *dummy)
> if (data == NULL)
> return;
>
> + mutex_lock(&data->key_lock);
> input_report_key(data->input_dev, data->key_code, 1);
> input_sync(data->input_dev);
> input_report_key(data->input_dev, data->key_code, 0);
> input_sync(data->input_dev);
> + mutex_unlock(&data->key_lock);
> }
>
> static const struct wmi_device_id xiaomi_wmi_id_table[] = {
Am 26.05.24 um 21:57 schrieb Sasha Levin:
> This is a note to let you know that I've just added the patch titled
>
> platform/x86: xiaomi-wmi: Fix race condition when reporting key events
>
> to the 6.1-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> platform-x86-xiaomi-wmi-fix-race-condition-when-repo.patch
> and it can be found in the queue-6.1 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
>
Hi,
the underlying race condition can only be triggered since
commit e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on all CPUs"), which
afaik was introduced with kernel 6.8.
Because of this, i do not think that we have to backport this commit to kernels before 6.8.
Thanks,
Armin Wolf
>
> commit 1abdef69265133db29772ed5cefea2338f8ce173
> Author: Armin Wolf <W_Armin(a)gmx.de>
> Date: Tue Apr 2 16:30:57 2024 +0200
>
> platform/x86: xiaomi-wmi: Fix race condition when reporting key events
>
> [ Upstream commit 290680c2da8061e410bcaec4b21584ed951479af ]
>
> Multiple WMI events can be received concurrently, so multiple instances
> of xiaomi_wmi_notify() can be active at the same time. Since the input
> device is shared between those handlers, the key input sequence can be
> disturbed.
>
> Fix this by protecting the key input sequence with a mutex.
>
> Compile-tested only.
>
> Fixes: edb73f4f0247 ("platform/x86: wmi: add Xiaomi WMI key driver")
> Signed-off-by: Armin Wolf <W_Armin(a)gmx.de>
> Link: https://lore.kernel.org/r/20240402143059.8456-2-W_Armin@gmx.de
> Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
> Reviewed-by: Hans de Goede <hdegoede(a)redhat.com>
> Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/drivers/platform/x86/xiaomi-wmi.c b/drivers/platform/x86/xiaomi-wmi.c
> index 54a2546bb93bf..be80f0bda9484 100644
> --- a/drivers/platform/x86/xiaomi-wmi.c
> +++ b/drivers/platform/x86/xiaomi-wmi.c
> @@ -2,8 +2,10 @@
> /* WMI driver for Xiaomi Laptops */
>
> #include <linux/acpi.h>
> +#include <linux/device.h>
> #include <linux/input.h>
> #include <linux/module.h>
> +#include <linux/mutex.h>
> #include <linux/wmi.h>
>
> #include <uapi/linux/input-event-codes.h>
> @@ -20,12 +22,21 @@
>
> struct xiaomi_wmi {
> struct input_dev *input_dev;
> + struct mutex key_lock; /* Protects the key event sequence */
> unsigned int key_code;
> };
>
> +static void xiaomi_mutex_destroy(void *data)
> +{
> + struct mutex *lock = data;
> +
> + mutex_destroy(lock);
> +}
> +
> static int xiaomi_wmi_probe(struct wmi_device *wdev, const void *context)
> {
> struct xiaomi_wmi *data;
> + int ret;
>
> if (wdev == NULL || context == NULL)
> return -EINVAL;
> @@ -35,6 +46,11 @@ static int xiaomi_wmi_probe(struct wmi_device *wdev, const void *context)
> return -ENOMEM;
> dev_set_drvdata(&wdev->dev, data);
>
> + mutex_init(&data->key_lock);
> + ret = devm_add_action_or_reset(&wdev->dev, xiaomi_mutex_destroy, &data->key_lock);
> + if (ret < 0)
> + return ret;
> +
> data->input_dev = devm_input_allocate_device(&wdev->dev);
> if (data->input_dev == NULL)
> return -ENOMEM;
> @@ -59,10 +75,12 @@ static void xiaomi_wmi_notify(struct wmi_device *wdev, union acpi_object *dummy)
> if (data == NULL)
> return;
>
> + mutex_lock(&data->key_lock);
> input_report_key(data->input_dev, data->key_code, 1);
> input_sync(data->input_dev);
> input_report_key(data->input_dev, data->key_code, 0);
> input_sync(data->input_dev);
> + mutex_unlock(&data->key_lock);
> }
>
> static const struct wmi_device_id xiaomi_wmi_id_table[] = {
Hi,
I noticed that since https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… (which also got backported to -stable kernels) several of messages from dmesg regarding the ATA subsystem are no longer logged.
For example, on my Dell PowerEdge 840 which has only one PATA port I used to get:
scsi host1: ata_piix
ata1: PATA max UDMA/100 cmd 0x1f0 ctl 0x3f6 bmdma 0xfc00 irq 14
ata2: PATA max UDMA/100 cmd 0x170 ctl 0x376 bmdma 0xfc08 irq 15
ata_piix 0000:00:1f.2: MAP [ P0 P2 P1 P3 ]
ata2: port disabled--ignoring
ata1.01: NODEV after polling detection
ata1.00: ATAPI: SAMSUNG CD-R/RW SW-248F, R602, max UDMA/33
After that commit, the following two log entries are missing:
ata2: port disabled--ignoring
ata1.01: NODEV after polling detection
Note that these are just examples, there are many more messages impacted by that.
Looking at the code, these messages are logged via ata_link_dbg / ata_dev_dbg:
ata_link_dbg(link, "port disabled--ignoring\n"); [in drivers/ata/libata-eh.c]
ata_dev_dbg(dev, "NODEV after polling detection\n"); [in drivers/ata/libata-core.c]
The commit change how the logging is called - ata_dev_printk function which was calling printk() directly got replaced with the following macro:
+#define ata_dev_printk(level, dev, fmt, ...) \
+ pr_ ## level("ata%u.%02u: " fmt, \
+ (dev)->link->ap->print_id, \
+ (dev)->link->pmp + (dev)->devno, \
+ ##__VA_ARGS__)
(...)
#define ata_link_dbg(link, fmt, ...) \
- ata_link_printk(link, KERN_DEBUG, fmt, ##__VA_ARGS__)
+ ata_link_printk(debug, link, fmt, ##__VA_ARGS__)
(...)
#define ata_dev_dbg(dev, fmt, ...) \
- ata_dev_printk(dev, KERN_DEBUG, fmt, ##__VA_ARGS__)
+ ata_dev_printk(debug, dev, fmt, ##__VA_ARGS__
So, instead of printk(..., level == KERN_DEBUG, ) we now call pr_debug(...). This is a problem as printk(msg, KERN_DEBUG) != pr_debug(msg).
pr_debug is defined as:
/* If you are writing a driver, please use dev_dbg instead */
#if defined(CONFIG_DYNAMIC_DEBUG) || \
(defined(CONFIG_DYNAMIC_DEBUG_CORE) && defined(DYNAMIC_DEBUG_MODULE))
#include <linux/dynamic_debug.h>
/**
* pr_debug - Print a debug-level message conditionally
* @fmt: format string
* @...: arguments for the format string
*
* This macro expands to dynamic_pr_debug() if CONFIG_DYNAMIC_DEBUG is
* set. Otherwise, if DEBUG is defined, it's equivalent to a printk with
* KERN_DEBUG loglevel. If DEBUG is not defined it does nothing.
*
* It uses pr_fmt() to generate the format string (dynamic_pr_debug() uses
* pr_fmt() internally).
*/
#define pr_debug(fmt, ...) \
dynamic_pr_debug(fmt, ##__VA_ARGS__)
#elif defined(DEBUG)
#define pr_debug(fmt, ...) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) \
no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
Without CONFIG_DYNAMIC_DEBUG and if no CONFIG_DEBUG is enabled, the end result is calling no_printk which means nothing gets logged.
Looking at the code, there are more impacted calls, like for example ata_dev_dbg(dev, "disabling queued TRIM support\n") for ATA_HORKAGE_NO_NCQ_TRIM, which also seems like an important information to log, and there are more.
Was this change done intentionally? Note that most of the "debug" printks in libata code seem to be guarded by ata_msg_info / ata_msg_probe / ATA_DEBUG which was sufficient to prevent excess debug information logging.
One of the cases like this was covered in the patch:
-#ifdef ATA_DEBUG
if (status != 0xff && (status & (ATA_BUSY | ATA_DRQ)))
- ata_port_printk(ap, KERN_DEBUG, "abnormal Status 0x%X\n",
- status);
-#endif
+ ata_port_dbg(ap, "abnormal Status 0x%X\n", status);
Assuming this is the intended direction, would it make sense for now to at promote "unconditionally" logged messages from ata_link_dbg/ata_dev_dbg to ata_link_info/ata_dev_info?
Longer term, perhaps we want to revisit ata_msg_info/ata_msg_probe/ATA_DEBUG/ATA_VERBOSE_DEBUG vs ata_dev_printk/ata_link_printk/pr_debug (and maybe also pr_devel), especially that DYNAMIC_DEBUG is available these days...
Best regards,
Krzysztof Olędzki
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x f5d4e04634c9cf68bdf23de08ada0bb92e8befe7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024052609-speckled-elusive-2d6c@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f5d4e04634c9cf68bdf23de08ada0bb92e8befe7 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Date: Mon, 20 May 2024 22:26:19 +0900
Subject: [PATCH] nilfs2: fix use-after-free of timer for log writer thread
Patch series "nilfs2: fix log writer related issues".
This bug fix series covers three nilfs2 log writer-related issues,
including a timer use-after-free issue and potential deadlock issue on
unmount, and a potential freeze issue in event synchronization found
during their analysis. Details are described in each commit log.
This patch (of 3):
A use-after-free issue has been reported regarding the timer sc_timer on
the nilfs_sc_info structure.
The problem is that even though it is used to wake up a sleeping log
writer thread, sc_timer is not shut down until the nilfs_sc_info structure
is about to be freed, and is used regardless of the thread's lifetime.
Fix this issue by limiting the use of sc_timer only while the log writer
thread is alive.
Link: https://lkml.kernel.org/r/20240520132621.4054-1-konishi.ryusuke@gmail.com
Link: https://lkml.kernel.org/r/20240520132621.4054-2-konishi.ryusuke@gmail.com
Fixes: fdce895ea5dd ("nilfs2: change sc_timer from a pointer to an embedded one in struct nilfs_sc_info")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: "Bai, Shuangpeng" <sjb7183(a)psu.edu>
Closes: https://groups.google.com/g/syzkaller/c/MK_LYqtt8ko/m/8rgdWeseAwAJ
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6be7dd423fbd..7cb34e1c9206 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2118,8 +2118,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
spin_lock(&sci->sc_state_lock);
if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
- sci->sc_timer.expires = jiffies + sci->sc_interval;
- add_timer(&sci->sc_timer);
+ if (sci->sc_task) {
+ sci->sc_timer.expires = jiffies + sci->sc_interval;
+ add_timer(&sci->sc_timer);
+ }
sci->sc_state |= NILFS_SEGCTOR_COMMIT;
}
spin_unlock(&sci->sc_state_lock);
@@ -2320,10 +2322,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
*/
static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
{
+ bool thread_is_alive;
+
spin_lock(&sci->sc_state_lock);
sci->sc_seq_accepted = sci->sc_seq_request;
+ thread_is_alive = (bool)sci->sc_task;
spin_unlock(&sci->sc_state_lock);
- del_timer_sync(&sci->sc_timer);
+
+ /*
+ * This function does not race with the log writer thread's
+ * termination. Therefore, deleting sc_timer, which should not be
+ * done after the log writer thread exits, can be done safely outside
+ * the area protected by sc_state_lock.
+ */
+ if (thread_is_alive)
+ del_timer_sync(&sci->sc_timer);
}
/**
@@ -2349,7 +2362,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
sci->sc_flush_request &= ~FLUSH_DAT_BIT;
/* re-enable timer if checkpoint creation was not done */
- if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task &&
time_before(jiffies, sci->sc_timer.expires))
add_timer(&sci->sc_timer);
}
@@ -2539,6 +2552,7 @@ static int nilfs_segctor_thread(void *arg)
int timeout = 0;
sci->sc_timer_task = current;
+ timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
/* start sync. */
sci->sc_task = current;
@@ -2606,6 +2620,7 @@ static int nilfs_segctor_thread(void *arg)
end_thread:
/* end sync. */
sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
spin_unlock(&sci->sc_state_lock);
return 0;
@@ -2669,7 +2684,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_gc_inodes);
INIT_LIST_HEAD(&sci->sc_iput_queue);
INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
- timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2748,7 +2762,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
down_write(&nilfs->ns_segctor_sem);
- timer_shutdown_sync(&sci->sc_timer);
kfree(sci);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f5d4e04634c9cf68bdf23de08ada0bb92e8befe7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024052606-skater-friction-3021@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f5d4e04634c9cf68bdf23de08ada0bb92e8befe7 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Date: Mon, 20 May 2024 22:26:19 +0900
Subject: [PATCH] nilfs2: fix use-after-free of timer for log writer thread
Patch series "nilfs2: fix log writer related issues".
This bug fix series covers three nilfs2 log writer-related issues,
including a timer use-after-free issue and potential deadlock issue on
unmount, and a potential freeze issue in event synchronization found
during their analysis. Details are described in each commit log.
This patch (of 3):
A use-after-free issue has been reported regarding the timer sc_timer on
the nilfs_sc_info structure.
The problem is that even though it is used to wake up a sleeping log
writer thread, sc_timer is not shut down until the nilfs_sc_info structure
is about to be freed, and is used regardless of the thread's lifetime.
Fix this issue by limiting the use of sc_timer only while the log writer
thread is alive.
Link: https://lkml.kernel.org/r/20240520132621.4054-1-konishi.ryusuke@gmail.com
Link: https://lkml.kernel.org/r/20240520132621.4054-2-konishi.ryusuke@gmail.com
Fixes: fdce895ea5dd ("nilfs2: change sc_timer from a pointer to an embedded one in struct nilfs_sc_info")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: "Bai, Shuangpeng" <sjb7183(a)psu.edu>
Closes: https://groups.google.com/g/syzkaller/c/MK_LYqtt8ko/m/8rgdWeseAwAJ
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6be7dd423fbd..7cb34e1c9206 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2118,8 +2118,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
spin_lock(&sci->sc_state_lock);
if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
- sci->sc_timer.expires = jiffies + sci->sc_interval;
- add_timer(&sci->sc_timer);
+ if (sci->sc_task) {
+ sci->sc_timer.expires = jiffies + sci->sc_interval;
+ add_timer(&sci->sc_timer);
+ }
sci->sc_state |= NILFS_SEGCTOR_COMMIT;
}
spin_unlock(&sci->sc_state_lock);
@@ -2320,10 +2322,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
*/
static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
{
+ bool thread_is_alive;
+
spin_lock(&sci->sc_state_lock);
sci->sc_seq_accepted = sci->sc_seq_request;
+ thread_is_alive = (bool)sci->sc_task;
spin_unlock(&sci->sc_state_lock);
- del_timer_sync(&sci->sc_timer);
+
+ /*
+ * This function does not race with the log writer thread's
+ * termination. Therefore, deleting sc_timer, which should not be
+ * done after the log writer thread exits, can be done safely outside
+ * the area protected by sc_state_lock.
+ */
+ if (thread_is_alive)
+ del_timer_sync(&sci->sc_timer);
}
/**
@@ -2349,7 +2362,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
sci->sc_flush_request &= ~FLUSH_DAT_BIT;
/* re-enable timer if checkpoint creation was not done */
- if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task &&
time_before(jiffies, sci->sc_timer.expires))
add_timer(&sci->sc_timer);
}
@@ -2539,6 +2552,7 @@ static int nilfs_segctor_thread(void *arg)
int timeout = 0;
sci->sc_timer_task = current;
+ timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
/* start sync. */
sci->sc_task = current;
@@ -2606,6 +2620,7 @@ static int nilfs_segctor_thread(void *arg)
end_thread:
/* end sync. */
sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
spin_unlock(&sci->sc_state_lock);
return 0;
@@ -2669,7 +2684,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_gc_inodes);
INIT_LIST_HEAD(&sci->sc_iput_queue);
INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
- timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2748,7 +2762,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
down_write(&nilfs->ns_segctor_sem);
- timer_shutdown_sync(&sci->sc_timer);
kfree(sci);
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f5d4e04634c9cf68bdf23de08ada0bb92e8befe7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024052605-pretext-jugular-5085@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f5d4e04634c9cf68bdf23de08ada0bb92e8befe7 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Date: Mon, 20 May 2024 22:26:19 +0900
Subject: [PATCH] nilfs2: fix use-after-free of timer for log writer thread
Patch series "nilfs2: fix log writer related issues".
This bug fix series covers three nilfs2 log writer-related issues,
including a timer use-after-free issue and potential deadlock issue on
unmount, and a potential freeze issue in event synchronization found
during their analysis. Details are described in each commit log.
This patch (of 3):
A use-after-free issue has been reported regarding the timer sc_timer on
the nilfs_sc_info structure.
The problem is that even though it is used to wake up a sleeping log
writer thread, sc_timer is not shut down until the nilfs_sc_info structure
is about to be freed, and is used regardless of the thread's lifetime.
Fix this issue by limiting the use of sc_timer only while the log writer
thread is alive.
Link: https://lkml.kernel.org/r/20240520132621.4054-1-konishi.ryusuke@gmail.com
Link: https://lkml.kernel.org/r/20240520132621.4054-2-konishi.ryusuke@gmail.com
Fixes: fdce895ea5dd ("nilfs2: change sc_timer from a pointer to an embedded one in struct nilfs_sc_info")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: "Bai, Shuangpeng" <sjb7183(a)psu.edu>
Closes: https://groups.google.com/g/syzkaller/c/MK_LYqtt8ko/m/8rgdWeseAwAJ
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6be7dd423fbd..7cb34e1c9206 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2118,8 +2118,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
spin_lock(&sci->sc_state_lock);
if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
- sci->sc_timer.expires = jiffies + sci->sc_interval;
- add_timer(&sci->sc_timer);
+ if (sci->sc_task) {
+ sci->sc_timer.expires = jiffies + sci->sc_interval;
+ add_timer(&sci->sc_timer);
+ }
sci->sc_state |= NILFS_SEGCTOR_COMMIT;
}
spin_unlock(&sci->sc_state_lock);
@@ -2320,10 +2322,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
*/
static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
{
+ bool thread_is_alive;
+
spin_lock(&sci->sc_state_lock);
sci->sc_seq_accepted = sci->sc_seq_request;
+ thread_is_alive = (bool)sci->sc_task;
spin_unlock(&sci->sc_state_lock);
- del_timer_sync(&sci->sc_timer);
+
+ /*
+ * This function does not race with the log writer thread's
+ * termination. Therefore, deleting sc_timer, which should not be
+ * done after the log writer thread exits, can be done safely outside
+ * the area protected by sc_state_lock.
+ */
+ if (thread_is_alive)
+ del_timer_sync(&sci->sc_timer);
}
/**
@@ -2349,7 +2362,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
sci->sc_flush_request &= ~FLUSH_DAT_BIT;
/* re-enable timer if checkpoint creation was not done */
- if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task &&
time_before(jiffies, sci->sc_timer.expires))
add_timer(&sci->sc_timer);
}
@@ -2539,6 +2552,7 @@ static int nilfs_segctor_thread(void *arg)
int timeout = 0;
sci->sc_timer_task = current;
+ timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
/* start sync. */
sci->sc_task = current;
@@ -2606,6 +2620,7 @@ static int nilfs_segctor_thread(void *arg)
end_thread:
/* end sync. */
sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
spin_unlock(&sci->sc_state_lock);
return 0;
@@ -2669,7 +2684,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_gc_inodes);
INIT_LIST_HEAD(&sci->sc_iput_queue);
INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
- timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2748,7 +2762,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
down_write(&nilfs->ns_segctor_sem);
- timer_shutdown_sync(&sci->sc_timer);
kfree(sci);
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f5d4e04634c9cf68bdf23de08ada0bb92e8befe7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024052603-probing-percolate-6265@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f5d4e04634c9cf68bdf23de08ada0bb92e8befe7 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Date: Mon, 20 May 2024 22:26:19 +0900
Subject: [PATCH] nilfs2: fix use-after-free of timer for log writer thread
Patch series "nilfs2: fix log writer related issues".
This bug fix series covers three nilfs2 log writer-related issues,
including a timer use-after-free issue and potential deadlock issue on
unmount, and a potential freeze issue in event synchronization found
during their analysis. Details are described in each commit log.
This patch (of 3):
A use-after-free issue has been reported regarding the timer sc_timer on
the nilfs_sc_info structure.
The problem is that even though it is used to wake up a sleeping log
writer thread, sc_timer is not shut down until the nilfs_sc_info structure
is about to be freed, and is used regardless of the thread's lifetime.
Fix this issue by limiting the use of sc_timer only while the log writer
thread is alive.
Link: https://lkml.kernel.org/r/20240520132621.4054-1-konishi.ryusuke@gmail.com
Link: https://lkml.kernel.org/r/20240520132621.4054-2-konishi.ryusuke@gmail.com
Fixes: fdce895ea5dd ("nilfs2: change sc_timer from a pointer to an embedded one in struct nilfs_sc_info")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: "Bai, Shuangpeng" <sjb7183(a)psu.edu>
Closes: https://groups.google.com/g/syzkaller/c/MK_LYqtt8ko/m/8rgdWeseAwAJ
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 6be7dd423fbd..7cb34e1c9206 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2118,8 +2118,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
spin_lock(&sci->sc_state_lock);
if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
- sci->sc_timer.expires = jiffies + sci->sc_interval;
- add_timer(&sci->sc_timer);
+ if (sci->sc_task) {
+ sci->sc_timer.expires = jiffies + sci->sc_interval;
+ add_timer(&sci->sc_timer);
+ }
sci->sc_state |= NILFS_SEGCTOR_COMMIT;
}
spin_unlock(&sci->sc_state_lock);
@@ -2320,10 +2322,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
*/
static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
{
+ bool thread_is_alive;
+
spin_lock(&sci->sc_state_lock);
sci->sc_seq_accepted = sci->sc_seq_request;
+ thread_is_alive = (bool)sci->sc_task;
spin_unlock(&sci->sc_state_lock);
- del_timer_sync(&sci->sc_timer);
+
+ /*
+ * This function does not race with the log writer thread's
+ * termination. Therefore, deleting sc_timer, which should not be
+ * done after the log writer thread exits, can be done safely outside
+ * the area protected by sc_state_lock.
+ */
+ if (thread_is_alive)
+ del_timer_sync(&sci->sc_timer);
}
/**
@@ -2349,7 +2362,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
sci->sc_flush_request &= ~FLUSH_DAT_BIT;
/* re-enable timer if checkpoint creation was not done */
- if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
+ if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task &&
time_before(jiffies, sci->sc_timer.expires))
add_timer(&sci->sc_timer);
}
@@ -2539,6 +2552,7 @@ static int nilfs_segctor_thread(void *arg)
int timeout = 0;
sci->sc_timer_task = current;
+ timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
/* start sync. */
sci->sc_task = current;
@@ -2606,6 +2620,7 @@ static int nilfs_segctor_thread(void *arg)
end_thread:
/* end sync. */
sci->sc_task = NULL;
+ timer_shutdown_sync(&sci->sc_timer);
wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
spin_unlock(&sci->sc_state_lock);
return 0;
@@ -2669,7 +2684,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_gc_inodes);
INIT_LIST_HEAD(&sci->sc_iput_queue);
INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
- timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
@@ -2748,7 +2762,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
down_write(&nilfs->ns_segctor_sem);
- timer_shutdown_sync(&sci->sc_timer);
kfree(sci);
}
commit cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for
some SMU 13.0.4/13.0.11 users") attempted to fix shutdown issues
that were reported since commit 31729e8c21ec ("drm/amd/pm: fixes a
random hang in S4 for SMU v13.0.4/11") but caused issues for some
people.
Adjust the workaround flow to properly only apply in the S4 case:
-> For shutdown go through SMU_MSG_PrepareMp1ForUnload
-> For S4 go through SMU_MSG_GfxDeviceDriverReset and
SMU_MSG_PrepareMp1ForUnload
Reported-and-tested-by: lectrode <electrodexsnet(a)gmail.com>
Closes: https://github.com/void-linux/void-packages/issues/50417
Cc: stable(a)vger.kernel.org
Fixes: cd94d1b182d2 ("dm/amd/pm: Fix problems with reboot/shutdown for some SMU 13.0.4/13.0.11 users")
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
Cc: regressions(a)lists.linux.dev
---
.../drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 20 ++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 4abfcd32747d..c7ab0d7027d9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -226,15 +226,17 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
struct amdgpu_device *adev = smu->adev;
int ret = 0;
- if (!en && adev->in_s4) {
- /* Adds a GFX reset as workaround just before sending the
- * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
- * an invalid state.
- */
- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
- SMU_RESET_MODE_2, NULL);
- if (ret)
- return ret;
+ if (!en && !adev->in_s0ix) {
+ if (adev->in_s4) {
+ /* Adds a GFX reset as workaround just before sending the
+ * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+ * an invalid state.
+ */
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+ SMU_RESET_MODE_2, NULL);
+ if (ret)
+ return ret;
+ }
ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
}
--
2.43.0
FWW this is the output from faddr2line for Linux 6.8.11:
$ scripts/faddr2line --list vmlinux blk_try_enter_queue+0xc/0x75
blk_try_enter_queue+0xc/0x75:
__ref_is_percpu at include/linux/percpu-refcount.h:174 (discriminator 2)
169 * READ_ONCE() is required when fetching it.
170 *
171 * The dependency ordering from the READ_ONCE() pairs
172 * with smp_store_release() in __percpu_ref_switch_to_percpu().
173 */
>174< percpu_ptr = READ_ONCE(ref->percpu_count_ptr);
175
176 /*
177 * Theoretically, the following could test just ATOMIC; however,
178 * then we'd have to mask off DEAD separately as DEAD may be
179 * visible without ATOMIC if we race with percpu_ref_kill(). DEAD
(inlined by) percpu_ref_tryget_live_rcu at
include/linux/percpu-refcount.h:282 (discriminator 2)
277 unsigned long __percpu *percpu_count;
278 bool ret = false;
279
280 WARN_ON_ONCE(!rcu_read_lock_held());
281
>282< if (likely(__ref_is_percpu(ref, &percpu_count))) {
283 this_cpu_inc(*percpu_count);
284 ret = true;
285 } else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
286 ret = atomic_long_inc_not_zero(&ref->data->count);
287 }
(inlined by) blk_try_enter_queue at block/blk.h:43 (discriminator 2)
38 void submit_bio_noacct_nocheck(struct bio *bio);
39
40 static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
41 {
42 rcu_read_lock();
>43< if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
44 goto fail;
45
46 /*
47 * The code that increments the pm_only counter must ensure that the
48 * counter is globally visible before the queue is unfrozen.