There are two deadlock scenarios that need addressing, which cause problems when the computer goes to sleep, the interface is set down, and hwrng_unregister() is called. When the deadlock is hit, sleep is delayed for tens of seconds, causing it to fail. These scenarios are:
1) The hwrng kthread can't be stopped while it's sleeping, because it uses msleep_interruptible() instead of schedule_timeout_interruptible(). The fix is a simple moving to the correct function. At the same time, we should cleanup a common and useless dmesg splat in the same area.
2) A normal user thread can't be interrupted by hwrng_unregister() while it's sleeping, because hwrng_unregister() is called from elsewhere. The solution here is to keep track of which thread is currently reading, and asleep, and signal that thread when it's time to unregister. There's a bit of book keeping required to prevent lifetime issues on current.
Cc: stable@vger.kernel.org Reported-by: Gregory Erwin gregerwin256@gmail.com Tested-by: Gregory Erwin gregerwin256@gmail.com Acked-by: Toke Høiland-Jørgensen toke@toke.dk Acked-by: Herbert Xu herbert@gondor.apana.org.au Acked-by: Eric W. Biederman ebiederm@xmission.com Fixes: fcd09c90c3c5 ("ath9k: use hw_random API instead of directly dumping into random.c") Link: https://lore.kernel.org/all/CAO+Okf6ZJC5-nTE_EJUGQtd8JiCkiEHytGgDsFGTEjs0c00... Link: https://lore.kernel.org/lkml/CAO+Okf5k+C+SE6pMVfPf-d8MfVPVq4PO7EY8Hys_DVXten... Link: https://bugs.archlinux.org/task/75138 Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com --- Kalle - I'm resending this to you with the various acks collected from the long sprawling thread, in hopes that this actually makes it into your tree. Please let me know if you *don't* want to take this. -Jason
drivers/char/hw_random/core.c | 30 ++++++++++++++++++++++++---- drivers/net/wireless/ath/ath9k/rng.c | 19 +++++++----------- include/linux/sched.h | 1 + include/linux/sched/signal.h | 2 +- kernel/sched/core.c | 6 ++++++ 5 files changed, 41 insertions(+), 17 deletions(-)
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 16f227b995e8..df45c265878e 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -38,6 +38,8 @@ static LIST_HEAD(rng_list); static DEFINE_MUTEX(rng_mutex); /* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */ static DEFINE_MUTEX(reading_mutex); +/* Keeps track of whoever is wait-reading it currently while holding reading_mutex. */ +static struct task_struct *current_waiting_reader; static int data_avail; static u8 *rng_buffer, *rng_fillbuf; static unsigned short current_quality; @@ -208,6 +210,7 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, int err = 0; int bytes_read, len; struct hwrng *rng; + bool wait;
while (size) { rng = get_current_rng(); @@ -225,9 +228,15 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, goto out_put; } if (!data_avail) { + wait = !(filp->f_flags & O_NONBLOCK); + if (wait && cmpxchg(¤t_waiting_reader, NULL, current) != NULL) { + err = -EINTR; + goto out_unlock_reading; + } bytes_read = rng_get_data(rng, rng_buffer, - rng_buffer_size(), - !(filp->f_flags & O_NONBLOCK)); + rng_buffer_size(), wait); + if (wait && cmpxchg(¤t_waiting_reader, current, NULL) != current) + synchronize_rcu(); if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; @@ -513,8 +522,9 @@ static int hwrng_fillfn(void *unused) break;
if (rc <= 0) { - pr_warn("hwrng: no data available\n"); - msleep_interruptible(10000); + if (kthread_should_stop()) + break; + schedule_timeout_interruptible(HZ * 10); continue; }
@@ -608,13 +618,21 @@ int hwrng_register(struct hwrng *rng) } EXPORT_SYMBOL_GPL(hwrng_register);
+#define UNREGISTERING_READER ((void *)~0UL) + void hwrng_unregister(struct hwrng *rng) { struct hwrng *old_rng, *new_rng; + struct task_struct *waiting_reader; int err;
mutex_lock(&rng_mutex);
+ rcu_read_lock(); + waiting_reader = xchg(¤t_waiting_reader, UNREGISTERING_READER); + if (waiting_reader && waiting_reader != UNREGISTERING_READER) + set_notify_signal(waiting_reader); + rcu_read_unlock(); old_rng = current_rng; list_del(&rng->list); if (current_rng == rng) { @@ -640,6 +658,10 @@ void hwrng_unregister(struct hwrng *rng) }
wait_for_completion(&rng->cleanup_done); + + mutex_lock(&rng_mutex); + cmpxchg(¤t_waiting_reader, UNREGISTERING_READER, NULL); + mutex_unlock(&rng_mutex); } EXPORT_SYMBOL_GPL(hwrng_unregister);
diff --git a/drivers/net/wireless/ath/ath9k/rng.c b/drivers/net/wireless/ath/ath9k/rng.c index cb5414265a9b..8980dc36509e 100644 --- a/drivers/net/wireless/ath/ath9k/rng.c +++ b/drivers/net/wireless/ath/ath9k/rng.c @@ -52,18 +52,13 @@ static int ath9k_rng_data_read(struct ath_softc *sc, u32 *buf, u32 buf_size) return j << 2; }
-static u32 ath9k_rng_delay_get(u32 fail_stats) +static unsigned long ath9k_rng_delay_get(u32 fail_stats) { - u32 delay; - if (fail_stats < 100) - delay = 10; + return HZ / 100; else if (fail_stats < 105) - delay = 1000; - else - delay = 10000; - - return delay; + return HZ; + return HZ * 10; }
static int ath9k_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) @@ -80,10 +75,10 @@ static int ath9k_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) bytes_read += max & 3UL; memzero_explicit(&word, sizeof(word)); } - if (!wait || !max || likely(bytes_read) || fail_stats > 110) + if (!wait || !max || likely(bytes_read) || fail_stats > 110 || + ((current->flags & PF_KTHREAD) && kthread_should_stop()) || + schedule_timeout_interruptible(ath9k_rng_delay_get(++fail_stats))) break; - - msleep_interruptible(ath9k_rng_delay_get(++fail_stats)); }
if (wait && !bytes_read && max) diff --git a/include/linux/sched.h b/include/linux/sched.h index c46f3a63b758..f164098fb614 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1936,6 +1936,7 @@ extern struct task_struct *find_get_task_by_vpid(pid_t nr);
extern int wake_up_state(struct task_struct *tsk, unsigned int state); extern int wake_up_process(struct task_struct *tsk); +extern int wake_up_task_interruptible(struct task_struct *tsk); extern void wake_up_new_task(struct task_struct *tsk);
#ifdef CONFIG_SMP diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index cafbe03eed01..56a15f35e7b3 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -364,7 +364,7 @@ static inline void clear_notify_signal(void) static inline bool __set_notify_signal(struct task_struct *task) { return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) && - !wake_up_state(task, TASK_INTERRUPTIBLE); + !wake_up_task_interruptible(task); }
/* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index da0bf6fe9ecd..b178940185d7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4280,6 +4280,12 @@ int wake_up_process(struct task_struct *p) } EXPORT_SYMBOL(wake_up_process);
+int wake_up_task_interruptible(struct task_struct *p) +{ + return try_to_wake_up(p, TASK_INTERRUPTIBLE, 0); +} +EXPORT_SYMBOL_GPL(wake_up_task_interruptible); + int wake_up_state(struct task_struct *p, unsigned int state) { return try_to_wake_up(p, state, 0);
On Mon, Jul 25, 2022 at 11:55:36PM +0200, Jason A. Donenfeld wrote:
Kalle - I'm resending this to you with the various acks collected from the long sprawling thread, in hopes that this actually makes it into your tree. Please let me know if you *don't* want to take this. -Jason
Hi Jason:
Thanks for all your effort in resolving this issue.
I think Valentin's concern is valid though. The sleep/wakeup paradigm in this patch-set is slightly unusual.
So what I've done is taken your latest patch, and incorporated Valentin's suggestions on top of it. I don't think there is an issue with other drivers as neither approach really changes them.
While doing so I've found a little problem with your patch.
@@ -608,13 +618,21 @@ int hwrng_register(struct hwrng *rng) } EXPORT_SYMBOL_GPL(hwrng_register); +#define UNREGISTERING_READER ((void *)~0UL)
void hwrng_unregister(struct hwrng *rng) { struct hwrng *old_rng, *new_rng;
- struct task_struct *waiting_reader; int err;
mutex_lock(&rng_mutex);
- rcu_read_lock();
- waiting_reader = xchg(¤t_waiting_reader, UNREGISTERING_READER);
- if (waiting_reader && waiting_reader != UNREGISTERING_READER)
set_notify_signal(waiting_reader);
- rcu_read_unlock(); old_rng = current_rng; list_del(&rng->list); if (current_rng == rng) {
@@ -640,6 +658,10 @@ void hwrng_unregister(struct hwrng *rng) } wait_for_completion(&rng->cleanup_done);
- mutex_lock(&rng_mutex);
- cmpxchg(¤t_waiting_reader, UNREGISTERING_READER, NULL);
- mutex_unlock(&rng_mutex);
In between the setting and clearing of current_waiting_reader, a reader that gets a new RNG may fail simply because it detected the value of UNREGITERING_READER.
So I've fixed this by using a different mechanism to detect whether an RNG is going away, namely list_empty on rng->list. In doing so we no longer need to modify current_waiting_reader in hwrng_unregister and therefore it no longer needs to be modified using xchg.
---8<--- From: Jason A. Donenfeld Jason@zx2c4.com
There are two deadlock scenarios that need addressing, which cause problems when the computer goes to sleep, the interface is set down, and hwrng_unregister() is called. When the deadlock is hit, sleep is delayed for tens of seconds, causing it to fail. These scenarios are:
1) The hwrng kthread can't be stopped while it's sleeping, because it uses msleep_interruptible() instead of schedule_timeout(). The fix is a simple moving to the correct function. At the same time, we should cleanup a common and useless dmesg splat in the same area.
2) A normal user thread can't be interrupted by hwrng_unregister() while it's sleeping, because hwrng_unregister() is called from elsewhere. The solution here is to keep track of which thread is currently reading, and asleep, and signal that thread when it's time to unregister. There's a bit of book keeping required to prevent lifetime issues on current.
Cc: stable@vger.kernel.org Reported-by: Gregory Erwin gregerwin256@gmail.com Fixes: fcd09c90c3c5 ("ath9k: use hw_random API instead of directly dumping into random.c") Link: https://lore.kernel.org/all/CAO+Okf6ZJC5-nTE_EJUGQtd8JiCkiEHytGgDsFGTEjs0c00... Link: https://lore.kernel.org/lkml/CAO+Okf5k+C+SE6pMVfPf-d8MfVPVq4PO7EY8Hys_DVXten... Link: https://bugs.archlinux.org/task/75138 Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 16f227b995e8..4f254b65ce0b 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -10,6 +10,7 @@ * of the GNU General Public License, incorporated herein by reference. */
+#include <linux/atomic.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/err.h> @@ -22,6 +23,7 @@ #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/random.h> +#include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/uaccess.h> @@ -38,6 +40,8 @@ static LIST_HEAD(rng_list); static DEFINE_MUTEX(rng_mutex); /* Protects rng read functions, data_avail, rng_buffer and rng_fillbuf */ static DEFINE_MUTEX(reading_mutex); +/* Keeps track of whoever is wait-reading it currently while holding reading_mutex. */ +static struct task_struct *current_waiting_reader; static int data_avail; static u8 *rng_buffer, *rng_fillbuf; static unsigned short current_quality; @@ -57,6 +61,23 @@ static void hwrng_manage_rngd(struct hwrng *rng); static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, int wait);
+/* rng_dying without a barrier. Use this if there is a barrier elsewhere. */ +static inline bool __rng_dying(struct hwrng *rng) +{ + return list_empty(&rng->list); +} + +static inline bool rng_dying(struct hwrng *rng) +{ + /* + * This barrier pairs with the one in + * hwrng_unregister. This ensures that + * we see any attempt to unregister rng. + */ + smp_mb(); + return list_empty(&rng->list); +} + static size_t rng_buffer_size(void) { return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; @@ -204,6 +225,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) { + int synch = false; ssize_t ret = 0; int err = 0; int bytes_read, len; @@ -225,9 +247,16 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, goto out_put; } if (!data_avail) { + bool wait = !(filp->f_flags & O_NONBLOCK); + + if (wait) + current_waiting_reader = current; bytes_read = rng_get_data(rng, rng_buffer, - rng_buffer_size(), - !(filp->f_flags & O_NONBLOCK)); + rng_buffer_size(), wait); + if (wait) { + current_waiting_reader = NULL; + synch |= rng_dying(rng); + } if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading; @@ -269,6 +298,9 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out: + if (synch) + synchronize_rcu(); + return ret ? : err;
out_unlock_reading: @@ -501,20 +533,26 @@ static int hwrng_fillfn(void *unused) if (IS_ERR(rng) || !rng) break; mutex_lock(&reading_mutex); + current_waiting_reader = current; rc = rng_get_data(rng, rng_fillbuf, rng_buffer_size(), 1); + current_waiting_reader = NULL; if (current_quality != rng->quality) rng->quality = current_quality; /* obsolete */ quality = rng->quality; mutex_unlock(&reading_mutex); + if (rng_dying(rng)) + synchronize_rcu(); put_rng(rng);
if (!quality) break;
if (rc <= 0) { - pr_warn("hwrng: no data available\n"); - msleep_interruptible(10000); + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + __set_current_state(TASK_RUNNING); + schedule_timeout(10 * HZ); continue; }
@@ -616,8 +654,22 @@ void hwrng_unregister(struct hwrng *rng) mutex_lock(&rng_mutex);
old_rng = current_rng; - list_del(&rng->list); + list_del_init(&rng->list); if (current_rng == rng) { + struct task_struct *waiting_reader; + + /* + * Ensure that rng->list is cleared before the + * subsequent read of current_waiting_reader. + */ + smp_mb(); + + rcu_read_lock(); + waiting_reader = current_waiting_reader; + if (waiting_reader) + wake_up_process(waiting_reader); + rcu_read_unlock(); + err = enable_best_rng(); if (err) { drop_current_rng(); @@ -685,6 +737,17 @@ void devm_hwrng_unregister(struct device *dev, struct hwrng *rng) } EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
+long hwrng_msleep(struct hwrng *rng, unsigned int msecs) +{ + set_current_state(TASK_INTERRUPTIBLE); + + if (__rng_dying(rng)) + __set_current_state(TASK_RUNNING); + + return schedule_timeout(msecs); +} +EXPORT_SYMBOL_GPL(hwrng_msleep); + static int __init hwrng_modinit(void) { int ret; diff --git a/drivers/net/wireless/ath/ath9k/rng.c b/drivers/net/wireless/ath/ath9k/rng.c index cb5414265a9b..58c0ab01771b 100644 --- a/drivers/net/wireless/ath/ath9k/rng.c +++ b/drivers/net/wireless/ath/ath9k/rng.c @@ -83,7 +83,8 @@ static int ath9k_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) if (!wait || !max || likely(bytes_read) || fail_stats > 110) break;
- msleep_interruptible(ath9k_rng_delay_get(++fail_stats)); + if (hwrng_msleep(rng, ath9k_rng_delay_get(++fail_stats))) + break; }
if (wait && !bytes_read && max) diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index aa1d4da03538..021a971bfd68 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -61,4 +61,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
+extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs); + #endif /* LINUX_HWRANDOM_H_ */
Hi Herbert,
On Tue, Jul 26, 2022 at 05:44:41PM +0800, Herbert Xu wrote:
Thanks for all your effort in resolving this issue.
I think Valentin's concern is valid though. The sleep/wakeup paradigm in this patch-set is slightly unusual.
So what I've done is taken your latest patch, and incorporated Valentin's suggestions on top of it. I don't think there is an issue with other drivers as neither approach really changes them.
Thanks so much for taking charge of this patch. I really, really appreciate it. I'm also glad that we now have a working implementation of Valentin's suggestion.
Just two small notes: - I had a hard time testing everything because I don't actually have an ath9k, so I wound up playing with variations on https://xn--4db.cc/vnRj8zQw/diff in case that helps. I assume you've got your own way of testing things, but in case not, maybe that diff is useful. - I'll mark this patch as "other tree" in the wireless tree's patchwork now that you're on board so Kalle doesn't have to deal with it.
In between the setting and clearing of current_waiting_reader, a reader that gets a new RNG may fail simply because it detected the value of UNREGITERING_READER.
Yea, I actually didn't consider this a bug, but just "nothing starts until everything else is totally done" semantics. Not wanting those semantics is understandable. I haven't looked in detail at how you've done that safely without locking issues, but if you've tested it, then I trust it works. When I was playing around with different things, I encountered a number of locking issues, depending on what the last locker was - a user space thread, the rng kthread, or something opening up a reader afresh. So just be sure the various combinations still work.
Also, I like the encapsulation you did with hwrng_msleep(). That's a very clean way of accomplishing what Valentin suggested, a lot cleaner than the gunk I had in mind would be required. It also opens up a different approach for fixing this, which is what I would have preferred from the beginning, but the prereq kernel work hadn't landed yet.
Specifically, Instead of doing all of this task interruption stuff and keeping track of readers and using RCU and all that fragile code, we can instead just use wait_completion_interruptible_timeout() inside of hwrng_msleep(), using a condition variable that's private to the hwrng. Then, when it's time for all the readers to quit, we just set the condition! Ta-da, no need for keeping track of who's reading, the difference between a kthread and a normal thread, and a variety of other concerns.
That won't be possible until 5.20, though, and this patch is needed to fix earlier kernels, so the intermediate step here is still required. But please keep this on the back burner of your mind. The 5.20 enablement patch for that is here:
https://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git/...
When developing that patch, I had used my little test harness above and hwrng to test that it worked as intended. But I hadn't thought of an elegant way of plumbing the condition through without changing up tons of callsites. However, now you've come up with this hwrng_msleep() solution to that. So now all the puzzle pieces are ready for a 5.20 fix moving this toward the nicer condition setup.
A few odd unimportant comments below:
+/* rng_dying without a barrier. Use this if there is a barrier elsewhere. */ +static inline bool __rng_dying(struct hwrng *rng) +{
- return list_empty(&rng->list);
+}
+static inline bool rng_dying(struct hwrng *rng) +{
- /*
* This barrier pairs with the one in
* hwrng_unregister. This ensures that
* we see any attempt to unregister rng.
*/
- smp_mb();
- return list_empty(&rng->list);
Unimportant nit: could call __rng_dying() instead so these don't diverge by accident.
+}
static size_t rng_buffer_size(void) { return SMP_CACHE_BYTES < 32 ? 32 : SMP_CACHE_BYTES; @@ -204,6 +225,7 @@ static inline int rng_get_data(struct hwrng *rng, u8 *buffer, size_t size, static ssize_t rng_dev_read(struct file *filp, char __user *buf, size_t size, loff_t *offp) {
- int synch = false; ssize_t ret = 0; int err = 0; int bytes_read, len;
@@ -225,9 +247,16 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, goto out_put; } if (!data_avail) {
bool wait = !(filp->f_flags & O_NONBLOCK);
if (wait)
current_waiting_reader = current; bytes_read = rng_get_data(rng, rng_buffer,
rng_buffer_size(),
!(filp->f_flags & O_NONBLOCK));
rng_buffer_size(), wait);
if (wait) {
current_waiting_reader = NULL;
synch |= rng_dying(rng);
} if (bytes_read < 0) { err = bytes_read; goto out_unlock_reading;
@@ -269,6 +298,9 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out:
- if (synch)
synchronize_rcu();
The synch usage no longer makes sense to me here. In my version, synchronize_rcu() would almost never be called. It's only purpose was to prevent rng_dev_read() from returning if the critical section inside of hwrng_unregister() was in flight. But now it looks like it will be called everytime there are no RNGs left? Or maybe I understood how this works. Either way, please don't feel that you have to write back an explanation; just make sure it has those same sorts of semantics when testing.
if (rc <= 0) {
pr_warn("hwrng: no data available\n");
msleep_interruptible(10000);
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
__set_current_state(TASK_RUNNING);
schedule_timeout(10 * HZ); continue; }
Here you made a change whose utility I don't understand. My original hunk was:
+ if (kthread_should_stop()) + break; + schedule_timeout_interruptible(HZ * 10);
Which I think is a bit cleaner, as schedule_timeout_interruptible sets the state to INTERRUPTIBLE and such.
Regards, Jason
On 26/07/22 12:17, Jason A. Donenfeld wrote:
if (rc <= 0) {
pr_warn("hwrng: no data available\n");
msleep_interruptible(10000);
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
__set_current_state(TASK_RUNNING);
schedule_timeout(10 * HZ); continue; }
Here you made a change whose utility I don't understand. My original hunk was:
if (kthread_should_stop())
break;
schedule_timeout_interruptible(HZ * 10);
Which I think is a bit cleaner, as schedule_timeout_interruptible sets the state to INTERRUPTIBLE and such.
For any sort of wait loop, you need the state write to happen before the loop-break condition is checked.
Consider:
READ kthread_should_stop() == false kthread_stop() set_bit(KTHREAD_SHOULD_STOP, &kthread->flags); wake_up_process(k); // Reads TASK_RUNNING, schedule_timeout_interruptible(); // does nothing // We're now blocked, having missed a wakeup
That's why the canonical wait loop pattern is:
for (;;) { set_current_state(TASK_UNINTERRUPTIBLE);
if (CONDITION) break;
schedule(); } __set_current_state(TASK_RUNNING);
(grep "wait loop" in kernel/sched/core.c)
Regards, Jason
"Jason A. Donenfeld" Jason@zx2c4.com writes:
On Tue, Jul 26, 2022 at 05:44:41PM +0800, Herbert Xu wrote:
Thanks for all your effort in resolving this issue.
I think Valentin's concern is valid though. The sleep/wakeup paradigm in this patch-set is slightly unusual.
So what I've done is taken your latest patch, and incorporated Valentin's suggestions on top of it. I don't think there is an issue with other drivers as neither approach really changes them.
Thanks so much for taking charge of this patch. I really, really appreciate it. I'm also glad that we now have a working implementation of Valentin's suggestion.
Just two small notes:
- I had a hard time testing everything because I don't actually have an ath9k, so I wound up playing with variations on https://xn--4db.cc/vnRj8zQw/diff in case that helps. I assume you've got your own way of testing things, but in case not, maybe that diff is useful.
- I'll mark this patch as "other tree" in the wireless tree's patchwork now that you're on board so Kalle doesn't have to deal with it.
Please don't touch linux-wireless patchwork project, if other people modify it we don't know what's happening. I prefer that I handle the patches myself in patchwork as that way I'm best up-to-date.
But just so that I understand correctly, after Herbert's patch no ath9k changes is needed anymore? That sounds great.
On Wed, Jul 27, 2022 at 09:32:20AM +0300, Kalle Valo wrote:
But just so that I understand correctly, after Herbert's patch no ath9k changes is needed anymore? That sounds great.
No a small change is still needed in ath9k to completely fix the problem, basically a one-liner. Either I could split that out and give it to you once the core bits land in mainline, or we could just do it in one patch with your ack.
The chances of conflicts are remote.
Cheers,
Herbert Xu herbert@gondor.apana.org.au writes:
On Wed, Jul 27, 2022 at 09:32:20AM +0300, Kalle Valo wrote:
But just so that I understand correctly, after Herbert's patch no ath9k changes is needed anymore? That sounds great.
No a small change is still needed in ath9k to completely fix the problem, basically a one-liner. Either I could split that out and give it to you once the core bits land in mainline, or we could just do it in one patch with your ack.
The chances of conflicts are remote.
It's a lot easier to handle that in a single patch via your tree. So please do CC Toke and linux-wireless when you submit your patch so that we can then ack it.
Hi Jason:
On Tue, Jul 26, 2022 at 12:17:02PM +0200, Jason A. Donenfeld wrote:
Yea, I actually didn't consider this a bug, but just "nothing starts until everything else is totally done" semantics. Not wanting those semantics is understandable. I haven't looked in detail at how you've
The issue is when you switch from one hwrng to another, this could cause a user-space reader to fail during the switch-over. Granted it's not a big deal but the fix isn't that onerous.
done that safely without locking issues, but if you've tested it, then I trust it works. When I was playing around with different things, I encountered a number of locking issues, depending on what the last locker was - a user space thread, the rng kthread, or something opening up a reader afresh. So just be sure the various combinations still work.
Yes this is subtle and I don't plan on pushing this into mainline right away.
Specifically, Instead of doing all of this task interruption stuff and keeping track of readers and using RCU and all that fragile code, we can instead just use wait_completion_interruptible_timeout() inside of hwrng_msleep(), using a condition variable that's private to the hwrng. Then, when it's time for all the readers to quit, we just set the condition! Ta-da, no need for keeping track of who's reading, the difference between a kthread and a normal thread, and a variety of other concerns.
Yes, using a higher-level abstraction than wake_up_process/schedule is always preferred.
Unimportant nit: could call __rng_dying() instead so these don't diverge by accident.
Good idea, I will do this in the next repost.
@@ -269,6 +298,9 @@ static ssize_t rng_dev_read(struct file *filp, char __user *buf, } } out:
- if (synch)
synchronize_rcu();
The synch usage no longer makes sense to me here. In my version, synchronize_rcu() would almost never be called. It's only purpose was to prevent rng_dev_read() from returning if the critical section inside of hwrng_unregister() was in flight. But now it looks like it will be called everytime there are no RNGs left? Or maybe I understood how this works. Either way, please don't feel that you have to write back an explanation; just make sure it has those same sorts of semantics when testing.
The purpose is still the same, prevent the current thread from going away while the RCU critical-section in hwrng_unregister is ongoing.
With my code the synch is triggered if we obtained an rng, then potentially went to sleep (wait == true), and upon finishing we found that the rng is undergoing unregistration (rng_dying == true).
If we switch to completions then this issue goes away because we will be using standard wait queues instead of our own current_waiting_reader.
Here you made a change whose utility I don't understand. My original hunk was:
if (kthread_should_stop())
break;
schedule_timeout_interruptible(HZ * 10);
Which I think is a bit cleaner, as schedule_timeout_interruptible sets the state to INTERRUPTIBLE and such.
Valentin has already explained this. This is the standard paradigm for calling schedule_timeout when you need to ensure that a specific condition wakes up the thread. But as you suggested using a higher- level mechanism such as completions will render this unnecessary.
Thanks,
On Tue, Jul 26, 2022 at 12:17:02PM +0200, Jason A. Donenfeld wrote:
That won't be possible until 5.20, though, and this patch is needed to fix earlier kernels, so the intermediate step here is still required. But please keep this on the back burner of your mind. The 5.20 enablement patch for that is here:
https://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace.git/...
Actually, I don't think this will be needed if we switch over to completions. This is because we will no longer be using kthread_should_stop to determine when we should exit the loop, instead it will be the completion that has the final say.
Cheers,
From: Jason A. Donenfeld Jason@zx2c4.com
There are two deadlock scenarios that need addressing, which cause problems when the computer goes to sleep, the interface is set down, and hwrng_unregister() is called. When the deadlock is hit, sleep is delayed for tens of seconds, causing it to fail. These scenarios are:
1) The hwrng kthread can't be stopped while it's sleeping, because it uses msleep_interruptible() which does not react to kthread_stop.
2) A normal user thread can't be interrupted by hwrng_unregister() while it's sleeping, because hwrng_unregister() is called from elsewhere.
We solve both issues by add a completion object called dying that fulfils waiters once we have started the process in hwrng_unregister.
At the same time, we should cleanup a common and useless dmesg splat in the same area.
Cc: stable@vger.kernel.org Reported-by: Gregory Erwin gregerwin256@gmail.com Fixes: fcd09c90c3c5 ("ath9k: use hw_random API instead of directly dumping into random.c") Link: https://lore.kernel.org/all/CAO+Okf6ZJC5-nTE_EJUGQtd8JiCkiEHytGgDsFGTEjs0c00... Link: https://lore.kernel.org/lkml/CAO+Okf5k+C+SE6pMVfPf-d8MfVPVq4PO7EY8Hys_DVXten... Link: https://bugs.archlinux.org/task/75138 Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 16f227b995e8..21dce7acf086 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -507,16 +508,17 @@ static int hwrng_fillfn(void *unused) rng->quality = current_quality; /* obsolete */ quality = rng->quality; mutex_unlock(&reading_mutex); + + if (rc <= 0) + hwrng_msleep(rng, 10000); + put_rng(rng);
if (!quality) break;
- if (rc <= 0) { - pr_warn("hwrng: no data available\n"); - msleep_interruptible(10000); + if (rc <= 0) continue; - }
/* If we cannot credit at least one bit of entropy, * keep track of the remainder for the next iteration @@ -570,6 +572,7 @@ int hwrng_register(struct hwrng *rng)
init_completion(&rng->cleanup_done); complete(&rng->cleanup_done); + init_completion(&rng->dying);
if (!current_rng || (!cur_rng_set_by_user && rng->quality > current_rng->quality)) { @@ -617,6 +620,7 @@ void hwrng_unregister(struct hwrng *rng)
old_rng = current_rng; list_del(&rng->list); + complete_all(&rng->dying); if (current_rng == rng) { err = enable_best_rng(); if (err) { @@ -685,6 +689,14 @@ void devm_hwrng_unregister(struct device *dev, struct hwrng *rng) } EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
+long hwrng_msleep(struct hwrng *rng, unsigned int msecs) +{ + unsigned long timeout = msecs_to_jiffies(msecs) + 1; + + return wait_for_completion_interruptible_timeout(&rng->dying, timeout); +} +EXPORT_SYMBOL_GPL(hwrng_msleep); + static int __init hwrng_modinit(void) { int ret; diff --git a/drivers/net/wireless/ath/ath9k/rng.c b/drivers/net/wireless/ath/ath9k/rng.c index cb5414265a9b..58c0ab01771b 100644 --- a/drivers/net/wireless/ath/ath9k/rng.c +++ b/drivers/net/wireless/ath/ath9k/rng.c @@ -83,7 +83,8 @@ static int ath9k_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) if (!wait || !max || likely(bytes_read) || fail_stats > 110) break;
- msleep_interruptible(ath9k_rng_delay_get(++fail_stats)); + if (hwrng_msleep(rng, ath9k_rng_delay_get(++fail_stats))) + break; }
if (wait && !bytes_read && max) diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index aa1d4da03538..77c2885c4c13 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -50,6 +50,7 @@ struct hwrng { struct list_head list; struct kref ref; struct completion cleanup_done; + struct completion dying; };
struct device; @@ -61,4 +62,6 @@ extern int devm_hwrng_register(struct device *dev, struct hwrng *rng); extern void hwrng_unregister(struct hwrng *rng); extern void devm_hwrng_unregister(struct device *dve, struct hwrng *rng);
+extern long hwrng_msleep(struct hwrng *rng, unsigned int msecs); + #endif /* LINUX_HWRANDOM_H_ */
On 28 July 2022 12:22:20 CEST, Herbert Xu herbert@gondor.apana.org.au wrote:
From: Jason A. Donenfeld Jason@zx2c4.com
There are two deadlock scenarios that need addressing, which cause problems when the computer goes to sleep, the interface is set down, and hwrng_unregister() is called. When the deadlock is hit, sleep is delayed for tens of seconds, causing it to fail. These scenarios are:
- The hwrng kthread can't be stopped while it's sleeping, because it
uses msleep_interruptible() which does not react to kthread_stop.
- A normal user thread can't be interrupted by hwrng_unregister() while
it's sleeping, because hwrng_unregister() is called from elsewhere.
We solve both issues by add a completion object called dying that fulfils waiters once we have started the process in hwrng_unregister.
At the same time, we should cleanup a common and useless dmesg splat in the same area.
Cc: stable@vger.kernel.org Reported-by: Gregory Erwin gregerwin256@gmail.com Fixes: fcd09c90c3c5 ("ath9k: use hw_random API instead of directly dumping into random.c") Link: https://lore.kernel.org/all/CAO+Okf6ZJC5-nTE_EJUGQtd8JiCkiEHytGgDsFGTEjs0c00... Link: https://lore.kernel.org/lkml/CAO+Okf5k+C+SE6pMVfPf-d8MfVPVq4PO7EY8Hys_DVXten... Link: https://bugs.archlinux.org/task/75138 Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au
Acked-by: Toke Høiland-Jørgensen toke@toke.dk
Toke Høiland-Jørgensen toke@toke.dk writes:
On 28 July 2022 12:22:20 CEST, Herbert Xu herbert@gondor.apana.org.au wrote:
From: Jason A. Donenfeld Jason@zx2c4.com
There are two deadlock scenarios that need addressing, which cause problems when the computer goes to sleep, the interface is set down, and hwrng_unregister() is called. When the deadlock is hit, sleep is delayed for tens of seconds, causing it to fail. These scenarios are:
- The hwrng kthread can't be stopped while it's sleeping, because it
uses msleep_interruptible() which does not react to kthread_stop.
- A normal user thread can't be interrupted by hwrng_unregister() while
it's sleeping, because hwrng_unregister() is called from elsewhere.
We solve both issues by add a completion object called dying that fulfils waiters once we have started the process in hwrng_unregister.
At the same time, we should cleanup a common and useless dmesg splat in the same area.
Cc: stable@vger.kernel.org Reported-by: Gregory Erwin gregerwin256@gmail.com Fixes: fcd09c90c3c5 ("ath9k: use hw_random API instead of directly dumping into random.c") Link: https://lore.kernel.org/all/CAO+Okf6ZJC5-nTE_EJUGQtd8JiCkiEHytGgDsFGTEjs0c00... Link: https://lore.kernel.org/lkml/CAO+Okf5k+C+SE6pMVfPf-d8MfVPVq4PO7EY8Hys_DVXten... Link: https://bugs.archlinux.org/task/75138 Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au
Acked-by: Toke Høiland-Jørgensen toke@toke.dk
Acked-by: Kalle Valo kvalo@kernel.org
Herbert, feel free to take this via your tree. Thanks!
linux-stable-mirror@lists.linaro.org