We should make sure that async workqueue is canceled on exit, but on some corner case, we found that the async workqueue is not canceled on exit in the linux-5.4. So we started an in-depth investigation. Fortunately, we finally found the problem. The commit:
1c4404efcf2c ("io_uring: make sure async workqueue is canceled on exit")
did not completely solve this problem. This patch series to solve this problem completely. And there's no upstream variant of this commit, so this patch series is just fix the linux-5.4.y stable branch.
Muchun Song (2): io_uring: Fix missing smp_mb() in io_cancel_async_work() io_uring: Fix remove irrelevant req from the task_list
Yinyin Zhu (1): io_uring: Fix resource leaking when kill the process
fs/io_uring.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-)
From: Yinyin Zhu zhuyinyin@bytedance.com
The commit <1c4404efcf2c0> ("<io_uring: make sure async workqueue is canceled on exit>") doesn't solve the resource leak problem totally! When kworker is doing a io task for the io_uring, The process which submitted the io task has received a SIGKILL signal from the user. Then the io_cancel_async_work function could have sent a SIGINT signal to the kworker, but the judging condition is wrong. So it doesn't send a SIGINT signal to the kworker, then caused the resource leaking problem. Why the juding condition is wrong? Think that The process is a multi-threaded process, we call the thread of the process which has submitted the io task Thread1. So the req->task is the current macro of the Thread1. when all the threads of the process have done exit procedure, the last thread will call the io_cancel_async_work, but the last thread may not the Thread1, so the req->task is not equal to the task. so it doesn't send the SIGINT signal. To fix this bug, we alter the task attribute of the req with struct files_struct. And the judging condition is "req->files == files"
Fixes: 1c4404efcf2c0 ("io_uring: make sure async workqueue is canceled on exit") Signed-off-by: Yinyin Zhu zhuyinyin@bytedance.com --- fs/io_uring.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index e0200406765c3..de4f7b3a0d789 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -339,7 +339,7 @@ struct io_kiocb { u64 user_data; u32 result; u32 sequence; - struct task_struct *task; + struct files_struct *files;
struct fs_struct *fs;
@@ -513,7 +513,7 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx, } }
- req->task = current; + req->files = current->files;
spin_lock_irqsave(&ctx->task_lock, flags); list_add(&req->task_list, &ctx->task_list); @@ -3708,7 +3708,7 @@ static int io_uring_fasync(int fd, struct file *file, int on) }
static void io_cancel_async_work(struct io_ring_ctx *ctx, - struct task_struct *task) + struct files_struct *files) { if (list_empty(&ctx->task_list)) return; @@ -3720,7 +3720,7 @@ static void io_cancel_async_work(struct io_ring_ctx *ctx, req = list_first_entry(&ctx->task_list, struct io_kiocb, task_list); list_del_init(&req->task_list); req->flags |= REQ_F_CANCEL; - if (req->work_task && (!task || req->task == task)) + if (req->work_task && (!files || req->files == files)) send_sig(SIGINT, req->work_task, 1); } spin_unlock_irq(&ctx->task_lock); @@ -3745,7 +3745,7 @@ static int io_uring_flush(struct file *file, void *data) struct io_ring_ctx *ctx = file->private_data;
if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) - io_cancel_async_work(ctx, current); + io_cancel_async_work(ctx, data);
return 0; }
The store to req->flags and load req->work_task should not be reordering in io_cancel_async_work(). We should make sure that either we store REQ_F_CANCE flag to req->flags or we see the req->work_task setted in io_sq_wq_submit_work().
Fixes: 1c4404efcf2c ("io_uring: make sure async workqueue is canceled on exit") Signed-off-by: Muchun Song songmuchun@bytedance.com --- fs/io_uring.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index de4f7b3a0d789..adaafe857b074 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2252,6 +2252,12 @@ static void io_sq_wq_submit_work(struct work_struct *work)
if (!ret) { req->work_task = current; + + /* + * Pairs with the smp_store_mb() (B) in + * io_cancel_async_work(). + */ + smp_mb(); /* A */ if (req->flags & REQ_F_CANCEL) { ret = -ECANCELED; goto end_req; @@ -3719,7 +3725,15 @@ static void io_cancel_async_work(struct io_ring_ctx *ctx,
req = list_first_entry(&ctx->task_list, struct io_kiocb, task_list); list_del_init(&req->task_list); - req->flags |= REQ_F_CANCEL; + + /* + * The below executes an smp_mb(), which matches with the + * smp_mb() (A) in io_sq_wq_submit_work() such that either + * we store REQ_F_CANCEL flag to req->flags or we see the + * req->work_task setted in io_sq_wq_submit_work(). + */ + smp_store_mb(req->flags, req->flags | REQ_F_CANCEL); /* B */ + if (req->work_task && (!files || req->files == files)) send_sig(SIGINT, req->work_task, 1); }
If the process 0 has been initialized io_uring is complete, and then fork process 1. If process 1 exits and it leads to delete all reqs from the task_list. If we kill process 0. We will not send SIGINT signal to the kworker. So we can not remove the req from the task_list. The io_sq_wq_submit_work() can do that for us.
Fixes: 1c4404efcf2c ("io_uring: make sure async workqueue is canceled on exit") Signed-off-by: Muchun Song songmuchun@bytedance.com --- fs/io_uring.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index adaafe857b074..2b95be09c0dad 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2277,13 +2277,11 @@ static void io_sq_wq_submit_work(struct work_struct *work) break; cond_resched(); } while (1); -end_req: - if (!list_empty(&req->task_list)) { - spin_lock_irq(&ctx->task_lock); - list_del_init(&req->task_list); - spin_unlock_irq(&ctx->task_lock); - } } +end_req: + spin_lock_irq(&ctx->task_lock); + list_del_init(&req->task_list); + spin_unlock_irq(&ctx->task_lock);
/* drop submission reference */ io_put_req(req); @@ -3716,15 +3714,16 @@ static int io_uring_fasync(int fd, struct file *file, int on) static void io_cancel_async_work(struct io_ring_ctx *ctx, struct files_struct *files) { + struct io_kiocb *req; + if (list_empty(&ctx->task_list)) return;
spin_lock_irq(&ctx->task_lock); - while (!list_empty(&ctx->task_list)) { - struct io_kiocb *req;
- req = list_first_entry(&ctx->task_list, struct io_kiocb, task_list); - list_del_init(&req->task_list); + list_for_each_entry(req, &ctx->task_list, task_list) { + if (files && req->files != files) + continue;
/* * The below executes an smp_mb(), which matches with the @@ -3734,7 +3733,7 @@ static void io_cancel_async_work(struct io_ring_ctx *ctx, */ smp_store_mb(req->flags, req->flags | REQ_F_CANCEL); /* B */
- if (req->work_task && (!files || req->files == files)) + if (req->work_task) send_sig(SIGINT, req->work_task, 1); } spin_unlock_irq(&ctx->task_lock);
On Tue, Sep 15, 2020 at 04:15:48PM +0800, Muchun Song wrote:
We should make sure that async workqueue is canceled on exit, but on some corner case, we found that the async workqueue is not canceled on exit in the linux-5.4. So we started an in-depth investigation. Fortunately, we finally found the problem. The commit:
1c4404efcf2c ("io_uring: make sure async workqueue is canceled on exit")
did not completely solve this problem. This patch series to solve this problem completely. And there's no upstream variant of this commit, so this patch series is just fix the linux-5.4.y stable branch.
Muchun Song (2): io_uring: Fix missing smp_mb() in io_cancel_async_work() io_uring: Fix remove irrelevant req from the task_list
Yinyin Zhu (1): io_uring: Fix resource leaking when kill the process
fs/io_uring.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-)
-- 2.11.0
<formletter>
This is not the correct way to submit patches for inclusion in the stable kernel tree. Please read: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html for how to do this properly.
</formletter>
linux-stable-mirror@lists.linaro.org