From: Xiaoguang Wang xiaoguang.wang@linux.alibaba.com
[ Upstream commit 65a6543da386838f935d2f03f452c5c0acff2a68 ]
While testing io_uring in arm, we found sometimes io_sq_thread() keeps polling io requests even though there are not inflight io requests in block layer. After some investigations, found a possible race about io_kiocb.flags, see below race codes: 1) in the end of io_write() or io_read() req->flags &= ~REQ_F_NEED_CLEANUP; kfree(iovec); return ret;
2) in io_complete_rw_iopoll() if (res != -EAGAIN) req->flags |= REQ_F_IOPOLL_COMPLETED;
In IOPOLL mode, io requests still maybe completed by interrupt, then above codes are not safe, concurrent modifications to req->flags, which is not protected by lock or is not atomic modifications. I also had disassemble io_complete_rw_iopoll() in arm: req->flags |= REQ_F_IOPOLL_COMPLETED; 0xffff000008387b18 <+76>: ldr w0, [x19,#104] 0xffff000008387b1c <+80>: orr w0, w0, #0x1000 0xffff000008387b20 <+84>: str w0, [x19,#104]
Seems that the "req->flags |= REQ_F_IOPOLL_COMPLETED;" is load and modification, two instructions, which obviously is not atomic.
To fix this issue, add a new iopoll_completed in io_kiocb to indicate whether io request is completed.
Signed-off-by: Xiaoguang Wang xiaoguang.wang@linux.alibaba.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- fs/io_uring.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
--- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -513,7 +513,6 @@ enum { REQ_F_INFLIGHT_BIT, REQ_F_CUR_POS_BIT, REQ_F_NOWAIT_BIT, - REQ_F_IOPOLL_COMPLETED_BIT, REQ_F_LINK_TIMEOUT_BIT, REQ_F_TIMEOUT_BIT, REQ_F_ISREG_BIT, @@ -556,8 +555,6 @@ enum { REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), /* must not punt to workers */ REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), - /* polled IO has completed */ - REQ_F_IOPOLL_COMPLETED = BIT(REQ_F_IOPOLL_COMPLETED_BIT), /* has linked timeout */ REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), /* timeout request */ @@ -618,6 +615,8 @@ struct io_kiocb { int cflags; bool needs_fixed_file; u8 opcode; + /* polled IO has completed */ + u8 iopoll_completed;
u16 buf_index;
@@ -1760,7 +1759,7 @@ static int io_do_iopoll(struct io_ring_c * If we find a request that requires polling, break out * and complete those lists first, if we have entries there. */ - if (req->flags & REQ_F_IOPOLL_COMPLETED) { + if (READ_ONCE(req->iopoll_completed)) { list_move_tail(&req->list, &done); continue; } @@ -1941,7 +1940,7 @@ static void io_complete_rw_iopoll(struct req_set_fail_links(req); req->result = res; if (res != -EAGAIN) - req->flags |= REQ_F_IOPOLL_COMPLETED; + WRITE_ONCE(req->iopoll_completed, 1); }
/* @@ -1974,7 +1973,7 @@ static void io_iopoll_req_issued(struct * For fast devices, IO may have already completed. If it has, add * it to the front so we find it first. */ - if (req->flags & REQ_F_IOPOLL_COMPLETED) + if (READ_ONCE(req->iopoll_completed)) list_add(&req->list, &ctx->poll_list); else list_add_tail(&req->list, &ctx->poll_list); @@ -2098,6 +2097,7 @@ static int io_prep_rw(struct io_kiocb *r kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; req->result = 0; + req->iopoll_completed = 0; } else { if (kiocb->ki_flags & IOCB_HIPRI) return -EINVAL;