While fixing refcounting, e34ecee2ae79 ("aio: Fix a trinity splat") incorrectly removed explicit RCU grace period before freeing kioctx. The intention seems to be depending on the internal RCU grace periods of percpu_ref; however, percpu_ref uses a different flavor of RCU, sched-RCU. This can lead to kioctx being freed while RCU read protected dereferences are still in progress.
Fix it by updating free_ioctx() to go through call_rcu() explicitly.
v2: Comment added to explain double bouncing.
Signed-off-by: Tejun Heo tj@kernel.org Reported-by: Jann Horn jannh@google.com Fixes: e34ecee2ae79 ("aio: Fix a trinity splat") Cc: Kent Overstreet kent.overstreet@gmail.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: stable@vger.kernel.org --- fs/aio.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c index a062d75..eb2e0cf 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -115,7 +115,8 @@ struct kioctx { struct page **ring_pages; long nr_pages;
- struct work_struct free_work; + struct rcu_head free_rcu; + struct work_struct free_work; /* see free_ioctx() */
/* * signals when all in-flight requests are done @@ -588,6 +589,12 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) return cancel(&kiocb->common); }
+/* + * free_ioctx() should be RCU delayed to synchronize against the RCU + * protected lookup_ioctx() and also needs process context to call + * aio_free_ring(), so the double bouncing through kioctx->free_rcu and + * ->free_work. + */ static void free_ioctx(struct work_struct *work) { struct kioctx *ctx = container_of(work, struct kioctx, free_work); @@ -601,6 +608,14 @@ static void free_ioctx(struct work_struct *work) kmem_cache_free(kioctx_cachep, ctx); }
+static void free_ioctx_rcufn(struct rcu_head *head) +{ + struct kioctx *ctx = container_of(head, struct kioctx, free_rcu); + + INIT_WORK(&ctx->free_work, free_ioctx); + schedule_work(&ctx->free_work); +} + static void free_ioctx_reqs(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, reqs); @@ -609,8 +624,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) complete(&ctx->rq_wait->comp);
- INIT_WORK(&ctx->free_work, free_ioctx); - schedule_work(&ctx->free_work); + /* Synchronize against RCU protected table->table[] dereferences */ + call_rcu(&ctx->free_rcu, free_ioctx_rcufn); }
/* @@ -838,7 +853,7 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, table->table[ctx->id] = NULL; spin_unlock(&mm->ioctx_lock);
- /* percpu_ref_kill() will do the necessary call_rcu() */ + /* free_ioctx_reqs() will do the necessary RCU synchronization */ wake_up_all(&ctx->wait);
/*
While converting ioctx index from a list to a table, db446a08c23d ("aio: convert the ioctx list to table lookup v3") missed tagging kioctx_table->table[] as an array of RCU pointers and using the appropriate RCU accessors. This introduces a small window in the lookup path where init and access may race.
Mark kioctx_table->table[] with __rcu and use the approriate RCU accessors when using the field.
Signed-off-by: Tejun Heo tj@kernel.org Reported-by: Jann Horn jannh@google.com Fixes: db446a08c23d ("aio: convert the ioctx list to table lookup v3") Cc: Benjamin LaHaise bcrl@kvack.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: stable@vger.kernel.org --- fs/aio.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c index eb2e0cf..6bcd3fb 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -68,9 +68,9 @@ struct aio_ring { #define AIO_RING_PAGES 8
struct kioctx_table { - struct rcu_head rcu; - unsigned nr; - struct kioctx *table[]; + struct rcu_head rcu; + unsigned nr; + struct kioctx __rcu *table[]; };
struct kioctx_cpu { @@ -330,7 +330,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma) for (i = 0; i < table->nr; i++) { struct kioctx *ctx;
- ctx = table->table[i]; + ctx = rcu_dereference(table->table[i]); if (ctx && ctx->aio_ring_file == file) { if (!atomic_read(&ctx->dead)) { ctx->user_id = ctx->mmap_base = vma->vm_start; @@ -666,9 +666,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) while (1) { if (table) for (i = 0; i < table->nr; i++) - if (!table->table[i]) { + if (!rcu_access_pointer(table->table[i])) { ctx->id = i; - table->table[i] = ctx; + rcu_assign_pointer(table->table[i], ctx); spin_unlock(&mm->ioctx_lock);
/* While kioctx setup is in progress, @@ -849,8 +849,8 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, }
table = rcu_dereference_raw(mm->ioctx_table); - WARN_ON(ctx != table->table[ctx->id]); - table->table[ctx->id] = NULL; + WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id])); + RCU_INIT_POINTER(table->table[ctx->id], NULL); spin_unlock(&mm->ioctx_lock);
/* free_ioctx_reqs() will do the necessary RCU synchronization */ @@ -895,7 +895,8 @@ void exit_aio(struct mm_struct *mm)
skipped = 0; for (i = 0; i < table->nr; ++i) { - struct kioctx *ctx = table->table[i]; + struct kioctx *ctx = + rcu_dereference_protected(table->table[i], true);
if (!ctx) { skipped++; @@ -1084,7 +1085,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) if (!table || id >= table->nr) goto out;
- ctx = table->table[id]; + ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { percpu_ref_get(&ctx->users); ret = ctx;
linux-stable-mirror@lists.linaro.org