Add support for the POLLFREE flag to force completing the iocb inline in
aio_poll_wake(). A thread may use it to signal its exit and/or request
cleanup while a poll request is pending. In this case, aio_poll_wake()
needs to make sure it doesn't keep any reference to the queue entry
before returning from wake, to avoid a possible use-after-free via the
poll_cancel() path.
The UAF issue was found during binder and aio interactions in a certain
sequence of events [1].
The POLLFREE flag is no longer exclusive to epoll and is now shared
with aio. Remove the comment from poll.h to avoid confusion.
[1] https://lore.kernel.org/r/CAKUd0B_TCXRY4h1hTztfwWbNSFQqsudDLn2S_28csgWZmZAG…
Fixes: af5c72b1fc7a ("Fix aio_poll() races")
Signed-off-by: Ramji Jiyani <ramjiyani(a)google.com>
Reviewed-by: Jeff Moyer <jmoyer(a)redhat.com>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
Cc: stable(a)vger.kernel.org # 4.19+
---
Changes since v1:
- Removed parentheses around POLLFREE macro definition as per review.
- Updated description to refer to the UAF issue discussion this patch fixes.
- Updated description to remove reference to the parentheses change.
- Added Reviewed-by from Jeff Moyer
Changes since v2:
- Added Fixes tag.
- Added stable tag for backporting on 4.19+ LTS releases
Changes since v3:
- Updated patch description
- Updated Fixes tag to issue manifestation origin
Changes since v4:
- Added Reviewed-by from Christoph Hellwig
---
fs/aio.c | 45 ++++++++++++++++++---------------
include/uapi/asm-generic/poll.h | 2 +-
2 files changed, 26 insertions(+), 21 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 51b08ab01dff..5d539c05df42 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1674,6 +1674,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
{
struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+ struct kioctx *ctx = iocb->ki_ctx;
__poll_t mask = key_to_poll(key);
unsigned long flags;
@@ -1683,29 +1684,33 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del_init(&req->wait.entry);
- if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) {
- struct kioctx *ctx = iocb->ki_ctx;
+ /*
+ * Use irqsave/irqrestore because not all filesystems (e.g. fuse)
+ * call this function with IRQs disabled and because IRQs have to
+ * be disabled before ctx_lock is obtained.
+ */
+ if (mask & POLLFREE) {
+ /* Force complete iocb inline to remove refs to deleted entry */
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ } else if (!(mask && spin_trylock_irqsave(&ctx->ctx_lock, flags))) {
+ /* Can't complete iocb inline; schedule for later */
+ schedule_work(&req->work);
+ return 1;
+ }
- /*
- * Try to complete the iocb inline if we can. Use
- * irqsave/irqrestore because not all filesystems (e.g. fuse)
- * call this function with IRQs disabled and because IRQs
- * have to be disabled before ctx_lock is obtained.
- */
- list_del(&iocb->ki_list);
- iocb->ki_res.res = mangle_poll(mask);
- req->done = true;
- if (iocb->ki_eventfd && eventfd_signal_allowed()) {
- iocb = NULL;
- INIT_WORK(&req->work, aio_poll_put_work);
- schedule_work(&req->work);
- }
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- if (iocb)
- iocb_put(iocb);
- } else {
+ /* complete iocb inline */
+ list_del(&iocb->ki_list);
+ iocb->ki_res.res = mangle_poll(mask);
+ req->done = true;
+ if (iocb->ki_eventfd && eventfd_signal_allowed()) {
+ iocb = NULL;
+ INIT_WORK(&req->work, aio_poll_put_work);
schedule_work(&req->work);
}
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+ if (iocb)
+ iocb_put(iocb);
+
return 1;
}
diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h
index 41b509f410bf..f9c520ce4bf4 100644
--- a/include/uapi/asm-generic/poll.h
+++ b/include/uapi/asm-generic/poll.h
@@ -29,7 +29,7 @@
#define POLLRDHUP 0x2000
#endif
-#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */
+#define POLLFREE (__force __poll_t)0x4000
#define POLL_BUSY_LOOP (__force __poll_t)0x8000
--
2.33.0.1079.g6e70778dc9-goog
The patch titled
Subject: mm/damon/dbgfs: fix missed use of damon_dbgfs_lock
has been removed from the -mm tree. Its filename was
mm-damon-dbgfs-fix-missed-use-of-damon_dbgfs_lock.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/dbgfs: fix missed use of damon_dbgfs_lock
DAMON debugfs is supposed to protect dbgfs_ctxs, dbgfs_nr_ctxs, and
dbgfs_dirs using damon_dbgfs_lock. However, some of the code is accessing
the variables without the protection. This commit fixes it by protecting
all such accesses.
Link: https://lkml.kernel.org/r/20211110145758.16558-3-sj@kernel.org
Fixes: 75c1c2b53c78 ("mm/damon/dbgfs: support multiple contexts")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/dbgfs.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
--- a/mm/damon/dbgfs.c~mm-damon-dbgfs-fix-missed-use-of-damon_dbgfs_lock
+++ a/mm/damon/dbgfs.c
@@ -877,12 +877,14 @@ static ssize_t dbgfs_monitor_on_write(st
return -EINVAL;
}
+ mutex_lock(&damon_dbgfs_lock);
if (!strncmp(kbuf, "on", count)) {
int i;
for (i = 0; i < dbgfs_nr_ctxs; i++) {
if (damon_targets_empty(dbgfs_ctxs[i])) {
kfree(kbuf);
+ mutex_unlock(&damon_dbgfs_lock);
return -EINVAL;
}
}
@@ -892,6 +894,7 @@ static ssize_t dbgfs_monitor_on_write(st
} else {
ret = -EINVAL;
}
+ mutex_unlock(&damon_dbgfs_lock);
if (!ret)
ret = count;
@@ -944,15 +947,16 @@ static int __init __damon_dbgfs_init(voi
static int __init damon_dbgfs_init(void)
{
- int rc;
+ int rc = -ENOMEM;
+ mutex_lock(&damon_dbgfs_lock);
dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL);
if (!dbgfs_ctxs)
- return -ENOMEM;
+ goto out;
dbgfs_ctxs[0] = dbgfs_new_ctx();
if (!dbgfs_ctxs[0]) {
kfree(dbgfs_ctxs);
- return -ENOMEM;
+ goto out;
}
dbgfs_nr_ctxs = 1;
@@ -963,6 +967,8 @@ static int __init damon_dbgfs_init(void)
pr_err("%s: dbgfs init failed\n", __func__);
}
+out:
+ mutex_unlock(&damon_dbgfs_lock);
return rc;
}
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-remove-some-no-need-func-definitions-in-damonh-file-fix.patch
The patch titled
Subject: mm/damon/dbgfs: use '__GFP_NOWARN' for user-specified size buffer allocation
has been removed from the -mm tree. Its filename was
mm-damon-dbgfs-use-__gfp_nowarn-for-user-specified-size-buffer-allocation.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/dbgfs: use '__GFP_NOWARN' for user-specified size buffer allocation
Patch series "DAMON fixes".
This patch (of 2):
DAMON users can trigger the warning below in '__alloc_pages()' by invoking
write() on some DAMON debugfs files with an arbitrarily high count argument,
because the DAMON debugfs interface allocates some buffers based on the
user-specified 'count'.
if (unlikely(order >= MAX_ORDER)) {
WARN_ON_ONCE(!(gfp & __GFP_NOWARN));
return NULL;
}
Because the DAMON debugfs interface code checks failure of the
'kmalloc()', this commit simply suppresses the warnings by adding
'__GFP_NOWARN' flag.
Link: https://lkml.kernel.org/r/20211110145758.16558-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20211110145758.16558-2-sj@kernel.org
Fixes: 4bc05954d007 ("mm/damon: implement a debugfs-based user space interface")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/dbgfs.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
--- a/mm/damon/dbgfs.c~mm-damon-dbgfs-use-__gfp_nowarn-for-user-specified-size-buffer-allocation
+++ a/mm/damon/dbgfs.c
@@ -32,7 +32,7 @@ static char *user_input_str(const char _
if (*ppos)
return ERR_PTR(-EINVAL);
- kbuf = kmalloc(count + 1, GFP_KERNEL);
+ kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN);
if (!kbuf)
return ERR_PTR(-ENOMEM);
@@ -133,7 +133,7 @@ static ssize_t dbgfs_schemes_read(struct
char *kbuf;
ssize_t len;
- kbuf = kmalloc(count, GFP_KERNEL);
+ kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
if (!kbuf)
return -ENOMEM;
@@ -452,7 +452,7 @@ static ssize_t dbgfs_init_regions_read(s
char *kbuf;
ssize_t len;
- kbuf = kmalloc(count, GFP_KERNEL);
+ kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
if (!kbuf)
return -ENOMEM;
@@ -578,7 +578,7 @@ static ssize_t dbgfs_kdamond_pid_read(st
char *kbuf;
ssize_t len;
- kbuf = kmalloc(count, GFP_KERNEL);
+ kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
if (!kbuf)
return -ENOMEM;
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-remove-some-no-need-func-definitions-in-damonh-file-fix.patch
The patch titled
Subject: hugetlb, userfaultfd: fix reservation restore on userfaultfd error
has been removed from the -mm tree. Its filename was
hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Mina Almasry <almasrymina(a)google.com>
Subject: hugetlb, userfaultfd: fix reservation restore on userfaultfd error
Currently in the is_continue case in hugetlb_mcopy_atomic_pte(), if we
bail out using "goto out_release_unlock;" in the cases where idx >= size,
or !huge_pte_none(), the code will detect that new_pagecache_page ==
false, and so call restore_reserve_on_error(). In this case I see
restore_reserve_on_error() delete the reservation, and the following call
to remove_inode_hugepages() will increment h->resv_hugepages causing a
100% reproducible leak.
We should treat the is_continue case similar to adding a page into the
pagecache and set new_pagecache_page to true, to indicate that there is no
reservation to restore on the error path, and we need not call
restore_reserve_on_error(). Rename new_pagecache_page to
page_in_pagecache to make that clear.
Link: https://lkml.kernel.org/r/20211117193825.378528-1-almasrymina@google.com
Fixes: c7b1850dfb41 ("hugetlb: don't pass page cache pages to restore_reserve_on_error")
Signed-off-by: Mina Almasry <almasrymina(a)google.com>
Reported-by: James Houghton <jthoughton(a)google.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Wei Xu <weixugc(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/mm/hugetlb.c~hugetlb-userfaultfd-fix-reservation-restore-on-userfaultfd-error
+++ a/mm/hugetlb.c
@@ -5736,13 +5736,14 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
int ret = -ENOMEM;
struct page *page;
int writable;
- bool new_pagecache_page = false;
+ bool page_in_pagecache = false;
if (is_continue) {
ret = -EFAULT;
page = find_lock_page(mapping, idx);
if (!page)
goto out;
+ page_in_pagecache = true;
} else if (!*pagep) {
/* If a page already exists, then it's UFFDIO_COPY for
* a non-missing case. Return -EEXIST.
@@ -5830,7 +5831,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
ret = huge_add_to_page_cache(page, mapping, idx);
if (ret)
goto out_release_nounlock;
- new_pagecache_page = true;
+ page_in_pagecache = true;
}
ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
@@ -5894,7 +5895,7 @@ out_release_unlock:
if (vm_shared || is_continue)
unlock_page(page);
out_release_nounlock:
- if (!new_pagecache_page)
+ if (!page_in_pagecache)
restore_reserve_on_error(h, dst_vma, dst_addr, page);
put_page(page);
goto out;
_
Patches currently in -mm which might be from almasrymina(a)google.com are
The patch titled
Subject: mm: kmemleak: slob: respect SLAB_NOLEAKTRACE flag
has been removed from the -mm tree. Its filename was
mm-kmemleak-slob-respect-slab_noleaktrace-flag.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Rustam Kovhaev <rkovhaev(a)gmail.com>
Subject: mm: kmemleak: slob: respect SLAB_NOLEAKTRACE flag
When kmemleak is enabled for SLOB, the system does not boot and does not
print anything to the console. At a very early stage in the boot process we
hit infinite recursion from kmemleak_init() and eventually the kernel crashes.
kmemleak_init() specifies SLAB_NOLEAKTRACE for KMEM_CACHE(), but
kmem_cache_create_usercopy() removes it because CACHE_CREATE_MASK is not
valid for SLOB.
Let's fix CACHE_CREATE_MASK and make kmemleak work with SLOB.
Link: https://lkml.kernel.org/r/20211115020850.3154366-1-rkovhaev@gmail.com
Fixes: d8843922fba4 ("slab: Ignore internal flags in cache creation")
Signed-off-by: Rustam Kovhaev <rkovhaev(a)gmail.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Muchun Song <songmuchun(a)bytedance.com>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: Glauber Costa <glommer(a)parallels.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slab.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/slab.h~mm-kmemleak-slob-respect-slab_noleaktrace-flag
+++ a/mm/slab.h
@@ -147,7 +147,7 @@ static inline slab_flags_t kmem_cache_fl
#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | SLAB_ACCOUNT)
#else
-#define SLAB_CACHE_FLAGS (0)
+#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE)
#endif
/* Common flags available with current configuration */
_
Patches currently in -mm which might be from rkovhaev(a)gmail.com are