The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Date: Fri, 14 May 2021 17:27:24 -0700
Subject: [PATCH] mm: fix struct page layout on 32-bit systems
32-bit architectures which expect 8-byte alignment for 8-byte integers and
need 64-bit DMA addresses (arm, mips, ppc) had their struct page
inadvertently expanded in 2019. When the dma_addr_t was added, it forced
the alignment of the union to 8 bytes, which inserted a 4 byte gap between
'flags' and the union.
Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
This restores the alignment to that of an unsigned long. We always
store the low bits in the first word to prevent the PageTail bit from
being inadvertently set on a big endian platform. If that happened,
get_user_pages_fast() racing against a page which was freed and
reallocated to the page_pool could dereference a bogus compound_head(),
which would be hard to trace back to this cause.
Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org
Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Acked-by: Ilias Apalodimas <ilias.apalodimas(a)linaro.org>
Acked-by: Jesper Dangaard Brouer <brouer(a)redhat.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Tested-by: Matteo Croce <mcroce(a)linux.microsoft.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6613b26a8894..5aacc1c10a45 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,10 +97,10 @@ struct page {
};
struct { /* page_pool used by netstack */
/**
- * @dma_addr: might require a 64-bit value even on
+ * @dma_addr: might require a 64-bit value on
* 32-bit architectures.
*/
- dma_addr_t dma_addr;
+ unsigned long dma_addr[2];
};
struct { /* slab, slob and slub */
union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 6d517a37c18b..b4b6de909c93 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
- return page->dma_addr;
+ dma_addr_t ret = page->dma_addr[0];
+ if (sizeof(dma_addr_t) > sizeof(unsigned long))
+ ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+ return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ page->dma_addr[0] = addr;
+ if (sizeof(dma_addr_t) > sizeof(unsigned long))
+ page->dma_addr[1] = upper_32_bits(addr);
}
static inline bool is_page_pool_compiled_in(void)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9ec1aa9640ad..3c4c4c7a0402 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
struct page *page,
unsigned int dma_sync_size)
{
+ dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
dma_sync_size = min(dma_sync_size, pool->p.max_len);
- dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+ dma_sync_single_range_for_device(pool->p.dev, dma_addr,
pool->p.offset, dma_sync_size,
pool->p.dma_dir);
}
@@ -195,7 +197,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
if (dma_mapping_error(pool->p.dev, dma))
return false;
- page->dma_addr = dma;
+ page_pool_set_dma_addr(page, dma);
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
@@ -331,13 +333,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
*/
goto skip_dma_unmap;
- dma = page->dma_addr;
+ dma = page_pool_get_dma_addr(page);
- /* When page is unmapped, it cannot be returned our pool */
+ /* When page is unmapped, it cannot be returned to our pool */
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC);
- page->dma_addr = 0;
+ page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9ddb3c14afba8bc5950ed297f02d4ae05ff35cd1 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy(a)infradead.org>
Date: Fri, 14 May 2021 17:27:24 -0700
Subject: [PATCH] mm: fix struct page layout on 32-bit systems
32-bit architectures which expect 8-byte alignment for 8-byte integers and
need 64-bit DMA addresses (arm, mips, ppc) had their struct page
inadvertently expanded in 2019. When the dma_addr_t was added, it forced
the alignment of the union to 8 bytes, which inserted a 4 byte gap between
'flags' and the union.
Fix this by storing the dma_addr_t in one or two adjacent unsigned longs.
This restores the alignment to that of an unsigned long. We always
store the low bits in the first word to prevent the PageTail bit from
being inadvertently set on a big endian platform. If that happened,
get_user_pages_fast() racing against a page which was freed and
reallocated to the page_pool could dereference a bogus compound_head(),
which would be hard to trace back to this cause.
Link: https://lkml.kernel.org/r/20210510153211.1504886-1-willy@infradead.org
Fixes: c25fff7171be ("mm: add dma_addr_t to struct page")
Signed-off-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Acked-by: Ilias Apalodimas <ilias.apalodimas(a)linaro.org>
Acked-by: Jesper Dangaard Brouer <brouer(a)redhat.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Tested-by: Matteo Croce <mcroce(a)linux.microsoft.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6613b26a8894..5aacc1c10a45 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,10 +97,10 @@ struct page {
};
struct { /* page_pool used by netstack */
/**
- * @dma_addr: might require a 64-bit value even on
+ * @dma_addr: might require a 64-bit value on
* 32-bit architectures.
*/
- dma_addr_t dma_addr;
+ unsigned long dma_addr[2];
};
struct { /* slab, slob and slub */
union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 6d517a37c18b..b4b6de909c93 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
- return page->dma_addr;
+ dma_addr_t ret = page->dma_addr[0];
+ if (sizeof(dma_addr_t) > sizeof(unsigned long))
+ ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+ return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+ page->dma_addr[0] = addr;
+ if (sizeof(dma_addr_t) > sizeof(unsigned long))
+ page->dma_addr[1] = upper_32_bits(addr);
}
static inline bool is_page_pool_compiled_in(void)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9ec1aa9640ad..3c4c4c7a0402 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
struct page *page,
unsigned int dma_sync_size)
{
+ dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
dma_sync_size = min(dma_sync_size, pool->p.max_len);
- dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+ dma_sync_single_range_for_device(pool->p.dev, dma_addr,
pool->p.offset, dma_sync_size,
pool->p.dma_dir);
}
@@ -195,7 +197,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
if (dma_mapping_error(pool->p.dev, dma))
return false;
- page->dma_addr = dma;
+ page_pool_set_dma_addr(page, dma);
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
@@ -331,13 +333,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
*/
goto skip_dma_unmap;
- dma = page->dma_addr;
+ dma = page_pool_get_dma_addr(page);
- /* When page is unmapped, it cannot be returned our pool */
+ /* When page is unmapped, it cannot be returned to our pool */
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC);
- page->dma_addr = 0;
+ page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8ec7791bae1327b1c279c5cd6e929c3b12daaf0a Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Thu, 6 May 2021 14:49:58 +1000
Subject: [PATCH] powerpc/64s: Fix crashes when toggling stf barrier
The STF (store-to-load forwarding) barrier mitigation can be
enabled/disabled at runtime via a debugfs file (stf_barrier), which
causes the kernel to patch itself to enable/disable the relevant
mitigations.
However depending on which mitigation we're using, it may not be safe to
do that patching while other CPUs are active. For example the following
crash:
User access of kernel address (c00000003fff5af0) - exploit attempt? (uid: 0)
segfault (11) at c00000003fff5af0 nip 7fff8ad12198 lr 7fff8ad121f8 code 1
code: 40820128 e93c00d0 e9290058 7c292840 40810058 38600000 4bfd9a81 e8410018
code: 2c030006 41810154 3860ffb6 e9210098 <e94d8ff0> 7d295279 39400000 40820a3c
Shows that we returned to userspace without restoring the user r13
value, due to executing the partially patched STF exit code.
Fix it by doing the patching under stop machine. The CPUs that aren't
doing the patching will be spinning in the core of the stop machine
logic. That is currently sufficient for our purposes, because none of
the patching we do is to that code or anywhere in the vicinity.
Fixes: a048a07d7f45 ("powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit")
Cc: stable(a)vger.kernel.org # v4.17+
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20210506044959.1298123-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 1fd31b4b0e13..10083add8b33 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
#include <asm/cputable.h>
#include <asm/code-patching.h>
#include <asm/page.h>
@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
: "unknown");
}
+static int __do_stf_barrier_fixups(void *data)
+{
+ enum stf_barrier_type *types = data;
+
+ do_stf_entry_barrier_fixups(*types);
+ do_stf_exit_barrier_fixups(*types);
+
+ return 0;
+}
void do_stf_barrier_fixups(enum stf_barrier_type types)
{
- do_stf_entry_barrier_fixups(types);
- do_stf_exit_barrier_fixups(types);
+ /*
+ * The call to the fallback entry flush, and the fallback/sync-ori exit
+ * flush can not be safely patched in/out while other CPUs are executing
+ * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
+ * spin in the stop machine core with interrupts hard disabled.
+ */
+ stop_machine(__do_stf_barrier_fixups, &types, NULL);
}
void do_uaccess_flush_fixups(enum l1d_flush_type types)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 8ec7791bae1327b1c279c5cd6e929c3b12daaf0a Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe(a)ellerman.id.au>
Date: Thu, 6 May 2021 14:49:58 +1000
Subject: [PATCH] powerpc/64s: Fix crashes when toggling stf barrier
The STF (store-to-load forwarding) barrier mitigation can be
enabled/disabled at runtime via a debugfs file (stf_barrier), which
causes the kernel to patch itself to enable/disable the relevant
mitigations.
However depending on which mitigation we're using, it may not be safe to
do that patching while other CPUs are active. For example the following
crash:
User access of kernel address (c00000003fff5af0) - exploit attempt? (uid: 0)
segfault (11) at c00000003fff5af0 nip 7fff8ad12198 lr 7fff8ad121f8 code 1
code: 40820128 e93c00d0 e9290058 7c292840 40810058 38600000 4bfd9a81 e8410018
code: 2c030006 41810154 3860ffb6 e9210098 <e94d8ff0> 7d295279 39400000 40820a3c
Shows that we returned to userspace without restoring the user r13
value, due to executing the partially patched STF exit code.
Fix it by doing the patching under stop machine. The CPUs that aren't
doing the patching will be spinning in the core of the stop machine
logic. That is currently sufficient for our purposes, because none of
the patching we do is to that code or anywhere in the vicinity.
Fixes: a048a07d7f45 ("powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit")
Cc: stable(a)vger.kernel.org # v4.17+
Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au>
Link: https://lore.kernel.org/r/20210506044959.1298123-1-mpe@ellerman.id.au
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 1fd31b4b0e13..10083add8b33 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
#include <asm/cputable.h>
#include <asm/code-patching.h>
#include <asm/page.h>
@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
: "unknown");
}
+static int __do_stf_barrier_fixups(void *data)
+{
+ enum stf_barrier_type *types = data;
+
+ do_stf_entry_barrier_fixups(*types);
+ do_stf_exit_barrier_fixups(*types);
+
+ return 0;
+}
void do_stf_barrier_fixups(enum stf_barrier_type types)
{
- do_stf_entry_barrier_fixups(types);
- do_stf_exit_barrier_fixups(types);
+ /*
+ * The call to the fallback entry flush, and the fallback/sync-ori exit
+ * flush can not be safely patched in/out while other CPUs are executing
+ * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
+ * spin in the stop machine core with interrupts hard disabled.
+ */
+ stop_machine(__do_stf_barrier_fixups, &types, NULL);
}
void do_uaccess_flush_fixups(enum l1d_flush_type types)
The patch below does not apply to the 5.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 447c19f3b5074409c794b350b10306e1da1ef4ba Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 14 May 2021 12:02:50 +0100
Subject: [PATCH] io_uring: fix ltout double free on completion race
Always remove linked timeout on io_link_timeout_fn() from the master
request link list, otherwise we may get use-after-free when first
io_link_timeout_fn() puts linked timeout in the fail path, and then
will be found and put on master's free.
Cc: stable(a)vger.kernel.org # 5.10+
Fixes: 90cd7e424969d ("io_uring: track link timeout's master explicitly")
Reported-and-tested-by: syzbot+5a864149dd970b546223(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/69c46bf6ce37fec4fdcd98f0882e18eb07ce693a.16209901…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9ac5e278a91e..599102cc6dfc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6354,10 +6354,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
* We don't expect the list to be empty, that will only happen if we
* race with the completion of the linked work.
*/
- if (prev && req_ref_inc_not_zero(prev))
+ if (prev) {
io_remove_next_linked(prev);
- else
- prev = NULL;
+ if (!req_ref_inc_not_zero(prev))
+ prev = NULL;
+ }
spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (prev) {
The patch below does not apply to the 5.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 447c19f3b5074409c794b350b10306e1da1ef4ba Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 14 May 2021 12:02:50 +0100
Subject: [PATCH] io_uring: fix ltout double free on completion race
Always remove linked timeout on io_link_timeout_fn() from the master
request link list, otherwise we may get use-after-free when first
io_link_timeout_fn() puts linked timeout in the fail path, and then
will be found and put on master's free.
Cc: stable(a)vger.kernel.org # 5.10+
Fixes: 90cd7e424969d ("io_uring: track link timeout's master explicitly")
Reported-and-tested-by: syzbot+5a864149dd970b546223(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/69c46bf6ce37fec4fdcd98f0882e18eb07ce693a.16209901…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9ac5e278a91e..599102cc6dfc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6354,10 +6354,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
* We don't expect the list to be empty, that will only happen if we
* race with the completion of the linked work.
*/
- if (prev && req_ref_inc_not_zero(prev))
+ if (prev) {
io_remove_next_linked(prev);
- else
- prev = NULL;
+ if (!req_ref_inc_not_zero(prev))
+ prev = NULL;
+ }
spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (prev) {
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 447c19f3b5074409c794b350b10306e1da1ef4ba Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 14 May 2021 12:02:50 +0100
Subject: [PATCH] io_uring: fix ltout double free on completion race
Always remove linked timeout on io_link_timeout_fn() from the master
request link list, otherwise we may get use-after-free when first
io_link_timeout_fn() puts linked timeout in the fail path, and then
will be found and put on master's free.
Cc: stable(a)vger.kernel.org # 5.10+
Fixes: 90cd7e424969d ("io_uring: track link timeout's master explicitly")
Reported-and-tested-by: syzbot+5a864149dd970b546223(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/69c46bf6ce37fec4fdcd98f0882e18eb07ce693a.16209901…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9ac5e278a91e..599102cc6dfc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6354,10 +6354,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
* We don't expect the list to be empty, that will only happen if we
* race with the completion of the linked work.
*/
- if (prev && req_ref_inc_not_zero(prev))
+ if (prev) {
io_remove_next_linked(prev);
- else
- prev = NULL;
+ if (!req_ref_inc_not_zero(prev))
+ prev = NULL;
+ }
spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (prev) {
The patch below does not apply to the 5.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a298232ee6b9a1d5d732aa497ff8be0d45b5bd82 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 7 May 2021 21:06:38 +0100
Subject: [PATCH] io_uring: fix link timeout refs
WARNING: CPU: 0 PID: 10242 at lib/refcount.c:28 refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
RIP: 0010:refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
Call Trace:
__refcount_sub_and_test include/linux/refcount.h:283 [inline]
__refcount_dec_and_test include/linux/refcount.h:315 [inline]
refcount_dec_and_test include/linux/refcount.h:333 [inline]
io_put_req fs/io_uring.c:2140 [inline]
io_queue_linked_timeout fs/io_uring.c:6300 [inline]
__io_queue_sqe+0xbef/0xec0 fs/io_uring.c:6354
io_submit_sqe fs/io_uring.c:6534 [inline]
io_submit_sqes+0x2bbd/0x7c50 fs/io_uring.c:6660
__do_sys_io_uring_enter fs/io_uring.c:9240 [inline]
__se_sys_io_uring_enter+0x256/0x1d60 fs/io_uring.c:9182
io_link_timeout_fn() should put only one reference of the linked timeout
request, however in case of racing with the master request's completion
first io_req_complete() puts one and then io_put_req_deferred() is
called.
Cc: stable(a)vger.kernel.org # 5.12+
Fixes: 9ae1f8dd372e0 ("io_uring: fix inconsistent lock state")
Reported-by: syzbot+a2910119328ce8e7996f(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/ff51018ff29de5ffa76f09273ef48cb24c720368.16204176…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f46acbbeed57..9ac5e278a91e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6363,10 +6363,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (prev) {
io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
io_put_req_deferred(prev, 1);
+ io_put_req_deferred(req, 1);
} else {
io_req_complete_post(req, -ETIME, 0);
}
- io_put_req_deferred(req, 1);
return HRTIMER_NORESTART;
}
The patch below does not apply to the 5.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a298232ee6b9a1d5d732aa497ff8be0d45b5bd82 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 7 May 2021 21:06:38 +0100
Subject: [PATCH] io_uring: fix link timeout refs
WARNING: CPU: 0 PID: 10242 at lib/refcount.c:28 refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
RIP: 0010:refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
Call Trace:
__refcount_sub_and_test include/linux/refcount.h:283 [inline]
__refcount_dec_and_test include/linux/refcount.h:315 [inline]
refcount_dec_and_test include/linux/refcount.h:333 [inline]
io_put_req fs/io_uring.c:2140 [inline]
io_queue_linked_timeout fs/io_uring.c:6300 [inline]
__io_queue_sqe+0xbef/0xec0 fs/io_uring.c:6354
io_submit_sqe fs/io_uring.c:6534 [inline]
io_submit_sqes+0x2bbd/0x7c50 fs/io_uring.c:6660
__do_sys_io_uring_enter fs/io_uring.c:9240 [inline]
__se_sys_io_uring_enter+0x256/0x1d60 fs/io_uring.c:9182
io_link_timeout_fn() should put only one reference of the linked timeout
request, however in case of racing with the master request's completion
first io_req_complete() puts one and then io_put_req_deferred() is
called.
Cc: stable(a)vger.kernel.org # 5.12+
Fixes: 9ae1f8dd372e0 ("io_uring: fix inconsistent lock state")
Reported-by: syzbot+a2910119328ce8e7996f(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/ff51018ff29de5ffa76f09273ef48cb24c720368.16204176…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f46acbbeed57..9ac5e278a91e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6363,10 +6363,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (prev) {
io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
io_put_req_deferred(prev, 1);
+ io_put_req_deferred(req, 1);
} else {
io_req_complete_post(req, -ETIME, 0);
}
- io_put_req_deferred(req, 1);
return HRTIMER_NORESTART;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From a298232ee6b9a1d5d732aa497ff8be0d45b5bd82 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence(a)gmail.com>
Date: Fri, 7 May 2021 21:06:38 +0100
Subject: [PATCH] io_uring: fix link timeout refs
WARNING: CPU: 0 PID: 10242 at lib/refcount.c:28 refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
RIP: 0010:refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28
Call Trace:
__refcount_sub_and_test include/linux/refcount.h:283 [inline]
__refcount_dec_and_test include/linux/refcount.h:315 [inline]
refcount_dec_and_test include/linux/refcount.h:333 [inline]
io_put_req fs/io_uring.c:2140 [inline]
io_queue_linked_timeout fs/io_uring.c:6300 [inline]
__io_queue_sqe+0xbef/0xec0 fs/io_uring.c:6354
io_submit_sqe fs/io_uring.c:6534 [inline]
io_submit_sqes+0x2bbd/0x7c50 fs/io_uring.c:6660
__do_sys_io_uring_enter fs/io_uring.c:9240 [inline]
__se_sys_io_uring_enter+0x256/0x1d60 fs/io_uring.c:9182
io_link_timeout_fn() should put only one reference of the linked timeout
request, however in case of racing with the master request's completion
first io_req_complete() puts one and then io_put_req_deferred() is
called.
Cc: stable(a)vger.kernel.org # 5.12+
Fixes: 9ae1f8dd372e0 ("io_uring: fix inconsistent lock state")
Reported-by: syzbot+a2910119328ce8e7996f(a)syzkaller.appspotmail.com
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Link: https://lore.kernel.org/r/ff51018ff29de5ffa76f09273ef48cb24c720368.16204176…
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f46acbbeed57..9ac5e278a91e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6363,10 +6363,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
if (prev) {
io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
io_put_req_deferred(prev, 1);
+ io_put_req_deferred(req, 1);
} else {
io_req_complete_post(req, -ETIME, 0);
}
- io_put_req_deferred(req, 1);
return HRTIMER_NORESTART;
}