From: Eric Dumazet <edumazet@google.com>
This patch series backports upstream commit 915d975b2ffa ("net: deal with integer overflows in kmalloc_reserve()").

Backporting patches 1-3/4 is required in order to apply patch 4/4, which fixes the roundup issue in kmalloc_reserve().
1/4 net: add SKB_HEAD_ALIGN() helper
2/4 net: remove osize variable in __alloc_skb()
3/4 net: factorize code in kmalloc_reserve()
4/4 net: deal with integer overflows in kmalloc_reserve()
 include/linux/skbuff.h |  8 ++++++++
 net/core/skbuff.c      | 49 +++++++++++++++++++++----------------------------
 2 files changed, 29 insertions(+), 28 deletions(-)
From: Eric Dumazet <edumazet@google.com>
commit 115f1a5c42bdad9a9ea356fc0b4a39ec7537947f upstream.
We have many places using this expression:
SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
Use of SKB_HEAD_ALIGN() will allow us to clean them up.
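To illustrate what the helper folds together, here is a minimal userspace sketch (not part of the patch); SMP_CACHE_BYTES and the skb_shared_info size below are stand-in values, since the real ones depend on the architecture and kernel configuration:

#include <stdio.h>

/* stand-in values; the real ones are architecture/config dependent */
#define SMP_CACHE_BYTES		64
#define SHINFO_SIZE		320	/* pretend sizeof(struct skb_shared_info) */

#define ALIGN(x, a)		(((x) + (a) - 1) & ~((unsigned long)(a) - 1))
#define SKB_DATA_ALIGN(X)	ALIGN(X, SMP_CACHE_BYTES)
#define SKB_HEAD_ALIGN(X)	(SKB_DATA_ALIGN(X) + SKB_DATA_ALIGN(SHINFO_SIZE))

int main(void)
{
	unsigned long size = 1500;

	/* the open-coded pattern repeated throughout net/core/skbuff.c ... */
	unsigned long old_way = SKB_DATA_ALIGN(size) + SKB_DATA_ALIGN(SHINFO_SIZE);
	/* ... and the new helper give the same result */
	unsigned long new_way = SKB_HEAD_ALIGN(size);

	printf("old=%lu new=%lu\n", old_way, new_way);	/* both print 1856 here */
	return 0;
}

The two computations are identical by construction; the helper only removes the repetition at each call site.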
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
[Ajay: Regenerated the patch for v6.1.y]
Signed-off-by: Ajay Kaher <akaher@vmware.com>
---
 include/linux/skbuff.h |  8 ++++++++
 net/core/skbuff.c      | 18 ++++++------------
 2 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cc5ed2c..2feee14 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -261,6 +261,14 @@
 #define SKB_DATA_ALIGN(X)	ALIGN(X, SMP_CACHE_BYTES)
 #define SKB_WITH_OVERHEAD(X)	\
 	((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
+/* For X bytes available in skb->head, what is the minimal
+ * allocation needed, knowing struct skb_shared_info needs
+ * to be aligned.
+ */
+#define SKB_HEAD_ALIGN(X) (SKB_DATA_ALIGN(X) + \
+			   SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 #define SKB_MAX_ORDER(X, ORDER) \
 	SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X))
 #define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X), 0))
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 24bf4aa..4aea8f5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -504,8 +504,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
 	 * Both skb->head and skb_shared_info are cache line aligned.
 	 */
-	size = SKB_DATA_ALIGN(size);
-	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	size = SKB_HEAD_ALIGN(size);
 	osize = kmalloc_size_roundup(size);
 	data = kmalloc_reserve(osize, gfp_mask, node, &pfmemalloc);
 	if (unlikely(!data))
@@ -578,8 +577,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
 		goto skb_success;
 	}
 
-	len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	len = SKB_DATA_ALIGN(len);
+	len = SKB_HEAD_ALIGN(len);
 
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
@@ -678,8 +676,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 		data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
 		pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
 	} else {
-		len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-		len = SKB_DATA_ALIGN(len);
+		len = SKB_HEAD_ALIGN(len);
 
 		data = page_frag_alloc(&nc->page, len, gfp_mask);
 		pfmemalloc = nc->page.pfmemalloc;
@@ -1837,8 +1834,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_DATA_ALIGN(size);
-	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	size = SKB_HEAD_ALIGN(size);
 	size = kmalloc_size_roundup(size);
 	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
@@ -6204,8 +6200,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_DATA_ALIGN(size);
-	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	size = SKB_HEAD_ALIGN(size);
 	size = kmalloc_size_roundup(size);
 	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
@@ -6323,8 +6318,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_DATA_ALIGN(size);
-	size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	size = SKB_HEAD_ALIGN(size);
 	size = kmalloc_size_roundup(size);
 	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
From: Eric Dumazet <edumazet@google.com>
commit 65998d2bf857b9ae5acc1f3b70892bd1b429ccab upstream.
This is a cleanup patch, to prepare the following change.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
[Ajay: Regenerated the patch for v6.1.y]
Signed-off-by: Ajay Kaher <akaher@vmware.com>
---
 net/core/skbuff.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4aea8f5..1c059b6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -479,7 +479,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 {
 	struct kmem_cache *cache;
 	struct sk_buff *skb;
-	unsigned int osize;
 	bool pfmemalloc;
 	u8 *data;
 
@@ -505,16 +504,15 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 * Both skb->head and skb_shared_info are cache line aligned.
 	 */
 	size = SKB_HEAD_ALIGN(size);
-	osize = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(osize, gfp_mask, node, &pfmemalloc);
+	size = kmalloc_size_roundup(size);
+	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
 	if (unlikely(!data))
 		goto nodata;
 	/* kmalloc_size_roundup() might give us more room than requested.
 	 * Put skb_shared_info exactly at the end of allocated zone,
 	 * to allow max possible filling before reallocation.
 	 */
-	size = SKB_WITH_OVERHEAD(osize);
-	prefetchw(data + size);
+	prefetchw(data + SKB_WITH_OVERHEAD(size));
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
@@ -522,7 +520,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 * the tail pointer in struct sk_buff!
 	 */
 	memset(skb, 0, offsetof(struct sk_buff, tail));
-	__build_skb_around(skb, data, osize);
+	__build_skb_around(skb, data, size);
 	skb->pfmemalloc = pfmemalloc;
 
 	if (flags & SKB_ALLOC_FCLONE) {
From: Eric Dumazet <edumazet@google.com>
commit 5c0e820cbbbe2d1c4cea5cd2bfc1302c123436df upstream.
All kmalloc_reserve() callers have to make the same computation; we can factorize it, to prepare the following patch in the series.
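The effect on callers can be sketched in a small standalone program (simplified stand-ins, not the kernel code): the SKB_HEAD_ALIGN() and kmalloc_size_roundup() steps move inside kmalloc_reserve(), which now reports the final usable size back through its size pointer.

#include <stdio.h>
#include <stdlib.h>

/* stand-in values and helpers; the kernel versions are more involved */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((unsigned int)(a) - 1))
#define SHINFO_SIZE		320	/* pretend sizeof(struct skb_shared_info) */
#define SKB_HEAD_ALIGN(X)	(ALIGN(X, 64) + ALIGN(SHINFO_SIZE, 64))

static unsigned int kmalloc_size_roundup(unsigned int size)
{
	unsigned int bucket = 8;	/* crude model of slab bucket sizes */

	while (bucket < size)
		bucket <<= 1;
	return bucket;
}

/* after this patch: the shared computation lives here and *size is updated */
static void *kmalloc_reserve(unsigned int *size, int node)
{
	unsigned int obj_size;

	(void)node;			/* NUMA hint, unused in this sketch */
	obj_size = SKB_HEAD_ALIGN(*size);
	*size = obj_size = kmalloc_size_roundup(obj_size);
	return malloc(obj_size);	/* kernel: kmalloc_node_track_caller() */
}

int main(void)
{
	unsigned int size = 1500;	/* requested skb->head room */
	void *data = kmalloc_reserve(&size, -1);

	/* callers no longer repeat the align + roundup dance themselves */
	printf("usable allocation: %u bytes for a 1500-byte request\n", size);
	free(data);
	return 0;
}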
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
[Ajay: Regenerated the patch for v6.1.y]
Signed-off-by: Ajay Kaher <akaher@vmware.com>
---
 net/core/skbuff.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1c059b6..fb8d100 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -424,17 +424,20 @@ EXPORT_SYMBOL(napi_build_skb);
  * may be used. Otherwise, the packet data may be discarded until enough
  * memory is free
  */
-static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
+static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
 			     bool *pfmemalloc)
 {
-	void *obj;
 	bool ret_pfmemalloc = false;
+	unsigned int obj_size;
+	void *obj;
 
+	obj_size = SKB_HEAD_ALIGN(*size);
+	*size = obj_size = kmalloc_size_roundup(obj_size);
 	/*
 	 * Try a regular allocation, when that fails and we're not entitled
 	 * to the reserves, fail.
 	 */
-	obj = kmalloc_node_track_caller(size,
+	obj = kmalloc_node_track_caller(obj_size,
 					flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
 					node);
 	if (obj || !(gfp_pfmemalloc_allowed(flags)))
@@ -442,7 +445,7 @@ static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
 
 	/* Try again but now we are using pfmemalloc reserves */
 	ret_pfmemalloc = true;
-	obj = kmalloc_node_track_caller(size, flags, node);
+	obj = kmalloc_node_track_caller(obj_size, flags, node);
 
 out:
 	if (pfmemalloc)
@@ -503,9 +506,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	 * aligned memory blocks, unless SLUB/SLAB debug is enabled.
 	 * Both skb->head and skb_shared_info are cache line aligned.
 	 */
-	size = SKB_HEAD_ALIGN(size);
-	size = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(size, gfp_mask, node, &pfmemalloc);
+	data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc);
 	if (unlikely(!data))
 		goto nodata;
 	/* kmalloc_size_roundup() might give us more room than requested.
@@ -1832,9 +1833,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_HEAD_ALIGN(size);
-	size = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+	data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		goto nodata;
 	size = SKB_WITH_OVERHEAD(size);
@@ -6198,9 +6197,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_HEAD_ALIGN(size);
-	size = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+	data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		return -ENOMEM;
 	size = SKB_WITH_OVERHEAD(size);
@@ -6316,9 +6313,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
 	if (skb_pfmemalloc(skb))
 		gfp_mask |= __GFP_MEMALLOC;
 
-	size = SKB_HEAD_ALIGN(size);
-	size = kmalloc_size_roundup(size);
-	data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+	data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
 	if (!data)
 		return -ENOMEM;
 	size = SKB_WITH_OVERHEAD(size);
From: Eric Dumazet <edumazet@google.com>
commit 915d975b2ffa58a14bfcf16fafe00c41315949ff upstream.
Blamed commit changed:

    ptr = kmalloc(size);
    if (ptr)
        size = ksize(ptr);

to:

    size = kmalloc_size_roundup(size);
    ptr = kmalloc(size);
This allowed various crashes, as reported by syzbot [1] and Kyle Zeng.

The problem is that if @size is bigger than 0x80000001, kmalloc_size_roundup(size) returns 2^32.

kmalloc_reserve() uses a 32-bit variable (obj_size), so 2^32 is truncated to 0.

kmalloc(0) returns ZERO_SIZE_PTR, which is not handled by skb allocations.
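A standalone sketch of that truncation, assuming a 64-bit size_t and modelling kmalloc_size_roundup() as returning the next power of two for such a large request:

#include <stdio.h>

int main(void)
{
	size_t request = 0x80000001UL;			/* e.g. derived from a huge netdev->mtu */
	size_t rounded = (size_t)1 << 32;		/* what kmalloc_size_roundup() hands back */
	unsigned int obj_size = (unsigned int)rounded;	/* 32-bit variable: value truncates to 0 */

	printf("request=0x%zx rounded=0x%zx obj_size=%u\n", request, rounded, obj_size);
	/* kmalloc(0) returns ZERO_SIZE_PTR, which the skb code never expects,
	 * hence the KASAN splat below; keeping obj_size as size_t instead lets
	 * the oversized kmalloc() fail cleanly so the caller can bail out.
	 */
	return 0;
}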
The following trace can be triggered if netdev->mtu is set close to 0x7fffffff.

We might in the future limit netdev->mtu to a more sensible value (like KMALLOC_MAX_SIZE).
This patch is based on a syzbot report, and also a report and tentative fix from Kyle Zeng.
[1] BUG: KASAN: user-memory-access in __build_skb_around net/core/skbuff.c:294 [inline]
BUG: KASAN: user-memory-access in __alloc_skb+0x3c4/0x6e8 net/core/skbuff.c:527
Write of size 32 at addr 00000000fffffd10 by task syz-executor.4/22554

CPU: 1 PID: 22554 Comm: syz-executor.4 Not tainted 6.1.39-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023
Call trace:
 dump_backtrace+0x1c8/0x1f4 arch/arm64/kernel/stacktrace.c:279
 show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:286
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x120/0x1a0 lib/dump_stack.c:106
 print_report+0xe4/0x4b4 mm/kasan/report.c:398
 kasan_report+0x150/0x1ac mm/kasan/report.c:495
 kasan_check_range+0x264/0x2a4 mm/kasan/generic.c:189
 memset+0x40/0x70 mm/kasan/shadow.c:44
 __build_skb_around net/core/skbuff.c:294 [inline]
 __alloc_skb+0x3c4/0x6e8 net/core/skbuff.c:527
 alloc_skb include/linux/skbuff.h:1316 [inline]
 igmpv3_newpack+0x104/0x1088 net/ipv4/igmp.c:359
 add_grec+0x81c/0x1124 net/ipv4/igmp.c:534
 igmpv3_send_cr net/ipv4/igmp.c:667 [inline]
 igmp_ifc_timer_expire+0x1b0/0x1008 net/ipv4/igmp.c:810
 call_timer_fn+0x1c0/0x9f0 kernel/time/timer.c:1474
 expire_timers kernel/time/timer.c:1519 [inline]
 __run_timers+0x54c/0x710 kernel/time/timer.c:1790
 run_timer_softirq+0x28/0x4c kernel/time/timer.c:1803
 _stext+0x380/0xfbc
 ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:79
 call_on_irq_stack+0x24/0x4c arch/arm64/kernel/entry.S:891
 do_softirq_own_stack+0x20/0x2c arch/arm64/kernel/irq.c:84
 invoke_softirq kernel/softirq.c:437 [inline]
 __irq_exit_rcu+0x1c0/0x4cc kernel/softirq.c:683
 irq_exit_rcu+0x14/0x78 kernel/softirq.c:695
 el0_interrupt+0x7c/0x2e0 arch/arm64/kernel/entry-common.c:717
 __el0_irq_handler_common+0x18/0x24 arch/arm64/kernel/entry-common.c:724
 el0t_64_irq_handler+0x10/0x1c arch/arm64/kernel/entry-common.c:729
 el0t_64_irq+0x1a0/0x1a4 arch/arm64/kernel/entry.S:584
Fixes: 12d6c1d3a2ad ("skbuff: Proactively round up to kmalloc bucket size")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Kyle Zeng <zengyhkyle@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
[Ajay: Regenerated the patch for v6.1.y]
Signed-off-by: Ajay Kaher <akaher@vmware.com>
---
 net/core/skbuff.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fb8d100..8dca4a7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -428,11 +428,17 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
 			     bool *pfmemalloc)
 {
 	bool ret_pfmemalloc = false;
-	unsigned int obj_size;
+	size_t obj_size;
 	void *obj;
 
 	obj_size = SKB_HEAD_ALIGN(*size);
-	*size = obj_size = kmalloc_size_roundup(obj_size);
+
+	obj_size = kmalloc_size_roundup(obj_size);
+	/* The following cast might truncate high-order bits of obj_size, this
+	 * is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
+	 */
+	*size = (unsigned int)obj_size;
+
 	/*
 	 * Try a regular allocation, when that fails and we're not entitled
 	 * to the reserves, fail.
On Fri, Sep 15, 2023 at 11:51:01PM +0530, Ajay Kaher wrote:
> From: Eric Dumazet <edumazet@google.com>
>
> This patch series backports upstream commit 915d975b2ffa ("net: deal with
> integer overflows in kmalloc_reserve()").
>
> Backporting patches 1-3/4 is required in order to apply patch 4/4, which
> fixes the roundup issue in kmalloc_reserve().
Thanks so much for these backports. I attempted it myself but couldn't figure it out.
All now queued up, thanks.
greg k-h