The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2b88cba55883eaafbc9b7cbff0b2c7cdba71ed01 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Mon, 21 Feb 2022 19:21:13 -0800
Subject: [PATCH] net: preserve skb_end_offset() in skb_unclone_keeptruesize()
syzbot found another way to trigger the infamous WARN_ON_ONCE(delta < len)
in skb_try_coalesce() [1]
I was able to root cause the issue to kfence.
When kfence is in action, the following assertion is no longer true:
int size = xxxx;
void *ptr1 = kmalloc(size, gfp);
void *ptr2 = kmalloc(size, gfp);
if (ptr1 && ptr2)
ASSERT(ksize(ptr1) == ksize(ptr2));
We attempted to fix these issues in the blamed commits, but forgot
that TCP was possibly shifting data after skb_unclone_keeptruesize()
has been used, notably from tcp_retrans_try_collapse().
So we not only need to keep same skb->truesize value,
we also need to make sure TCP wont fill new tailroom
that pskb_expand_head() was able to get from a
addr = kmalloc(...) followed by ksize(addr)
Split skb_unclone_keeptruesize() into two parts:
1) Inline skb_unclone_keeptruesize() for the common case,
when skb is not cloned.
2) Out of line __skb_unclone_keeptruesize() for the 'slow path'.
WARNING: CPU: 1 PID: 6490 at net/core/skbuff.c:5295 skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295
Modules linked in:
CPU: 1 PID: 6490 Comm: syz-executor161 Not tainted 5.17.0-rc4-syzkaller-00229-g4f12b742eb2b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295
Code: bf 01 00 00 00 0f b7 c0 89 c6 89 44 24 20 e8 62 24 4e fa 8b 44 24 20 83 e8 01 0f 85 e5 f0 ff ff e9 87 f4 ff ff e8 cb 20 4e fa <0f> 0b e9 06 f9 ff ff e8 af b2 95 fa e9 69 f0 ff ff e8 95 b2 95 fa
RSP: 0018:ffffc900063af268 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 00000000ffffffd5 RCX: 0000000000000000
RDX: ffff88806fc05700 RSI: ffffffff872abd55 RDI: 0000000000000003
RBP: ffff88806e675500 R08: 00000000ffffffd5 R09: 0000000000000000
R10: ffffffff872ab659 R11: 0000000000000000 R12: ffff88806dd554e8
R13: ffff88806dd9bac0 R14: ffff88806dd9a2c0 R15: 0000000000000155
FS: 00007f18014f9700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020002000 CR3: 000000006be7a000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
tcp_try_coalesce net/ipv4/tcp_input.c:4651 [inline]
tcp_try_coalesce+0x393/0x920 net/ipv4/tcp_input.c:4630
tcp_queue_rcv+0x8a/0x6e0 net/ipv4/tcp_input.c:4914
tcp_data_queue+0x11fd/0x4bb0 net/ipv4/tcp_input.c:5025
tcp_rcv_established+0x81e/0x1ff0 net/ipv4/tcp_input.c:5947
tcp_v4_do_rcv+0x65e/0x980 net/ipv4/tcp_ipv4.c:1719
sk_backlog_rcv include/net/sock.h:1037 [inline]
__release_sock+0x134/0x3b0 net/core/sock.c:2779
release_sock+0x54/0x1b0 net/core/sock.c:3311
sk_wait_data+0x177/0x450 net/core/sock.c:2821
tcp_recvmsg_locked+0xe28/0x1fd0 net/ipv4/tcp.c:2457
tcp_recvmsg+0x137/0x610 net/ipv4/tcp.c:2572
inet_recvmsg+0x11b/0x5e0 net/ipv4/af_inet.c:850
sock_recvmsg_nosec net/socket.c:948 [inline]
sock_recvmsg net/socket.c:966 [inline]
sock_recvmsg net/socket.c:962 [inline]
____sys_recvmsg+0x2c4/0x600 net/socket.c:2632
___sys_recvmsg+0x127/0x200 net/socket.c:2674
__sys_recvmsg+0xe2/0x1a0 net/socket.c:2704
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fixes: c4777efa751d ("net: add and use skb_unclone_keeptruesize() helper")
Fixes: 097b9146c0e2 ("net: fix up truesize of cloned skb in skb_prepare_for_shift()")
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 115be7f73487..31be38078918 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1795,19 +1795,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
return 0;
}
-/* This variant of skb_unclone() makes sure skb->truesize is not changed */
+/* This variant of skb_unclone() makes sure skb->truesize
+ * and skb_end_offset() are not changed, whenever a new skb->head is needed.
+ *
+ * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X))
+ * when various debugging features are in place.
+ */
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri);
static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
{
might_sleep_if(gfpflags_allow_blocking(pri));
- if (skb_cloned(skb)) {
- unsigned int save = skb->truesize;
- int res;
-
- res = pskb_expand_head(skb, 0, 0, pri);
- skb->truesize = save;
- return res;
- }
+ if (skb_cloned(skb))
+ return __skb_unclone_keeptruesize(skb, pri);
return 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 27a2296241c9..725f2b356769 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1787,6 +1787,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+ unsigned int saved_end_offset, saved_truesize;
+ struct skb_shared_info *shinfo;
+ int res;
+
+ saved_end_offset = skb_end_offset(skb);
+ saved_truesize = skb->truesize;
+
+ res = pskb_expand_head(skb, 0, 0, pri);
+ if (res)
+ return res;
+
+ skb->truesize = saved_truesize;
+
+ if (likely(skb_end_offset(skb) == saved_end_offset))
+ return 0;
+
+ shinfo = skb_shinfo(skb);
+
+ /* We are about to change back skb->end,
+ * we need to move skb_shinfo() to its new location.
+ */
+ memmove(skb->head + saved_end_offset,
+ shinfo,
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
+
+ skb_set_end_offset(skb, saved_end_offset);
+
+ return 0;
+}
+
/**
* skb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2b88cba55883eaafbc9b7cbff0b2c7cdba71ed01 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Mon, 21 Feb 2022 19:21:13 -0800
Subject: [PATCH] net: preserve skb_end_offset() in skb_unclone_keeptruesize()
syzbot found another way to trigger the infamous WARN_ON_ONCE(delta < len)
in skb_try_coalesce() [1]
I was able to root cause the issue to kfence.
When kfence is in action, the following assertion is no longer true:
int size = xxxx;
void *ptr1 = kmalloc(size, gfp);
void *ptr2 = kmalloc(size, gfp);
if (ptr1 && ptr2)
ASSERT(ksize(ptr1) == ksize(ptr2));
We attempted to fix these issues in the blamed commits, but forgot
that TCP was possibly shifting data after skb_unclone_keeptruesize()
has been used, notably from tcp_retrans_try_collapse().
So we not only need to keep same skb->truesize value,
we also need to make sure TCP wont fill new tailroom
that pskb_expand_head() was able to get from a
addr = kmalloc(...) followed by ksize(addr)
Split skb_unclone_keeptruesize() into two parts:
1) Inline skb_unclone_keeptruesize() for the common case,
when skb is not cloned.
2) Out of line __skb_unclone_keeptruesize() for the 'slow path'.
WARNING: CPU: 1 PID: 6490 at net/core/skbuff.c:5295 skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295
Modules linked in:
CPU: 1 PID: 6490 Comm: syz-executor161 Not tainted 5.17.0-rc4-syzkaller-00229-g4f12b742eb2b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295
Code: bf 01 00 00 00 0f b7 c0 89 c6 89 44 24 20 e8 62 24 4e fa 8b 44 24 20 83 e8 01 0f 85 e5 f0 ff ff e9 87 f4 ff ff e8 cb 20 4e fa <0f> 0b e9 06 f9 ff ff e8 af b2 95 fa e9 69 f0 ff ff e8 95 b2 95 fa
RSP: 0018:ffffc900063af268 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 00000000ffffffd5 RCX: 0000000000000000
RDX: ffff88806fc05700 RSI: ffffffff872abd55 RDI: 0000000000000003
RBP: ffff88806e675500 R08: 00000000ffffffd5 R09: 0000000000000000
R10: ffffffff872ab659 R11: 0000000000000000 R12: ffff88806dd554e8
R13: ffff88806dd9bac0 R14: ffff88806dd9a2c0 R15: 0000000000000155
FS: 00007f18014f9700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020002000 CR3: 000000006be7a000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
tcp_try_coalesce net/ipv4/tcp_input.c:4651 [inline]
tcp_try_coalesce+0x393/0x920 net/ipv4/tcp_input.c:4630
tcp_queue_rcv+0x8a/0x6e0 net/ipv4/tcp_input.c:4914
tcp_data_queue+0x11fd/0x4bb0 net/ipv4/tcp_input.c:5025
tcp_rcv_established+0x81e/0x1ff0 net/ipv4/tcp_input.c:5947
tcp_v4_do_rcv+0x65e/0x980 net/ipv4/tcp_ipv4.c:1719
sk_backlog_rcv include/net/sock.h:1037 [inline]
__release_sock+0x134/0x3b0 net/core/sock.c:2779
release_sock+0x54/0x1b0 net/core/sock.c:3311
sk_wait_data+0x177/0x450 net/core/sock.c:2821
tcp_recvmsg_locked+0xe28/0x1fd0 net/ipv4/tcp.c:2457
tcp_recvmsg+0x137/0x610 net/ipv4/tcp.c:2572
inet_recvmsg+0x11b/0x5e0 net/ipv4/af_inet.c:850
sock_recvmsg_nosec net/socket.c:948 [inline]
sock_recvmsg net/socket.c:966 [inline]
sock_recvmsg net/socket.c:962 [inline]
____sys_recvmsg+0x2c4/0x600 net/socket.c:2632
___sys_recvmsg+0x127/0x200 net/socket.c:2674
__sys_recvmsg+0xe2/0x1a0 net/socket.c:2704
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fixes: c4777efa751d ("net: add and use skb_unclone_keeptruesize() helper")
Fixes: 097b9146c0e2 ("net: fix up truesize of cloned skb in skb_prepare_for_shift()")
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 115be7f73487..31be38078918 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1795,19 +1795,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
return 0;
}
-/* This variant of skb_unclone() makes sure skb->truesize is not changed */
+/* This variant of skb_unclone() makes sure skb->truesize
+ * and skb_end_offset() are not changed, whenever a new skb->head is needed.
+ *
+ * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X))
+ * when various debugging features are in place.
+ */
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri);
static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
{
might_sleep_if(gfpflags_allow_blocking(pri));
- if (skb_cloned(skb)) {
- unsigned int save = skb->truesize;
- int res;
-
- res = pskb_expand_head(skb, 0, 0, pri);
- skb->truesize = save;
- return res;
- }
+ if (skb_cloned(skb))
+ return __skb_unclone_keeptruesize(skb, pri);
return 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 27a2296241c9..725f2b356769 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1787,6 +1787,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+ unsigned int saved_end_offset, saved_truesize;
+ struct skb_shared_info *shinfo;
+ int res;
+
+ saved_end_offset = skb_end_offset(skb);
+ saved_truesize = skb->truesize;
+
+ res = pskb_expand_head(skb, 0, 0, pri);
+ if (res)
+ return res;
+
+ skb->truesize = saved_truesize;
+
+ if (likely(skb_end_offset(skb) == saved_end_offset))
+ return 0;
+
+ shinfo = skb_shinfo(skb);
+
+ /* We are about to change back skb->end,
+ * we need to move skb_shinfo() to its new location.
+ */
+ memmove(skb->head + saved_end_offset,
+ shinfo,
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
+
+ skb_set_end_offset(skb, saved_end_offset);
+
+ return 0;
+}
+
/**
* skb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2b88cba55883eaafbc9b7cbff0b2c7cdba71ed01 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Mon, 21 Feb 2022 19:21:13 -0800
Subject: [PATCH] net: preserve skb_end_offset() in skb_unclone_keeptruesize()
syzbot found another way to trigger the infamous WARN_ON_ONCE(delta < len)
in skb_try_coalesce() [1]
I was able to root cause the issue to kfence.
When kfence is in action, the following assertion is no longer true:
int size = xxxx;
void *ptr1 = kmalloc(size, gfp);
void *ptr2 = kmalloc(size, gfp);
if (ptr1 && ptr2)
ASSERT(ksize(ptr1) == ksize(ptr2));
We attempted to fix these issues in the blamed commits, but forgot
that TCP was possibly shifting data after skb_unclone_keeptruesize()
has been used, notably from tcp_retrans_try_collapse().
So we not only need to keep same skb->truesize value,
we also need to make sure TCP wont fill new tailroom
that pskb_expand_head() was able to get from a
addr = kmalloc(...) followed by ksize(addr)
Split skb_unclone_keeptruesize() into two parts:
1) Inline skb_unclone_keeptruesize() for the common case,
when skb is not cloned.
2) Out of line __skb_unclone_keeptruesize() for the 'slow path'.
WARNING: CPU: 1 PID: 6490 at net/core/skbuff.c:5295 skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295
Modules linked in:
CPU: 1 PID: 6490 Comm: syz-executor161 Not tainted 5.17.0-rc4-syzkaller-00229-g4f12b742eb2b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295
Code: bf 01 00 00 00 0f b7 c0 89 c6 89 44 24 20 e8 62 24 4e fa 8b 44 24 20 83 e8 01 0f 85 e5 f0 ff ff e9 87 f4 ff ff e8 cb 20 4e fa <0f> 0b e9 06 f9 ff ff e8 af b2 95 fa e9 69 f0 ff ff e8 95 b2 95 fa
RSP: 0018:ffffc900063af268 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 00000000ffffffd5 RCX: 0000000000000000
RDX: ffff88806fc05700 RSI: ffffffff872abd55 RDI: 0000000000000003
RBP: ffff88806e675500 R08: 00000000ffffffd5 R09: 0000000000000000
R10: ffffffff872ab659 R11: 0000000000000000 R12: ffff88806dd554e8
R13: ffff88806dd9bac0 R14: ffff88806dd9a2c0 R15: 0000000000000155
FS: 00007f18014f9700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020002000 CR3: 000000006be7a000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
tcp_try_coalesce net/ipv4/tcp_input.c:4651 [inline]
tcp_try_coalesce+0x393/0x920 net/ipv4/tcp_input.c:4630
tcp_queue_rcv+0x8a/0x6e0 net/ipv4/tcp_input.c:4914
tcp_data_queue+0x11fd/0x4bb0 net/ipv4/tcp_input.c:5025
tcp_rcv_established+0x81e/0x1ff0 net/ipv4/tcp_input.c:5947
tcp_v4_do_rcv+0x65e/0x980 net/ipv4/tcp_ipv4.c:1719
sk_backlog_rcv include/net/sock.h:1037 [inline]
__release_sock+0x134/0x3b0 net/core/sock.c:2779
release_sock+0x54/0x1b0 net/core/sock.c:3311
sk_wait_data+0x177/0x450 net/core/sock.c:2821
tcp_recvmsg_locked+0xe28/0x1fd0 net/ipv4/tcp.c:2457
tcp_recvmsg+0x137/0x610 net/ipv4/tcp.c:2572
inet_recvmsg+0x11b/0x5e0 net/ipv4/af_inet.c:850
sock_recvmsg_nosec net/socket.c:948 [inline]
sock_recvmsg net/socket.c:966 [inline]
sock_recvmsg net/socket.c:962 [inline]
____sys_recvmsg+0x2c4/0x600 net/socket.c:2632
___sys_recvmsg+0x127/0x200 net/socket.c:2674
__sys_recvmsg+0xe2/0x1a0 net/socket.c:2704
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fixes: c4777efa751d ("net: add and use skb_unclone_keeptruesize() helper")
Fixes: 097b9146c0e2 ("net: fix up truesize of cloned skb in skb_prepare_for_shift()")
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 115be7f73487..31be38078918 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1795,19 +1795,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
return 0;
}
-/* This variant of skb_unclone() makes sure skb->truesize is not changed */
+/* This variant of skb_unclone() makes sure skb->truesize
+ * and skb_end_offset() are not changed, whenever a new skb->head is needed.
+ *
+ * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X))
+ * when various debugging features are in place.
+ */
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri);
static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
{
might_sleep_if(gfpflags_allow_blocking(pri));
- if (skb_cloned(skb)) {
- unsigned int save = skb->truesize;
- int res;
-
- res = pskb_expand_head(skb, 0, 0, pri);
- skb->truesize = save;
- return res;
- }
+ if (skb_cloned(skb))
+ return __skb_unclone_keeptruesize(skb, pri);
return 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 27a2296241c9..725f2b356769 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1787,6 +1787,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+ unsigned int saved_end_offset, saved_truesize;
+ struct skb_shared_info *shinfo;
+ int res;
+
+ saved_end_offset = skb_end_offset(skb);
+ saved_truesize = skb->truesize;
+
+ res = pskb_expand_head(skb, 0, 0, pri);
+ if (res)
+ return res;
+
+ skb->truesize = saved_truesize;
+
+ if (likely(skb_end_offset(skb) == saved_end_offset))
+ return 0;
+
+ shinfo = skb_shinfo(skb);
+
+ /* We are about to change back skb->end,
+ * we need to move skb_shinfo() to its new location.
+ */
+ memmove(skb->head + saved_end_offset,
+ shinfo,
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
+
+ skb_set_end_offset(skb, saved_end_offset);
+
+ return 0;
+}
+
/**
* skb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
The patch below does not apply to the 5.17-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2b88cba55883eaafbc9b7cbff0b2c7cdba71ed01 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Mon, 21 Feb 2022 19:21:13 -0800
Subject: [PATCH] net: preserve skb_end_offset() in skb_unclone_keeptruesize()
syzbot found another way to trigger the infamous WARN_ON_ONCE(delta < len)
in skb_try_coalesce() [1]
I was able to root cause the issue to kfence.
When kfence is in action, the following assertion is no longer true:
int size = xxxx;
void *ptr1 = kmalloc(size, gfp);
void *ptr2 = kmalloc(size, gfp);
if (ptr1 && ptr2)
ASSERT(ksize(ptr1) == ksize(ptr2));
We attempted to fix these issues in the blamed commits, but forgot
that TCP was possibly shifting data after skb_unclone_keeptruesize()
has been used, notably from tcp_retrans_try_collapse().
So we not only need to keep same skb->truesize value,
we also need to make sure TCP wont fill new tailroom
that pskb_expand_head() was able to get from a
addr = kmalloc(...) followed by ksize(addr)
Split skb_unclone_keeptruesize() into two parts:
1) Inline skb_unclone_keeptruesize() for the common case,
when skb is not cloned.
2) Out of line __skb_unclone_keeptruesize() for the 'slow path'.
WARNING: CPU: 1 PID: 6490 at net/core/skbuff.c:5295 skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295
Modules linked in:
CPU: 1 PID: 6490 Comm: syz-executor161 Not tainted 5.17.0-rc4-syzkaller-00229-g4f12b742eb2b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:skb_try_coalesce+0x1235/0x1560 net/core/skbuff.c:5295
Code: bf 01 00 00 00 0f b7 c0 89 c6 89 44 24 20 e8 62 24 4e fa 8b 44 24 20 83 e8 01 0f 85 e5 f0 ff ff e9 87 f4 ff ff e8 cb 20 4e fa <0f> 0b e9 06 f9 ff ff e8 af b2 95 fa e9 69 f0 ff ff e8 95 b2 95 fa
RSP: 0018:ffffc900063af268 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 00000000ffffffd5 RCX: 0000000000000000
RDX: ffff88806fc05700 RSI: ffffffff872abd55 RDI: 0000000000000003
RBP: ffff88806e675500 R08: 00000000ffffffd5 R09: 0000000000000000
R10: ffffffff872ab659 R11: 0000000000000000 R12: ffff88806dd554e8
R13: ffff88806dd9bac0 R14: ffff88806dd9a2c0 R15: 0000000000000155
FS: 00007f18014f9700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020002000 CR3: 000000006be7a000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
tcp_try_coalesce net/ipv4/tcp_input.c:4651 [inline]
tcp_try_coalesce+0x393/0x920 net/ipv4/tcp_input.c:4630
tcp_queue_rcv+0x8a/0x6e0 net/ipv4/tcp_input.c:4914
tcp_data_queue+0x11fd/0x4bb0 net/ipv4/tcp_input.c:5025
tcp_rcv_established+0x81e/0x1ff0 net/ipv4/tcp_input.c:5947
tcp_v4_do_rcv+0x65e/0x980 net/ipv4/tcp_ipv4.c:1719
sk_backlog_rcv include/net/sock.h:1037 [inline]
__release_sock+0x134/0x3b0 net/core/sock.c:2779
release_sock+0x54/0x1b0 net/core/sock.c:3311
sk_wait_data+0x177/0x450 net/core/sock.c:2821
tcp_recvmsg_locked+0xe28/0x1fd0 net/ipv4/tcp.c:2457
tcp_recvmsg+0x137/0x610 net/ipv4/tcp.c:2572
inet_recvmsg+0x11b/0x5e0 net/ipv4/af_inet.c:850
sock_recvmsg_nosec net/socket.c:948 [inline]
sock_recvmsg net/socket.c:966 [inline]
sock_recvmsg net/socket.c:962 [inline]
____sys_recvmsg+0x2c4/0x600 net/socket.c:2632
___sys_recvmsg+0x127/0x200 net/socket.c:2674
__sys_recvmsg+0xe2/0x1a0 net/socket.c:2704
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x44/0xae
Fixes: c4777efa751d ("net: add and use skb_unclone_keeptruesize() helper")
Fixes: 097b9146c0e2 ("net: fix up truesize of cloned skb in skb_prepare_for_shift()")
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 115be7f73487..31be38078918 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1795,19 +1795,19 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
return 0;
}
-/* This variant of skb_unclone() makes sure skb->truesize is not changed */
+/* This variant of skb_unclone() makes sure skb->truesize
+ * and skb_end_offset() are not changed, whenever a new skb->head is needed.
+ *
+ * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X))
+ * when various debugging features are in place.
+ */
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri);
static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
{
might_sleep_if(gfpflags_allow_blocking(pri));
- if (skb_cloned(skb)) {
- unsigned int save = skb->truesize;
- int res;
-
- res = pskb_expand_head(skb, 0, 0, pri);
- skb->truesize = save;
- return res;
- }
+ if (skb_cloned(skb))
+ return __skb_unclone_keeptruesize(skb, pri);
return 0;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 27a2296241c9..725f2b356769 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1787,6 +1787,38 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
EXPORT_SYMBOL(skb_realloc_headroom);
+int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+ unsigned int saved_end_offset, saved_truesize;
+ struct skb_shared_info *shinfo;
+ int res;
+
+ saved_end_offset = skb_end_offset(skb);
+ saved_truesize = skb->truesize;
+
+ res = pskb_expand_head(skb, 0, 0, pri);
+ if (res)
+ return res;
+
+ skb->truesize = saved_truesize;
+
+ if (likely(skb_end_offset(skb) == saved_end_offset))
+ return 0;
+
+ shinfo = skb_shinfo(skb);
+
+ /* We are about to change back skb->end,
+ * we need to move skb_shinfo() to its new location.
+ */
+ memmove(skb->head + saved_end_offset,
+ shinfo,
+ offsetof(struct skb_shared_info, frags[shinfo->nr_frags]));
+
+ skb_set_end_offset(skb, saved_end_offset);
+
+ return 0;
+}
+
/**
* skb_expand_head - reallocate header of &sk_buff
* @skb: buffer to reallocate
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7336905a89f19173bf9301cd50a24421162f417c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba(a)redhat.com>
Date: Fri, 10 Dec 2021 14:43:36 +0100
Subject: [PATCH] gfs2: gfs2_setattr_size error path fix
When gfs2_setattr_size() fails, it calls gfs2_rs_delete(ip, NULL) to get
rid of any reservations the inode may have. Instead, it should pass in
the inode's write count as the second parameter to allow
gfs2_rs_delete() to figure out if the inode has any writers left.
In a next step, there are two instances of gfs2_rs_delete(ip, NULL) left
where we know that there can be no other users of the inode. Replace
those with gfs2_rs_deltree(&ip->i_res) to avoid the unnecessary write
count check.
With that, gfs2_rs_delete() is only called with the inode's actual write
count, so get rid of the second parameter.
Fixes: a097dc7e24cb ("GFS2: Make rgrp reservations part of the gfs2_inode structure")
Signed-off-by: Andreas Gruenbacher <agruenba(a)redhat.com>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index d67108489148..fbdb7a30470a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2146,7 +2146,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
ret = do_shrink(inode, newsize);
out:
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_delete(ip);
gfs2_qa_put(ip);
return ret;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8c39a8571b1f..5bac4f6e8e05 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE) {
if (gfs2_rs_active(&ip->i_res))
- gfs2_rs_delete(ip, &inode->i_writecount);
+ gfs2_rs_delete(ip);
gfs2_qa_put(ip);
}
return 0;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 89905f4f29bb..66a123306aec 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -793,7 +793,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (free_vfs_inode) /* else evict will do the put for us */
gfs2_glock_put(ip->i_gl);
}
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_deltree(&ip->i_res);
gfs2_qa_put(ip);
fail_free_acls:
posix_acl_release(default_acl);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9b04a570c582..9df8a84f85a6 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
/**
* gfs2_rs_delete - delete a multi-block reservation
* @ip: The inode for this reservation
- * @wcount: The inode's write count, or NULL
*
*/
-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_delete(struct gfs2_inode *ip)
{
+ struct inode *inode = &ip->i_inode;
+
down_write(&ip->i_rw_mutex);
- if ((wcount == NULL) || (atomic_read(wcount) <= 1))
+ if (atomic_read(&inode->i_writecount) <= 1)
gfs2_rs_deltree(&ip->i_res);
up_write(&ip->i_rw_mutex);
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 3e2ca1fb4305..46dd94e9e085 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 64c67090f503..143a47359d1b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1396,7 +1396,7 @@ static void gfs2_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
if (ip->i_qadata)
gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_deltree(&ip->i_res);
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7336905a89f19173bf9301cd50a24421162f417c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba(a)redhat.com>
Date: Fri, 10 Dec 2021 14:43:36 +0100
Subject: [PATCH] gfs2: gfs2_setattr_size error path fix
When gfs2_setattr_size() fails, it calls gfs2_rs_delete(ip, NULL) to get
rid of any reservations the inode may have. Instead, it should pass in
the inode's write count as the second parameter to allow
gfs2_rs_delete() to figure out if the inode has any writers left.
In a next step, there are two instances of gfs2_rs_delete(ip, NULL) left
where we know that there can be no other users of the inode. Replace
those with gfs2_rs_deltree(&ip->i_res) to avoid the unnecessary write
count check.
With that, gfs2_rs_delete() is only called with the inode's actual write
count, so get rid of the second parameter.
Fixes: a097dc7e24cb ("GFS2: Make rgrp reservations part of the gfs2_inode structure")
Signed-off-by: Andreas Gruenbacher <agruenba(a)redhat.com>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index d67108489148..fbdb7a30470a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2146,7 +2146,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
ret = do_shrink(inode, newsize);
out:
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_delete(ip);
gfs2_qa_put(ip);
return ret;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8c39a8571b1f..5bac4f6e8e05 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE) {
if (gfs2_rs_active(&ip->i_res))
- gfs2_rs_delete(ip, &inode->i_writecount);
+ gfs2_rs_delete(ip);
gfs2_qa_put(ip);
}
return 0;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 89905f4f29bb..66a123306aec 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -793,7 +793,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (free_vfs_inode) /* else evict will do the put for us */
gfs2_glock_put(ip->i_gl);
}
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_deltree(&ip->i_res);
gfs2_qa_put(ip);
fail_free_acls:
posix_acl_release(default_acl);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9b04a570c582..9df8a84f85a6 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
/**
* gfs2_rs_delete - delete a multi-block reservation
* @ip: The inode for this reservation
- * @wcount: The inode's write count, or NULL
*
*/
-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_delete(struct gfs2_inode *ip)
{
+ struct inode *inode = &ip->i_inode;
+
down_write(&ip->i_rw_mutex);
- if ((wcount == NULL) || (atomic_read(wcount) <= 1))
+ if (atomic_read(&inode->i_writecount) <= 1)
gfs2_rs_deltree(&ip->i_res);
up_write(&ip->i_rw_mutex);
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 3e2ca1fb4305..46dd94e9e085 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 64c67090f503..143a47359d1b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1396,7 +1396,7 @@ static void gfs2_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
if (ip->i_qadata)
gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_deltree(&ip->i_res);
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7336905a89f19173bf9301cd50a24421162f417c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba(a)redhat.com>
Date: Fri, 10 Dec 2021 14:43:36 +0100
Subject: [PATCH] gfs2: gfs2_setattr_size error path fix
When gfs2_setattr_size() fails, it calls gfs2_rs_delete(ip, NULL) to get
rid of any reservations the inode may have. Instead, it should pass in
the inode's write count as the second parameter to allow
gfs2_rs_delete() to figure out if the inode has any writers left.
In a next step, there are two instances of gfs2_rs_delete(ip, NULL) left
where we know that there can be no other users of the inode. Replace
those with gfs2_rs_deltree(&ip->i_res) to avoid the unnecessary write
count check.
With that, gfs2_rs_delete() is only called with the inode's actual write
count, so get rid of the second parameter.
Fixes: a097dc7e24cb ("GFS2: Make rgrp reservations part of the gfs2_inode structure")
Signed-off-by: Andreas Gruenbacher <agruenba(a)redhat.com>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index d67108489148..fbdb7a30470a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2146,7 +2146,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
ret = do_shrink(inode, newsize);
out:
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_delete(ip);
gfs2_qa_put(ip);
return ret;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8c39a8571b1f..5bac4f6e8e05 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE) {
if (gfs2_rs_active(&ip->i_res))
- gfs2_rs_delete(ip, &inode->i_writecount);
+ gfs2_rs_delete(ip);
gfs2_qa_put(ip);
}
return 0;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 89905f4f29bb..66a123306aec 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -793,7 +793,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (free_vfs_inode) /* else evict will do the put for us */
gfs2_glock_put(ip->i_gl);
}
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_deltree(&ip->i_res);
gfs2_qa_put(ip);
fail_free_acls:
posix_acl_release(default_acl);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9b04a570c582..9df8a84f85a6 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
/**
* gfs2_rs_delete - delete a multi-block reservation
* @ip: The inode for this reservation
- * @wcount: The inode's write count, or NULL
*
*/
-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_delete(struct gfs2_inode *ip)
{
+ struct inode *inode = &ip->i_inode;
+
down_write(&ip->i_rw_mutex);
- if ((wcount == NULL) || (atomic_read(wcount) <= 1))
+ if (atomic_read(&inode->i_writecount) <= 1)
gfs2_rs_deltree(&ip->i_res);
up_write(&ip->i_rw_mutex);
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 3e2ca1fb4305..46dd94e9e085 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 64c67090f503..143a47359d1b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1396,7 +1396,7 @@ static void gfs2_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
if (ip->i_qadata)
gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_deltree(&ip->i_res);
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7336905a89f19173bf9301cd50a24421162f417c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba(a)redhat.com>
Date: Fri, 10 Dec 2021 14:43:36 +0100
Subject: [PATCH] gfs2: gfs2_setattr_size error path fix
When gfs2_setattr_size() fails, it calls gfs2_rs_delete(ip, NULL) to get
rid of any reservations the inode may have. Instead, it should pass in
the inode's write count as the second parameter to allow
gfs2_rs_delete() to figure out if the inode has any writers left.
In a next step, there are two instances of gfs2_rs_delete(ip, NULL) left
where we know that there can be no other users of the inode. Replace
those with gfs2_rs_deltree(&ip->i_res) to avoid the unnecessary write
count check.
With that, gfs2_rs_delete() is only called with the inode's actual write
count, so get rid of the second parameter.
Fixes: a097dc7e24cb ("GFS2: Make rgrp reservations part of the gfs2_inode structure")
Signed-off-by: Andreas Gruenbacher <agruenba(a)redhat.com>
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index d67108489148..fbdb7a30470a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2146,7 +2146,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
ret = do_shrink(inode, newsize);
out:
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_delete(ip);
gfs2_qa_put(ip);
return ret;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8c39a8571b1f..5bac4f6e8e05 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file)
if (file->f_mode & FMODE_WRITE) {
if (gfs2_rs_active(&ip->i_res))
- gfs2_rs_delete(ip, &inode->i_writecount);
+ gfs2_rs_delete(ip);
gfs2_qa_put(ip);
}
return 0;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 89905f4f29bb..66a123306aec 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -793,7 +793,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (free_vfs_inode) /* else evict will do the put for us */
gfs2_glock_put(ip->i_gl);
}
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_deltree(&ip->i_res);
gfs2_qa_put(ip);
fail_free_acls:
posix_acl_release(default_acl);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9b04a570c582..9df8a84f85a6 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
/**
* gfs2_rs_delete - delete a multi-block reservation
* @ip: The inode for this reservation
- * @wcount: The inode's write count, or NULL
*
*/
-void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_delete(struct gfs2_inode *ip)
{
+ struct inode *inode = &ip->i_inode;
+
down_write(&ip->i_rw_mutex);
- if ((wcount == NULL) || (atomic_read(wcount) <= 1))
+ if (atomic_read(&inode->i_writecount) <= 1)
gfs2_rs_deltree(&ip->i_res);
up_write(&ip->i_rw_mutex);
}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 3e2ca1fb4305..46dd94e9e085 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_delete(struct gfs2_inode *ip);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 64c67090f503..143a47359d1b 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1396,7 +1396,7 @@ static void gfs2_evict_inode(struct inode *inode)
truncate_inode_pages_final(&inode->i_data);
if (ip->i_qadata)
gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
- gfs2_rs_delete(ip, NULL);
+ gfs2_rs_deltree(&ip->i_res);
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);