This is a note to let you know that I've just added the patch titled
gso: fix payload length when gso_size is zero
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
gso-fix-payload-length-when-gso_size-is-zero.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:24:03 CET 2017
From: Alexey Kodanev <alexey.kodanev(a)oracle.com>
Date: Fri, 6 Oct 2017 19:02:35 +0300
Subject: gso: fix payload length when gso_size is zero
From: Alexey Kodanev <alexey.kodanev(a)oracle.com>
[ Upstream commit 3d0241d57c7b25bb75ac9d7a62753642264fdbce ]
When gso_size is reset to zero for the tail segment in skb_segment(), later
in ipv6_gso_segment(), __skb_udp_tunnel_segment() and gre_gso_segment()
we will get incorrect results (payload length, pcsum) for that segment.
inet_gso_segment() already has a check for gso_size before calculating
payload.
The issue was found with LTP vxlan & gre tests over ixgbe NIC.
Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer")
Signed-off-by: Alexey Kodanev <alexey.kodanev(a)oracle.com>
Acked-by: Alexander Duyck <alexander.h.duyck(a)intel.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/gre_offload.c | 2 +-
net/ipv4/udp_offload.c | 2 +-
net/ipv6/ip6_offload.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -98,7 +98,7 @@ static struct sk_buff *gre_gso_segment(s
greh = (struct gre_base_hdr *)skb_transport_header(skb);
pcsum = (__sum16 *)(greh + 1);
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
unsigned int partial_adj;
/* Adjust checksum to account for the fact that
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -122,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_
* will be using a length value equal to only one MSS sized
* segment instead of the entire frame.
*/
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
uh->len = htons(skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)uh);
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
- if (gso_partial)
+ if (gso_partial && skb_is_gso(skb))
payload_len = skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)(ipv6h + 1);
Patches currently in stable-queue which might be from alexey.kodanev(a)oracle.com are
queue-4.9/gso-fix-payload-length-when-gso_size-is-zero.patch
This is a note to let you know that I've just added the patch titled
tun/tap: sanitize TUNSETSNDBUF input
to the 4.13-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tun-tap-sanitize-tunsetsndbuf-input.patch
and it can be found in the queue-4.13 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:25:34 CET 2017
From: Craig Gallek <kraig(a)google.com>
Date: Mon, 30 Oct 2017 18:50:11 -0400
Subject: tun/tap: sanitize TUNSETSNDBUF input
From: Craig Gallek <kraig(a)google.com>
[ Upstream commit 93161922c658c714715686cd0cf69b090cb9bf1d ]
Syzkaller found several variants of the lockup below by setting negative
values with the TUNSETSNDBUF ioctl. This patch adds a sanity check
to both the tun and tap versions of this ioctl.
watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [repro:2389]
Modules linked in:
irq event stamp: 329692056
hardirqs last enabled at (329692055): [<ffffffff824b8381>] _raw_spin_unlock_irqrestore+0x31/0x75
hardirqs last disabled at (329692056): [<ffffffff824b9e58>] apic_timer_interrupt+0x98/0xb0
softirqs last enabled at (35659740): [<ffffffff824bc958>] __do_softirq+0x328/0x48c
softirqs last disabled at (35659731): [<ffffffff811c796c>] irq_exit+0xbc/0xd0
CPU: 0 PID: 2389 Comm: repro Not tainted 4.14.0-rc7 #23
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
task: ffff880009452140 task.stack: ffff880006a20000
RIP: 0010:_raw_spin_lock_irqsave+0x11/0x80
RSP: 0018:ffff880006a27c50 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10
RAX: ffff880009ac68d0 RBX: ffff880006a27ce0 RCX: 0000000000000000
RDX: 0000000000000001 RSI: ffff880006a27ce0 RDI: ffff880009ac6900
RBP: ffff880006a27c60 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000001 R11: 000000000063ff00 R12: ffff880009ac6900
R13: ffff880006a27cf8 R14: 0000000000000001 R15: ffff880006a27cf8
FS: 00007f4be4838700(0000) GS:ffff88000cc00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020101000 CR3: 0000000009616000 CR4: 00000000000006f0
Call Trace:
prepare_to_wait+0x26/0xc0
sock_alloc_send_pskb+0x14e/0x270
? remove_wait_queue+0x60/0x60
tun_get_user+0x2cc/0x19d0
? __tun_get+0x60/0x1b0
tun_chr_write_iter+0x57/0x86
__vfs_write+0x156/0x1e0
vfs_write+0xf7/0x230
SyS_write+0x57/0xd0
entry_SYSCALL_64_fastpath+0x1f/0xbe
RIP: 0033:0x7f4be4356df9
RSP: 002b:00007ffc18101c08 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f4be4356df9
RDX: 0000000000000046 RSI: 0000000020101000 RDI: 0000000000000005
RBP: 00007ffc18101c40 R08: 0000000000000001 R09: 0000000000000001
R10: 0000000000000001 R11: 0000000000000293 R12: 0000559c75f64780
R13: 00007ffc18101d30 R14: 0000000000000000 R15: 0000000000000000
Fixes: 33dccbb050bb ("tun: Limit amount of queued packets per device")
Fixes: 20d29d7a916a ("net: macvtap driver")
Signed-off-by: Craig Gallek <kraig(a)google.com>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/tap.c | 2 ++
drivers/net/tun.c | 4 ++++
2 files changed, 6 insertions(+)
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1035,6 +1035,8 @@ static long tap_ioctl(struct file *file,
case TUNSETSNDBUF:
if (get_user(s, sp))
return -EFAULT;
+ if (s <= 0)
+ return -EINVAL;
q->sk.sk_sndbuf = s;
return 0;
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2219,6 +2219,10 @@ static long __tun_chr_ioctl(struct file
ret = -EFAULT;
break;
}
+ if (sndbuf <= 0) {
+ ret = -EINVAL;
+ break;
+ }
tun->sndbuf = sndbuf;
tun_set_sndbuf(tun);
Patches currently in stable-queue which might be from kraig(a)google.com are
queue-4.13/soreuseport-fix-initialization-race.patch
queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch
This is a note to let you know that I've just added the patch titled
tun: call dev_get_valid_name() before register_netdevice()
to the 4.13-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tun-call-dev_get_valid_name-before-register_netdevice.patch
and it can be found in the queue-4.13 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:25:34 CET 2017
From: Cong Wang <xiyou.wangcong(a)gmail.com>
Date: Fri, 13 Oct 2017 11:58:53 -0700
Subject: tun: call dev_get_valid_name() before register_netdevice()
From: Cong Wang <xiyou.wangcong(a)gmail.com>
[ Upstream commit 0ad646c81b2182f7fa67ec0c8c825e0ee165696d ]
register_netdevice() could fail early when we have an invalid
dev name, in which case ->ndo_uninit() is not called. For tun
device, this is a problem because a timer etc. are already
initialized and it expects ->ndo_uninit() to clean them up.
We could move these initializations into a ->ndo_init() so
that register_netdevice() knows better, however this is still
complicated due to the logic in tun_detach().
Therefore, I choose to just call dev_get_valid_name() before
register_netdevice(), which is quicker and much easier to audit.
And for this specific case, it is already enough.
Fixes: 96442e42429e ("tuntap: choose the txq based on rxq")
Reported-by: Dmitry Alexeev <avekceeb(a)gmail.com>
Cc: Jason Wang <jasowang(a)redhat.com>
Cc: "Michael S. Tsirkin" <mst(a)redhat.com>
Signed-off-by: Cong Wang <xiyou.wangcong(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/tun.c | 3 +++
include/linux/netdevice.h | 3 +++
net/core/dev.c | 6 +++---
3 files changed, 9 insertions(+), 3 deletions(-)
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1813,6 +1813,9 @@ static int tun_set_iff(struct net *net,
if (!dev)
return -ENOMEM;
+ err = dev_get_valid_name(net, dev, name);
+ if (err)
+ goto err_free_dev;
dev_net_set(dev, net);
dev->rtnl_link_ops = &tun_link_ops;
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3702,6 +3702,9 @@ struct net_device *alloc_netdev_mqs(int
unsigned char name_assign_type,
void (*setup)(struct net_device *),
unsigned int txqs, unsigned int rxqs);
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name);
+
#define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \
alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1146,9 +1146,8 @@ static int dev_alloc_name_ns(struct net
return ret;
}
-static int dev_get_valid_name(struct net *net,
- struct net_device *dev,
- const char *name)
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
BUG_ON(!net);
@@ -1164,6 +1163,7 @@ static int dev_get_valid_name(struct net
return 0;
}
+EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
Patches currently in stable-queue which might be from xiyou.wangcong(a)gmail.com are
queue-4.13/tun-call-dev_get_valid_name-before-register_netdevice.patch
queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch
queue-4.13/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch
This is a note to let you know that I've just added the patch titled
tun: allow positive return values on dev_get_valid_name() call
to the 4.13-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tun-allow-positive-return-values-on-dev_get_valid_name-call.patch
and it can be found in the queue-4.13 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:25:34 CET 2017
From: Julien Gomes <julien(a)arista.com>
Date: Wed, 25 Oct 2017 11:50:50 -0700
Subject: tun: allow positive return values on dev_get_valid_name() call
From: Julien Gomes <julien(a)arista.com>
[ Upstream commit 5c25f65fd1e42685f7ccd80e0621829c105785d9 ]
If the name argument of dev_get_valid_name() contains "%d", it will try
to assign it a unit number in __dev_alloc_name() and return either the
unit number (>= 0) or an error code (< 0).
Considering positive values as error values prevents tun device creations
relying on this mechanism; therefore we should only consider negative values
as errors here.
Signed-off-by: Julien Gomes <julien(a)arista.com>
Acked-by: Cong Wang <xiyou.wangcong(a)gmail.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/tun.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1814,7 +1814,7 @@ static int tun_set_iff(struct net *net,
if (!dev)
return -ENOMEM;
err = dev_get_valid_name(net, dev, name);
- if (err)
+ if (err < 0)
goto err_free_dev;
dev_net_set(dev, net);
Patches currently in stable-queue which might be from julien(a)arista.com are
queue-4.13/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch
This is a note to let you know that I've just added the patch titled
tcp: refresh tp timestamp before tcp_mtu_probe()
to the 4.13-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch
and it can be found in the queue-4.13 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:25:34 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Thu, 26 Oct 2017 21:21:40 -0700
Subject: tcp: refresh tp timestamp before tcp_mtu_probe()
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit ee1836aec4f5a977c1699a311db4d9027ef21ac8 ]
In the unlikely event tcp_mtu_probe() is sending a packet, we
want tp->tcp_mstamp to be as accurate as possible.
This means we need to call tcp_mstamp_refresh() a bit earlier in
tcp_write_xmit().
Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/tcp_output.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2271,6 +2271,7 @@ static bool tcp_write_xmit(struct sock *
sent_pkts = 0;
+ tcp_mstamp_refresh(tp);
if (!push_one) {
/* Do MTU probing. */
result = tcp_mtu_probe(sk);
@@ -2282,7 +2283,6 @@ static bool tcp_write_xmit(struct sock *
}
max_segs = tcp_tso_segs(sk, mss_now);
- tcp_mstamp_refresh(tp);
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch
queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch
queue-4.13/tcp-dccp-fix-ireq-opt-races.patch
queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch
queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch
queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch
queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch
queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch
queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch
queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
This is a note to let you know that I've just added the patch titled
tcp: fix tcp_mtu_probe() vs highest_sack
to the 4.13-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
and it can be found in the queue-4.13 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:25:34 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Mon, 30 Oct 2017 23:08:20 -0700
Subject: tcp: fix tcp_mtu_probe() vs highest_sack
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit 2b7cda9c35d3b940eb9ce74b30bbd5eb30db493d ]
Based on SNMP values provided by Roman, Yuchung made the observation
that some crashes in tcp_sacktag_walk() might be caused by MTU probing.
Looking at tcp_mtu_probe(), I found that when a new skb was placed
in front of the write queue, we were not updating tcp highest sack.
If one skb is freed because all its content was copied to the new skb
(for MTU probing), then tp->highest_sack could point to a now freed skb.
Bad things would then happen, including infinite loops.
This patch renames tcp_highest_sack_combine() and uses it
from tcp_mtu_probe() to fix the bug.
Note that I also removed one test against tp->sacked_out,
since we want to replace tp->highest_sack regardless of whatever
condition, since keeping a stale pointer to freed skb is a recipe
for disaster.
Fixes: a47e5a988a57 ("[TCP]: Convert highest_sack to sk_buff to allow direct access")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: Alexei Starovoitov <alexei.starovoitov(a)gmail.com>
Reported-by: Roman Gushchin <guro(a)fb.com>
Reported-by: Oleksandr Natalenko <oleksandr(a)natalenko.name>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
Acked-by: Neal Cardwell <ncardwell(a)google.com>
Acked-by: Yuchung Cheng <ycheng(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/net/tcp.h | 6 +++---
net/ipv4/tcp_output.c | 3 ++-
2 files changed, 5 insertions(+), 4 deletions(-)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1750,12 +1750,12 @@ static inline void tcp_highest_sack_rese
tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
}
-/* Called when old skb is about to be deleted (to be combined with new skb) */
-static inline void tcp_highest_sack_combine(struct sock *sk,
+/* Called when old skb is about to be deleted and replaced by new skb */
+static inline void tcp_highest_sack_replace(struct sock *sk,
struct sk_buff *old,
struct sk_buff *new)
{
- if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
+ if (old == tcp_highest_sack(sk))
tcp_sk(sk)->highest_sack = new;
}
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2094,6 +2094,7 @@ static int tcp_mtu_probe(struct sock *sk
nskb->ip_summed = skb->ip_summed;
tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_highest_sack_replace(sk, skb, nskb);
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2694,7 +2695,7 @@ static bool tcp_collapse_retrans(struct
else if (!skb_shift(skb, next_skb, next_skb_size))
return false;
}
- tcp_highest_sack_combine(sk, next_skb, skb);
+ tcp_highest_sack_replace(sk, next_skb, skb);
tcp_unlink_write_queue(next_skb, sk);
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch
queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch
queue-4.13/tcp-dccp-fix-ireq-opt-races.patch
queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch
queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch
queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch
queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch
queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch
queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch
queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
This is a note to let you know that I've just added the patch titled
tcp/dccp: fix other lockdep splats accessing ireq_opt
to the 4.13-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
and it can be found in the queue-4.13 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:25:34 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Tue, 24 Oct 2017 08:20:31 -0700
Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit 06f877d613be3621604c2520ec0351d9fbdca15f ]
In my first attempt to fix the lockdep splat, I forgot we could
enter inet_csk_route_req() with a freshly allocated request socket,
for which refcount has not yet been elevated, due to complex
SLAB_TYPESAFE_BY_RCU rules.
We either are in rcu_read_lock() section _or_ we own a refcount on the
request.
Correct RCU verb to use here is rcu_dereference_check(), although it is
not possible to prove we actually own a reference on a shared
refcount :/
In v2, I added ireq_opt_deref() helper and use in three places, to fix other
possible splats.
[ 49.844590] lockdep_rcu_suspicious+0xea/0xf3
[ 49.846487] inet_csk_route_req+0x53/0x14d
[ 49.848334] tcp_v4_route_req+0xe/0x10
[ 49.850174] tcp_conn_request+0x31c/0x6a0
[ 49.851992] ? __lock_acquire+0x614/0x822
[ 49.854015] tcp_v4_conn_request+0x5a/0x79
[ 49.855957] ? tcp_v4_conn_request+0x5a/0x79
[ 49.858052] tcp_rcv_state_process+0x98/0xdcc
[ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307
[ 49.862085] tcp_v4_do_rcv+0xfc/0x145
[ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145
[ 49.866173] tcp_v4_rcv+0x5ab/0xaf9
[ 49.868029] ip_local_deliver_finish+0x1af/0x2e7
[ 49.870064] ip_local_deliver+0x1b2/0x1c5
[ 49.871775] ? inet_del_offload+0x45/0x45
[ 49.873916] ip_rcv_finish+0x3f7/0x471
[ 49.875476] ip_rcv+0x3f1/0x42f
[ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7
[ 49.878791] __netif_receive_skb_core+0x6d3/0x950
[ 49.880701] ? process_backlog+0x7e/0x216
[ 49.882589] __netif_receive_skb+0x1d/0x5e
[ 49.884122] process_backlog+0x10c/0x216
[ 49.885812] net_rx_action+0x147/0x3df
Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()")
Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: kernel test robot <fengguang.wu(a)intel.com>
Reported-by: Maciej Żenczykowski <maze(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/net/inet_sock.h | 6 ++++++
net/dccp/ipv4.c | 2 +-
net/ipv4/inet_connection_sock.c | 4 ++--
net/ipv4/tcp_ipv4.c | 2 +-
4 files changed, 10 insertions(+), 4 deletions(-)
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -132,6 +132,12 @@ static inline int inet_request_bound_dev
return sk->sk_bound_dev_if;
}
+static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
+{
+ return rcu_dereference_check(ireq->ireq_opt,
+ refcount_read(&ireq->req.rsk_refcnt) > 0);
+}
+
struct inet_cork {
unsigned int flags;
__be32 addr;
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const s
ireq->ir_rmt_addr);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- rcu_dereference(ireq->ireq_opt));
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -540,8 +540,8 @@ struct dst_entry *inet_csk_route_req(con
struct ip_options_rcu *opt;
struct rtable *rt;
- opt = rcu_dereference_protected(ireq->ireq_opt,
- refcount_read(&req->rsk_refcnt) > 0);
+ opt = ireq_opt_deref(ireq);
+
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -878,7 +878,7 @@ static int tcp_v4_send_synack(const stru
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- rcu_dereference(ireq->ireq_opt));
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch
queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch
queue-4.13/tcp-dccp-fix-ireq-opt-races.patch
queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch
queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch
queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch
queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch
queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch
queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch
queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
This is a note to let you know that I've just added the patch titled
tcp/dccp: fix lockdep splat in inet_csk_route_req()
to the 4.13-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
and it can be found in the queue-4.13 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:25:34 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Sun, 22 Oct 2017 12:33:57 -0700
Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req()
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf ]
This patch fixes the following lockdep splat in inet_csk_route_req()
lockdep_rcu_suspicious
inet_csk_route_req
tcp_v4_send_synack
tcp_rtx_synack
inet_rtx_syn_ack
tcp_fastopen_synack_time
tcp_retransmit_timer
tcp_write_timer_handler
tcp_write_timer
call_timer_fn
Thread running inet_csk_route_req() owns a reference on the request
socket, so we have the guarantee ireq->ireq_opt won't be changed or
freed.
lockdep can enforce this invariant for us.
Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/ipv4/inet_connection_sock.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -540,7 +540,8 @@ struct dst_entry *inet_csk_route_req(con
struct ip_options_rcu *opt;
struct rtable *rt;
- opt = rcu_dereference(ireq->ireq_opt);
+ opt = rcu_dereference_protected(ireq->ireq_opt,
+ refcount_read(&req->rsk_refcnt) > 0);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch
queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch
queue-4.13/tcp-dccp-fix-ireq-opt-races.patch
queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch
queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch
queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch
queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch
queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch
queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch
queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
This is a note to let you know that I've just added the patch titled
tap: reference to KVA of an unloaded module causes kernel panic
to the 4.13-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch
and it can be found in the queue-4.13 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Wed Nov 15 17:25:34 CET 2017
From: Girish Moodalbail <girish.moodalbail(a)oracle.com>
Date: Fri, 27 Oct 2017 00:00:16 -0700
Subject: tap: reference to KVA of an unloaded module causes kernel panic
From: Girish Moodalbail <girish.moodalbail(a)oracle.com>
[ Upstream commit dea6e19f4ef746aa18b4c33d1a7fed54356796ed ]
The commit 9a393b5d5988 ("tap: tap as an independent module") created a
separate tap module that implements tap functionality and exports
interfaces that will be used by macvtap and ipvtap modules to create
respective tap devices.
However, that patch introduced a regression wherein the modules macvtap
and ipvtap can be removed (through modprobe -r) while there are
applications using the respective /dev/tapX devices. These applications
cause the kernel to hold a reference to /dev/tapX through 'struct cdev
macvtap_cdev' and 'struct cdev ipvtap_dev' defined in macvtap and ipvtap
modules respectively. So, when the application is later closed the
kernel panics because we are referencing KVA that is present in the
unloaded modules.
----------8<------- Example ----------8<----------
$ sudo ip li add name mv0 link enp7s0 type macvtap
$ sudo ip li show mv0 |grep mv0| awk -e '{print $1 $2}'
14:mv0@enp7s0:
$ cat /dev/tap14 &
$ lsmod |egrep -i 'tap|vlan'
macvtap 16384 0
macvlan 24576 1 macvtap
tap 24576 3 macvtap
$ sudo modprobe -r macvtap
$ fg
cat /dev/tap14
^C
<...system panics...>
BUG: unable to handle kernel paging request at ffffffffa038c500
IP: cdev_put+0xf/0x30
----------8<-----------------8<----------
The fix is to set cdev.owner to the module that creates the tap device
(either macvtap or ipvtap). With this set, the operations (in
fs/char_dev.c) on the char device hold and release the module through
cdev_get() and cdev_put() and will not allow the module to unload
prematurely.
Fixes: 9a393b5d5988ea4e (tap: tap as an independent module)
Signed-off-by: Girish Moodalbail <girish.moodalbail(a)oracle.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/ipvlan/ipvtap.c | 4 ++--
drivers/net/macvtap.c | 4 ++--
drivers/net/tap.c | 5 +++--
include/linux/if_tap.h | 4 ++--
4 files changed, 9 insertions(+), 8 deletions(-)
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -197,8 +197,8 @@ static int ipvtap_init(void)
{
int err;
- err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap");
-
+ err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap",
+ THIS_MODULE);
if (err)
goto out1;
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -204,8 +204,8 @@ static int macvtap_init(void)
{
int err;
- err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap");
-
+ err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap",
+ THIS_MODULE);
if (err)
goto out1;
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1252,8 +1252,8 @@ static int tap_list_add(dev_t major, con
return 0;
}
-int tap_create_cdev(struct cdev *tap_cdev,
- dev_t *tap_major, const char *device_name)
+int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major,
+ const char *device_name, struct module *module)
{
int err;
@@ -1262,6 +1262,7 @@ int tap_create_cdev(struct cdev *tap_cde
goto out1;
cdev_init(tap_cdev, &tap_fops);
+ tap_cdev->owner = module;
err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS);
if (err)
goto out2;
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -73,8 +73,8 @@ void tap_del_queues(struct tap_dev *tap)
int tap_get_minor(dev_t major, struct tap_dev *tap);
void tap_free_minor(dev_t major, struct tap_dev *tap);
int tap_queue_resize(struct tap_dev *tap);
-int tap_create_cdev(struct cdev *tap_cdev,
- dev_t *tap_major, const char *device_name);
+int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major,
+ const char *device_name, struct module *module);
void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev);
#endif /*_LINUX_IF_TAP_H_*/
Patches currently in stable-queue which might be from girish.moodalbail(a)oracle.com are
queue-4.13/tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch
queue-4.13/tap-double-free-in-error-path-in-tap_open.patch