This is a note to let you know that I've just added the patch titled
packet: fix crash in fanout_demux_rollover()
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
packet-fix-crash-in-fanout_demux_rollover.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Thu Dec 14 11:45:58 CET 2017
From: Mike Maloney <maloney(a)google.com>
Date: Tue, 28 Nov 2017 10:44:29 -0500
Subject: packet: fix crash in fanout_demux_rollover()
From: Mike Maloney <maloney(a)google.com>
syzkaller found a race condition fanout_demux_rollover() while removing
a packet socket from a fanout group.
po->rollover is read and operated on during packet_rcv_fanout(), via
fanout_demux_rollover(), but the pointer is currently cleared before the
synchronization in packet_release(). It is safer to delay the cleanup
until after synchronize_net() has been called, ensuring all calls to
packet_rcv_fanout() for this socket have finished.
To further simplify synchronization around the rollover structure, set
po->rollover in fanout_add() only if there are no errors. This removes
the need for rcu in the struct and in the call to
packet_getsockopt(..., PACKET_ROLLOVER_STATS, ...).
Crashing stack trace:
fanout_demux_rollover+0xb6/0x4d0 net/packet/af_packet.c:1392
packet_rcv_fanout+0x649/0x7c8 net/packet/af_packet.c:1487
dev_queue_xmit_nit+0x835/0xc10 net/core/dev.c:1953
xmit_one net/core/dev.c:2975 [inline]
dev_hard_start_xmit+0x16b/0xac0 net/core/dev.c:2995
__dev_queue_xmit+0x17a4/0x2050 net/core/dev.c:3476
dev_queue_xmit+0x17/0x20 net/core/dev.c:3509
neigh_connected_output+0x489/0x720 net/core/neighbour.c:1379
neigh_output include/net/neighbour.h:482 [inline]
ip6_finish_output2+0xad1/0x22a0 net/ipv6/ip6_output.c:120
ip6_finish_output+0x2f9/0x920 net/ipv6/ip6_output.c:146
NF_HOOK_COND include/linux/netfilter.h:239 [inline]
ip6_output+0x1f4/0x850 net/ipv6/ip6_output.c:163
dst_output include/net/dst.h:459 [inline]
NF_HOOK.constprop.35+0xff/0x630 include/linux/netfilter.h:250
mld_sendpack+0x6a8/0xcc0 net/ipv6/mcast.c:1660
mld_send_initial_cr.part.24+0x103/0x150 net/ipv6/mcast.c:2072
mld_send_initial_cr net/ipv6/mcast.c:2056 [inline]
ipv6_mc_dad_complete+0x99/0x130 net/ipv6/mcast.c:2079
addrconf_dad_completed+0x595/0x970 net/ipv6/addrconf.c:4039
addrconf_dad_work+0xac9/0x1160 net/ipv6/addrconf.c:3971
process_one_work+0xbf0/0x1bc0 kernel/workqueue.c:2113
worker_thread+0x223/0x1990 kernel/workqueue.c:2247
kthread+0x35e/0x430 kernel/kthread.c:231
ret_from_fork+0x2a/0x40 arch/x86/entry/entry_64.S:432
Fixes: 0648ab70afe6 ("packet: rollover prepare: per-socket state")
Fixes: 509c7a1ecc860 ("packet: avoid panic in packet_getsockopt()")
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Signed-off-by: Mike Maloney <maloney(a)google.com>
Reviewed-by: Eric Dumazet <edumazet(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/packet/af_packet.c | 32 ++++++++++----------------------
net/packet/internal.h | 1 -
2 files changed, 10 insertions(+), 23 deletions(-)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1661,7 +1661,6 @@ static int fanout_add(struct sock *sk, u
atomic_long_set(&rollover->num, 0);
atomic_long_set(&rollover->num_huge, 0);
atomic_long_set(&rollover->num_failed, 0);
- po->rollover = rollover;
}
match = NULL;
@@ -1706,6 +1705,8 @@ static int fanout_add(struct sock *sk, u
if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
__dev_remove_pack(&po->prot_hook);
po->fanout = match;
+ po->rollover = rollover;
+ rollover = NULL;
atomic_inc(&match->sk_ref);
__fanout_link(sk, po);
err = 0;
@@ -1719,10 +1720,7 @@ static int fanout_add(struct sock *sk, u
}
out:
- if (err && rollover) {
- kfree_rcu(rollover, rcu);
- po->rollover = NULL;
- }
+ kfree(rollover);
mutex_unlock(&fanout_mutex);
return err;
}
@@ -1746,11 +1744,6 @@ static struct packet_fanout *fanout_rele
list_del(&f->list);
else
f = NULL;
-
- if (po->rollover) {
- kfree_rcu(po->rollover, rcu);
- po->rollover = NULL;
- }
}
mutex_unlock(&fanout_mutex);
@@ -3039,6 +3032,7 @@ static int packet_release(struct socket
synchronize_net();
if (f) {
+ kfree(po->rollover);
fanout_release_data(f);
kfree(f);
}
@@ -3853,7 +3847,6 @@ static int packet_getsockopt(struct sock
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
- struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3932,18 +3925,13 @@ static int packet_getsockopt(struct sock
0);
break;
case PACKET_ROLLOVER_STATS:
- rcu_read_lock();
- rollover = rcu_dereference(po->rollover);
- if (rollover) {
- rstats.tp_all = atomic_long_read(&rollover->num);
- rstats.tp_huge = atomic_long_read(&rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
- }
- rcu_read_unlock();
- if (!rollover)
+ if (!po->rollover)
return -EINVAL;
+ rstats.tp_all = atomic_long_read(&po->rollover->num);
+ rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -92,7 +92,6 @@ struct packet_fanout {
struct packet_rollover {
int sock;
- struct rcu_head rcu;
atomic_long_t num;
atomic_long_t num_huge;
atomic_long_t num_failed;
Patches currently in stable-queue which might be from maloney(a)google.com are
queue-4.9/packet-fix-crash-in-fanout_demux_rollover.patch
This is a note to let you know that I've just added the patch titled
net: remove hlist_nulls_add_tail_rcu()
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-remove-hlist_nulls_add_tail_rcu.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Thu Dec 14 11:45:58 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Tue, 5 Dec 2017 12:45:56 -0800
Subject: net: remove hlist_nulls_add_tail_rcu()
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 ]
Alexander Potapenko reported use of uninitialized memory [1]
This happens when inserting a request socket into TCP ehash,
in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized.
Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for
mixed v4/v6 sockets")
Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6
ordering fix") missed the opportunity to get rid of
hlist_nulls_add_tail_rcu() :
Both UDP sockets and TCP/DCCP listeners no longer use
__sk_nulls_add_node_rcu() for their hash insertion.
Since all other sockets have unique 4-tuple, the reuseport status
has no special meaning, so we can always use hlist_nulls_add_head_rcu()
for them and save few cycles/instructions.
[1]
==================================================================
BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
<IRQ>
__dump_stack lib/dump_stack.c:16
dump_stack+0x185/0x1d0 lib/dump_stack.c:52
kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016
__msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766
__sk_nulls_add_node_rcu ./include/net/sock.h:684
inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413
reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754
inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765
tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414
tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314
tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917
tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483
tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763
ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216
NF_HOOK ./include/linux/netfilter.h:248
ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257
dst_input ./include/net/dst.h:477
ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397
NF_HOOK ./include/linux/netfilter.h:248
ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488
__netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298
__netif_receive_skb net/core/dev.c:4336
netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497
napi_skb_finish net/core/dev.c:4858
napi_gro_receive+0x629/0xa50 net/core/dev.c:4889
e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018
e1000_clean_rx_irq+0x1492/0x1d30
drivers/net/ethernet/intel/e1000/e1000_main.c:4474
e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819
napi_poll net/core/dev.c:5500
net_rx_action+0x73c/0x1820 net/core/dev.c:5566
__do_softirq+0x4b4/0x8dd kernel/softirq.c:284
invoke_softirq kernel/softirq.c:364
irq_exit+0x203/0x240 kernel/softirq.c:405
exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638
do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263
common_interrupt+0x86/0x86
Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets")
Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: Alexander Potapenko <glider(a)google.com>
Acked-by: Craig Gallek <kraig(a)google.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/linux/rculist_nulls.h | 38 --------------------------------------
include/net/sock.h | 6 +-----
2 files changed, 1 insertion(+), 43 deletions(-)
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -100,44 +100,6 @@ static inline void hlist_nulls_add_head_
}
/**
- * hlist_nulls_add_tail_rcu
- * @n: the element to add to the hash list.
- * @h: the list to add to.
- *
- * Description:
- * Adds the specified element to the end of the specified hlist_nulls,
- * while permitting racing traversals. NOTE: tail insertion requires
- * list traversal.
- *
- * The caller must take whatever precautions are necessary
- * (such as holding appropriate locks) to avoid racing
- * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
- * or hlist_nulls_del_rcu(), running on this same list.
- * However, it is perfectly legal to run concurrently with
- * the _rcu list-traversal primitives, such as
- * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
- * problems on Alpha CPUs. Regardless of the type of CPU, the
- * list-traversal primitive must be guarded by rcu_read_lock().
- */
-static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
- struct hlist_nulls_head *h)
-{
- struct hlist_nulls_node *i, *last = NULL;
-
- for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
- i = hlist_nulls_next_rcu(i))
- last = i;
-
- if (last) {
- n->next = last->next;
- n->pprev = &last->next;
- rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
- } else {
- hlist_nulls_add_head_rcu(n, h);
- }
-}
-
-/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_nulls_node to use as a loop cursor.
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -648,11 +648,7 @@ static inline void sk_add_node_rcu(struc
static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
- if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
- sk->sk_family == AF_INET6)
- hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
- else
- hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}
static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-4.9/net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch
queue-4.9/packet-fix-crash-in-fanout_demux_rollover.patch
queue-4.9/net-remove-hlist_nulls_add_tail_rcu.patch
queue-4.9/tcp-dccp-block-bh-before-arming-time_wait-timer.patch
This is a note to let you know that I've just added the patch titled
net: qmi_wwan: add Quectel BG96 2c7c:0296
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-qmi_wwan-add-quectel-bg96-2c7c-0296.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Thu Dec 14 11:45:58 CET 2017
From: Sebastian Sjoholm <ssjoholm(a)mac.com>
Date: Mon, 20 Nov 2017 19:05:17 +0100
Subject: net: qmi_wwan: add Quectel BG96 2c7c:0296
From: Sebastian Sjoholm <ssjoholm(a)mac.com>
[ Upstream commit f9409e7f086fa6c4623769b4b2f4f17a024d8143 ]
Quectel BG96 is an Qualcomm MDM9206 based IoT modem, supporting both
CAT-M and NB-IoT. Tested hardware is BG96 mounted on Quectel development
board (EVB). The USB id is added to qmi_wwan.c to allow QMI
communication with the BG96.
Signed-off-by: Sebastian Sjoholm <ssjoholm(a)mac.com>
Acked-by: Bjørn Mork <bjorn(a)mork.no>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/net/usb/qmi_wwan.c | 1 +
1 file changed, 1 insertion(+)
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -936,6 +936,7 @@ static const struct usb_device_id produc
{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */
+ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
Patches currently in stable-queue which might be from ssjoholm(a)mac.com are
queue-4.9/net-qmi_wwan-add-quectel-bg96-2c7c-0296.patch
This is a note to let you know that I've just added the patch titled
net/packet: fix a race in packet_bind() and packet_notifier()
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From foo@baz Thu Dec 14 11:45:58 CET 2017
From: Eric Dumazet <edumazet(a)google.com>
Date: Tue, 28 Nov 2017 08:03:30 -0800
Subject: net/packet: fix a race in packet_bind() and packet_notifier()
From: Eric Dumazet <edumazet(a)google.com>
[ Upstream commit 15fe076edea787807a7cdc168df832544b58eba6 ]
syzbot reported crashes [1] and provided a C repro easing bug hunting.
When/if packet_do_bind() calls __unregister_prot_hook() and releases
po->bind_lock, another thread can run packet_notifier() and process an
NETDEV_UP event.
This calls register_prot_hook() and hooks again the socket right before
first thread is able to grab again po->bind_lock.
Fixes this issue by temporarily setting po->num to 0, as suggested by
David Miller.
[1]
dev_remove_pack: ffff8801bf16fa80 not found
------------[ cut here ]------------
kernel BUG at net/core/dev.c:7945! ( BUG_ON(!list_empty(&dev->ptype_all)); )
invalid opcode: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
(ftrace buffer empty)
Modules linked in:
device syz0 entered promiscuous mode
CPU: 0 PID: 3161 Comm: syzkaller404108 Not tainted 4.14.0+ #190
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
task: ffff8801cc57a500 task.stack: ffff8801cc588000
RIP: 0010:netdev_run_todo+0x772/0xae0 net/core/dev.c:7945
RSP: 0018:ffff8801cc58f598 EFLAGS: 00010293
RAX: ffff8801cc57a500 RBX: dffffc0000000000 RCX: ffffffff841f75b2
RDX: 0000000000000000 RSI: 1ffff100398b1ede RDI: ffff8801bf1f8810
device syz0 entered promiscuous mode
RBP: ffff8801cc58f898 R08: 0000000000000001 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801bf1f8cd8
R13: ffff8801cc58f870 R14: ffff8801bf1f8780 R15: ffff8801cc58f7f0
FS: 0000000001716880(0000) GS:ffff8801db400000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000020b13000 CR3: 0000000005e25000 CR4: 00000000001406f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
rtnl_unlock+0xe/0x10 net/core/rtnetlink.c:106
tun_detach drivers/net/tun.c:670 [inline]
tun_chr_close+0x49/0x60 drivers/net/tun.c:2845
__fput+0x333/0x7f0 fs/file_table.c:210
____fput+0x15/0x20 fs/file_table.c:244
task_work_run+0x199/0x270 kernel/task_work.c:113
exit_task_work include/linux/task_work.h:22 [inline]
do_exit+0x9bb/0x1ae0 kernel/exit.c:865
do_group_exit+0x149/0x400 kernel/exit.c:968
SYSC_exit_group kernel/exit.c:979 [inline]
SyS_exit_group+0x1d/0x20 kernel/exit.c:977
entry_SYSCALL_64_fastpath+0x1f/0x96
RIP: 0033:0x44ad19
Fixes: 30f7ea1c2b5f ("packet: race condition in packet_bind")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: syzbot <syzkaller(a)googlegroups.com>
Cc: Francesco Ruggeri <fruggeri(a)aristanetworks.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/packet/af_packet.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3101,6 +3101,10 @@ static int packet_do_bind(struct sock *s
if (need_rehook) {
if (po->running) {
rcu_read_unlock();
+ /* prevents packet_notifier() from calling
+ * register_prot_hook()
+ */
+ po->num = 0;
__unregister_prot_hook(sk, true);
rcu_read_lock();
dev_curr = po->prot_hook.dev;
@@ -3109,6 +3113,7 @@ static int packet_do_bind(struct sock *s
dev->ifindex);
}
+ BUG_ON(po->running);
po->num = proto;
po->prot_hook.type = proto;
Patches currently in stable-queue which might be from edumazet(a)google.com are
queue-4.9/net-packet-fix-a-race-in-packet_bind-and-packet_notifier.patch
queue-4.9/packet-fix-crash-in-fanout_demux_rollover.patch
queue-4.9/net-remove-hlist_nulls_add_tail_rcu.patch
queue-4.9/tcp-dccp-block-bh-before-arming-time_wait-timer.patch
This is a note to let you know that I've just added the patch titled
ipmi: Stop timers before cleaning up the module
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
ipmi-stop-timers-before-cleaning-up-the-module.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 4f7f5551a760eb0124267be65763008169db7087 Mon Sep 17 00:00:00 2001
From: Masamitsu Yamazaki <m-yamazaki(a)ah.jp.nec.com>
Date: Wed, 15 Nov 2017 07:33:14 +0000
Subject: ipmi: Stop timers before cleaning up the module
From: Masamitsu Yamazaki <m-yamazaki(a)ah.jp.nec.com>
commit 4f7f5551a760eb0124267be65763008169db7087 upstream.
System may crash after unloading ipmi_si.ko module
because a timer may remain and fire after the module cleaned up resources.
cleanup_one_si() contains the following processing.
/*
* Make sure that interrupts, the timer and the thread are
* stopped and will not run again.
*/
if (to_clean->irq_cleanup)
to_clean->irq_cleanup(to_clean);
wait_for_timer_and_thread(to_clean);
/*
* Timeouts are stopped, now make sure the interrupts are off
* in the BMC. Note that timers and CPU interrupts are off,
* so no need for locks.
*/
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
}
si_state changes as following in the while loop calling poll(to_clean).
SI_GETTING_MESSAGES
=> SI_CHECKING_ENABLES
=> SI_SETTING_ENABLES
=> SI_GETTING_EVENTS
=> SI_NORMAL
As written in the code comments above,
timers are expected to stop before the polling loop and not to run again.
But the timer is set again in the following process
when si_state becomes SI_SETTING_ENABLES.
=> poll
=> smi_event_handler
=> handle_transaction_done
// smi_info->si_state == SI_SETTING_ENABLES
=> start_getting_events
=> start_new_msg
=> smi_mod_timer
=> mod_timer
As a result, before the timer set in start_new_msg() expires,
the polling loop may see si_state becoming SI_NORMAL
and the module clean-up finishes.
For example, hard LOCKUP and panic occurred as following.
smi_timeout was called after smi_event_handler,
kcs_event and hangs at port_inb()
trying to access I/O port after release.
[exception RIP: port_inb+19]
RIP: ffffffffc0473053 RSP: ffff88069fdc3d80 RFLAGS: 00000006
RAX: ffff8806800f8e00 RBX: ffff880682bd9400 RCX: 0000000000000000
RDX: 0000000000000ca3 RSI: 0000000000000ca3 RDI: ffff8806800f8e40
RBP: ffff88069fdc3d80 R8: ffffffff81d86dfc R9: ffffffff81e36426
R10: 00000000000509f0 R11: 0000000000100000 R12: 0000000000]:000000
R13: 0000000000000000 R14: 0000000000000246 R15: ffff8806800f8e00
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000
--- <NMI exception stack> ---
To fix the problem I defined a flag, timer_can_start,
as member of struct smi_info.
The flag is enabled immediately after initializing the timer
and disabled immediately before waiting for timer deletion.
Fixes: 0cfec916e86d ("ipmi: Start the timer and thread on internal msgs")
Signed-off-by: Yamazaki Masamitsu <m-yamazaki(a)ah.jp.nec.com>
[Adjusted for recent changes in the driver.]
[Some fairly major changes went into the IPMI driver in 4.15, so this
required a backport as the code had changed and moved to a different
file. The 4.14 version of this patch moved some code under an
if statement causing it to not apply to 4.7-4.13.]
Signed-off-by: Corey Minyard <cminyard(a)mvista.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/char/ipmi/ipmi_si_intf.c | 44 ++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 21 deletions(-)
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -241,6 +241,9 @@ struct smi_info {
/* The timer for this si. */
struct timer_list si_timer;
+ /* This flag is set, if the timer can be set */
+ bool timer_can_start;
+
/* This flag is set, if the timer is running (timer_pending() isn't enough) */
bool timer_running;
@@ -416,6 +419,8 @@ out:
static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
{
+ if (!smi_info->timer_can_start)
+ return;
smi_info->last_timeout_jiffies = jiffies;
mod_timer(&smi_info->si_timer, new_val);
smi_info->timer_running = true;
@@ -435,21 +440,18 @@ static void start_new_msg(struct smi_inf
smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
}
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
{
unsigned char msg[2];
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
- if (start_timer)
- start_new_msg(smi_info, msg, 2);
- else
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+ start_new_msg(smi_info, msg, 2);
smi_info->si_state = SI_CHECKING_ENABLES;
}
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
{
unsigned char msg[3];
@@ -458,10 +460,7 @@ static void start_clear_flags(struct smi
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
msg[2] = WDT_PRE_TIMEOUT_INT;
- if (start_timer)
- start_new_msg(smi_info, msg, 3);
- else
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+ start_new_msg(smi_info, msg, 3);
smi_info->si_state = SI_CLEARING_FLAGS;
}
@@ -496,11 +495,11 @@ static void start_getting_events(struct
* Note that we cannot just use disable_irq(), since the interrupt may
* be shared.
*/
-static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
+static inline bool disable_si_irq(struct smi_info *smi_info)
{
if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = true;
- start_check_enables(smi_info, start_timer);
+ start_check_enables(smi_info);
return true;
}
return false;
@@ -510,7 +509,7 @@ static inline bool enable_si_irq(struct
{
if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = false;
- start_check_enables(smi_info, true);
+ start_check_enables(smi_info);
return true;
}
return false;
@@ -528,7 +527,7 @@ static struct ipmi_smi_msg *alloc_msg_ha
msg = ipmi_alloc_smi_msg();
if (!msg) {
- if (!disable_si_irq(smi_info, true))
+ if (!disable_si_irq(smi_info))
smi_info->si_state = SI_NORMAL;
} else if (enable_si_irq(smi_info)) {
ipmi_free_smi_msg(msg);
@@ -544,7 +543,7 @@ retry:
/* Watchdog pre-timeout */
smi_inc_stat(smi_info, watchdog_pretimeouts);
- start_clear_flags(smi_info, true);
+ start_clear_flags(smi_info);
smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
if (smi_info->intf)
ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -927,7 +926,7 @@ restart:
* disable and messages disabled.
*/
if (smi_info->supports_event_msg_buff || smi_info->irq) {
- start_check_enables(smi_info, true);
+ start_check_enables(smi_info);
} else {
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
if (!smi_info->curr_msg)
@@ -1234,6 +1233,7 @@ static int smi_start_processing(void
/* Set up the timer that drives the interface. */
setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
+ new_smi->timer_can_start = true;
smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
/* Try to claim any interrupts. */
@@ -3448,10 +3448,12 @@ static void check_for_broken_irqs(struct
check_set_rcv_irq(smi_info);
}
-static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+static inline void stop_timer_and_thread(struct smi_info *smi_info)
{
if (smi_info->thread != NULL)
kthread_stop(smi_info->thread);
+
+ smi_info->timer_can_start = false;
if (smi_info->timer_running)
del_timer_sync(&smi_info->si_timer);
}
@@ -3593,7 +3595,7 @@ static int try_smi_init(struct smi_info
* Start clearing the flags before we enable interrupts or the
* timer to avoid racing with the timer.
*/
- start_clear_flags(new_smi, false);
+ start_clear_flags(new_smi);
/*
* IRQ is defined to be set when non-zero. req_events will
@@ -3671,7 +3673,7 @@ static int try_smi_init(struct smi_info
return 0;
out_err_stop_timer:
- wait_for_timer_and_thread(new_smi);
+ stop_timer_and_thread(new_smi);
out_err:
new_smi->interrupt_disabled = true;
@@ -3865,7 +3867,7 @@ static void cleanup_one_si(struct smi_in
*/
if (to_clean->irq_cleanup)
to_clean->irq_cleanup(to_clean);
- wait_for_timer_and_thread(to_clean);
+ stop_timer_and_thread(to_clean);
/*
* Timeouts are stopped, now make sure the interrupts are off
@@ -3876,7 +3878,7 @@ static void cleanup_one_si(struct smi_in
poll(to_clean);
schedule_timeout_uninterruptible(1);
}
- disable_si_irq(to_clean, false);
+ disable_si_irq(to_clean);
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
Patches currently in stable-queue which might be from m-yamazaki(a)ah.jp.nec.com are
queue-4.9/ipmi-stop-timers-before-cleaning-up-the-module.patch
A verdict of NF_STOLEN after NF_QUEUE will cause an incorrect return value
and a potential kernel panic via double free of skb's
This was broken by commit 7034b566a4e7 ("netfilter: fix nf_queue handling")
and subsequently fixed in v4.10 by commit c63cbc460419 ("netfilter:
use switch() to handle verdict cases from nf_hook_slow()"). However that
commit cannot be cleanly cherry-picked to v4.9
Signed-off-by: Debabrata Banerjee <dbanerje(a)akamai.com>
---
This fix is only needed for v4.9 stable since v4.10+ does not have the
issue
---
net/netfilter/core.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 004af030ef1a..d869ea50623e 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -364,6 +364,11 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
ret = nf_queue(skb, state, &entry, verdict);
if (ret == 1 && entry)
goto next_hook;
+ } else {
+ /* Implicit handling for NF_STOLEN, as well as any other
+ * non conventional verdicts.
+ */
+ ret = 0;
}
return ret;
}
--
2.15.1
On Thu, Dec 07, 2017 at 05:33:36PM +0000, Mark Brown wrote:
>On Thu, Dec 07, 2017 at 03:45:57PM +0000, alexander.levin(a)verizon.com wrote:
>
>> Secondary DAI in Exynos I2S driver is not used by any of the currently
>> supported boards and it causes problems due to some limitations in the
>> ASoC code. Disable it until it gets proper support both by board-specific
>> and ASoC core code. Also disable IDMA support, which relies on secondary
>> DAI presence.
>
>People carry out of tree patches for that hardware, I'm not sure that it
>makes sense to backport this given that it's likely to conflict for
>users who would encounter it (and if they care they'll have already
>deal with it).
Sorry I forgot to reply in time, I've removed the patch.
--
Thanks,
Sasha
From: Masamitsu Yamazaki <m-yamazaki(a)ah.jp.nec.com>
commit 4f7f5551a760eb0124267be65763008169db7087 upstream.
System may crash after unloading ipmi_si.ko module
because a timer may remain and fire after the module cleaned up resources.
cleanup_one_si() contains the following processing.
/*
* Make sure that interrupts, the timer and the thread are
* stopped and will not run again.
*/
if (to_clean->irq_cleanup)
to_clean->irq_cleanup(to_clean);
wait_for_timer_and_thread(to_clean);
/*
* Timeouts are stopped, now make sure the interrupts are off
* in the BMC. Note that timers and CPU interrupts are off,
* so no need for locks.
*/
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
}
si_state changes as following in the while loop calling poll(to_clean).
SI_GETTING_MESSAGES
=> SI_CHECKING_ENABLES
=> SI_SETTING_ENABLES
=> SI_GETTING_EVENTS
=> SI_NORMAL
As written in the code comments above,
timers are expected to stop before the polling loop and not to run again.
But the timer is set again in the following process
when si_state becomes SI_SETTING_ENABLES.
=> poll
=> smi_event_handler
=> handle_transaction_done
// smi_info->si_state == SI_SETTING_ENABLES
=> start_getting_events
=> start_new_msg
=> smi_mod_timer
=> mod_timer
As a result, before the timer set in start_new_msg() expires,
the polling loop may see si_state becoming SI_NORMAL
and the module clean-up finishes.
For example, hard LOCKUP and panic occurred as following.
smi_timeout was called after smi_event_handler,
kcs_event and hangs at port_inb()
trying to access I/O port after release.
[exception RIP: port_inb+19]
RIP: ffffffffc0473053 RSP: ffff88069fdc3d80 RFLAGS: 00000006
RAX: ffff8806800f8e00 RBX: ffff880682bd9400 RCX: 0000000000000000
RDX: 0000000000000ca3 RSI: 0000000000000ca3 RDI: ffff8806800f8e40
RBP: ffff88069fdc3d80 R8: ffffffff81d86dfc R9: ffffffff81e36426
R10: 00000000000509f0 R11: 0000000000100000 R12: 0000000000]:000000
R13: 0000000000000000 R14: 0000000000000246 R15: ffff8806800f8e00
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000
--- <NMI exception stack> ---
To fix the problem I defined a flag, timer_can_start,
as member of struct smi_info.
The flag is enabled immediately after initializing the timer
and disabled immediately before waiting for timer deletion.
Fixes: 0cfec916e86d ("ipmi: Start the timer and thread on internal msgs")
Signed-off-by: Yamazaki Masamitsu <m-yamazaki(a)ah.jp.nec.com>
[Some fairly major changes went into the IPMI driver in 4.15, so this
required a backport as the code had changed and moved to a different
file. The 4.14 version of this patch moved some code under an
if statement and there was an API change causing it to not apply to
4.4-4.6.]
Signed-off-by: Corey Minyard <cminyard(a)mvista.com>
---
This is for kernel version 4.4-4.6 only. Code and API changes required
backporting. There is another version for 4.7-4.13 and another for
4.14 coming, too. Bug was introduced in 4.4.
drivers/char/ipmi/ipmi_si_intf.c | 44 +++++++++++++++++++++-------------------
1 file changed, 23 insertions(+), 21 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 4cc72fa..2f9abe0 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -239,6 +239,9 @@ struct smi_info {
/* The timer for this si. */
struct timer_list si_timer;
+ /* This flag is set, if the timer can be set */
+ bool timer_can_start;
+
/* This flag is set, if the timer is running (timer_pending() isn't enough) */
bool timer_running;
@@ -414,6 +417,8 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
{
+ if (!smi_info->timer_can_start)
+ return;
smi_info->last_timeout_jiffies = jiffies;
mod_timer(&smi_info->si_timer, new_val);
smi_info->timer_running = true;
@@ -433,21 +438,18 @@ static void start_new_msg(struct smi_info *smi_info, unsigned char *msg,
smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
}
-static void start_check_enables(struct smi_info *smi_info, bool start_timer)
+static void start_check_enables(struct smi_info *smi_info)
{
unsigned char msg[2];
msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
- if (start_timer)
- start_new_msg(smi_info, msg, 2);
- else
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+ start_new_msg(smi_info, msg, 2);
smi_info->si_state = SI_CHECKING_ENABLES;
}
-static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
+static void start_clear_flags(struct smi_info *smi_info)
{
unsigned char msg[3];
@@ -456,10 +458,7 @@ static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
msg[2] = WDT_PRE_TIMEOUT_INT;
- if (start_timer)
- start_new_msg(smi_info, msg, 3);
- else
- smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+ start_new_msg(smi_info, msg, 3);
smi_info->si_state = SI_CLEARING_FLAGS;
}
@@ -494,11 +493,11 @@ static void start_getting_events(struct smi_info *smi_info)
* Note that we cannot just use disable_irq(), since the interrupt may
* be shared.
*/
-static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
+static inline bool disable_si_irq(struct smi_info *smi_info)
{
if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = true;
- start_check_enables(smi_info, start_timer);
+ start_check_enables(smi_info);
return true;
}
return false;
@@ -508,7 +507,7 @@ static inline bool enable_si_irq(struct smi_info *smi_info)
{
if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
smi_info->interrupt_disabled = false;
- start_check_enables(smi_info, true);
+ start_check_enables(smi_info);
return true;
}
return false;
@@ -526,7 +525,7 @@ static struct ipmi_smi_msg *alloc_msg_handle_irq(struct smi_info *smi_info)
msg = ipmi_alloc_smi_msg();
if (!msg) {
- if (!disable_si_irq(smi_info, true))
+ if (!disable_si_irq(smi_info))
smi_info->si_state = SI_NORMAL;
} else if (enable_si_irq(smi_info)) {
ipmi_free_smi_msg(msg);
@@ -542,7 +541,7 @@ static void handle_flags(struct smi_info *smi_info)
/* Watchdog pre-timeout */
smi_inc_stat(smi_info, watchdog_pretimeouts);
- start_clear_flags(smi_info, true);
+ start_clear_flags(smi_info);
smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
if (smi_info->intf)
ipmi_smi_watchdog_pretimeout(smi_info->intf);
@@ -925,7 +924,7 @@ static enum si_sm_result smi_event_handler(struct smi_info *smi_info,
* disable and messages disabled.
*/
if (smi_info->supports_event_msg_buff || smi_info->irq) {
- start_check_enables(smi_info, true);
+ start_check_enables(smi_info);
} else {
smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
if (!smi_info->curr_msg)
@@ -1232,6 +1231,7 @@ static int smi_start_processing(void *send_info,
/* Set up the timer that drives the interface. */
setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
+ new_smi->timer_can_start = true;
smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
/* Try to claim any interrupts. */
@@ -3434,10 +3434,12 @@ static void check_for_broken_irqs(struct smi_info *smi_info)
check_set_rcv_irq(smi_info);
}
-static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+static inline void stop_timer_and_thread(struct smi_info *smi_info)
{
if (smi_info->thread != NULL)
kthread_stop(smi_info->thread);
+
+ smi_info->timer_can_start = false;
if (smi_info->timer_running)
del_timer_sync(&smi_info->si_timer);
}
@@ -3635,7 +3637,7 @@ static int try_smi_init(struct smi_info *new_smi)
* Start clearing the flags before we enable interrupts or the
* timer to avoid racing with the timer.
*/
- start_clear_flags(new_smi, false);
+ start_clear_flags(new_smi);
/*
* IRQ is defined to be set when non-zero. req_events will
@@ -3713,7 +3715,7 @@ static int try_smi_init(struct smi_info *new_smi)
return 0;
out_err_stop_timer:
- wait_for_timer_and_thread(new_smi);
+ stop_timer_and_thread(new_smi);
out_err:
new_smi->interrupt_disabled = true;
@@ -3919,7 +3921,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
*/
if (to_clean->irq_cleanup)
to_clean->irq_cleanup(to_clean);
- wait_for_timer_and_thread(to_clean);
+ stop_timer_and_thread(to_clean);
/*
* Timeouts are stopped, now make sure the interrupts are off
@@ -3930,7 +3932,7 @@ static void cleanup_one_si(struct smi_info *to_clean)
poll(to_clean);
schedule_timeout_uninterruptible(1);
}
- disable_si_irq(to_clean, false);
+ disable_si_irq(to_clean);
while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
poll(to_clean);
schedule_timeout_uninterruptible(1);
--
2.7.4