syzbot reported a circular locking dependency involving nr_neigh_list_lock, nr_node_list_lock and nr_node->node_lock in the NET/ROM routing code [1].
One of the problematic scenarios looks like this:
CPU0 CPU1 ---- ---- nr_rt_device_down() nr_rt_ioctl() lock(nr_neigh_list_lock); nr_del_node() ... lock(nr_node_list_lock); lock(nr_node_list_lock); nr_remove_neigh(); lock(nr_neigh_list_lock);
This creates the following lock chain:
nr_neigh_list_lock -> nr_node_list_lock -> &nr_node->node_lock
while the ioctl path may acquire the locks in the opposite order via nr_dec_obs()/nr_del_node(), which makes lockdep complain about a possible deadlock.
Refactor nr_rt_device_down() to avoid nested locking of nr_neigh_list_lock and nr_node_list_lock. The function now performs two separate passes: one that walks all nodes under nr_node_list_lock and drops routes / reference counts, and a second one that removes unused neighbours under nr_neigh_list_lock.
This also fixes a reference count leak of nr_neigh in the node route removal path.
[1] https://syzkaller.appspot.com/bug?extid=14afda08dc3484d5db82
Reported-by: syzbot+14afda08dc3484d5db82@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=14afda08dc3484d5db82 Tested-by: syzbot+14afda08dc3484d5db82@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Signed-off-by: Junjie Cao junjie.cao@intel.com --- net/netrom/nr_route.c | 61 +++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-)
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index b94cb2ffbaf8..cc8c47e1340a 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -511,37 +511,40 @@ void nr_rt_device_down(struct net_device *dev) struct nr_node *t; int i;
- spin_lock_bh(&nr_neigh_list_lock); - nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { - if (s->dev == dev) { - spin_lock_bh(&nr_node_list_lock); - nr_node_for_each_safe(t, node2t, &nr_node_list) { - nr_node_lock(t); - for (i = 0; i < t->count; i++) { - if (t->routes[i].neighbour == s) { - t->count--; - - switch (i) { - case 0: - t->routes[0] = t->routes[1]; - fallthrough; - case 1: - t->routes[1] = t->routes[2]; - break; - case 2: - break; - } - } - } + spin_lock_bh(&nr_node_list_lock); + nr_node_for_each_safe(t, node2t, &nr_node_list) { + nr_node_lock(t); + for (i = 0; i < t->count; i++) { + s = t->routes[i].neighbour; + if (s->dev == dev) { + s->count--; + nr_neigh_put(s); + t->count--;
- if (t->count <= 0) - nr_remove_node_locked(t); - nr_node_unlock(t); + switch (i) { + case 0: + t->routes[0] = t->routes[1]; + fallthrough; + case 1: + t->routes[1] = t->routes[2]; + break; + case 2: + break; + } + i--; } - spin_unlock_bh(&nr_node_list_lock); + }
+ if (t->count <= 0) + nr_remove_node_locked(t); + nr_node_unlock(t); + } + spin_unlock_bh(&nr_node_list_lock); + + spin_lock_bh(&nr_neigh_list_lock); + nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { + if (s->dev == dev) nr_remove_neigh_locked(s); - } } spin_unlock_bh(&nr_neigh_list_lock); } @@ -965,8 +968,8 @@ void nr_rt_free(void) struct nr_node *t = NULL; struct hlist_node *nodet;
- spin_lock_bh(&nr_neigh_list_lock); spin_lock_bh(&nr_node_list_lock); + spin_lock_bh(&nr_neigh_list_lock); nr_node_for_each_safe(t, nodet, &nr_node_list) { nr_node_lock(t); nr_remove_node_locked(t); @@ -979,6 +982,6 @@ void nr_rt_free(void) } nr_remove_neigh_locked(s); } - spin_unlock_bh(&nr_node_list_lock); spin_unlock_bh(&nr_neigh_list_lock); + spin_unlock_bh(&nr_node_list_lock); }