If the send_peer_notif counter and the peer event notify are not synchronized. It may cause problems such as the loss or dup of peer notify event.
Before this patch: - If should_notify_peers is true and the lock for send_peer_notif-- fails, peer event may be sent again in next mii_monitor loop, because should_notify_peers is still true. - If should_notify_peers is true and the lock for send_peer_notif-- succeeded, but the lock for peer event fails, the peer event will be lost.
This patch locks the RTNL for send_peer_notif, events, and commit simultaneously.
Fixes: 07a4ddec3ce9 ("bonding: add an option to specify a delay between peer notifications") Cc: Jay Vosburgh jv@jvosburgh.net Cc: Andrew Lunn andrew+netdev@lunn.ch Cc: Eric Dumazet edumazet@google.com Cc: Jakub Kicinski kuba@kernel.org Cc: Paolo Abeni pabeni@redhat.com Cc: Hangbin Liu liuhangbin@gmail.com Cc: Nikolay Aleksandrov razor@blackwall.org Cc: Vincent Bernat vincent@bernat.ch Cc: stable@vger.kernel.org Signed-off-by: Tonghao Zhang tonghao@bamaicloud.com
--- drivers/net/bonding/bond_main.c | 40 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 22 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5791c3e39baa..52b7ac8ddfbc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2971,7 +2971,7 @@ static void bond_mii_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mii_work.work); - bool should_notify_peers = false; + bool should_notify_peers; bool commit; unsigned long delay; struct slave *slave; @@ -2983,30 +2983,33 @@ static void bond_mii_monitor(struct work_struct *work) goto re_arm;
rcu_read_lock(); + should_notify_peers = bond_should_notify_peers(bond); commit = !!bond_miimon_inspect(bond); - if (bond->send_peer_notif) { - rcu_read_unlock(); - if (rtnl_trylock()) { - bond->send_peer_notif--; - rtnl_unlock(); - } - } else { - rcu_read_unlock(); - }
- if (commit) { + rcu_read_unlock(); + + if (commit || bond->send_peer_notif) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1; - should_notify_peers = false; goto re_arm; }
- bond_for_each_slave(bond, slave, iter) { - bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER); + if (commit) { + bond_for_each_slave(bond, slave, iter) { + bond_commit_link_state(slave, + BOND_SLAVE_NOTIFY_LATER); + } + bond_miimon_commit(bond); + } + + if (bond->send_peer_notif) { + bond->send_peer_notif--; + if (should_notify_peers) + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, + bond->dev); } - bond_miimon_commit(bond);
rtnl_unlock(); /* might sleep, hold no other locks */ } @@ -3014,13 +3017,6 @@ static void bond_mii_monitor(struct work_struct *work) re_arm: if (bond->params.miimon) queue_delayed_work(bond->wq, &bond->mii_work, delay); - - if (should_notify_peers) { - if (!rtnl_trylock()) - return; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - rtnl_unlock(); - } }
static int bond_upper_dev_walk(struct net_device *upper,
Tonghao Zhang tonghao@bamaicloud.com wrote:
If the send_peer_notif counter and the peer event notify are not synchronized. It may cause problems such as the loss or dup of peer notify event.
Before this patch:
- If should_notify_peers is true and the lock for send_peer_notif-- fails, peer
event may be sent again in next mii_monitor loop, because should_notify_peers is still true.
- If should_notify_peers is true and the lock for send_peer_notif-- succeeded,
but the lock for peer event fails, the peer event will be lost.
This patch locks the RTNL for send_peer_notif, events, and commit simultaneously.
Fixes: 07a4ddec3ce9 ("bonding: add an option to specify a delay between peer notifications") Cc: Jay Vosburgh jv@jvosburgh.net Cc: Andrew Lunn andrew+netdev@lunn.ch Cc: Eric Dumazet edumazet@google.com Cc: Jakub Kicinski kuba@kernel.org Cc: Paolo Abeni pabeni@redhat.com Cc: Hangbin Liu liuhangbin@gmail.com Cc: Nikolay Aleksandrov razor@blackwall.org Cc: Vincent Bernat vincent@bernat.ch Cc: stable@vger.kernel.org Signed-off-by: Tonghao Zhang tonghao@bamaicloud.com
I'll note that this appears to preserve the ordering of the various events (commit, link state, notify peers).
-J
Acked-by: Jay Vosburgh jv@jvosburgh.net
drivers/net/bonding/bond_main.c | 40 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 22 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5791c3e39baa..52b7ac8ddfbc 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2971,7 +2971,7 @@ static void bond_mii_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mii_work.work);
- bool should_notify_peers = false;
- bool should_notify_peers; bool commit; unsigned long delay; struct slave *slave;
@@ -2983,30 +2983,33 @@ static void bond_mii_monitor(struct work_struct *work) goto re_arm;
rcu_read_lock();
- should_notify_peers = bond_should_notify_peers(bond); commit = !!bond_miimon_inspect(bond);
if (bond->send_peer_notif) {
rcu_read_unlock();if (rtnl_trylock()) {bond->send_peer_notif--;rtnl_unlock();}} else {
rcu_read_unlock();}
if (commit) {
- rcu_read_unlock();
- if (commit || bond->send_peer_notif) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1;
should_notify_peers = false; goto re_arm;}
bond_for_each_slave(bond, slave, iter) {bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER);
if (commit) {bond_for_each_slave(bond, slave, iter) {bond_commit_link_state(slave,BOND_SLAVE_NOTIFY_LATER);}bond_miimon_commit(bond);}if (bond->send_peer_notif) {bond->send_peer_notif--;if (should_notify_peers)call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, }bond->dev);
bond_miimon_commit(bond);rtnl_unlock(); /* might sleep, hold no other locks */ }
@@ -3014,13 +3017,6 @@ static void bond_mii_monitor(struct work_struct *work) re_arm: if (bond->params.miimon) queue_delayed_work(bond->wq, &bond->mii_work, delay);
- if (should_notify_peers) {
if (!rtnl_trylock())return;call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);rtnl_unlock();- }
}
static int bond_upper_dev_walk(struct net_device *upper,
2.34.1
--- -Jay Vosburgh, jv@jvosburgh.net
Hello:
This patch was applied to netdev/net.git (main) by Paolo Abeni pabeni@redhat.com:
On Tue, 21 Oct 2025 13:09:33 +0800 you wrote:
If the send_peer_notif counter and the peer event notify are not synchronized. It may cause problems such as the loss or dup of peer notify event.
Before this patch:
- If should_notify_peers is true and the lock for send_peer_notif-- fails, peer event may be sent again in next mii_monitor loop, because should_notify_peers is still true.
- If should_notify_peers is true and the lock for send_peer_notif-- succeeded, but the lock for peer event fails, the peer event will be lost.
[...]
Here is the summary with links: - [net] net: bonding: fix possible peer notify event loss or dup issue https://git.kernel.org/netdev/net/c/10843e1492e4
You are awesome, thank you!
linux-stable-mirror@lists.linaro.org