On Mon, 31 Jan 2022, Greg Kroah-Hartman wrote:
From: Paolo Abeni pabeni@redhat.com
[ Upstream commit 86e39e04482b0aadf3ee3ed5fcf2d63816559d36 ]
Hi Greg -
Please drop this from the stable queue for both 5.16 and 5.15. It wasn't intended for backporting and we haven't checked for dependencies with other changes in this part of MPTCP subsystem.
In the mptcp tree we make sure to add Fixes: tags on every patch we think is eligible for the -stable tree. I know you're sifting through a lot of patches from subsystems that end up with important fixes arriving from the "-next" branches, and it seems like the scripts scooped up several MPTCP patches this time around that don't meet the -stable rules.
Thanks,
Mat
Include into the path manager status a bitmap tracking the list of local endpoints still available - not yet used - for the relevant mptcp socket.
Keep such map updated at endpoint creation/deletion time, so that we can easily skip already used endpoint at local address selection time.
The endpoint used by the initial subflow is lazyly accounted at subflow creation time: the usage bitmap is be up2date before endpoint selection and we avoid such unneeded task in some relevant scenarios - e.g. busy servers accepting incoming subflows but not creating any additional ones nor annuncing additional addresses.
Overall this allows for fair local endpoints usage in case of subflow failure.
As a side effect, this patch also enforces that each endpoint is used at most once for each mptcp connection.
Signed-off-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Mat Martineau mathew.j.martineau@linux.intel.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org
net/mptcp/pm.c | 1 + net/mptcp/pm_netlink.c | 125 +++++++++++------- net/mptcp/protocol.c | 3 +- net/mptcp/protocol.h | 12 +- .../testing/selftests/net/mptcp/mptcp_join.sh | 5 +- 5 files changed, 91 insertions(+), 55 deletions(-)
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 6ab386ff32944..332ac6eda3ba4 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -370,6 +370,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) WRITE_ONCE(msk->pm.accept_subflow, false); WRITE_ONCE(msk->pm.remote_deny_join_id0, false); msk->pm.status = 0;
bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
spin_lock_init(&msk->pm.lock); INIT_LIST_HEAD(&msk->pm.anno_list);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 27427aeeee0e5..ad3dc9c6c5310 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -38,10 +38,6 @@ struct mptcp_pm_add_entry { u8 retrans_times; };
-/* max value of mptcp_addr_info.id */ -#define MAX_ADDR_ID U8_MAX -#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
struct pm_nl_pernet { /* protects pernet updates */ spinlock_t lock; @@ -53,14 +49,14 @@ struct pm_nl_pernet { unsigned int local_addr_max; unsigned int subflows_max; unsigned int next_id;
- unsigned long id_bitmap[BITMAP_SZ];
- DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
};
#define MPTCP_PM_ADDR_MAX 8 #define ADD_ADDR_RETRANS_MAX 3
static bool addresses_equal(const struct mptcp_addr_info *a,
struct mptcp_addr_info *b, bool use_port)
const struct mptcp_addr_info *b, bool use_port)
{ bool addr_equals = false;
@@ -174,6 +170,9 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) continue;
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- if (entry->addr.family != sk->sk_family) {
#if IS_ENABLED(CONFIG_MPTCP_IPV6) if ((entry->addr.family == AF_INET && @@ -184,23 +183,17 @@ select_local_address(const struct pm_nl_pernet *pernet, continue; }
/* avoid any address already in use by subflows and
* pending join
*/
if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
ret = entry;
break;
}
ret = entry;
} rcu_read_unlock(); return ret;break;
}
static struct mptcp_pm_addr_entry * -select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos) +select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk) { struct mptcp_pm_addr_entry *entry, *ret = NULL;
int i = 0;
rcu_read_lock(); /* do not keep any additional per socket state, just signal
@@ -209,12 +202,14 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos) * can lead to additional addresses not being announced. */ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue;
if (i++ == pos) {
ret = entry;
break;
}
ret = entry;
} rcu_read_unlock(); return ret;break;
@@ -258,9 +253,11 @@ EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
static void check_work_pending(struct mptcp_sock *msk) {
- if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) &&
(msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
msk->pm.subflows == mptcp_pm_get_subflows_max(msk)))
- struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
- if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
(find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
WRITE_ONCE(msk->pm.work_pending, false);MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1))
}
@@ -460,6 +457,35 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm return i; }
+static struct mptcp_pm_addr_entry * +__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) +{
- struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
if (entry->addr.id == id)
return entry;
- }
- return NULL;
+}
+static int +lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) +{
- struct mptcp_pm_addr_entry *entry;
- int ret = -1;
- rcu_read_lock();
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
ret = entry->addr.id;
break;
}
- }
- rcu_read_unlock();
- return ret;
+}
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; @@ -475,6 +501,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) local_addr_max = mptcp_pm_get_local_addr_max(msk); subflows_max = mptcp_pm_get_subflows_max(msk);
- /* do lazy endpoint usage accounting for the MPC subflows */
- if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
struct mptcp_addr_info local;
int mpc_id;
local_address((struct sock_common *)msk->first, &local);
mpc_id = lookup_id_by_addr(pernet, &local);
if (mpc_id < 0)
__clear_bit(mpc_id, msk->pm.id_avail_bitmap);
msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
- }
- pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", msk->pm.local_addr_used, local_addr_max, msk->pm.add_addr_signaled, add_addr_signal_max,
@@ -482,21 +521,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check first for announce */ if (msk->pm.add_addr_signaled < add_addr_signal_max) {
local = select_signal_address(pernet,
msk->pm.add_addr_signaled);
local = select_signal_address(pernet, msk);
if (local) { if (mptcp_pm_alloc_anno_list(msk, local)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap); msk->pm.add_addr_signaled++; mptcp_pm_announce_addr(msk, &local->addr, false); mptcp_pm_nl_addr_send_ack(msk); }
} else {
/* pick failed, avoid fourther attempts later */
msk->pm.local_addr_used = add_addr_signal_max;
}
check_work_pending(msk);
}
/* check if should create a new subflow */
@@ -510,19 +544,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) int i, nr;
msk->pm.local_addr_used++;
check_work_pending(msk); nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
if (nr)
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap); spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); spin_lock_bh(&msk->pm.lock);
}return;
/* lookup failed, avoid fourther attempts later */
msk->pm.local_addr_used = local_addr_max;
}check_work_pending(msk);
- check_work_pending(msk);
}
static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) @@ -736,6 +767,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, msk->pm.subflows--; __MPTCP_INC_STATS(sock_net(sk), rm_type); }
if (!removed) continue;__set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
@@ -765,6 +797,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
msk_owned_by_me(msk);
if (!(pm->status & MPTCP_PM_WORK_MASK))
return;
spin_lock_bh(&msk->pm.lock);
pr_debug("msk=%p status=%x", msk, pm->status);
@@ -810,7 +845,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, /* to keep the code simple, don't do IDR-like allocation for address ID, * just bail when we exceed limits */
- if (pernet->next_id == MAX_ADDR_ID)
- if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) pernet->next_id = 1; if (pernet->addrs >= MPTCP_PM_ADDR_MAX) goto out;
@@ -830,7 +865,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, if (!entry->addr.id) { find_next: entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
MAX_ADDR_ID + 1,
if (!entry->addr.id && pernet->next_id != 1) { pernet->next_id = 1;MPTCP_PM_MAX_ADDR_ID + 1, pernet->next_id);
@@ -1197,18 +1232,6 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) return 0; }
-static struct mptcp_pm_addr_entry * -__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) -{
- struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
if (entry->addr.id == id)
return entry;
- }
- return NULL;
-}
int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id, u8 *flags, int *ifindex) { @@ -1467,7 +1490,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) list_splice_init(&pernet->local_addr_list, &free_list); __reset_counters(pernet); pernet->next_id = 1;
- bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
- bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); synchronize_rcu();
@@ -1577,7 +1600,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, pernet = net_generic(net, pm_nl_pernet_id);
spin_lock_bh(&pernet->lock);
- for (i = id; i < MAX_ADDR_ID + 1; i++) {
- for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { if (test_bit(i, pernet->id_bitmap)) { entry = __lookup_addr_by_id(pernet, i); if (!entry)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0cd55e4c30fab..70a3cac85f7b8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2416,8 +2416,7 @@ static void mptcp_worker(struct work_struct *work)
mptcp_check_fastclose(msk);
- if (msk->pm.status)
mptcp_pm_nl_work(msk);
mptcp_pm_nl_work(msk);
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags)) mptcp_check_for_eof(msk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d87cc040352e3..0387ad9fb43f7 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -174,16 +174,25 @@ enum mptcp_pm_status { MPTCP_PM_ADD_ADDR_SEND_ACK, MPTCP_PM_RM_ADDR_RECEIVED, MPTCP_PM_ESTABLISHED,
- MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */ MPTCP_PM_SUBFLOW_ESTABLISHED,
- MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
- MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
* accounted int id_avail_bitmap
*/
};
+/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */ +#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
enum mptcp_addr_signal_status { MPTCP_ADD_ADDR_SIGNAL, MPTCP_ADD_ADDR_ECHO, MPTCP_RM_ADDR_SIGNAL, };
+/* max value of mptcp_addr_info.id */ +#define MPTCP_PM_MAX_ADDR_ID U8_MAX
struct mptcp_pm_data { struct mptcp_addr_info local; struct mptcp_addr_info remote; @@ -202,6 +211,7 @@ struct mptcp_pm_data { u8 local_addr_used; u8 subflows; u8 status;
- DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_rm_list rm_list_tx; struct mptcp_rm_list rm_list_rx;
}; diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 7ef639a9d4a6f..bbafa4cf54538 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1071,7 +1071,10 @@ signal_address_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal run_tests $ns1 $ns2 10.0.1.1
- chk_add_nr 4 4
- # the server will not signal the address terminating
- # the MPC subflow
- chk_add_nr 3 3
}
link_failure_tests()
2.34.1
-- Mat Martineau Intel