This patch series includes some netns-related improvements and fixes for RTNL and ip_tunnel, to make link creation more intuitive:
- Creating link in another net namespace doesn't conflict with link names in current one. - Add a flag in rtnl_ops, to avoid netns change when link-netns is present if possible. - When creating ip tunnel (e.g. GRE) in another netns, use current as link-netns if not specified explicitly.
So that
# modprobe ip_gre netns_atomic=1 # ip link add netns ns1 link-netns ns2 tun0 type gre ...
will create tun0 in ns1, rather than create it in ns2 and move to ns1. And don't conflict with another interface named "tun0" in current netns.
---
v2: - Check NLM_F_EXCL to ensure only link creation is affected. - Add self tests for link name/ifindex conflict and notifications in different netns. - Changes in dummy driver and ynl in order to add the test case.
v1: link: https://lore.kernel.org/all/20241023023146.372653-1-shaw.leon@gmail.com/
Xiao Liang (8): rtnetlink: Lookup device in target netns when creating link rtnetlink: Add netns_atomic flag in rtnl_link_ops net: ip_tunnel: Build flow in underlay net namespace net: ip_tunnel: Add source netns support for newlink net: ip_gre: Add netns_atomic module parameter net: dummy: Set netns_atomic in rtnl ops for testing tools/net/ynl: Add retry limit for async notification selftests: net: Add two test cases for link netns
drivers/net/dummy.c | 1 + include/net/ip_tunnels.h | 3 ++ include/net/rtnetlink.h | 3 ++ net/core/rtnetlink.c | 17 +++++-- net/ipv4/ip_gre.c | 15 +++++- net/ipv4/ip_tunnel.c | 27 +++++++---- tools/net/ynl/lib/ynl.py | 7 ++- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/netns-name.sh | 10 ++++ tools/testing/selftests/net/netns_atomic.py | 54 +++++++++++++++++++++ 10 files changed, 121 insertions(+), 17 deletions(-) create mode 100755 tools/testing/selftests/net/netns_atomic.py
When creating link, lookup for existing device in target net namespace instead of current one. For example, two links created by:
# ip link add dummy1 type dummy # ip link add netns ns1 dummy1 type dummy
should have no conflict since they are in different namespaces.
Signed-off-by: Xiao Liang shaw.leon@gmail.com --- net/core/rtnetlink.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3b33810d92a8..8119f4ad9e5f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3744,20 +3744,26 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, { struct nlattr ** const tb = tbs->tb; struct net *net = sock_net(skb->sk); + struct net *device_net; struct net_device *dev; struct ifinfomsg *ifm; bool link_specified;
+ /* When creating, lookup for existing device in target net namespace */ + device_net = (nlh->nlmsg_flags & NLM_F_CREATE) && + (nlh->nlmsg_flags & NLM_F_EXCL) ? + tgt_net : net; + ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) { link_specified = true; - dev = __dev_get_by_index(net, ifm->ifi_index); + dev = __dev_get_by_index(device_net, ifm->ifi_index); } else if (ifm->ifi_index < 0) { NL_SET_ERR_MSG(extack, "ifindex can't be negative"); return -EINVAL; } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) { link_specified = true; - dev = rtnl_dev_get(net, tb); + dev = rtnl_dev_get(device_net, tb); } else { link_specified = false; dev = NULL;
Currently these two steps are needed to create a net device with IFLA_LINK_NETNSID attr:
1. create and setup the netdev in the link netns with rtnl_create_link() 2. move it to the target netns with dev_change_net_namespace()
This has some side effects, including extra ifindex allocation, ifname validation and link notifications in link netns.
Add a netns_atomic flag, that if set to true, devices will be created in the target netns directly.
Signed-off-by: Xiao Liang shaw.leon@gmail.com --- include/net/rtnetlink.h | 3 +++ net/core/rtnetlink.c | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index b260c0cc9671..7e78f3952774 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -75,6 +75,8 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * @srcu: Used internally * @kind: Identifier * @netns_refund: Physical device, move to init_net on netns exit + * @netns_atomic: Device can be created in target netns even when + * link-netns is different, avoiding netns change. * @maxtype: Highest device specific netlink attribute number * @policy: Netlink policy for device specific attribute validation * @validate: Optional validation function for netlink/changelink parameters @@ -116,6 +118,7 @@ struct rtnl_link_ops { void (*setup)(struct net_device *dev);
bool netns_refund; + bool netns_atomic; unsigned int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8119f4ad9e5f..b0d1cbb44a03 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3690,8 +3690,9 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, name_assign_type = NET_NAME_ENUM; }
- dev = rtnl_create_link(link_net ? : tgt_net, ifname, - name_assign_type, ops, tb, extack); + dev = rtnl_create_link(!link_net || ops->netns_atomic ? + tgt_net : link_net, ifname, name_assign_type, + ops, tb, extack); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -3711,7 +3712,7 @@ static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm, err = rtnl_configure_link(dev, ifm, portid, nlh); if (err < 0) goto out_unregister; - if (link_net) { + if (link_net && !ops->netns_atomic) { err = dev_change_net_namespace(dev, tgt_net, ifname); if (err < 0) goto out_unregister;
Build IPv4 flow in underlay net namespace, where encapsulated packets are routed.
Signed-off-by: Xiao Liang shaw.leon@gmail.com --- net/ipv4/ip_tunnel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 25505f9b724c..09b73acf037a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -294,7 +294,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr, iph->saddr, tunnel->parms.o_key, - iph->tos & INET_DSCP_MASK, dev_net(dev), + iph->tos & INET_DSCP_MASK, tunnel->net, tunnel->parms.link, tunnel->fwmark, 0, 0); rt = ip_route_output_key(tunnel->net, &fl4);
@@ -611,7 +611,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, tunnel_id_to_key32(key->tun_id), - tos & INET_DSCP_MASK, dev_net(dev), 0, skb->mark, + tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark, skb_get_hash(skb), key->flow_flags);
if (!tunnel_hlen) @@ -774,7 +774,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr, tunnel->parms.o_key, tos & INET_DSCP_MASK, - dev_net(dev), READ_ONCE(tunnel->parms.link), + tunnel->net, READ_ONCE(tunnel->parms.link), tunnel->fwmark, skb_get_hash(skb), 0);
if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
Add ip_tunnel_newlink_net() that accepts src_net parameter, which is passed from newlink() of RTNL ops, and use it as tunnel source netns.
Signed-off-by: Xiao Liang shaw.leon@gmail.com --- include/net/ip_tunnels.h | 3 +++ net/ipv4/ip_tunnel.c | 21 +++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 1aa31bdb2b31..e7cabc93902b 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -408,6 +408,9 @@ int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm_kern *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm_kern *p, __u32 fwmark); +int ip_tunnel_newlink_net(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct ip_tunnel_parm_kern *p, + __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 09b73acf037a..bbb2fbb90bd2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1213,17 +1213,17 @@ void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, } EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
-int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm_kern *p, __u32 fwmark) +int ip_tunnel_newlink_net(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct ip_tunnel_parm_kern *p, + __u32 fwmark) { struct ip_tunnel *nt; - struct net *net = dev_net(dev); struct ip_tunnel_net *itn; int mtu; int err;
nt = netdev_priv(dev); - itn = net_generic(net, nt->ip_tnl_net_id); + itn = net_generic(src_net, nt->ip_tnl_net_id);
if (nt->collect_md) { if (rtnl_dereference(itn->collect_md_tun)) @@ -1233,7 +1233,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], return -EEXIST; }
- nt->net = net; + nt->net = src_net; nt->parms = *p; nt->fwmark = fwmark; err = register_netdevice(dev); @@ -1265,6 +1265,13 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], err_register_netdevice: return err; } +EXPORT_SYMBOL_GPL(ip_tunnel_newlink_net); + +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], + struct ip_tunnel_parm_kern *p, __u32 fwmark) +{ + return ip_tunnel_newlink_net(dev_net(dev), dev, tb, p, fwmark); +} EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], @@ -1326,7 +1333,9 @@ int ip_tunnel_init(struct net_device *dev) }
tunnel->dev = dev; - tunnel->net = dev_net(dev); + if (!tunnel->net) + tunnel->net = dev_net(dev); + strscpy(tunnel->parms.name, dev->name); iph->version = 4; iph->ihl = 5;
If set to true, create device in target netns (when different from link-netns) without netns change, and use current netns as link-netns if not specified explicitly.
Signed-off-by: Xiao Liang shaw.leon@gmail.com --- net/ipv4/ip_gre.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f1f31ebfc793..6ef7e98e4620 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -107,6 +107,11 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+static bool netns_atomic; +module_param(netns_atomic, bool, 0444); +MODULE_PARM_DESC(netns_atomic, + "Create tunnel in target net namespace directly and use current net namespace as link-netns by default"); + static struct rtnl_link_ops ipgre_link_ops __read_mostly; static const struct header_ops ipgre_header_ops;
@@ -1393,6 +1398,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { + struct net *link_net = netns_atomic ? src_net : dev_net(dev); struct ip_tunnel_parm_kern p; __u32 fwmark = 0; int err; @@ -1404,13 +1410,14 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; - return ip_tunnel_newlink(dev, tb, &p, fwmark); + return ip_tunnel_newlink_net(link_net, dev, tb, &p, fwmark); }
static int erspan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { + struct net *link_net = netns_atomic ? src_net : dev_net(dev); struct ip_tunnel_parm_kern p; __u32 fwmark = 0; int err; @@ -1422,7 +1429,7 @@ static int erspan_newlink(struct net *src_net, struct net_device *dev, err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); if (err) return err; - return ip_tunnel_newlink(dev, tb, &p, fwmark); + return ip_tunnel_newlink_net(link_net, dev, tb, &p, fwmark); }
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], @@ -1777,6 +1784,10 @@ static int __init ipgre_init(void)
pr_info("GRE over IPv4 tunneling driver\n");
+ ipgre_link_ops.netns_atomic = netns_atomic; + ipgre_tap_ops.netns_atomic = netns_atomic; + erspan_link_ops.netns_atomic = netns_atomic; + err = register_pernet_device(&ipgre_net_ops); if (err < 0) return err;
On Thu, Nov 7, 2024 at 2:30 PM Xiao Liang shaw.leon@gmail.com wrote:
If set to true, create device in target netns (when different from link-netns) without netns change, and use current netns as link-netns if not specified explicitly.
Sorry, module parameters are not going to fly.
Instead, add new rtnetlink attributes ?
On Thu, Nov 7, 2024 at 9:37 PM Eric Dumazet edumazet@google.com wrote:
On Thu, Nov 7, 2024 at 2:30 PM Xiao Liang shaw.leon@gmail.com wrote:
If set to true, create device in target netns (when different from link-netns) without netns change, and use current netns as link-netns if not specified explicitly.
Sorry, module parameters are not going to fly.
Sorry, I didn't know that.
Instead, add new rtnetlink attributes ?
It is to control driver behavior at rtnl_ops registration time. I think rtnetlink attributes are too late for that, maybe? Can't think of a way other than module parameters or register separate ops. Any suggestions?
On Thu, 7 Nov 2024 22:11:24 +0800 Xiao Liang wrote:
Instead, add new rtnetlink attributes ?
It is to control driver behavior at rtnl_ops registration time. I think rtnetlink attributes are too late for that, maybe? Can't think of a way other than module parameters or register separate ops. Any suggestions?
Step back from the implementation you have a little, forget that there is a boolean in rtnl_link_ops. User makes a request to spawn an interface, surely a flag inside that request can dictate how the netns attrs are interpreted.
On Thu, Nov 7, 2024 at 11:59 PM Jakub Kicinski kuba@kernel.org wrote:
On Thu, 7 Nov 2024 22:11:24 +0800 Xiao Liang wrote:
Instead, add new rtnetlink attributes ?
It is to control driver behavior at rtnl_ops registration time. I think rtnetlink attributes are too late for that, maybe? Can't think of a way other than module parameters or register separate ops. Any suggestions?
Step back from the implementation you have a little, forget that there is a boolean in rtnl_link_ops. User makes a request to spawn an interface, surely a flag inside that request can dictate how the netns attrs are interpreted.
IMO, this is about driver capability, not about user requests. As you've pointed out earlier, probably no one would actually want the old behavior whenever the driver supports the new one. I added the module parameter just for compatibility, because ip_tunnels was not implemented to support src_net properly. Yes it's possible to add an extra flag in user request, but I don't think it's a good approach.
BTW, I didn't find what's going on with module parameters, is there any documentation?
Thanks.
On Fri, 8 Nov 2024 00:53:55 +0800 Xiao Liang wrote:
It is to control driver behavior at rtnl_ops registration time. I think rtnetlink attributes are too late for that, maybe? Can't think of a way other than module parameters or register separate ops. Any suggestions?
Step back from the implementation you have a little, forget that there is a boolean in rtnl_link_ops. User makes a request to spawn an interface, surely a flag inside that request can dictate how the netns attrs are interpreted.
IMO, this is about driver capability, not about user requests.
The bit is a driver capability, that's fine. But the question was how to achieve backward compatibility. A flag in user request shifts the responsibility of ensuring all services are compatible to whoever spawns the interfaces. Which will probably be some network management daemon.
As you've pointed out earlier, probably no one would actually want the old behavior whenever the driver supports the new one. I added the module parameter just for compatibility, because ip_tunnels was not implemented to support src_net properly.
And I maintain that it's very unlikely anyone cares about old behavior. So maybe as a starting point we can have neither the flag nor the module param? We can add them later if someone screams.
Yes it's possible to add an extra flag in user request, but I don't think it's a good approach.
There are two maintainers with opposing intuition so more data may be needed to convince..
BTW, I didn't find what's going on with module parameters, is there any documentation?
Not sure if there is documentation, but module params are quite painful to work with. Main reason is that they are global and not namespace aware. Plus developers usually default to making them read only, which means they practically speaking have to be configured at boot.
On Fri, Nov 8, 2024 at 12:04 PM Jakub Kicinski kuba@kernel.org wrote:
On Fri, 8 Nov 2024 00:53:55 +0800 Xiao Liang wrote:
IMO, this is about driver capability, not about user requests.
The bit is a driver capability, that's fine. But the question was how to achieve backward compatibility. A flag in user request shifts the responsibility of ensuring all services are compatible to whoever spawns the interfaces. Which will probably be some network management daemon.
OK. So I think we can change the driver capability indicator in rtnl_ops to a tristate field, say, "linkns_support". If it is - not supported, then keep the old behavior - supported (vlan, macvlan, etc.), then change to the new behavior - compat-mode (ip_tunnel), default to old behavior and can be changed via an IFLA flag. Is this reasonable?
BTW, I didn't find what's going on with module parameters, is there any documentation?
Not sure if there is documentation, but module params are quite painful to work with. Main reason is that they are global and not namespace aware. Plus developers usually default to making them read only, which means they practically speaking have to be configured at boot.
Understood, thanks.
On Fri, 8 Nov 2024 14:44:37 +0800 Xiao Liang wrote:
On Fri, 8 Nov 2024 00:53:55 +0800 Xiao Liang wrote:
IMO, this is about driver capability, not about user requests.
The bit is a driver capability, that's fine. But the question was how to achieve backward compatibility. A flag in user request shifts the responsibility of ensuring all services are compatible to whoever spawns the interfaces. Which will probably be some network management daemon.
OK. So I think we can change the driver capability indicator in rtnl_ops to a tristate field, say, "linkns_support". If it is
- not supported, then keep the old behavior
- supported (vlan, macvlan, etc.), then change to the new behavior
- compat-mode (ip_tunnel), default to old behavior and can be changed via an IFLA flag.
Is this reasonable?
Let's start with annotating the drivers which need the old behavior. It seems like something that was done as a workaround for old drivers, maybe there isn't that many of them and we can convert them all in one series.
On Sun, Nov 10, 2024 at 8:59 AM Jakub Kicinski kuba@kernel.org wrote:
On Fri, 8 Nov 2024 14:44:37 +0800 Xiao Liang wrote:
On Fri, 8 Nov 2024 00:53:55 +0800 Xiao Liang wrote:
IMO, this is about driver capability, not about user requests.
The bit is a driver capability, that's fine. But the question was how to achieve backward compatibility. A flag in user request shifts the responsibility of ensuring all services are compatible to whoever spawns the interfaces. Which will probably be some network management daemon.
OK. So I think we can change the driver capability indicator in rtnl_ops to a tristate field, say, "linkns_support". If it is
- not supported, then keep the old behavior
- supported (vlan, macvlan, etc.), then change to the new behavior
- compat-mode (ip_tunnel), default to old behavior and can be changed via an IFLA flag.
Is this reasonable?
Let's start with annotating the drivers which need the old behavior. It seems like something that was done as a workaround for old drivers, maybe there isn't that many of them and we can convert them all in one series.
I'd like to clarify a bit here. Link netns is closely coupled with source netns, as it's passed to drivers as source netns. Without introducing a flag, after applying the logic in this patchset, drivers won't be able to distinguish the two: 1) ip -n ns1 link add netns ns2 ... 2) ip link add netns ns2 link-netns ns1 ...
There's no problem for drivers that already handle source netns. But it changes the semantics of 1) for ip tunnels silently. The effective link-netns is ns2 before, and will be changed to ns1 afterwards, which will almost certainly affect some users. Is this acceptable? On the other hand, do we need to deal with out-of-tree drivers?
On Mon, 11 Nov 2024 16:15:41 +0800 Xiao Liang wrote:
Let's start with annotating the drivers which need the old behavior. It seems like something that was done as a workaround for old drivers, maybe there isn't that many of them and we can convert them all in one series.
I'd like to clarify a bit here. Link netns is closely coupled with source netns, as it's passed to drivers as source netns. Without introducing a flag, after applying the logic in this patchset, drivers won't be able to distinguish the two:
- ip -n ns1 link add netns ns2 ...
- ip link add netns ns2 link-netns ns1 ...
True, but the question is how many drivers actually care about the net parameter. Ideally we would pass both netns to the drivers, refactor the ->newlink callback to take a parameter struct and add both netns as members. Passing just one or the other will always be confusing.
There's no problem for drivers that already handle source netns. But it changes the semantics of 1) for ip tunnels silently. The effective link-netns is ns2 before, and will be changed to ns1 afterwards, which will almost certainly affect some users. Is this acceptable?
No, changing the behavior for the commands you provided is not acceptable. At the same time we shouldn't be adding technical debt of supporting both converted and unconverted drivers upstream.
On the other hand, do we need to deal with out-of-tree drivers?
Nope, but again, changing the prototype of newlink would also make it hard to get wrong for OOT modules.
On Tue, Nov 12, 2024 at 7:42 AM Jakub Kicinski kuba@kernel.org wrote:
On Mon, 11 Nov 2024 16:15:41 +0800 Xiao Liang wrote:
Let's start with annotating the drivers which need the old behavior. It seems like something that was done as a workaround for old drivers, maybe there isn't that many of them and we can convert them all in one series.
I'd like to clarify a bit here. Link netns is closely coupled with source netns, as it's passed to drivers as source netns. Without introducing a flag, after applying the logic in this patchset, drivers won't be able to distinguish the two:
- ip -n ns1 link add netns ns2 ...
- ip link add netns ns2 link-netns ns1 ...
True, but the question is how many drivers actually care about the net parameter. Ideally we would pass both netns to the drivers, refactor the ->newlink callback to take a parameter struct and add both netns as members. Passing just one or the other will always be confusing.
Got it, thanks. Will work on a v3.
There's no problem for drivers that already handle source netns. But it changes the semantics of 1) for ip tunnels silently. The effective link-netns is ns2 before, and will be changed to ns1 afterwards, which will almost certainly affect some users. Is this acceptable?
No, changing the behavior for the commands you provided is not acceptable. At the same time we shouldn't be adding technical debt of supporting both converted and unconverted drivers upstream.
On the other hand, do we need to deal with out-of-tree drivers?
Nope, but again, changing the prototype of newlink would also make it hard to get wrong for OOT modules.
Set netns_atomic flag in rtnl ops. For testing purpose only, since link-netns is not used for dummy interfaces in practice.
Signed-off-by: Xiao Liang shaw.leon@gmail.com --- drivers/net/dummy.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index e9c5e1e11fa0..38d79f543998 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -138,6 +138,7 @@ static struct rtnl_link_ops dummy_link_ops __read_mostly = { .kind = DRV_NAME, .setup = dummy_setup, .validate = dummy_validate, + .netns_atomic = true, };
/* Number of dummy devices to be set up by this module. */
Since commit 1bf70e6c3a53 ("tools/net/ynl: improve async notification handling"), check_ntf() would block indefinitely if there's no messages. In some cases we want to set a limit on waiting time. This patch adds max_reties parameter check_ntf(), and makes it stop when no message is recievied in that number of consecutive retries.
Signed-off-by: Xiao Liang shaw.leon@gmail.com --- tools/net/ynl/lib/ynl.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 92f85698c50e..dff5166a4650 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -907,7 +907,8 @@ class YnlFamily(SpecFamily): msg['msg'] = attrs self.async_msg_queue.put(msg)
- def check_ntf(self, interval=0.1): + def check_ntf(self, interval=0.1, max_retries=None): + retry = 0 while True: try: reply = self.sock.recv(self._recv_size, socket.MSG_DONTWAIT) @@ -933,7 +934,11 @@ class YnlFamily(SpecFamily):
try: yield self.async_msg_queue.get_nowait() + retry = 0 except queue.Empty: + retry += 1 + if max_retries is not None and retry > max_retries: + return try: time.sleep(interval) except KeyboardInterrupt:
On Thu, 7 Nov 2024 21:30:02 +0800 Xiao Liang wrote:
Since commit 1bf70e6c3a53 ("tools/net/ynl: improve async notification handling"), check_ntf() would block indefinitely if there's no messages. In some cases we want to set a limit on waiting time. This patch adds max_reties parameter check_ntf(), and makes it stop when no message is recievied in that number of consecutive retries.
Looking at 1bf70e6c3a53 again I wonder if we should revert it, sort of, and add its logic back as a new function called poll_nft?
The thing is C YNL has check_ntf too - ynl_ntf_check() and it has the old semantics. Would be nice for similarly named functions to behave the same across languages.
WDYT Donald? Sorry for not thinking about this earlier.
Xiao, feel free to submit this separately from the series.
On Thu, 7 Nov 2024 at 16:04, Jakub Kicinski kuba@kernel.org wrote:
On Thu, 7 Nov 2024 21:30:02 +0800 Xiao Liang wrote:
Since commit 1bf70e6c3a53 ("tools/net/ynl: improve async notification handling"), check_ntf() would block indefinitely if there's no messages. In some cases we want to set a limit on waiting time. This patch adds max_reties parameter check_ntf(), and makes it stop when no message is recievied in that number of consecutive retries.
Looking at 1bf70e6c3a53 again I wonder if we should revert it, sort of, and add its logic back as a new function called poll_nft?
The thing is C YNL has check_ntf too - ynl_ntf_check() and it has the old semantics. Would be nice for similarly named functions to behave the same across languages.
WDYT Donald? Sorry for not thinking about this earlier.
Yes, that makes sense. I didn't realise the C lib had an equivalent. Adding a poll_ntf() that calls check_ntf() internally will actually be a bit cleaner overall.
It's then a question of whether we need the repeat logic in poll_ntf() because it's always possible to use check_ntf() in your own repeat logic. Either way, I'd prefer not to call the parameter "max_retries" because semantically I don't think we are retrying - it is a count of how many times to repeat the poll. Thoughts? Should it be a "duration" parameter?
Xiao, feel free to submit this separately from the series.
On Fri, Nov 8, 2024 at 1:16 AM Donald Hunter donald.hunter@gmail.com wrote:
It's then a question of whether we need the repeat logic in poll_ntf() because it's always possible to use check_ntf() in your own repeat logic. Either way, I'd prefer not to call the parameter "max_retries" because semantically I don't think we are retrying - it is a count of how many times to repeat the poll. Thoughts? Should it be a "duration" parameter?
Yes, a "duration" is better. The meaning of "retry" or "count" is not clear. The original check_ntf() is good enough for the test case in this series. Could you make the change, or do you prefer me to submit another patch?
On Fri, 8 Nov 2024 at 08:46, Xiao Liang shaw.leon@gmail.com wrote:
On Fri, Nov 8, 2024 at 1:16 AM Donald Hunter donald.hunter@gmail.com wrote:
It's then a question of whether we need the repeat logic in poll_ntf() because it's always possible to use check_ntf() in your own repeat logic. Either way, I'd prefer not to call the parameter "max_retries" because semantically I don't think we are retrying - it is a count of how many times to repeat the poll. Thoughts? Should it be a "duration" parameter?
Yes, a "duration" is better. The meaning of "retry" or "count" is not clear. The original check_ntf() is good enough for the test case in this series. Could you make the change, or do you prefer me to submit another patch?
I'm happy to make the change.
I have prepared a patch which reverts most of 1bf70e6c3a53 and introduces poll_ntf(interval, duration).
Jakub, is it okay to submit this as a single patch, or would you prefer me to actually revert 1bf70e6c3a53? (there's about 5 lines retained from the original patch).
On Fri, 8 Nov 2024 at 10:04, Donald Hunter donald.hunter@gmail.com wrote:
Jakub, is it okay to submit this as a single patch, or would you prefer me to actually revert 1bf70e6c3a53? (there's about 5 lines retained from the original patch).
I'll submit it as a series with a revert and a new patch. The patch is much cleaner that way.
- Add test for creating link in another netns when a link of the same name and ifindex exists in current netns. - Add test for netns_atomic - create link directly in target netns, and no notifications should be generated in current netns.
Signed-off-by: Xiao Liang shaw.leon@gmail.com --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/netns-name.sh | 10 ++++ tools/testing/selftests/net/netns_atomic.py | 54 +++++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100755 tools/testing/selftests/net/netns_atomic.py
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 26a4883a65c9..ba1a486f93dc 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -34,6 +34,7 @@ TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh +TEST_PROGS += netns_atomic.py TEST_PROGS += nl_netdev.py TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 6974474c26f3..0be1905d1f2f 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -78,6 +78,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null && fail "Can still find alt-name after move" ip -netns $test_ns link del $DEV || fail
+# +# Test no conflict of the same name/ifindex in different netns +# +ip -netns $NS link add name $DEV index 100 type dummy || fail +ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy || + fail "Can create in netns without moving" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found" +ip -netns $NS link del $DEV || fail +ip -netns $test_ns link del $DEV || fail + echo -ne "$(basename $0) \t\t\t\t" if [ $RET_CODE -eq 0 ]; then echo "[ OK ]" diff --git a/tools/testing/selftests/net/netns_atomic.py b/tools/testing/selftests/net/netns_atomic.py new file mode 100755 index 000000000000..0a3864e22b8b --- /dev/null +++ b/tools/testing/selftests/net/netns_atomic.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import ctypes + +from lib.py import ksft_run, ksft_exit, ksft_true +from lib.py import ip +from lib.py import NetNS +from lib.py import RtnlFamily + +libc = ctypes.cdll.LoadLibrary('libc.so.6') + + +class NSEnter: + def __init__(self, ns_name): + self.ns_path = f"/run/netns/{ns_name}" + + def __enter__(self): + self.saved = open("/proc/thread-self/ns/net") + with open(self.ns_path) as ns_file: + libc.setns(ns_file.fileno(), 0) + + def __exit__(self, exc_type, exc_value, traceback): + libc.setns(self.saved.fileno(), 0) + self.saved.close() + + +def test_event(ns1, ns2) -> None: + with NSEnter(str(ns1)): + rtnl = RtnlFamily() + + rtnl.ntf_subscribe("rtnlgrp-link") + + ip(f"netns set {ns1} 0", ns=str(ns2)) + + ip(f"link add netns {ns2} link-netnsid 0 dummy1 type dummy") + ip(f"link add netns {ns2} dummy2 type dummy", ns=str(ns1)) + + ip("link del dummy1", ns=str(ns2)) + ip("link del dummy2", ns=str(ns2)) + + # Should receive no link events in ns1. Wait 5*0.1 seconds. + ksft_true(next(rtnl.check_ntf(max_retries=5), None) is None, + "Received unexpected link notification") + + +def main() -> None: + with NetNS() as ns1, NetNS() as ns2: + ksft_run([test_event], args=(ns1, ns2)) + ksft_exit() + + +if __name__ == "__main__": + main()
On Thu, 7 Nov 2024 21:30:03 +0800 Xiao Liang wrote:
+class NSEnter:
- def __init__(self, ns_name):
self.ns_path = f"/run/netns/{ns_name}"
- def __enter__(self):
self.saved = open("/proc/thread-self/ns/net")
with open(self.ns_path) as ns_file:
libc.setns(ns_file.fileno(), 0)
- def __exit__(self, exc_type, exc_value, traceback):
libc.setns(self.saved.fileno(), 0)
self.saved.close()
This is quite nice, why not move it to tools/testing/selftests/net/lib/py/netns.py so others can use it too
On Sun, Nov 10, 2024 at 9:01 AM Jakub Kicinski kuba@kernel.org wrote:
On Thu, 7 Nov 2024 21:30:03 +0800 Xiao Liang wrote:
+class NSEnter:
- def __init__(self, ns_name):
self.ns_path = f"/run/netns/{ns_name}"
- def __enter__(self):
self.saved = open("/proc/thread-self/ns/net")
with open(self.ns_path) as ns_file:
libc.setns(ns_file.fileno(), 0)
- def __exit__(self, exc_type, exc_value, traceback):
libc.setns(self.saved.fileno(), 0)
self.saved.close()
This is quite nice, why not move it to tools/testing/selftests/net/lib/py/netns.py so others can use it too
Sure, I will move it to lib.
linux-kselftest-mirror@lists.linaro.org