On Wed, Feb 14, 2024 at 08:06:12PM +0300, kovalev@altlinux.org wrote:
14.02.2024 19:49, Pablo Neira Ayuso wrote:
On Wed, Feb 14, 2024 at 07:27:33PM +0300, kovalev@altlinux.org wrote:
From: Vasiliy Kovalev <kovalev@altlinux.org>
The gtp_net_ops pernet operations structure for the subsystem must be registered before registering the generic netlink family.
Thanks for finding a remedy for this.
If your fix is correct (I haven't tested your patch yet), then maybe this needs to be fixed in a few more spots in the tree?
net/devlink/core.c-static int __init devlink_init(void) net/devlink/core.c-{ net/devlink/core.c- int err; net/devlink/core.c- net/devlink/core.c- err = genl_register_family(&devlink_nl_family); net/devlink/core.c- if (err) net/devlink/core.c- goto out; net/devlink/core.c: err = register_pernet_subsys(&devlink_pernet_ops); net/devlink/core.c- if (err)
net/handshake/netlink.c- ret = genl_register_family(&handshake_nl_family); net/handshake/netlink.c- if (ret) { net/handshake/netlink.c- pr_warn("handshake: netlink registration failed (%d)\n", ret); net/handshake/netlink.c- handshake_req_hash_destroy(); net/handshake/netlink.c- return ret; net/handshake/netlink.c- } net/handshake/netlink.c- net/handshake/netlink.c- /* net/handshake/netlink.c- * ORDER: register_pernet_subsys must be done last. net/handshake/netlink.c- * net/handshake/netlink.c- * If initialization does not make it past pernet_subsys net/handshake/netlink.c- * registration, then handshake_net_id will remain 0. That net/handshake/netlink.c- * shunts the handshake consumer API to return ENOTSUPP net/handshake/netlink.c- * to prevent it from dereferencing something that hasn't net/handshake/netlink.c- * been allocated. net/handshake/netlink.c- */ net/handshake/netlink.c: ret = register_pernet_subsys(&handshake_genl_net_ops);
net/ipv4/tcp_metrics.c: ret = register_pernet_subsys(&tcp_net_metrics_ops); net/ipv4/tcp_metrics.c- if (ret < 0) net/ipv4/tcp_metrics.c- panic("Could not register tcp_net_metrics_ops\n"); net/ipv4/tcp_metrics.c- net/ipv4/tcp_metrics.c- ret = genl_register_family(&tcp_metrics_nl_family); net/ipv4/tcp_metrics.c- if (ret < 0) net/ipv4/tcp_metrics.c- panic("Could not register tcp_metrics generic netlink\n"); net/ipv4/tcp_metrics.c-}
net/ipv6/ioam6.c-int __init ioam6_init(void) net/ipv6/ioam6.c-{ net/ipv6/ioam6.c: int err = register_pernet_subsys(&ioam6_net_ops); net/ipv6/ioam6.c- if (err) net/ipv6/ioam6.c- goto out; net/ipv6/ioam6.c- net/ipv6/ioam6.c- err = genl_register_family(&ioam6_genl_family); net/ipv6/ioam6.c- if (err) net/ipv6/ioam6.c- goto out_unregister_pernet_subsys;
net/ipv6/seg6.c- err = genl_register_family(&seg6_genl_family); net/ipv6/seg6.c- if (err) net/ipv6/seg6.c- goto out; net/ipv6/seg6.c- net/ipv6/seg6.c: err = register_pernet_subsys(&ip6_segments_ops); net/ipv6/seg6.c- if (err) net/ipv6/seg6.c- goto out_unregister_genl;
net/netlink/genetlink.c- err = genl_register_family(&genl_ctrl); net/netlink/genetlink.c- if (err < 0) net/netlink/genetlink.c- goto problem; net/netlink/genetlink.c- net/netlink/genetlink.c: err = register_pernet_subsys(&genl_pernet_ops); net/netlink/genetlink.c- if (err) net/netlink/genetlink.c- goto problem;
Most likely, judging by the backtrace, the bug is the same [1]:
Call Trace: <TASK> genl_dumpit+0x119/0x220 net/netlink/genetlink.c:1025 netlink_dump+0x588/0xca0 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x6d0/0x9c0 net/netlink/af_netlink.c:2370 genl_family_rcv_msg_dumpit+0x1e1/0x2d0 net/netlink/genetlink.c:1074 genl_family_rcv_msg net/netlink/genetlink.c:1190 [inline] genl_rcv_msg+0x470/0x800 net/netlink/genetlink.c:1208 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2543 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1217 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x53b/0x810 net/netlink/af_netlink.c:1367 netlink_sendmsg+0x8b7/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0xd5/0x180 net/socket.c:745 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd3/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x63/0x6b RIP: 0033:0x7f35d567cda9
Ok, then a series of fixes probably needs to happen so that each maintainer can review and apply them.
Maybe some of the subsystems above can only be compiled built-in, in which case the bug cannot be triggered there.
In any case, are you up for picking up that series?
Thanks.