[PATCH 6.12.y 4/8] ipv4: adopt dst_dev, skb_dst_dev and skb_dst_dev_net[_rcu]

20 Oct 2025

From: Eric Dumazet <edumazet@google.com>

[ Upstream commit a74fc62eec155ca5a6da8ff3856f3dc87fe24558 ]

Use the new helpers as a first step to deal with
potential dst->dev races.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250630121934.3399505-8-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Stable-dep-of: 833d4313bc1e ("mptcp: reset blackhole on success with non-loopback ifaces")
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/net/inet_hashtables.h |  2 +-
 include/net/ip.h              | 11 ++++++-----
 include/net/route.h           |  2 +-
 net/ipv4/icmp.c               | 24 +++++++++++++-----------
 net/ipv4/igmp.c               |  2 +-
 net/ipv4/ip_fragment.c        |  2 +-
 net/ipv4/ip_output.c          |  6 +++---
 net/ipv4/ip_vti.c             |  4 ++--
 net/ipv4/netfilter.c          |  4 ++--
 net/ipv4/route.c              |  8 ++++----
 net/ipv4/tcp_fastopen.c       |  4 +++-
 net/ipv4/tcp_ipv4.c           |  2 +-
 net/ipv4/tcp_metrics.c        |  8 ++++----
 net/ipv4/xfrm4_output.c       |  2 +-
 14 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index da818fb0205fe..3c4118c63cfe1 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -492,7 +492,7 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo,
    				     const int sdif,
    				     bool *refcounted)
 {
-	struct net *net = dev_net_rcu(skb_dst(skb)->dev);
+	struct net *net = skb_dst_dev_net_rcu(skb);
    const struct iphdr *iph = ip_hdr(skb);
    struct sock *sk;
diff --git a/include/net/ip.h b/include/net/ip.h
index bd201278c55a5..5f0f1215d2f92 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -475,7 +475,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
rcu_read_lock();
-	net = dev_net_rcu(dst->dev);
+	net = dev_net_rcu(dst_dev(dst));
    if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
        ip_mtu_locked(dst) ||
        !forwarding) {
@@ -489,7 +489,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
    if (mtu)
    	goto out;
-	mtu = READ_ONCE(dst->dev->mtu);
+	mtu = READ_ONCE(dst_dev(dst)->mtu);
if (unlikely(ip_mtu_locked(dst))) {
    	if (rt->rt_uses_gateway && mtu > 576)
@@ -509,16 +509,17 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 static inline unsigned int ip_skb_dst_mtu(struct sock *sk,
    				  const struct sk_buff *skb)
 {
+	const struct dst_entry *dst = skb_dst(skb);
    unsigned int mtu;
if (!sk || !sk_fullsock(sk) || ip_sk_use_pmtu(sk)) {
    	bool forwarding = IPCB(skb)->flags & IPSKB_FORWARDED;
-		return ip_dst_mtu_maybe_forward(skb_dst(skb), forwarding);
+		return ip_dst_mtu_maybe_forward(dst, forwarding);
    }
-	mtu = min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU);
-	return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu);
+	mtu = min(READ_ONCE(dst_dev(dst)->mtu), IP_MAX_MTU);
+	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len,
diff --git a/include/net/route.h b/include/net/route.h
index 8a11d19f897bb..232b7bf55ba22 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -369,7 +369,7 @@ static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
    	const struct net *net;
rcu_read_lock();
-		net = dev_net_rcu(dst->dev);
+		net = dev_net_rcu(dst_dev(dst));
    	hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
    	rcu_read_unlock();
    }
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8f11870b77377..508b23204edc5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -311,18 +311,20 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 {
    struct dst_entry *dst = &rt->dst;
    struct inet_peer *peer;
+	struct net_device *dev;
    bool rc = true;
if (!apply_ratelimit)
    	return true;
/* No rate limit on loopback */
-	if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
+	dev = dst_dev(dst);
+	if (dev && (dev->flags & IFF_LOOPBACK))
    	goto out;
rcu_read_lock();
    peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
-			       l3mdev_master_ifindex_rcu(dst->dev));
+			       l3mdev_master_ifindex_rcu(dev));
    rc = inet_peer_xrlim_allow(peer,
    			   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
    rcu_read_unlock();
@@ -468,13 +470,13 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
  */
 static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
 {
-	struct net_device *route_lookup_dev = NULL;
+	struct net_device *dev = skb->dev;
+	const struct dst_entry *dst;
-	if (skb->dev)
-		route_lookup_dev = skb->dev;
-	else if (skb_dst(skb))
-		route_lookup_dev = skb_dst(skb)->dev;
-	return route_lookup_dev;
+	if (dev)
+		return dev;
+	dst = skb_dst(skb);
+	return dst ? dst_dev(dst) : NULL;
 }
static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
@@ -873,7 +875,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
    struct net *net;
    u32 info = 0;
-	net = dev_net_rcu(skb_dst(skb)->dev);
+	net = skb_dst_dev_net_rcu(skb);
/*
     *	Incomplete header ?
@@ -1016,7 +1018,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
    struct icmp_bxm icmp_param;
    struct net *net;
-	net = dev_net_rcu(skb_dst(skb)->dev);
+	net = skb_dst_dev_net_rcu(skb);
    /* should there be an ICMP stat for ignored echos? */
    if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
    	return SKB_NOT_DROPPED_YET;
@@ -1186,7 +1188,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
    return SKB_NOT_DROPPED_YET;
out_err:
-	__ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
+	__ICMP_INC_STATS(skb_dst_dev_net_rcu(skb), ICMP_MIB_INERRORS);
    return SKB_DROP_REASON_PKT_TOO_SMALL;
 }
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9bf09de6a2e77..f4a87b90351e9 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -424,7 +424,7 @@ static int igmpv3_sendpack(struct sk_buff *skb)
pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
-	return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+	return ip_local_out(skb_dst_dev_net(skb), skb->sk, skb);
 }
static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9ca0a183a55ff..183856b0b7409 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -488,7 +488,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 /* Process an incoming IP datagram fragment. */
 int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
 {
-	struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
+	struct net_device *dev = skb->dev ? : skb_dst_dev(skb);
    int vif = l3mdev_master_ifindex_rcu(dev);
    struct ipq *qp;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 49811c9281d42..4d432e314bcb2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -117,7 +117,7 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
    skb->protocol = htons(ETH_P_IP);
return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
-		       net, sk, skb, NULL, skb_dst(skb)->dev,
+		       net, sk, skb, NULL, skb_dst_dev(skb),
    	       dst_output);
 }
@@ -200,7 +200,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 {
    struct dst_entry *dst = skb_dst(skb);
    struct rtable *rt = dst_rtable(dst);
-	struct net_device *dev = dst->dev;
+	struct net_device *dev = dst_dev(dst);
    unsigned int hh_len = LL_RESERVED_SPACE(dev);
    struct neighbour *neigh;
    bool is_v6gw = false;
@@ -426,7 +426,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
+	struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev;
skb->dev = dev;
    skb->protocol = htons(ETH_P_IP);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index f0b4419cef349..fc95161663071 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -229,7 +229,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
    	goto tx_error_icmp;
    }
-	tdev = dst->dev;
+	tdev = dst_dev(dst);
if (tdev == dev) {
    	dst_release(dst);
@@ -259,7 +259,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
 xmit:
    skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
    skb_dst_set(skb, dst);
-	skb->dev = skb_dst(skb)->dev;
+	skb->dev = skb_dst_dev(skb);
err = dst_output(tunnel->net, skb->sk, skb);
    if (net_xmit_eval(err) == 0)
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index e0aab66cd9251..dff06b9eb6607 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -20,12 +20,12 @@
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
 {
+	struct net_device *dev = skb_dst_dev(skb);
    const struct iphdr *iph = ip_hdr(skb);
    struct rtable *rt;
    struct flowi4 fl4 = {};
    __be32 saddr = iph->saddr;
    __u8 flags;
-	struct net_device *dev = skb_dst(skb)->dev;
    struct flow_keys flkeys;
    unsigned int hh_len;
@@ -74,7 +74,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
 #endif
/* Change in oif may mean change in hh_len. */
-	hh_len = skb_dst(skb)->dev->hard_header_len;
+	hh_len = skb_dst_dev(skb)->hard_header_len;
    if (skb_headroom(skb) < hh_len &&
        pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
    			0, GFP_ATOMIC))
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 261ddb6542a40..7d04df4fc6608 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -413,7 +413,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
    				   const void *daddr)
 {
    const struct rtable *rt = container_of(dst, struct rtable, dst);
-	struct net_device *dev = dst->dev;
+	struct net_device *dev = dst_dev(dst);
    struct neighbour *n;
rcu_read_lock();
@@ -440,7 +440,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 {
    const struct rtable *rt = container_of(dst, struct rtable, dst);
-	struct net_device *dev = dst->dev;
+	struct net_device *dev = dst_dev(dst);
    const __be32 *pkey = daddr;
if (rt->rt_gw_family == AF_INET) {
@@ -1025,7 +1025,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
    	return;
rcu_read_lock();
-	net = dev_net_rcu(dst->dev);
+	net = dev_net_rcu(dst_dev(dst));
    if (mtu < net->ipv4.ip_rt_min_pmtu) {
    	lock = true;
    	mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
@@ -1323,7 +1323,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
    struct net *net;
rcu_read_lock();
-	net = dev_net_rcu(dst->dev);
+	net = dev_net_rcu(dst_dev(dst));
    advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
    			   net->ipv4.ip_rt_min_advmss);
    rcu_read_unlock();
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 408985eb74eef..86c995dc1c5e5 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -558,6 +558,7 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
 void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
 {
    struct tcp_sock *tp = tcp_sk(sk);
+	struct net_device *dev;
    struct dst_entry *dst;
    struct sk_buff *skb;
@@ -575,7 +576,8 @@ void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
    } else if (tp->syn_fastopen_ch &&
    	   atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
    	dst = sk_dst_get(sk);
-		if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
+		dev = dst ? dst_dev(dst) : NULL;
+		if (!(dev && (dev->flags & IFF_LOOPBACK)))
    		atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
    	dst_release(dst);
    }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d8976753d4e47..1572562b0498c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -786,7 +786,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
    arg.iov[0].iov_base = (unsigned char *)&rep;
    arg.iov[0].iov_len  = sizeof(rep.th);
-	net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev);
+	net = sk ? sock_net(sk) : skb_dst_dev_net_rcu(skb);
/* Invalid TCP option size or twice included auth */
    if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh))
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 4251670e328c8..03c068ea27b6a 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -166,11 +166,11 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
    				  unsigned int hash)
 {
    struct tcp_metrics_block *tm;
-	struct net *net;
    bool reclaim = false;
+	struct net *net;
spin_lock_bh(&tcp_metrics_lock);
-	net = dev_net_rcu(dst->dev);
+	net = dev_net_rcu(dst_dev(dst));
/* While waiting for the spin-lock the cache might have been populated
     * with this entry and so we have to check again.
@@ -273,7 +273,7 @@ static struct tcp_metrics_block *__tcp_get_metrics_req(struct request_sock *req,
    	return NULL;
    }
-	net = dev_net_rcu(dst->dev);
+	net = dev_net_rcu(dst_dev(dst));
    hash ^= net_hash_mix(net);
    hash = hash_32(hash, tcp_metrics_hash_log);
@@ -318,7 +318,7 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
    else
    	return NULL;
-	net = dev_net_rcu(dst->dev);
+	net = dev_net_rcu(dst_dev(dst));
    hash ^= net_hash_mix(net);
    hash = hash_32(hash, tcp_metrics_hash_log);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 3cff51ba72bb0..0ae67d537499a 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -31,7 +31,7 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
    return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
-			    net, sk, skb, skb->dev, skb_dst(skb)->dev,
+			    net, sk, skb, skb->dev, skb_dst_dev(skb),
    		    __xfrm4_output,
    		    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
-- 
2.51.0



    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

[PATCH 6.12.y 4/8] ipv4: adopt dst_dev, skb_dst_dev and skb_dst_dev_net[_rcu]