July 2024 - Linux-stable-mirror

FAILED: patch "[PATCH] ipv6: fix source address selection with route leak" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y git checkout FETCH_HEAD git cherry-pick -x 252442f2ae317d109ef0b4b39ce0608c09563042 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072910-fax-periscope-a350@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^.. Possible dependencies: 252442f2ae31 ("ipv6: fix source address selection with route leak") fa17a6d8a5bd ("ipv6: lockless IPV6_ADDR_PREFERENCES implementation") 859f8b265fc2 ("ipv6: lockless IPV6_FLOWINFO_SEND implementation") 6b724bc4300b ("ipv6: lockless IPV6_MTU_DISCOVER implementation") 83cd5eb654b3 ("ipv6: lockless IPV6_ROUTER_ALERT_ISOLATE implementation") 3cccda8db2cf ("ipv6: move np->repflow to atomic flags") 3fa29971c695 ("ipv6: lockless IPV6_RECVERR implemetation") 1086ca7cce29 ("ipv6: lockless IPV6_DONTFRAG implementation") 5121516b0c47 ("ipv6: lockless IPV6_AUTOFLOWLABEL implementation") 6559c0ff3bc2 ("ipv6: lockless IPV6_MULTICAST_ALL implementation") dcae74622c05 ("ipv6: lockless IPV6_RECVERR_RFC4884 implementation") 273784d3c574 ("ipv6: lockless IPV6_MINHOPCOUNT implementation") 15f926c4457a ("ipv6: lockless IPV6_MTU implementation") 2da23eb07c91 ("ipv6: lockless IPV6_MULTICAST_HOPS implementation") d986f52124e0 ("ipv6: lockless IPV6_MULTICAST_LOOP implementation") b0adfba7ee77 ("ipv6: lockless IPV6_UNICAST_HOPS implementation") 8cdd9f1aaedf ("ipv6: fix ip6_sock_set_addr_preferences() typo") e3390b30a5df ("net: annotate data-races around sk->sk_tsflags") 0f158b32a9b1 ("net: selectively purge error queue in IP_RECVERR / IPV6_RECVERR") 08e39c0dfa29 ("inet: move inet->defer_connect to inet->inet_flags") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Date: Wed, 10 Jul 2024 10:14:28 +0200 Subject: [PATCH] ipv6: fix source address selection with route leak By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable(a)vger.kernel.org Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a18ed24fed94..6dbdf60b342f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, - unsigned int prefs, + unsigned int prefs, int l3mdev_index, struct in6_addr *saddr) { + struct net_device *l3mdev; + struct net_device *dev; + bool same_vrf; int err = 0; - if (f6i && f6i->fib6_prefsrc.plen) { - *saddr = f6i->fib6_prefsrc.addr; - } else { - struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + rcu_read_lock(); - err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); - } + l3mdev = dev_get_by_index_rcu(net, l3mdev_index); + if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) + dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (f6i && f6i->fib6_prefsrc.plen && same_vrf) + *saddr = f6i->fib6_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); + + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..784424ac4147 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1124,6 +1124,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, + fl6->flowi6_l3mdev, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d72ca0b086d..c9a9506b714d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5689,7 +5689,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; }

1 year, 1 month

1
0
0 0

FAILED: patch "[PATCH] ipv6: fix source address selection with route leak" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 252442f2ae317d109ef0b4b39ce0608c09563042 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072909-varnish-hence-c531@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: 252442f2ae31 ("ipv6: fix source address selection with route leak") fa17a6d8a5bd ("ipv6: lockless IPV6_ADDR_PREFERENCES implementation") 859f8b265fc2 ("ipv6: lockless IPV6_FLOWINFO_SEND implementation") 6b724bc4300b ("ipv6: lockless IPV6_MTU_DISCOVER implementation") 83cd5eb654b3 ("ipv6: lockless IPV6_ROUTER_ALERT_ISOLATE implementation") 3cccda8db2cf ("ipv6: move np->repflow to atomic flags") 3fa29971c695 ("ipv6: lockless IPV6_RECVERR implemetation") 1086ca7cce29 ("ipv6: lockless IPV6_DONTFRAG implementation") 5121516b0c47 ("ipv6: lockless IPV6_AUTOFLOWLABEL implementation") 6559c0ff3bc2 ("ipv6: lockless IPV6_MULTICAST_ALL implementation") dcae74622c05 ("ipv6: lockless IPV6_RECVERR_RFC4884 implementation") 273784d3c574 ("ipv6: lockless IPV6_MINHOPCOUNT implementation") 15f926c4457a ("ipv6: lockless IPV6_MTU implementation") 2da23eb07c91 ("ipv6: lockless IPV6_MULTICAST_HOPS implementation") d986f52124e0 ("ipv6: lockless IPV6_MULTICAST_LOOP implementation") b0adfba7ee77 ("ipv6: lockless IPV6_UNICAST_HOPS implementation") 8cdd9f1aaedf ("ipv6: fix ip6_sock_set_addr_preferences() typo") e3390b30a5df ("net: annotate data-races around sk->sk_tsflags") 0f158b32a9b1 ("net: selectively purge error queue in IP_RECVERR / IPV6_RECVERR") 08e39c0dfa29 ("inet: move inet->defer_connect to inet->inet_flags") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Date: Wed, 10 Jul 2024 10:14:28 +0200 Subject: [PATCH] ipv6: fix source address selection with route leak By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable(a)vger.kernel.org Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a18ed24fed94..6dbdf60b342f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, - unsigned int prefs, + unsigned int prefs, int l3mdev_index, struct in6_addr *saddr) { + struct net_device *l3mdev; + struct net_device *dev; + bool same_vrf; int err = 0; - if (f6i && f6i->fib6_prefsrc.plen) { - *saddr = f6i->fib6_prefsrc.addr; - } else { - struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + rcu_read_lock(); - err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); - } + l3mdev = dev_get_by_index_rcu(net, l3mdev_index); + if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) + dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (f6i && f6i->fib6_prefsrc.plen && same_vrf) + *saddr = f6i->fib6_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); + + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..784424ac4147 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1124,6 +1124,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, + fl6->flowi6_l3mdev, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d72ca0b086d..c9a9506b714d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5689,7 +5689,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; }

1 year, 1 month

1
0
0 0

FAILED: patch "[PATCH] ipv6: fix source address selection with route leak" failed to apply to 5.15-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.15-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y git checkout FETCH_HEAD git cherry-pick -x 252442f2ae317d109ef0b4b39ce0608c09563042 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072908-clicker-cornball-8a64@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^.. Possible dependencies: 252442f2ae31 ("ipv6: fix source address selection with route leak") fa17a6d8a5bd ("ipv6: lockless IPV6_ADDR_PREFERENCES implementation") 859f8b265fc2 ("ipv6: lockless IPV6_FLOWINFO_SEND implementation") 6b724bc4300b ("ipv6: lockless IPV6_MTU_DISCOVER implementation") 83cd5eb654b3 ("ipv6: lockless IPV6_ROUTER_ALERT_ISOLATE implementation") 3cccda8db2cf ("ipv6: move np->repflow to atomic flags") 3fa29971c695 ("ipv6: lockless IPV6_RECVERR implemetation") 1086ca7cce29 ("ipv6: lockless IPV6_DONTFRAG implementation") 5121516b0c47 ("ipv6: lockless IPV6_AUTOFLOWLABEL implementation") 6559c0ff3bc2 ("ipv6: lockless IPV6_MULTICAST_ALL implementation") dcae74622c05 ("ipv6: lockless IPV6_RECVERR_RFC4884 implementation") 273784d3c574 ("ipv6: lockless IPV6_MINHOPCOUNT implementation") 15f926c4457a ("ipv6: lockless IPV6_MTU implementation") 2da23eb07c91 ("ipv6: lockless IPV6_MULTICAST_HOPS implementation") d986f52124e0 ("ipv6: lockless IPV6_MULTICAST_LOOP implementation") b0adfba7ee77 ("ipv6: lockless IPV6_UNICAST_HOPS implementation") 8cdd9f1aaedf ("ipv6: fix ip6_sock_set_addr_preferences() typo") e3390b30a5df ("net: annotate data-races around sk->sk_tsflags") 0f158b32a9b1 ("net: selectively purge error queue in IP_RECVERR / IPV6_RECVERR") 08e39c0dfa29 ("inet: move inet->defer_connect to inet->inet_flags") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Date: Wed, 10 Jul 2024 10:14:28 +0200 Subject: [PATCH] ipv6: fix source address selection with route leak By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable(a)vger.kernel.org Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a18ed24fed94..6dbdf60b342f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, - unsigned int prefs, + unsigned int prefs, int l3mdev_index, struct in6_addr *saddr) { + struct net_device *l3mdev; + struct net_device *dev; + bool same_vrf; int err = 0; - if (f6i && f6i->fib6_prefsrc.plen) { - *saddr = f6i->fib6_prefsrc.addr; - } else { - struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + rcu_read_lock(); - err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); - } + l3mdev = dev_get_by_index_rcu(net, l3mdev_index); + if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) + dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (f6i && f6i->fib6_prefsrc.plen && same_vrf) + *saddr = f6i->fib6_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); + + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..784424ac4147 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1124,6 +1124,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, + fl6->flowi6_l3mdev, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d72ca0b086d..c9a9506b714d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5689,7 +5689,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; }

1 year, 1 month

1
0
0 0

FAILED: patch "[PATCH] ipv6: fix source address selection with route leak" failed to apply to 6.1-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.1-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y git checkout FETCH_HEAD git cherry-pick -x 252442f2ae317d109ef0b4b39ce0608c09563042 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072907-unlaced-unlovely-6e01@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^.. Possible dependencies: 252442f2ae31 ("ipv6: fix source address selection with route leak") fa17a6d8a5bd ("ipv6: lockless IPV6_ADDR_PREFERENCES implementation") 859f8b265fc2 ("ipv6: lockless IPV6_FLOWINFO_SEND implementation") 6b724bc4300b ("ipv6: lockless IPV6_MTU_DISCOVER implementation") 83cd5eb654b3 ("ipv6: lockless IPV6_ROUTER_ALERT_ISOLATE implementation") 3cccda8db2cf ("ipv6: move np->repflow to atomic flags") 3fa29971c695 ("ipv6: lockless IPV6_RECVERR implemetation") 1086ca7cce29 ("ipv6: lockless IPV6_DONTFRAG implementation") 5121516b0c47 ("ipv6: lockless IPV6_AUTOFLOWLABEL implementation") 6559c0ff3bc2 ("ipv6: lockless IPV6_MULTICAST_ALL implementation") dcae74622c05 ("ipv6: lockless IPV6_RECVERR_RFC4884 implementation") 273784d3c574 ("ipv6: lockless IPV6_MINHOPCOUNT implementation") 15f926c4457a ("ipv6: lockless IPV6_MTU implementation") 2da23eb07c91 ("ipv6: lockless IPV6_MULTICAST_HOPS implementation") d986f52124e0 ("ipv6: lockless IPV6_MULTICAST_LOOP implementation") b0adfba7ee77 ("ipv6: lockless IPV6_UNICAST_HOPS implementation") 8cdd9f1aaedf ("ipv6: fix ip6_sock_set_addr_preferences() typo") e3390b30a5df ("net: annotate data-races around sk->sk_tsflags") 0f158b32a9b1 ("net: selectively purge error queue in IP_RECVERR / IPV6_RECVERR") 08e39c0dfa29 ("inet: move inet->defer_connect to inet->inet_flags") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 252442f2ae317d109ef0b4b39ce0608c09563042 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Date: Wed, 10 Jul 2024 10:14:28 +0200 Subject: [PATCH] ipv6: fix source address selection with route leak By default, an address assigned to the output interface is selected when the source address is not specified. This is problematic when a route, configured in a vrf, uses an interface from another vrf (aka route leak). The original vrf does not own the selected source address. Let's add a check against the output interface and call the appropriate function to select the source address. CC: stable(a)vger.kernel.org Fixes: 0d240e7811c4 ("net: vrf: Implement get_saddr for IPv6") Signed-off-by: Nicolas Dichtel <nicolas.dichtel(a)6wind.com> Link: https://patch.msgid.link/20240710081521.3809742-3-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski <kuba(a)kernel.org> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a18ed24fed94..6dbdf60b342f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -127,18 +127,26 @@ void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, - unsigned int prefs, + unsigned int prefs, int l3mdev_index, struct in6_addr *saddr) { + struct net_device *l3mdev; + struct net_device *dev; + bool same_vrf; int err = 0; - if (f6i && f6i->fib6_prefsrc.plen) { - *saddr = f6i->fib6_prefsrc.addr; - } else { - struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + rcu_read_lock(); - err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); - } + l3mdev = dev_get_by_index_rcu(net, l3mdev_index); + if (!f6i || !f6i->fib6_prefsrc.plen || l3mdev) + dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + same_vrf = !l3mdev || l3mdev_master_dev_rcu(dev) == l3mdev; + if (f6i && f6i->fib6_prefsrc.plen && same_vrf) + *saddr = f6i->fib6_prefsrc.addr; + else + err = ipv6_dev_get_saddr(net, same_vrf ? dev : l3mdev, daddr, prefs, saddr); + + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..784424ac4147 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1124,6 +1124,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, from = rt ? rcu_dereference(rt->from) : NULL; err = ip6_route_get_saddr(net, from, &fl6->daddr, sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0, + fl6->flowi6_l3mdev, &fl6->saddr); rcu_read_unlock(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d72ca0b086d..c9a9506b714d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5689,7 +5689,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; }

1 year, 1 month

1
0
0 0

FAILED: patch "[PATCH] kernel: rerun task_work while freezing in get_signal()" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y git checkout FETCH_HEAD git cherry-pick -x 943ad0b62e3c21f324c4884caa6cb4a871bca05c # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072941-nucleus-cannabis-e513@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^.. Possible dependencies: 943ad0b62e3c ("kernel: rerun task_work while freezing in get_signal()") f5d39b020809 ("freezer,sched: Rewrite core freezer logic") 9963e444f71e ("sched: Widen TAKS_state literals") f9fc8cad9728 ("sched: Add TASK_ANY for wait_task_inactive()") 9204a97f7ae8 ("sched: Change wait_task_inactive()s match_state") 1fbcaa923ce2 ("freezer,umh: Clean up freezer/initrd interaction") 5950e5d574c6 ("freezer: Have {,un}lock_system_sleep() save/restore flags") 0b9d46fc5ef7 ("sched: Rename task_running() to task_on_cpu()") 8386c414e27c ("PM: hibernate: defer device probing when resuming from hibernation") 57b6de08b5f6 ("ptrace: Admit ptrace_stop can generate spuriuos SIGTRAPs") 7b0fe1367ef2 ("ptrace: Document that wait_task_inactive can't fail") 1930a6e739c4 ("Merge tag 'ptrace-cleanups-for-v5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 943ad0b62e3c21f324c4884caa6cb4a871bca05c Mon Sep 17 00:00:00 2001 From: Pavel Begunkov <asml.silence(a)gmail.com> Date: Wed, 10 Jul 2024 18:58:18 +0100 Subject: [PATCH] kernel: rerun task_work while freezing in get_signal() io_uring can asynchronously add a task_work while the task is getting freezed. TIF_NOTIFY_SIGNAL will prevent the task from sleeping in do_freezer_trap(), and since the get_signal()'s relock loop doesn't retry task_work, the task will spin there not being able to sleep until the freezing is cancelled / the task is killed / etc. Run task_works in the freezer path. Keep the patch small and simple so it can be easily back ported, but we might need to do some cleaning after and look if there are other places with similar problems. Cc: stable(a)vger.kernel.org Link: https://github.com/systemd/systemd/issues/33626 Fixes: 12db8b690010c ("entry: Add support for TIF_NOTIFY_SIGNAL") Reported-by: Julian Orth <ju.orth(a)gmail.com> Acked-by: Oleg Nesterov <oleg(a)redhat.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com> Link: https://lore.kernel.org/r/89ed3a52933370deaaf61a0a620a6ac91f1e754d.17206341… Signed-off-by: Jens Axboe <axboe(a)kernel.dk> diff --git a/kernel/signal.c b/kernel/signal.c index 1f9dd41c04be..60c737e423a1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2600,6 +2600,14 @@ static void do_freezer_trap(void) spin_unlock_irq(&current->sighand->siglock); cgroup_enter_frozen(); schedule(); + + /* + * We could've been woken by task_work, run it to clear + * TIF_NOTIFY_SIGNAL. The caller will retry if necessary. + */ + clear_notify_signal(); + if (unlikely(task_work_pending(current))) + task_work_run(); } static int ptrace_signal(int signr, kernel_siginfo_t *info, enum pid_type type)

1 year, 1 month

1
0
0 0

FAILED: patch "[PATCH] thermal/drivers/broadcom: Fix race between removal and clock" failed to apply to 6.6-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.6-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y git checkout FETCH_HEAD git cherry-pick -x e90c369cc2ffcf7145a46448de101f715a1f5584 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072901-rare-engaging-abb3@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^.. Possible dependencies: e90c369cc2ff ("thermal/drivers/broadcom: Fix race between removal and clock disable") f29ecd3748a2 ("thermal: bcm2835: Convert to platform remove callback returning void") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From e90c369cc2ffcf7145a46448de101f715a1f5584 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org> Date: Tue, 9 Jul 2024 14:59:31 +0200 Subject: [PATCH] thermal/drivers/broadcom: Fix race between removal and clock disable During the probe, driver enables clocks necessary to access registers (in get_temp()) and then registers thermal zone with managed-resources (devm) interface. Removal of device is not done in reversed order, because: 1. Clock will be disabled in driver remove() callback - thermal zone is still registered and accessible to users, 2. devm interface will unregister thermal zone. This leaves short window between (1) and (2) for accessing the get_temp() callback with disabled clock. Fix this by enabling clock also via devm-interface, so entire cleanup path will be in proper, reversed order. Fixes: 8454c8c09c77 ("thermal/drivers/bcm2835: Remove buggy call to thermal_of_zone_unregister") Cc: stable(a)vger.kernel.org Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski(a)linaro.org> Link: https://lore.kernel.org/r/20240709-thermal-probe-v1-1-241644e2b6e0@linaro.o… Signed-off-by: Daniel Lezcano <daniel.lezcano(a)linaro.org> diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c index 5c1cebe07580..3b1030fc4fbf 100644 --- a/drivers/thermal/broadcom/bcm2835_thermal.c +++ b/drivers/thermal/broadcom/bcm2835_thermal.c @@ -185,7 +185,7 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) return err; } - data->clk = devm_clk_get(&pdev->dev, NULL); + data->clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(data->clk)) { err = PTR_ERR(data->clk); if (err != -EPROBE_DEFER) @@ -193,10 +193,6 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) return err; } - err = clk_prepare_enable(data->clk); - if (err) - return err; - rate = clk_get_rate(data->clk); if ((rate < 1920000) || (rate > 5000000)) dev_warn(&pdev->dev, @@ -211,7 +207,7 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Failed to register the thermal device: %d\n", err); - goto err_clk; + return err; } /* @@ -236,7 +232,7 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Not able to read trip_temp: %d\n", err); - goto err_tz; + return err; } /* set bandgap reference voltage and enable voltage regulator */ @@ -269,17 +265,11 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) */ err = thermal_add_hwmon_sysfs(tz); if (err) - goto err_tz; + return err; bcm2835_thermal_debugfs(pdev); return 0; -err_tz: - devm_thermal_of_zone_unregister(&pdev->dev, tz); -err_clk: - clk_disable_unprepare(data->clk); - - return err; } static void bcm2835_thermal_remove(struct platform_device *pdev) @@ -287,7 +277,6 @@ static void bcm2835_thermal_remove(struct platform_device *pdev) struct bcm2835_thermal_data *data = platform_get_drvdata(pdev); debugfs_remove_recursive(data->debugfsdir); - clk_disable_unprepare(data->clk); } static struct platform_driver bcm2835_thermal_driver = {

1 year, 1 month

1
0
0 0

EFI backports from v6.11-rc to v6.6 and newer

by Ard Biesheuvel

Please consider the following patches (in this order) for backporting to v6.6 and newer. fb318ca0a522295edd6d796fb987e99ec41f0ee5 ae835a96d72cd025421910edb0e8faf706998727 The second patch addresses a regression on older Dell hardware. The first one is a prerequisite for the second one, and a minor bugfix itself. Thanks, Ard.

1 year, 1 month

2
1
0 0

FAILED: patch "[PATCH] mm/mglru: fix ineffective protection calculation" failed to apply to 6.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y git checkout FETCH_HEAD git cherry-pick -x 30d77b7eef019fa4422980806e8b7cdc8674493e # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072911-marshland-grab-ced7@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^.. Possible dependencies: 30d77b7eef01 ("mm/mglru: fix ineffective protection calculation") 3f74e6bd3b84 ("mm/mglru: fix overshooting shrinker memory") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 30d77b7eef019fa4422980806e8b7cdc8674493e Mon Sep 17 00:00:00 2001 From: Yu Zhao <yuzhao(a)google.com> Date: Fri, 12 Jul 2024 17:29:56 -0600 Subject: [PATCH] mm/mglru: fix ineffective protection calculation mem_cgroup_calculate_protection() is not stateless and should only be used as part of a top-down tree traversal. shrink_one() traverses the per-node memcg LRU instead of the root_mem_cgroup tree, and therefore it should not call mem_cgroup_calculate_protection(). The existing misuse in shrink_one() can cause ineffective protection of sub-trees that are grandchildren of root_mem_cgroup. Fix it by reusing lru_gen_age_node(), which already traverses the root_mem_cgroup tree, to calculate the protection. Previously lru_gen_age_node() opportunistically skips the first pass, i.e., when scan_control->priority is DEF_PRIORITY. On the second pass, lruvec_is_sizable() uses appropriate scan_control->priority, set by set_initial_priority() from lru_gen_shrink_node(), to decide whether a memcg is too small to reclaim from. Now lru_gen_age_node() unconditionally traverses the root_mem_cgroup tree. So it should call set_initial_priority() upfront, to make sure lruvec_is_sizable() uses appropriate scan_control->priority on the first pass. Otherwise, lruvec_is_reclaimable() can return false negatives and result in premature OOM kills when min_ttl_ms is used. Link: https://lkml.kernel.org/r/20240712232956.1427127-1-yuzhao@google.com Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") Signed-off-by: Yu Zhao <yuzhao(a)google.com> Reported-by: T.J. Mercier <tjmercier(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/vmscan.c b/mm/vmscan.c index 6216d79edb7f..525d3ffa8451 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3915,6 +3915,32 @@ static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long seq, * working set protection ******************************************************************************/ +static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc) +{ + int priority; + unsigned long reclaimable; + + if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH) + return; + /* + * Determine the initial priority based on + * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, + * where reclaimed_to_scanned_ratio = inactive / total. + */ + reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE); + if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc)) + reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON); + + /* round down reclaimable and round up sc->nr_to_reclaim */ + priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1); + + /* + * The estimation is based on LRU pages only, so cap it to prevent + * overshoots of shrinker objects by large margins. + */ + sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY); +} + static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) { int gen, type, zone; @@ -3948,19 +3974,17 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MIN_SEQ(lruvec); - /* see the comment on lru_gen_folio */ - gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); - birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); - - if (time_is_after_jiffies(birth + min_ttl)) + if (mem_cgroup_below_min(NULL, memcg)) return false; if (!lruvec_is_sizable(lruvec, sc)) return false; - mem_cgroup_calculate_protection(NULL, memcg); + /* see the comment on lru_gen_folio */ + gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); + birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); - return !mem_cgroup_below_min(NULL, memcg); + return time_is_before_jiffies(birth + min_ttl); } /* to protect the working set of the last N jiffies */ @@ -3970,23 +3994,20 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { struct mem_cgroup *memcg; unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl); + bool reclaimable = !min_ttl; VM_WARN_ON_ONCE(!current_is_kswapd()); - /* check the order to exclude compaction-induced reclaim */ - if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY) - return; + set_initial_priority(pgdat, sc); memcg = mem_cgroup_iter(NULL, NULL, NULL); do { struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); - if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) { - mem_cgroup_iter_break(NULL, memcg); - return; - } + mem_cgroup_calculate_protection(NULL, memcg); - cond_resched(); + if (!reclaimable) + reclaimable = lruvec_is_reclaimable(lruvec, sc, min_ttl); } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); /* @@ -3994,7 +4015,7 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) * younger than min_ttl. However, another possibility is all memcgs are * either too small or below min. */ - if (mutex_trylock(&oom_lock)) { + if (!reclaimable && mutex_trylock(&oom_lock)) { struct oom_control oc = { .gfp_mask = sc->gfp_mask, }; @@ -4786,8 +4807,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc) struct mem_cgroup *memcg = lruvec_memcg(lruvec); struct pglist_data *pgdat = lruvec_pgdat(lruvec); - mem_cgroup_calculate_protection(NULL, memcg); - + /* lru_gen_age_node() called mem_cgroup_calculate_protection() */ if (mem_cgroup_below_min(NULL, memcg)) return MEMCG_LRU_YOUNG; @@ -4911,32 +4931,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc blk_finish_plug(&plug); } -static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc) -{ - int priority; - unsigned long reclaimable; - - if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH) - return; - /* - * Determine the initial priority based on - * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, - * where reclaimed_to_scanned_ratio = inactive / total. - */ - reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE); - if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc)) - reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON); - - /* round down reclaimable and round up sc->nr_to_reclaim */ - priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1); - - /* - * The estimation is based on LRU pages only, so cap it to prevent - * overshoots of shrinker objects by large margins. - */ - sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY); -} - static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc) { struct blk_plug plug;

1 year, 1 month

1
0
0 0

FAILED: patch "[PATCH] mm/migrate: putback split folios when numa hint migration" failed to apply to 6.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y git checkout FETCH_HEAD git cherry-pick -x 6e49019db5f7a09a9c0e8ac4d108e656c3f8e583 # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072937-aloe-fog-7fc4@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^.. Possible dependencies: 6e49019db5f7 ("mm/migrate: putback split folios when numa hint migration fails") ee86814b0562 ("mm/migrate: move NUMA hinting fault folio isolation + checks under PTL") 4b88c23ab8c9 ("mm/migrate: make migrate_misplaced_folio() return 0 on success") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 6e49019db5f7a09a9c0e8ac4d108e656c3f8e583 Mon Sep 17 00:00:00 2001 From: Peter Xu <peterx(a)redhat.com> Date: Mon, 8 Jul 2024 17:55:37 -0400 Subject: [PATCH] mm/migrate: putback split folios when numa hint migration fails This issue is not from any report yet, but by code observation only. This is yet another fix besides Hugh's patch [1] but on relevant code path, where eager split of folio can happen if the folio is already on deferred list during a folio migration. Here the issue is NUMA path (migrate_misplaced_folio()) may start to encounter such folio split now even with MR_NUMA_MISPLACED hint applied. Then when migrate_pages() didn't migrate all the folios, it's possible the split small folios be put onto the list instead of the original folio. Then putting back only the head page won't be enough. Fix it by putting back all the folios on the list. [1] https://lore.kernel.org/all/46c948b4-4dd8-6e03-4c7b-ce4e81cfa536@google.com/ [akpm(a)linux-foundation.org: remove now unused local `nr_pages'] Link: https://lkml.kernel.org/r/20240708215537.2630610-1-peterx@redhat.com Fixes: 7262f208ca68 ("mm/migrate: split source folio if it is on deferred split list") Signed-off-by: Peter Xu <peterx(a)redhat.com> Reviewed-by: Zi Yan <ziy(a)nvidia.com> Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com> Cc: Yang Shi <shy828301(a)gmail.com> Cc: Hugh Dickins <hughd(a)google.com> Cc: Huang Ying <ying.huang(a)intel.com> Cc: David Hildenbrand <david(a)redhat.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/migrate.c b/mm/migrate.c index 6eb9df239230..bdbb5bb04c91 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2621,20 +2621,13 @@ int migrate_misplaced_folio(struct folio *folio, struct vm_area_struct *vma, int nr_remaining; unsigned int nr_succeeded; LIST_HEAD(migratepages); - int nr_pages = folio_nr_pages(folio); list_add(&folio->lru, &migratepages); nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_folio, NULL, node, MIGRATE_ASYNC, MR_NUMA_MISPLACED, &nr_succeeded); - if (nr_remaining) { - if (!list_empty(&migratepages)) { - list_del(&folio->lru); - node_stat_mod_folio(folio, NR_ISOLATED_ANON + - folio_is_file_lru(folio), -nr_pages); - folio_putback_lru(folio); - } - } + if (nr_remaining && !list_empty(&migratepages)) + putback_movable_pages(&migratepages); if (nr_succeeded) { count_vm_numa_events(NUMA_PAGE_MIGRATE, nr_succeeded); if (!node_is_toptier(folio_nid(folio)) && node_is_toptier(node))

1 year, 1 month

1
0
0 0

FAILED: patch "[PATCH] mm: avoid overflows in dirty throttling logic" failed to apply to 6.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 6.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. To reproduce the conflict and resubmit, you may use the following commands: git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.10.y git checkout FETCH_HEAD git cherry-pick -x 68ed2a394a0190433ba982b353579075a29099bd # <resolve conflicts, build, test, etc.> git commit -s git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024072925-darling-chaplain-8e34@gregkh' --subject-prefix 'PATCH 6.10.y' HEAD^.. Possible dependencies: 68ed2a394a01 ("mm: avoid overflows in dirty throttling logic") thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 68ed2a394a0190433ba982b353579075a29099bd Mon Sep 17 00:00:00 2001 From: Jan Kara <jack(a)suse.cz> Date: Fri, 21 Jun 2024 16:42:38 +0200 Subject: [PATCH] mm: avoid overflows in dirty throttling logic The dirty throttling logic is interspersed with assumptions that dirty limits in PAGE_SIZE units fit into 32-bit (so that various multiplications fit into 64-bits). If limits end up being larger, we will hit overflows, possible divisions by 0 etc. Fix these problems by never allowing so large dirty limits as they have dubious practical value anyway. For dirty_bytes / dirty_background_bytes interfaces we can just refuse to set so large limits. For dirty_ratio / dirty_background_ratio it isn't so simple as the dirty limit is computed from the amount of available memory which can change due to memory hotplug etc. So when converting dirty limits from ratios to numbers of pages, we just don't allow the result to exceed UINT_MAX. This is root-only triggerable problem which occurs when the operator sets dirty limits to >16 TB. Link: https://lkml.kernel.org/r/20240621144246.11148-2-jack@suse.cz Signed-off-by: Jan Kara <jack(a)suse.cz> Reported-by: Zach O'Keefe <zokeefe(a)google.com> Reviewed-By: Zach O'Keefe <zokeefe(a)google.com> Cc: <stable(a)vger.kernel.org> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c4aa6e84c20a..acff24e9fae4 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -417,13 +417,20 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc) else bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE; - if (bg_thresh >= thresh) - bg_thresh = thresh / 2; tsk = current; if (rt_task(tsk)) { bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32; thresh += thresh / 4 + global_wb_domain.dirty_limit / 32; } + /* + * Dirty throttling logic assumes the limits in page units fit into + * 32-bits. This gives 16TB dirty limits max which is hopefully enough. + */ + if (thresh > UINT_MAX) + thresh = UINT_MAX; + /* This makes sure bg_thresh is within 32-bits as well */ + if (bg_thresh >= thresh) + bg_thresh = thresh / 2; dtc->thresh = thresh; dtc->bg_thresh = bg_thresh; @@ -473,7 +480,11 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat) if (rt_task(tsk)) dirty += dirty / 4; - return dirty; + /* + * Dirty throttling logic assumes the limits in page units fit into + * 32-bits. This gives 16TB dirty limits max which is hopefully enough. + */ + return min_t(unsigned long, dirty, UINT_MAX); } /** @@ -510,10 +521,17 @@ static int dirty_background_bytes_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret; + unsigned long old_bytes = dirty_background_bytes; ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); - if (ret == 0 && write) + if (ret == 0 && write) { + if (DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE) > + UINT_MAX) { + dirty_background_bytes = old_bytes; + return -ERANGE; + } dirty_background_ratio = 0; + } return ret; } @@ -539,6 +557,10 @@ static int dirty_bytes_handler(struct ctl_table *table, int write, ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); if (ret == 0 && write && vm_dirty_bytes != old_bytes) { + if (DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE) > UINT_MAX) { + vm_dirty_bytes = old_bytes; + return -ERANGE; + } writeback_set_ratelimit(); vm_dirty_ratio = 0; }

1 year, 1 month

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror July 2024