The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 38ec4944b593fd90c5ef42aaaa53e66ae5769d04 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Tue, 13 Apr 2021 05:41:35 -0700
Subject: [PATCH] gro: ensure frag0 meets IP header alignment
After commit 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head")
Guenter Roeck reported one failure in his tests using sh architecture.
After much debugging, we have been able to spot silent unaligned accesses
in inet_gro_receive()
The issue at hand is that upper networking stacks assume their header
is word-aligned. Low level drivers are supposed to reserve NET_IP_ALIGN
bytes before the Ethernet header to make that happen.
This patch hardens skb_gro_reset_offset() to not allow frag0 fast-path
if the fragment is not properly aligned.
Some arches like x86, arm64 and powerpc do not care and define NET_IP_ALIGN
as 0, this extra check will be a NOP for them.
Note that if frag0 is not used, GRO will call pskb_may_pull()
as many times as needed to pull network and transport headers.
Fixes: 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head")
Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: Guenter Roeck <linux(a)roeck-us.net>
Cc: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Cc: "Michael S. Tsirkin" <mst(a)redhat.com>
Cc: Jason Wang <jasowang(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Tested-by: Guenter Roeck <linux(a)roeck-us.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/core/dev.c b/net/core/dev.c
index af8c1ea040b9..1f79b9aa9a3f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5924,7 +5924,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
NAPI_GRO_CB(skb)->frag0_len = 0;
if (!skb_headlen(skb) && pinfo->nr_frags &&
- !PageHighMem(skb_frag_page(frag0))) {
+ !PageHighMem(skb_frag_page(frag0)) &&
+ (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
skb_frag_size(frag0),
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 38ec4944b593fd90c5ef42aaaa53e66ae5769d04 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet(a)google.com>
Date: Tue, 13 Apr 2021 05:41:35 -0700
Subject: [PATCH] gro: ensure frag0 meets IP header alignment
After commit 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head")
Guenter Roeck reported one failure in his tests using sh architecture.
After much debugging, we have been able to spot silent unaligned accesses
in inet_gro_receive()
The issue at hand is that upper networking stacks assume their header
is word-aligned. Low level drivers are supposed to reserve NET_IP_ALIGN
bytes before the Ethernet header to make that happen.
This patch hardens skb_gro_reset_offset() to not allow frag0 fast-path
if the fragment is not properly aligned.
Some arches like x86, arm64 and powerpc do not care and define NET_IP_ALIGN
as 0, this extra check will be a NOP for them.
Note that if frag0 is not used, GRO will call pskb_may_pull()
as many times as needed to pull network and transport headers.
Fixes: 0f6925b3e8da ("virtio_net: Do not pull payload in skb->head")
Fixes: 78a478d0efd9 ("gro: Inline skb_gro_header and cache frag0 virtual address")
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Reported-by: Guenter Roeck <linux(a)roeck-us.net>
Cc: Xuan Zhuo <xuanzhuo(a)linux.alibaba.com>
Cc: "Michael S. Tsirkin" <mst(a)redhat.com>
Cc: Jason Wang <jasowang(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Tested-by: Guenter Roeck <linux(a)roeck-us.net>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/core/dev.c b/net/core/dev.c
index af8c1ea040b9..1f79b9aa9a3f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5924,7 +5924,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
NAPI_GRO_CB(skb)->frag0_len = 0;
if (!skb_headlen(skb) && pinfo->nr_frags &&
- !PageHighMem(skb_frag_page(frag0))) {
+ !PageHighMem(skb_frag_page(frag0)) &&
+ (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
skb_frag_size(frag0),
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 1fe976d308acb6374c899a4ee8025a0a016e453e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali(a)kernel.org>
Date: Mon, 12 Apr 2021 18:57:39 +0200
Subject: [PATCH] net: phy: marvell: fix detection of PHY on Topaz switches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Since commit fee2d546414d ("net: phy: marvell: mv88e6390 temperature
sensor reading"), Linux reports the temperature of Topaz hwmon as
constant -75°C.
This is because switches from the Topaz family (88E6141 / 88E6341) have
the address of the temperature sensor register different from Peridot.
This address is instead compatible with 88E1510 PHYs, as was used for
Topaz before the above mentioned commit.
Create a new mapping table between switch family and PHY ID for families
which don't have a model number. And define PHY IDs for Topaz and Peridot
families.
Create a new PHY ID and a new PHY driver for Topaz's internal PHY.
The only difference from Peridot's PHY driver is the HWMON probing
method.
Prior this change Topaz's internal PHY is detected by kernel as:
PHY [...] driver [Marvell 88E6390] (irq=63)
And afterwards as:
PHY [...] driver [Marvell 88E6341 Family] (irq=63)
Signed-off-by: Pali Rohár <pali(a)kernel.org>
BugLink: https://github.com/globalscaletechnologies/linux/issues/1
Fixes: fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading")
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 903d619e08ed..e08bf9377140 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3026,10 +3026,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
return err;
}
+/* prod_id for switch families which do not have a PHY model number */
+static const u16 family_prod_id_table[] = {
+ [MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341,
+ [MV88E6XXX_FAMILY_6390] = MV88E6XXX_PORT_SWITCH_ID_PROD_6390,
+};
+
static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
{
struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv;
struct mv88e6xxx_chip *chip = mdio_bus->chip;
+ u16 prod_id;
u16 val;
int err;
@@ -3040,23 +3047,12 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
err = chip->info->ops->phy_read(chip, bus, phy, reg, &val);
mv88e6xxx_reg_unlock(chip);
- if (reg == MII_PHYSID2) {
- /* Some internal PHYs don't have a model number. */
- if (chip->info->family != MV88E6XXX_FAMILY_6165)
- /* Then there is the 6165 family. It gets is
- * PHYs correct. But it can also have two
- * SERDES interfaces in the PHY address
- * space. And these don't have a model
- * number. But they are not PHYs, so we don't
- * want to give them something a PHY driver
- * will recognise.
- *
- * Use the mv88e6390 family model number
- * instead, for anything which really could be
- * a PHY,
- */
- if (!(val & 0x3f0))
- val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4;
+ /* Some internal PHYs don't have a model number. */
+ if (reg == MII_PHYSID2 && !(val & 0x3f0) &&
+ chip->info->family < ARRAY_SIZE(family_prod_id_table)) {
+ prod_id = family_prod_id_table[chip->info->family];
+ if (prod_id)
+ val |= prod_id >> 4;
}
return err ? err : val;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e26a5d663f8a..8018ddf7f316 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -3021,9 +3021,34 @@ static struct phy_driver marvell_drivers[] = {
.get_stats = marvell_get_stats,
},
{
- .phy_id = MARVELL_PHY_ID_88E6390,
+ .phy_id = MARVELL_PHY_ID_88E6341_FAMILY,
.phy_id_mask = MARVELL_PHY_ID_MASK,
- .name = "Marvell 88E6390",
+ .name = "Marvell 88E6341 Family",
+ /* PHY_GBIT_FEATURES */
+ .flags = PHY_POLL_CABLE_TEST,
+ .probe = m88e1510_probe,
+ .config_init = marvell_config_init,
+ .config_aneg = m88e6390_config_aneg,
+ .read_status = marvell_read_status,
+ .config_intr = marvell_config_intr,
+ .handle_interrupt = marvell_handle_interrupt,
+ .resume = genphy_resume,
+ .suspend = genphy_suspend,
+ .read_page = marvell_read_page,
+ .write_page = marvell_write_page,
+ .get_sset_count = marvell_get_sset_count,
+ .get_strings = marvell_get_strings,
+ .get_stats = marvell_get_stats,
+ .get_tunable = m88e1540_get_tunable,
+ .set_tunable = m88e1540_set_tunable,
+ .cable_test_start = marvell_vct7_cable_test_start,
+ .cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
+ .cable_test_get_status = marvell_vct7_cable_test_get_status,
+ },
+ {
+ .phy_id = MARVELL_PHY_ID_88E6390_FAMILY,
+ .phy_id_mask = MARVELL_PHY_ID_MASK,
+ .name = "Marvell 88E6390 Family",
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = m88e6390_probe,
@@ -3107,7 +3132,8 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = {
{ MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1545, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK },
- { MARVELL_PHY_ID_88E6390, MARVELL_PHY_ID_MASK },
+ { MARVELL_PHY_ID_88E6341_FAMILY, MARVELL_PHY_ID_MASK },
+ { MARVELL_PHY_ID_88E6390_FAMILY, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1340S, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1548P, MARVELL_PHY_ID_MASK },
{ }
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 52b1610eae68..c544b70dfbd2 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -28,11 +28,12 @@
/* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0
-/* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do
+/* These Ethernet switch families contain embedded PHYs, but they do
* not have a model ID. So the switch driver traps reads to the ID2
* register and returns the switch family ID
*/
-#define MARVELL_PHY_ID_88E6390 0x01410f90
+#define MARVELL_PHY_ID_88E6341_FAMILY 0x01410f41
+#define MARVELL_PHY_ID_88E6390_FAMILY 0x01410f90
#define MARVELL_PHY_FAMILY_ID(id) ((id) >> 4)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9601148392520e2e134936e76788fc2a6371e7be Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel(a)iogearbox.net>
Date: Tue, 23 Mar 2021 08:32:59 +0100
Subject: [PATCH] bpf: Use correct permission flag for mixed signed bounds
arithmetic
We forbid adding unknown scalars with mixed signed bounds due to the
spectre v1 masking mitigation. Hence this also needs bypass_spec_v1
flag instead of allow_ptr_leaks.
Fixes: 2c78ee898d8f ("bpf: Implement CAP_BPF")
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend(a)gmail.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3a738724a380..2ede4b850230 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6085,7 +6085,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case PTR_TO_MAP_VALUE:
- if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
+ if (!env->env->bypass_spec_v1 && !known && (smin_val < 0) != (smax_val < 0)) {
verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
off_reg == dst_reg ? dst : src);
return -EACCES;
The patch below does not apply to the 5.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 9601148392520e2e134936e76788fc2a6371e7be Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel(a)iogearbox.net>
Date: Tue, 23 Mar 2021 08:32:59 +0100
Subject: [PATCH] bpf: Use correct permission flag for mixed signed bounds
arithmetic
We forbid adding unknown scalars with mixed signed bounds due to the
spectre v1 masking mitigation. Hence this also needs bypass_spec_v1
flag instead of allow_ptr_leaks.
Fixes: 2c78ee898d8f ("bpf: Implement CAP_BPF")
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend(a)gmail.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3a738724a380..2ede4b850230 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6085,7 +6085,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case PTR_TO_MAP_VALUE:
- if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
+ if (!env->env->bypass_spec_v1 && !known && (smin_val < 0) != (smax_val < 0)) {
verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
off_reg == dst_reg ? dst : src);
return -EACCES;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 941ea91e87a6e879ed82dad4949f6234f2702bec Mon Sep 17 00:00:00 2001
From: Hristo Venev <hristo(a)venev.name>
Date: Mon, 12 Apr 2021 20:41:17 +0300
Subject: [PATCH] net: ip6_tunnel: Unregister catch-all devices
Similarly to the sit case, we need to remove the tunnels with no
addresses that have been moved to another network namespace.
Fixes: 0bd8762824e73 ("ip6tnl: add x-netns support")
Signed-off-by: Hristo Venev <hristo(a)venev.name>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3fa0eca5a06f..42fe7db6bbb3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2244,6 +2244,16 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head
t = rtnl_dereference(t->next);
}
}
+
+ t = rtnl_dereference(ip6n->tnls_wc[0]);
+ while (t) {
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, list);
+ t = rtnl_dereference(t->next);
+ }
}
static int __net_init ip6_tnl_init_net(struct net *net)
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 941ea91e87a6e879ed82dad4949f6234f2702bec Mon Sep 17 00:00:00 2001
From: Hristo Venev <hristo(a)venev.name>
Date: Mon, 12 Apr 2021 20:41:17 +0300
Subject: [PATCH] net: ip6_tunnel: Unregister catch-all devices
Similarly to the sit case, we need to remove the tunnels with no
addresses that have been moved to another network namespace.
Fixes: 0bd8762824e73 ("ip6tnl: add x-netns support")
Signed-off-by: Hristo Venev <hristo(a)venev.name>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3fa0eca5a06f..42fe7db6bbb3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2244,6 +2244,16 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head
t = rtnl_dereference(t->next);
}
}
+
+ t = rtnl_dereference(ip6n->tnls_wc[0]);
+ while (t) {
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, list);
+ t = rtnl_dereference(t->next);
+ }
}
static int __net_init ip6_tnl_init_net(struct net *net)
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 941ea91e87a6e879ed82dad4949f6234f2702bec Mon Sep 17 00:00:00 2001
From: Hristo Venev <hristo(a)venev.name>
Date: Mon, 12 Apr 2021 20:41:17 +0300
Subject: [PATCH] net: ip6_tunnel: Unregister catch-all devices
Similarly to the sit case, we need to remove the tunnels with no
addresses that have been moved to another network namespace.
Fixes: 0bd8762824e73 ("ip6tnl: add x-netns support")
Signed-off-by: Hristo Venev <hristo(a)venev.name>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3fa0eca5a06f..42fe7db6bbb3 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2244,6 +2244,16 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head
t = rtnl_dereference(t->next);
}
}
+
+ t = rtnl_dereference(ip6n->tnls_wc[0]);
+ while (t) {
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, list);
+ t = rtnl_dereference(t->next);
+ }
}
static int __net_init ip6_tnl_init_net(struct net *net)
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 610f8c0fc8d46e0933955ce13af3d64484a4630a Mon Sep 17 00:00:00 2001
From: Hristo Venev <hristo(a)venev.name>
Date: Mon, 12 Apr 2021 20:41:16 +0300
Subject: [PATCH] net: sit: Unregister catch-all devices
A sit interface created without a local or a remote address is linked
into the `sit_net::tunnels_wc` list of its original namespace. When
deleting a network namespace, delete the devices that have been moved.
The following script triggers a null pointer dereference if devices
linked in a deleted `sit_net` remain:
for i in `seq 1 30`; do
ip netns add ns-test
ip netns exec ns-test ip link add dev veth0 type veth peer veth1
ip netns exec ns-test ip link add dev sit$i type sit dev veth0
ip netns exec ns-test ip link set dev sit$i netns $$
ip netns del ns-test
done
for i in `seq 1 30`; do
ip link del dev sit$i
done
Fixes: 5e6700b3bf98f ("sit: add support of x-netns")
Signed-off-by: Hristo Venev <hristo(a)venev.name>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 63ccd9f2dccc..9fdccf0718b5 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1867,9 +1867,9 @@ static void __net_exit sit_destroy_tunnels(struct net *net,
if (dev->rtnl_link_ops == &sit_link_ops)
unregister_netdevice_queue(dev, head);
- for (prio = 1; prio < 4; prio++) {
+ for (prio = 0; prio < 4; prio++) {
int h;
- for (h = 0; h < IP6_SIT_HASH_SIZE; h++) {
+ for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) {
struct ip_tunnel *t;
t = rtnl_dereference(sitn->tunnels[prio][h]);
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From d163a925ebbc6eb5b562b0f1d72c7e817aa75c40 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw(a)strlen.de>
Date: Wed, 7 Apr 2021 21:43:40 +0200
Subject: [PATCH] netfilter: arp_tables: add pre_exit hook for table unregister
Same problem that also existed in iptables/ip(6)tables, when
arptable_filter is removed there is no longer a wait period before the
table/ruleset is free'd.
Unregister the hook in pre_exit, then remove the table in the exit
function.
This used to work correctly because the old nf_hook_unregister API
did unconditional synchronize_net.
The per-net hook unregister function uses call_rcu instead.
Fixes: b9e69e127397 ("netfilter: xtables: don't hook tables by default")
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 7d3537c40ec9..26a13294318c 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -52,8 +52,9 @@ extern void *arpt_alloc_initial_table(const struct xt_table *);
int arpt_register_table(struct net *net, const struct xt_table *table,
const struct arpt_replace *repl,
const struct nf_hook_ops *ops, struct xt_table **res);
-void arpt_unregister_table(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops);
+void arpt_unregister_table(struct net *net, struct xt_table *table);
+void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops);
extern unsigned int arpt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index d1e04d2b5170..6c26533480dd 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1539,10 +1539,15 @@ int arpt_register_table(struct net *net,
return ret;
}
-void arpt_unregister_table(struct net *net, struct xt_table *table,
- const struct nf_hook_ops *ops)
+void arpt_unregister_table_pre_exit(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops)
{
nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+}
+EXPORT_SYMBOL(arpt_unregister_table_pre_exit);
+
+void arpt_unregister_table(struct net *net, struct xt_table *table)
+{
__arpt_unregister_table(net, table);
}
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index c216b9ad3bb2..6c300ba5634e 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -56,16 +56,24 @@ static int __net_init arptable_filter_table_init(struct net *net)
return err;
}
+static void __net_exit arptable_filter_net_pre_exit(struct net *net)
+{
+ if (net->ipv4.arptable_filter)
+ arpt_unregister_table_pre_exit(net, net->ipv4.arptable_filter,
+ arpfilter_ops);
+}
+
static void __net_exit arptable_filter_net_exit(struct net *net)
{
if (!net->ipv4.arptable_filter)
return;
- arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops);
+ arpt_unregister_table(net, net->ipv4.arptable_filter);
net->ipv4.arptable_filter = NULL;
}
static struct pernet_operations arptable_filter_net_ops = {
.exit = arptable_filter_net_exit,
+ .pre_exit = arptable_filter_net_pre_exit,
};
static int __init arptable_filter_init(void)