The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 1fe976d308acb6374c899a4ee8025a0a016e453e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali(a)kernel.org>
Date: Mon, 12 Apr 2021 18:57:39 +0200
Subject: [PATCH] net: phy: marvell: fix detection of PHY on Topaz switches
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Since commit fee2d546414d ("net: phy: marvell: mv88e6390 temperature
sensor reading"), Linux reports the temperature of Topaz hwmon as
constant -75°C.
This is because switches from the Topaz family (88E6141 / 88E6341) have
the address of the temperature sensor register different from Peridot.
This address is instead compatible with 88E1510 PHYs, as was used for
Topaz before the above mentioned commit.
Create a new mapping table between switch family and PHY ID for families
which don't have a model number. And define PHY IDs for Topaz and Peridot
families.
Create a new PHY ID and a new PHY driver for Topaz's internal PHY.
The only difference from Peridot's PHY driver is the HWMON probing
method.
Prior to this change, Topaz's internal PHY is detected by the kernel as:
PHY [...] driver [Marvell 88E6390] (irq=63)
And afterwards as:
PHY [...] driver [Marvell 88E6341 Family] (irq=63)
Signed-off-by: Pali Rohár <pali(a)kernel.org>
BugLink: https://github.com/globalscaletechnologies/linux/issues/1
Fixes: fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading")
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 903d619e08ed..e08bf9377140 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3026,10 +3026,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
return err;
}
+/* prod_id for switch families which do not have a PHY model number */
+static const u16 family_prod_id_table[] = {
+ [MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341,
+ [MV88E6XXX_FAMILY_6390] = MV88E6XXX_PORT_SWITCH_ID_PROD_6390,
+};
+
static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
{
struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv;
struct mv88e6xxx_chip *chip = mdio_bus->chip;
+ u16 prod_id;
u16 val;
int err;
@@ -3040,23 +3047,12 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
err = chip->info->ops->phy_read(chip, bus, phy, reg, &val);
mv88e6xxx_reg_unlock(chip);
- if (reg == MII_PHYSID2) {
- /* Some internal PHYs don't have a model number. */
- if (chip->info->family != MV88E6XXX_FAMILY_6165)
- /* Then there is the 6165 family. It gets is
- * PHYs correct. But it can also have two
- * SERDES interfaces in the PHY address
- * space. And these don't have a model
- * number. But they are not PHYs, so we don't
- * want to give them something a PHY driver
- * will recognise.
- *
- * Use the mv88e6390 family model number
- * instead, for anything which really could be
- * a PHY,
- */
- if (!(val & 0x3f0))
- val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4;
+ /* Some internal PHYs don't have a model number. */
+ if (reg == MII_PHYSID2 && !(val & 0x3f0) &&
+ chip->info->family < ARRAY_SIZE(family_prod_id_table)) {
+ prod_id = family_prod_id_table[chip->info->family];
+ if (prod_id)
+ val |= prod_id >> 4;
}
return err ? err : val;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e26a5d663f8a..8018ddf7f316 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -3021,9 +3021,34 @@ static struct phy_driver marvell_drivers[] = {
.get_stats = marvell_get_stats,
},
{
- .phy_id = MARVELL_PHY_ID_88E6390,
+ .phy_id = MARVELL_PHY_ID_88E6341_FAMILY,
.phy_id_mask = MARVELL_PHY_ID_MASK,
- .name = "Marvell 88E6390",
+ .name = "Marvell 88E6341 Family",
+ /* PHY_GBIT_FEATURES */
+ .flags = PHY_POLL_CABLE_TEST,
+ .probe = m88e1510_probe,
+ .config_init = marvell_config_init,
+ .config_aneg = m88e6390_config_aneg,
+ .read_status = marvell_read_status,
+ .config_intr = marvell_config_intr,
+ .handle_interrupt = marvell_handle_interrupt,
+ .resume = genphy_resume,
+ .suspend = genphy_suspend,
+ .read_page = marvell_read_page,
+ .write_page = marvell_write_page,
+ .get_sset_count = marvell_get_sset_count,
+ .get_strings = marvell_get_strings,
+ .get_stats = marvell_get_stats,
+ .get_tunable = m88e1540_get_tunable,
+ .set_tunable = m88e1540_set_tunable,
+ .cable_test_start = marvell_vct7_cable_test_start,
+ .cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
+ .cable_test_get_status = marvell_vct7_cable_test_get_status,
+ },
+ {
+ .phy_id = MARVELL_PHY_ID_88E6390_FAMILY,
+ .phy_id_mask = MARVELL_PHY_ID_MASK,
+ .name = "Marvell 88E6390 Family",
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = m88e6390_probe,
@@ -3107,7 +3132,8 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = {
{ MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1545, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK },
- { MARVELL_PHY_ID_88E6390, MARVELL_PHY_ID_MASK },
+ { MARVELL_PHY_ID_88E6341_FAMILY, MARVELL_PHY_ID_MASK },
+ { MARVELL_PHY_ID_88E6390_FAMILY, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1340S, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1548P, MARVELL_PHY_ID_MASK },
{ }
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 52b1610eae68..c544b70dfbd2 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -28,11 +28,12 @@
/* Marvel 88E1111 in Finisar SFP module with modified PHY ID */
#define MARVELL_PHY_ID_88E1111_FINISAR 0x01ff0cc0
-/* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do
+/* These Ethernet switch families contain embedded PHYs, but they do
* not have a model ID. So the switch driver traps reads to the ID2
* register and returns the switch family ID
*/
-#define MARVELL_PHY_ID_88E6390 0x01410f90
+#define MARVELL_PHY_ID_88E6341_FAMILY 0x01410f41
+#define MARVELL_PHY_ID_88E6390_FAMILY 0x01410f90
#define MARVELL_PHY_FAMILY_ID(id) ((id) >> 4)
commit 1fe976d308acb6374c899a4ee8025a0a016e453e upstream.
Since commit fee2d546414d ("net: phy: marvell: mv88e6390 temperature
sensor reading"), Linux reports the temperature of Topaz hwmon as
constant -75°C.
This is because switches from the Topaz family (88E6141 / 88E6341) have
the address of the temperature sensor register different from Peridot.
This address is instead compatible with 88E1510 PHYs, as was used for
Topaz before the above mentioned commit.
Create a new mapping table between switch family and PHY ID for families
which don't have a model number. And define PHY IDs for Topaz and Peridot
families.
Create a new PHY ID and a new PHY driver for Topaz's internal PHY.
The only difference from Peridot's PHY driver is the HWMON probing
method.
Prior to this change, Topaz's internal PHY is detected by the kernel as:
PHY [...] driver [Marvell 88E6390] (irq=63)
And afterwards as:
PHY [...] driver [Marvell 88E6341 Family] (irq=63)
Signed-off-by: Pali Rohár <pali(a)kernel.org>
BugLink: https://github.com/globalscaletechnologies/linux/issues/1
Fixes: fee2d546414d ("net: phy: marvell: mv88e6390 temperature sensor reading")
Reviewed-by: Marek Behún <kabel(a)kernel.org>
Reviewed-by: Andrew Lunn <andrew(a)lunn.ch>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
[pali: Backported to 5.10 version]
---
This patch is backported to 5.10 version. Tested on Turris Mox with Topaz switch.
---
drivers/net/dsa/mv88e6xxx/chip.c | 30 +++++++++++++----------------
drivers/net/phy/marvell.c | 33 +++++++++++++++++++++++++++++---
include/linux/marvell_phy.h | 5 +++--
3 files changed, 46 insertions(+), 22 deletions(-)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 87160e723dfc..70ec17f3c300 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2994,10 +2994,17 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
return err;
}
+/* prod_id for switch families which do not have a PHY model number */
+static const u16 family_prod_id_table[] = {
+ [MV88E6XXX_FAMILY_6341] = MV88E6XXX_PORT_SWITCH_ID_PROD_6341,
+ [MV88E6XXX_FAMILY_6390] = MV88E6XXX_PORT_SWITCH_ID_PROD_6390,
+};
+
static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
{
struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv;
struct mv88e6xxx_chip *chip = mdio_bus->chip;
+ u16 prod_id;
u16 val;
int err;
@@ -3008,23 +3015,12 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
err = chip->info->ops->phy_read(chip, bus, phy, reg, &val);
mv88e6xxx_reg_unlock(chip);
- if (reg == MII_PHYSID2) {
- /* Some internal PHYs don't have a model number. */
- if (chip->info->family != MV88E6XXX_FAMILY_6165)
- /* Then there is the 6165 family. It gets is
- * PHYs correct. But it can also have two
- * SERDES interfaces in the PHY address
- * space. And these don't have a model
- * number. But they are not PHYs, so we don't
- * want to give them something a PHY driver
- * will recognise.
- *
- * Use the mv88e6390 family model number
- * instead, for anything which really could be
- * a PHY,
- */
- if (!(val & 0x3f0))
- val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4;
+ /* Some internal PHYs don't have a model number. */
+ if (reg == MII_PHYSID2 && !(val & 0x3f0) &&
+ chip->info->family < ARRAY_SIZE(family_prod_id_table)) {
+ prod_id = family_prod_id_table[chip->info->family];
+ if (prod_id)
+ val |= prod_id >> 4;
}
return err ? err : val;
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 5dbdaf0f5f09..823a89354466 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2913,9 +2913,35 @@ static struct phy_driver marvell_drivers[] = {
.get_stats = marvell_get_stats,
},
{
- .phy_id = MARVELL_PHY_ID_88E6390,
+ .phy_id = MARVELL_PHY_ID_88E6341_FAMILY,
.phy_id_mask = MARVELL_PHY_ID_MASK,
- .name = "Marvell 88E6390",
+ .name = "Marvell 88E6341 Family",
+ /* PHY_GBIT_FEATURES */
+ .flags = PHY_POLL_CABLE_TEST,
+ .probe = m88e1510_probe,
+ .config_init = marvell_config_init,
+ .config_aneg = m88e6390_config_aneg,
+ .read_status = marvell_read_status,
+ .ack_interrupt = marvell_ack_interrupt,
+ .config_intr = marvell_config_intr,
+ .did_interrupt = m88e1121_did_interrupt,
+ .resume = genphy_resume,
+ .suspend = genphy_suspend,
+ .read_page = marvell_read_page,
+ .write_page = marvell_write_page,
+ .get_sset_count = marvell_get_sset_count,
+ .get_strings = marvell_get_strings,
+ .get_stats = marvell_get_stats,
+ .get_tunable = m88e1540_get_tunable,
+ .set_tunable = m88e1540_set_tunable,
+ .cable_test_start = marvell_vct7_cable_test_start,
+ .cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
+ .cable_test_get_status = marvell_vct7_cable_test_get_status,
+ },
+ {
+ .phy_id = MARVELL_PHY_ID_88E6390_FAMILY,
+ .phy_id_mask = MARVELL_PHY_ID_MASK,
+ .name = "Marvell 88E6390 Family",
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = m88e6390_probe,
@@ -3001,7 +3027,8 @@ static struct mdio_device_id __maybe_unused marvell_tbl[] = {
{ MARVELL_PHY_ID_88E1540, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1545, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E3016, MARVELL_PHY_ID_MASK },
- { MARVELL_PHY_ID_88E6390, MARVELL_PHY_ID_MASK },
+ { MARVELL_PHY_ID_88E6341_FAMILY, MARVELL_PHY_ID_MASK },
+ { MARVELL_PHY_ID_88E6390_FAMILY, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1340S, MARVELL_PHY_ID_MASK },
{ MARVELL_PHY_ID_88E1548P, MARVELL_PHY_ID_MASK },
{ }
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index ff7b7607c8cf..f5cf19d19776 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -25,11 +25,12 @@
#define MARVELL_PHY_ID_88X3310 0x002b09a0
#define MARVELL_PHY_ID_88E2110 0x002b09b0
-/* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do
+/* These Ethernet switch families contain embedded PHYs, but they do
* not have a model ID. So the switch driver traps reads to the ID2
* register and returns the switch family ID
*/
-#define MARVELL_PHY_ID_88E6390 0x01410f90
+#define MARVELL_PHY_ID_88E6341_FAMILY 0x01410f41
+#define MARVELL_PHY_ID_88E6390_FAMILY 0x01410f90
#define MARVELL_PHY_FAMILY_ID(id) ((id) >> 4)
--
2.20.1
The patch below does not apply to the 5.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7fedb63a8307dda0ec3b8969a3b233a1dd7ea8e0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel(a)iogearbox.net>
Date: Wed, 24 Mar 2021 10:38:26 +0100
Subject: [PATCH] bpf: Tighten speculative pointer arithmetic mask
This work tightens the offset mask we use for unprivileged pointer arithmetic
in order to mitigate a corner case reported by Piotr and Benedict where in
the speculative domain it is possible to advance, for example, the map value
pointer by up to value_size-1 out-of-bounds in order to leak kernel memory
via side-channel to user space.
Before this change, the computed ptr_limit for retrieve_ptr_limit() helper
represents largest valid distance when moving pointer to the right or left
which is then fed as aux->alu_limit to generate masking instructions against
the offset register. After the change, the derived aux->alu_limit represents
the largest potential value of the offset register which we mask against which
is just a narrower subset of the former limit.
For minimal complexity, we call sanitize_ptr_alu() from 2 observation points
in adjust_ptr_min_max_vals(), that is, before and after the simulated alu
operation. In the first step, we retrieve the alu_state and alu_limit before
the operation as well as we branch-off a verifier path and push it to the
verification stack as we did before which checks the dst_reg under truncation,
in other words, when the speculative domain would attempt to move the pointer
out-of-bounds.
In the second step, we retrieve the new alu_limit and calculate the absolute
distance between both. Moreover, we commit the alu_state and final alu_limit
via update_alu_sanitation_state() to the env's instruction aux data, and bail
out from there if there is a mismatch due to coming from different verification
paths with different states.
Reported-by: Piotr Krysiuk <piotras(a)gmail.com>
Reported-by: Benedict Schlueter <benedict.schlueter(a)rub.de>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend(a)gmail.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
Tested-by: Benedict Schlueter <benedict.schlueter(a)rub.de>
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e41b6326e3e6..0399ac092b36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5871,7 +5871,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
bool off_is_neg = off_reg->smin_value < 0;
bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
(opcode == BPF_SUB && !off_is_neg);
- u32 off, max = 0, ptr_limit = 0;
+ u32 max = 0, ptr_limit = 0;
if (!tnum_is_const(off_reg->var_off) &&
(off_reg->smin_value < 0) != (off_reg->smax_value < 0))
@@ -5880,26 +5880,18 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
switch (ptr_reg->type) {
case PTR_TO_STACK:
/* Offset 0 is out-of-bounds, but acceptable start for the
- * left direction, see BPF_REG_FP.
+ * left direction, see BPF_REG_FP. Also, unknown scalar
+ * offset where we would need to deal with min/max bounds is
+ * currently prohibited for unprivileged.
*/
max = MAX_BPF_STACK + mask_to_left;
- /* Indirect variable offset stack access is prohibited in
- * unprivileged mode so it's not handled here.
- */
- off = ptr_reg->off + ptr_reg->var_off.value;
- if (mask_to_left)
- ptr_limit = MAX_BPF_STACK + off;
- else
- ptr_limit = -off - 1;
+ ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
break;
case PTR_TO_MAP_VALUE:
max = ptr_reg->map_ptr->value_size;
- if (mask_to_left) {
- ptr_limit = ptr_reg->umax_value + ptr_reg->off;
- } else {
- off = ptr_reg->smin_value + ptr_reg->off;
- ptr_limit = ptr_reg->map_ptr->value_size - off - 1;
- }
+ ptr_limit = (mask_to_left ?
+ ptr_reg->smin_value :
+ ptr_reg->umax_value) + ptr_reg->off;
break;
default:
return REASON_TYPE;
@@ -5954,10 +5946,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
struct bpf_insn *insn,
const struct bpf_reg_state *ptr_reg,
const struct bpf_reg_state *off_reg,
- struct bpf_reg_state *dst_reg)
+ struct bpf_reg_state *dst_reg,
+ struct bpf_insn_aux_data *tmp_aux,
+ const bool commit_window)
{
+ struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
struct bpf_verifier_state *vstate = env->cur_state;
- struct bpf_insn_aux_data *aux = cur_aux(env);
bool off_is_neg = off_reg->smin_value < 0;
bool ptr_is_dst_reg = ptr_reg == dst_reg;
u8 opcode = BPF_OP(insn->code);
@@ -5976,18 +5970,33 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
if (vstate->speculative)
goto do_sim;
- alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
- alu_state |= ptr_is_dst_reg ?
- BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
-
err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode);
if (err < 0)
return err;
+ if (commit_window) {
+ /* In commit phase we narrow the masking window based on
+ * the observed pointer move after the simulated operation.
+ */
+ alu_state = tmp_aux->alu_state;
+ alu_limit = abs(tmp_aux->alu_limit - alu_limit);
+ } else {
+ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
+ alu_state |= ptr_is_dst_reg ?
+ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+ }
+
err = update_alu_sanitation_state(aux, alu_state, alu_limit);
if (err < 0)
return err;
do_sim:
+ /* If we're in commit phase, we're done here given we already
+ * pushed the truncated dst_reg into the speculative verification
+ * stack.
+ */
+ if (commit_window)
+ return 0;
+
/* Simulate and find potential out-of-bounds access under
* speculative execution from truncation as a result of
* masking when off was not within expected range. If off
@@ -6130,6 +6139,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
+ struct bpf_insn_aux_data tmp_aux = {};
u8 opcode = BPF_OP(insn->code);
u32 dst = insn->dst_reg;
int ret;
@@ -6196,12 +6206,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
/* pointer types do not carry 32-bit bounds at the moment. */
__mark_reg32_unbounded(dst_reg);
- switch (opcode) {
- case BPF_ADD:
- ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg);
+ if (sanitize_needed(opcode)) {
+ ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
+ &tmp_aux, false);
if (ret < 0)
return sanitize_err(env, insn, ret, off_reg, dst_reg);
+ }
+ switch (opcode) {
+ case BPF_ADD:
/* We can take a fixed offset as long as it doesn't overflow
* the s32 'off' field
*/
@@ -6252,10 +6265,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
break;
case BPF_SUB:
- ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg);
- if (ret < 0)
- return sanitize_err(env, insn, ret, off_reg, dst_reg);
-
if (dst_reg == off_reg) {
/* scalar -= pointer. Creates an unknown scalar */
verbose(env, "R%d tried to subtract pointer from scalar\n",
@@ -6338,6 +6347,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (sanitize_check_bounds(env, insn, dst_reg) < 0)
return -EACCES;
+ if (sanitize_needed(opcode)) {
+ ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
+ &tmp_aux, true);
+ if (ret < 0)
+ return sanitize_err(env, insn, ret, off_reg, dst_reg);
+ }
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 7fedb63a8307dda0ec3b8969a3b233a1dd7ea8e0 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel(a)iogearbox.net>
Date: Wed, 24 Mar 2021 10:38:26 +0100
Subject: [PATCH] bpf: Tighten speculative pointer arithmetic mask
This work tightens the offset mask we use for unprivileged pointer arithmetic
in order to mitigate a corner case reported by Piotr and Benedict where in
the speculative domain it is possible to advance, for example, the map value
pointer by up to value_size-1 out-of-bounds in order to leak kernel memory
via side-channel to user space.
Before this change, the computed ptr_limit for retrieve_ptr_limit() helper
represents largest valid distance when moving pointer to the right or left
which is then fed as aux->alu_limit to generate masking instructions against
the offset register. After the change, the derived aux->alu_limit represents
the largest potential value of the offset register which we mask against which
is just a narrower subset of the former limit.
For minimal complexity, we call sanitize_ptr_alu() from 2 observation points
in adjust_ptr_min_max_vals(), that is, before and after the simulated alu
operation. In the first step, we retrieve the alu_state and alu_limit before
the operation as well as we branch-off a verifier path and push it to the
verification stack as we did before which checks the dst_reg under truncation,
in other words, when the speculative domain would attempt to move the pointer
out-of-bounds.
In the second step, we retrieve the new alu_limit and calculate the absolute
distance between both. Moreover, we commit the alu_state and final alu_limit
via update_alu_sanitation_state() to the env's instruction aux data, and bail
out from there if there is a mismatch due to coming from different verification
paths with different states.
Reported-by: Piotr Krysiuk <piotras(a)gmail.com>
Reported-by: Benedict Schlueter <benedict.schlueter(a)rub.de>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend(a)gmail.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
Tested-by: Benedict Schlueter <benedict.schlueter(a)rub.de>
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e41b6326e3e6..0399ac092b36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5871,7 +5871,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
bool off_is_neg = off_reg->smin_value < 0;
bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
(opcode == BPF_SUB && !off_is_neg);
- u32 off, max = 0, ptr_limit = 0;
+ u32 max = 0, ptr_limit = 0;
if (!tnum_is_const(off_reg->var_off) &&
(off_reg->smin_value < 0) != (off_reg->smax_value < 0))
@@ -5880,26 +5880,18 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
switch (ptr_reg->type) {
case PTR_TO_STACK:
/* Offset 0 is out-of-bounds, but acceptable start for the
- * left direction, see BPF_REG_FP.
+ * left direction, see BPF_REG_FP. Also, unknown scalar
+ * offset where we would need to deal with min/max bounds is
+ * currently prohibited for unprivileged.
*/
max = MAX_BPF_STACK + mask_to_left;
- /* Indirect variable offset stack access is prohibited in
- * unprivileged mode so it's not handled here.
- */
- off = ptr_reg->off + ptr_reg->var_off.value;
- if (mask_to_left)
- ptr_limit = MAX_BPF_STACK + off;
- else
- ptr_limit = -off - 1;
+ ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
break;
case PTR_TO_MAP_VALUE:
max = ptr_reg->map_ptr->value_size;
- if (mask_to_left) {
- ptr_limit = ptr_reg->umax_value + ptr_reg->off;
- } else {
- off = ptr_reg->smin_value + ptr_reg->off;
- ptr_limit = ptr_reg->map_ptr->value_size - off - 1;
- }
+ ptr_limit = (mask_to_left ?
+ ptr_reg->smin_value :
+ ptr_reg->umax_value) + ptr_reg->off;
break;
default:
return REASON_TYPE;
@@ -5954,10 +5946,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
struct bpf_insn *insn,
const struct bpf_reg_state *ptr_reg,
const struct bpf_reg_state *off_reg,
- struct bpf_reg_state *dst_reg)
+ struct bpf_reg_state *dst_reg,
+ struct bpf_insn_aux_data *tmp_aux,
+ const bool commit_window)
{
+ struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
struct bpf_verifier_state *vstate = env->cur_state;
- struct bpf_insn_aux_data *aux = cur_aux(env);
bool off_is_neg = off_reg->smin_value < 0;
bool ptr_is_dst_reg = ptr_reg == dst_reg;
u8 opcode = BPF_OP(insn->code);
@@ -5976,18 +5970,33 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
if (vstate->speculative)
goto do_sim;
- alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
- alu_state |= ptr_is_dst_reg ?
- BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
-
err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode);
if (err < 0)
return err;
+ if (commit_window) {
+ /* In commit phase we narrow the masking window based on
+ * the observed pointer move after the simulated operation.
+ */
+ alu_state = tmp_aux->alu_state;
+ alu_limit = abs(tmp_aux->alu_limit - alu_limit);
+ } else {
+ alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
+ alu_state |= ptr_is_dst_reg ?
+ BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
+ }
+
err = update_alu_sanitation_state(aux, alu_state, alu_limit);
if (err < 0)
return err;
do_sim:
+ /* If we're in commit phase, we're done here given we already
+ * pushed the truncated dst_reg into the speculative verification
+ * stack.
+ */
+ if (commit_window)
+ return 0;
+
/* Simulate and find potential out-of-bounds access under
* speculative execution from truncation as a result of
* masking when off was not within expected range. If off
@@ -6130,6 +6139,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
+ struct bpf_insn_aux_data tmp_aux = {};
u8 opcode = BPF_OP(insn->code);
u32 dst = insn->dst_reg;
int ret;
@@ -6196,12 +6206,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
/* pointer types do not carry 32-bit bounds at the moment. */
__mark_reg32_unbounded(dst_reg);
- switch (opcode) {
- case BPF_ADD:
- ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg);
+ if (sanitize_needed(opcode)) {
+ ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
+ &tmp_aux, false);
if (ret < 0)
return sanitize_err(env, insn, ret, off_reg, dst_reg);
+ }
+ switch (opcode) {
+ case BPF_ADD:
/* We can take a fixed offset as long as it doesn't overflow
* the s32 'off' field
*/
@@ -6252,10 +6265,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
break;
case BPF_SUB:
- ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg);
- if (ret < 0)
- return sanitize_err(env, insn, ret, off_reg, dst_reg);
-
if (dst_reg == off_reg) {
/* scalar -= pointer. Creates an unknown scalar */
verbose(env, "R%d tried to subtract pointer from scalar\n",
@@ -6338,6 +6347,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (sanitize_check_bounds(env, insn, dst_reg) < 0)
return -EACCES;
+ if (sanitize_needed(opcode)) {
+ ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
+ &tmp_aux, true);
+ if (ret < 0)
+ return sanitize_err(env, insn, ret, off_reg, dst_reg);
+ }
return 0;
}
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 073815b756c51ba9d8384d924c5d1c03ca3d1ae4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel(a)iogearbox.net>
Date: Tue, 23 Mar 2021 15:05:48 +0100
Subject: [PATCH] bpf: Refactor and streamline bounds check into helper
Move the bounds check in adjust_ptr_min_max_vals() into a small helper named
sanitize_check_bounds() in order to simplify the former a bit.
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend(a)gmail.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f378d4ae405f..db77e2c670b9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6075,6 +6075,37 @@ static int check_stack_access_for_ptr_arithmetic(
return 0;
}
+static int sanitize_check_bounds(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn,
+ const struct bpf_reg_state *dst_reg)
+{
+ u32 dst = insn->dst_reg;
+
+ /* For unprivileged we require that resulting offset must be in bounds
+ * in order to be able to sanitize access later on.
+ */
+ if (env->bypass_spec_v1)
+ return 0;
+
+ switch (dst_reg->type) {
+ case PTR_TO_STACK:
+ if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
+ dst_reg->off + dst_reg->var_off.value))
+ return -EACCES;
+ break;
+ case PTR_TO_MAP_VALUE:
+ if (check_map_access(env, dst, dst_reg->off, 1, false)) {
+ verbose(env, "R%d pointer arithmetic of map value goes out of range, "
+ "prohibited for !root\n", dst);
+ return -EACCES;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
* Caller should also handle BPF_MOV case separately.
@@ -6300,22 +6331,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
__reg_deduce_bounds(dst_reg);
__reg_bound_offset(dst_reg);
- /* For unprivileged we require that resulting offset must be in bounds
- * in order to be able to sanitize access later on.
- */
- if (!env->bypass_spec_v1) {
- if (dst_reg->type == PTR_TO_MAP_VALUE &&
- check_map_access(env, dst, dst_reg->off, 1, false)) {
- verbose(env, "R%d pointer arithmetic of map value goes out of range, "
- "prohibited for !root\n", dst);
- return -EACCES;
- } else if (dst_reg->type == PTR_TO_STACK &&
- check_stack_access_for_ptr_arithmetic(
- env, dst, dst_reg, dst_reg->off +
- dst_reg->var_off.value)) {
- return -EACCES;
- }
- }
+ if (sanitize_check_bounds(env, insn, dst_reg) < 0)
+ return -EACCES;
return 0;
}
The patch below does not apply to the 5.11-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
>From 073815b756c51ba9d8384d924c5d1c03ca3d1ae4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel(a)iogearbox.net>
Date: Tue, 23 Mar 2021 15:05:48 +0100
Subject: [PATCH] bpf: Refactor and streamline bounds check into helper
Move the bounds check in adjust_ptr_min_max_vals() into a small helper named
sanitize_check_bounds() in order to simplify the former a bit.
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Reviewed-by: John Fastabend <john.fastabend(a)gmail.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f378d4ae405f..db77e2c670b9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6075,6 +6075,37 @@ static int check_stack_access_for_ptr_arithmetic(
return 0;
}
+static int sanitize_check_bounds(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn,
+ const struct bpf_reg_state *dst_reg)
+{
+ u32 dst = insn->dst_reg;
+
+ /* For unprivileged we require that resulting offset must be in bounds
+ * in order to be able to sanitize access later on.
+ */
+ if (env->bypass_spec_v1)
+ return 0;
+
+ switch (dst_reg->type) {
+ case PTR_TO_STACK:
+ if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
+ dst_reg->off + dst_reg->var_off.value))
+ return -EACCES;
+ break;
+ case PTR_TO_MAP_VALUE:
+ if (check_map_access(env, dst, dst_reg->off, 1, false)) {
+ verbose(env, "R%d pointer arithmetic of map value goes out of range, "
+ "prohibited for !root\n", dst);
+ return -EACCES;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
* Caller should also handle BPF_MOV case separately.
@@ -6300,22 +6331,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
__reg_deduce_bounds(dst_reg);
__reg_bound_offset(dst_reg);
- /* For unprivileged we require that resulting offset must be in bounds
- * in order to be able to sanitize access later on.
- */
- if (!env->bypass_spec_v1) {
- if (dst_reg->type == PTR_TO_MAP_VALUE &&
- check_map_access(env, dst, dst_reg->off, 1, false)) {
- verbose(env, "R%d pointer arithmetic of map value goes out of range, "
- "prohibited for !root\n", dst);
- return -EACCES;
- } else if (dst_reg->type == PTR_TO_STACK &&
- check_stack_access_for_ptr_arithmetic(
- env, dst, dst_reg, dst_reg->off +
- dst_reg->var_off.value)) {
- return -EACCES;
- }
- }
+ if (sanitize_check_bounds(env, insn, dst_reg) < 0)
+ return -EACCES;
return 0;
}
The following commit has been merged into the x86/build branch of tip:
Commit-ID: 0ef3439cd80ba7770723edb0470d15815914bb62
Gitweb: https://git.kernel.org/tip/0ef3439cd80ba7770723edb0470d15815914bb62
Author: Maciej W. Rozycki <macro(a)orcam.me.uk>
AuthorDate: Wed, 14 Apr 2021 12:38:28 +02:00
Committer: Borislav Petkov <bp(a)suse.de>
CommitterDate: Mon, 19 Apr 2021 14:02:12 +02:00
x86/build: Disable HIGHMEM64G selection for M486SX
Fix a regression caused by making the 486SX separately selectable in
Kconfig, for which the HIGHMEM64G setting has not been updated and
therefore has become exposed as a user-selectable option for the M486SX
configuration setting, unlike with the original M486 and all the other
settings that choose non-PAE-enabled processors:
High Memory Support
> 1. off (NOHIGHMEM)
2. 4GB (HIGHMEM4G)
3. 64GB (HIGHMEM64G)
choice[1-3?]:
With the fix in place the setting is now correctly removed:
High Memory Support
> 1. off (NOHIGHMEM)
2. 4GB (HIGHMEM4G)
choice[1-2?]:
[ bp: Massage commit message. ]
Fixes: 87d6021b8143 ("x86/math-emu: Limit MATH_EMULATION to 486SX compatibles")
Signed-off-by: Maciej W. Rozycki <macro(a)orcam.me.uk>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Cc: stable(a)vger.kernel.org # v5.5+
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.2104141221340.44318@angie.orcam.me.uk
---
arch/x86/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2792879..268b7d5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1406,7 +1406,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
- depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
+ depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
select X86_PAE
help
Select this if you have a 32-bit processor and more than 4
commit 2decad92f4731fac9755a083fcfefa66edb7d67d upstream.
The entry from EL0 code checks the TFSRE0_EL1 register for any
asynchronous tag check faults in user space and sets the
TIF_MTE_ASYNC_FAULT flag. This is not done atomically, potentially
racing with another CPU calling set_tsk_thread_flag().
Replace the non-atomic ORR+STR with an STSET instruction. While STSET
requires ARMv8.1 and an assembler that understands LSE atomics, the MTE
feature is part of ARMv8.5 and already requires an updated assembler.
Signed-off-by: Catalin Marinas <catalin.marinas(a)arm.com>
Fixes: 637ec831ea4f ("arm64: mte: Handle synchronous and asynchronous tag check faults")
Cc: <stable(a)vger.kernel.org> # 5.10.x
Reported-by: Will Deacon <will(a)kernel.org>
Cc: Will Deacon <will(a)kernel.org>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Link: https://lore.kernel.org/r/20210409173710.18582-1-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will(a)kernel.org>
---
arch/arm64/Kconfig | 6 +++++-
arch/arm64/kernel/entry.S | 10 ++++++----
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c1be64228327..5e5cf3af6351 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1390,10 +1390,13 @@ config ARM64_PAN
The feature is detected at runtime, and will remain as a 'nop'
instruction if the cpu does not implement the feature.
+config AS_HAS_LSE_ATOMICS
+ def_bool $(as-instr,.arch_extension lse)
+
config ARM64_LSE_ATOMICS
bool
default ARM64_USE_LSE_ATOMICS
- depends on $(as-instr,.arch_extension lse)
+ depends on AS_HAS_LSE_ATOMICS
config ARM64_USE_LSE_ATOMICS
bool "Atomic instructions"
@@ -1667,6 +1670,7 @@ config ARM64_MTE
bool "Memory Tagging Extension support"
default y
depends on ARM64_AS_HAS_MTE && ARM64_TAGGED_ADDR_ABI
+ depends on AS_HAS_LSE_ATOMICS
select ARCH_USES_HIGH_VMA_FLAGS
help
Memory Tagging (part of the ARMv8.5 Extensions) provides
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index d72c818b019c..2da82c139e1c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -148,16 +148,18 @@ alternative_cb_end
.endm
/* Check for MTE asynchronous tag check faults */
- .macro check_mte_async_tcf, flgs, tmp
+ .macro check_mte_async_tcf, tmp, ti_flags
#ifdef CONFIG_ARM64_MTE
+ .arch_extension lse
alternative_if_not ARM64_MTE
b 1f
alternative_else_nop_endif
mrs_s \tmp, SYS_TFSRE0_EL1
tbz \tmp, #SYS_TFSR_EL1_TF0_SHIFT, 1f
/* Asynchronous TCF occurred for TTBR0 access, set the TI flag */
- orr \flgs, \flgs, #_TIF_MTE_ASYNC_FAULT
- str \flgs, [tsk, #TSK_TI_FLAGS]
+ mov \tmp, #_TIF_MTE_ASYNC_FAULT
+ add \ti_flags, tsk, #TSK_TI_FLAGS
+ stset \tmp, [\ti_flags]
msr_s SYS_TFSRE0_EL1, xzr
1:
#endif
@@ -207,7 +209,7 @@ alternative_else_nop_endif
disable_step_tsk x19, x20
/* Check for asynchronous tag check faults in user space */
- check_mte_async_tcf x19, x22
+ check_mte_async_tcf x22, x23
apply_ssbd 1, x22, x23
ptrauth_keys_install_kernel tsk, x20, x22, x23