The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e8684db191e4164f3f5f3ad7dec04a6734c25f1c Mon Sep 17 00:00:00 2001
From: Yuiko Oshino <yuiko.oshino@microchip.com>
Date: Wed, 27 Oct 2021 14:23:02 -0400
Subject: [PATCH] net: ethernet: microchip: lan743x: Fix skb allocation failure
The driver allocates skbs during ndo_open with GFP_ATOMIC, which has a high chance of failure when there are multiple instances.
GFP_KERNEL is sufficient during open; GFP_ATOMIC is needed only in interrupt context.
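The shape of the fix, as a minimal standalone C sketch (illustrative names, not the driver's): the allocation context is threaded through the ring-element helper, so the open path can use a blocking allocation while the interrupt path stays non-blocking.

/* Minimal sketch of the pattern, with stand-ins for GFP_KERNEL/GFP_ATOMIC;
 * in the driver the helper calls __netdev_alloc_skb(netdev, len, gfp). */
#include <stdlib.h>

enum alloc_ctx { CTX_MAY_SLEEP, CTX_ATOMIC };

static void *alloc_rx_buffer(size_t len, enum alloc_ctx ctx)
{
	(void)ctx;	/* a real allocator would block only for CTX_MAY_SLEEP */
	return malloc(len);
}

static int rx_init_ring_element(void **slot, size_t len, enum alloc_ctx ctx)
{
	void *buf = alloc_rx_buffer(len, ctx);

	if (!buf)
		return -1;	/* -ENOMEM in the driver */
	*slot = buf;
	return 0;
}

int main(void)
{
	void *ring[4] = { 0 };
	int i;

	/* ndo_open path: process context, so a blocking allocation is fine */
	for (i = 0; i < 4; i++)
		if (rx_init_ring_element(&ring[i], 2048, CTX_MAY_SLEEP))
			return 1;

	/* the RX interrupt path would pass CTX_ATOMIC instead */
	for (i = 0; i < 4; i++)
		free(ring[i]);
	return 0;
}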
Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver")
Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index c4f32c7e0db1..4d5a5d6595b3 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1944,7 +1944,8 @@ static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index)
index);
}
-static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index)
+static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index,
+ gfp_t gfp)
{
struct net_device *netdev = rx->adapter->netdev;
struct device *dev = &rx->adapter->pdev->dev;
@@ -1958,7 +1959,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index)
descriptor = &rx->ring_cpu_ptr[index];
buffer_info = &rx->buffer_info[index];
- skb = __netdev_alloc_skb(netdev, buffer_length, GFP_ATOMIC | GFP_DMA);
+ skb = __netdev_alloc_skb(netdev, buffer_length, gfp);
if (!skb)
return -ENOMEM;
dma_ptr = dma_map_single(dev, skb->data, buffer_length, DMA_FROM_DEVICE);
@@ -2120,7 +2121,8 @@ static int lan743x_rx_process_buffer(struct lan743x_rx *rx)
/* save existing skb, allocate new skb and map to dma */
skb = buffer_info->skb;
- if (lan743x_rx_init_ring_element(rx, rx->last_head)) {
+ if (lan743x_rx_init_ring_element(rx, rx->last_head,
+ GFP_ATOMIC | GFP_DMA)) {
/* failed to allocate next skb.
* Memory is very low.
* Drop this packet and reuse buffer.
@@ -2335,13 +2337,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx)
rx->last_head = 0;
for (index = 0; index < rx->ring_size; index++) {
- ret = lan743x_rx_init_ring_element(rx, index);
+ ret = lan743x_rx_init_ring_element(rx, index, GFP_KERNEL);
if (ret)
goto cleanup;
}
return 0;
cleanup:
+ netif_warn(rx->adapter, ifup, rx->adapter->netdev,
+ "Error allocating memory for LAN743x\n");
+
lan743x_rx_ring_cleanup(rx);
return ret;
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e8684db191e4164f3f5f3ad7dec04a6734c25f1c Mon Sep 17 00:00:00 2001
From: Yuiko Oshino <yuiko.oshino@microchip.com>
Date: Wed, 27 Oct 2021 14:23:02 -0400
Subject: [PATCH] net: ethernet: microchip: lan743x: Fix skb allocation failure
The driver allocates skbs during ndo_open with GFP_ATOMIC, which has a high chance of failure when there are multiple instances.
GFP_KERNEL is sufficient during open; GFP_ATOMIC is needed only in interrupt context.
Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver")
Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index c4f32c7e0db1..4d5a5d6595b3 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1944,7 +1944,8 @@ static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index)
index);
}
-static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index)
+static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index,
+ gfp_t gfp)
{
struct net_device *netdev = rx->adapter->netdev;
struct device *dev = &rx->adapter->pdev->dev;
@@ -1958,7 +1959,7 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index)
descriptor = &rx->ring_cpu_ptr[index];
buffer_info = &rx->buffer_info[index];
- skb = __netdev_alloc_skb(netdev, buffer_length, GFP_ATOMIC | GFP_DMA);
+ skb = __netdev_alloc_skb(netdev, buffer_length, gfp);
if (!skb)
return -ENOMEM;
dma_ptr = dma_map_single(dev, skb->data, buffer_length, DMA_FROM_DEVICE);
@@ -2120,7 +2121,8 @@ static int lan743x_rx_process_buffer(struct lan743x_rx *rx)
/* save existing skb, allocate new skb and map to dma */
skb = buffer_info->skb;
- if (lan743x_rx_init_ring_element(rx, rx->last_head)) {
+ if (lan743x_rx_init_ring_element(rx, rx->last_head,
+ GFP_ATOMIC | GFP_DMA)) {
/* failed to allocate next skb.
* Memory is very low.
* Drop this packet and reuse buffer.
@@ -2335,13 +2337,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx)
rx->last_head = 0;
for (index = 0; index < rx->ring_size; index++) {
- ret = lan743x_rx_init_ring_element(rx, index);
+ ret = lan743x_rx_init_ring_element(rx, index, GFP_KERNEL);
if (ret)
goto cleanup;
}
return 0;
cleanup:
+ netif_warn(rx->adapter, ifup, rx->adapter->netdev,
+ "Error allocating memory for LAN743x\n");
+
lan743x_rx_ring_cleanup(rx);
return ret;
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3bda2e5df476417b6d08967e2d84234a59d57b1c Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Wed, 27 Oct 2021 20:11:43 +0800
Subject: [PATCH] net: hns3: fix pause config problem after autoneg disabled
If a TP port is configured by the following steps:
1. ethtool -s ethx autoneg off speed 100 duplex full
2. ethtool -A ethx rx on tx on
3. ethtool -s ethx autoneg on (the negotiated rx/tx pause results are off)
4. ethtool -s ethx autoneg off speed 100 duplex full
In step 3, the driver sets the hardware rx/tx pause parameters to off, as
the pause parameters negotiated with the link partner are off.
After step 4, the "ethtool -a ethx" command shows both rx and tx pause
parameters as on. However, the hardware pause parameters are still off,
and the port actually has no flow control.
To fix this problem, if autoneg is disabled, the driver uses its saved
parameters to restore the hardware pause configuration. If the speed is
not changed in this case, there is no phy link state change, so the pause
parameters would not take effect; we therefore need to force the phy to
go down and up.
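The restore sequence, as a hedged standalone sketch (stub functions stand in for the phy and MAC helpers; names are illustrative, not the driver's): bounce the link around the pause update so the PHY actually applies it, then reprogram the MAC.

#include <stdio.h>
#include <stdbool.h>

struct fake_link { bool up, rx_pause, tx_pause; };

/* stand-ins for phy_suspend()/phy_resume() */
static int link_down(struct fake_link *l) { l->up = false; return 0; }
static int link_up(struct fake_link *l)   { l->up = true;  return 0; }

static int restore_pauseparam(struct fake_link *l, bool rx, bool tx)
{
	int ret;

	/* with autoneg off, a pause change only takes effect across a
	 * link-down/link-up transition, so force one */
	ret = link_down(l);
	if (ret)
		return ret;

	l->rx_pause = rx;	/* phy_set_asym_pause() in the driver */
	l->tx_pause = tx;

	ret = link_up(l);
	if (ret)
		return ret;

	/* then reprogram the MAC pause registers
	 * (hclge_mac_pause_setup_hw() in the driver) */
	printf("MAC pause: rx=%d tx=%d\n", l->rx_pause, l->tx_pause);
	return 0;
}

int main(void)
{
	struct fake_link l = { .up = true };

	return restore_pauseparam(&l, true, true);
}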
Fixes: aacbe27e82f0 ("net: hns3: modify how pause options is displayed")
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index d701451596c8..da3a593f6a56 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -568,6 +568,7 @@ struct hnae3_ae_ops {
u32 *auto_neg, u32 *rx_en, u32 *tx_en);
int (*set_pauseparam)(struct hnae3_handle *handle,
u32 auto_neg, u32 rx_en, u32 tx_en);
+ int (*restore_pauseparam)(struct hnae3_handle *handle);
int (*set_autoneg)(struct hnae3_handle *handle, bool enable);
int (*get_autoneg)(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 5ebd96f6833d..7d92dd273ed7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -824,6 +824,26 @@ static int hns3_check_ksettings_param(const struct net_device *netdev,
return 0;
}
+static int hns3_set_phy_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct hnae3_handle *handle = hns3_get_handle(netdev);
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ int ret;
+
+ if (cmd->base.speed == SPEED_1000 &&
+ cmd->base.autoneg == AUTONEG_DISABLE)
+ return -EINVAL;
+
+ if (cmd->base.autoneg == AUTONEG_DISABLE && ops->restore_pauseparam) {
+ ret = ops->restore_pauseparam(handle);
+ if (ret)
+ return ret;
+ }
+
+ return phy_ethtool_ksettings_set(netdev->phydev, cmd);
+}
+
static int hns3_set_link_ksettings(struct net_device *netdev,
const struct ethtool_link_ksettings *cmd)
{
@@ -842,16 +862,11 @@ static int hns3_set_link_ksettings(struct net_device *netdev,
cmd->base.autoneg, cmd->base.speed, cmd->base.duplex);
/* Only support ksettings_set for netdev with phy attached for now */
- if (netdev->phydev) {
- if (cmd->base.speed == SPEED_1000 &&
- cmd->base.autoneg == AUTONEG_DISABLE)
- return -EINVAL;
-
- return phy_ethtool_ksettings_set(netdev->phydev, cmd);
- } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) &&
- ops->set_phy_link_ksettings) {
+ if (netdev->phydev)
+ return hns3_set_phy_link_ksettings(netdev, cmd);
+ else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) &&
+ ops->set_phy_link_ksettings)
return ops->set_phy_link_ksettings(handle, cmd);
- }
if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index dcd40cc73082..c6b9806c75a5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11021,6 +11021,35 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
return -EOPNOTSUPP;
}
+static int hclge_restore_pauseparam(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u32 auto_neg, rx_pause, tx_pause;
+ int ret;
+
+ hclge_get_pauseparam(handle, &auto_neg, &rx_pause, &tx_pause);
+ /* when autoneg is disabled, the pause setting of phy has no effect
+ * unless the link goes down.
+ */
+ ret = phy_suspend(hdev->hw.mac.phydev);
+ if (ret)
+ return ret;
+
+ phy_set_asym_pause(hdev->hw.mac.phydev, rx_pause, tx_pause);
+
+ ret = phy_resume(hdev->hw.mac.phydev);
+ if (ret)
+ return ret;
+
+ ret = hclge_mac_pause_setup_hw(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "restore pauseparam error, ret = %d.\n", ret);
+
+ return ret;
+}
+
static void hclge_get_ksettings_an_result(struct hnae3_handle *handle,
u8 *auto_neg, u32 *speed, u8 *duplex)
{
@@ -12984,6 +13013,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.halt_autoneg = hclge_halt_autoneg,
.get_pauseparam = hclge_get_pauseparam,
.set_pauseparam = hclge_set_pauseparam,
+ .restore_pauseparam = hclge_restore_pauseparam,
.set_mtu = hclge_set_mtu,
.reset_queue = hclge_reset_tqp,
.get_stats = hclge_get_stats,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 95074e91a846..124791e4bfee 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -1435,7 +1435,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
return 0;
}
-static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
{
bool tx_en, rx_en;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 2ee9b795f71d..4b2c3a788980 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -244,6 +244,7 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight);
int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id,
enum hclge_opcode_type cmd,
struct hclge_tm_shaper_para *para);
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev);
int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id);
int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id);
int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id,
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3bda2e5df476417b6d08967e2d84234a59d57b1c Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Wed, 27 Oct 2021 20:11:43 +0800
Subject: [PATCH] net: hns3: fix pause config problem after autoneg disabled
If a TP port is configured by the following steps:
1. ethtool -s ethx autoneg off speed 100 duplex full
2. ethtool -A ethx rx on tx on
3. ethtool -s ethx autoneg on (the negotiated rx/tx pause results are off)
4. ethtool -s ethx autoneg off speed 100 duplex full
In step 3, the driver sets the hardware rx/tx pause parameters to off, as
the pause parameters negotiated with the link partner are off.
After step 4, the "ethtool -a ethx" command shows both rx and tx pause
parameters as on. However, the hardware pause parameters are still off,
and the port actually has no flow control.
To fix this problem, if autoneg is disabled, the driver uses its saved
parameters to restore the hardware pause configuration. If the speed is
not changed in this case, there is no phy link state change, so the pause
parameters would not take effect; we therefore need to force the phy to
go down and up.
Fixes: aacbe27e82f0 ("net: hns3: modify how pause options is displayed")
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index d701451596c8..da3a593f6a56 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -568,6 +568,7 @@ struct hnae3_ae_ops {
u32 *auto_neg, u32 *rx_en, u32 *tx_en);
int (*set_pauseparam)(struct hnae3_handle *handle,
u32 auto_neg, u32 rx_en, u32 tx_en);
+ int (*restore_pauseparam)(struct hnae3_handle *handle);
int (*set_autoneg)(struct hnae3_handle *handle, bool enable);
int (*get_autoneg)(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 5ebd96f6833d..7d92dd273ed7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -824,6 +824,26 @@ static int hns3_check_ksettings_param(const struct net_device *netdev,
return 0;
}
+static int hns3_set_phy_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *cmd)
+{
+ struct hnae3_handle *handle = hns3_get_handle(netdev);
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+ int ret;
+
+ if (cmd->base.speed == SPEED_1000 &&
+ cmd->base.autoneg == AUTONEG_DISABLE)
+ return -EINVAL;
+
+ if (cmd->base.autoneg == AUTONEG_DISABLE && ops->restore_pauseparam) {
+ ret = ops->restore_pauseparam(handle);
+ if (ret)
+ return ret;
+ }
+
+ return phy_ethtool_ksettings_set(netdev->phydev, cmd);
+}
+
static int hns3_set_link_ksettings(struct net_device *netdev,
const struct ethtool_link_ksettings *cmd)
{
@@ -842,16 +862,11 @@ static int hns3_set_link_ksettings(struct net_device *netdev,
cmd->base.autoneg, cmd->base.speed, cmd->base.duplex);
/* Only support ksettings_set for netdev with phy attached for now */
- if (netdev->phydev) {
- if (cmd->base.speed == SPEED_1000 &&
- cmd->base.autoneg == AUTONEG_DISABLE)
- return -EINVAL;
-
- return phy_ethtool_ksettings_set(netdev->phydev, cmd);
- } else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) &&
- ops->set_phy_link_ksettings) {
+ if (netdev->phydev)
+ return hns3_set_phy_link_ksettings(netdev, cmd);
+ else if (test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps) &&
+ ops->set_phy_link_ksettings)
return ops->set_phy_link_ksettings(handle, cmd);
- }
if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index dcd40cc73082..c6b9806c75a5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11021,6 +11021,35 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg,
return -EOPNOTSUPP;
}
+static int hclge_restore_pauseparam(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u32 auto_neg, rx_pause, tx_pause;
+ int ret;
+
+ hclge_get_pauseparam(handle, &auto_neg, &rx_pause, &tx_pause);
+ /* when autoneg is disabled, the pause setting of phy has no effect
+ * unless the link goes down.
+ */
+ ret = phy_suspend(hdev->hw.mac.phydev);
+ if (ret)
+ return ret;
+
+ phy_set_asym_pause(hdev->hw.mac.phydev, rx_pause, tx_pause);
+
+ ret = phy_resume(hdev->hw.mac.phydev);
+ if (ret)
+ return ret;
+
+ ret = hclge_mac_pause_setup_hw(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "restore pauseparam error, ret = %d.\n", ret);
+
+ return ret;
+}
+
static void hclge_get_ksettings_an_result(struct hnae3_handle *handle,
u8 *auto_neg, u32 *speed, u8 *duplex)
{
@@ -12984,6 +13013,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.halt_autoneg = hclge_halt_autoneg,
.get_pauseparam = hclge_get_pauseparam,
.set_pauseparam = hclge_set_pauseparam,
+ .restore_pauseparam = hclge_restore_pauseparam,
.set_mtu = hclge_set_mtu,
.reset_queue = hclge_reset_tqp,
.get_stats = hclge_get_stats,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 95074e91a846..124791e4bfee 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -1435,7 +1435,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
return 0;
}
-static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
{
bool tx_en, rx_en;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 2ee9b795f71d..4b2c3a788980 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -244,6 +244,7 @@ int hclge_tm_get_pri_weight(struct hclge_dev *hdev, u8 pri_id, u8 *weight);
int hclge_tm_get_pri_shaper(struct hclge_dev *hdev, u8 pri_id,
enum hclge_opcode_type cmd,
struct hclge_tm_shaper_para *para);
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev);
int hclge_tm_get_q_to_qs_map(struct hclge_dev *hdev, u16 q_id, u16 *qset_id);
int hclge_tm_get_q_to_tc(struct hclge_dev *hdev, u16 q_id, u8 *tc_id);
int hclge_tm_get_pg_to_pri_map(struct hclge_dev *hdev, u8 pg_id,
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ace19b992436a257d9a793672e57abc28fe83e2e Mon Sep 17 00:00:00 2001
From: Trevor Woerner <twoerner@gmail.com>
Date: Sun, 24 Oct 2021 13:50:02 -0400
Subject: [PATCH] net: nxp: lpc_eth.c: avoid hang when bringing interface down
A hard hang is observed whenever the ethernet interface is brought
down. If the PHY is stopped before the LPC core block is reset,
the SoC will hang. Comparing lpc_eth_close() and lpc_eth_open(), I
re-arranged the ordering of the function calls in lpc_eth_close() to
reset the hardware before stopping the PHY.
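The resulting close() ordering, as a hedged sketch with stub functions (illustrative, not the driver code): quiesce and reset the MAC core first, and only stop the PHY once the core block no longer depends on it.

#include <stdio.h>

/* stubs standing in for the driver/PHY helpers */
static void stop_queues(void)   { puts("stop queues"); }
static void reset_mac(void)     { puts("reset LPC core block"); }
static void stop_phy(void)      { puts("phy_stop"); }
static void disable_clock(void) { puts("clk_disable_unprepare"); }

static int eth_close(void)
{
	stop_queues();
	reset_mac();	/* reset the hardware first... */
	stop_phy();	/* ...then stop the PHY; the reversed order hung the SoC */
	disable_clock();
	return 0;
}

int main(void)
{
	return eth_close();
}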
Fixes: b7370112f519 ("lpc32xx: Added ethernet driver")
Signed-off-by: Trevor Woerner <twoerner@gmail.com>
Acked-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index d29fe562b3de..c910fa2f40a4 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1015,9 +1015,6 @@ static int lpc_eth_close(struct net_device *ndev)
napi_disable(&pldat->napi);
netif_stop_queue(ndev);
- if (ndev->phydev)
- phy_stop(ndev->phydev);
-
spin_lock_irqsave(&pldat->lock, flags);
__lpc_eth_reset(pldat);
netif_carrier_off(ndev);
@@ -1025,6 +1022,8 @@ static int lpc_eth_close(struct net_device *ndev)
writel(0, LPC_ENET_MAC2(pldat->net_base));
spin_unlock_irqrestore(&pldat->lock, flags);
+ if (ndev->phydev)
+ phy_stop(ndev->phydev);
clk_disable_unprepare(pldat->clk);
return 0;
The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 759635760a804b0d8ad0cc677b650f1544cae22f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 24 Oct 2021 09:40:14 +0300
Subject: [PATCH] mlxsw: pci: Recycle received packet upon allocation failure
When the driver fails to allocate a new Rx buffer, it passes an empty Rx
descriptor (containing a zero address and size) to the device and marks it
as invalid by setting the skb pointer in the descriptor's metadata to
NULL.
After processing enough Rx descriptors, the driver will try to process
the invalid descriptor, but will return immediately seeing that the skb
pointer is NULL. Since the driver no longer passes new Rx descriptors to
the device, the Rx queue will eventually become full and the device will
start to drop packets.
Fix this by recycling the received packet if allocation of the new
packet failed. This means that allocation is no longer performed at the
end of the Rx routine, but at the start, before tearing down the DMA
mapping of the received packet.
Remove the comment about the descriptor being zeroed as it is no longer
correct. This is OK because we either use the descriptor as-is (when
recycling) or overwrite its address and size fields with those of the
newly allocated Rx buffer.
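The recycle-on-failure pattern, as a minimal standalone sketch (malloc stands in for the skb allocator; names are illustrative, not the driver's): allocate the replacement first, and if that fails, keep the received buffer posted instead of leaving a hole in the ring.

#include <stdlib.h>

#define BUF_LEN 2048

static void deliver(char *pkt, size_t len)
{
	/* hand the packet to the stack; the sketch just consumes it */
	(void)pkt;
	(void)len;
}

static void rx_one(char **slot)
{
	char *pkt = *slot;
	char *fresh = malloc(BUF_LEN);	/* netdev_alloc_skb_ip_align() in the driver */

	if (!fresh) {
		/* recycle: repost the same buffer and drop this packet, so
		 * the ring never drains to empty under memory pressure */
		return;
	}

	*slot = fresh;		/* the descriptor now owns the new buffer */
	deliver(pkt, BUF_LEN);
	free(pkt);		/* the stack would consume the skb */
}

int main(void)
{
	char *slot = malloc(BUF_LEN);

	if (!slot)
		return 1;
	rx_one(&slot);
	free(slot);
	return 0;
}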
The issue was discovered when a process ("perf") consumed too much
memory and put the system under memory pressure. It can be reproduced by
injecting slab allocation failures [1]. After the fix, the Rx queue no
longer comes to a halt.
[1]
# echo 10 > /sys/kernel/debug/failslab/times
# echo 1000 > /sys/kernel/debug/failslab/interval
# echo 100 > /sys/kernel/debug/failslab/probability
FAULT_INJECTION: forcing a failure.
name failslab, interval 1000, probability 100, space 0, times 8
[...]
Call Trace:
<IRQ>
dump_stack_lvl+0x34/0x44
should_fail.cold+0x32/0x37
should_failslab+0x5/0x10
kmem_cache_alloc_node+0x23/0x190
__alloc_skb+0x1f9/0x280
__netdev_alloc_skb+0x3a/0x150
mlxsw_pci_rdq_skb_alloc+0x24/0x90
mlxsw_pci_cq_tasklet+0x3dc/0x1200
tasklet_action_common.constprop.0+0x9f/0x100
__do_softirq+0xb5/0x252
irq_exit_rcu+0x7a/0xa0
common_interrupt+0x83/0xa0
</IRQ>
asm_common_interrupt+0x1e/0x40
RIP: 0010:cpuidle_enter_state+0xc8/0x340
[...]
mlxsw_spectrum2 0000:06:00.0: Failed to alloc skb for RDQ
Fixes: eda6500a987a ("mlxsw: Add PCI bus implementation")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://lore.kernel.org/r/20211024064014.1060919-1-idosch@idosch.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 13b0259f7ea6..fcace73eae40 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -353,13 +353,10 @@ static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
struct sk_buff *skb;
int err;
- elem_info->u.rdq.skb = NULL;
skb = netdev_alloc_skb_ip_align(NULL, buf_len);
if (!skb)
return -ENOMEM;
- /* Assume that wqe was previously zeroed. */
-
err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
buf_len, DMA_FROM_DEVICE);
if (err)
@@ -597,21 +594,26 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
struct mlxsw_rx_info rx_info = {};
- char *wqe;
+ char wqe[MLXSW_PCI_WQE_SIZE];
struct sk_buff *skb;
u16 byte_count;
int err;
elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
- skb = elem_info->u.sdq.skb;
- if (!skb)
- return;
- wqe = elem_info->elem;
- mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+ skb = elem_info->u.rdq.skb;
+ memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE);
if (q->consumer_counter++ != consumer_counter_limit)
dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
+ err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
+ if (err) {
+ dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+ goto out;
+ }
+
+ mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+
if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
rx_info.is_lag = true;
rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe);
@@ -647,10 +649,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
skb_put(skb, byte_count);
mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
- memset(wqe, 0, q->elem_size);
- err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
- if (err)
- dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+out:
/* Everything is set up, ring doorbell to pass elem to HW */
q->producer_counter++;
mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
The patch below does not apply to the 4.9-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 759635760a804b0d8ad0cc677b650f1544cae22f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 24 Oct 2021 09:40:14 +0300
Subject: [PATCH] mlxsw: pci: Recycle received packet upon allocation failure
When the driver fails to allocate a new Rx buffer, it passes an empty Rx
descriptor (containing a zero address and size) to the device and marks it
as invalid by setting the skb pointer in the descriptor's metadata to
NULL.
After processing enough Rx descriptors, the driver will try to process
the invalid descriptor, but will return immediately seeing that the skb
pointer is NULL. Since the driver no longer passes new Rx descriptors to
the device, the Rx queue will eventually become full and the device will
start to drop packets.
Fix this by recycling the received packet if allocation of the new
packet failed. This means that allocation is no longer performed at the
end of the Rx routine, but at the start, before tearing down the DMA
mapping of the received packet.
Remove the comment about the descriptor being zeroed as it is no longer
correct. This is OK because we either use the descriptor as-is (when
recycling) or overwrite its address and size fields with those of the
newly allocated Rx buffer.
The issue was discovered when a process ("perf") consumed too much
memory and put the system under memory pressure. It can be reproduced by
injecting slab allocation failures [1]. After the fix, the Rx queue no
longer comes to a halt.
[1]
# echo 10 > /sys/kernel/debug/failslab/times
# echo 1000 > /sys/kernel/debug/failslab/interval
# echo 100 > /sys/kernel/debug/failslab/probability
FAULT_INJECTION: forcing a failure.
name failslab, interval 1000, probability 100, space 0, times 8
[...]
Call Trace:
<IRQ>
dump_stack_lvl+0x34/0x44
should_fail.cold+0x32/0x37
should_failslab+0x5/0x10
kmem_cache_alloc_node+0x23/0x190
__alloc_skb+0x1f9/0x280
__netdev_alloc_skb+0x3a/0x150
mlxsw_pci_rdq_skb_alloc+0x24/0x90
mlxsw_pci_cq_tasklet+0x3dc/0x1200
tasklet_action_common.constprop.0+0x9f/0x100
__do_softirq+0xb5/0x252
irq_exit_rcu+0x7a/0xa0
common_interrupt+0x83/0xa0
</IRQ>
asm_common_interrupt+0x1e/0x40
RIP: 0010:cpuidle_enter_state+0xc8/0x340
[...]
mlxsw_spectrum2 0000:06:00.0: Failed to alloc skb for RDQ
Fixes: eda6500a987a ("mlxsw: Add PCI bus implementation")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://lore.kernel.org/r/20211024064014.1060919-1-idosch@idosch.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 13b0259f7ea6..fcace73eae40 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -353,13 +353,10 @@ static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
struct sk_buff *skb;
int err;
- elem_info->u.rdq.skb = NULL;
skb = netdev_alloc_skb_ip_align(NULL, buf_len);
if (!skb)
return -ENOMEM;
- /* Assume that wqe was previously zeroed. */
-
err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
buf_len, DMA_FROM_DEVICE);
if (err)
@@ -597,21 +594,26 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
struct mlxsw_rx_info rx_info = {};
- char *wqe;
+ char wqe[MLXSW_PCI_WQE_SIZE];
struct sk_buff *skb;
u16 byte_count;
int err;
elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
- skb = elem_info->u.sdq.skb;
- if (!skb)
- return;
- wqe = elem_info->elem;
- mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+ skb = elem_info->u.rdq.skb;
+ memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE);
if (q->consumer_counter++ != consumer_counter_limit)
dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
+ err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
+ if (err) {
+ dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+ goto out;
+ }
+
+ mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+
if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
rx_info.is_lag = true;
rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe);
@@ -647,10 +649,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
skb_put(skb, byte_count);
mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
- memset(wqe, 0, q->elem_size);
- err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
- if (err)
- dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+out:
/* Everything is set up, ring doorbell to pass elem to HW */
q->producer_counter++;
mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 759635760a804b0d8ad0cc677b650f1544cae22f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 24 Oct 2021 09:40:14 +0300
Subject: [PATCH] mlxsw: pci: Recycle received packet upon allocation failure
When the driver fails to allocate a new Rx buffer, it passes an empty Rx
descriptor (containing a zero address and size) to the device and marks it
as invalid by setting the skb pointer in the descriptor's metadata to
NULL.
After processing enough Rx descriptors, the driver will try to process
the invalid descriptor, but will return immediately seeing that the skb
pointer is NULL. Since the driver no longer passes new Rx descriptors to
the device, the Rx queue will eventually become full and the device will
start to drop packets.
Fix this by recycling the received packet if allocation of the new
packet failed. This means that allocation is no longer performed at the
end of the Rx routine, but at the start, before tearing down the DMA
mapping of the received packet.
Remove the comment about the descriptor being zeroed as it is no longer
correct. This is OK because we either use the descriptor as-is (when
recycling) or overwrite its address and size fields with those of the
newly allocated Rx buffer.
The issue was discovered when a process ("perf") consumed too much
memory and put the system under memory pressure. It can be reproduced by
injecting slab allocation failures [1]. After the fix, the Rx queue no
longer comes to a halt.
[1]
# echo 10 > /sys/kernel/debug/failslab/times
# echo 1000 > /sys/kernel/debug/failslab/interval
# echo 100 > /sys/kernel/debug/failslab/probability
FAULT_INJECTION: forcing a failure.
name failslab, interval 1000, probability 100, space 0, times 8
[...]
Call Trace:
<IRQ>
dump_stack_lvl+0x34/0x44
should_fail.cold+0x32/0x37
should_failslab+0x5/0x10
kmem_cache_alloc_node+0x23/0x190
__alloc_skb+0x1f9/0x280
__netdev_alloc_skb+0x3a/0x150
mlxsw_pci_rdq_skb_alloc+0x24/0x90
mlxsw_pci_cq_tasklet+0x3dc/0x1200
tasklet_action_common.constprop.0+0x9f/0x100
__do_softirq+0xb5/0x252
irq_exit_rcu+0x7a/0xa0
common_interrupt+0x83/0xa0
</IRQ>
asm_common_interrupt+0x1e/0x40
RIP: 0010:cpuidle_enter_state+0xc8/0x340
[...]
mlxsw_spectrum2 0000:06:00.0: Failed to alloc skb for RDQ
Fixes: eda6500a987a ("mlxsw: Add PCI bus implementation")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Link: https://lore.kernel.org/r/20211024064014.1060919-1-idosch@idosch.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 13b0259f7ea6..fcace73eae40 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -353,13 +353,10 @@ static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci,
struct sk_buff *skb;
int err;
- elem_info->u.rdq.skb = NULL;
skb = netdev_alloc_skb_ip_align(NULL, buf_len);
if (!skb)
return -ENOMEM;
- /* Assume that wqe was previously zeroed. */
-
err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data,
buf_len, DMA_FROM_DEVICE);
if (err)
@@ -597,21 +594,26 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
struct pci_dev *pdev = mlxsw_pci->pdev;
struct mlxsw_pci_queue_elem_info *elem_info;
struct mlxsw_rx_info rx_info = {};
- char *wqe;
+ char wqe[MLXSW_PCI_WQE_SIZE];
struct sk_buff *skb;
u16 byte_count;
int err;
elem_info = mlxsw_pci_queue_elem_info_consumer_get(q);
- skb = elem_info->u.sdq.skb;
- if (!skb)
- return;
- wqe = elem_info->elem;
- mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+ skb = elem_info->u.rdq.skb;
+ memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE);
if (q->consumer_counter++ != consumer_counter_limit)
dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n");
+ err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
+ if (err) {
+ dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+ goto out;
+ }
+
+ mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE);
+
if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) {
rx_info.is_lag = true;
rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe);
@@ -647,10 +649,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci,
skb_put(skb, byte_count);
mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info);
- memset(wqe, 0, q->elem_size);
- err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info);
- if (err)
- dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n");
+out:
/* Everything is set up, ring doorbell to pass elem to HW */
q->producer_counter++;
mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q);