Hi Greg,
This series includes some upstream patches aimed to fix multiple checksum issues with mlx5 driver in 4.19-stable kernels.
Since the patches didn't apply cleanly to 4.19 back when they were submitted for the first time around 5.1 kernel release to the netdev mailing list, i couldn't mark them for -stable 4.19, so now as the issue is being reported on 4.19 LTS kernels, I had to do the backporting and this submission myself.
This series required some dependency patches and some manual touches to apply some of them.
Please apply to 4.19-stable and let me know if there's any problem. I tested and the patches apply cleanly and work on top of: v4.19.75
Thanks, Saeed.
---
Alaa Hleihel (1): net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets
Cong Wang (1): mlx5: fix get_ip_proto()
Natali Shechtman (1): net/mlx5e: Set ECN for received packets using CQE indication
Or Gerlitz (1): net/mlx5e: Allow reporting of checksum unnecessary
Saeed Mahameed (3): net/mlx5e: XDP, Avoid checksum complete when XDP prog is loaded net/mlx5e: Rx, Fixup skb checksum for packets with tail padding net/mlx5e: Rx, Check ip headers sanity
drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 + .../ethernet/mellanox/mlx5/core/en_ethtool.c | 28 ++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 8 ++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 126 +++++++++++++++--- .../ethernet/mellanox/mlx5/core/en_stats.c | 9 ++ .../ethernet/mellanox/mlx5/core/en_stats.h | 6 + 6 files changed, 165 insertions(+), 15 deletions(-)
From: Natali Shechtman natali@mellanox.com
[ Upstream commit f007c13d4ad62f494c83897eda96437005df4a91 ]
In multi-host (MH) NIC scheme, a single HW port serves multiple hosts or sockets on the same host. The HW uses a mechanism in the PCIe buffer which monitors the amount of consumed PCIe buffers per host. On a certain configuration, under congestion, the HW emulates a switch doing ECN marking on packets using ECN indication on the completion descriptor (CQE).
The driver needs to set the ECN bits on the packet SKB, such that the network stack can react on that, this commit does that.
Needed by downstream patch which fixes a mlx5 checksum issue.
Fixes: bbceefce9adf ("net/mlx5e: Support RX CHECKSUM_COMPLETE") Signed-off-by: Natali Shechtman natali@mellanox.com Reviewed-by: Tariq Toukan tariqt@mellanox.com Signed-off-by: Saeed Mahameed saeedm@mellanox.com --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 35 ++++++++++++++++--- .../ethernet/mellanox/mlx5/core/en_stats.c | 3 ++ .../ethernet/mellanox/mlx5/core/en_stats.h | 2 ++ 3 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index d3f794d4fb96..2a37f5f8a290 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -37,6 +37,7 @@ #include <net/busy_poll.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> +#include <net/inet_ecn.h> #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -688,12 +689,29 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); }
-static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth) +static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, + __be16 *proto) { - __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto; + *proto = ((struct ethhdr *)skb->data)->h_proto; + *proto = __vlan_get_protocol(skb, *proto, network_depth); + return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); +} + +static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) +{ + int network_depth = 0; + __be16 proto; + void *ip; + int rc;
- ethertype = __vlan_get_protocol(skb, ethertype, network_depth); - return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); + if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto))) + return; + + ip = skb->data + network_depth; + rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) : + IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip)); + + rq->stats->ecn_mark += !!rc; }
static u32 mlx5e_get_fcs(const struct sk_buff *skb) @@ -717,6 +735,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, { struct mlx5e_rq_stats *stats = rq->stats; int network_depth = 0; + __be16 proto;
if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) goto csum_none; @@ -738,7 +757,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (short_frame(skb->len)) goto csum_unnecessary;
- if (likely(is_last_ethertype_ip(skb, &network_depth))) { + if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); if (network_depth > ETH_HLEN) @@ -775,6 +794,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, stats->csum_none++; }
+#define MLX5E_CE_BIT_MASK 0x80 + static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, u32 cqe_bcnt, struct mlx5e_rq *rq, @@ -819,6 +840,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + /* checking CE bit in cqe - MSB in ml_path field */ + if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK)) + mlx5e_enable_ecn(rq, skb); + skb->protocol = eth_type_trans(skb, netdev); }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 7047cc293545..493bd2752037 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -53,6 +53,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, @@ -144,6 +145,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_bytes += rq_stats->bytes; s->rx_lro_packets += rq_stats->lro_packets; s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_ecn_mark += rq_stats->ecn_mark; s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; @@ -1144,6 +1146,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 0ad7a165443a..13f9028c638d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -66,6 +66,7 @@ struct mlx5e_sw_stats { u64 tx_nop; u64 rx_lro_packets; u64 rx_lro_bytes; + u64 rx_ecn_mark; u64 rx_removed_vlan_packets; u64 rx_csum_unnecessary; u64 rx_csum_none; @@ -184,6 +185,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 ecn_mark; u64 removed_vlan_packets; u64 xdp_drop; u64 xdp_redirect;
From: Alaa Hleihel alaa@mellanox.com
[ Upstream commit fe1dc069990c1f290ef6b99adb46332c03258f38 ]
CHECKSUM_COMPLETE is not applicable to SCTP protocol. Setting it for SCTP packets leads to CRC32c validation failure.
Fixes: bbceefce9adf ("net/mlx5e: Support RX CHECKSUM_COMPLETE") Signed-off-by: Alaa Hleihel alaa@mellanox.com Reviewed-by: Or Gerlitz ogerlitz@mellanox.com Signed-off-by: Saeed Mahameed saeedm@mellanox.com --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 2a37f5f8a290..61eab0c55fca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -725,6 +725,14 @@ static u32 mlx5e_get_fcs(const struct sk_buff *skb) return __get_unaligned_cpu32(fcs_bytes); }
+static u8 get_ip_proto(struct sk_buff *skb, __be16 proto) +{ + void *ip_p = skb->data + sizeof(struct ethhdr); + + return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : + ((struct ipv6hdr *)ip_p)->nexthdr; +} + #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
static inline void mlx5e_handle_csum(struct net_device *netdev, @@ -758,6 +766,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, goto csum_unnecessary;
if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { + if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP)) + goto csum_unnecessary; + skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); if (network_depth > ETH_HLEN)
From: Cong Wang xiyou.wangcong@gmail.com
[ Upstream commit ef6fcd455278c2be3032a346cc66d9dd9866b787 ]
IP header is not necessarily located right after struct ethhdr, there could be multiple 802.1Q headers in between, this is why we call __vlan_get_protocol().
Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets") Cc: Alaa Hleihel alaa@mellanox.com Cc: Or Gerlitz ogerlitz@mellanox.com Cc: Saeed Mahameed saeedm@mellanox.com Signed-off-by: Cong Wang xiyou.wangcong@gmail.com Reviewed-by: Tariq Toukan tariqt@mellanox.com Acked-by: Saeed Mahameed saeedm@mellanox.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Saeed Mahameed saeedm@mellanox.com --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 61eab0c55fca..8323534f075a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -725,9 +725,9 @@ static u32 mlx5e_get_fcs(const struct sk_buff *skb) return __get_unaligned_cpu32(fcs_bytes); }
-static u8 get_ip_proto(struct sk_buff *skb, __be16 proto) +static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { - void *ip_p = skb->data + sizeof(struct ethhdr); + void *ip_p = skb->data + network_depth;
return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol : ((struct ipv6hdr *)ip_p)->nexthdr; @@ -766,7 +766,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, goto csum_unnecessary;
if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { - if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP)) + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) goto csum_unnecessary;
skb->ip_summed = CHECKSUM_COMPLETE;
From: Or Gerlitz ogerlitz@mellanox.com
[ Upstream commit b856df28f9230a47669efbdd57896084caadb2b3 ]
Currently we practically never report checksum unnecessary, because for all IP packets we take the checksum complete path.
Enable non-default runs with reprorting checksum unnecessary, using an ethtool private flag. This can be useful for performance evals and other explorations.
Required by downstream patch which fixes XDP checksum.
Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Signed-off-by: Or Gerlitz ogerlitz@mellanox.com Reviewed-by: Tariq Toukan tariqt@mellanox.com Signed-off-by: Saeed Mahameed saeedm@mellanox.com --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +++ .../ethernet/mellanox/mlx5/core/en_ethtool.c | 27 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 +++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 3 +++ 4 files changed, 37 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index da52e60d4437..d79e177f8990 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -210,6 +210,7 @@ static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { "tx_cqe_moder", "rx_cqe_compress", "rx_striding_rq", + "rx_no_csum_complete", };
enum mlx5e_priv_flag { @@ -217,6 +218,7 @@ enum mlx5e_priv_flag { MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3), + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4), };
#define MLX5E_SET_PFLAG(params, pflag, enable) \ @@ -298,6 +300,7 @@ struct mlx5e_dcbx_dp { enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_AM, + MLX5E_RQ_STATE_NO_CSUM_COMPLETE, };
struct mlx5e_cq { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 2b9350f4c752..cb79aaea1a69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1510,6 +1510,27 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return 0; }
+static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *channels = &priv->channels; + struct mlx5e_channel *c; + int i; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + for (i = 0; i < channels->num; i++) { + c = channels->c[i]; + if (enable) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + else + __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + } + + return 0; +} + static int mlx5e_handle_pflag(struct net_device *netdev, u32 wanted_flags, enum mlx5e_priv_flag flag, @@ -1561,6 +1582,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_RX_STRIDING_RQ, set_pflag_rx_striding_rq); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, + set_pflag_rx_no_csum_complete);
out: mutex_unlock(&priv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 83ab2c0e6b61..5e98b31620c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -934,6 +934,9 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+ if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + return 0;
err_destroy_rq: @@ -4533,6 +4536,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
/* RQ */ /* Prefer Striding RQ, unless any of the following holds: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 8323534f075a..4851fc575185 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -754,6 +754,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; }
+ if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + goto csum_unnecessary; + /* CQE csum doesn't cover padding octets in short ethernet * frames. And the pad field is appended prior to calculating * and appending the FCS field.
[ Upstream commit 5d0bb3bac4b9f6c22280b04545626fdfd99edc6b ]
XDP programs might change packets data contents which will make the reported skb checksum (checksum complete) invalid.
When XDP programs are loaded/unloaded set/clear rx RQs MLX5E_RQ_STATE_NO_CSUM_COMPLETE flag.
Fixes: 86994156c736 ("net/mlx5e: XDP fast RX drop bpf programs support") Reviewed-by: Tariq Toukan tariqt@mellanox.com Signed-off-by: Saeed Mahameed saeedm@mellanox.com --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index cb79aaea1a69..10d72c83714d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1517,7 +1517,8 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) struct mlx5e_channel *c; int i;
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || + priv->channels.params.xdp_prog) return 0;
for (i = 0; i < channels->num; i++) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5e98b31620c1..7e6706333fa8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -934,7 +934,11 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
- if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) + /* We disable csum_complete when XDP is enabled since + * XDP programs might manipulate packets which will render + * skb->checksum incorrect. + */ + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp) __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 4851fc575185..98509e228ac3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -754,7 +754,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; }
- if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + /* True when explicitly set via priv flag, or XDP prog is loaded */ + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) goto csum_unnecessary;
/* CQE csum doesn't cover padding octets in short ethernet
[ Upstream commit 0aa1d18615c163f92935b806dcaff9157645233a ]
When an ethernet frame with ip payload is padded, the padding octets are not covered by the hardware checksum.
Prior to the cited commit, skb checksum was forced to be CHECKSUM_NONE when padding is detected. After it, the kernel will try to trim the padding bytes and subtract their checksum from skb->csum.
In this patch we fixup skb->csum for any ip packet with tail padding of any size, if any padding found. FCS case is just one special case of this general purpose patch, hence, it is removed.
Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), Cc: Eric Dumazet edumazet@google.com Reviewed-by: Tariq Toukan tariqt@mellanox.com Signed-off-by: Saeed Mahameed saeedm@mellanox.com --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 79 +++++++++++++++---- .../ethernet/mellanox/mlx5/core/en_stats.c | 6 ++ .../ethernet/mellanox/mlx5/core/en_stats.h | 4 + 3 files changed, 74 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 98509e228ac3..318fee09f049 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -714,17 +714,6 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) rq->stats->ecn_mark += !!rc; }
-static u32 mlx5e_get_fcs(const struct sk_buff *skb) -{ - const void *fcs_bytes; - u32 _fcs_bytes; - - fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, - ETH_FCS_LEN, &_fcs_bytes); - - return __get_unaligned_cpu32(fcs_bytes); -} - static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto) { void *ip_p = skb->data + network_depth; @@ -735,6 +724,68 @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
+#define MAX_PADDING 8 + +static void +tail_padding_csum_slow(struct sk_buff *skb, int offset, int len, + struct mlx5e_rq_stats *stats) +{ + stats->csum_complete_tail_slow++; + skb->csum = csum_block_add(skb->csum, + skb_checksum(skb, offset, len, 0), + offset); +} + +static void +tail_padding_csum(struct sk_buff *skb, int offset, + struct mlx5e_rq_stats *stats) +{ + u8 tail_padding[MAX_PADDING]; + int len = skb->len - offset; + void *tail; + + if (unlikely(len > MAX_PADDING)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + tail = skb_header_pointer(skb, offset, len, tail_padding); + if (unlikely(!tail)) { + tail_padding_csum_slow(skb, offset, len, stats); + return; + } + + stats->csum_complete_tail++; + skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset); +} + +static void +mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto, + struct mlx5e_rq_stats *stats) +{ + struct ipv6hdr *ip6; + struct iphdr *ip4; + int pkt_len; + + switch (proto) { + case htons(ETH_P_IP): + ip4 = (struct iphdr *)(skb->data + network_depth); + pkt_len = network_depth + ntohs(ip4->tot_len); + break; + case htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(skb->data + network_depth); + pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len); + break; + default: + return; + } + + if (likely(pkt_len >= skb->len)) + return; + + tail_padding_csum(skb, pkt_len, stats); +} + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -783,10 +834,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->csum = csum_partial(skb->data + ETH_HLEN, network_depth - ETH_HLEN, skb->csum); - if (unlikely(netdev->features & NETIF_F_RXFCS)) - skb->csum = csum_block_add(skb->csum, - (__force __wsum)mlx5e_get_fcs(skb), - skb->len - ETH_FCS_LEN); + + mlx5e_skb_padding_csum(skb, network_depth, proto, stats); stats->csum_complete++; return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 493bd2752037..8255d797ea94 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -58,6 +58,8 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) }, @@ -149,6 +151,8 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_complete_tail += rq_stats->csum_complete_tail; + s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow; s->rx_csum_unnecessary += rq_stats->csum_unnecessary; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_xdp_drop += rq_stats->xdp_drop; @@ -1139,6 +1143,8 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 13f9028c638d..3ea8033ed6bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -71,6 +71,8 @@ struct mlx5e_sw_stats { u64 rx_csum_unnecessary; u64 rx_csum_none; u64 rx_csum_complete; + u64 rx_csum_complete_tail; + u64 rx_csum_complete_tail_slow; u64 rx_csum_unnecessary_inner; u64 rx_xdp_drop; u64 rx_xdp_redirect; @@ -180,6 +182,8 @@ struct mlx5e_rq_stats { u64 packets; u64 bytes; u64 csum_complete; + u64 csum_complete_tail; + u64 csum_complete_tail_slow; u64 csum_unnecessary; u64 csum_unnecessary_inner; u64 csum_none;
[ Upstream commit 0318a7b7fcad9765931146efa7ca3a034194737c ]
In the two places is_last_ethertype_ip is being called, the caller will be looking inside the ip header, to be safe, add ip{4,6} header sanity check. And return true only on valid ip headers, i.e: the whole header is contained in the linear part of the skb.
Note: Such situation is very rare and hard to reproduce, since mlx5e allocates a large enough headroom to contain the largest header one can imagine.
Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets") Reported-by: Cong Wang xiyou.wangcong@gmail.com Reviewed-by: Tariq Toukan tariqt@mellanox.com Signed-off-by: Saeed Mahameed saeedm@mellanox.com --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 318fee09f049..df49dc143c47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -694,7 +694,14 @@ static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, { *proto = ((struct ethhdr *)skb->data)->h_proto; *proto = __vlan_get_protocol(skb, *proto, network_depth); - return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); + + if (*proto == htons(ETH_P_IP)) + return pskb_may_pull(skb, *network_depth + sizeof(struct iphdr)); + + if (*proto == htons(ETH_P_IPV6)) + return pskb_may_pull(skb, *network_depth + sizeof(struct ipv6hdr)); + + return false; }
static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
From: Saeed Mahameed saeedm@mellanox.com Date: Mon, 23 Sep 2019 12:39:57 +0000
This series includes some upstream patches aimed to fix multiple checksum issues with mlx5 driver in 4.19-stable kernels.
Since the patches didn't apply cleanly to 4.19 back when they were submitted for the first time around 5.1 kernel release to the netdev mailing list, i couldn't mark them for -stable 4.19, so now as the issue is being reported on 4.19 LTS kernels, I had to do the backporting and this submission myself. This series required some dependency patches and some manual touches to apply some of them.
Please apply to 4.19-stable and let me know if there's any problem. I tested and the patches apply cleanly and work on top of: v4.19.75
FWIW, I'm fine with this.
On Mon, Sep 23, 2019 at 05:51:06PM +0200, David Miller wrote:
From: Saeed Mahameed saeedm@mellanox.com Date: Mon, 23 Sep 2019 12:39:57 +0000
This series includes some upstream patches aimed to fix multiple checksum issues with mlx5 driver in 4.19-stable kernels.
Since the patches didn't apply cleanly to 4.19 back when they were submitted for the first time around 5.1 kernel release to the netdev mailing list, i couldn't mark them for -stable 4.19, so now as the issue is being reported on 4.19 LTS kernels, I had to do the backporting and this submission myself. This series required some dependency patches and some manual touches to apply some of them.
Please apply to 4.19-stable and let me know if there's any problem. I tested and the patches apply cleanly and work on top of: v4.19.75
FWIW, I'm fine with this.
Thanks for the review, will go queue these up now...
greg k-h
linux-stable-mirror@lists.linaro.org