From: Tariq Toukan <tariqt@mellanox.com>
[ Upstream commit b57e66ad42d051ed31319c28ed1b62b191299a29 ]
For a cyclic work queue, when not requesting a completion per WQE, a single CQE might indicate the completion of several WQEs. However, if some WQE in the batch causes an error, an error completion is issued, breaking the batch and pointing to the offending WQE in the wqe_counter field.

Hence, WQE-specific error CQE handling (like printing, breaking, etc.) should be performed only for the last WQE in the batch.
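To make the ordering concrete, below is a minimal, self-contained C sketch of the pattern this patch moves to; the types and names (fake_cqe, consume_batch) are hypothetical stand-ins, not the driver's API. All WQEs covered by the CQE are consumed first, and the WQE-specific error handling runs only once the consumer counter reaches the CQE's wqe_counter, i.e. the last (offending) WQE in the batch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the parts of a CQE used here. */
struct fake_cqe {
	uint16_t wqe_counter;	/* last (possibly offending) WQE in the batch */
	bool is_error;		/* models get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR */
};

static void consume_batch(uint16_t *sqcc, const struct fake_cqe *cqe)
{
	bool last_wqe;

	/* First complete every WQE the CQE covers... */
	do {
		last_wqe = (*sqcc == cqe->wqe_counter);
		printf("complete WQE %u%s\n", (unsigned)*sqcc,
		       last_wqe ? " (last)" : "");
		(*sqcc)++;
	} while (!last_wqe);

	/* ...then do the WQE-specific error handling, which now refers
	 * to the offending (last) WQE rather than an arbitrary one. */
	if (cqe->is_error)
		printf("error CQE: dump WQE %u, trigger recovery\n",
		       (unsigned)cqe->wqe_counter);
}

int main(void)
{
	uint16_t sqcc = 0;
	struct fake_cqe cqe = { .wqe_counter = 3, .is_error = true };

	consume_batch(&sqcc, &cqe);	/* completes WQEs 0..3, then reports */
	return 0;
}

Handling the error before draining the batch, as the removed code below does, would dump whatever WQE the consumer counter happened to point at rather than the offending one.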
Fixes: 130c7b46c93d ("net/mlx5e: TX, Dump WQs wqe descriptors on CQE with error events")
Fixes: fd9b4be8002c ("net/mlx5e: RX, Support multiple outstanding UMR posts")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 16 ++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 33 ++++++++++--------------
 2 files changed, 23 insertions(+), 26 deletions(-)
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -613,13 +613,6 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
-		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
-			netdev_WARN_ONCE(cq->channel->netdev,
-					 "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
-			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
-				queue_work(cq->channel->priv->wq, &sq->recover_work);
-			break;
-		}
 		do {
 			struct mlx5e_sq_wqe_info *wi;
 			u16 ci;
@@ -629,6 +622,15 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
 			wi = &sq->db.ico_wqe[ci];
 
+			if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+				netdev_WARN_ONCE(cq->channel->netdev,
+						 "Bad OP in ICOSQ CQE: 0x%x\n",
+						 get_cqe_opcode(cqe));
+				if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+					queue_work(cq->channel->priv->wq, &sq->recover_work);
+				break;
+			}
+
 			if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
 				sqcc += MLX5E_UMR_WQEBBS;
 				wi->umr.rq->mpwqe.umr_completed++;
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -451,34 +451,17 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
 	i = 0;
 	do {
+		struct mlx5e_tx_wqe_info *wi;
 		u16 wqe_counter;
 		bool last_wqe;
+		u16 ci;
 
 		mlx5_cqwq_pop(&cq->wq);
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
-		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
-			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
-					      &sq->state)) {
-				struct mlx5e_tx_wqe_info *wi;
-				u16 ci;
-
-				ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
-				wi = &sq->db.wqe_info[ci];
-				mlx5e_dump_error_cqe(sq,
-						     (struct mlx5_err_cqe *)cqe);
-				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
-				queue_work(cq->channel->priv->wq,
-					   &sq->recover_work);
-			}
-			stats->cqe_err++;
-		}
-
 		do {
-			struct mlx5e_tx_wqe_info *wi;
 			struct sk_buff *skb;
-			u16 ci;
 			int j;
 
 			last_wqe = (sqcc == wqe_counter);
@@ -516,6 +499,18 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 			napi_consume_skb(skb, napi_budget);
 		} while (!last_wqe);
 
+		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
+			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
+					      &sq->state)) {
+				mlx5e_dump_error_cqe(sq,
+						     (struct mlx5_err_cqe *)cqe);
+				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
+				queue_work(cq->channel->priv->wq,
+					   &sq->recover_work);
+			}
+			stats->cqe_err++;
+		}
+
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) &&
 		 (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
 	stats->cqes += i;
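The ICOSQ hunk above applies the same rule with the complementary arrangement: the opcode check stays inside the drain loop but is gated on last_wqe, so per-WQE bookkeeping still runs for every earlier entry in the batch before recovery is triggered. A sketch of that shape, again with hypothetical stand-in types rather than the driver's API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_cqe {
	uint16_t wqe_counter;	/* last WQE covered by this CQE */
	bool bad_opcode;	/* models get_cqe_opcode(cqe) != MLX5_CQE_REQ */
};

/* Gate the error path on last_wqe inside the loop, so bookkeeping for
 * earlier WQEs in the batch is never skipped. */
static void drain_batch(uint16_t *sqcc, const struct fake_cqe *cqe)
{
	bool last_wqe;

	do {
		last_wqe = (*sqcc == cqe->wqe_counter);

		if (last_wqe && cqe->bad_opcode) {
			printf("bad CQE opcode at WQE %u: trigger recovery\n",
			       (unsigned)cqe->wqe_counter);
			break;
		}

		printf("account WQE %u\n", (unsigned)(*sqcc)++);
	} while (!last_wqe);
}

int main(void)
{
	uint16_t sqcc = 0;
	struct fake_cqe cqe = { .wqe_counter = 2, .bad_opcode = true };

	drain_batch(&sqcc, &cqe);	/* accounts WQEs 0..1, then recovers */
	return 0;
}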