[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c73fe66a-2d9a-d675-79bc-09d7f63caa53@meta.com>
Date: Mon, 23 Jan 2023 17:24:33 +0000
From: Vadim Fedorenko <vadfed@...a.com>
To: Gal Pressman <gal@...dia.com>
CC: "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
Aya Levin <ayal@...dia.com>,
Saeed Mahameed <saeedm@...dia.com>,
Jakub Kicinski <kuba@...nel.org>
Subject: Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow
> Hi Vadim,
>
Hi Gal!
Your mail didn't show up in my mailbox for some reasons, so I tried to
construct it back from mailing list. This may end up with some side
effects, but I did my best to avoid it.
> On 22/01/2023 18:16, Vadim Fedorenko wrote:
>> Fifo pointers are not checked for overflow and this could potentially
>> lead to overflow and double free under heavy PTP traffic.
>>
>> Also there were accidental OOO cqe which lead to absolutely broken fifo.
>> Add checks to workaround OOO cqe and add counters to show the amount of
>> such events.
>>
>> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include
PTP-SQ")
>
> Isn't 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port
> timestamp") more appropriate?
It looks like the bugs were actually introduced by the commit in Fixes
even though the commit you mentioned introduced the feature itself. But
I might be wrong, I'll recheck it.
>> Signed-off-by: Vadim Fedorenko <vadfed@...a.com>
>> ---
>> .../net/ethernet/mellanox/mlx5/core/en/ptp.c | 28 ++++++++++++++-----
>> .../net/ethernet/mellanox/mlx5/core/en/txrx.h | 6 +++-
>> .../ethernet/mellanox/mlx5/core/en_stats.c | 2 ++
>> .../ethernet/mellanox/mlx5/core/en_stats.h | 2 ++
>> 4 files changed, 30 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> index 903de88bab53..11a99e0f00c6 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct
mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
>> return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>> }
>>
>> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq
*ptpsq, u16 skb_cc, u16 skb_id)
>> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq
*ptpsq, u16 skb_cc, u16 skb_id)
>> {
>> struct skb_shared_hwtstamps hwts = {};
>> struct sk_buff *skb;
>>
>> ptpsq->cq_stats->resync_event++;
>>
>> - while (skb_cc != skb_id) {
>> - skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>> + if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
>> + ptpsq->cq_stats->ooo_cqe++;
>> + return false;
>> + }
>
>I honestly don't understand how this could happen, can you please
>provide more information about your issue? Did you actually witness ooo
>completions or is it a theoretical issue?
>We know ptp CQEs can be dropped in some rare cases (that's the reason we
>implemented this resync flow), but completions should always arrive
>in-order.
I was also surprised to see OOO completions but it's the reality. With a
little bit of debug I found this issue:
[65578.231710] FIFO drop found, skb_cc = 141, skb_id = 140
[65578.293358] FIFO drop found, skb_cc = 141, skb_id = 143
[65578.301240] FIFO drop found, skb_cc = 145, skb_id = 142
[65578.365277] FIFO drop found, skb_cc = 173, skb_id = 141
[65578.426681] FIFO drop found, skb_cc = 173, skb_id = 145
[65578.488089] FIFO drop found, skb_cc = 173, skb_id = 146
[65578.549489] FIFO drop found, skb_cc = 173, skb_id = 147
[65578.610897] FIFO drop found, skb_cc = 173, skb_id = 148
[65578.672301] FIFO drop found, skb_cc = 173, skb_id = 149
It really shows that CQE are coming OOO sometimes.
>> +
>> + while (skb_cc != skb_id && (skb =
mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>> hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>> skb_tstamp_tx(skb, &hwts);
>> ptpsq->cq_stats->resync_cqe++;
>> skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>> }
>> +
>> + if (!skb) {
>> + ptpsq->cq_stats->fifo_empty++;
>
>Hmm, for this to happen you need _all_ ptp CQEs to drop and wraparound
>the SQ?
Yep, and that's what I've seen before I fixed mlx5e_ptp_ts_cqe_drop()
check. I added this counter just to be sure I won't happen again.
>> + return false;
>> + }
>> +
>> + return true;
>> }
>>
>> static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct
mlx5e_ptpsq *ptpsq,
>> u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>> u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>> struct mlx5e_txqsq *sq = &ptpsq->txqsq;
>> - struct sk_buff *skb;
>> + struct sk_buff *skb = NULL;
>> ktime_t hwtstamp;
>>
>> if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
>> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct
mlx5e_ptpsq *ptpsq,
>> goto out;
>> }
>>
>> - if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
>> - mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
>> + if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
>> + !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
>> + goto out;
>> + }
>>
>> skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>> hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock,
get_cqe_ts(cqe));
>> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct
mlx5e_ptpsq *ptpsq,
>> ptpsq->cq_stats->cqe++;
>>
>> out:
>> - napi_consume_skb(skb, budget);
>> + if (skb)
>> + napi_consume_skb(skb, budget);
>> }
>>
>> static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> index aeed165a2dec..0bd2dd694f04 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
>> static inline bool
>> mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
>> {
>> - return (*fifo->pc - *fifo->cc) < fifo->mask;
>> + return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
>
>What is this cast for?
To properly check u16 overflow cases. (*fifo->pc - *fifo->cc) is casted
to int if we don't put explicit cast here. And it easily ends up with
negative value which we be less than mask until fifo->cc overflows too.
>> }
>>
>> static inline bool
>> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo
*fifo, struct sk_buff *skb)
>> {
>> struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>>
>> + WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s
overflow", __func__);
>
>The fifo is the same size of the SQ, how can it overflow?
>
There is one fifo_push call in mlx5e_txwqe_complete before
mlx5e_skb_fifo_has_room() is checked, so it can potentially overflow.
>> *skb_item = skb;
>> }
Powered by blists - more mailing lists