netdev - Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <facabfb3-953c-2a48-a980-97bf2b9f70d3@novek.ru>
Date:   Mon, 23 Jan 2023 23:49:28 +0000
From:   Vadim Fedorenko <vfedorenko@...ek.ru>
To:     Gal Pressman <gal@...dia.com>, Vadim Fedorenko <vadfed@...a.com>,
        Aya Levin <ayal@...dia.com>,
        Saeed Mahameed <saeedm@...dia.com>,
        Jakub Kicinski <kuba@...nel.org>
Cc:     netdev@...r.kernel.org
Subject: Re: [PATCH net 1/2] mlx5: fix possible ptp queue fifo overflow

On 23.01.2023 12:32, Gal Pressman wrote:
> Hi Vadim,
> 
> On 22/01/2023 18:16, Vadim Fedorenko wrote:
>> Fifo pointers are not checked for overflow and this could potentially
>> lead to overflow and double free under heavy PTP traffic.
>>
>> Also there were accidental OOO cqe which lead to absolutely broken fifo.
>> Add checks to workaround OOO cqe and add counters to show the amount of
>> such events.
>>
>> Fixes: 19b43a432e3e ("net/mlx5e: Extend SKB room check to include PTP-SQ")
> 
> Isn't 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port
> timestamp") more appropriate?
> 
I re-checked the log and realised that you are absolutely right, I'll update it 
in v2, thanks!


>> Signed-off-by: Vadim Fedorenko <vadfed@...a.com>
>> ---
>>   .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
>>   .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
>>   .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
>>   .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
>>   4 files changed, 30 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> index 903de88bab53..11a99e0f00c6 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
>> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
>>   	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>>   }
>>   
>> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
>> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
>>   {
>>   	struct skb_shared_hwtstamps hwts = {};
>>   	struct sk_buff *skb;
>>   
>>   	ptpsq->cq_stats->resync_event++;
>>   
>> -	while (skb_cc != skb_id) {
>> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
>> +		ptpsq->cq_stats->ooo_cqe++;
>> +		return false;
>> +	}
> 
> I honestly don't understand how this could happen, can you please
> provide more information about your issue? Did you actually witness ooo
> completions or is it a theoretical issue?
> We know ptp CQEs can be dropped in some rare cases (that's the reason we
> implemented this resync flow), but completions should always arrive
> in-order.
> 
>> +
>> +	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>>   		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>>   		skb_tstamp_tx(skb, &hwts);
>>   		ptpsq->cq_stats->resync_cqe++;
>>   		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>   	}
>> +
>> +	if (!skb) {
>> +		ptpsq->cq_stats->fifo_empty++;
> 
> Hmm, for this to happen you need _all_ ptp CQEs to drop and wraparound
> the SQ?
> 
>> +		return false;> +	}
>> +
>> +	return true;
>>   }
>>   
>>   static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>>   	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>>   	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>>   	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
>> -	struct sk_buff *skb;
>> +	struct sk_buff *skb = NULL;
>>   	ktime_t hwtstamp;
>>   
>>   	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
>> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>>   		goto out;
>>   	}
>>   
>> -	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
>> -		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
>> +	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
>> +	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
>> +		goto out;
>> +	}
>>   
>>   	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>>   	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
>> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>>   	ptpsq->cq_stats->cqe++;
>>   
>>   out:
>> -	napi_consume_skb(skb, budget);
>> +	if (skb)
>> +		napi_consume_skb(skb, budget);
>>   }
>>   
>>   static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
>> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> index aeed165a2dec..0bd2dd694f04 100644
>> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
>> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
>>   static inline bool
>>   mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
>>   {
>> -	return (*fifo->pc - *fifo->cc) < fifo->mask;
>> +	return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
> 
> What is this cast for?
> 
>>   }
>>   
>>   static inline bool
>> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>>   {
>>   	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>>   
>> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "%s overflow", __func__);
> 
> The fifo is the same size of the SQ, how can it overflow?
> 
>>   	*skb_item = skb;
>>   }