lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 23 Jan 2023 18:05:55 -0800
From:   Rahul Rameshbabu <rrameshbabu@...dia.com>
To:     Vadim Fedorenko <vfedorenko@...ek.ru>
Cc:     Vadim Fedorenko <vadfed@...com>, Aya Levin <ayal@...dia.com>,
        Saeed Mahameed <saeedm@...dia.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Gal Pressman <gal@...dia.com>,
        Vadim Fedorenko <vadfed@...a.com>, netdev@...r.kernel.org
Subject: Re: [PATCH net v2 1/2] mlx5: fix possible ptp queue fifo overflow

On Tue, 24 Jan, 2023 03:08:35 +0300 Vadim Fedorenko <vfedorenko@...ek.ru> wrote:
> From: Vadim Fedorenko <vadfed@...a.com>
>
> Fifo pointers are not checked for overflow and this could potentially
> lead to overflow and double free under heavy PTP traffic.
>
> Also there were accidental OOO cqe which lead to absolutely broken fifo.
> Add checks to workaround OOO cqe and add counters to show the amount of
> such events.
>
> Fixes: 58a518948f60 ("net/mlx5e: Add resiliency for PTP TX port timestamp")
> Signed-off-by: Vadim Fedorenko <vadfed@...a.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/en/ptp.c  | 28 ++++++++++++++-----
>  .../net/ethernet/mellanox/mlx5/core/en/txrx.h |  6 +++-
>  .../ethernet/mellanox/mlx5/core/en_stats.c    |  2 ++
>  .../ethernet/mellanox/mlx5/core/en_stats.h    |  2 ++
>  4 files changed, 30 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> index 8469e9c38670..32d6b387af61 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
> @@ -86,20 +86,31 @@ static bool mlx5e_ptp_ts_cqe_drop(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb
>  	return (ptpsq->ts_cqe_ctr_mask && (skb_cc != skb_id));
>  }
>  
> -static void mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
> +static bool mlx5e_ptp_skb_fifo_ts_cqe_resync(struct mlx5e_ptpsq *ptpsq, u16 skb_cc, u16 skb_id)
>  {
>  	struct skb_shared_hwtstamps hwts = {};
>  	struct sk_buff *skb;
>  
>  	ptpsq->cq_stats->resync_event++;
>  
> -	while (skb_cc != skb_id) {
> -		skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
> +	if (skb_cc > skb_id || PTP_WQE_CTR2IDX(ptpsq->skb_fifo_pc) < skb_id) {
> +		ptpsq->cq_stats->ooo_cqe++;
> +		return false;
> +	}
> +
> +	while (skb_cc != skb_id && (skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo))) {
>  		hwts.hwtstamp = mlx5e_skb_cb_get_hwts(skb)->cqe_hwtstamp;
>  		skb_tstamp_tx(skb, &hwts);
>  		ptpsq->cq_stats->resync_cqe++;
>  		skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>  	}
> +
> +	if (!skb) {
> +		ptpsq->cq_stats->fifo_empty++;
> +		return false;
> +	}
> +
> +	return true;
>  }
>  
>  static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
> @@ -109,7 +120,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>  	u16 skb_id = PTP_WQE_CTR2IDX(be16_to_cpu(cqe->wqe_counter));
>  	u16 skb_cc = PTP_WQE_CTR2IDX(ptpsq->skb_fifo_cc);
>  	struct mlx5e_txqsq *sq = &ptpsq->txqsq;
> -	struct sk_buff *skb;
> +	struct sk_buff *skb = NULL;
>  	ktime_t hwtstamp;
>  
>  	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
> @@ -118,8 +129,10 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>  		goto out;
>  	}
>  
> -	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id))
> -		mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id);
> +	if (mlx5e_ptp_ts_cqe_drop(ptpsq, skb_cc, skb_id) &&
> +	    !mlx5e_ptp_skb_fifo_ts_cqe_resync(ptpsq, skb_cc, skb_id)) {
> +		goto out;
> +	}
>  
>  	skb = mlx5e_skb_fifo_pop(&ptpsq->skb_fifo);
>  	hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
> @@ -128,7 +141,8 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
>  	ptpsq->cq_stats->cqe++;
>  
>  out:
> -	napi_consume_skb(skb, budget);
> +	if (skb)
> +		napi_consume_skb(skb, budget);
>  }
>  
>  static bool mlx5e_ptp_poll_ts_cq(struct mlx5e_cq *cq, int budget)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> index 853f312cd757..5fb58764c923 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
> @@ -81,7 +81,7 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
>  static inline bool
>  mlx5e_skb_fifo_has_room(struct mlx5e_skb_fifo *fifo)
>  {
> -	return (*fifo->pc - *fifo->cc) < fifo->mask;
> +	return (u16)(*fifo->pc - *fifo->cc) < fifo->mask;
>  }
>  
>  static inline bool
> @@ -291,12 +291,16 @@ void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
>  {
>  	struct sk_buff **skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
>  
> +	WARN_ONCE((u16)(*fifo->pc - *fifo->cc) > fifo->mask, "ptp fifo overflow");

I found this pretty tough to read/understand since it needed to account
for the fact that the overflow already occurred. Instead I would
refactor into the following.

  static inline
  void mlx5e_skb_fifo_push(struct mlx5e_skb_fifo *fifo, struct sk_buff *skb)
  {
    struct sk_buff **skb_item;

    WARN_ONCE(!mlx5e_skb_fifo_has_room(fifo), "ptp fifo overflow");
    skb_item = mlx5e_skb_fifo_get(fifo, (*fifo->pc)++);
    *skb_item = skb;
  }

>  	*skb_item = skb;
>  }
>  
>  static inline
>  struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_skb_fifo *fifo)
>  {
> +	if (*fifo->pc == *fifo->cc)
> +		return NULL;
> +
>  	return *mlx5e_skb_fifo_get(fifo, (*fifo->cc)++);
>  }
>  
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> index 6687b8136e44..6fbd58d1722a 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
> @@ -2138,6 +2138,8 @@ static const struct counter_desc ptp_cq_stats_desc[] = {
>  	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
>  	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_cqe) },
>  	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, resync_event) },
> +	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, ooo_cqe) },
> +	{ MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, fifo_empty) },
>  };
>  
>  static const struct counter_desc ptp_rq_stats_desc[] = {
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> index 375752d6546d..51da492169c2 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
> @@ -461,6 +461,8 @@ struct mlx5e_ptp_cq_stats {
>  	u64 abort_abs_diff_ns;
>  	u64 resync_cqe;
>  	u64 resync_event;
> +	u64 ooo_cqe;
> +	u64 fifo_empty;
>  };
>  
>  struct mlx5e_rep_stats {

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ