Message-ID: <Z8rbkpRca35brvij@boxer>
Date: Fri, 7 Mar 2025 12:42:10 +0100
From: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
To: Alexander Lobakin <aleksander.lobakin@...el.com>
CC: <intel-wired-lan@...ts.osuosl.org>, Michal Kubiak
	<michal.kubiak@...el.com>, Tony Nguyen <anthony.l.nguyen@...el.com>, "Przemek
 Kitszel" <przemyslaw.kitszel@...el.com>, Andrew Lunn <andrew+netdev@...n.ch>,
	"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, "Alexei
 Starovoitov" <ast@...nel.org>, Daniel Borkmann <daniel@...earbox.net>,
	"Jesper Dangaard Brouer" <hawk@...nel.org>, John Fastabend
	<john.fastabend@...il.com>, Simon Horman <horms@...nel.org>,
	<bpf@...r.kernel.org>, <netdev@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH net-next 09/16] idpf: remove SW marker handling from NAPI

On Wed, Mar 05, 2025 at 05:21:25PM +0100, Alexander Lobakin wrote:
> From: Michal Kubiak <michal.kubiak@...el.com>
> 
> SW marker descriptors on completion queues are used only when a queue
> is about to be destroyed. That is far from the hotpath, so handling
> them in the hotpath NAPI poll makes no sense.
> Instead, run a simple poller after the virtchnl message for destroying
> the queue is sent and wait for the replies. Once replies for all of
> the queues are received, the synchronization is done correctly and we
> can proceed with stopping the link.
> 
> Signed-off-by: Michal Kubiak <michal.kubiak@...el.com>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@...el.com>
> ---
>  drivers/net/ethernet/intel/idpf/idpf.h        |   7 +-
>  drivers/net/ethernet/intel/idpf/idpf_txrx.h   |   4 +-
>  drivers/net/ethernet/intel/idpf/idpf_lib.c    |   2 -
>  drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 108 +++++++++++-------
>  .../net/ethernet/intel/idpf/idpf_virtchnl.c   |  34 ++----
>  5 files changed, 80 insertions(+), 75 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h
> index 66544faab710..6b51a5dcc1e0 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf.h
> +++ b/drivers/net/ethernet/intel/idpf/idpf.h
> @@ -36,6 +36,7 @@ struct idpf_vport_max_q;
>  #define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz)	\
>  	((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz))
>  
> +#define IDPF_WAIT_FOR_MARKER_TIMEO	500
>  #define IDPF_MAX_WAIT			500
>  
>  /* available message levels */
> @@ -224,13 +225,10 @@ enum idpf_vport_reset_cause {
>  /**
>   * enum idpf_vport_flags - Vport flags
>   * @IDPF_VPORT_DEL_QUEUES: To send delete queues message
> - * @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets
> - *			  processing is done
>   * @IDPF_VPORT_FLAGS_NBITS: Must be last
>   */
>  enum idpf_vport_flags {
>  	IDPF_VPORT_DEL_QUEUES,
> -	IDPF_VPORT_SW_MARKER,
>  	IDPF_VPORT_FLAGS_NBITS,
>  };
>  
> @@ -289,7 +287,6 @@ struct idpf_port_stats {
>   * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation
>   * @port_stats: per port csum, header split, and other offload stats
>   * @link_up: True if link is up
> - * @sw_marker_wq: workqueue for marker packets
>   */
>  struct idpf_vport {
>  	u16 num_txq;
> @@ -332,8 +329,6 @@ struct idpf_vport {
>  	struct idpf_port_stats port_stats;
>  
>  	bool link_up;
> -
> -	wait_queue_head_t sw_marker_wq;
>  };
>  
>  /**
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
> index 9f938301b2c5..dd6cc3b5cdab 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
> +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
> @@ -286,7 +286,6 @@ struct idpf_ptype_state {
>   *			  bit and Q_RFL_GEN is the SW bit.
>   * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling
>   * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions
> - * @__IDPF_Q_POLL_MODE: Enable poll mode
>   * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode
>   * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq)
>   * @__IDPF_Q_FLAGS_NBITS: Must be last
> @@ -296,7 +295,6 @@ enum idpf_queue_flags_t {
>  	__IDPF_Q_RFL_GEN_CHK,
>  	__IDPF_Q_FLOW_SCH_EN,
>  	__IDPF_Q_SW_MARKER,
> -	__IDPF_Q_POLL_MODE,
>  	__IDPF_Q_CRC_EN,
>  	__IDPF_Q_HSPLIT_EN,
>  
> @@ -1044,6 +1042,8 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
>  				      u16 cleaned_count);
>  int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
>  
> +void idpf_wait_for_sw_marker_completion(struct idpf_tx_queue *txq);
> +
>  static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
>  					     u32 needed)
>  {
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c
> index f3aea7bcdaa3..e17582d15e27 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
> @@ -1501,8 +1501,6 @@ void idpf_init_task(struct work_struct *work)
>  	index = vport->idx;
>  	vport_config = adapter->vport_config[index];
>  
> -	init_waitqueue_head(&vport->sw_marker_wq);
> -
>  	spin_lock_init(&vport_config->mac_filter_list_lock);
>  
>  	INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list);
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> index a240ed115e3e..4e3de6031422 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
> @@ -1626,32 +1626,6 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport)
>  	return err;
>  }
>  
> -/**
> - * idpf_tx_handle_sw_marker - Handle queue marker packet
> - * @tx_q: tx queue to handle software marker
> - */
> -static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q)
> -{
> -	struct idpf_netdev_priv *priv = netdev_priv(tx_q->netdev);
> -	struct idpf_vport *vport = priv->vport;
> -	int i;
> -
> -	idpf_queue_clear(SW_MARKER, tx_q);
> -	/* Hardware must write marker packets to all queues associated with
> -	 * completion queues. So check if all queues received marker packets
> -	 */
> -	for (i = 0; i < vport->num_txq; i++)
> -		/* If we're still waiting on any other TXQ marker completions,
> -		 * just return now since we cannot wake up the marker_wq yet.
> -		 */
> -		if (idpf_queue_has(SW_MARKER, vport->txqs[i]))
> -			return;
> -
> -	/* Drain complete */
> -	set_bit(IDPF_VPORT_SW_MARKER, vport->flags);
> -	wake_up(&vport->sw_marker_wq);
> -}
> -
>  /**
>   * idpf_tx_clean_stashed_bufs - clean bufs that were stored for
>   * out of order completions
> @@ -2008,6 +1982,19 @@ idpf_tx_handle_rs_cmpl_fb(struct idpf_tx_queue *txq,
>  		idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget);
>  }
>  
> +/**
> + * idpf_tx_update_complq_indexes - update completion queue indexes
> + * @complq: completion queue being updated
> + * @ntc: current "next to clean" index value
> + * @gen_flag: current "generation" flag value
> + */
> +static void idpf_tx_update_complq_indexes(struct idpf_compl_queue *complq,
> +					  int ntc, bool gen_flag)
> +{
> +	complq->next_to_clean = ntc + complq->desc_count;
> +	idpf_queue_assign(GEN_CHK, complq, gen_flag);
> +}
> +
>  /**
>   * idpf_tx_finalize_complq - Finalize completion queue cleaning
>   * @complq: completion queue to finalize
> @@ -2059,8 +2046,7 @@ static void idpf_tx_finalize_complq(struct idpf_compl_queue *complq, int ntc,
>  		tx_q->cleaned_pkts = 0;
>  	}
>  
> -	complq->next_to_clean = ntc + complq->desc_count;
> -	idpf_queue_assign(GEN_CHK, complq, gen_flag);
> +	idpf_tx_update_complq_indexes(complq, ntc, gen_flag);
>  }
>  
>  /**
> @@ -2115,9 +2101,6 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget,
>  							  &cleaned_stats,
>  							  budget);
>  			break;
> -		case IDPF_TXD_COMPLT_SW_MARKER:
> -			idpf_tx_handle_sw_marker(tx_q);
> -			break;
>  		case -ENODATA:
>  			goto exit_clean_complq;
>  		case -EINVAL:
> @@ -2159,6 +2142,59 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget,
>  	return !!complq_budget;
>  }
>  
> +/**
> + * idpf_wait_for_sw_marker_completion - wait for SW marker of disabled Tx queue
> + * @txq: disabled Tx queue
> + */
> +void idpf_wait_for_sw_marker_completion(struct idpf_tx_queue *txq)
> +{
> +	struct idpf_compl_queue *complq = txq->txq_grp->complq;
> +	struct idpf_splitq_4b_tx_compl_desc *tx_desc;
> +	s16 ntc = complq->next_to_clean;
> +	unsigned long timeout;
> +	bool flow, gen_flag;
> +	u32 pos = ntc;
> +
> +	if (!idpf_queue_has(SW_MARKER, txq))
> +		return;
> +
> +	flow = idpf_queue_has(FLOW_SCH_EN, complq);
> +	gen_flag = idpf_queue_has(GEN_CHK, complq);
> +
> +	timeout = jiffies + msecs_to_jiffies(IDPF_WAIT_FOR_MARKER_TIMEO);
> +	tx_desc = flow ? &complq->comp[pos].common : &complq->comp_4b[pos];
> +	ntc -= complq->desc_count;

could we drop this logic? it was introduced back in the day because the
comparison against 0 in the wrap case was faster, but here, as you said,
this has little in common with the hot path.
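
i.e. something like below (untested sketch, just to illustrate; the
'+ complq->desc_count' compensation in idpf_tx_update_complq_indexes()
would then go away as well):

	u32 ntc = complq->next_to_clean;

	do {
		...
		/* single plain 0-based index, wrap against desc_count */
		if (unlikely(++ntc == complq->desc_count)) {
			ntc = 0;
			gen_flag = !gen_flag;
		}

		tx_desc = flow ? &complq->comp[ntc].common :
			  &complq->comp_4b[ntc];
		prefetch(tx_desc);
	} while (time_before(jiffies, timeout));

	complq->next_to_clean = ntc;
	idpf_queue_assign(GEN_CHK, complq, gen_flag);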

> +
> +	do {
> +		struct idpf_tx_queue *tx_q;
> +		int ctype;
> +
> +		ctype = idpf_parse_compl_desc(tx_desc, complq, &tx_q,
> +					      gen_flag);
> +		if (ctype == IDPF_TXD_COMPLT_SW_MARKER) {
> +			idpf_queue_clear(SW_MARKER, tx_q);
> +			if (txq == tx_q)
> +				break;
> +		} else if (ctype == -ENODATA) {
> +			usleep_range(500, 1000);
> +			continue;
> +		}
> +
> +		pos++;
> +		ntc++;
> +		if (unlikely(!ntc)) {
> +			ntc -= complq->desc_count;
> +			pos = 0;
> +			gen_flag = !gen_flag;
> +		}
> +
> +		tx_desc = flow ? &complq->comp[pos].common :
> +			  &complq->comp_4b[pos];
> +		prefetch(tx_desc);
> +	} while (time_before(jiffies, timeout));

what if the timeout expires and you haven't found the marker desc? why
do you need a timer at all? couldn't you scan the whole ring instead?
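
e.g. an untested sketch below (IDPF_MARKER_MAX_RETRIES is a made-up
name) which bounds the work by ring size plus a fixed number of
empty-ring retries, so a stuck ring fails after a deterministic number
of iterations and you can warn explicitly instead of relying on
jiffies:

	struct idpf_tx_queue *tx_q;
	u32 scanned = 0, retries = 0;
	int ctype;

	while (scanned < complq->desc_count &&
	       retries < IDPF_MARKER_MAX_RETRIES) {
		ctype = idpf_parse_compl_desc(tx_desc, complq, &tx_q,
					      gen_flag);
		if (ctype == -ENODATA) {
			/* nothing new written by HW yet, back off */
			usleep_range(500, 1000);
			retries++;
			continue;
		}

		retries = 0;
		scanned++;
		/* handle SW_MARKER / advance the indexes as above */
	}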

> +
> +	idpf_tx_update_complq_indexes(complq, ntc, gen_flag);
> +}
>  /**
>   * idpf_tx_splitq_build_ctb - populate command tag and size for queue
>   * based scheduling descriptors
> @@ -4130,15 +4166,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget)
>  	else
>  		idpf_vport_intr_set_wb_on_itr(q_vector);
>  
> -	/* Switch to poll mode in the tear-down path after sending disable
> -	 * queues virtchnl message, as the interrupts will be disabled after
> -	 * that
> -	 */
> -	if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE,
> -							 q_vector->tx[0])))
> -		return budget;
> -	else
> -		return work_done;
> +	return work_done;
>  }
>  
>  /**
> diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
> index 135af3cc243f..24495e4d6c78 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
> +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
> @@ -752,21 +752,17 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter)
>   **/
>  static int idpf_wait_for_marker_event(struct idpf_vport *vport)
>  {
> -	int event;
> -	int i;
> -
> -	for (i = 0; i < vport->num_txq; i++)
> -		idpf_queue_set(SW_MARKER, vport->txqs[i]);
> +	bool markers_rcvd = true;
>  
> -	event = wait_event_timeout(vport->sw_marker_wq,
> -				   test_and_clear_bit(IDPF_VPORT_SW_MARKER,
> -						      vport->flags),
> -				   msecs_to_jiffies(500));
> +	for (u32 i = 0; i < vport->num_txq; i++) {
> +		struct idpf_tx_queue *txq = vport->txqs[i];
>  
> -	for (i = 0; i < vport->num_txq; i++)
> -		idpf_queue_clear(POLL_MODE, vport->txqs[i]);
> +		idpf_queue_set(SW_MARKER, txq);
> +		idpf_wait_for_sw_marker_completion(txq);
> +		markers_rcvd &= !idpf_queue_has(SW_MARKER, txq);
> +	}
>  
> -	if (event)
> +	if (markers_rcvd)
>  		return 0;
>  
>  	dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n");
> @@ -1993,24 +1989,12 @@ int idpf_send_enable_queues_msg(struct idpf_vport *vport)
>   */
>  int idpf_send_disable_queues_msg(struct idpf_vport *vport)
>  {
> -	int err, i;
> +	int err;
>  
>  	err = idpf_send_ena_dis_queues_msg(vport, false);
>  	if (err)
>  		return err;
>  
> -	/* switch to poll mode as interrupts will be disabled after disable
> -	 * queues virtchnl message is sent
> -	 */
> -	for (i = 0; i < vport->num_txq; i++)
> -		idpf_queue_set(POLL_MODE, vport->txqs[i]);
> -
> -	/* schedule the napi to receive all the marker packets */
> -	local_bh_disable();
> -	for (i = 0; i < vport->num_q_vectors; i++)
> -		napi_schedule(&vport->q_vectors[i].napi);
> -	local_bh_enable();
> -
>  	return idpf_wait_for_marker_event(vport);
>  }
>  
> -- 
> 2.48.1
> 
