lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <7121300c-1fa3-461f-a531-d148b16d5079@redhat.com>
Date: Mon, 22 Jan 2024 12:24:34 +0100
From: Hans de Goede <hdegoede@...hat.com>
To: Liming Sun <limings@...dia.com>, Vadim Pasternak <vadimp@...dia.com>,
 David Thompson <davthompson@...dia.com>, Mark Gross <markgross@...nel.org>,
 Dan Carpenter <dan.carpenter@...aro.org>
Cc: platform-driver-x86@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3] Drop Tx network packet when Tx TmFIFO is full

Hi,

On 1/11/24 18:31, Liming Sun wrote:
> Starting from Linux 5.16 kernel, Tx timeout mechanism was added
> in the virtio_net driver which prints the "Tx timeout" warning
> message when a packet stays in Tx queue for too long. Below is an
> example of the reported message:
> 
> "[494105.316739] virtio_net virtio1 tmfifo_net0: TX timeout on
> queue: 0, sq: output.0, vq: 0×1, name: output.0, usecs since
> last trans: 3079892256".
> 
> This issue could happen when external host driver which drains the
> FIFO is restared, stopped or upgraded. To avoid such confusing
> "Tx timeout" messages, this commit adds logic to drop the outstanding
> Tx packet if it's not able to transmit in two seconds due to Tx FIFO
> full, which can be considered as congestion or out-of-resource drop.
> 
> This commit also handles the special case that the packet is half-
> transmitted into the Tx FIFO. In such case, the packet is discarded
> with remaining length stored in vring->rem_padding. So paddings with
> zeros can be sent out when Tx space is available to maintain the
> integrity of the packet format. The padded packet will be dropped on
> the receiving side.
> 
> Signed-off-by: Liming Sun <limings@...dia.com>

Thank you for your patch/series, I've applied this patch
(series) to my review-hans branch:
https://git.kernel.org/pub/scm/linux/kernel/git/pdx86/platform-drivers-x86.git/log/?h=review-hans

Note it will show up in the pdx86 review-hans branch once I've
pushed my local branch there, which might take a while.

I will include this patch in my next fixes pull-req to Linus
for the current kernel development cycle.

Regards,

Hans




> ---
> v2->v3:
>   Updates for Ilpo's comments:
>   - Revises commit message to avoid confusion.
> v2: Fixed formatting warning
> v1: Initial version
> ---
>  drivers/platform/mellanox/mlxbf-tmfifo.c | 67 ++++++++++++++++++++++++
>  1 file changed, 67 insertions(+)
> 
> diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
> index 5c683b4eaf10..f39b7b9d2bfe 100644
> --- a/drivers/platform/mellanox/mlxbf-tmfifo.c
> +++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
> @@ -47,6 +47,9 @@
>  /* Message with data needs at least two words (for header & data). */
>  #define MLXBF_TMFIFO_DATA_MIN_WORDS		2
>  
> +/* Tx timeout in milliseconds. */
> +#define TMFIFO_TX_TIMEOUT			2000
> +
>  /* ACPI UID for BlueField-3. */
>  #define TMFIFO_BF3_UID				1
>  
> @@ -62,12 +65,14 @@ struct mlxbf_tmfifo;
>   * @drop_desc: dummy desc for packet dropping
>   * @cur_len: processed length of the current descriptor
>   * @rem_len: remaining length of the pending packet
> + * @rem_padding: remaining bytes to send as paddings
>   * @pkt_len: total length of the pending packet
>   * @next_avail: next avail descriptor id
>   * @num: vring size (number of descriptors)
>   * @align: vring alignment size
>   * @index: vring index
>   * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
> + * @tx_timeout: expire time of last tx packet
>   * @fifo: pointer to the tmfifo structure
>   */
>  struct mlxbf_tmfifo_vring {
> @@ -79,12 +84,14 @@ struct mlxbf_tmfifo_vring {
>  	struct vring_desc drop_desc;
>  	int cur_len;
>  	int rem_len;
> +	int rem_padding;
>  	u32 pkt_len;
>  	u16 next_avail;
>  	int num;
>  	int align;
>  	int index;
>  	int vdev_id;
> +	unsigned long tx_timeout;
>  	struct mlxbf_tmfifo *fifo;
>  };
>  
> @@ -819,6 +826,50 @@ static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
>  	return true;
>  }
>  
> +static void mlxbf_tmfifo_check_tx_timeout(struct mlxbf_tmfifo_vring *vring)
> +{
> +	unsigned long flags;
> +
> +	/* Only handle Tx timeout for network vdev. */
> +	if (vring->vdev_id != VIRTIO_ID_NET)
> +		return;
> +
> +	/* Initialize the timeout or return if not expired. */
> +	if (!vring->tx_timeout) {
> +		/* Initialize the timeout. */
> +		vring->tx_timeout = jiffies +
> +			msecs_to_jiffies(TMFIFO_TX_TIMEOUT);
> +		return;
> +	} else if (time_before(jiffies, vring->tx_timeout)) {
> +		/* Return if not timeout yet. */
> +		return;
> +	}
> +
> +	/*
> +	 * Drop the packet after timeout. The outstanding packet is
> +	 * released and the remaining bytes will be sent with padding byte 0x00
> +	 * as a recovery. On the peer(host) side, the padding bytes 0x00 will be
> +	 * either dropped directly, or appended into existing outstanding packet
> +	 * thus dropped as corrupted network packet.
> +	 */
> +	vring->rem_padding = round_up(vring->rem_len, sizeof(u64));
> +	mlxbf_tmfifo_release_pkt(vring);
> +	vring->cur_len = 0;
> +	vring->rem_len = 0;
> +	vring->fifo->vring[0] = NULL;
> +
> +	/*
> +	 * Make sure the load/store are in order before
> +	 * returning back to virtio.
> +	 */
> +	virtio_mb(false);
> +
> +	/* Notify upper layer. */
> +	spin_lock_irqsave(&vring->fifo->spin_lock[0], flags);
> +	vring_interrupt(0, vring->vq);
> +	spin_unlock_irqrestore(&vring->fifo->spin_lock[0], flags);
> +}
> +
>  /* Rx & Tx processing of a queue. */
>  static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  {
> @@ -841,6 +892,7 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  		return;
>  
>  	do {
> +retry:
>  		/* Get available FIFO space. */
>  		if (avail == 0) {
>  			if (is_rx)
> @@ -851,6 +903,17 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  				break;
>  		}
>  
> +		/* Insert paddings for discarded Tx packet. */
> +		if (!is_rx) {
> +			vring->tx_timeout = 0;
> +			while (vring->rem_padding >= sizeof(u64)) {
> +				writeq(0, vring->fifo->tx.data);
> +				vring->rem_padding -= sizeof(u64);
> +				if (--avail == 0)
> +					goto retry;
> +			}
> +		}
> +
>  		/* Console output always comes from the Tx buffer. */
>  		if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
>  			mlxbf_tmfifo_console_tx(fifo, avail);
> @@ -860,6 +923,10 @@ static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
>  		/* Handle one descriptor. */
>  		more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
>  	} while (more);
> +
> +	/* Check Tx timeout. */
> +	if (avail <= 0 && !is_rx)
> +		mlxbf_tmfifo_check_tx_timeout(vring);
>  }
>  
>  /* Handle Rx or Tx queues. */


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ