Message-ID: <1b1697fe-2393-4959-b29e-59fb30e5ed49@redhat.com>
Date: Tue, 21 Oct 2025 10:28:49 +0200
From: Paolo Abeni <pabeni@...hat.com>
To: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>, netdev@...r.kernel.org
Cc: Andrew Lunn <andrew+netdev@...n.ch>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Wen Gu <guwen@...ux.alibaba.com>,
Philo Lu <lulie@...ux.alibaba.com>, Lorenzo Bianconi <lorenzo@...nel.org>,
Vadim Fedorenko <vadim.fedorenko@...ux.dev>,
Lukas Bulwahn <lukas.bulwahn@...hat.com>,
Geert Uytterhoeven <geert+renesas@...der.be>,
Vivian Wang <wangruikang@...as.ac.cn>,
Troy Mitchell <troy.mitchell@...ux.spacemit.com>,
Dust Li <dust.li@...ux.alibaba.com>
Subject: Re: [PATCH net-next v7 4/5] eea: create/destroy rx,tx queues for
netdevice open and stop
On 10/16/25 1:06 PM, Xuan Zhuo wrote:
> +/* resources: ring, buffers, irq */
> +int eea_reset_hw_resources(struct eea_net *enet, struct eea_net_tmp *tmp)
> +{
> + struct eea_net_tmp _tmp;
> + int err;
> +
> + if (!tmp) {
> + enet_init_cfg(enet, &_tmp);
> + tmp = &_tmp;
As suggested on v5, you should do:

	enet_init_cfg(enet, &status);
	eea_reset_hw_resources(enet, &status);

in the caller that currently passes a NULL argument.
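With that in place, the NULL handling here could go away entirely,
i.e. (rough sketch, completely untested):

	int eea_reset_hw_resources(struct eea_net *enet, struct eea_net_tmp *tmp)
	{
		int err;

		/* tmp is now always provided by the caller, so the _tmp
		 * local and the !tmp branch are not needed anymore.
		 */
		if (!netif_running(enet->netdev)) {
			enet->cfg = tmp->cfg;
			return 0;
		}
		...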
> + }
> +
> + if (!netif_running(enet->netdev)) {
> + enet->cfg = tmp->cfg;
> + return 0;
> + }
> +
> + err = eea_alloc_rxtx_q_mem(tmp);
> + if (err) {
> + netdev_warn(enet->netdev,
> + "eea reset: alloc q failed. stop reset. err %d\n",
> + err);
> + return err;
> + }
> +
> + eea_netdev_stop(enet->netdev);
> +
> + enet_bind_new_q_and_cfg(enet, tmp);
> +
> + err = eea_active_ring_and_irq(enet);
> + if (err) {
> + netdev_warn(enet->netdev,
> + "eea reset: active new ring and irq failed. err %d\n",
> + err);
> + return err;
> + }
> +
> + err = eea_start_rxtx(enet->netdev);
> + if (err)
> + netdev_warn(enet->netdev,
> + "eea reset: start queue failed. err %d\n", err);
Following up on the v5 discussion, I see this function is used to handle
the scenario where the entire setup fails, but it's also used in the
next patch to implement the set_ring/set_channel operations. The latter
should leave the device in a working state even when the requested
change is not possible, so this function needs to fail gracefully at
least on such invocations.
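Something along these lines could work (hand-waving sketch; the 'old'
snapshot and eea_bind_old_q_and_cfg() are made-up names):

	err = eea_active_ring_and_irq(enet);
	if (err) {
		/* Restore the previous rings/cfg so that the device
		 * keeps working with the old settings, and only report
		 * the error to the caller.
		 */
		eea_bind_old_q_and_cfg(enet, &old);
		if (eea_active_ring_and_irq(enet))
			goto fatal;	/* only now give up completely */
		eea_start_rxtx(enet->netdev);
		return err;
	}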
[...]
> +/* ha handle code */
> +static void eea_ha_handle_work(struct work_struct *work)
> +{
> + struct eea_pci_device *ep_dev;
> + struct eea_device *edev;
> + struct pci_dev *pci_dev;
> + u16 reset;
> +
> + ep_dev = container_of(work, struct eea_pci_device, ha_handle_work);
> + edev = &ep_dev->edev;
> +
> + /* Ha interrupt is triggered, so there maybe some error, we may need to
> + * reset the device or reset some queues.
> + */
> + dev_warn(&ep_dev->pci_dev->dev, "recv ha interrupt.\n");
> +
> + if (ep_dev->reset_pos) {
> + pci_read_config_word(ep_dev->pci_dev, ep_dev->reset_pos,
> + &reset);
> + /* clear bit */
> + pci_write_config_word(ep_dev->pci_dev, ep_dev->reset_pos,
> + 0xFFFF);
> +
> + if (reset & EEA_PCI_CAP_RESET_FLAG) {
> + dev_warn(&ep_dev->pci_dev->dev,
> + "recv device reset request.\n");
> +
> + pci_dev = ep_dev->pci_dev;
> +
> + /* The pci remove callback may hold this lock. If the
> + * pci remove callback is called, then we can ignore the
> + * ha interrupt.
> + */
> + if (mutex_trylock(&edev->ha_lock)) {
> + edev->ha_reset = true;
> +
> + __eea_pci_remove(pci_dev, false);
> + __eea_pci_probe(pci_dev, ep_dev);
> +
> + edev->ha_reset = false;
> + mutex_unlock(&edev->ha_lock);
> + } else {
> + dev_warn(&ep_dev->pci_dev->dev,
> + "ha device reset: trylock failed.\n");
> + }
> + return;
Nesting here is quite high; consider moving the above into a separate
helper.
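Something like the following (untested, the helper name is just a
suggestion), which also flattens the error path via an early return:

	static void eea_ha_handle_reset(struct eea_pci_device *ep_dev)
	{
		struct eea_device *edev = &ep_dev->edev;
		struct pci_dev *pci_dev = ep_dev->pci_dev;

		/* The pci remove callback may hold this lock. If the
		 * pci remove callback is called, then we can ignore
		 * the ha interrupt.
		 */
		if (!mutex_trylock(&edev->ha_lock)) {
			dev_warn(&pci_dev->dev,
				 "ha device reset: trylock failed.\n");
			return;
		}

		edev->ha_reset = true;
		__eea_pci_remove(pci_dev, false);
		__eea_pci_probe(pci_dev, ep_dev);
		edev->ha_reset = false;
		mutex_unlock(&edev->ha_lock);
	}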
> @@ -45,9 +52,17 @@ u16 eea_pci_dev_id(struct eea_device *edev);
>
> int eea_device_reset(struct eea_device *dev);
> void eea_device_ready(struct eea_device *dev);
> +
Minor nit: either do not introduce this whitespace, or add it together
with the surrounding chunk of code.
[...]
> +static void meta_align_offset(struct eea_net_rx *rx, struct eea_rx_meta *meta)
> +{
> + int h, b;
> +
> + h = rx->headroom;
> + b = meta->offset + h;
> +
> + b = ALIGN(b, 128);
Out of sheer curiosity, why the above alignment? A comment and a macro
instead of the magic number would be useful.
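e.g. something like (EEA_RX_BUF_ALIGN is a made-up name, and only you
can fill in the actual reason for the 128 bytes requirement):

	/* <device/cacheline requirement mandating this alignment> */
	#define EEA_RX_BUF_ALIGN	128

	b = ALIGN(b, EEA_RX_BUF_ALIGN);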
> +static int eea_alloc_rx_hdr(struct eea_net_tmp *tmp, struct eea_net_rx *rx)
> +{
> + struct page *hdr_page = NULL;
> + struct eea_rx_meta *meta;
> + u32 offset = 0, hdrsize;
> + struct device *dmadev;
> + dma_addr_t dma;
> + int i;
> +
> + dmadev = tmp->edev->dma_dev;
> + hdrsize = tmp->cfg.split_hdr;
> +
> + for (i = 0; i < tmp->cfg.rx_ring_depth; ++i) {
> + meta = &rx->meta[i];
> +
> + if (!hdr_page || offset + hdrsize > PAGE_SIZE) {
> + hdr_page = dev_alloc_page();
> + if (!hdr_page)
> + return -ENOMEM;
Why are you not using the page pool for the headers?
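Assuming the rx queue already owns a page_pool created with
PP_FLAG_DMA_MAP ('rx->pp' here), the whole chunk could become something
like (untested):

	hdr_page = page_pool_dev_alloc_frag(rx->pp, &offset, hdrsize);
	if (!hdr_page)
		return -ENOMEM;

	meta->hdr_dma = page_pool_get_dma_addr(hdr_page) + offset;
	meta->hdr_addr = page_address(hdr_page) + offset;

dropping the manual offset bookkeeping, the dma mapping and the
put_page() error handling below.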
> +
> + dma = dma_map_page(dmadev, hdr_page, 0, PAGE_SIZE,
> + DMA_FROM_DEVICE);
> +
> + if (unlikely(dma_mapping_error(dmadev, dma))) {
> + put_page(hdr_page);
> + return -ENOMEM;
> + }
> +
> + offset = 0;
> + meta->hdr_page = hdr_page;
> + meta->dma = dma;
> + }
> +
> + meta->hdr_dma = dma + offset;
> + meta->hdr_addr = page_address(hdr_page) + offset;
> + offset += hdrsize;
> + }
> +
> + return 0;
> +}
> +
> +static void eea_rx_meta_dma_sync_for_cpu(struct eea_net_rx *rx,
> + struct eea_rx_meta *meta, u32 len)
> +{
> + dma_sync_single_for_cpu(rx->enet->edev->dma_dev,
> + meta->dma + meta->offset + meta->headroom,
> + len, DMA_FROM_DEVICE);
> +}
> +
> +static int eea_harden_check_overflow(struct eea_rx_ctx *ctx,
> + struct eea_net *enet)
> +{
> + if (unlikely(ctx->len > ctx->meta->truesize - ctx->meta->room)) {
Given the above, it looks like the hypervisor could corrupt the guest
driver memory. If so, are the related defensive checks in the guest
really effective?
> + pr_debug("%s: rx error: len %u exceeds truesize %u\n",
> + enet->netdev->name, ctx->len,
> + ctx->meta->truesize - ctx->meta->room);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static bool eea_rx_post(struct eea_net *enet,
> + struct eea_net_rx *rx, gfp_t gfp)
It looks like this function is always called with gfp == GFP_ATOMIC. If
so, just drop the argument.
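i.e.:

	static bool eea_rx_post(struct eea_net *enet, struct eea_net_rx *rx)

and use GFP_ATOMIC directly at the allocation site.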
[...]
> +static int eea_tx_post_skb(struct eea_net_tx *tx, struct sk_buff *skb)
> +{
> + const struct skb_shared_info *shinfo = skb_shinfo(skb);
> + u32 hlen = skb_headlen(skb);
> + struct eea_tx_meta *meta;
> + dma_addr_t addr;
> + int i, err;
> + u16 flags;
> +
> + addr = dma_map_single(tx->dma_dev, skb->data, hlen, DMA_TO_DEVICE);
> + if (unlikely(dma_mapping_error(tx->dma_dev, addr)))
> + return -ENOMEM;
> +
> + flags = skb->ip_summed == CHECKSUM_PARTIAL ? EEA_DESC_F_DO_CSUM : 0;
> +
> + meta = eea_tx_desc_fill(tx, addr, hlen, !shinfo->nr_frags, skb, flags);
> +
> + if (eea_fill_desc_from_skb(skb, tx->ering, meta->desc))
> + goto err;
> +
> + for (i = 0; i < shinfo->nr_frags; i++) {
> + const skb_frag_t *frag = &shinfo->frags[i];
> + bool is_last = i == (shinfo->nr_frags - 1);
> +
> + err = eea_tx_add_skb_frag(tx, meta, frag, is_last);
> + if (err)
> + goto err;
> + }
> +
> + meta->num = shinfo->nr_frags + 1;
It looks like there is no memory barrier after filling the descriptor
and before committing it. Whoever is processing this data could
possibly observe inconsistent/corrupted descriptors.
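I would expect something like the following before the producer
index/doorbell update that publishes the descriptors (the exact
location depends on how the ring commit is implemented):

	/* Ensure the device observes fully written descriptors
	 * before the producer index update makes them visible.
	 */
	dma_wmb();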
/P