Message-ID: <1b1697fe-2393-4959-b29e-59fb30e5ed49@redhat.com>
Date: Tue, 21 Oct 2025 10:28:49 +0200
From: Paolo Abeni <pabeni@...hat.com>
To: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>, netdev@...r.kernel.org
Cc: Andrew Lunn <andrew+netdev@...n.ch>, "David S. Miller"
<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Wen Gu <guwen@...ux.alibaba.com>,
Philo Lu <lulie@...ux.alibaba.com>, Lorenzo Bianconi <lorenzo@...nel.org>,
Vadim Fedorenko <vadim.fedorenko@...ux.dev>,
Lukas Bulwahn <lukas.bulwahn@...hat.com>,
Geert Uytterhoeven <geert+renesas@...der.be>,
Vivian Wang <wangruikang@...as.ac.cn>,
Troy Mitchell <troy.mitchell@...ux.spacemit.com>,
Dust Li <dust.li@...ux.alibaba.com>
Subject: Re: [PATCH net-next v7 4/5] eea: create/destroy rx,tx queues for
netdevice open and stop
On 10/16/25 1:06 PM, Xuan Zhuo wrote:
> +/* resources: ring, buffers, irq */
> +int eea_reset_hw_resources(struct eea_net *enet, struct eea_net_tmp *tmp)
> +{
> + struct eea_net_tmp _tmp;
> + int err;
> +
> + if (!tmp) {
> + enet_init_cfg(enet, &_tmp);
> + tmp = &_tmp;
As suggested on v5, you should do:

	enet_init_cfg(enet, &status);
	eea_reset_hw_resources(enet, &status);

in the caller that currently passes a NULL argument.
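With that in place, the NULL handling here could go away entirely,
i.e. (rough sketch, completely untested):

	int eea_reset_hw_resources(struct eea_net *enet, struct eea_net_tmp *tmp)
	{
		int err;

		/* tmp is now always provided by the caller, so the _tmp
		 * local and the !tmp branch are not needed anymore.
		 */
		if (!netif_running(enet->netdev)) {
			enet->cfg = tmp->cfg;
			return 0;
		}
		...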
> + }
> +
> + if (!netif_running(enet->netdev)) {
> + enet->cfg = tmp->cfg;
> + return 0;
> + }
> +
> + err = eea_alloc_rxtx_q_mem(tmp);
> + if (err) {
> + netdev_warn(enet->netdev,
> + "eea reset: alloc q failed. stop reset. err %d\n",
> + err);
> + return err;
> + }
> +
> + eea_netdev_stop(enet->netdev);
> +
> + enet_bind_new_q_and_cfg(enet, tmp);
> +
> + err = eea_active_ring_and_irq(enet);
> + if (err) {
> + netdev_warn(enet->netdev,
> + "eea reset: active new ring and irq failed. err %d\n",
> + err);
> + return err;
> + }
> +
> + err = eea_start_rxtx(enet->netdev);
> + if (err)
> + netdev_warn(enet->netdev,
> + "eea reset: start queue failed. err %d\n", err);
Following up on the v5 discussion, I see this function is used to handle
the scenario where the entire setup fails, but it's also used in the
next patch to implement the set_ring/set_channel operations. The latter
should leave the device in a working state even when the requested
change is not possible, so this function needs to fail gracefully at
least on such invocations.
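Something along these lines could work (hand-waving sketch; the 'old'
snapshot and eea_bind_old_q_and_cfg() are made-up names):

	err = eea_active_ring_and_irq(enet);
	if (err) {
		/* Restore the previous rings/cfg so that the device
		 * keeps working with the old settings, and only report
		 * the error to the caller.
		 */
		eea_bind_old_q_and_cfg(enet, &old);
		if (eea_active_ring_and_irq(enet))
			goto fatal;	/* only now give up completely */
		eea_start_rxtx(enet->netdev);
		return err;
	}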
[...]
> +/* ha handle code */
> +static void eea_ha_handle_work(struct work_struct *work)
> +{
> + struct eea_pci_device *ep_dev;
> + struct eea_device *edev;
> + struct pci_dev *pci_dev;
> + u16 reset;
> +
> + ep_dev = container_of(work, struct eea_pci_device, ha_handle_work);
> + edev = &ep_dev->edev;
> +
> + /* Ha interrupt is triggered, so there maybe some error, we may need to
> + * reset the device or reset some queues.
> + */
> + dev_warn(&ep_dev->pci_dev->dev, "recv ha interrupt.\n");
> +
> + if (ep_dev->reset_pos) {
> + pci_read_config_word(ep_dev->pci_dev, ep_dev->reset_pos,
> + &reset);
> + /* clear bit */
> + pci_write_config_word(ep_dev->pci_dev, ep_dev->reset_pos,
> + 0xFFFF);
> +
> + if (reset & EEA_PCI_CAP_RESET_FLAG) {
> + dev_warn(&ep_dev->pci_dev->dev,
> + "recv device reset request.\n");
> +
> + pci_dev = ep_dev->pci_dev;
> +
> + /* The pci remove callback may hold this lock. If the
> + * pci remove callback is called, then we can ignore the
> + * ha interrupt.
> + */
> + if (mutex_trylock(&edev->ha_lock)) {
> + edev->ha_reset = true;
> +
> + __eea_pci_remove(pci_dev, false);
> + __eea_pci_probe(pci_dev, ep_dev);
> +
> + edev->ha_reset = false;
> + mutex_unlock(&edev->ha_lock);
> + } else {
> + dev_warn(&ep_dev->pci_dev->dev,
> + "ha device reset: trylock failed.\n");
> + }
> + return;
Nesting here is quite high; consider moving the above into a separate
helper.
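Something like the following (untested, the helper name is just a
suggestion), which also flattens the error path via an early return:

	static void eea_ha_handle_reset(struct eea_pci_device *ep_dev)
	{
		struct eea_device *edev = &ep_dev->edev;
		struct pci_dev *pci_dev = ep_dev->pci_dev;

		/* The pci remove callback may hold this lock. If the
		 * pci remove callback is called, then we can ignore
		 * the ha interrupt.
		 */
		if (!mutex_trylock(&edev->ha_lock)) {
			dev_warn(&pci_dev->dev,
				 "ha device reset: trylock failed.\n");
			return;
		}

		edev->ha_reset = true;
		__eea_pci_remove(pci_dev, false);
		__eea_pci_probe(pci_dev, ep_dev);
		edev->ha_reset = false;
		mutex_unlock(&edev->ha_lock);
	}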
> @@ -45,9 +52,17 @@ u16 eea_pci_dev_id(struct eea_device *edev);
>
> int eea_device_reset(struct eea_device *dev);
> void eea_device_ready(struct eea_device *dev);
> +
Minor nit: either do not introduce this whitespace, or add it together
with the surrounding chunk of code.
[...]
> +static void meta_align_offset(struct eea_net_rx *rx, struct eea_rx_meta *meta)
> +{
> + int h, b;
> +
> + h = rx->headroom;
> + b = meta->offset + h;
> +
> + b = ALIGN(b, 128);
Out of sheer curiosity, why the above alignment? A comment and a macro
instead of the magic number would be useful.
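e.g. something like (EEA_RX_BUF_ALIGN is a made-up name, and only you
can fill in the actual reason for the 128 bytes requirement):

	/* <device/cacheline requirement mandating this alignment> */
	#define EEA_RX_BUF_ALIGN	128

	b = ALIGN(b, EEA_RX_BUF_ALIGN);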
> +static int eea_alloc_rx_hdr(struct eea_net_tmp *tmp, struct eea_net_rx *rx)
> +{
> + struct page *hdr_page = NULL;
> + struct eea_rx_meta *meta;
> + u32 offset = 0, hdrsize;
> + struct device *dmadev;
> + dma_addr_t dma;
> + int i;
> +
> + dmadev = tmp->edev->dma_dev;
> + hdrsize = tmp->cfg.split_hdr;
> +
> + for (i = 0; i < tmp->cfg.rx_ring_depth; ++i) {
> + meta = &rx->meta[i];
> +
> + if (!hdr_page || offset + hdrsize > PAGE_SIZE) {
> + hdr_page = dev_alloc_page();
> + if (!hdr_page)
> + return -ENOMEM;
Why are you not using the page pool for the headers?
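Assuming the rx queue already owns a page_pool created with
PP_FLAG_DMA_MAP ('rx->pp' here), the whole chunk could become something
like (untested):

	hdr_page = page_pool_dev_alloc_frag(rx->pp, &offset, hdrsize);
	if (!hdr_page)
		return -ENOMEM;

	meta->hdr_dma = page_pool_get_dma_addr(hdr_page) + offset;
	meta->hdr_addr = page_address(hdr_page) + offset;

dropping the manual offset bookkeeping, the dma mapping and the
put_page() error handling below.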
> +
> + dma = dma_map_page(dmadev, hdr_page, 0, PAGE_SIZE,
> + DMA_FROM_DEVICE);
> +
> + if (unlikely(dma_mapping_error(dmadev, dma))) {
> + put_page(hdr_page);
> + return -ENOMEM;
> + }
> +
> + offset = 0;
> + meta->hdr_page = hdr_page;
> + meta->dma = dma;
> + }
> +
> + meta->hdr_dma = dma + offset;
> + meta->hdr_addr = page_address(hdr_page) + offset;
> + offset += hdrsize;
> + }
> +
> + return 0;
> +}
> +
> +static void eea_rx_meta_dma_sync_for_cpu(struct eea_net_rx *rx,
> + struct eea_rx_meta *meta, u32 len)
> +{
> + dma_sync_single_for_cpu(rx->enet->edev->dma_dev,
> + meta->dma + meta->offset + meta->headroom,
> + len, DMA_FROM_DEVICE);
> +}
> +
> +static int eea_harden_check_overflow(struct eea_rx_ctx *ctx,
> + struct eea_net *enet)
> +{
> + if (unlikely(ctx->len > ctx->meta->truesize - ctx->meta->room)) {
Given the above, it looks like the hypervisor could corrupt the guest
driver memory. If so, are the related defensive checks in the guest
really effective?
> + pr_debug("%s: rx error: len %u exceeds truesize %u\n",
> + enet->netdev->name, ctx->len,
> + ctx->meta->truesize - ctx->meta->room);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static bool eea_rx_post(struct eea_net *enet,
> + struct eea_net_rx *rx, gfp_t gfp)
It looks like this function is always called with gfp == GFP_ATOMIC. If
so, just drop the argument.
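i.e.:

	static bool eea_rx_post(struct eea_net *enet, struct eea_net_rx *rx)

and use GFP_ATOMIC directly at the allocation site.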
[...]
> +static int eea_tx_post_skb(struct eea_net_tx *tx, struct sk_buff *skb)
> +{
> + const struct skb_shared_info *shinfo = skb_shinfo(skb);
> + u32 hlen = skb_headlen(skb);
> + struct eea_tx_meta *meta;
> + dma_addr_t addr;
> + int i, err;
> + u16 flags;
> +
> + addr = dma_map_single(tx->dma_dev, skb->data, hlen, DMA_TO_DEVICE);
> + if (unlikely(dma_mapping_error(tx->dma_dev, addr)))
> + return -ENOMEM;
> +
> + flags = skb->ip_summed == CHECKSUM_PARTIAL ? EEA_DESC_F_DO_CSUM : 0;
> +
> + meta = eea_tx_desc_fill(tx, addr, hlen, !shinfo->nr_frags, skb, flags);
> +
> + if (eea_fill_desc_from_skb(skb, tx->ering, meta->desc))
> + goto err;
> +
> + for (i = 0; i < shinfo->nr_frags; i++) {
> + const skb_frag_t *frag = &shinfo->frags[i];
> + bool is_last = i == (shinfo->nr_frags - 1);
> +
> + err = eea_tx_add_skb_frag(tx, meta, frag, is_last);
> + if (err)
> + goto err;
> + }
> +
> + meta->num = shinfo->nr_frags + 1;
It looks like there is no memory barrier after filling the descriptor
and before committing it. Whoever is processing this data could
possibly observe inconsistent/corrupted descriptors.
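I would expect something like the following before the producer
index/doorbell update that publishes the descriptors (the exact
location depends on how the ring commit is implemented):

	/* Ensure the device observes fully written descriptors
	 * before the producer index update makes them visible.
	 */
	dma_wmb();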
/P