Message-ID: <c02c0369-ece0-4437-aa56-e8e36d945a23@suse.de>
Date: Tue, 28 Oct 2025 17:34:46 +0100
From: Fernando Fernandez Mancera <fmancera@...e.de>
To: bpf@...r.kernel.org
Cc: netdev@...r.kernel.org, magnus.karlsson@...el.com,
maciej.fijalkowski@...el.com, sdf@...ichev.me, kerneljasonxing@...il.com,
fw@...len.de
Subject: Re: [PATCH 2/2 bpf] xsk: avoid data corruption on cq descriptor
number
On 10/28/25 5:02 PM, Fernando Fernandez Mancera wrote:
> Since commit 30f241fcf52a ("xsk: Fix immature cq descriptor
> production"), the descriptor number is stored in skb control block and
> xsk_cq_submit_addr_locked() relies on it to put the umem addrs onto
> pool's completion queue.
>[...]
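For context on why the stored count matters: the completion side walks the
per-skb list of umem addresses and publishes one cq entry per descriptor
(the first descriptor's address is stored separately via
xsk_skb_init_misc()). A rough sketch of that walk, illustrative only -- the
real xsk_cq_submit_addr_locked() differs in detail; xskq_prod_submit_addr()
and pool->cq_lock come from the existing xsk code:

static void xsk_cq_submit_addrs_sketch(struct xsk_buff_pool *pool,
                                       struct sk_buff *skb)
{
        struct xdp_skb_ext *ext = skb_ext_find(skb, SKB_EXT_XDP);
        struct xsk_addr_node *pos, *tmp;
        unsigned long flags;

        if (!ext)
                return;

        spin_lock_irqsave(&pool->cq_lock, flags);
        /* one completion entry per umem address queued by the frag path */
        list_for_each_entry_safe(pos, tmp, &ext->addrs_list, addr_node) {
                xskq_prod_submit_addr(pool->cq, pos->addr);
                list_del(&pos->addr_node);
                kmem_cache_free(xsk_tx_generic_cache, pos);
        }
        spin_unlock_irqrestore(&pool->cq_lock, flags);
}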
>
> len = desc->len;
> @@ -804,6 +823,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> if (unlikely(err))
> goto free_err;
>
> + if (!skb_ext_add(skb, SKB_EXT_XDP)) {
> + err = -ENOMEM;
> + goto free_err;
> + }
> +
This is a leftover. Without it the logic is simplified (the extension is
then only allocated lazily in the fragment path below) and the performance
for non-fragmented traffic is not affected at all.

While reviewing this, please consider this hunk dropped. I will send a v2
within 24 hours anyway, as it indeed introduces buggy behavior.
> xsk_skb_init_misc(skb, xs, desc->addr);
> if (desc->options & XDP_TX_METADATA) {
> err = xsk_skb_metadata(skb, buffer, desc,
> @@ -814,6 +838,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> } else {
> int nr_frags = skb_shinfo(skb)->nr_frags;
> struct xsk_addr_node *xsk_addr;
> + struct xdp_skb_ext *ext;
> struct page *page;
> u8 *vaddr;
>
> @@ -828,6 +853,22 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> goto free_err;
> }
>
> + ext = skb_ext_find(skb, SKB_EXT_XDP);
> + if (!ext) {
> + ext = skb_ext_add(skb, SKB_EXT_XDP);
> + if (!ext) {
> + __free_page(page);
> + err = -ENOMEM;
> + goto free_err;
> + }
> + memset(ext, 0, sizeof(*ext));
> + INIT_LIST_HEAD(&ext->addrs_list);
> + ext->num_descs = 1;
> + } else if (ext->num_descs == 0) {
> + INIT_LIST_HEAD(&ext->addrs_list);
> + ext->num_descs = 1;
> + }
> +
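Side note for v2: this lazy init could be folded into a small helper,
something like the following (untested sketch, the xsk_skb_ext_get() name
is made up):

static struct xdp_skb_ext *xsk_skb_ext_get(struct sk_buff *skb)
{
        struct xdp_skb_ext *ext;

        ext = skb_ext_find(skb, SKB_EXT_XDP);
        if (!ext) {
                ext = skb_ext_add(skb, SKB_EXT_XDP);
                if (!ext)
                        return NULL;
                memset(ext, 0, sizeof(*ext));
        }
        if (!ext->num_descs) {
                /* first fragment for this skb: start the address list */
                INIT_LIST_HEAD(&ext->addrs_list);
                ext->num_descs = 1;
        }
        return ext;
}

That would leave a single NULL check at the call site before the
__free_page()/-ENOMEM error path.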
> xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
> if (!xsk_addr) {
> __free_page(page);
> @@ -843,12 +884,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
> refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
>
> xsk_addr->addr = desc->addr;
> - list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
> + list_add_tail(&xsk_addr->addr_node, &ext->addrs_list);
> + xsk_inc_num_desc(skb);
> }
> }
>
> - xsk_inc_num_desc(skb);
> -
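With the increment moved into the fragment branch above, the count is only
bumped when an address is actually queued on the list. On top of the
extension, xsk_inc_num_desc() would look roughly like this (illustrative,
the exact helper may differ):

static void xsk_inc_num_desc(struct sk_buff *skb)
{
        struct xdp_skb_ext *ext = skb_ext_find(skb, SKB_EXT_XDP);

        if (ext)
                ext->num_descs++;
}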
> return skb;
>
> free_err:
> @@ -857,7 +897,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
>
> if (err == -EOVERFLOW) {
> /* Drop the packet */
> - xsk_inc_num_desc(xs->skb);
> xsk_drop_skb(xs->skb);
> xskq_cons_release(xs->tx);
> } else {