[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aMvIONMZ9CFqyNnM@boxer>
Date: Thu, 18 Sep 2025 10:52:08 +0200
From: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
To: Amery Hung <ameryhung@...il.com>
CC: <bpf@...r.kernel.org>, <netdev@...r.kernel.org>,
<alexei.starovoitov@...il.com>, <andrii@...nel.org>, <daniel@...earbox.net>,
<paul.chaignon@...il.com>, <kuba@...nel.org>, <stfomichev@...il.com>,
<martin.lau@...nel.org>, <mohsin.bashr@...il.com>, <noren@...dia.com>,
<dtatulea@...dia.com>, <saeedm@...dia.com>, <tariqt@...dia.com>,
<mbloch@...dia.com>, <kernel-team@...a.com>
Subject: Re: [PATCH bpf-next v4 1/6] bpf: Allow bpf_xdp_shrink_data to shrink
a frag from head and tail
On Wed, Sep 17, 2025 at 03:55:08PM -0700, Amery Hung wrote:
> Move skb_frag_t adjustment into bpf_xdp_shrink_data() and extend its
> functionality to be able to shrink an xdp fragment from both head and
> tail. In a later patch, bpf_xdp_pull_data() will reuse it to shrink an
> xdp fragment from head.
>
> Additionally, in bpf_xdp_frags_shrink_tail(), breaking the loop when
> bpf_xdp_shrink_data() returns false (i.e., not releasing the current
> fragment) is not necessary as the loop condition, offset > 0, has the
> same effect. Remove the else branch to simplify the code.
>
> Signed-off-by: Amery Hung <ameryhung@...il.com>
> ---
> include/net/xdp_sock_drv.h | 21 ++++++++++++++++++---
> net/core/filter.c | 28 +++++++++++++++++-----------
> 2 files changed, 35 insertions(+), 14 deletions(-)
>
> diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> index 513c8e9704f6..4f2d3268a676 100644
> --- a/include/net/xdp_sock_drv.h
> +++ b/include/net/xdp_sock_drv.h
> @@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
> return ret;
> }
>
> -static inline void xsk_buff_del_tail(struct xdp_buff *tail)
> +static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
> {
> - struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
> + struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
>
> list_del(&xskb->list_node);
> }
>
> +static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
> +{
> + struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
> + struct xdp_buff_xsk *frag;
> +
> + frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
> + list_node);
> + return &frag->xdp;
> +}
> +
> static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
> {
> struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
> @@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
> return NULL;
> }
>
> -static inline void xsk_buff_del_tail(struct xdp_buff *tail)
> +static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
> +{
> +}
> +
> +static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
> {
> + return NULL;
> }
>
> static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 63f3baee2daf..0b82cb348ce0 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4153,27 +4153,31 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
> return 0;
> }
>
> -static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
> +static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, bool tail,
> enum xdp_mem_type mem_type, bool release)
> {
> - struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
> + struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
> + xsk_buff_get_head(xdp);
>
> if (release) {
> - xsk_buff_del_tail(zc_frag);
> + xsk_buff_del_frag(zc_frag);
> __xdp_return(0, mem_type, false, zc_frag);
> } else {
> - zc_frag->data_end -= shrink;
> + if (tail)
> + zc_frag->data_end -= shrink;
> + else
> + zc_frag->data += shrink;
> }
> }
>
> static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
> - int shrink)
> + int shrink, bool tail)
> {
> enum xdp_mem_type mem_type = xdp->rxq->mem.type;
> bool release = skb_frag_size(frag) == shrink;
>
> if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
> - bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
> + bpf_xdp_shrink_data_zc(xdp, shrink, tail, mem_type, release);
> goto out;
> }
>
> @@ -4181,6 +4185,12 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
> __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
>
> out:
> + if (!release) {
> + if (!tail)
> + skb_frag_off_add(frag, shrink);
> + skb_frag_size_sub(frag, shrink);
> + }
Hi Amery,
It feels a bit off to have separate conditions around @release. How about
something like the change below?
diff --git a/net/core/filter.c b/net/core/filter.c
index 0b82cb348ce0..b1fca279c1de 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4175,20 +4175,17 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
{
enum xdp_mem_type mem_type = xdp->rxq->mem.type;
bool release = skb_frag_size(frag) == shrink;
+ bool zc = mem_type == MEM_TYPE_XSK_BUFF_POOL;
- if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
+ if (zc)
bpf_xdp_shrink_data_zc(xdp, shrink, tail, mem_type, release);
- goto out;
- }
-
- if (release)
- __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
-out:
if (!release) {
if (!tail)
skb_frag_off_add(frag, shrink);
skb_frag_size_sub(frag, shrink);
+ } else if (!zc) {
+ __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
}
return release;
> +
> return release;
> }
>
> @@ -4198,12 +4208,8 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
>
> len_free += shrink;
> offset -= shrink;
> - if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
> + if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
> n_frags_free++;
> - } else {
> - skb_frag_size_sub(frag, shrink);
> - break;
> - }
> }
> sinfo->nr_frags -= n_frags_free;
> sinfo->xdp_frags_size -= len_free;
> --
> 2.47.3
>
Powered by blists - more mailing lists