Message-ID: <CAMB2axNnzkqao1+md4qN5UvodzNQeNCywa6xbtbNGm4i1HR7LA@mail.gmail.com>
Date: Thu, 18 Sep 2025 10:50:48 -0700
From: Amery Hung <ameryhung@...il.com>
To: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org, alexei.starovoitov@...il.com,
andrii@...nel.org, daniel@...earbox.net, paul.chaignon@...il.com,
kuba@...nel.org, stfomichev@...il.com, martin.lau@...nel.org,
mohsin.bashr@...il.com, noren@...dia.com, dtatulea@...dia.com,
saeedm@...dia.com, tariqt@...dia.com, mbloch@...dia.com, kernel-team@...a.com
Subject: Re: [PATCH bpf-next v4 1/6] bpf: Allow bpf_xdp_shrink_data to shrink
a frag from head and tail
On Thu, Sep 18, 2025 at 1:52 AM Maciej Fijalkowski
<maciej.fijalkowski@...el.com> wrote:
>
> On Wed, Sep 17, 2025 at 03:55:08PM -0700, Amery Hung wrote:
> > Move skb_frag_t adjustment into bpf_xdp_shrink_data() and extend its
> > functionality to be able to shrink an xdp fragment from both head and
> > tail. In a later patch, bpf_xdp_pull_data() will reuse it to shrink an
> > xdp fragment from head.
> >
> > Additionally, in bpf_xdp_frags_shrink_tail(), breaking the loop when
> > bpf_xdp_shrink_data() returns false (i.e., not releasing the current
> > fragment) is not necessary as the loop condition, offset > 0, has the
> > same effect. Remove the else branch to simplify the code.
> >
> > Signed-off-by: Amery Hung <ameryhung@...il.com>
> > ---
> >  include/net/xdp_sock_drv.h | 21 ++++++++++++++++++---
> >  net/core/filter.c          | 28 +++++++++++++++++-----------
> >  2 files changed, 35 insertions(+), 14 deletions(-)
> >
> > diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> > index 513c8e9704f6..4f2d3268a676 100644
> > --- a/include/net/xdp_sock_drv.h
> > +++ b/include/net/xdp_sock_drv.h
> > @@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
> >  	return ret;
> >  }
> >
> > -static inline void xsk_buff_del_tail(struct xdp_buff *tail)
> > +static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
> >  {
> > -	struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
> > +	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
> >
> >  	list_del(&xskb->list_node);
> >  }
> >
> > +static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
> > +{
> > +	struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
> > +	struct xdp_buff_xsk *frag;
> > +
> > +	frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
> > +				list_node);
> > +	return &frag->xdp;
> > +}
> > +
> >  static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
> >  {
> >  	struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
> > @@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
> >  	return NULL;
> >  }
> >
> > -static inline void xsk_buff_del_tail(struct xdp_buff *tail)
> > +static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
> > +{
> > +}
> > +
> > +static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
> >  {
> > +	return NULL;
> >  }
> >
> >  static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 63f3baee2daf..0b82cb348ce0 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -4153,27 +4153,31 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
> >  	return 0;
> >  }
> >
> > -static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
> > +static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, bool tail,
> >  				   enum xdp_mem_type mem_type, bool release)
> >  {
> > -	struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
> > +	struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
> > +					  xsk_buff_get_head(xdp);
> >
> >  	if (release) {
> > -		xsk_buff_del_tail(zc_frag);
> > +		xsk_buff_del_frag(zc_frag);
> >  		__xdp_return(0, mem_type, false, zc_frag);
> >  	} else {
> > -		zc_frag->data_end -= shrink;
> > +		if (tail)
> > +			zc_frag->data_end -= shrink;
> > +		else
> > +			zc_frag->data += shrink;
> >  	}
> >  }
> >
> >  static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
> > -				int shrink)
> > +				int shrink, bool tail)
> >  {
> >  	enum xdp_mem_type mem_type = xdp->rxq->mem.type;
> >  	bool release = skb_frag_size(frag) == shrink;
> >
> >  	if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
> > -		bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
> > +		bpf_xdp_shrink_data_zc(xdp, shrink, tail, mem_type, release);
> >  		goto out;
> >  	}
> >
> > @@ -4181,6 +4185,12 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
> >  		__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
> >
> >  out:
> > +	if (!release) {
> > +		if (!tail)
> > +			skb_frag_off_add(frag, shrink);
> > +		skb_frag_size_sub(frag, shrink);
> > +	}
>
> Hi Amery,
>
> it feels a bit off to have separate conditions around @release. How about
> something below?
>
Agree that it feels weird. It bugged me a little bit when working on this set...
Let me try again. I moved common things out so that
bpf_xdp_shrink_data_zc() just does adjustments specific to zerocopy.
Does it look better to you?

static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
					       bool tail, bool release)
{
	struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
					  xsk_buff_get_head(xdp);

	if (release) {
		xsk_buff_del_frag(zc_frag);
	} else {
		if (tail)
			zc_frag->data_end -= shrink;
		else
			zc_frag->data += shrink;
	}

	return zc_frag;
}

static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
				int shrink, bool tail)
{
	enum xdp_mem_type mem_type = xdp->rxq->mem.type;
	bool release = skb_frag_size(frag) == shrink;
	netmem_ref netmem = skb_frag_netmem(frag);
	struct xdp_buff *zc_frag = NULL;

	if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
		netmem = 0;
		zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
	}

	if (release) {
		__xdp_return(netmem, mem_type, false, zc_frag);
	} else {
		if (!tail)
			skb_frag_off_add(frag, shrink);
		skb_frag_size_sub(frag, shrink);
	}

	return release;
}
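
For context (not part of this patch, and the function name below is made up):
a rough sketch of how a head-shrink caller, e.g. the bpf_xdp_pull_data() path
in the later patch, could reuse bpf_xdp_shrink_data() with tail == false.
Frag compaction is left out, so treat it purely as an illustration of the
calling convention.

/* Hypothetical sketch, not the code from patch 2: walk frags from the
 * front, shrink each by up to @offset and count fully-released frags.
 */
static int bpf_xdp_frags_shrink_head_sketch(struct xdp_buff *xdp, int offset)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	int i, len_free = 0, n_frags_free = 0;

	for (i = 0; i < sinfo->nr_frags && offset > 0; i++) {
		skb_frag_t *frag = &sinfo->frags[i];
		int shrink = min_t(int, offset, skb_frag_size(frag));

		len_free += shrink;
		offset -= shrink;
		if (bpf_xdp_shrink_data(xdp, frag, shrink, false))
			n_frags_free++;
	}

	/* Released frags would still have to be shifted out of
	 * sinfo->frags[]; omitted here for brevity.
	 */
	sinfo->nr_frags -= n_frags_free;
	sinfo->xdp_frags_size -= len_free;

	return 0;
}
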
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 0b82cb348ce0..b1fca279c1de 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4175,20 +4175,17 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
>  {
>  	enum xdp_mem_type mem_type = xdp->rxq->mem.type;
>  	bool release = skb_frag_size(frag) == shrink;
> +	bool zc = mem_type == MEM_TYPE_XSK_BUFF_POOL;
>
> -	if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
> +	if (zc)
>  		bpf_xdp_shrink_data_zc(xdp, shrink, tail, mem_type, release);
> -		goto out;
> -	}
> -
> -	if (release)
> -		__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
>
> -out:
>  	if (!release) {
>  		if (!tail)
>  			skb_frag_off_add(frag, shrink);
>  		skb_frag_size_sub(frag, shrink);
> +	} else if (!zc) {
> +		__xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
>  	}
>
>  	return release;
>
> > +
> >  	return release;
> >  }
> >
> > @@ -4198,12 +4208,8 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
> >
> >  		len_free += shrink;
> >  		offset -= shrink;
> > -		if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
> > +		if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
> >  			n_frags_free++;
> > -		} else {
> > -			skb_frag_size_sub(frag, shrink);
> > -			break;
> > -		}
> >  	}
> >  	sinfo->nr_frags -= n_frags_free;
> >  	sinfo->xdp_frags_size -= len_free;
> > --
> > 2.47.3
> >