lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAMB2axNnzkqao1+md4qN5UvodzNQeNCywa6xbtbNGm4i1HR7LA@mail.gmail.com>
Date: Thu, 18 Sep 2025 10:50:48 -0700
From: Amery Hung <ameryhung@...il.com>
To: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
Cc: bpf@...r.kernel.org, netdev@...r.kernel.org, alexei.starovoitov@...il.com, 
	andrii@...nel.org, daniel@...earbox.net, paul.chaignon@...il.com, 
	kuba@...nel.org, stfomichev@...il.com, martin.lau@...nel.org, 
	mohsin.bashr@...il.com, noren@...dia.com, dtatulea@...dia.com, 
	saeedm@...dia.com, tariqt@...dia.com, mbloch@...dia.com, kernel-team@...a.com
Subject: Re: [PATCH bpf-next v4 1/6] bpf: Allow bpf_xdp_shrink_data to shrink
 a frag from head and tail

On Thu, Sep 18, 2025 at 1:52 AM Maciej Fijalkowski
<maciej.fijalkowski@...el.com> wrote:
>
> On Wed, Sep 17, 2025 at 03:55:08PM -0700, Amery Hung wrote:
> > Move skb_frag_t adjustment into bpf_xdp_shrink_data() and extend its
> > functionality to be able to shrink an xdp fragment from both head and
> > tail. In a later patch, bpf_xdp_pull_data() will reuse it to shrink an
> > xdp fragment from head.
> >
> > Additionally, in bpf_xdp_frags_shrink_tail(), breaking the loop when
> > bpf_xdp_shrink_data() returns false (i.e., not releasing the current
> > fragment) is not necessary as the loop condition, offset > 0, has the
> > same effect. Remove the else branch to simplify the code.
> >
> > Signed-off-by: Amery Hung <ameryhung@...il.com>
> > ---
> >  include/net/xdp_sock_drv.h | 21 ++++++++++++++++++---
> >  net/core/filter.c          | 28 +++++++++++++++++-----------
> >  2 files changed, 35 insertions(+), 14 deletions(-)
> >
> > diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> > index 513c8e9704f6..4f2d3268a676 100644
> > --- a/include/net/xdp_sock_drv.h
> > +++ b/include/net/xdp_sock_drv.h
> > @@ -160,13 +160,23 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
> >       return ret;
> >  }
> >
> > -static inline void xsk_buff_del_tail(struct xdp_buff *tail)
> > +static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
> >  {
> > -     struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);
> > +     struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
> >
> >       list_del(&xskb->list_node);
> >  }
> >
> > +static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
> > +{
> > +     struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
> > +     struct xdp_buff_xsk *frag;
> > +
> > +     frag = list_first_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
> > +                             list_node);
> > +     return &frag->xdp;
> > +}
> > +
> >  static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
> >  {
> >       struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp);
> > @@ -389,8 +399,13 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first)
> >       return NULL;
> >  }
> >
> > -static inline void xsk_buff_del_tail(struct xdp_buff *tail)
> > +static inline void xsk_buff_del_frag(struct xdp_buff *xdp)
> > +{
> > +}
> > +
> > +static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first)
> >  {
> > +     return NULL;
> >  }
> >
> >  static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 63f3baee2daf..0b82cb348ce0 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -4153,27 +4153,31 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
> >       return 0;
> >  }
> >
> > -static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
> > +static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, bool tail,
> >                                  enum xdp_mem_type mem_type, bool release)
> >  {
> > -     struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
> > +     struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
> > +                                       xsk_buff_get_head(xdp);
> >
> >       if (release) {
> > -             xsk_buff_del_tail(zc_frag);
> > +             xsk_buff_del_frag(zc_frag);
> >               __xdp_return(0, mem_type, false, zc_frag);
> >       } else {
> > -             zc_frag->data_end -= shrink;
> > +             if (tail)
> > +                     zc_frag->data_end -= shrink;
> > +             else
> > +                     zc_frag->data += shrink;
> >       }
> >  }
> >
> >  static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
> > -                             int shrink)
> > +                             int shrink, bool tail)
> >  {
> >       enum xdp_mem_type mem_type = xdp->rxq->mem.type;
> >       bool release = skb_frag_size(frag) == shrink;
> >
> >       if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
> > -             bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release);
> > +             bpf_xdp_shrink_data_zc(xdp, shrink, tail, mem_type, release);
> >               goto out;
> >       }
> >
> > @@ -4181,6 +4185,12 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
> >               __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
> >
> >  out:
> > +     if (!release) {
> > +             if (!tail)
> > +                     skb_frag_off_add(frag, shrink);
> > +             skb_frag_size_sub(frag, shrink);
> > +     }
>
> Hi Amery,
>
> it feels a bit off to have separate conditions around @release. How about
> something below?
>

Agree that it feels weird. It bugged me a little bit when working on this set...

Let me try again. I moved common things out so that
bpf_xdp_shrink_data_zc() just does adjustments specific to zerocopy.
Does it look better to you?


static struct xdp_buff *bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
                                               bool tail, bool release)
{
        struct xdp_buff *zc_frag = tail ? xsk_buff_get_tail(xdp) :
                                          xsk_buff_get_head(xdp);

        if (release) {
                xsk_buff_del_frag(zc_frag);
        } else {
                if (tail)
                        zc_frag->data_end -= shrink;
                else
                        zc_frag->data += shrink;
        }

        return zc_frag;
}

static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
                                int shrink, bool tail)
{
        enum xdp_mem_type mem_type = xdp->rxq->mem.type;
        bool release = skb_frag_size(frag) == shrink;
        netmem_ref netmem = skb_frag_netmem(frag);
        struct xdp_buff *zc_frag = NULL;

        if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
                netmem = 0;
                zc_frag = bpf_xdp_shrink_data_zc(xdp, shrink, tail, release);
        }

        if (release) {
                __xdp_return(netmem, mem_type, false, zc_frag);
        } else {
                if (!tail)
                        skb_frag_off_add(frag, shrink);
                skb_frag_size_sub(frag, shrink);
        }

        return release;
}

>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 0b82cb348ce0..b1fca279c1de 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4175,20 +4175,17 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
>  {
>         enum xdp_mem_type mem_type = xdp->rxq->mem.type;
>         bool release = skb_frag_size(frag) == shrink;
> +       bool zc = mem_type == MEM_TYPE_XSK_BUFF_POOL;
>
> -       if (mem_type == MEM_TYPE_XSK_BUFF_POOL) {
> +       if (zc)
>                 bpf_xdp_shrink_data_zc(xdp, shrink, tail, mem_type, release);
> -               goto out;
> -       }
> -
> -       if (release)
> -               __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
>
> -out:
>         if (!release) {
>                 if (!tail)
>                         skb_frag_off_add(frag, shrink);
>                 skb_frag_size_sub(frag, shrink);
> +       } else if (!zc) {
> +               __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL);
>         }
>
>         return release;
>
> > +
> >       return release;
> >  }
> >
> > @@ -4198,12 +4208,8 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
> >
> >               len_free += shrink;
> >               offset -= shrink;
> > -             if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
> > +             if (bpf_xdp_shrink_data(xdp, frag, shrink, true))
> >                       n_frags_free++;
> > -             } else {
> > -                     skb_frag_size_sub(frag, shrink);
> > -                     break;
> > -             }
> >       }
> >       sinfo->nr_frags -= n_frags_free;
> >       sinfo->xdp_frags_size -= len_free;
> > --
> > 2.47.3
> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ