lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAL+tcoB8d3jvD50oa3p5eZT+qLAXFXtuEQ9TvRr3jHzkxeXtSg@mail.gmail.com>
Date: Fri, 5 Sep 2025 08:09:43 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: Xin Zhao <jackzxcui1989@....com>
Cc: willemdebruijn.kernel@...il.com, edumazet@...gle.com, ferenc@...es.dev, 
	davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com, horms@...nel.org, 
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next v10 2/2] net: af_packet: Use hrtimer to do the
 retire operation

On Thu, Sep 4, 2025 at 11:00 PM Xin Zhao <jackzxcui1989@....com> wrote:
>
> On Thu, Sep 4, 2025 at 10:50 +0800 Jason Xing <kerneljasonxing@...il.com> wrote:
>
> > > In the description of [PATCH net-next v10 0/2] net: af_packet: optimize retire operation:
> > >
> > > Changes in v7:
> > >   When the callback return, without sk_buff_head lock protection, __run_hrtimer will
> > >   enqueue the timer if return HRTIMER_RESTART. Setting the hrtimer expires while
> > >   enqueuing a timer may cause chaos in the hrtimer red-black tree.
> > >
> > > Neither hrtimer_set_expires nor hrtimer_forward_now is allowed when the hrtimer has
> > > already been enqueued. Therefore, the only place where the hrtimer timeout can be set is
> > > within the callback, at which point the hrtimer is in a non-enqueued state and can have
> > > its timeout set.
> >
> > Can we use hrtimer_is_queued() instead? Please see tcp_pacing_check()
> > as an example. But considering your following explanation, I think
> > it's okay now.
>
> Okay, let's keep the current logic as it is.

In case I didn't clearly state it, you don't need to change the
overall logic but only add back one missing point as I replied in the
last email? That is lookup path needing to refresh/update the timer.

>
>
>
> > > /* kbdq - kernel block descriptor queue */
> > > struct tpacket_kbdq_core {
> > >         struct pgv      *pkbdq;
> > >         unsigned int    feature_req_word;
> > >         unsigned int    hdrlen;
> > >         unsigned short  kactive_blk_num;
> > >         unsigned short  blk_sizeof_priv;
> > >         unsigned char   reset_pending_on_curr_blk;
> > >
> > >         char            *pkblk_start;
> > >         char            *pkblk_end;
> > >         int             kblk_size;
> > >         unsigned int    max_frame_len;
> > >         unsigned int    knum_blocks;
> > >         char            *prev;
> > >         char            *nxt_offset;
> > >
> > >         unsigned short  version;
> > >
> > >         uint64_t        knxt_seq_num;
> > >         struct sk_buff  *skb;
> > >
> > >         rwlock_t        blk_fill_in_prog_lock;
> > >
> > >         /* timer to retire an outstanding block */
> > >         struct hrtimer  retire_blk_timer;
> > >
> > >         /* Default is set to 8ms */
> > > #define DEFAULT_PRB_RETIRE_TOV  (8)
> > >
> > >         ktime_t         interval_ktime;
> > > };
> >
> > Could you share the result after running 'pahole --hex -C
> > tpacket_kbdq_core vmlinux'?
>
> I change the struct tpacket_kbdq_core as following:
>
> /* kbdq - kernel block descriptor queue */
> struct tpacket_kbdq_core {
>         struct pgv      *pkbdq;
>         unsigned int    feature_req_word;
>         unsigned int    hdrlen;
>         unsigned char   reset_pending_on_curr_blk;
>         unsigned short  kactive_blk_num;
>         unsigned short  blk_sizeof_priv;
>
>         unsigned short  version;
>
>         char            *pkblk_start;
>         char            *pkblk_end;
>         int             kblk_size;
>         unsigned int    max_frame_len;
>         unsigned int    knum_blocks;
>         char            *prev;
>         char            *nxt_offset;
>
>         uint64_t        knxt_seq_num;
>         struct sk_buff  *skb;
>
>         rwlock_t        blk_fill_in_prog_lock;
>
>         /* timer to retire an outstanding block */
>         struct hrtimer  retire_blk_timer;
>
>         /* Default is set to 8ms */
> #define DEFAULT_PRB_RETIRE_TOV  (8)
>
>         ktime_t         interval_ktime;
> };
>
>
> pahole --hex -C tpacket_kbdq_core vmlinux
>
> running results:
>
> struct tpacket_kbdq_core {
>         struct pgv *               pkbdq;                /*     0   0x8 */
>         unsigned int               feature_req_word;     /*   0x8   0x4 */
>         unsigned int               hdrlen;               /*   0xc   0x4 */
>         unsigned char              reset_pending_on_curr_blk; /*  0x10   0x1 */
>
>         /* XXX 1 byte hole, try to pack */
>
>         short unsigned int         kactive_blk_num;      /*  0x12   0x2 */
>         short unsigned int         blk_sizeof_priv;      /*  0x14   0x2 */
>         short unsigned int         version;              /*  0x16   0x2 */
>         char *                     pkblk_start;          /*  0x18   0x8 */
>         char *                     pkblk_end;            /*  0x20   0x8 */
>         int                        kblk_size;            /*  0x28   0x4 */
>         unsigned int               max_frame_len;        /*  0x2c   0x4 */
>         unsigned int               knum_blocks;          /*  0x30   0x4 */
>
>         /* XXX 4 bytes hole, try to pack */
>
>         char *                     prev;                 /*  0x38   0x8 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         char *                     nxt_offset;           /*  0x40   0x8 */
>         uint64_t                   knxt_seq_num;         /*  0x48   0x8 */
>         struct sk_buff *           skb;                  /*  0x50   0x8 */
>         rwlock_t                   blk_fill_in_prog_lock; /*  0x58  0x30 */
>         /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
>         struct hrtimer             retire_blk_timer __attribute__((__aligned__(8))); /*  0x88  0x40 */
>
>         /* XXX last struct has 4 bytes of padding */
>
>         /* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
>         ktime_t                    interval_ktime;       /*  0xc8   0x8 */
>
>         /* size: 208, cachelines: 4, members: 19 */
>         /* sum members: 203, holes: 2, sum holes: 5 */
>         /* paddings: 1, sum paddings: 4 */
>         /* forced alignments: 1 */
>         /* last cacheline: 16 bytes */
> } __attribute__((__aligned__(8)));

How about this one? The 'size' would be shrinked to168 and the 'sum
holes' remains 5.
# pahole --hex -C tpacket_kbdq_core vmlinux
struct tpacket_kbdq_core {
        struct pgv *               pkbdq;                /*     0   0x8 */
        unsigned int               feature_req_word;     /*   0x8   0x4 */
        unsigned int               hdrlen;               /*   0xc   0x4 */
        short unsigned int         kactive_blk_num;      /*  0x10   0x2 */
        short unsigned int         blk_sizeof_priv;      /*  0x12   0x2 */
        short unsigned int         version;              /*  0x14   0x2 */
        unsigned char              reset_pending_on_curr_blk; /*  0x16   0x1 */

        /* XXX 1 byte hole, try to pack */

        char *                     pkblk_start;          /*  0x18   0x8 */
        char *                     pkblk_end;            /*  0x20   0x8 */
        int                        kblk_size;            /*  0x28   0x4 */
        unsigned int               max_frame_len;        /*  0x2c   0x4 */
        unsigned int               knum_blocks;          /*  0x30   0x4 */

        /* XXX 4 bytes hole, try to pack */

        uint64_t                   knxt_seq_num;         /*  0x38   0x8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        char *                     prev;                 /*  0x40   0x8 */
        char *                     nxt_offset;           /*  0x48   0x8 */
        struct sk_buff *           skb;                  /*  0x50   0x8 */
        rwlock_t                   blk_fill_in_prog_lock; /*  0x58   0x8 */
        ktime_t                    interval_ktime;       /*  0x60   0x8 */
        struct hrtimer             retire_blk_timer
__attribute__((__aligned__(8))); /*  0x68  0x40 */

        /* XXX last struct has 4 bytes of padding */

        /* size: 168, cachelines: 3, members: 19 */
        /* sum members: 163, holes: 2, sum holes: 5 */
        /* paddings: 1, sum paddings: 4 */
        /* forced alignments: 1 */
        /* last cacheline: 40 bytes */
} __attribute__((__aligned__(8)));

I didn't want a more aggressive adjustment around the remaining holes.
At least, the current statistics show a better result than before :)

Thanks,
Jason

>
> Thanks
> Xin Zhao
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ