lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250904145920.1431219-1-jackzxcui1989@163.com>
Date: Thu,  4 Sep 2025 22:59:20 +0800
From: Xin Zhao <jackzxcui1989@....com>
To: kerneljasonxing@...il.com,
	willemdebruijn.kernel@...il.com,
	edumazet@...gle.com,
	ferenc@...es.dev
Cc: davem@...emloft.net,
	kuba@...nel.org,
	pabeni@...hat.com,
	horms@...nel.org,
	netdev@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next v10 2/2] net: af_packet: Use hrtimer to do the retire operation

On Thu, Sep 4, 2025 at 10:50 +0800 Jason Xing <kerneljasonxing@...il.com> wrote:

> > In the description of [PATCH net-next v10 0/2] net: af_packet: optimize retire operation:
> >
> > Changes in v7:
> >   When the callback return, without sk_buff_head lock protection, __run_hrtimer will
> >   enqueue the timer if return HRTIMER_RESTART. Setting the hrtimer expires while
> >   enqueuing a timer may cause chaos in the hrtimer red-black tree.
> >
> > Neither hrtimer_set_expires nor hrtimer_forward_now is allowed when the hrtimer has
> > already been enqueued. Therefore, the only place where the hrtimer timeout can be set is
> > within the callback, at which point the hrtimer is in a non-enqueued state and can have
> > its timeout set.
> 
> Can we use hrtimer_is_queued() instead? Please see tcp_pacing_check()
> as an example. But considering your following explanation, I think
> it's okay now.

Okay, let's keep the current logic as it is.



> > /* kbdq - kernel block descriptor queue */
> > struct tpacket_kbdq_core {
> >         struct pgv      *pkbdq;
> >         unsigned int    feature_req_word;
> >         unsigned int    hdrlen;
> >         unsigned short  kactive_blk_num;
> >         unsigned short  blk_sizeof_priv;
> >         unsigned char   reset_pending_on_curr_blk;
> >
> >         char            *pkblk_start;
> >         char            *pkblk_end;
> >         int             kblk_size;
> >         unsigned int    max_frame_len;
> >         unsigned int    knum_blocks;
> >         char            *prev;
> >         char            *nxt_offset;
> >
> >         unsigned short  version;
> >
> >         uint64_t        knxt_seq_num;
> >         struct sk_buff  *skb;
> >
> >         rwlock_t        blk_fill_in_prog_lock;
> >
> >         /* timer to retire an outstanding block */
> >         struct hrtimer  retire_blk_timer;
> >
> >         /* Default is set to 8ms */
> > #define DEFAULT_PRB_RETIRE_TOV  (8)
> >
> >         ktime_t         interval_ktime;
> > };
> 
> Could you share the result after running 'pahole --hex -C
> tpacket_kbdq_core vmlinux'?

I change the struct tpacket_kbdq_core as following:

/* kbdq - kernel block descriptor queue */
struct tpacket_kbdq_core {
	struct pgv	*pkbdq;
	unsigned int	feature_req_word;
	unsigned int	hdrlen;
	unsigned char	reset_pending_on_curr_blk;
	unsigned short	kactive_blk_num;
	unsigned short	blk_sizeof_priv;

	unsigned short  version;

	char		*pkblk_start;
	char		*pkblk_end;
	int		kblk_size;
	unsigned int	max_frame_len;
	unsigned int	knum_blocks;
	char		*prev;
	char		*nxt_offset;

	uint64_t	knxt_seq_num;
	struct sk_buff	*skb;

	rwlock_t	blk_fill_in_prog_lock;

	/* timer to retire an outstanding block */
	struct hrtimer  retire_blk_timer;

	/* Default is set to 8ms */
#define DEFAULT_PRB_RETIRE_TOV	(8)

	ktime_t		interval_ktime;
};


pahole --hex -C tpacket_kbdq_core vmlinux

running results:

struct tpacket_kbdq_core {
        struct pgv *               pkbdq;                /*     0   0x8 */
        unsigned int               feature_req_word;     /*   0x8   0x4 */
        unsigned int               hdrlen;               /*   0xc   0x4 */
        unsigned char              reset_pending_on_curr_blk; /*  0x10   0x1 */

        /* XXX 1 byte hole, try to pack */

        short unsigned int         kactive_blk_num;      /*  0x12   0x2 */
        short unsigned int         blk_sizeof_priv;      /*  0x14   0x2 */
        short unsigned int         version;              /*  0x16   0x2 */
        char *                     pkblk_start;          /*  0x18   0x8 */
        char *                     pkblk_end;            /*  0x20   0x8 */
        int                        kblk_size;            /*  0x28   0x4 */
        unsigned int               max_frame_len;        /*  0x2c   0x4 */
        unsigned int               knum_blocks;          /*  0x30   0x4 */

        /* XXX 4 bytes hole, try to pack */

        char *                     prev;                 /*  0x38   0x8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        char *                     nxt_offset;           /*  0x40   0x8 */
        uint64_t                   knxt_seq_num;         /*  0x48   0x8 */
        struct sk_buff *           skb;                  /*  0x50   0x8 */
        rwlock_t                   blk_fill_in_prog_lock; /*  0x58  0x30 */
        /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
        struct hrtimer             retire_blk_timer __attribute__((__aligned__(8))); /*  0x88  0x40 */

        /* XXX last struct has 4 bytes of padding */

        /* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
        ktime_t                    interval_ktime;       /*  0xc8   0x8 */

        /* size: 208, cachelines: 4, members: 19 */
        /* sum members: 203, holes: 2, sum holes: 5 */
        /* paddings: 1, sum paddings: 4 */
        /* forced alignments: 1 */
        /* last cacheline: 16 bytes */
} __attribute__((__aligned__(8)));


Thanks
Xin Zhao


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ