[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250904145920.1431219-1-jackzxcui1989@163.com>
Date: Thu, 4 Sep 2025 22:59:20 +0800
From: Xin Zhao <jackzxcui1989@....com>
To: kerneljasonxing@...il.com,
willemdebruijn.kernel@...il.com,
edumazet@...gle.com,
ferenc@...es.dev
Cc: davem@...emloft.net,
kuba@...nel.org,
pabeni@...hat.com,
horms@...nel.org,
netdev@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next v10 2/2] net: af_packet: Use hrtimer to do the retire operation
On Thu, Sep 4, 2025 at 10:50 +0800 Jason Xing <kerneljasonxing@...il.com> wrote:
> > In the description of [PATCH net-next v10 0/2] net: af_packet: optimize retire operation:
> >
> > Changes in v7:
> > When the callback return, without sk_buff_head lock protection, __run_hrtimer will
> > enqueue the timer if return HRTIMER_RESTART. Setting the hrtimer expires while
> > enqueuing a timer may cause chaos in the hrtimer red-black tree.
> >
> > Neither hrtimer_set_expires nor hrtimer_forward_now is allowed when the hrtimer has
> > already been enqueued. Therefore, the only place where the hrtimer timeout can be set is
> > within the callback, at which point the hrtimer is in a non-enqueued state and can have
> > its timeout set.
>
> Can we use hrtimer_is_queued() instead? Please see tcp_pacing_check()
> as an example. But considering your following explanation, I think
> it's okay now.
Okay, let's keep the current logic as it is.
> > /* kbdq - kernel block descriptor queue */
> > struct tpacket_kbdq_core {
> > struct pgv *pkbdq;
> > unsigned int feature_req_word;
> > unsigned int hdrlen;
> > unsigned short kactive_blk_num;
> > unsigned short blk_sizeof_priv;
> > unsigned char reset_pending_on_curr_blk;
> >
> > char *pkblk_start;
> > char *pkblk_end;
> > int kblk_size;
> > unsigned int max_frame_len;
> > unsigned int knum_blocks;
> > char *prev;
> > char *nxt_offset;
> >
> > unsigned short version;
> >
> > uint64_t knxt_seq_num;
> > struct sk_buff *skb;
> >
> > rwlock_t blk_fill_in_prog_lock;
> >
> > /* timer to retire an outstanding block */
> > struct hrtimer retire_blk_timer;
> >
> > /* Default is set to 8ms */
> > #define DEFAULT_PRB_RETIRE_TOV (8)
> >
> > ktime_t interval_ktime;
> > };
>
> Could you share the result after running 'pahole --hex -C
> tpacket_kbdq_core vmlinux'?
I change the struct tpacket_kbdq_core as following:
/* kbdq - kernel block descriptor queue */
struct tpacket_kbdq_core {
struct pgv *pkbdq;
unsigned int feature_req_word;
unsigned int hdrlen;
unsigned char reset_pending_on_curr_blk;
unsigned short kactive_blk_num;
unsigned short blk_sizeof_priv;
unsigned short version;
char *pkblk_start;
char *pkblk_end;
int kblk_size;
unsigned int max_frame_len;
unsigned int knum_blocks;
char *prev;
char *nxt_offset;
uint64_t knxt_seq_num;
struct sk_buff *skb;
rwlock_t blk_fill_in_prog_lock;
/* timer to retire an outstanding block */
struct hrtimer retire_blk_timer;
/* Default is set to 8ms */
#define DEFAULT_PRB_RETIRE_TOV (8)
ktime_t interval_ktime;
};
pahole --hex -C tpacket_kbdq_core vmlinux
running results:
struct tpacket_kbdq_core {
struct pgv * pkbdq; /* 0 0x8 */
unsigned int feature_req_word; /* 0x8 0x4 */
unsigned int hdrlen; /* 0xc 0x4 */
unsigned char reset_pending_on_curr_blk; /* 0x10 0x1 */
/* XXX 1 byte hole, try to pack */
short unsigned int kactive_blk_num; /* 0x12 0x2 */
short unsigned int blk_sizeof_priv; /* 0x14 0x2 */
short unsigned int version; /* 0x16 0x2 */
char * pkblk_start; /* 0x18 0x8 */
char * pkblk_end; /* 0x20 0x8 */
int kblk_size; /* 0x28 0x4 */
unsigned int max_frame_len; /* 0x2c 0x4 */
unsigned int knum_blocks; /* 0x30 0x4 */
/* XXX 4 bytes hole, try to pack */
char * prev; /* 0x38 0x8 */
/* --- cacheline 1 boundary (64 bytes) --- */
char * nxt_offset; /* 0x40 0x8 */
uint64_t knxt_seq_num; /* 0x48 0x8 */
struct sk_buff * skb; /* 0x50 0x8 */
rwlock_t blk_fill_in_prog_lock; /* 0x58 0x30 */
/* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
struct hrtimer retire_blk_timer __attribute__((__aligned__(8))); /* 0x88 0x40 */
/* XXX last struct has 4 bytes of padding */
/* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
ktime_t interval_ktime; /* 0xc8 0x8 */
/* size: 208, cachelines: 4, members: 19 */
/* sum members: 203, holes: 2, sum holes: 5 */
/* paddings: 1, sum paddings: 4 */
/* forced alignments: 1 */
/* last cacheline: 16 bytes */
} __attribute__((__aligned__(8)));
Thanks
Xin Zhao
Powered by blists - more mailing lists