Message-ID: <CAL+tcoB3iwsTTt8Bpc62Zc-CoyOGRrAdAjo26XqUvFnBoqXpTw@mail.gmail.com>
Date: Wed, 24 Jul 2024 00:00:38 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: Eric Dumazet <edumazet@...gle.com>
Cc: davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com, horms@...nel.org, 
	netdev@...r.kernel.org, Jason Xing <kernelxing@...cent.com>
Subject: Re: [RFC PATCH net-next] net: add an entry for CONFIG_NET_RX_BUSY_POLL

On Tue, Jul 23, 2024 at 11:26 PM Eric Dumazet <edumazet@...gle.com> wrote:
>
> On Tue, Jul 23, 2024 at 5:13 PM Jason Xing <kerneljasonxing@...il.com> wrote:
> >
> > On Tue, Jul 23, 2024 at 11:09 PM Jason Xing <kerneljasonxing@...il.com> wrote:
> > >
> > > On Tue, Jul 23, 2024 at 10:57 PM Eric Dumazet <edumazet@...gle.com> wrote:
> > > >
> > > > On Tue, Jul 23, 2024 at 3:57 PM Jason Xing <kerneljasonxing@...il.com> wrote:
> > > > >
> > > > > From: Jason Xing <kernelxing@...cent.com>
> > > > >
> > > > > When I was doing a performance test on unix_poll(), I found that
> > > > > accessing sk->sk_ll_usec when calling sock_poll()->sk_can_busy_loop()
> > > > > takes too much time, causing around a 16% degradation. So I decided
> > > > > to turn off this config, which apparently cannot be done before
> > > > > this patch.
> > > >
> > > > Too many CONFIG_ options, distros will enable it anyway.
> > > >
> > > > In my builds, offset of sk_ll_usec is 0xe8.
> > > >
> > > > Are you using some debug options or an old tree ?
> >
> > I forgot to say: I'm running the latest kernel, which I pulled around
> > two hours ago. Whichever configs I use, with or without debug options,
> > I can still reproduce it.
>
> Ok, please post :
>
> pahole --hex -C sock vmlinux
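
(For context, the access in question is the READ_ONCE(sk->sk_ll_usec) check in
sk_can_busy_loop(), which sock_poll() performs on every poll. A simplified
sketch of that gate, based on include/net/busy_poll.h -- details may differ
between trees:)

#ifdef CONFIG_NET_RX_BUSY_POLL
static inline bool sk_can_busy_loop(const struct sock *sk)
{
        /* sk_ll_usec sits in the sock_read_rx cacheline group below */
        return READ_ONCE(sk->sk_ll_usec) && !signal_pending(current);
}
#else
static inline bool sk_can_busy_loop(struct sock *sk)
{
        return false;   /* the whole busy-poll branch compiles away */
}
#endif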

1) With the config enabled:
$ pahole --hex -C sock vmlinux
struct sock {
        struct sock_common         __sk_common;          /*     0  0x88 */
        /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
        __u8                       __cacheline_group_begin__sock_write_rx[0]; /*  0x88     0 */
        atomic_t                   sk_drops;             /*  0x88   0x4 */
        __s32                      sk_peek_off;          /*  0x8c   0x4 */
        struct sk_buff_head        sk_error_queue;       /*  0x90  0x18 */
        struct sk_buff_head        sk_receive_queue;     /*  0xa8  0x18 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        struct {
                atomic_t           rmem_alloc;           /*  0xc0   0x4 */
                int                len;                  /*  0xc4   0x4 */
                struct sk_buff *   head;                 /*  0xc8   0x8 */
                struct sk_buff *   tail;                 /*  0xd0   0x8 */
        } sk_backlog;                                    /*  0xc0  0x18 */
        __u8                       __cacheline_group_end__sock_write_rx[0]; /*  0xd8     0 */
        __u8                       __cacheline_group_begin__sock_read_rx[0]; /*  0xd8     0 */
        struct dst_entry *         sk_rx_dst;            /*  0xd8   0x8 */
        int                        sk_rx_dst_ifindex;    /*  0xe0   0x4 */
        u32                        sk_rx_dst_cookie;     /*  0xe4   0x4 */
        unsigned int               sk_ll_usec;           /*  0xe8   0x4 */
        unsigned int               sk_napi_id;           /*  0xec   0x4 */
        u16                        sk_busy_poll_budget;  /*  0xf0   0x2 */
        u8                         sk_prefer_busy_poll;  /*  0xf2   0x1 */
        u8                         sk_userlocks;         /*  0xf3   0x1 */
        int                        sk_rcvbuf;            /*  0xf4   0x4 */
        struct sk_filter *         sk_filter;            /*  0xf8   0x8 */
        /* --- cacheline 4 boundary (256 bytes) --- */
        union {
                struct socket_wq * sk_wq;                /* 0x100   0x8 */
                struct socket_wq * sk_wq_raw;            /* 0x100   0x8 */
        };                                               /* 0x100   0x8 */
        void                       (*sk_data_ready)(struct sock *); /* 0x108   0x8 */
        long int                   sk_rcvtimeo;          /* 0x110   0x8 */
        int                        sk_rcvlowat;          /* 0x118   0x4 */
        __u8                       __cacheline_group_end__sock_read_rx[0]; /* 0x11c     0 */
        __u8                       __cacheline_group_begin__sock_read_rxtx[0]; /* 0x11c     0 */
        int                        sk_err;               /* 0x11c   0x4 */
        struct socket *            sk_socket;            /* 0x120   0x8 */
        struct mem_cgroup *        sk_memcg;             /* 0x128   0x8 */
        struct xfrm_policy *       sk_policy[2];         /* 0x130  0x10 */
        /* --- cacheline 5 boundary (320 bytes) --- */
        __u8                       __cacheline_group_end__sock_read_rxtx[0]; /* 0x140     0 */
        __u8                       __cacheline_group_begin__sock_write_rxtx[0]; /* 0x140     0 */
        socket_lock_t              sk_lock;              /* 0x140  0x20 */
        u32                        sk_reserved_mem;      /* 0x160   0x4 */
        int                        sk_forward_alloc;     /* 0x164   0x4 */
        u32                        sk_tsflags;           /* 0x168   0x4 */
        __u8                       __cacheline_group_end__sock_write_rxtx[0]; /* 0x16c     0 */
        __u8                       __cacheline_group_begin__sock_write_tx[0]; /* 0x16c     0 */
        int                        sk_write_pending;     /* 0x16c   0x4 */
        atomic_t                   sk_omem_alloc;        /* 0x170   0x4 */
        int                        sk_sndbuf;            /* 0x174   0x4 */
        int                        sk_wmem_queued;       /* 0x178   0x4 */
        refcount_t                 sk_wmem_alloc;        /* 0x17c   0x4 */
        /* --- cacheline 6 boundary (384 bytes) --- */
        long unsigned int          sk_tsq_flags;         /* 0x180   0x8 */
        union {
                struct sk_buff *   sk_send_head;         /* 0x188   0x8 */
                struct rb_root     tcp_rtx_queue;        /* 0x188   0x8 */
        };                                               /* 0x188   0x8 */
        struct sk_buff_head        sk_write_queue;       /* 0x190  0x18 */
        u32                        sk_dst_pending_confirm; /* 0x1a8   0x4 */
        u32                        sk_pacing_status;     /* 0x1ac   0x4 */
        struct page_frag           sk_frag;              /* 0x1b0  0x10 */
        /* --- cacheline 7 boundary (448 bytes) --- */
        struct timer_list          sk_timer;             /* 0x1c0  0x28 */

        /* XXX last struct has 4 bytes of padding */

        long unsigned int          sk_pacing_rate;       /* 0x1e8   0x8 */
        atomic_t                   sk_zckey;             /* 0x1f0   0x4 */
        atomic_t                   sk_tskey;             /* 0x1f4   0x4 */
        __u8                       __cacheline_group_end__sock_write_tx[0]; /* 0x1f8     0 */
        __u8                       __cacheline_group_begin__sock_read_tx[0]; /* 0x1f8     0 */
        long unsigned int          sk_max_pacing_rate;   /* 0x1f8   0x8 */
        /* --- cacheline 8 boundary (512 bytes) --- */
        long int                   sk_sndtimeo;          /* 0x200   0x8 */
        u32                        sk_priority;          /* 0x208   0x4 */
        u32                        sk_mark;              /* 0x20c   0x4 */
        struct dst_entry *         sk_dst_cache;         /* 0x210   0x8 */
        netdev_features_t          sk_route_caps;        /* 0x218   0x8 */
        u16                        sk_gso_type;          /* 0x220   0x2 */
        u16                        sk_gso_max_segs;      /* 0x222   0x2 */
        unsigned int               sk_gso_max_size;      /* 0x224   0x4 */
        gfp_t                      sk_allocation;        /* 0x228   0x4 */
        u32                        sk_txhash;            /* 0x22c   0x4 */
        u8                         sk_pacing_shift;      /* 0x230   0x1 */
        bool                       sk_use_task_frag;     /* 0x231   0x1 */
        __u8                       __cacheline_group_end__sock_read_tx[0]; /* 0x232     0 */
        u8                         sk_gso_disabled:1;    /* 0x232: 0 0x1 */
        u8                         sk_kern_sock:1;       /* 0x232:0x1 0x1 */
        u8                         sk_no_check_tx:1;     /* 0x232:0x2 0x1 */
        u8                         sk_no_check_rx:1;     /* 0x232:0x3 0x1 */

        /* XXX 4 bits hole, try to pack */

        u8                         sk_shutdown;          /* 0x233   0x1 */
        u16                        sk_type;              /* 0x234   0x2 */
        u16                        sk_protocol;          /* 0x236   0x2 */
        long unsigned int          sk_lingertime;        /* 0x238   0x8 */
        /* --- cacheline 9 boundary (576 bytes) --- */
        struct proto *             sk_prot_creator;      /* 0x240   0x8 */
        rwlock_t                   sk_callback_lock;     /* 0x248   0x8 */
        int                        sk_err_soft;          /* 0x250   0x4 */
        u32                        sk_ack_backlog;       /* 0x254   0x4 */
        u32                        sk_max_ack_backlog;   /* 0x258   0x4 */
        kuid_t                     sk_uid;               /* 0x25c   0x4 */
        spinlock_t                 sk_peer_lock;         /* 0x260   0x4 */
        int                        sk_bind_phc;          /* 0x264   0x4 */
        struct pid *               sk_peer_pid;          /* 0x268   0x8 */
        const struct cred  *       sk_peer_cred;         /* 0x270   0x8 */
        ktime_t                    sk_stamp;             /* 0x278   0x8 */
        /* --- cacheline 10 boundary (640 bytes) --- */
        int                        sk_disconnects;       /* 0x280   0x4 */
        u8                         sk_txrehash;          /* 0x284   0x1 */
        u8                         sk_clockid;           /* 0x285   0x1 */
        u8                         sk_txtime_deadline_mode:1; /* 0x286: 0 0x1 */
        u8                         sk_txtime_report_errors:1; /* 0x286:0x1 0x1 */
        u8                         sk_txtime_unused:6;   /* 0x286:0x2 0x1 */

        /* XXX 1 byte hole, try to pack */

        void *                     sk_user_data;         /* 0x288   0x8 */
        void *                     sk_security;          /* 0x290   0x8 */
        struct sock_cgroup_data    sk_cgrp_data;         /* 0x298  0x10 */

        /* XXX last struct has 2 bytes of padding */

        void                       (*sk_state_change)(struct sock *); /* 0x2a8   0x8 */
        void                       (*sk_write_space)(struct sock *); /* 0x2b0   0x8 */
        void                       (*sk_error_report)(struct sock *); /* 0x2b8   0x8 */
        /* --- cacheline 11 boundary (704 bytes) --- */
        int                        (*sk_backlog_rcv)(struct sock *, struct sk_buff *); /* 0x2c0   0x8 */
        void                       (*sk_destruct)(struct sock *); /* 0x2c8   0x8 */
        struct sock_reuseport *    sk_reuseport_cb;      /* 0x2d0   0x8 */
        struct bpf_local_storage * sk_bpf_storage;       /* 0x2d8   0x8 */
        struct callback_head       sk_rcu __attribute__((__aligned__(8))); /* 0x2e0  0x10 */
        netns_tracker              ns_tracker;           /* 0x2f0     0 */

        /* size: 752, cachelines: 12, members: 105 */
        /* sum members: 749, holes: 1, sum holes: 1 */
        /* sum bitfield members: 12 bits, bit holes: 1, sum bit holes: 4 bits */
        /* paddings: 2, sum paddings: 6 */
        /* forced alignments: 1 */
        /* last cacheline: 48 bytes */
} __attribute__((__aligned__(8)));

2) With the config disabled:
$ pahole --hex -C sock vmlinux
struct sock {
        struct sock_common         __sk_common;          /*     0  0x88 */
        /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
        __u8                       __cacheline_group_begin__sock_write_rx[0]; /*  0x88     0 */
        atomic_t                   sk_drops;             /*  0x88   0x4 */
        __s32                      sk_peek_off;          /*  0x8c   0x4 */
        struct sk_buff_head        sk_error_queue;       /*  0x90  0x18 */
        struct sk_buff_head        sk_receive_queue;     /*  0xa8  0x18 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        struct {
                atomic_t           rmem_alloc;           /*  0xc0   0x4 */
                int                len;                  /*  0xc4   0x4 */
                struct sk_buff *   head;                 /*  0xc8   0x8 */
                struct sk_buff *   tail;                 /*  0xd0   0x8 */
        } sk_backlog;                                    /*  0xc0  0x18 */
        __u8                       __cacheline_group_end__sock_write_rx[0]; /*  0xd8     0 */
        __u8                       __cacheline_group_begin__sock_read_rx[0]; /*  0xd8     0 */
        struct dst_entry *         sk_rx_dst;            /*  0xd8   0x8 */
        int                        sk_rx_dst_ifindex;    /*  0xe0   0x4 */
        u32                        sk_rx_dst_cookie;     /*  0xe4   0x4 */
        u8                         sk_userlocks;         /*  0xe8   0x1 */

        /* XXX 3 bytes hole, try to pack */

        int                        sk_rcvbuf;            /*  0xec   0x4 */
        struct sk_filter *         sk_filter;            /*  0xf0   0x8 */
        union {
                struct socket_wq * sk_wq;                /*  0xf8   0x8 */
                struct socket_wq * sk_wq_raw;            /*  0xf8   0x8 */
        };                                               /*  0xf8   0x8 */
        /* --- cacheline 4 boundary (256 bytes) --- */
        void                       (*sk_data_ready)(struct sock *); /* 0x100   0x8 */
        long int                   sk_rcvtimeo;          /* 0x108   0x8 */
        int                        sk_rcvlowat;          /* 0x110   0x4 */
        __u8                       __cacheline_group_end__sock_read_rx[0]; /* 0x114     0 */
        __u8                       __cacheline_group_begin__sock_read_rxtx[0]; /* 0x114     0 */
        int                        sk_err;               /* 0x114   0x4 */
        struct socket *            sk_socket;            /* 0x118   0x8 */
        struct mem_cgroup *        sk_memcg;             /* 0x120   0x8 */
        struct xfrm_policy *       sk_policy[2];         /* 0x128  0x10 */
        __u8                       __cacheline_group_end__sock_read_rxtx[0]; /* 0x138     0 */
        __u8                       __cacheline_group_begin__sock_write_rxtx[0]; /* 0x138     0 */
        socket_lock_t              sk_lock;              /* 0x138  0x20 */
        /* --- cacheline 5 boundary (320 bytes) was 24 bytes ago --- */
        u32                        sk_reserved_mem;      /* 0x158   0x4 */
        int                        sk_forward_alloc;     /* 0x15c   0x4 */
        u32                        sk_tsflags;           /* 0x160   0x4 */
        __u8                       __cacheline_group_end__sock_write_rxtx[0]; /* 0x164     0 */
        __u8                       __cacheline_group_begin__sock_write_tx[0]; /* 0x164     0 */
        int                        sk_write_pending;     /* 0x164   0x4 */
        atomic_t                   sk_omem_alloc;        /* 0x168   0x4 */
        int                        sk_sndbuf;            /* 0x16c   0x4 */
        int                        sk_wmem_queued;       /* 0x170   0x4 */
        refcount_t                 sk_wmem_alloc;        /* 0x174   0x4 */
        long unsigned int          sk_tsq_flags;         /* 0x178   0x8 */
        /* --- cacheline 6 boundary (384 bytes) --- */
        union {
                struct sk_buff *   sk_send_head;         /* 0x180   0x8 */
                struct rb_root     tcp_rtx_queue;        /* 0x180   0x8 */
        };                                               /* 0x180   0x8 */
        struct sk_buff_head        sk_write_queue;       /* 0x188  0x18 */
        u32                        sk_dst_pending_confirm; /* 0x1a0   0x4 */
        u32                        sk_pacing_status;     /* 0x1a4   0x4 */
        struct page_frag           sk_frag;              /* 0x1a8  0x10 */
        struct timer_list          sk_timer;             /* 0x1b8  0x28 */

        /* XXX last struct has 4 bytes of padding */

        /* --- cacheline 7 boundary (448 bytes) was 32 bytes ago --- */
        long unsigned int          sk_pacing_rate;       /* 0x1e0   0x8 */
        atomic_t                   sk_zckey;             /* 0x1e8   0x4 */
        atomic_t                   sk_tskey;             /* 0x1ec   0x4 */
        __u8                       __cacheline_group_end__sock_write_tx[0]; /* 0x1f0     0 */
        __u8                       __cacheline_group_begin__sock_read_tx[0]; /* 0x1f0     0 */
        long unsigned int          sk_max_pacing_rate;   /* 0x1f0   0x8 */
        long int                   sk_sndtimeo;          /* 0x1f8   0x8 */
        /* --- cacheline 8 boundary (512 bytes) --- */
        u32                        sk_priority;          /* 0x200   0x4 */
        u32                        sk_mark;              /* 0x204   0x4 */
        struct dst_entry *         sk_dst_cache;         /* 0x208   0x8 */
        netdev_features_t          sk_route_caps;        /* 0x210   0x8 */
        u16                        sk_gso_type;          /* 0x218   0x2 */
        u16                        sk_gso_max_segs;      /* 0x21a   0x2 */
        unsigned int               sk_gso_max_size;      /* 0x21c   0x4 */
        gfp_t                      sk_allocation;        /* 0x220   0x4 */
        u32                        sk_txhash;            /* 0x224   0x4 */
        u8                         sk_pacing_shift;      /* 0x228   0x1 */
        bool                       sk_use_task_frag;     /* 0x229   0x1 */
        __u8                       __cacheline_group_end__sock_read_tx[0]; /* 0x22a     0 */
        u8                         sk_gso_disabled:1;    /* 0x22a: 0 0x1 */
        u8                         sk_kern_sock:1;       /* 0x22a:0x1 0x1 */
        u8                         sk_no_check_tx:1;     /* 0x22a:0x2 0x1 */
        u8                         sk_no_check_rx:1;     /* 0x22a:0x3 0x1 */

        /* XXX 4 bits hole, try to pack */

        u8                         sk_shutdown;          /* 0x22b   0x1 */
        u16                        sk_type;              /* 0x22c   0x2 */
        u16                        sk_protocol;          /* 0x22e   0x2 */
        long unsigned int          sk_lingertime;        /* 0x230   0x8 */
        struct proto *             sk_prot_creator;      /* 0x238   0x8 */
        /* --- cacheline 9 boundary (576 bytes) --- */
        rwlock_t                   sk_callback_lock;     /* 0x240   0x8 */
        int                        sk_err_soft;          /* 0x248   0x4 */
        u32                        sk_ack_backlog;       /* 0x24c   0x4 */
        u32                        sk_max_ack_backlog;   /* 0x250   0x4 */
        kuid_t                     sk_uid;               /* 0x254   0x4 */
        spinlock_t                 sk_peer_lock;         /* 0x258   0x4 */
        int                        sk_bind_phc;          /* 0x25c   0x4 */
        struct pid *               sk_peer_pid;          /* 0x260   0x8 */
        const struct cred  *       sk_peer_cred;         /* 0x268   0x8 */
        ktime_t                    sk_stamp;             /* 0x270   0x8 */
        int                        sk_disconnects;       /* 0x278   0x4 */
        u8                         sk_txrehash;          /* 0x27c   0x1 */
        u8                         sk_clockid;           /* 0x27d   0x1 */
        u8                         sk_txtime_deadline_mode:1; /* 0x27e: 0 0x1 */
        u8                         sk_txtime_report_errors:1; /* 0x27e:0x1 0x1 */
        u8                         sk_txtime_unused:6;   /* 0x27e:0x2 0x1 */

        /* XXX 1 byte hole, try to pack */

        /* --- cacheline 10 boundary (640 bytes) --- */
        void *                     sk_user_data;         /* 0x280   0x8 */
        void *                     sk_security;          /* 0x288   0x8 */
        struct sock_cgroup_data    sk_cgrp_data;         /* 0x290  0x10 */

        /* XXX last struct has 2 bytes of padding */

        void                       (*sk_state_change)(struct sock *); /* 0x2a0   0x8 */
        void                       (*sk_write_space)(struct sock *); /* 0x2a8   0x8 */
        void                       (*sk_error_report)(struct sock *); /* 0x2b0   0x8 */
        int                        (*sk_backlog_rcv)(struct sock *, struct sk_buff *); /* 0x2b8   0x8 */
        /* --- cacheline 11 boundary (704 bytes) --- */
        void                       (*sk_destruct)(struct sock *); /* 0x2c0   0x8 */
        struct sock_reuseport *    sk_reuseport_cb;      /* 0x2c8   0x8 */
        struct bpf_local_storage * sk_bpf_storage;       /* 0x2d0   0x8 */
        struct callback_head       sk_rcu __attribute__((__aligned__(8))); /* 0x2d8  0x10 */
        netns_tracker              ns_tracker;           /* 0x2e8     0 */

        /* size: 744, cachelines: 12, members: 101 */
        /* sum members: 738, holes: 2, sum holes: 4 */
        /* sum bitfield members: 12 bits, bit holes: 1, sum bit holes: 4 bits */
        /* paddings: 2, sum paddings: 6 */
        /* forced alignments: 1 */
        /* last cacheline: 40 bytes */
} __attribute__((__aligned__(8)));
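
(Comparing the two layouts: with the config disabled, sk_ll_usec, sk_napi_id,
sk_busy_poll_budget and sk_prefer_busy_poll drop out of the sock_read_rx
group, the member count goes from 105 to 101, and the struct shrinks from 752
to 744 bytes. The four fields total 11 bytes, but a 3-byte hole opens up after
sk_userlocks, so the net saving is 8 bytes. A rough sketch of how those fields
are guarded in include/net/sock.h -- exact placement may vary between trees:)

struct sock {
        /* ... */
        struct dst_entry        *sk_rx_dst;
        int                     sk_rx_dst_ifindex;
        u32                     sk_rx_dst_cookie;
#ifdef CONFIG_NET_RX_BUSY_POLL
        unsigned int            sk_ll_usec;             /* read by sk_can_busy_loop() */
        unsigned int            sk_napi_id;
        u16                     sk_busy_poll_budget;
        u8                      sk_prefer_busy_poll;
#endif
        u8                      sk_userlocks;
        int                     sk_rcvbuf;
        /* ... */
};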
