lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <51DC1591.7070907@intel.com>
Date:	Tue, 09 Jul 2013 21:52:17 +0800
From:	"Yan, Zheng" <zheng.z.yan@...el.com>
To:	Peter Zijlstra <peterz@...radead.org>
CC:	linux-kernel@...r.kernel.org, mingo@...e.hu, eranian@...gle.com,
	ak@...ux.intel.com
Subject: Re: [PATCH] perf: Update event buffer tail when overwriting old events

On 07/09/2013 04:05 PM, Peter Zijlstra wrote:
> On Tue, Jul 09, 2013 at 03:05:41PM +0800, Yan, Zheng wrote:
> 
>> Thank you for your help. I ran the same test, the results for regular case
>> are much better. But it still has about 1% overhead, probably because we
>> enlarge the ring_buffer structure, make it less cache friendly.
>>
>>       origin    with the patch
>> AVG    1000      1013
>> STDEV  13.4      15.0
> 
> And this is the !overwrite case, right? I don't suppose you cured the logic
> Namhyung Kim pointed out? That should affect the overwrite case I suppose since
> it won't switch to perf_event_output_overwrite().

yes, it's the overwrite case.

> 
> tip/master:
> 
> struct ring_buffer {
>         atomic_t                   refcount;             /*     0     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         struct callback_head       callback_head;        /*     8    16 */
>         int                        nr_pages;             /*    24     4 */
>         int                        overwrite;            /*    28     4 */
>         atomic_t                   poll;                 /*    32     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         local_t                    head;                 /*    40     8 */
>         local_t                    nest;                 /*    48     8 */
>         local_t                    events;               /*    56     8 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         local_t                    wakeup;               /*    64     8 */
>         local_t                    lost;                 /*    72     8 */
>         long int                   watermark;            /*    80     8 */
>         spinlock_t                 event_lock;           /*    88    56 */
>         /* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
>         struct list_head           event_list;           /*   144    16 */
>         atomic_t                   mmap_count;           /*   160     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         long unsigned int          mmap_locked;          /*   168     8 */
>         struct user_struct *       mmap_user;            /*   176     8 */
>         struct perf_event_mmap_page * user_page;         /*   184     8 */
>         /* --- cacheline 3 boundary (192 bytes) --- */
>         void *                     data_pages[0];        /*   192     0 */
> 
>         /* size: 192, cachelines: 3, members: 18 */
>         /* sum members: 180, holes: 3, sum holes: 12 */
> };
> 
> tip/master + patch:
> 
> struct ring_buffer {
>         atomic_t                   refcount;             /*     0     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         struct callback_head       callback_head;        /*     8    16 */
>         int                        nr_pages;             /*    24     4 */
>         int                        overwrite;            /*    28     4 */
>         atomic_t                   poll;                 /*    32     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         local_t                    tail;                 /*    40     8 */
>         local_t                    next_tail;            /*    48     8 */
>         local_t                    head;                 /*    56     8 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         local_t                    nest;                 /*    64     8 */
>         local_t                    events;               /*    72     8 */
>         local_t                    wakeup;               /*    80     8 */
>         local_t                    lost;                 /*    88     8 */
>         long int                   watermark;            /*    96     8 */
>         spinlock_t                 event_lock;           /*   104    56 */
>         /* --- cacheline 2 boundary (128 bytes) was 32 bytes ago --- */
>         struct list_head           event_list;           /*   160    16 */
>         atomic_t                   mmap_count;           /*   176     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         long unsigned int          mmap_locked;          /*   184     8 */
>         /* --- cacheline 3 boundary (192 bytes) --- */
>         struct user_struct *       mmap_user;            /*   192     8 */
>         struct perf_event_mmap_page * user_page;         /*   200     8 */
>         void *                     data_pages[0];        /*   208     0 */
> 
>         /* size: 208, cachelines: 4, members: 20 */
>         /* sum members: 196, holes: 3, sum holes: 12 */
>         /* last cacheline: 16 bytes */
> };
> 
> tip/master + patch^2:
> 
> struct ring_buffer {
>         atomic_t                   refcount;             /*     0     4 */
>         atomic_t                   mmap_count;           /*     4     4 */
>         union {
>                 int                overwrite;            /*           4 */
>                 struct callback_head callback_head;      /*          16 */
>         };                                               /*     8    16 */
>         int                        nr_pages;             /*    24     4 */
>         atomic_t                   poll;                 /*    28     4 */
>         local_t                    tail;                 /*    32     8 */
>         local_t                    next_tail;            /*    40     8 */
>         local_t                    head;                 /*    48     8 */
>         local_t                    nest;                 /*    56     8 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         local_t                    events;               /*    64     8 */
>         local_t                    wakeup;               /*    72     8 */
>         local_t                    lost;                 /*    80     8 */
>         long int                   watermark;            /*    88     8 */
>         spinlock_t                 event_lock;           /*    96    56 */
>         /* --- cacheline 2 boundary (128 bytes) was 24 bytes ago --- */
>         struct list_head           event_list;           /*   152    16 */
>         long unsigned int          mmap_locked;          /*   168     8 */
>         struct user_struct *       mmap_user;            /*   176     8 */
>         struct perf_event_mmap_page * user_page;         /*   184     8 */
>         /* --- cacheline 3 boundary (192 bytes) --- */
>         void *                     data_pages[0];        /*   192     0 */
> 
>         /* size: 192, cachelines: 3, members: 19 */
> };
> 
> 
> ---
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -4641,7 +4641,7 @@ static void perf_event_output_overwrite(
>  static void
>  perf_event_set_overflow(struct perf_event *event, struct ring_buffer *rb)
>  {
> -	if (event->overflow_handler != perf_event_output ||
> +	if (event->overflow_handler != perf_event_output &&
>  	    event->overflow_handler != perf_event_output_overwrite)
>  		return;
>  
> --- a/kernel/events/internal.h
> +++ b/kernel/events/internal.h
> @@ -10,13 +10,16 @@
>  
>  struct ring_buffer {
>  	atomic_t			refcount;
> -	struct rcu_head			rcu_head;
> +	atomic_t			mmap_count;
> +	union {
> +		int			overwrite;	/* can overwrite itself */
> +		struct rcu_head		rcu_head;
> +	};
>  #ifdef CONFIG_PERF_USE_VMALLOC
>  	struct work_struct		work;
>  	int				page_order;	/* allocation order  */
>  #endif
>  	int				nr_pages;	/* nr of data pages  */
> -	int				overwrite;	/* can overwrite itself */
>  
>  	atomic_t			poll;		/* POLL_ for wakeups */
>  
> @@ -33,7 +36,6 @@ struct ring_buffer {
>  	spinlock_t			event_lock;
>  	struct list_head		event_list;
>  
> -	atomic_t			mmap_count;
>  	unsigned long			mmap_locked;
>  	struct user_struct		*mmap_user;
>  
> 

      origin    patch    patch^2
AVG    1000      1013    1028
STDEV  13.4      15.0    14.6

patch^2 doesn't help. I will try moving the new fields down and re-test tomorrow

Regards
Yan, Zheng



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ