lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20140715141625.GH9918@twins.programming.kicks-ass.net>
Date:	Tue, 15 Jul 2014 16:16:25 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Stephane Eranian <eranian@...gle.com>
Cc:	linux-kernel@...r.kernel.org, mingo@...e.hu, ak@...ux.intel.com,
	jolsa@...hat.com, acme@...hat.com, namhyung@...nel.org
Subject: Re: [PATCH v2 1/5] perf: add ability to sample machine state on
 interrupt

On Tue, Jul 15, 2014 at 02:31:40AM +0200, Stephane Eranian wrote:
> @@ -595,7 +595,8 @@ struct perf_sample_data {
>  	struct perf_callchain_entry	*callchain;
>  	struct perf_raw_record		*raw;
>  	struct perf_branch_stack	*br_stack;
> -	struct perf_regs_user		regs_user;
> +	struct perf_regs		regs_user;
> +	struct perf_regs		regs_intr;
>  	u64				stack_user_size;
>  	u64				weight;
>  	/*
> @@ -618,6 +619,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>  	data->weight = 0;
>  	data->data_src.val = 0;
>  	data->txn = 0;
> +	data->regs_intr.abi = PERF_SAMPLE_REGS_ABI_NONE;
> +	data->regs_intr.regs = NULL;
>  }

I don't think we've been very careful here; does the below make sense?

AFAICT we don't need to set stack_user_size at all;
perf_prepare_sample() will set it when required, and with the change to
perf_sample_regs_user() the same is true for regs_user.

This again reduces the cost of perf_sample_data_init() to touching a
single cacheline.

I'm not entirely sure the ____cacheline_aligned makes sense though; the
previous stack line is probably touched already so any next cacheline is
the one, and on average we'd gain 0.5 cachelines worth of data.



---
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 707617a8c0f6..d27fec8118b1 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -575,34 +575,40 @@ extern u64 perf_event_read_value(struct perf_event *event,
 
 
 struct perf_sample_data {
-	u64				type;
+	/*
+	 * Fields set by perf_sample_data_init(), group so as to
+	 * minimize the cachelines touched.
+	 */
+	u64				addr;
+	struct perf_raw_record		*raw;
+	struct perf_branch_stack	*br_stack;
+	u64				period;
+	u64				weight;
+	u64				txn;
+	union  perf_mem_data_src	data_src;
+
 
+	/*
+	 * The other fields, optionally {set,used} by
+	 * perf_{prepare,output}_sample().
+	 */
+	u64				type;
 	u64				ip;
 	struct {
 		u32	pid;
 		u32	tid;
 	}				tid_entry;
 	u64				time;
-	u64				addr;
 	u64				id;
 	u64				stream_id;
 	struct {
 		u32	cpu;
 		u32	reserved;
 	}				cpu_entry;
-	u64				period;
-	union  perf_mem_data_src	data_src;
 	struct perf_callchain_entry	*callchain;
-	struct perf_raw_record		*raw;
-	struct perf_branch_stack	*br_stack;
 	struct perf_regs_user		regs_user;
 	u64				stack_user_size;
-	u64				weight;
-	/*
-	 * Transaction flags for abort events:
-	 */
-	u64				txn;
-};
+} ____cacheline_aligned;
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
 					 u64 addr, u64 period)
@@ -612,9 +618,6 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->raw  = NULL;
 	data->br_stack = NULL;
 	data->period = period;
-	data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
-	data->regs_user.regs = NULL;
-	data->stack_user_size = 0;
 	data->weight = 0;
 	data->data_src.val = 0;
 	data->txn = 0;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c8b53c94d41d..926cd7aafc14 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4299,6 +4299,8 @@ perf_output_sample_regs(struct perf_output_handle *handle,
 static void perf_sample_regs_user(struct perf_regs_user *regs_user,
 				  struct pt_regs *regs)
 {
+	regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
+
 	if (!user_mode(regs)) {
 		if (current->mm)
 			regs = task_pt_regs(current);

Content of type "application/pgp-signature" skipped

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ