Message-Id: <200903041033.09502.rusty@rustcorp.com.au>
Date:	Wed, 4 Mar 2009 10:33:08 +1030
From:	Rusty Russell <rusty@...tcorp.com.au>
To:	Tejun Heo <tj@...nel.org>
Cc:	Ingo Molnar <mingo@...e.hu>, tglx@...utronix.de, x86@...nel.org,
	linux-kernel@...r.kernel.org, hpa@...or.com, jeremy@...p.org,
	cpw@....com, nickpiggin@...oo.com.au, ink@...assic.park.msu.ru
Subject: Re: [PATCHSET x86/core/percpu] improve the first percpu chunk allocation

On Wednesday 25 February 2009 01:07:24 Tejun Heo wrote:
> it always
> saves a 2MB TLB entry for all the non-NUMA machines out there.

Note that everyone keeps talking about "a" TLB entry; I wanted to make
sure (esp. for those of us reading from the sidelines) that it's not
just one: it's up to num_possible_cpus() TLB entries.  Of course, many
paths won't touch other CPUs' data, but it'd be interesting (and pretty
easy) to actually instrument how rare such accesses are...
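
To make that concrete, here's the kind of pattern I mean (a made-up
example; "hits" and total_hits() aren't real kernel symbols): summing a
percpu counter walks every possible CPU's copy, so with one virtual
mapping per CPU a single loop can touch up to num_possible_cpus()
distinct TLB entries:

#include <linux/percpu.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(unsigned long, hits);

/* Each per_cpu(hits, cpu) below is a cross-CPU access for all but the
 * local CPU; with per-CPU virtual mappings, that's potentially one TLB
 * entry per iteration. */
static unsigned long total_hits(void)
{
	unsigned long sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu(hits, cpu);
	return sum;
}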

Hmm, the answer: fairly rare, but not incredibly so:

percpu: measure use

With the idea of using virtual mappings for percpu regions, we wonder
how often we access other CPUs' per-cpu variables.

32-bit 4-way SMP (under kvm), kernel make -j4:
get_cpu_var():           52,358,618
raw_get_cpu_var():          287,191
per_cpu():               17,371,648
per_cpu() (same cpu):    16,020,390

Total same-cpu calls:    68,666,199
Cross-cpu per_cpu calls:  1,351,258
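
That is, cross-cpu accesses (per_cpu() total minus per_cpu() on the
same cpu: 17,371,648 - 16,020,390 = 1,351,258) are just under 2% of
the roughly 70 million percpu accesses measured.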

Signed-off-by: Rusty Russell <rusty@...tcorp.com.au>
---
 arch/x86/Makefile_32.cpu     |    2 +-
 include/asm-generic/percpu.h |   10 +++++++---
 kernel/module.c              |   11 +++++++++++
 kernel/smp.c                 |   21 +++++++++++++++++++++
 4 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -47,5 +47,5 @@ cflags-$(CONFIG_X86_GENERIC) 	+= $(call 
 # Bug fix for binutils: this option is required in order to keep
 # binutils from generating NOPL instructions against our will.
 ifneq ($(CONFIG_X86_P6_NOP),y)
-cflags-y			+= $(call cc-option,-Wa$(comma)-mtune=generic32,)
+#cflags-y			+= $(call cc-option,-Wa$(comma)-mtune=generic32,)
 endif
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -53,12 +53,16 @@ extern unsigned long __per_cpu_offset[NR
  * established ways to produce a usable pointer from the percpu variable
  * offset.
  */
+void count_per_cpu(unsigned int cpu);
+void count_get_cpu_var(void);
+void count_raw_get_cpu_var(void);
+
 #define per_cpu(var, cpu) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu)))
+	(*(count_per_cpu(cpu), SHIFT_PERCPU_PTR(&per_cpu_var(var), per_cpu_offset(cpu))))
 #define __get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset))
+	(*(count_get_cpu_var(), SHIFT_PERCPU_PTR(&per_cpu_var(var), my_cpu_offset)))
 #define __raw_get_cpu_var(var) \
-	(*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset))
+	(*(count_raw_get_cpu_var(), SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)))
 
 
 #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
diff --git a/kernel/module.c b/kernel/module.c
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2705,6 +2705,17 @@ static const struct seq_operations modul
 
 static int modules_open(struct inode *inode, struct file *file)
 {
+	extern atomic_t get_cpu_var_count, raw_get_cpu_var_count, per_cpu_count[], unnecessary_count[];
+	unsigned int i;
+
+	printk("get_cpu_var_count: %i\n", atomic_xchg(&get_cpu_var_count, 0));
+	printk("raw_get_cpu_var_count: %i\n",
+	       atomic_xchg(&raw_get_cpu_var_count, 0));
+	for_each_online_cpu(i)
+		printk("per_cpu %i: %u (%u self)\n",
+		       i, atomic_xchg(&per_cpu_count[i], 0),
+		       atomic_xchg(&unnecessary_count[i], 0));
+
 	return seq_open(file, &modules_op);
 }
 
diff --git a/kernel/smp.c b/kernel/smp.c
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -10,6 +10,27 @@
 #include <linux/rcupdate.h>
 #include <linux/rculist.h>
 #include <linux/smp.h>
+
+atomic_t get_cpu_var_count, raw_get_cpu_var_count, per_cpu_count[CONFIG_NR_CPUS], unnecessary_count[CONFIG_NR_CPUS];
+void count_per_cpu(unsigned int cpu)
+{
+	if (cpu == raw_smp_processor_id())
+		atomic_inc(&unnecessary_count[cpu]);
+	atomic_inc(&per_cpu_count[cpu]);
+}
+EXPORT_SYMBOL(count_per_cpu);
+
+void count_get_cpu_var(void)
+{
+	atomic_inc(&get_cpu_var_count);
+}
+EXPORT_SYMBOL(count_get_cpu_var);
+
+void count_raw_get_cpu_var(void)
+{
+	atomic_inc(&raw_get_cpu_var_count);
+}
+EXPORT_SYMBOL(count_raw_get_cpu_var);
 
 static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
 static LIST_HEAD(call_function_queue);
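
For reference, the trick in the percpu.h hunk is just a comma
expression: evaluate the counting hook, then hand back the usual
pointer, so the macro still yields an lvalue.  A standalone sketch
(made-up names, not part of the patch):

#include <stdio.h>

static unsigned long accesses;
static int slot;

static void count_access(void)
{
	accesses++;
}

/* Hook first, pointer second; dereferencing the comma expression
 * still gives an assignable lvalue, just like the patched macros. */
#define INSTRUMENTED_SLOT (*(count_access(), &slot))

int main(void)
{
	INSTRUMENTED_SLOT = 42;
	INSTRUMENTED_SLOT += 1;
	printf("slot=%d accesses=%lu\n", slot, accesses);
	return 0;
}

This prints "slot=43 accesses=2": two accesses counted, and the
assignments land in the variable as usual.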