Date:   Fri, 28 Sep 2018 13:53:20 -0400
From:   Waiman Long <longman@...hat.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Will Deacon <will.deacon@....com>
Cc:     linux-kernel@...r.kernel.org, Waiman Long <longman@...hat.com>
Subject: [PATCH 4/5] locking/lockdep: Make class->ops a percpu counter

A sizable portion of the CPU cycles spent in __lock_acquire() is used
up by the atomic increment of the class->ops stat counter. By changing
it to a per-cpu counter, we can reduce the amount of cacheline
contention on the class structure when multiple CPUs are trying to
acquire locks of the same class simultaneously.
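
For reference, the general per-cpu counter pattern used here looks
roughly like the sketch below (illustrative only; the counter name
foo_count and the helpers are made up for the example, not code from
this patch). Writers bump their own CPU's slot with __this_cpu_inc(),
and the (rare) reporting path sums all slots:

	static DEFINE_PER_CPU(unsigned long, foo_count);

	static void foo_inc(void)
	{
		/* lock-free update of this CPU's slot; no shared cacheline */
		__this_cpu_inc(foo_count);
	}

	static unsigned long foo_read(void)
	{
		unsigned long sum = 0;
		int cpu;

		/* slow reporting path: sum the per-cpu slots */
		for_each_possible_cpu(cpu)
			sum += per_cpu(foo_count, cpu);
		return sum;
	}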

This patch also fixes a bug in the increment code: the counter is of
type unsigned long, but atomic_inc(), which operates on a 32-bit
atomic_t, was used to increment it.

Signed-off-by: Waiman Long <longman@...hat.com>
---
 include/linux/lockdep.h  |  2 +-
 kernel/locking/lockdep.c | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b0d0b51..f8bf705 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -102,7 +102,7 @@ struct lock_class {
 	/*
 	 * Statistics counter:
 	 */
-	unsigned long			ops;
+	unsigned long __percpu		*pops;
 
 	const char			*name;
 	int				name_version;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index ca002c0..7a0ed1d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -139,6 +139,7 @@ static inline int debug_locks_off_graph_unlock(void)
  */
 unsigned long nr_lock_classes;
 static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+static DEFINE_PER_CPU(unsigned long [MAX_LOCKDEP_KEYS], lock_class_ops);
 
 static inline struct lock_class *hlock_class(struct held_lock *hlock)
 {
@@ -784,11 +785,14 @@ static bool assign_lock_key(struct lockdep_map *lock)
 		dump_stack();
 		return NULL;
 	}
-	class = lock_classes + nr_lock_classes++;
+	class = lock_classes + nr_lock_classes;
 	debug_atomic_inc(nr_unused_locks);
 	class->key = key;
 	class->name = lock->name;
 	class->subclass = subclass;
+	class->pops = &lock_class_ops[nr_lock_classes];
+	nr_lock_classes++;
+
 	INIT_LIST_HEAD(&class->lock_entry);
 	INIT_LIST_HEAD(&class->locks_before);
 	INIT_LIST_HEAD(&class->locks_after);
@@ -1387,11 +1391,15 @@ static inline int usage_match(struct lock_list *entry, void *bit)
 
 static void print_lock_class_header(struct lock_class *class, int depth)
 {
-	int bit;
+	int bit, cpu;
+	unsigned long ops = 0UL;
+
+	for_each_possible_cpu(cpu)
+		ops += *per_cpu(class->pops, cpu);
 
 	printk("%*s->", depth, "");
 	print_lock_name(class);
-	printk(KERN_CONT " ops: %lu", class->ops);
+	printk(KERN_CONT " ops: %lu", ops);
 	printk(KERN_CONT " {\n");
 
 	for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
@@ -3226,7 +3234,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		if (!class)
 			return 0;
 	}
-	atomic_inc((atomic_t *)&class->ops);
+
+	__this_cpu_inc(*class->pops);
+
 	if (very_verbose(class)) {
 		printk("\nacquire class [%px] %s", class->key, class->name);
 		if (class->name_version > 1)
-- 
1.8.3.1
