Message-ID: <20240904134152.2141-4-thunder.leizhen@huawei.com>
Date: Wed, 4 Sep 2024 21:41:52 +0800
From: Zhen Lei <thunder.leizhen@...wei.com>
To: Andrew Morton <akpm@...ux-foundation.org>, Thomas Gleixner
	<tglx@...utronix.de>, <linux-kernel@...r.kernel.org>, David Gow
	<davidgow@...gle.com>, <linux-kselftest@...r.kernel.org>,
	<kunit-dev@...glegroups.com>
CC: Zhen Lei <thunder.leizhen@...wei.com>
Subject: [PATCH 3/3] debugobjects: Use hlist_cut_number() to optimize performance and improve readability

Currently, there are several places where a number of nodes are extracted
from one list and added to another list, one node at a time. Extracting
and splicing the nodes one by one is not only inefficient, it also hurts
readability. The same work can be done with hlist_cut_number() and
hlist_splice_init(), which move the whole sublist in one operation.
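
For illustration only (not part of the patch): the hypothetical helpers
move_batch_old()/move_batch_new() below contrast the two patterns. The
prototypes of hlist_cut_number() and hlist_splice_init() are assumed from
their use in this patch; the cut helper itself is presumably introduced
earlier in this series.

	/* Before: move up to @nr nodes from @src to @dst one at a time. */
	static int move_batch_old(struct hlist_head *dst, struct hlist_head *src, int nr)
	{
		struct debug_obj *obj;
		int i;

		for (i = 0; i < nr && !hlist_empty(src); i++) {
			obj = hlist_entry(src->first, typeof(*obj), node);
			hlist_del(&obj->node);
			hlist_add_head(&obj->node, dst);
		}
		return i;
	}

	/* After: cut the sublist once, then splice it once. */
	static int move_batch_new(struct hlist_head *dst, struct hlist_head *src, int nr)
	{
		struct hlist_node *last;
		HLIST_HEAD(tmp);
		int cnt;

		cnt = hlist_cut_number(&tmp, src, nr, &last);
		if (cnt > 0)
			hlist_splice_init(&tmp, last, dst);
		return cnt;
	}

Note that the old pattern reverses the node order (head insertion) while
cut-and-splice preserves it; the patch implicitly treats these pools as
unordered.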

When the number of nodes to be moved is less than or equal to 0, or the
source list is empty, hlist_cut_number() safely returns 0. The splice is
therefore performed only when the return value of hlist_cut_number() is
greater than 0.
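
A possible shape of that contract, purely as a sketch (the real helper is
not part of this patch and may be implemented differently; @to is assumed
to be an empty list, as it is at every call site below):

	static int hlist_cut_number(struct hlist_head *to, struct hlist_head *from,
				    int number, struct hlist_node **last)
	{
		struct hlist_node *pos;
		int cnt = 0;

		if (number <= 0 || hlist_empty(from))
			return 0;		/* safe no-op, as described above */

		pos = from->first;
		while (cnt < number) {
			cnt++;
			*last = pos;
			if (!pos->next)
				break;		/* @from has fewer than @number nodes */
			pos = pos->next;
		}

		/* Detach the segment [from->first, *last] and hand it to @to. */
		to->first = from->first;
		to->first->pprev = &to->first;
		from->first = (*last)->next;
		if (from->first)
			from->first->pprev = &from->first;
		(*last)->next = NULL;

		return cnt;
	}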

For the two calls to hlist_cut_number() in __free_object(), the result is
obviously positive, so the check of the return value is omitted there.

Signed-off-by: Zhen Lei <thunder.leizhen@...wei.com>
---
 lib/debugobjects.c | 115 +++++++++++++++++++--------------------------
 1 file changed, 48 insertions(+), 67 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index db8f6b4b8b3151a..1cb9458af3d0b4f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -128,8 +128,9 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
 static void fill_pool(void)
 {
 	gfp_t gfp = __GFP_HIGH | __GFP_NOWARN;
-	struct debug_obj *obj;
+	HLIST_HEAD(freelist);
 	unsigned long flags;
+	int cnt;
 
 	/*
 	 * The upper-layer function uses only one node at a time. If there are
@@ -152,17 +153,19 @@ static void fill_pool(void)
 	 * the WRITE_ONCE() in pool_lock critical sections.
 	 */
 	if (READ_ONCE(obj_nr_tofree)) {
+		struct hlist_node *last;
+
 		raw_spin_lock_irqsave(&pool_lock, flags);
 		/*
 		 * Recheck with the lock held as the worker thread might have
 		 * won the race and freed the global free list already.
 		 */
-		while (obj_nr_tofree && (obj_pool_free < debug_objects_pool_min_level)) {
-			obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
-			hlist_del(&obj->node);
-			WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1);
-			hlist_add_head(&obj->node, &obj_pool);
-			WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
+		cnt = min(obj_nr_tofree, debug_objects_pool_min_level - obj_pool_free);
+		cnt = hlist_cut_number(&freelist, &obj_to_free, cnt, &last);
+		if (cnt > 0) {
+			hlist_splice_init(&freelist, last, &obj_pool);
+			WRITE_ONCE(obj_pool_free, obj_pool_free + cnt);
+			WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - cnt);
 		}
 		raw_spin_unlock_irqrestore(&pool_lock, flags);
 	}
@@ -172,8 +175,6 @@ static void fill_pool(void)
 
 	while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) {
 		struct debug_obj *new, *last = NULL;
-		HLIST_HEAD(freelist);
-		int cnt;
 
 		for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
 			new = kmem_cache_zalloc(obj_cache, gfp);
@@ -245,30 +246,28 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
 	raw_spin_lock(&pool_lock);
 	obj = __alloc_object(&obj_pool);
 	if (obj) {
-		obj_pool_used++;
-		WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
+		int cnt = 0;
 
 		/*
 		 * Looking ahead, allocate one batch of debug objects and
 		 * put them into the percpu free pool.
 		 */
 		if (likely(obj_cache)) {
-			int i;
-
-			for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
-				struct debug_obj *obj2;
-
-				obj2 = __alloc_object(&obj_pool);
-				if (!obj2)
-					break;
-				hlist_add_head(&obj2->node,
-					       &percpu_pool->free_objs);
-				percpu_pool->obj_free++;
-				obj_pool_used++;
-				WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
+			struct hlist_node *last;
+			HLIST_HEAD(freelist);
+
+			cnt = hlist_cut_number(&freelist, &obj_pool, ODEBUG_BATCH_SIZE, &last);
+			if (cnt > 0) {
+				hlist_splice_init(&freelist, last, &percpu_pool->free_objs);
+				percpu_pool->obj_free += cnt;
 			}
 		}
 
+		/* add one for obj */
+		cnt++;
+		obj_pool_used += cnt;
+		WRITE_ONCE(obj_pool_free, obj_pool_free - cnt);
+
 		if (obj_pool_used > obj_pool_max_used)
 			obj_pool_max_used = obj_pool_used;
 
@@ -300,6 +299,7 @@ static void free_obj_work(struct work_struct *work)
 	struct debug_obj *obj;
 	unsigned long flags;
 	HLIST_HEAD(tofree);
+	int cnt;
 
 	WRITE_ONCE(obj_freeing, false);
 	if (!raw_spin_trylock_irqsave(&pool_lock, flags))
@@ -315,12 +315,12 @@ static void free_obj_work(struct work_struct *work)
 	 * may be gearing up to use more and more objects, don't free any
 	 * of them until the next round.
 	 */
-	while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
-		obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
-		hlist_del(&obj->node);
-		hlist_add_head(&obj->node, &obj_pool);
-		WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
-		WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1);
+	cnt = min(obj_nr_tofree, debug_objects_pool_size - obj_pool_free);
+	cnt = hlist_cut_number(&tofree, &obj_to_free, cnt, &tmp);
+	if (cnt > 0) {
+		hlist_splice_init(&tofree, tmp, &obj_pool);
+		WRITE_ONCE(obj_pool_free, obj_pool_free + cnt);
+		WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - cnt);
 	}
 	raw_spin_unlock_irqrestore(&pool_lock, flags);
 	return;
@@ -346,11 +346,12 @@ static void free_obj_work(struct work_struct *work)
 
 static void __free_object(struct debug_obj *obj)
 {
-	struct debug_obj *objs[ODEBUG_BATCH_SIZE];
 	struct debug_percpu_free *percpu_pool;
-	int lookahead_count = 0;
+	struct hlist_node *last;
+	HLIST_HEAD(freelist);
 	unsigned long flags;
 	bool work;
+	int cnt;
 
 	local_irq_save(flags);
 	if (!obj_cache)
@@ -371,56 +372,36 @@ static void __free_object(struct debug_obj *obj)
 	 * As the percpu pool is full, look ahead and pull out a batch
 	 * of objects from the percpu pool and free them as well.
 	 */
-	for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) {
-		objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs);
-		if (!objs[lookahead_count])
-			break;
-		percpu_pool->obj_free--;
-	}
+	cnt = hlist_cut_number(&freelist, &percpu_pool->free_objs, ODEBUG_BATCH_SIZE, &last);
+	percpu_pool->obj_free -= cnt;
+
+	/* add one for obj */
+	cnt++;
+	hlist_add_head(&obj->node, &freelist);
 
 free_to_obj_pool:
 	raw_spin_lock(&pool_lock);
 	work = (obj_pool_free > debug_objects_pool_size) && obj_cache &&
 	       (obj_nr_tofree < ODEBUG_FREE_WORK_MAX);
-	obj_pool_used--;
+	obj_pool_used -= cnt;
 
 	if (work) {
-		WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1);
-		hlist_add_head(&obj->node, &obj_to_free);
-		if (lookahead_count) {
-			WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count);
-			obj_pool_used -= lookahead_count;
-			while (lookahead_count) {
-				hlist_add_head(&objs[--lookahead_count]->node,
-					       &obj_to_free);
-			}
-		}
+		WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + cnt);
+		hlist_splice_init(&freelist, last, &obj_to_free);
 
 		if ((obj_pool_free > debug_objects_pool_size) &&
 		    (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) {
-			int i;
-
 			/*
 			 * Free one more batch of objects from obj_pool.
 			 */
-			for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
-				obj = __alloc_object(&obj_pool);
-				hlist_add_head(&obj->node, &obj_to_free);
-				WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
-				WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1);
-			}
+			cnt = hlist_cut_number(&freelist, &obj_pool, ODEBUG_BATCH_SIZE, &last);
+			hlist_splice_init(&freelist, last, &obj_to_free);
+			WRITE_ONCE(obj_pool_free, obj_pool_free - cnt);
+			WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + cnt);
 		}
 	} else {
-		WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
-		hlist_add_head(&obj->node, &obj_pool);
-		if (lookahead_count) {
-			WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count);
-			obj_pool_used -= lookahead_count;
-			while (lookahead_count) {
-				hlist_add_head(&objs[--lookahead_count]->node,
-					       &obj_pool);
-			}
-		}
+		WRITE_ONCE(obj_pool_free, obj_pool_free + cnt);
+		hlist_splice_init(&freelist, last, &obj_pool);
 	}
 	raw_spin_unlock(&pool_lock);
 	local_irq_restore(flags);
-- 
2.34.1

