lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251022110555.837390652@linutronix.de>
Date: Wed, 22 Oct 2025 14:55:28 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: LKML <linux-kernel@...r.kernel.org>
Cc: Peter Zijlstra <peterz@...radead.org>,
 Gabriele Monaco <gmonaco@...hat.com>,
 Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
 Michael Jeanson <mjeanson@...icios.com>,
 Jens Axboe <axboe@...nel.dk>,
 "Paul E. McKenney" <paulmck@...nel.org>,
 "Gautham R. Shenoy" <gautham.shenoy@....com>,
 Florian Weimer <fweimer@...hat.com>,
 Tim Chen <tim.c.chen@...el.com>,
 Yury Norov <yury.norov@...il.com>
Subject: [patch V2 07/20] cpumask: Introduce cpumask_or_and_calc_weight()

CID management ORs two cpumasks and then calculates the weight of the
result. That's inefficient because it has to walk the same bitmap twice.
As this is done with the runqueue lock held, there is a real benefit in
speeding this up. Depending on the system, this results in 10-20% fewer
cycles spent with the runqueue lock held for a 4K cpumask.

Provide cpumask_or_and_calc_weight() and the corresponding bitmap functions,
which compute the OR and return the weight of the result in a single pass.

Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Cc: Yury Norov <yury.norov@...il.com>
---
V2: Rename and use the BITMAP_WEIGHT() macro - Yury
---
 include/linux/bitmap.h  |   16 ++++++++++++++++
 include/linux/cpumask.h |   16 ++++++++++++++++
 lib/bitmap.c            |    6 ++++++
 3 files changed, 38 insertions(+)

--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -45,6 +45,8 @@ struct device;
  *  bitmap_copy(dst, src, nbits)                *dst = *src
  *  bitmap_and(dst, src1, src2, nbits)          *dst = *src1 & *src2
  *  bitmap_or(dst, src1, src2, nbits)           *dst = *src1 | *src2
+ *  bitmap_or_and_calc_weight(dst, src1, src2, nbits)
+ *						*dst = *src1 | *src2. Returns Hamming Weight of dst
  *  bitmap_xor(dst, src1, src2, nbits)          *dst = *src1 ^ *src2
  *  bitmap_andnot(dst, src1, src2, nbits)       *dst = *src1 & ~(*src2)
  *  bitmap_complement(dst, src, nbits)          *dst = ~(*src)
@@ -165,6 +167,8 @@ bool __bitmap_and(unsigned long *dst, co
 		 const unsigned long *bitmap2, unsigned int nbits);
 void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, unsigned int nbits);
+unsigned int __bitmap_or_and_calc_weight(unsigned long *dst, const unsigned long *bitmap1,
+					 const unsigned long *bitmap2, unsigned int nbits);
 void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
 		  const unsigned long *bitmap2, unsigned int nbits);
 bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
@@ -338,6 +342,18 @@ void bitmap_or(unsigned long *dst, const
 }
 
 static __always_inline
+unsigned int bitmap_or_and_calc_weight(unsigned long *dst, const unsigned long *src1,
+				       const unsigned long *src2, unsigned int nbits)
+{
+	if (small_const_nbits(nbits)) {
+		*dst = *src1 | *src2;
+		return hweight_long(*dst & BITMAP_LAST_WORD_MASK(nbits));
+	} else {
+		return __bitmap_or_and_calc_weight(dst, src1, src2, nbits);
+	}
+}
+
+static __always_inline
 void bitmap_xor(unsigned long *dst, const unsigned long *src1,
 		const unsigned long *src2, unsigned int nbits)
 {
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -729,6 +729,22 @@ void cpumask_or(struct cpumask *dstp, co
 }
 
 /**
+ * cpumask_or_and_calc_weight - *dstp = *src1p | *src2p and return the weight of the result
+ * @dstp: the cpumask result
+ * @src1p: the first input
+ * @src2p: the second input
+ *
+ * Return: The number of bits set in the resulting cpumask @dstp
+ */
+static __always_inline
+unsigned int cpumask_or_and_calc_weight(struct cpumask *dstp, const struct cpumask *src1p,
+					const struct cpumask *src2p)
+{
+	return bitmap_or_and_calc_weight(cpumask_bits(dstp), cpumask_bits(src1p),
+					 cpumask_bits(src2p), small_cpumask_bits);
+}
+
+/**
  * cpumask_xor - *dstp = *src1p ^ *src2p
  * @dstp: the cpumask result
  * @src1p: the first input
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -355,6 +355,12 @@ unsigned int __bitmap_weight_andnot(cons
 }
 EXPORT_SYMBOL(__bitmap_weight_andnot);
 
+unsigned int __bitmap_or_and_calc_weight(unsigned long *dst, const unsigned long *bitmap1,
+					 const unsigned long *bitmap2, unsigned int bits)
+{
+	return BITMAP_WEIGHT(({dst[idx] = bitmap1[idx] | bitmap2[idx]; dst[idx]; }), bits);
+}
+
 void __bitmap_set(unsigned long *map, unsigned int start, int len)
 {
 	unsigned long *p = map + BIT_WORD(start);


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ