lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230719115358.GB3529734@hirez.programming.kicks-ass.net>
Date:   Wed, 19 Jul 2023 13:53:58 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Aaron Lu <aaron.lu@...el.com>
Cc:     Ingo Molnar <mingo@...hat.com>, Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Daniel Jordan <daniel.m.jordan@...cle.com>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        Valentin Schneider <vschneid@...hat.com>,
        Tim Chen <tim.c.chen@...el.com>,
        Nitin Tekchandani <nitin.tekchandani@...el.com>,
        Yu Chen <yu.c.chen@...el.com>,
        Waiman Long <longman@...hat.com>, linux-kernel@...r.kernel.org,
        yury.norov@...il.com, andriy.shevchenko@...ux.intel.com,
        linux@...musvillemoes.dk, rppt@...nel.org
Subject: Re: [RFC PATCH 2/4] sched/fair: Make tg->load_avg per node

On Tue, Jul 18, 2023 at 09:41:18PM +0800, Aaron Lu wrote:
> +#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
> +static inline long tg_load_avg(struct task_group *tg)
> +{
> +	long load_avg = 0;
> +	int i;
> +
> +	/*
> +	 * The only path that can give us a root_task_group
> +	 * here is from print_cfs_rq() thus unlikely.
> +	 */
> +	if (unlikely(tg == &root_task_group))
> +		return 0;
> +
> +	for_each_node(i)
> +		load_avg += atomic_long_read(&tg->node_info[i]->load_avg);
> +
> +	return load_avg;
> +}
> +#endif

So I was working on something else NUMA-related and noticed that for_each_node()
(and most of the nodemask stuff) is quite moronic; afaict we should do
something like the below.

I now see Mike added the nr_node_ids thing fairly recently, but given
distros have NODES_SHIFT=10 and actual machines typically only have <=4
nodes, this would save a factor of 256 in scanning.

Specifically, your for_each_node() would scan the full 1024-bit bitmap
looking for more bits that would never be there.

---

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 8d07116caaf1..c23c0889b8cf 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -109,7 +109,7 @@ extern nodemask_t _unused_nodemask_arg_;
 				__nodemask_pr_bits(maskp)
 static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m)
 {
-	return m ? MAX_NUMNODES : 0;
+	return m ? nr_node_ids : 0;
 }
 static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m)
 {
@@ -137,13 +137,13 @@ static inline void __node_clear(int node, volatile nodemask_t *dstp)
 	clear_bit(node, dstp->bits);
 }
 
-#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
+#define nodes_setall(dst) __nodes_setall(&(dst), nr_node_ids)
 static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits)
 {
 	bitmap_fill(dstp->bits, nbits);
 }
 
-#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
+#define nodes_clear(dst) __nodes_clear(&(dst), nr_node_ids)
 static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
 {
 	bitmap_zero(dstp->bits, nbits);
@@ -160,7 +160,7 @@ static inline bool __node_test_and_set(int node, nodemask_t *addr)
 }
 
 #define nodes_and(dst, src1, src2) \
-			__nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES)
+			__nodes_and(&(dst), &(src1), &(src2), nr_node_ids)
 static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
 					const nodemask_t *src2p, unsigned int nbits)
 {
@@ -168,7 +168,7 @@ static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
 }
 
 #define nodes_or(dst, src1, src2) \
-			__nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES)
+			__nodes_or(&(dst), &(src1), &(src2), nr_node_ids)
 static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
 					const nodemask_t *src2p, unsigned int nbits)
 {
@@ -176,7 +176,7 @@ static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
 }
 
 #define nodes_xor(dst, src1, src2) \
-			__nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES)
+			__nodes_xor(&(dst), &(src1), &(src2), nr_node_ids)
 static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
 					const nodemask_t *src2p, unsigned int nbits)
 {
@@ -184,7 +184,7 @@ static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
 }
 
 #define nodes_andnot(dst, src1, src2) \
-			__nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES)
+			__nodes_andnot(&(dst), &(src1), &(src2), nr_node_ids)
 static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
 					const nodemask_t *src2p, unsigned int nbits)
 {
@@ -192,7 +192,7 @@ static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
 }
 
 #define nodes_complement(dst, src) \
-			__nodes_complement(&(dst), &(src), MAX_NUMNODES)
+			__nodes_complement(&(dst), &(src), nr_node_ids)
 static inline void __nodes_complement(nodemask_t *dstp,
 					const nodemask_t *srcp, unsigned int nbits)
 {
@@ -200,7 +200,7 @@ static inline void __nodes_complement(nodemask_t *dstp,
 }
 
 #define nodes_equal(src1, src2) \
-			__nodes_equal(&(src1), &(src2), MAX_NUMNODES)
+			__nodes_equal(&(src1), &(src2), nr_node_ids)
 static inline bool __nodes_equal(const nodemask_t *src1p,
 					const nodemask_t *src2p, unsigned int nbits)
 {
@@ -208,7 +208,7 @@ static inline bool __nodes_equal(const nodemask_t *src1p,
 }
 
 #define nodes_intersects(src1, src2) \
-			__nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
+			__nodes_intersects(&(src1), &(src2), nr_node_ids)
 static inline bool __nodes_intersects(const nodemask_t *src1p,
 					const nodemask_t *src2p, unsigned int nbits)
 {
@@ -216,33 +216,33 @@ static inline bool __nodes_intersects(const nodemask_t *src1p,
 }
 
 #define nodes_subset(src1, src2) \
-			__nodes_subset(&(src1), &(src2), MAX_NUMNODES)
+			__nodes_subset(&(src1), &(src2), nr_node_ids)
 static inline bool __nodes_subset(const nodemask_t *src1p,
 					const nodemask_t *src2p, unsigned int nbits)
 {
 	return bitmap_subset(src1p->bits, src2p->bits, nbits);
 }
 
-#define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
+#define nodes_empty(src) __nodes_empty(&(src), nr_node_ids)
 static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits)
 {
 	return bitmap_empty(srcp->bits, nbits);
 }
 
-#define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
+#define nodes_full(nodemask) __nodes_full(&(nodemask), nr_node_ids)
 static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits)
 {
 	return bitmap_full(srcp->bits, nbits);
 }
 
-#define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES)
+#define nodes_weight(nodemask) __nodes_weight(&(nodemask), nr_node_ids)
 static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits)
 {
 	return bitmap_weight(srcp->bits, nbits);
 }
 
 #define nodes_shift_right(dst, src, n) \
-			__nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES)
+			__nodes_shift_right(&(dst), &(src), (n), nr_node_ids)
 static inline void __nodes_shift_right(nodemask_t *dstp,
 					const nodemask_t *srcp, int n, int nbits)
 {
@@ -250,7 +250,7 @@ static inline void __nodes_shift_right(nodemask_t *dstp,
 }
 
 #define nodes_shift_left(dst, src, n) \
-			__nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES)
+			__nodes_shift_left(&(dst), &(src), (n), nr_node_ids)
 static inline void __nodes_shift_left(nodemask_t *dstp,
 					const nodemask_t *srcp, int n, int nbits)
 {
@@ -385,7 +385,7 @@ static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp,
 #if MAX_NUMNODES > 1
 #define for_each_node_mask(node, mask)				    \
 	for ((node) = first_node(mask);				    \
-	     (node) < MAX_NUMNODES;				    \
+	     (node) < nr_node_ids;				    \
 	     (node) = next_node((node), (mask)))
 #else /* MAX_NUMNODES == 1 */
 #define for_each_node_mask(node, mask)                                  \

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ