lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 07 Dec 2008 19:30:10 +0100
From:	Eric Dumazet <dada1@...mosbay.com>
To:	Andrew Morton <akpm@...ux-foundation.org>
CC:	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Mike Travis <travis@....com>,
	"David S. Miller" <davem@...emloft.net>,
	linux kernel <linux-kernel@...r.kernel.org>,
	Christoph Lameter <cl@...ux-foundation.org>,
	linux-ext4@...r.kernel.org
Subject: Re: [PATCH, take2] percpu_counter: FBC_BATCH might be too big

Andrew Morton a écrit :
> On Sun, 07 Dec 2008 10:25:45 +0100 Eric Dumazet <dada1@...mosbay.com> wrote:
> 
>> In this second version I guarded hotcpu_notifier() call by
>> a #ifdef CONFIG_HOTPLUG_CPU
>>
>> I wonder why hotcpu_notifier() is not a null op if !CONFIG_HOTPLUG_CPU
>>
>> Thank you
>>
>> [PATCH] percpu_counter: FBC_BATCH might be too big
>>
>> For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS
>>
>> Considering more and more distros are using high NR_CPUS values,
>> it makes sense to use a more sensible value for FBC_BATCH.
>>
>> A sensible value is 2*num_online_cpus(), with a minimum value of 32
>> (This minimum value helps branch prediction in __percpu_counter_add())
>>
>> We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.
> 
> Yup, anything using NR_CPUS is probably wrong.
> 
>> diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
>> index 9007ccd..c42a184 100644
>> --- a/include/linux/percpu_counter.h
>> +++ b/include/linux/percpu_counter.h
>> @@ -24,11 +24,7 @@ struct percpu_counter {
>>  	s32 *counters;
>>  };
>>  
>> -#if NR_CPUS >= 16
>> -#define FBC_BATCH	(NR_CPUS*2)
>> -#else
>> -#define FBC_BATCH	(NR_CPUS*4)
>> -#endif
>> +extern int FBC_BATCH;
> 
> y:/usr/src/linux-2.6.28-rc7> grep -r FBC_BATCH . | wc -l
> 7
> 
> Can we fix this properly please?  It should now become lower case, and
> it was a pretty dopey name anyway - now would be a good time to improve
> it. `percpu_counter_batch'?

Yes

> 
>>  int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
>>  int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
>> diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
>> index a866389..e21ce7c 100644
>> --- a/lib/percpu_counter.c
>> +++ b/lib/percpu_counter.c
>> @@ -114,6 +114,16 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
>>  }
>>  EXPORT_SYMBOL(percpu_counter_destroy);
>>  
>> +int FBC_BATCH __read_mostly = 32;
>> +EXPORT_SYMBOL(FBC_BATCH);
>> +
>> +static void compute_batch_value(void)
>> +{
>> +	int nr = num_online_cpus();
>> +
>> +	FBC_BATCH = max(32, nr*2);
>> +}
>> +
>>  #ifdef CONFIG_HOTPLUG_CPU
>>  static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
>>  					unsigned long action, void *hcpu)
>> @@ -121,6 +131,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
>>  	unsigned int cpu;
>>  	struct percpu_counter *fbc;
>>  
>> +	compute_batch_value();
>>  	if (action != CPU_DEAD)
>>  		return NOTIFY_OK;
>>  
>> @@ -139,11 +150,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
>>  	mutex_unlock(&percpu_counters_lock);
>>  	return NOTIFY_OK;
>>  }
>> +#endif
>>  
>>  static int __init percpu_counter_startup(void)
>>  {
>> +	compute_batch_value();
>> +#ifdef CONFIG_HOTPLUG_CPU
>>  	hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
>> +#endif
>>  	return 0;
>>  }
>>  module_init(percpu_counter_startup);
>> -#endif
> 
> hm, now what's going on in there?  We should be able to drop the #ifdef
> CONFIG_HOTPLUG_CPU from lib/percpu_counter.c altogether. 
> hotcpu_notifier() will do the right thing, and the compiler should
> generate no code for percpu_counter_hotcpu_callback() if
> CONFIG_HOTPLUG_CPU=n.  
> 
> Do
> 
> 	$EDITOR  $(grep -l hotcpu_notifier */*.c)
> 
> and you'll see lots of code gets it right, and lots of code gets it wrong.

I see nothing interesting, I must be blind.

lib/percpu_counter.c: In function 'percpu_counter_startup':
lib/percpu_counter.c:158: error: 'percpu_counter_hotcpu_callback' undeclared (first use in this function)
lib/percpu_counter.c:158: error: (Each undeclared identifier is reported only once
lib/percpu_counter.c:158: error: for each function it appears in.)
make[1]: *** [lib/percpu_counter.o] Error 1

static int __init percpu_counter_startup(void)
{
        compute_batch_value();
        hotcpu_notifier(percpu_counter_hotcpu_callback, 0); << ERROR >>
        return 0;
}
module_init(percpu_counter_startup);

# grep hotcpu_notifier include/linux/cpu.h
#define hotcpu_notifier(fn, pri) {                          \
#define hotcpu_notifier(fn, pri)    do { (void)(fn); } while (0)  << ERROR >>


If changed to :

static struct notifier_block __cpuinitdata percpu_counter_cpu_notifier = {
        .notifier_call  = percpu_counter_hotcpu_callback,
};
static int __init percpu_counter_startup(void)
{
        compute_batch_value();
        register_hotcpu_notifier(&percpu_counter_cpu_notifier);
        return 0;
}
module_init(percpu_counter_startup);

Then error is :
lib/percpu_counter.c:156: error: 'percpu_counter_hotcpu_callback' undeclared here (not in a function)
make[1]: *** [lib/percpu_counter.o] Error 1


So the only way seems to add an ugly

#define percpu_counter_hotcpu_callback NULL

Is is what you name "the right thing" ?

Thanks

[PATCH] percpu_counter: FBC_BATCH should be a variable

For NR_CPUS >= 16 values, FBC_BATCH is 2*NR_CPUS

Considering more and more distros are using high NR_CPUS values,
it makes sense to use a more sensible value for FBC_BATCH.

A sensible value is 2*num_online_cpus(), with a minimum value of 32
(This minimum value helps branch prediction in __percpu_counter_add())

We already have a hotcpu notifier, so we can adjust FBC_BATCH dynamically.

We rename FBC_BATCH to percpu_counter_batch since its not a constant
anymore.

Signed-off-by: Eric Dumazet <dada1@...mosbay.com>
Acked-by: David S. Miller <davem@...emloft.net>
Acked-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
---
 fs/ext4/ext4.h                 |    6 +++---
 fs/ext4/inode.c                |    2 +-
 include/linux/percpu_counter.h |    8 ++------
 lib/percpu_counter.c           |   16 +++++++++++++++-
 4 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0537c8..6c46c64 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1225,11 +1225,11 @@ do {								\
 } while (0)
 
 #ifdef CONFIG_SMP
-/* Each CPU can accumulate FBC_BATCH blocks in their local
+/* Each CPU can accumulate percpu_counter_batch blocks in their local
  * counters. So we need to make sure we have free blocks more
- * than FBC_BATCH  * nr_cpu_ids. Also add a window of 4 times.
+ * than percpu_counter_batch  * nr_cpu_ids. Also add a window of 4 times.
  */
-#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids))
+#define EXT4_FREEBLOCKS_WATERMARK (4 * (percpu_counter_batch * nr_cpu_ids))
 #else
 #define EXT4_FREEBLOCKS_WATERMARK 0
 #endif
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index be21a5a..db661e7 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2497,7 +2497,7 @@ static int ext4_nonda_switch(struct super_block *sb)
 	/*
 	 * switch to non delalloc mode if we are running low
 	 * on free block. The free block accounting via percpu
-	 * counters can get slightly wrong with FBC_BATCH getting
+	 * counters can get slightly wrong with percpu_counter_batch getting
 	 * accumulated on each CPU without updating global counters
 	 * Delalloc need an accurate free block accounting. So switch
 	 * to non delalloc when we are near to error range.
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 9007ccd..99de7a3 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -24,11 +24,7 @@ struct percpu_counter {
 	s32 *counters;
 };
 
-#if NR_CPUS >= 16
-#define FBC_BATCH	(NR_CPUS*2)
-#else
-#define FBC_BATCH	(NR_CPUS*4)
-#endif
+extern int percpu_counter_batch;
 
 int percpu_counter_init(struct percpu_counter *fbc, s64 amount);
 int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
@@ -39,7 +35,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc);
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
-	__percpu_counter_add(fbc, amount, FBC_BATCH);
+	__percpu_counter_add(fbc, amount, percpu_counter_batch);
 }
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index a866389..519b877 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -114,6 +114,16 @@ void percpu_counter_destroy(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(percpu_counter_destroy);
 
+int percpu_counter_batch __read_mostly = 32;
+EXPORT_SYMBOL(percpu_counter_batch);
+
+static void compute_batch_value(void)
+{
+	int nr = num_online_cpus();
+
+	percpu_counter_batch = max(32, nr*2);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 					unsigned long action, void *hcpu)
@@ -121,6 +131,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	unsigned int cpu;
 	struct percpu_counter *fbc;
 
+	compute_batch_value();
 	if (action != CPU_DEAD)
 		return NOTIFY_OK;
 
@@ -139,11 +150,14 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	mutex_unlock(&percpu_counters_lock);
 	return NOTIFY_OK;
 }
+#else
+#define percpu_counter_hotcpu_callback NULL
+#endif /* CONFIG_HOTPLUG_CPU */
 
 static int __init percpu_counter_startup(void)
 {
+	compute_batch_value();
 	hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
 	return 0;
 }
 module_init(percpu_counter_startup);
-#endif

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ