lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:	Tue, 15 Oct 2013 12:25:38 -0700
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	Christoph Lameter <cl@...ux.com>
Cc:	Tejun Heo <tj@...nel.org>, akpm@...uxfoundation.org,
	Dipankar Sarma <dipankar@...ibm.com>,
	linux-arch@...r.kernel.org, Steven Rostedt <srostedt@...hat.com>,
	linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...nel.org>,
	Peter Zijlstra <peterz@...radead.org>
Subject: Re: [PATCH 07/10] rcu: Replace __get_cpu_var uses

On Tue, Oct 15, 2013 at 12:58:59PM -0500, Christoph Lameter wrote:
> __get_cpu_var() is used for multiple purposes in the kernel source. One of
> them is address calculation via the form &__get_cpu_var(x).  This calculates
> the address for the instance of the percpu variable of the current processor
> based on an offset.

Commit #c9d4b0af9e06 in -rcu, to be sent to -tip soonish.

							Thanx, Paul

> Other use cases are for storing and retrieving data from the current
> processor's percpu area.  __get_cpu_var() can be used as an lvalue when
> writing data or on the right side of an assignment.
> 
> __get_cpu_var() is defined as :
> 
> 
> #define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
> 
> 
> 
> __get_cpu_var() always performs an address calculation. However, store
> and retrieve operations could use a segment prefix (or global register on
> other platforms) to avoid the address calculation.
> 
> this_cpu_write() and this_cpu_read() can directly take an offset into a
> percpu area and use optimized assembly code to read and write per cpu
> variables.
> 
> 
> This patch converts __get_cpu_var into either an explicit address
> calculation using this_cpu_ptr() or into a use of this_cpu operations that
> use the offset.  Thereby address calculations are avoided and fewer registers
> are used when code is generated.
> 
> At the end of the patch set all uses of __get_cpu_var have been removed so
> the macro is removed too.
> 
> The patch set includes passes over all arches as well. Once these operations
> are used throughout, specialized macros can be defined in non-x86
> arches as well in order to optimize per cpu access by, e.g., using a global
> register that may be set to the per cpu base.
> 
> 
> 
> 
> Transformations done to __get_cpu_var()
> 
> 
> 1. Determine the address of the percpu instance of the current processor.
> 
> 	DEFINE_PER_CPU(int, y);
> 	int *x = &__get_cpu_var(y);
> 
>     Converts to
> 
> 	int *x = this_cpu_ptr(&y);
> 
> 
> 2. Same as #1 but this time an array structure is involved.
> 
> 	DEFINE_PER_CPU(int, y[20]);
> 	int *x = __get_cpu_var(y);
> 
>     Converts to
> 
> 	int *x = this_cpu_ptr(y);
> 
> 
> 3. Retrieve the content of the current processor's instance of a per cpu
> variable.
> 
> 	DEFINE_PER_CPU(int, y);
> 	int x = __get_cpu_var(y);
> 
>    Converts to
> 
> 	int x = __this_cpu_read(y);
> 
> 
> 4. Retrieve the content of a percpu struct
> 
> 	DEFINE_PER_CPU(struct mystruct, y);
> 	struct mystruct x = __get_cpu_var(y);
> 
>    Converts to
> 
> 	memcpy(&x, this_cpu_ptr(&y), sizeof(x));
> 
> 
> 5. Assignment to a per cpu variable
> 
> 	DEFINE_PER_CPU(int, y);
> 	__get_cpu_var(y) = x;
> 
>    Converts to
> 
> 	this_cpu_write(y, x);
> 
> 
> 6. Increment/Decrement etc. of a per cpu variable
> 
> 	DEFINE_PER_CPU(int, y);
> 	__get_cpu_var(y)++;
> 
>    Converts to
> 
> 	this_cpu_inc(y);
> 
> Signed-off-by: Christoph Lameter <cl@...ux.com>
> 
> Index: linux/kernel/rcutree.c
> ===================================================================
> --- linux.orig/kernel/rcutree.c	2013-10-14 10:55:57.747107834 -0500
> +++ linux/kernel/rcutree.c	2013-10-14 10:58:26.247229815 -0500
> @@ -407,7 +407,7 @@ static void rcu_eqs_enter(bool user)
>  	long long oldval;
>  	struct rcu_dynticks *rdtp;
> 
> -	rdtp = &__get_cpu_var(rcu_dynticks);
> +	rdtp = this_cpu_ptr(&rcu_dynticks);
>  	oldval = rdtp->dynticks_nesting;
>  	WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
>  	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
> @@ -435,7 +435,7 @@ void rcu_idle_enter(void)
> 
>  	local_irq_save(flags);
>  	rcu_eqs_enter(false);
> -	rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
> +	rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0);
>  	local_irq_restore(flags);
>  }
>  EXPORT_SYMBOL_GPL(rcu_idle_enter);
> @@ -478,7 +478,7 @@ void rcu_irq_exit(void)
>  	struct rcu_dynticks *rdtp;
> 
>  	local_irq_save(flags);
> -	rdtp = &__get_cpu_var(rcu_dynticks);
> +	rdtp = this_cpu_ptr(&rcu_dynticks);
>  	oldval = rdtp->dynticks_nesting;
>  	rdtp->dynticks_nesting--;
>  	WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
> @@ -528,7 +528,7 @@ static void rcu_eqs_exit(bool user)
>  	struct rcu_dynticks *rdtp;
>  	long long oldval;
> 
> -	rdtp = &__get_cpu_var(rcu_dynticks);
> +	rdtp = this_cpu_ptr(&rcu_dynticks);
>  	oldval = rdtp->dynticks_nesting;
>  	WARN_ON_ONCE(oldval < 0);
>  	if (oldval & DYNTICK_TASK_NEST_MASK)
> @@ -555,7 +555,7 @@ void rcu_idle_exit(void)
> 
>  	local_irq_save(flags);
>  	rcu_eqs_exit(false);
> -	rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
> +	rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0);
>  	local_irq_restore(flags);
>  }
>  EXPORT_SYMBOL_GPL(rcu_idle_exit);
> @@ -599,7 +599,7 @@ void rcu_irq_enter(void)
>  	long long oldval;
> 
>  	local_irq_save(flags);
> -	rdtp = &__get_cpu_var(rcu_dynticks);
> +	rdtp = this_cpu_ptr(&rcu_dynticks);
>  	oldval = rdtp->dynticks_nesting;
>  	rdtp->dynticks_nesting++;
>  	WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
> @@ -620,7 +620,7 @@ void rcu_irq_enter(void)
>   */
>  void rcu_nmi_enter(void)
>  {
> -	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
> +	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
> 
>  	if (rdtp->dynticks_nmi_nesting == 0 &&
>  	    (atomic_read(&rdtp->dynticks) & 0x1))
> @@ -642,7 +642,7 @@ void rcu_nmi_enter(void)
>   */
>  void rcu_nmi_exit(void)
>  {
> -	struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
> +	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
> 
>  	if (rdtp->dynticks_nmi_nesting == 0 ||
>  	    --rdtp->dynticks_nmi_nesting != 0)
> @@ -665,7 +665,7 @@ int rcu_is_cpu_idle(void)
>  	int ret;
> 
>  	preempt_disable();
> -	ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
> +	ret = (atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1) == 0;
>  	preempt_enable();
>  	return ret;
>  }
> @@ -703,7 +703,7 @@ bool rcu_lockdep_current_cpu_online(void
>  	if (in_nmi())
>  		return 1;
>  	preempt_disable();
> -	rdp = &__get_cpu_var(rcu_sched_data);
> +	rdp = this_cpu_ptr(&rcu_sched_data);
>  	rnp = rdp->mynode;
>  	ret = (rdp->grpmask & rnp->qsmaskinit) ||
>  	      !rcu_scheduler_fully_active;
> @@ -723,7 +723,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cp
>   */
>  static int rcu_is_cpu_rrupt_from_idle(void)
>  {
> -	return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
> +	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
>  }
> 
>  /*
> Index: linux/kernel/rcutree_plugin.h
> ===================================================================
> --- linux.orig/kernel/rcutree_plugin.h	2013-10-14 10:55:57.747107834 -0500
> +++ linux/kernel/rcutree_plugin.h	2013-10-14 10:55:57.747107834 -0500
> @@ -660,7 +660,7 @@ static void rcu_preempt_check_callbacks(
> 
>  static void rcu_preempt_do_callbacks(void)
>  {
> -	rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
> +	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
>  }
> 
>  #endif /* #ifdef CONFIG_RCU_BOOST */
> @@ -1332,7 +1332,7 @@ static void invoke_rcu_callbacks_kthread
>   */
>  static bool rcu_is_callbacks_kthread(void)
>  {
> -	return __get_cpu_var(rcu_cpu_kthread_task) == current;
> +	return __this_cpu_read(rcu_cpu_kthread_task) == current;
>  }
> 
>  #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
> @@ -1382,8 +1382,8 @@ static int rcu_spawn_one_boost_kthread(s
> 
>  static void rcu_kthread_do_work(void)
>  {
> -	rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
> -	rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
> +	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
> +	rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
>  	rcu_preempt_do_callbacks();
>  }
> 
> @@ -1402,7 +1402,7 @@ static void rcu_cpu_kthread_park(unsigne
> 
>  static int rcu_cpu_kthread_should_run(unsigned int cpu)
>  {
> -	return __get_cpu_var(rcu_cpu_has_work);
> +	return __this_cpu_read(rcu_cpu_has_work);
>  }
> 
>  /*
> @@ -1412,8 +1412,8 @@ static int rcu_cpu_kthread_should_run(un
>   */
>  static void rcu_cpu_kthread(unsigned int cpu)
>  {
> -	unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
> -	char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
> +	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
> +	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
>  	int spincnt;
> 
>  	for (spincnt = 0; spincnt < 10; spincnt++) {
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ