Message-ID: <20161115012925.GB12110@tardis.cn.ibm.com>
Date:   Tue, 15 Nov 2016 09:29:25 +0800
From:   Boqun Feng <boqun.feng@...il.com>
To:     "Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Cc:     linux-kernel@...r.kernel.org, mingo@...nel.org,
        jiangshanlai@...il.com, dipankar@...ibm.com,
        akpm@...ux-foundation.org, mathieu.desnoyers@...icios.com,
        josh@...htriplett.org, tglx@...utronix.de, peterz@...radead.org,
        rostedt@...dmis.org, dhowells@...hat.com, edumazet@...gle.com,
        dvhart@...ux.intel.com, fweisbec@...il.com, oleg@...hat.com,
        bobby.prani@...il.com
Subject: Re: [PATCH tip/core/rcu 5/5] rcu: Maintain special bits at bottom of
 ->dynticks counter

On Mon, Nov 14, 2016 at 10:30:21AM -0800, Paul E. McKenney wrote:
> Currently, IPIs are used to force other CPUs to invalidate their TLBs
> in response to a kernel virtual-memory mapping change.  This works, but
> degrades both battery lifetime (for idle CPUs) and real-time response
> (for nohz_full CPUs), and in addition results in unnecessary IPIs due to
> the fact that CPUs executing in usermode are unaffected by stale kernel
> mappings.  It would be better to cause a CPU executing in usermode to
> wait until it is entering kernel mode to do the flush, first to avoid
> interrupting usermode tasks and second to handle multiple flush requests
> with a single flush in the case of a long-running user task.
> 
> This commit therefore reserves a bit at the bottom of the ->dynticks
> counter, which is checked upon exit from extended quiescent states.
> If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is
> invoked, which, if not supplied, is an empty single-pass do-while loop.
> If this bottom bit is set on -entry- to an extended quiescent state,
> then a WARN_ON_ONCE() triggers.
> 
> This bottom bit may be set using a new rcu_eqs_special_set() function,
> which returns true if the bit was set, or false if the CPU turned
> out to not be in an extended quiescent state.  Please note that this
> function refuses to set the bit for a non-nohz_full CPU when that CPU
> is executing in usermode because usermode execution is tracked by RCU
> as a dyntick-idle extended quiescent state only for nohz_full CPUs.
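
(For readers following along: the intended caller-side pattern appears to be
roughly the sketch below.  The names hypothetical_flush_others() and
flush_tlb_func() are placeholders of my own, not anything defined in this
series; only rcu_eqs_special_set() comes from the patch.)

	/* Hypothetical sender-side usage, for illustration only. */
	static void hypothetical_flush_others(const struct cpumask *mask)
	{
		int cpu;

		for_each_cpu(cpu, mask) {
			/* In EQS: the flush is deferred to rcu_eqs_special_exit(). */
			if (rcu_eqs_special_set(cpu))
				continue;
			/* Not in EQS: fall back to an IPI, as is done today. */
			smp_call_function_single(cpu, flush_tlb_func, NULL, 1);
		}
	}
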
> 
> Reported-by: Andy Lutomirski <luto@...capital.net>
> Signed-off-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
> ---
>  include/linux/rcutiny.h |  5 +++
>  kernel/rcu/tree.c       | 81 +++++++++++++++++++++++++++++++++++++------------
>  kernel/rcu/tree.h       |  1 +
>  3 files changed, 67 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
> index 4f9b2fa2173d..7232d199a81c 100644
> --- a/include/linux/rcutiny.h
> +++ b/include/linux/rcutiny.h
> @@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
>  	return 0;
>  }
>  
> +static inline bool rcu_eqs_special_set(int cpu)
> +{
> +	return false;  /* Never flag non-existent other CPUs! */
> +}
> +
>  static inline unsigned long get_state_synchronize_rcu(void)
>  {
>  	return 0;
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index c2b2f5b591b7..2c399db6df6e 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -269,9 +269,19 @@ void rcu_bh_qs(void)
>  
>  static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
>  
> +/*
> + * Steal a bit from the bottom of ->dynticks for idle entry/exit
> + * control.  Initially this is for TLB flushing.
> + */
> +#define RCU_DYNTICK_CTRL_MASK 0x1
> +#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
> +#ifndef rcu_eqs_special_exit
> +#define rcu_eqs_special_exit() do { } while (0)
> +#endif
> +
>  static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
>  	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
> -	.dynticks = ATOMIC_INIT(1),
> +	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
>  #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
>  	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
>  	.dynticks_idle = ATOMIC_INIT(1),
> @@ -285,17 +295,20 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
>  static void rcu_dynticks_eqs_enter(void)
>  {
>  	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
> +	int seq;
>  
>  	/*
> -	 * CPUs seeing atomic_inc() must see prior RCU read-side critical
> -	 * sections, and we also must force ordering with the next idle
> -	 * sojourn.
> +	 * CPUs seeing atomic_inc_return() must see prior RCU read-side
> +	 * critical sections, and we also must force ordering with the
> +	 * next idle sojourn.
>  	 */
> -	smp_mb__before_atomic(); /* See above. */
> -	atomic_inc(&rdtp->dynticks);
> -	smp_mb__after_atomic(); /* See above. */
> +	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
> +	/* Better be in an extended quiescent state! */
> +	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> +		     (seq & RCU_DYNTICK_CTRL_CTR));
> +	/* Better not have special action (TLB flush) pending! */

Ah.. you did the clean-up here ;-)

Never mind my previous comment on patch #3 ;-)

Regards,
Boqun

>  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> -		     atomic_read(&rdtp->dynticks) & 0x1);
> +		     (seq & RCU_DYNTICK_CTRL_MASK));
>  }
>  
>  /*
> @@ -305,17 +318,22 @@ static void rcu_dynticks_eqs_enter(void)
>  static void rcu_dynticks_eqs_exit(void)
>  {
>  	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
> +	int seq;
>  
>  	/*
> -	 * CPUs seeing atomic_inc() must see prior idle sojourns,
> +	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
>  	 * and we also must force ordering with the next RCU read-side
>  	 * critical section.
>  	 */
> -	smp_mb__before_atomic(); /* See above. */
> -	atomic_inc(&rdtp->dynticks);
> -	smp_mb__after_atomic(); /* See above. */
> +	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
>  	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> -		     !(atomic_read(&rdtp->dynticks) & 0x1));
> +		     !(seq & RCU_DYNTICK_CTRL_CTR));
> +	if (seq & RCU_DYNTICK_CTRL_MASK) {
> +		rcu_eqs_special_exit();
> +		/* Prefer duplicate flushes to losing a flush. */
> +		smp_mb__before_atomic(); /* NMI safety. */
> +		atomic_and(~RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
> +	}
>  }
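
(Side note: since rcu_eqs_special_exit() defaults above to an empty
single-pass do-while loop guarded by #ifndef, an architecture could
presumably supply its own definition before this file picks it up, along
the lines of the sketch below.  local_flush_tlb_all() is just a stand-in
here, not something this patch specifies.)

	/* Hypothetical arch-supplied override, for illustration only. */
	#define rcu_eqs_special_exit() \
	do { \
		local_flush_tlb_all(); /* run the deferred flush on EQS exit */ \
	} while (0)
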
>  
>  /*
> @@ -332,9 +350,9 @@ static void rcu_dynticks_eqs_online(void)
>  {
>  	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
>  
> -	if (atomic_read(&rdtp->dynticks) & 0x1)
> +	if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR)
>  		return;
> -	atomic_add(0x1, &rdtp->dynticks);
> +	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
>  }
>  
>  /*
> @@ -346,7 +364,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void)
>  {
>  	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
>  
> -	return !(atomic_read(&rdtp->dynticks) & 0x1);
> +	return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR);
>  }
>  
>  /*
> @@ -357,7 +375,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
>  {
>  	int snap = atomic_add_return(0, &rdtp->dynticks);
>  
> -	return snap;
> +	return snap & ~RCU_DYNTICK_CTRL_MASK;
>  }
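
(Aside: with the values above, RCU_DYNTICK_CTRL_MASK == 0x1 and
RCU_DYNTICK_CTRL_CTR == 0x2, so a snapshot taken while a special action is
pending, say 0x5, and one taken after the bit is cleared, 0x4, both reduce
to 0x4 once masked.  A pending request therefore never looks like a counter
change to the grace-period machinery.)
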
>  
>  /*
> @@ -366,7 +384,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
>   */
>  static bool rcu_dynticks_in_eqs(int snap)
>  {
> -	return !(snap & 0x1);
> +	return !(snap & RCU_DYNTICK_CTRL_CTR);
>  }
>  
>  /*
> @@ -386,10 +404,33 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)
>  static void rcu_dynticks_momentary_idle(void)
>  {
>  	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
> -	int special = atomic_add_return(2, &rdtp->dynticks);
> +	int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
> +					&rdtp->dynticks);
>  
>  	/* It is illegal to call this from idle state. */
> -	WARN_ON_ONCE(!(special & 0x1));
> +	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
> +}
> +
> +/*
> + * Set the special (bottom) bit of the specified CPU so that it
> + * will take special action (such as flushing its TLB) on the
> + * next exit from an extended quiescent state.  Returns true if
> + * the bit was successfully set, or false if the CPU was not in
> + * an extended quiescent state.
> + */
> +bool rcu_eqs_special_set(int cpu)
> +{
> +	int old;
> +	int new;
> +	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
> +
> +	do {
> +		old = atomic_read(&rdtp->dynticks);
> +		if (old & RCU_DYNTICK_CTRL_CTR)
> +			return false;
> +		new = old | RCU_DYNTICK_CTRL_MASK;
> +	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
> +	return true;
>  }
>  
>  DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 3b953dcf6afc..7dcdd59d894c 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -596,6 +596,7 @@ extern struct rcu_state rcu_preempt_state;
>  #endif /* #ifdef CONFIG_PREEMPT_RCU */
>  
>  int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
> +bool rcu_eqs_special_set(int cpu);
>  
>  #ifdef CONFIG_RCU_BOOST
>  DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
> -- 
> 2.5.2
> 
