linux-kernel - Re: [PATCH tip/core/rcu] classic RCU locking and memory-barrier cleanups

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20080805174058.GE6737@linux.vnet.ibm.com>
Date:	Tue, 5 Aug 2008 10:40:58 -0700
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	Steven Rostedt <rostedt@...dmis.org>
Cc:	linux-kernel@...r.kernel.org, mingo@...e.hu,
	akpm@...ux-foundation.org, oleg@...sign.ru,
	manfred@...orfullife.com, dipankar@...ibm.com, dvhltc@...ibm.com,
	niv@...ibm.com
Subject: Re: [PATCH tip/core/rcu] classic RCU locking and memory-barrier
	cleanups

On Tue, Aug 05, 2008 at 12:48:41PM -0400, Steven Rostedt wrote:

Thank you for looking this over, Steve!

> On Tue, 5 Aug 2008, Paul E. McKenney wrote:
> > ---
> > 
> >  rcuclassic.c |   51 +++++++++++++++++++++++++++++++++++++++++----------
> >  1 file changed, 41 insertions(+), 10 deletions(-)
> > 
> > diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
> > index d3553ee..e6f42ef 100644
> > --- a/kernel/rcuclassic.c
> > +++ b/kernel/rcuclassic.c
> > @@ -86,6 +86,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
> >  	int cpu;
> >  	cpumask_t cpumask;
> >  	set_need_resched();
> > +	spin_lock(&rcp->lock);
> >  	if (unlikely(!rcp->signaled)) {
> >  		rcp->signaled = 1;
> >  		/*
> > @@ -111,6 +112,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
> >  		for_each_cpu_mask(cpu, cpumask)
> >  			smp_send_reschedule(cpu);
> >  	}
> > +	spin_unlock(&rcp->lock);
> >  }
> >  #else
> >  static inline void force_quiescent_state(struct rcu_data *rdp,
> > @@ -124,7 +126,9 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
> >  		struct rcu_data *rdp)
> >  {
> >  	long batch;
> > -	smp_mb(); /* reads the most recently updated value of rcu->cur. */
> > +
> > +	head->next = NULL;
> > +	smp_mb(); /* Read of rcu->cur must happen after any change by caller. */
> >  
> >  	/*
> >  	 * Determine the batch number of this callback.
> > @@ -174,7 +178,6 @@ void call_rcu(struct rcu_head *head,
> >  	unsigned long flags;
> >  
> >  	head->func = func;
> > -	head->next = NULL;
> >  	local_irq_save(flags);
> >  	__call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
> >  	local_irq_restore(flags);
> > @@ -203,7 +206,6 @@ void call_rcu_bh(struct rcu_head *head,
> >  	unsigned long flags;
> >  
> >  	head->func = func;
> > -	head->next = NULL;
> >  	local_irq_save(flags);
> >  	__call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
> >  	local_irq_restore(flags);
> > @@ -389,17 +391,17 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
> >  static void __rcu_offline_cpu(struct rcu_data *this_rdp,
> >  				struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> >  {
> > -	/* if the cpu going offline owns the grace period
> > +	/*
> > +	 * if the cpu going offline owns the grace period
> >  	 * we can block indefinitely waiting for it, so flush
> >  	 * it here
> >  	 */
> >  	spin_lock_bh(&rcp->lock);
> >  	if (rcp->cur != rcp->completed)
> >  		cpu_quiet(rdp->cpu, rcp);
> > -	spin_unlock_bh(&rcp->lock);
> > -	/* spin_lock implies smp_mb() */
> 
> The spin_unlock is being removed here. Was that the smp_mb that was 
> implied or is it the above spin_lock_bh?

Moving the spinlock down below, combined with adding spinlocks elsewhere,
eliminates the need for the memory barrier -- the locking primitives
provide full ordering and atomicity as well.  This does likely reduce
scalability somewhat, which I will address with a hierarchical approach
in a later patch.

							Thanx, Paul

> >  	rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
> >  	rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
> > +	spin_unlock_bh(&rcp->lock);
> >  
> >  	local_irq_disable();
> >  	this_rdp->qlen += rdp->qlen;
> > @@ -433,16 +435,19 @@ static void rcu_offline_cpu(int cpu)
> >  static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
> >  					struct rcu_data *rdp)
> >  {
> > +	long completed_snap;
> > +
> >  	if (rdp->nxtlist) {
> >  		local_irq_disable();
> > +		completed_snap = ACCESS_ONCE(rcp->completed);
> >  
> >  		/*
> >  		 * move the other grace-period-completed entries to
> >  		 * [rdp->nxtlist, *rdp->nxttail[0]) temporarily
> >  		 */
> > -		if (!rcu_batch_before(rcp->completed, rdp->batch))
> > +		if (!rcu_batch_before(completed_snap, rdp->batch))
> >  			rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
> > -		else if (!rcu_batch_before(rcp->completed, rdp->batch - 1))
> > +		else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
> >  			rdp->nxttail[0] = rdp->nxttail[1];
> >  
> >  		/*
> > @@ -483,20 +488,38 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
> >  
> >  static void rcu_process_callbacks(struct softirq_action *unused)
> >  {
> > +	/*
> > +	 * Memory references from any prior RCU read-side critical sections
> > +	 * executed by the interrupted code must be see before any RCU
> > +	 * grace-period manupulations below.
> > +	 */
> > +
> > +	smp_mb(); /* See above block comment. */
> > +
> >  	__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
> >  	__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
> > +
> > +	/*
> > +	 * Memory references from any later RCU read-side critical sections
> > +	 * executed by the interrupted code must be see after any RCU
> > +	 * grace-period manupulations above.
> > +	 */
> > +
> > +	smp_mb(); /* See above block comment. */
> >  }
> >  
> >  static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
> >  {
> >  	if (rdp->nxtlist) {
> > +		long completed_snap = ACCESS_ONCE(rcp->completed);
> > +
> >  		/*
> >  		 * This cpu has pending rcu entries and the grace period
> >  		 * for them has completed.
> >  		 */
> > -		if (!rcu_batch_before(rcp->completed, rdp->batch))
> > +		if (!rcu_batch_before(completed_snap, rdp->batch))
> >  			return 1;
> > -		if (!rcu_batch_before(rcp->completed, rdp->batch - 1) &&
> > +		if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
> >  				rdp->nxttail[0] != rdp->nxttail[1])
> >  			return 1;
> >  		if (rdp->nxttail[0] != &rdp->nxtlist)
> > @@ -547,6 +570,12 @@ int rcu_needs_cpu(int cpu)
> >  	return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
> >  }
> >  
> > +/*
> > + * Top-level function driving RCU grace-period detection, normally
> > + * invoked from the scheduler-clock interrupt.  This function simply
> > + * increments counters that are read only from softirq by this same
> > + * CPU, so there are no memory barriers required.
> > + */
> >  void rcu_check_callbacks(int cpu, int user)
> >  {
> >  	if (user ||
> > @@ -590,6 +619,7 @@ void rcu_check_callbacks(int cpu, int user)
> >  static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
> >  						struct rcu_data *rdp)
> >  {
> > +	spin_lock(&rcp->lock);
> >  	memset(rdp, 0, sizeof(*rdp));
> >  	rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
> >  	rdp->donetail = &rdp->donelist;
> > @@ -597,6 +627,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
> >  	rdp->qs_pending = 0;
> >  	rdp->cpu = cpu;
> >  	rdp->blimit = blimit;
> > +	spin_unlock(&rcp->lock);
> >  }
> >  
> >  static void __cpuinit rcu_online_cpu(int cpu)
> > 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/