Message-ID: <20170728111014.d333y7huoumspyh6@hirez.programming.kicks-ass.net>
Date:   Fri, 28 Jul 2017 13:10:14 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Cc:     "Paul E . McKenney" <paulmck@...ux.vnet.ibm.com>,
        linux-kernel@...r.kernel.org, Boqun Feng <boqun.feng@...il.com>,
        Andrew Hunter <ahh@...gle.com>,
        Maged Michael <maged.michael@...il.com>, gromer@...gle.com,
        Avi Kivity <avi@...lladb.com>,
        Andy Lutomirski <luto@...nel.org>,
        Nicholas Piggin <npiggin@...il.com>
Subject: Re: [RFC PATCH v2] membarrier: expedited private command

On Fri, Jul 28, 2017 at 10:55:32AM +0200, Peter Zijlstra wrote:
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index e9785f7aed75..33f34a201255 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2641,8 +2641,18 @@ static struct rq *finish_task_switch(struct task_struct *prev)
>  	finish_arch_post_lock_switch();
>  
>  	fire_sched_in_preempt_notifiers(current);
> +
> +	/*
> +	 * For CONFIG_MEMBARRIER we need a full memory barrier after the
> +	 * rq->curr assignment. Not all architectures have one in either
> +	 * switch_to() or switch_mm() so we use (and complement) the one
> +	 * implied by mmdrop()'s atomic_dec_and_test().
> +	 */
>  	if (mm)
>  		mmdrop(mm);
> +	else if (IS_ENABLED(CONFIG_MEMBARRIER))
> +		smp_mb();
> +
>  	if (unlikely(prev_state == TASK_DEAD)) {
>  		if (prev->sched_class->task_dead)
>  			prev->sched_class->task_dead(prev);

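For reference, the full barrier relied on there is the one implied by
mmdrop(); simplified from include/linux/sched/mm.h it is roughly:

	static inline void mmdrop(struct mm_struct *mm)
	{
		/* value-returning atomic RMW: fully ordered */
		if (unlikely(atomic_dec_and_test(&mm->mm_count)))
			__mmdrop(mm);
	}

atomic_dec_and_test() returns a value, so per memory-barriers.txt it
is fully ordered and stands in for the smp_mb() whenever mm != NULL.
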
Hurm.. so going over this again, I'm not sure this is as good as we
thought it was..


context_switch()

  mm = next->mm;
  oldmm = prev->active_mm;

  if (!mm) {				// next is a kthread
    next->active_mm = old_mm;
    mmgrab(oldmm);
    enter_lazy_tlb(oldmm, next);
  } else
    switch_mm(oldmm, mm, next);

  if (!prev->mm) {			// prev was a kthread
    prev->active_mm = NULL;
    rq->prev_mm = oldmm;
  }

  /* ... */

  finish_task_switch()

    mm = rq->prev_mm; // oldmm when !prev->mm
    rq->prev_mm = NULL;

    if (mm)
      mmdrop(mm);



That mmdrop() is there to balance the mmgrab() we do when switching
to a kthread. Also, it looks to me that on kthread->kthread switches
we do a superfluous mmgrab() / mmdrop() pair.
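
To make that concrete, a quick userspace model (not kernel code; the
names and the ctxsw() helper are purely illustrative) of the current
grab/drop pairing across the transitions:

	#include <stdio.h>
	#include <stddef.h>

	struct mm { int mm_count; };
	struct task { struct mm *mm, *active_mm; };

	static int grabs, drops;

	static void mmgrab(struct mm *mm) { mm->mm_count++; grabs++; }
	static void mmdrop(struct mm *mm) { mm->mm_count--; drops++; }

	/* Current scheme: grab on every switch to a kthread, drop on
	 * every switch away from one (really deferred to
	 * finish_task_switch() via rq->prev_mm). */
	static void ctxsw(struct task *prev, struct task *next)
	{
		struct mm *oldmm = prev->active_mm;

		if (!next->mm) {		/* to kernel */
			next->active_mm = oldmm;
			mmgrab(oldmm);
		}
		if (!prev->mm) {		/* from kernel */
			prev->active_mm = NULL;
			mmdrop(oldmm);
		}
	}

	int main(void)
	{
		struct mm user_mm = { .mm_count = 1 };
		struct task user = { &user_mm, &user_mm };
		struct task kt1 = { NULL, NULL }, kt2 = { NULL, NULL };

		ctxsw(&user, &kt1);	/* user -> kernel: grab          */
		ctxsw(&kt1, &kt2);	/* kernel -> kernel: grab + drop */
		ctxsw(&kt2, &user);	/* kernel -> user: drop          */

		/* prints grabs=2 drops=2 mm_count=1: the middle
		 * switch paid a whole refcount round-trip */
		printf("grabs=%d drops=%d mm_count=%d\n",
		       grabs, drops, user_mm.mm_count);
		return 0;
	}

With the reordering below, that middle kthread->kthread switch just
transfers active_mm and never touches the refcount.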

Something like the below perhaps would optimize and clarify things.

---
 kernel/sched/core.c | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e9785f7aed75..7924b4cc2bff 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2733,12 +2733,8 @@ static __always_inline struct rq *
 context_switch(struct rq *rq, struct task_struct *prev,
 	       struct task_struct *next, struct rq_flags *rf)
 {
-	struct mm_struct *mm, *oldmm;
-
 	prepare_task_switch(rq, prev, next);
 
-	mm = next->mm;
-	oldmm = prev->active_mm;
 	/*
 	 * For paravirt, this is coupled with an exit in switch_to to
 	 * combine the page table reload and the switch backend into
@@ -2746,16 +2742,29 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 */
 	arch_start_context_switch(prev);
 
-	if (!mm) {
-		next->active_mm = oldmm;
-		mmgrab(oldmm);
-		enter_lazy_tlb(oldmm, next);
-	} else
-		switch_mm_irqs_off(oldmm, mm, next);
+	/*
+	 * kernel -> kernel   transfer active
+	 *   user -> kernel   mmgrab()
+	 *
+	 * kernel ->   user   mmdrop()
+	 *   user ->   user   switch_mm()
+	 */
+	if (!next->mm) {				// to kernel
+		next->active_mm = prev->active_mm;
+		enter_lazy_tlb(prev->active_mm, next);
+
+		if (prev->mm)				// from user
+			mmgrab(prev->active_mm);
+
+	} else {					// to user
+		switch_mm_irqs_off(prev->active_mm, next->mm, next);
 
-	if (!prev->mm) {
-		prev->active_mm = NULL;
-		rq->prev_mm = oldmm;
+		if (!prev->mm) {			// from kernel
+			rq->prev_mm = prev->active_mm;
+			prev->active_mm = NULL;
+
+			/* will mmdrop() in finish_task_switch(). */
+		}
 	}
 
 	rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
