[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20201014083321.GA2628@hirez.programming.kicks-ass.net>
Date:   Wed, 14 Oct 2020 10:33:21 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Marcelo Tosatti <mtosatti@...hat.com>
Cc:     Frederic Weisbecker <frederic@...nel.org>,
        linux-kernel@...r.kernel.org,
        Nitesh Narayan Lal <nitesh@...hat.com>,
        Peter Xu <peterx@...hat.com>
Subject: Re: [patch 1/2] nohz: only wakeup a single target cpu when kicking a
 task
On Tue, Oct 13, 2020 at 02:13:28PM -0300, Marcelo Tosatti wrote:
> > Yes but if the task isn't running, run_posix_cpu_timers() doesn't have
> > anything to elapse. So indeed we can spare the IPI if the task is not
> > running. Provided ordering makes sure that the task sees the new dependency
> > when it schedules in of course.
> 
> True.
> 
>  * p->on_cpu <- { 0, 1 }:
>  *
>  *   is set by prepare_task() and cleared by finish_task() such that it will be
>  *   set before p is scheduled-in and cleared after p is scheduled-out, both
>  *   under rq->lock. Non-zero indicates the task is running on its CPU.
> 
> 
> CPU-0 (tick_set_dep)            CPU-1 (task switch)
> 
> STORE p->tick_dep_mask
> smp_mb() (atomic_fetch_or())
> LOAD p->on_cpu
> 
> 
>                                 context_switch(prev, next)
>                                 STORE next->on_cpu = 1
>                                 ...                             [*]
> 
>                                 LOAD current->tick_dep_mask
> 
That load is in tick_nohz_task_switch() right? (which BTW is placed
completely wrong) You could easily do something like the below I
suppose.
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 81632cd5e3b7..2a5fafe66bb0 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -410,6 +410,14 @@ void __tick_nohz_task_switch(void)
 	ts = this_cpu_ptr(&tick_cpu_sched);
 
 	if (ts->tick_stopped) {
+		/*
+		 * tick_set_dep()		(this)
+		 *
+		 * STORE p->tick_dep_mask	STORE p->on_cpu
+		 * smp_mb()			smp_mb()
+		 * LOAD p->on_cpu		LOAD p->tick_dep_mask
+		 */
+		smp_mb();
 		if (atomic_read(¤t->tick_dep_mask) ||
 		    atomic_read(¤t->signal->tick_dep_mask))
 			tick_nohz_full_kick();
re tick_nohz_task_switch() being placed wrong, it should probably be
placed before finish_lock_switch(). Something like so.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index cf044580683c..5c92c959824f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4084,6 +4084,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	vtime_task_switch(prev);
 	perf_event_task_sched_in(prev, current);
 	finish_task(prev);
+	tick_nohz_task_switch();
 	finish_lock_switch(rq);
 	finish_arch_post_lock_switch();
 	kcov_finish_switch(current);
@@ -4121,7 +4122,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		put_task_struct_rcu_user(prev);
 	}
 
-	tick_nohz_task_switch();
 	return rq;
 }
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 81632cd5e3b7..c8bddc9fb792 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -402,10 +402,8 @@ void __tick_nohz_task_switch(void)
 	unsigned long flags;
 	struct tick_sched *ts;
 
-	local_irq_save(flags);
-
 	if (!tick_nohz_full_cpu(smp_processor_id()))
-		goto out;
+		return;
 
 	ts = this_cpu_ptr(&tick_cpu_sched);
 
@@ -414,8 +412,6 @@ void __tick_nohz_task_switch(void)
 		    atomic_read(¤t->signal->tick_dep_mask))
 			tick_nohz_full_kick();
 	}
-out:
-	local_irq_restore(flags);
 }
 
 /* Get the boot-time nohz CPU list from the kernel parameters. */
Powered by blists - more mailing lists
 
