Message-Id: <1542163569-20047-9-git-send-email-frederic@kernel.org>
Date:   Wed, 14 Nov 2018 03:45:52 +0100
From:   Frederic Weisbecker <frederic@...nel.org>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     Frederic Weisbecker <frederic@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Wanpeng Li <wanpengli@...cent.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Yauheni Kaliuta <yauheni.kaliuta@...hat.com>,
        Ingo Molnar <mingo@...nel.org>, Rik van Riel <riel@...hat.com>
Subject: [PATCH 08/25] vtime: Exit vtime before exit_notify()

In order to correctly implement kcpustat under nohz_full, we need to
track the task running on a given CPU and read its vtime state safely,
reliably and locklessly.

This leaves us with tracking and fetching that task under RCU. That
will be done in a later patch. Until then we need to prepare vtime to
handle this properly and close the accounting before we reach the
earliest point at which the RCU-delayed put_task_struct() can be
queued. That point happens to be in exit_notify() in the case of
auto-reaping.

Therefore, finish the accounting right before exit_notify(). After
that point the exiting task must not be tracked any further.
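
For illustration only (not part of this patch): the lockless readers
this series prepares for are expected to use the usual seqcount retry
pattern, in the spirit of the existing task_gtime() reader in
kernel/sched/cputime.c. The helper name vtime_read_gtime() below is a
placeholder, and the vtime_delta() call is assumed to be the internal
cputime.c helper; neither is introduced by this patch:

	/* Hedged sketch: lockless, retry-based read of a task's guest time */
	static u64 vtime_read_gtime(struct task_struct *t)
	{
		struct vtime *vtime = &t->vtime;
		unsigned int seq;
		u64 gtime;

		do {
			/* Retry if a writer flushed vtime concurrently */
			seq = read_seqcount_begin(&vtime->seqcount);

			gtime = t->gtime;
			if (vtime->state == VTIME_SYS && t->flags & PF_VCPU)
				gtime += vtime->gtime + vtime_delta(vtime);

		} while (read_seqcount_retry(&vtime->seqcount, seq));

		return gtime;
	}

The write side (vtime_task_switch_generic() and the new
vtime_exit_task() below) always updates the state inside
write_seqcount_begin()/write_seqcount_end(), so such a reader never
observes a half-flushed snapshot.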

Signed-off-by: Frederic Weisbecker <frederic@...nel.org>
Cc: Yauheni Kaliuta <yauheni.kaliuta@...hat.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Rik van Riel <riel@...hat.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Wanpeng Li <wanpengli@...cent.com>
Cc: Ingo Molnar <mingo@...nel.org>
---
 include/linux/sched.h  |  2 ++
 include/linux/vtime.h  |  2 ++
 kernel/exit.c          |  1 +
 kernel/sched/cputime.c | 56 ++++++++++++++++++++++++++++++++++++++++++--------
 4 files changed, 52 insertions(+), 9 deletions(-)
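
Note, for illustration and not part of the diff below: the ordering
constraint in the changelog is easiest to see from the consumer side.
A later patch is expected to fetch the task currently running on a
nohz_full CPU under rcu_read_lock() and then read its vtime with a
seqcount retry loop such as the sketch above. Once auto-reaping
reaches exit_notify(), the RCU-delayed put_task_struct() may already
be queued, so the task's vtime must be settled and marked VTIME_DEAD
beforehand. A hedged sketch of that reader side follows; treating
rq->curr as RCU-managed is an assumption about a later patch in this
series, not something done here:

	/* Hedged sketch: read a remote CPU's current task under RCU */
	static u64 kcpustat_guest_sketch(int cpu)
	{
		struct rq *rq = cpu_rq(cpu);	/* kernel/sched/sched.h */
		struct task_struct *curr;
		u64 gtime = 0;

		rcu_read_lock();
		/* Assumes rq->curr is published for RCU readers */
		curr = rcu_dereference(rq->curr);
		if (curr)
			gtime = vtime_read_gtime(curr);
		rcu_read_unlock();

		return gtime;
	}

With the accounting closed in do_exit() before exit_notify(), by the
time put_task_struct() can be queued vtime->state is VTIME_DEAD, so a
reader that still sees the task gets a settled snapshot rather than a
half-accounted one.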

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d458d65..27e0544 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -265,6 +265,8 @@ struct task_cputime {
 enum vtime_state {
 	/* Task is sleeping or running in a CPU with VTIME inactive: */
 	VTIME_INACTIVE = 0,
+	/* Task has passed exit_notify() */
+	VTIME_DEAD,
 	/* Task is idle */
 	VTIME_IDLE,
 	/* Task runs in kernelspace in a CPU with VTIME active: */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index d9160ab..8350a0b 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -73,12 +73,14 @@ extern void vtime_user_exit(struct task_struct *tsk);
 extern void vtime_guest_enter(struct task_struct *tsk);
 extern void vtime_guest_exit(struct task_struct *tsk);
 extern void vtime_init_idle(struct task_struct *tsk, int cpu);
+extern void vtime_exit_task(struct task_struct *tsk);
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN  */
 static inline void vtime_user_enter(struct task_struct *tsk) { }
 static inline void vtime_user_exit(struct task_struct *tsk) { }
 static inline void vtime_guest_enter(struct task_struct *tsk) { }
 static inline void vtime_guest_exit(struct task_struct *tsk) { }
 static inline void vtime_init_idle(struct task_struct *tsk, int cpu) { }
+static inline void vtime_exit_task(struct task_struct *tsk) { }
 #endif
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/kernel/exit.c b/kernel/exit.c
index 0e21e6d..cae3fe9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -883,6 +883,7 @@ void __noreturn do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
+	vtime_exit_task(tsk);
 	exit_tasks_rcu_start();
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index f64afd7..a0c3a82 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -702,7 +702,7 @@ static u64 get_vtime_delta(struct vtime *vtime)
 	 * errors from causing elapsed vtime to go negative.
 	 */
 	other = account_other_time(delta);
-	WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
+	WARN_ON_ONCE(vtime->state < VTIME_IDLE);
 	vtime->starttime += delta;
 
 	return delta - other;
@@ -813,17 +813,31 @@ void vtime_task_switch_generic(struct task_struct *prev)
 {
 	struct vtime *vtime = &prev->vtime;
 
-	write_seqcount_begin(&vtime->seqcount);
-	if (vtime->state == VTIME_IDLE)
-		vtime_account_idle(prev);
-	else
-		__vtime_account_kernel(prev, vtime);
-	vtime->state = VTIME_INACTIVE;
-	vtime->cpu = -1;
-	write_seqcount_end(&vtime->seqcount);
+	/*
+	 * Flush the prev task vtime, unless it has passed
+	 * vtime_exit_task(), in which case there is nothing
+	 * left to account.
+	 */
+	if (vtime->state != VTIME_DEAD) {
+		write_seqcount_begin(&vtime->seqcount);
+		if (vtime->state == VTIME_IDLE)
+			vtime_account_idle(prev);
+		else
+			__vtime_account_kernel(prev, vtime);
+		vtime->state = VTIME_INACTIVE;
+		vtime->cpu = -1;
+		write_seqcount_end(&vtime->seqcount);
+	}
 
 	vtime = &current->vtime;
 
+	/*
+	 * Ignore the next task if it has been preempted after
+	 * vtime_exit_task().
+	 */
+	if (vtime->state == VTIME_DEAD)
+		return;
+
 	write_seqcount_begin(&vtime->seqcount);
 	if (is_idle_task(current))
 		vtime->state = VTIME_IDLE;
@@ -850,6 +864,30 @@ void vtime_init_idle(struct task_struct *t, int cpu)
 	local_irq_restore(flags);
 }
 
+/*
+ * This is the final settlement point, after which we no longer
+ * account vtime for this task.
+ */
+void vtime_exit_task(struct task_struct *t)
+{
+	struct vtime *vtime = &t->vtime;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	write_seqcount_begin(&vtime->seqcount);
+	/*
+	 * A task that has never run on a nohz_full CPU hasn't
+	 * been tracked by vtime. Thus it's in VTIME_INACTIVE
+	 * state. Nothing to account for it.
+	 */
+	if (vtime->state != VTIME_INACTIVE)
+		vtime_account_system(t, vtime);
+	vtime->state = VTIME_DEAD;
+	vtime->cpu = -1;
+	write_seqcount_end(&vtime->seqcount);
+	local_irq_restore(flags);
+}
+
 u64 task_gtime(struct task_struct *t)
 {
 	struct vtime *vtime = &t->vtime;
-- 
2.7.4
