lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200716202044.734067877@linutronix.de>
Date:   Thu, 16 Jul 2020 22:19:26 +0200
From:   Thomas Gleixner <tglx@...utronix.de>
To:     LKML <linux-kernel@...r.kernel.org>
Cc:     x86@...nel.org, Oleg Nesterov <oleg@...hat.com>,
        "Eric W. Biederman" <ebiederm@...ssion.com>,
        Frederic Weisbecker <frederic@...nel.org>,
        John Stultz <john.stultz@...aro.org>,
        Paolo Bonzini <pbonzini@...hat.com>
Subject: [patch V2 3/5] posix-cpu-timers: Provide mechanisms to defer timer
 handling to task_work

Running posix cpu timers in hard interrupt context has a few downsides:

 - For PREEMPT_RT it cannot work as the expiry code needs to take
   sighand lock, which is a 'sleeping spinlock' in RT. The original RT
   approach of offloading the posix CPU timer handling into a high
   priority thread was clumsy and provided no real benefit in general.

 - For fine grained accounting it's just wrong to run this in the context
   of the timer interrupt because that way process specific CPU time is
   accounted to the timer interrupt.

 - Long running timer interrupts caused by a large number of expiring
   timers which can be created and armed by unprivileged user space.

There is no hard requirement to expire them in interrupt context.

Provide infrastructure to schedule task work which allows splitting the
posix CPU timer code into a quick check in interrupt context and a thread
context expiry and signal delivery function. This has to be enabled by
architectures as it requires that the architecture specific KVM
implementation handles pending task work before exiting to guest mode.

Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
---
 include/linux/posix-timers.h   |   17 ++++++++++++++++
 kernel/time/Kconfig            |    5 ++++
 kernel/time/posix-cpu-timers.c |   42 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 63 insertions(+), 1 deletion(-)

--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -112,25 +112,42 @@ struct posix_cputimer_base {
 enum {
 	CPUTIMERS_ACTIVE,
 	CPUTIMERS_EXPIRING,
+	CPUTIMERS_WORK_SCHEDULED,
 };
 
 /**
  * posix_cputimers - Container for posix CPU timer related data
  * @bases:	Base container for posix CPU clocks
  * @flags:	Flags for various CPUTIMERS_* states
+ * @task_work:	Task work to defer timer expiry into task context
  * Used in task_struct and signal_struct
  */
 struct posix_cputimers {
 	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
 	unsigned long			flags;
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+	struct callback_head		task_work;
+#endif
 };
 
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+void posix_cpu_timers_work(struct callback_head *work);
+
+static inline void posix_cputimer_init_work(struct posix_cputimers *pct)
+{
+	pct->task_work.func = posix_cpu_timers_work;
+}
+#else
+static inline void posix_cputimer_init_work(struct posix_cputimers *pct) { }
+#endif
+
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
 	memset(pct, 0, sizeof(*pct));
 	pct->bases[0].nextevt = U64_MAX;
 	pct->bases[1].nextevt = U64_MAX;
 	pct->bases[2].nextevt = U64_MAX;
+	posix_cputimer_init_work(pct);
 }
 
 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -52,6 +52,11 @@ config GENERIC_CLOCKEVENTS_MIN_ADJUST
 config GENERIC_CMOS_UPDATE
 	bool
 
+# Select to handle posix CPU timers from task_work
+# and not from the timer interrupt context
+config POSIX_CPU_TIMERS_TASK_WORK
+	bool
+
 if GENERIC_CLOCKEVENTS
 menu "Timers subsystem"
 
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -14,6 +14,7 @@
 #include <linux/tick.h>
 #include <linux/workqueue.h>
 #include <linux/compat.h>
+#include <linux/task_work.h>
 #include <linux/sched/deadline.h>
 
 #include "posix-timers.h"
@@ -1075,7 +1076,9 @@ static inline bool fastpath_timer_check(
 	return false;
 }
 
-static void __run_posix_cpu_timers(struct task_struct *tsk)
+static inline void posix_cpu_timers_enable_work(struct task_struct *tsk);
+
+static void handle_posix_cpu_timers(struct task_struct *tsk)
 {
 	struct k_itimer *timer, *next;
 	unsigned long flags;
@@ -1096,6 +1099,12 @@ static void __run_posix_cpu_timers(struc
 	check_process_timers(tsk, &firing);
 
 	/*
+	 * Allow new work to be scheduled. The expiry cache
+	 * is up to date.
+	 */
+	posix_cpu_timers_enable_work(tsk);
+
+	/*
 	 * We must release these locks before taking any timer's lock.
 	 * There is a potential race with timer deletion here, as the
 	 * siglock now protects our private firing list.  We have set
@@ -1130,6 +1139,37 @@ static void __run_posix_cpu_timers(struc
 	lockdep_posixtimer_exit();
 }
 
+#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+
+void posix_cpu_timers_work(struct callback_head *work)
+{
+	handle_posix_cpu_timers(current);
+}
+
+static void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	struct posix_cputimers *pct = &tsk->posix_cputimers;
+
+	if (!test_and_set_bit(CPUTIMERS_WORK_SCHEDULED, &pct->flags))
+		task_work_add(tsk, &pct->task_work, true);
+}
+
+static inline void posix_cpu_timers_enable_work(struct task_struct *tsk)
+{
+	clear_bit(CPUTIMERS_WORK_SCHEDULED, &tsk->posix_cputimers.flags);
+}
+
+#else
+
+static void __run_posix_cpu_timers(struct task_struct *tsk)
+{
+	handle_posix_cpu_timers(tsk);
+}
+
+static inline void posix_cpu_timers_enable_work(struct task_struct *tsk) { }
+
+#endif
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ