Message-ID: <20190625075730.7twx4i6aamoqh6nz@linutronix.de>
Date:   Tue, 25 Jun 2019 09:57:30 +0200
From:   Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        linux-rt-users <linux-rt-users@...r.kernel.org>,
        Steven Rostedt <rostedt@...dmis.org>
Subject: [ANNOUNCE] v5.0.21-rt13

Dear RT folks!

I'm pleased to announce the v5.0.21-rt13 patch set. 

Changes since v5.0.21-rt12:

  - A patch by Kirill Smelkov to avoid a deadlock in the switchtec driver.

  - Rework of the hrtimer, timer and posix-timer cancellation interface
    on -RT. Instead of the swait/schedule interface we now have locks
    which are held while the timer is active. During the cancellation
    of an active timer the lock is acquired. The lock will then either
    PI-boost the timer or block and wait until the timer has completed.
    The new code looks simpler and no longer triggers the warning from
    rcu_note_context_switch() reported by Grygorii Strashko and Daniel
    Wagner. A simplified sketch of the locking pattern follows this
    list of changes.
    The patches were contributed by Anna-Maria Gleixner.

  - Drop a preempt_disable_rt() statement in get_nohz_timer_target().
    The caller holds a lock which already disables preemption.

  - tasklet_kill() could deadlock since the softirq rework if the task
    invoking tasklet_kill() had preempted the active tasklet.

  - in_softirq() (and related functions) did not work as expected since
    the softirq rework. The softirq accounting is now done per task
    (also sketched below).

  - RCU_FAST_NO_HZ was disabled on RT because a timer was used in a bad
    context. After double checking, this is no longer the case and the
    option can be enabled again (but it depends on RCU_EXPERT, so be
    careful).

  - The option "rcu.rcu_normal_after_boot=1" is set by default on RT.
    Now it is not possible to disable it on command line. Suggested by
    Paul E. McKenney.

  - Backport a patch from upstream to introduce
    user_access_{save,restore}(), which is needed due to a backport
    made by stable.

Known issues
     - rcutorture is currently broken on -RT. Reported by Juri Lelli.

The delta patch against v5.0.21-rt12 is appended below and can be found here:
 
     https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/incr/patch-5.0.21-rt12-rt13.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.0.21-rt13

The RT patch against v5.0.21 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/older/patch-5.0.21-rt13.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/older/patches-5.0.21-rt13.tar.xz

Sebastian

diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index db333300bd4be..6cfe431710203 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -58,6 +58,23 @@ static __always_inline void stac(void)
 	alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
 }
 
+static __always_inline unsigned long smap_save(void)
+{
+	unsigned long flags;
+
+	asm volatile (ALTERNATIVE("", "pushf; pop %0; " __stringify(__ASM_CLAC),
+				  X86_FEATURE_SMAP)
+		      : "=rm" (flags) : : "memory", "cc");
+
+	return flags;
+}
+
+static __always_inline void smap_restore(unsigned long flags)
+{
+	asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+		      : : "g" (flags) : "memory", "cc");
+}
+
 /* These macros can be used in asm() statements */
 #define ASM_CLAC \
 	ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
@@ -69,6 +86,9 @@ static __always_inline void stac(void)
 static inline void clac(void) { }
 static inline void stac(void) { }
 
+static inline unsigned long smap_save(void) { return 0; }
+static inline void smap_restore(unsigned long flags) { }
+
 #define ASM_CLAC
 #define ASM_STAC
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2b0dd1b9c2087..743e1a96cd6ea 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -720,6 +720,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
 #define user_access_begin(a,b)	user_access_begin(a,b)
 #define user_access_end()	__uaccess_end()
 
+#define user_access_save()	smap_save()
+#define user_access_restore(x)	smap_restore(x)
+
 #define unsafe_put_user(x, ptr, label)	\
 	__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
 
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 80823ad221ba5..ba17eaa410f96 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -392,7 +392,7 @@ static int switchtec_dev_open(struct inode *inode, struct file *filp)
 		return PTR_ERR(stuser);
 
 	filp->private_data = stuser;
-	nonseekable_open(inode, filp);
+	stream_open(inode, filp);
 
 	dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
 
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 190cb85044112..86ce98700f323 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,10 +471,11 @@ static int do_timerfd_settime(int ufd, int flags,
 				break;
 		}
 		spin_unlock_irq(&ctx->wqh.lock);
+
 		if (isalarm(ctx))
-			hrtimer_wait_for_timer(&ctx->t.alarm.timer);
+			hrtimer_grab_expiry_lock(&ctx->t.alarm.timer);
 		else
-			hrtimer_wait_for_timer(&ctx->t.tmr);
+			hrtimer_grab_expiry_lock(&ctx->t.tmr);
 	}
 
 	/*
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 34e9c6b74ae0a..c737e15ea6536 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -19,7 +19,6 @@
 #include <linux/percpu.h>
 #include <linux/timer.h>
 #include <linux/timerqueue.h>
-#include <linux/wait.h>
 
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
@@ -190,6 +189,8 @@ enum  hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ *			 expired
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -217,12 +218,10 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
 #endif
+	spinlock_t			softirq_expiry_lock;
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
-#ifdef CONFIG_PREEMPT_RT_BASE
-	wait_queue_head_t		wait;
-#endif
 	struct hrtimer			*softirq_next_timer;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 } ____cacheline_aligned;
@@ -423,6 +422,7 @@ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
 
 extern int hrtimer_cancel(struct hrtimer *timer);
 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
+extern void hrtimer_grab_expiry_lock(const struct hrtimer *timer);
 
 static inline void hrtimer_start_expires(struct hrtimer *timer,
 					 enum hrtimer_mode mode)
@@ -440,13 +440,6 @@ static inline void hrtimer_restart(struct hrtimer *timer)
 	hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
-/* Softirq preemption could deadlock timer removal */
-#ifdef CONFIG_PREEMPT_RT_BASE
-  extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
-#else
-# define hrtimer_wait_for_timer(timer)	do { cpu_relax(); } while (0)
-#endif
-
 /* Query timers: */
 extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
 
@@ -472,7 +465,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
  * Helper function to check, whether the timer is running the callback
  * function
  */
-static inline int hrtimer_callback_running(const struct hrtimer *timer)
+static inline int hrtimer_callback_running(struct hrtimer *timer)
 {
 	return timer->base->running == timer;
 }
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9329de0d8bfdd..64762225e1756 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -596,7 +596,10 @@ static inline void tasklet_unlock(struct tasklet_struct *t)
 
 static inline void tasklet_unlock_wait(struct tasklet_struct *t)
 {
-	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
+	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
+		local_bh_disable();
+		local_bh_enable();
+	}
 }
 #else
 #define tasklet_trylock(t) 1
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 05cd7466d10a8..d3552a5bcc8b2 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -15,6 +15,7 @@ struct cpu_timer_list {
 	u64 expires, incr;
 	struct task_struct *task;
 	int firing;
+	int firing_cpu;
 };
 
 /*
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 6a4884268f4c9..d559e3a0379c2 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -80,14 +80,6 @@
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
 				 | NMI_MASK))
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-long softirq_count(void);
-
-#else
-#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
-#endif
-
 /*
  * Are we doing bottom half or hardware interrupt processing?
  *
@@ -102,12 +94,23 @@ long softirq_count(void);
  *       should not be used in new code.
  */
 #define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
-#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
 #define in_nmi()		(preempt_count() & NMI_MASK)
 #define in_task()		(!(preempt_count() & \
 				   (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+#define softirq_count()		((long)get_current()->softirq_count)
+#define in_softirq()		(softirq_count())
+#define in_serving_softirq()	(get_current()->softirq_count & SOFTIRQ_OFFSET)
+
+#else
+
+#define softirq_count()		(preempt_count() & SOFTIRQ_MASK)
+#define in_softirq()		(softirq_count())
+#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
+
+#endif
 
 /*
  * The preempt_count offset after preempt_disable();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e1ea2ea52feb0..8c5bc47f934c3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -961,6 +961,9 @@ struct task_struct {
 	int				softirqs_enabled;
 	int				softirq_context;
 #endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+	int				softirq_count;
+#endif
 
 #ifdef CONFIG_LOCKDEP
 # define MAX_LOCK_DEPTH			48UL
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index d3afc0f018147..7f7356e151ce3 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -270,6 +270,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 #define user_access_end() do { } while (0)
 #define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
 #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
+static inline unsigned long user_access_save(void) { return 0UL; }
+static inline void user_access_restore(unsigned long flags) { }
 #endif
 
 #ifdef CONFIG_HARDENED_USERCOPY
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 7e36ea9b7b720..5f5a54714a7a3 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -172,7 +172,7 @@ config RCU_FANOUT_LEAF
 
 config RCU_FAST_NO_HZ
 	bool "Accelerate last non-dyntick-idle CPU's grace periods"
-	depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
+	depends on NO_HZ_COMMON && SMP && RCU_EXPERT
 	default n
 	help
 	  This option permits CPUs to enter dynticks-idle state even if
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 3700b730ea55d..aae5968ec9ebb 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -69,7 +69,9 @@ module_param(rcu_expedited, int, 0);
 extern int rcu_normal; /* from sysctl */
 module_param(rcu_normal, int, 0);
 static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
+#ifndef CONFIG_PREEMPT_RT_FULL
 module_param(rcu_normal_after_boot, int, 0);
+#endif
 #endif /* #ifndef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a8493ff60b673..2bd114e788a10 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -570,14 +570,11 @@ void resched_cpu(int cpu)
  */
 int get_nohz_timer_target(void)
 {
-	int i, cpu;
+	int i, cpu = smp_processor_id();
 	struct sched_domain *sd;
 
-	preempt_disable_rt();
-	cpu = smp_processor_id();
-
 	if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
-		goto preempt_en_rt;
+		return cpu;
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
@@ -596,8 +593,6 @@ int get_nohz_timer_target(void)
 		cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
 unlock:
 	rcu_read_unlock();
-preempt_en_rt:
-	preempt_enable_rt();
 	return cpu;
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 473369122ddd0..c4fae96f23c54 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -109,12 +109,6 @@ static bool ksoftirqd_running(unsigned long pending)
 static DEFINE_LOCAL_IRQ_LOCK(bh_lock);
 static DEFINE_PER_CPU(long, softirq_counter);
 
-long softirq_count(void)
-{
-	return raw_cpu_read(softirq_counter);
-}
-EXPORT_SYMBOL(softirq_count);
-
 void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 {
 	unsigned long __maybe_unused flags;
@@ -125,6 +119,7 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 		local_lock(bh_lock);
 	soft_cnt = this_cpu_inc_return(softirq_counter);
 	WARN_ON_ONCE(soft_cnt == 0);
+	current->softirq_count += SOFTIRQ_DISABLE_OFFSET;
 
 #ifdef CONFIG_TRACE_IRQFLAGS
 	local_irq_save(flags);
@@ -155,6 +150,7 @@ void _local_bh_enable(void)
 	local_irq_restore(flags);
 #endif
 
+	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
 	if (!in_atomic())
 		local_unlock(bh_lock);
 }
@@ -192,6 +188,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 	if (!in_atomic())
 		local_unlock(bh_lock);
 
+	current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
 	preempt_check_resched();
 }
 EXPORT_SYMBOL(__local_bh_enable_ip);
@@ -365,7 +362,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 	pending = local_softirq_pending();
 	account_irq_enter_time(current);
 
-#ifndef CONFIG_PREEMPT_RT_FULL
+#ifdef CONFIG_PREEMPT_RT_FULL
+	current->softirq_count |= SOFTIRQ_OFFSET;
+#else
 	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
 #endif
 	in_hardirq = lockdep_softirq_start();
@@ -418,7 +417,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
 	lockdep_softirq_end(in_hardirq);
 	account_irq_exit_time(current);
-#ifndef CONFIG_PREEMPT_RT_FULL
+#ifdef CONFIG_PREEMPT_RT_FULL
+	current->softirq_count &= ~SOFTIRQ_OFFSET;
+#else
 	__local_bh_enable(SOFTIRQ_OFFSET);
 #endif
 	WARN_ON_ONCE(in_interrupt());
@@ -468,7 +469,7 @@ void irq_enter(void)
 
 static inline void invoke_softirq(void)
 {
-	if (softirq_count() == 0)
+	if (this_cpu_read(softirq_counter) == 0)
 		wakeup_softirqd();
 }
 
@@ -552,7 +553,7 @@ void raise_softirq_irqoff(unsigned int nr)
 	 * If were are not in BH-disabled section then we have to wake
 	 * ksoftirqd.
 	 */
-	if (softirq_count() == 0)
+	if (this_cpu_read(softirq_counter) == 0)
 		wakeup_softirqd();
 }
 
@@ -704,7 +705,8 @@ void tasklet_kill(struct tasklet_struct *t)
 
 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
 		do {
-			yield();
+			local_bh_disable();
+			local_bh_enable();
 		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
 	}
 	tasklet_unlock_wait(t);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f6cd4bed61846..9f17e011087eb 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -433,7 +433,7 @@ int alarm_cancel(struct alarm *alarm)
 		int ret = alarm_try_to_cancel(alarm);
 		if (ret >= 0)
 			return ret;
-		hrtimer_wait_for_timer(&alarm->timer);
+		hrtimer_grab_expiry_lock(&alarm->timer);
 	}
 }
 EXPORT_SYMBOL_GPL(alarm_cancel);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 6e5d62bdebf22..2067f461b12a3 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -930,33 +930,16 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 }
 EXPORT_SYMBOL_GPL(hrtimer_forward);
 
-#ifdef CONFIG_PREEMPT_RT_BASE
-# define wake_up_timer_waiters(b)	wake_up(&(b)->wait)
-
-/**
- * hrtimer_wait_for_timer - Wait for a running timer
- *
- * @timer:	timer to wait for
- *
- * The function waits in case the timers callback function is
- * currently executed on the waitqueue of the timer base. The
- * waitqueue is woken up after the timer callback function has
- * finished execution.
- */
-void hrtimer_wait_for_timer(const struct hrtimer *timer)
+void hrtimer_grab_expiry_lock(const struct hrtimer *timer)
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	if (base && base->cpu_base &&
-	    base->index >= HRTIMER_BASE_MONOTONIC_SOFT)
-		wait_event(base->cpu_base->wait,
-				!(hrtimer_callback_running(timer)));
+	if (base && base->cpu_base) {
+		spin_lock(&base->cpu_base->softirq_expiry_lock);
+		spin_unlock(&base->cpu_base->softirq_expiry_lock);
+	}
 }
 
-#else
-# define wake_up_timer_waiters(b)	do { } while (0)
-#endif
-
 /*
  * enqueue_hrtimer - internal function to (re)start a timer
  *
@@ -1191,7 +1174,7 @@ int hrtimer_cancel(struct hrtimer *timer)
 
 		if (ret >= 0)
 			return ret;
-		hrtimer_wait_for_timer(timer);
+		hrtimer_grab_expiry_lock(timer);
 	}
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1495,6 +1478,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	spin_lock(&cpu_base->softirq_expiry_lock);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1504,7 +1488,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
-	wake_up_timer_waiters(cpu_base);
+	spin_unlock(&cpu_base->softirq_expiry_lock);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1914,9 +1898,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
-#ifdef CONFIG_PREEMPT_RT_BASE
-	init_waitqueue_head(&cpu_base->wait);
-#endif
+	spin_lock_init(&cpu_base->softirq_expiry_lock);
 	return 0;
 }
 
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index cb2b301d05490..d999294adeefd 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -213,7 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
 		/* We are sharing ->siglock with it_real_fn() */
 		if (hrtimer_try_to_cancel(timer) < 0) {
 			spin_unlock_irq(&tsk->sighand->siglock);
-			hrtimer_wait_for_timer(&tsk->signal->real_timer);
+			hrtimer_grab_expiry_lock(timer);
 			goto again;
 		}
 		expires = timeval_to_ktime(value->it_value);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a436ee592737a..5bb1edffe0d05 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -789,6 +789,7 @@ check_timers_list(struct list_head *timers,
 			return t->expires;
 
 		t->firing = 1;
+		t->firing_cpu = smp_processor_id();
 		list_move_tail(&t->entry, firing);
 	}
 
@@ -1131,6 +1132,20 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 	return 0;
 }
 
+static DEFINE_PER_CPU(spinlock_t, cpu_timer_expiry_lock) = __SPIN_LOCK_UNLOCKED(cpu_timer_expiry_lock);
+
+void cpu_timers_grab_expiry_lock(struct k_itimer *timer)
+{
+	int cpu = timer->it.cpu.firing_cpu;
+
+	if (cpu >= 0) {
+		spinlock_t *expiry_lock = per_cpu_ptr(&cpu_timer_expiry_lock, cpu);
+
+		spin_lock_irq(expiry_lock);
+		spin_unlock_irq(expiry_lock);
+	}
+}
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.
@@ -1141,6 +1156,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
 	unsigned long flags;
+	spinlock_t *expiry_lock;
 
 	/*
 	 * The fast path checks that there are no expired thread or thread
@@ -1149,6 +1165,9 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 	if (!fastpath_timer_check(tsk))
 		return;
 
+	expiry_lock = this_cpu_ptr(&cpu_timer_expiry_lock);
+	spin_lock(expiry_lock);
+
 	if (!lock_task_sighand(tsk, &flags))
 		return;
 	/*
@@ -1183,6 +1202,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 		list_del_init(&timer->it.cpu.entry);
 		cpu_firing = timer->it.cpu.firing;
 		timer->it.cpu.firing = 0;
+		timer->it.cpu.firing_cpu = -1;
 		/*
 		 * The firing flag is -1 if we collided with a reset
 		 * of the timer, which already reported this
@@ -1192,6 +1212,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 			cpu_timer_fire(timer);
 		spin_unlock(&timer->it_lock);
 	}
+	spin_unlock(expiry_lock);
 }
 
 #ifdef CONFIG_PREEMPT_RT_BASE
@@ -1457,6 +1478,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		spin_unlock_irq(&timer.it_lock);
 
 		while (error == TIMER_RETRY) {
+
+			cpu_timers_grab_expiry_lock(&timer);
 			/*
 			 * We need to handle case when timer was or is in the
 			 * middle of firing. In other cases we already freed
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index a4f57a1ea0df2..2c1ca3cc391b2 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -800,27 +800,22 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
-/*
- * Protected by RCU!
- */
-static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr)
-{
-#ifdef CONFIG_PREEMPT_RT_FULL
-	if (kc->timer_arm == common_hrtimer_arm)
-		hrtimer_wait_for_timer(&timr->it.real.timer);
-	else if (kc == &alarm_clock)
-		hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer);
-	else
-		/* FIXME: Whacky hack for posix-cpu-timers */
-		schedule_timeout(1);
-#endif
-}
-
 static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
 {
 	return hrtimer_try_to_cancel(&timr->it.real.timer);
 }
 
+static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timer)
+{
+	if (kc->timer_arm == common_hrtimer_arm)
+		hrtimer_grab_expiry_lock(&timer->it.real.timer);
+	else if (kc == &alarm_clock)
+		hrtimer_grab_expiry_lock(&timer->it.alarm.alarmtimer.timer);
+	else
+		/* posix-cpu-timers */
+		cpu_timers_grab_expiry_lock(timer);
+}
+
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,
 		     struct itimerspec64 *new_setting,
@@ -880,21 +875,21 @@ static int do_timer_settime(timer_t timer_id, int flags,
 	if (!timr)
 		return -EINVAL;
 
-	rcu_read_lock();
 	kc = timr->kclock;
 	if (WARN_ON_ONCE(!kc || !kc->timer_set))
 		error = -EINVAL;
 	else
 		error = kc->timer_set(timr, flags, new_spec64, old_spec64);
 
-	unlock_timer(timr, flag);
 	if (error == TIMER_RETRY) {
+		rcu_read_lock();
+		unlock_timer(timr, flag);
 		timer_wait_for_callback(kc, timr);
-		old_spec64 = NULL;	// We already got the old time...
 		rcu_read_unlock();
+		old_spec64 = NULL;	// We already got the old time...
 		goto retry;
 	}
-	rcu_read_unlock();
+	unlock_timer(timr, flag);
 
 	return error;
 }
@@ -956,13 +951,21 @@ int common_timer_del(struct k_itimer *timer)
 	return 0;
 }
 
-static inline int timer_delete_hook(struct k_itimer *timer)
+static int timer_delete_hook(struct k_itimer *timer)
 {
 	const struct k_clock *kc = timer->kclock;
+	int ret;
 
 	if (WARN_ON_ONCE(!kc || !kc->timer_del))
 		return -EINVAL;
-	return kc->timer_del(timer);
+	ret = kc->timer_del(timer);
+	if (ret == TIMER_RETRY) {
+		rcu_read_lock();
+		spin_unlock_irq(&timer->it_lock);
+		timer_wait_for_callback(kc, timer);
+		rcu_read_unlock();
+	}
+	return ret;
 }
 
 /* Delete a POSIX.1b interval timer. */
@@ -976,15 +979,8 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
 	if (!timer)
 		return -EINVAL;
 
-	rcu_read_lock();
-	if (timer_delete_hook(timer) == TIMER_RETRY) {
-		unlock_timer(timer, flags);
-		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
-					timer);
-		rcu_read_unlock();
+	if (timer_delete_hook(timer) == TIMER_RETRY)
 		goto retry_delete;
-	}
-	rcu_read_unlock();
 
 	spin_lock(&current->sighand->siglock);
 	list_del(&timer->list);
@@ -1010,20 +1006,9 @@ static void itimer_delete(struct k_itimer *timer)
 retry_delete:
 	spin_lock_irqsave(&timer->it_lock, flags);
 
-	/* On RT we can race with a deletion */
-	if (!timer->it_signal) {
-		unlock_timer(timer, flags);
-		return;
-	}
-
-	if (timer_delete_hook(timer) == TIMER_RETRY) {
-		rcu_read_lock();
-		unlock_timer(timer, flags);
-		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
-					timer);
-		rcu_read_unlock();
+	if (timer_delete_hook(timer) == TIMER_RETRY)
 		goto retry_delete;
-	}
+
 	list_del(&timer->list);
 	/*
 	 * This keeps any tasks waiting on the spin lock from thinking
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index ddb21145211a0..725bd230a8db4 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -32,6 +32,8 @@ extern const struct k_clock clock_process;
 extern const struct k_clock clock_thread;
 extern const struct k_clock alarm_clock;
 
+extern void cpu_timers_grab_expiry_lock(struct k_itimer *timer);
+
 int posix_timer_event(struct k_itimer *timr, int si_private);
 
 void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 227dba00dd0ef..0b5f07c2fa834 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -43,7 +43,6 @@
 #include <linux/sched/debug.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
-#include <linux/swait.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -197,9 +196,7 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
 	raw_spinlock_t		lock;
 	struct timer_list	*running_timer;
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct swait_queue_head	wait_for_running_timer;
-#endif
+	spinlock_t		expiry_lock;
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
@@ -1181,33 +1178,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 }
 EXPORT_SYMBOL_GPL(add_timer_on);
 
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * Wait for a running timer
- */
-static void wait_for_running_timer(struct timer_list *timer)
-{
-	struct timer_base *base;
-	u32 tf = timer->flags;
-
-	if (tf & TIMER_MIGRATING)
-		return;
-
-	base = get_timer_base(tf);
-	swait_event_exclusive(base->wait_for_running_timer,
-			      base->running_timer != timer);
-}
-
-# define wakeup_timer_waiters(b)	swake_up_all(&(b)->wait_for_running_timer)
-#else
-static inline void wait_for_running_timer(struct timer_list *timer)
-{
-	cpu_relax();
-}
-
-# define wakeup_timer_waiters(b)	do { } while (0)
-#endif
-
 /**
  * del_timer - deactivate a timer.
  * @timer: the timer to be deactivated
@@ -1237,6 +1207,25 @@ int del_timer(struct timer_list *timer)
 }
 EXPORT_SYMBOL(del_timer);
 
+static int __try_to_del_timer_sync(struct timer_list *timer,
+				   struct timer_base **basep)
+{
+	struct timer_base *base;
+	unsigned long flags;
+	int ret = -1;
+
+	debug_assert_init(timer);
+
+	*basep = base = lock_timer_base(timer, &flags);
+
+	if (base->running_timer != timer)
+		ret = detach_if_pending(timer, base, true);
+
+	raw_spin_unlock_irqrestore(&base->lock, flags);
+
+	return ret;
+}
+
 /**
  * try_to_del_timer_sync - Try to deactivate a timer
  * @timer: timer to delete
@@ -1247,23 +1236,31 @@ EXPORT_SYMBOL(del_timer);
 int try_to_del_timer_sync(struct timer_list *timer)
 {
 	struct timer_base *base;
-	unsigned long flags;
-	int ret = -1;
 
-	debug_assert_init(timer);
-
-	base = lock_timer_base(timer, &flags);
-
-	if (base->running_timer != timer)
-		ret = detach_if_pending(timer, base, true);
-
-	raw_spin_unlock_irqrestore(&base->lock, flags);
-
-	return ret;
+	return __try_to_del_timer_sync(timer, &base);
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+static int __del_timer_sync(struct timer_list *timer)
+{
+	struct timer_base *base;
+	int ret;
+
+	for (;;) {
+		ret = __try_to_del_timer_sync(timer, &base);
+		if (ret >= 0)
+			return ret;
+
+		/*
+		 * When accessing the lock, timers of base are no longer expired
+		 * and so timer is no longer running.
+		 */
+		spin_lock(&base->expiry_lock);
+		spin_unlock(&base->expiry_lock);
+	}
+}
+
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1319,12 +1316,8 @@ int del_timer_sync(struct timer_list *timer)
 	 * could lead to deadlock.
 	 */
 	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		wait_for_running_timer(timer);
-	}
+
+	return __del_timer_sync(timer);
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1389,11 +1382,15 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn);
 			base->running_timer = NULL;
+			spin_unlock(&base->expiry_lock);
+			spin_lock(&base->expiry_lock);
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn);
 			base->running_timer = NULL;
+			spin_unlock(&base->expiry_lock);
+			spin_lock(&base->expiry_lock);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
@@ -1688,6 +1685,7 @@ static inline void __run_timers(struct timer_base *base)
 	if (!time_after_eq(jiffies, base->clk))
 		return;
 
+	spin_lock(&base->expiry_lock);
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1715,7 +1713,7 @@ static inline void __run_timers(struct timer_base *base)
 			expire_timers(base, heads + levels);
 	}
 	raw_spin_unlock_irq(&base->lock);
-	wakeup_timer_waiters(base);
+	spin_unlock(&base->expiry_lock);
 }
 
 /*
@@ -1962,9 +1960,7 @@ static void __init init_timer_cpu(int cpu)
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
-#ifdef CONFIG_PREEMPT_RT_FULL
-		init_swait_queue_head(&base->wait_for_running_timer);
-#endif
+		spin_lock_init(&base->expiry_lock);
 	}
 }
 
diff --git a/localversion-rt b/localversion-rt
index 6e44e540b927b..9f7d0bdbffb18 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt12
+-rt13
