linux-kernel - [ANNOUNCE] 3.10.9-rt5

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 22 Aug 2013 20:21:27 +0200
From:	Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To:	linux-rt-users <linux-rt-users@...r.kernel.org>
Cc:	LKML <linux-kernel@...r.kernel.org>,
	Thomas Gleixner <tglx@...utronix.de>, rostedt@...dmis.org,
	John Kacur <jkacur@...hat.com>
Subject: [ANNOUNCE] 3.10.9-rt5

Dear RT folks!

I'm pleased to announce the v3.10.9-rt5 patch set.

Changes since v3.10.9-rt4
- swait fixes from Steven. It fixed the issues with CONFIG_RCU_NOCB_CPU
  where the system suddenly froze and RCU wasn't doing its job anymore
- hwlat improvements by Steven

Known issues:

      - SLAB support not working

      - The cpsw network driver shows some issues.

      - bcache does not compile.

      - set_affinity callbacks result in splat due to sleeping while
        atomic

      - an ancient race (since we got sleeping spinlocks) where the
        TASK_TRACED state is temporary replaced while waiting on a rw
        lock and the task can't be traced.

The delta patch against v3.10.9-rt4 is appended below and can be found
here:
  https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/incr/patch-3.10.9-rt4-rt5.patch.xz

The RT patch against 3.10.9 can be found here:

  https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patch-3.10.9-rt5.patch.xz

The split quilt queue is available at:

  https://www.kernel.org/pub/linux/kernel/projects/rt/3.10/patches-3.10.9-rt5.tar.xz

Sebastian

diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c
index b7b7c90..0bfa40d 100644
--- a/drivers/misc/hwlat_detector.c
+++ b/drivers/misc/hwlat_detector.c
@@ -41,7 +41,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/ring_buffer.h>
-#include <linux/stop_machine.h>
 #include <linux/time.h>
 #include <linux/hrtimer.h>
 #include <linux/kthread.h>
@@ -51,6 +50,7 @@
 #include <linux/version.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/trace_clock.h>
 
 #define BUF_SIZE_DEFAULT	262144UL		/* 8K*(sizeof(entry)) */
 #define BUF_FLAGS		(RB_FL_OVERWRITE)	/* no block on full */
@@ -106,7 +106,6 @@ struct data;					/* Global state */
 /* Sampling functions */
 static int __buffer_add_sample(struct sample *sample);
 static struct sample *buffer_get_sample(struct sample *sample);
-static int get_sample(void *unused);
 
 /* Threading and state */
 static int kthread_fn(void *unused);
@@ -143,11 +142,12 @@ static void detector_exit(void);
 struct sample {
 	u64		seqnum;		/* unique sequence */
 	u64		duration;	/* ktime delta */
+	u64		outer_duration;	/* ktime delta (outer loop) */
 	struct timespec	timestamp;	/* wall time */
 	unsigned long   lost;
 };
 
-/* keep the global state somewhere. Mostly used under stop_machine. */
+/* keep the global state somewhere. */
 static struct data {
 
 	struct mutex lock;		/* protect changes */
@@ -170,7 +170,7 @@ static struct data {
  * @sample: The new latency sample value
  *
  * This receives a new latency sample and records it in a global ring buffer.
- * No additional locking is used in this case - suited for stop_machine use.
+ * No additional locking is used in this case.
  */
 static int __buffer_add_sample(struct sample *sample)
 {
@@ -210,29 +210,60 @@ static struct sample *buffer_get_sample(struct sample *sample)
 	return sample;
 }
 
+#ifndef CONFIG_TRACING
+#define time_type	ktime_t
+#define time_get()	ktime_get()
+#define time_to_us(x)	ktime_to_us(x)
+#define time_sub(a, b)	ktime_sub(a, b)
+#define init_time(a, b)	(a).tv64 = b
+#define time_u64(a)	(a).tv64
+#else
+#define time_type	u64
+#define time_get()	trace_clock_local()
+#define time_to_us(x)	((x) / 1000)
+#define time_sub(a, b)	((a) - (b))
+#define init_time(a, b)	a = b
+#define time_u64(a)	a
+#endif
 /**
  * get_sample - sample the CPU TSC and look for likely hardware latencies
- * @unused: This is not used but is a part of the stop_machine API
  *
  * Used to repeatedly capture the CPU TSC (or similar), looking for potential
- * hardware-induced latency. Called under stop_machine, with data.lock held.
+ * hardware-induced latency. Called with interrupts disabled and with data.lock held.
  */
-static int get_sample(void *unused)
+static int get_sample(void)
 {
-	ktime_t start, t1, t2;
+	time_type start, t1, t2, last_t2;
 	s64 diff, total = 0;
 	u64 sample = 0;
-	int ret = 1;
+	u64 outer_sample = 0;
+	int ret = -1;
 
-	start = ktime_get(); /* start timestamp */
+	init_time(last_t2, 0);
+	start = time_get(); /* start timestamp */
 
 	do {
 
-		t1 = ktime_get();	/* we'll look for a discontinuity */
-		t2 = ktime_get();
+		t1 = time_get();	/* we'll look for a discontinuity */
+		t2 = time_get();
+
+		if (time_u64(last_t2)) {
+			/* Check the delta from the outer loop (t2 to next t1) */
+			diff = time_to_us(time_sub(t1, last_t2));
+			/* This shouldn't happen */
+			if (diff < 0) {
+				printk(KERN_ERR BANNER "time running backwards\n");
+				goto out;
+			}
+			if (diff > outer_sample)
+				outer_sample = diff;
+		}
+		last_t2 = t2;
+
+		total = time_to_us(time_sub(t2, start)); /* sample width */
 
-		total = ktime_to_us(ktime_sub(t2, start)); /* sample width */
-		diff = ktime_to_us(ktime_sub(t2, t1));     /* current diff */
+		/* This checks the inner loop (t1 to t2) */
+		diff = time_to_us(time_sub(t2, t1));     /* current diff */
 
 		/* This shouldn't happen */
 		if (diff < 0) {
@@ -245,13 +276,18 @@ static int get_sample(void *unused)
 
 	} while (total <= data.sample_width);
 
+	ret = 0;
+
 	/* If we exceed the threshold value, we have found a hardware latency */
-	if (sample > data.threshold) {
+	if (sample > data.threshold || outer_sample > data.threshold) {
 		struct sample s;
 
+		ret = 1;
+
 		data.count++;
 		s.seqnum = data.count;
 		s.duration = sample;
+		s.outer_duration = outer_sample;
 		s.timestamp = CURRENT_TIME;
 		__buffer_add_sample(&s);
 
@@ -260,7 +296,6 @@ static int get_sample(void *unused)
 			data.max_sample = sample;
 	}
 
-	ret = 0;
 out:
 	return ret;
 }
@@ -270,32 +305,30 @@ static int get_sample(void *unused)
  * @unused: A required part of the kthread API.
  *
  * Used to periodically sample the CPU TSC via a call to get_sample. We
- * use stop_machine, whith does (intentionally) introduce latency since we
+ * disable interrupts, which does (intentionally) introduce latency since we
  * need to ensure nothing else might be running (and thus pre-empting).
  * Obviously this should never be used in production environments.
  *
- * stop_machine will schedule us typically only on CPU0 which is fine for
- * almost every real-world hardware latency situation - but we might later
- * generalize this if we find there are any actualy systems with alternate
- * SMI delivery or other non CPU0 hardware latencies.
+ * Currently this runs on which ever CPU it was scheduled on, but most
+ * real-worald hardware latency situations occur across several CPUs,
+ * but we might later generalize this if we find there are any actualy
+ * systems with alternate SMI delivery or other hardware latencies.
  */
 static int kthread_fn(void *unused)
 {
-	int err = 0;
-	u64 interval = 0;
+	int ret;
+	u64 interval;
 
 	while (!kthread_should_stop()) {
 
 		mutex_lock(&data.lock);
 
-		err = stop_machine(get_sample, unused, 0);
-		if (err) {
-			/* Houston, we have a problem */
-			mutex_unlock(&data.lock);
-			goto err_out;
-		}
+		local_irq_disable();
+		ret = get_sample();
+		local_irq_enable();
 
-		wake_up(&data.wq); /* wake up reader(s) */
+		if (ret > 0)
+			wake_up(&data.wq); /* wake up reader(s) */
 
 		interval = data.sample_window - data.sample_width;
 		do_div(interval, USEC_PER_MSEC); /* modifies interval value */
@@ -303,15 +336,10 @@ static int kthread_fn(void *unused)
 		mutex_unlock(&data.lock);
 
 		if (msleep_interruptible(interval))
-			goto out;
+			break;
 	}
-		goto out;
-err_out:
-	printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
-	enabled = 0;
-out:
-	return err;
 
+	return 0;
 }
 
 /**
@@ -407,8 +435,7 @@ static int init_stats(void)
  * This function provides a generic read implementation for the global state
  * "data" structure debugfs filesystem entries. It would be nice to use
  * simple_attr_read directly, but we need to make sure that the data.lock
- * spinlock is held during the actual read (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual read.
  */
 static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
 				size_t cnt, loff_t *ppos, const u64 *entry)
@@ -443,8 +470,7 @@ static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
  * This function provides a generic write implementation for the global state
  * "data" structure debugfs filesystem entries. It would be nice to use
  * simple_attr_write directly, but we need to make sure that the data.lock
- * spinlock is held during the actual write (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual write.
  */
 static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
 				 size_t cnt, loff_t *ppos, u64 *entry)
@@ -738,10 +764,11 @@ static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf,
 		}
 	}
 
-	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n",
-		      sample->timestamp.tv_sec,
-		      sample->timestamp.tv_nsec,
-		      sample->duration);
+	len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n",
+		       sample->timestamp.tv_sec,
+		       sample->timestamp.tv_nsec,
+		       sample->duration,
+		       sample->outer_duration);
 
 
 	/* handling partial reads is more trouble than it's worth */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 684b762..dc0c4b2 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -2036,7 +2036,7 @@ static int rcu_nocb_needs_gp(struct rcu_state *rsp)
  */
 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
-	swait_wake(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+	swait_wake_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
 }
 
 /*
diff --git a/kernel/wait-simple.c b/kernel/wait-simple.c
index 4b9a0b5..2c85626 100644
--- a/kernel/wait-simple.c
+++ b/kernel/wait-simple.c
@@ -16,6 +16,8 @@
 static inline void __swait_enqueue(struct swait_head *head, struct swaiter *w)
 {
 	list_add(&w->node, &head->list);
+	/* We can't let the condition leak before the setting of head */
+	smp_mb();
 }
 
 /* Removes w from head->list. Must be called with head->lock locked. */
@@ -27,6 +29,8 @@ static inline void __swait_dequeue(struct swaiter *w)
 /* Check whether a head has waiters enqueued */
 static inline bool swait_head_has_waiters(struct swait_head *h)
 {
+	/* Make sure the condition is visible before checking list_empty() */
+	smp_mb();
 	return !list_empty(&h->list);
 }
 
diff --git a/localversion-rt b/localversion-rt
index ad3da1b..0efe7ba 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt4
+-rt5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/