lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240308202432.269350592@goodmis.org>
Date: Fri, 08 Mar 2024 15:24:06 -0500
From: Steven Rostedt <rostedt@...dmis.org>
To: linux-kernel@...r.kernel.org,
 linux-trace-kernel@...r.kernel.org
Cc: Masami Hiramatsu <mhiramat@...nel.org>,
 Mark Rutland <mark.rutland@....com>,
 Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
 Andrew Morton <akpm@...ux-foundation.org>,
 Linus Torvalds <torvalds@...ux-foundation.org>,
 joel@...lfernandes.org,
 linke li <lilinke99@...com>,
 Rabin Vincent <rabin@....in>,
 stable@...r.kernel.org
Subject: [PATCH v2 4/6] tracing: Fix waking up tracing readers

From: "Steven Rostedt (Google)" <rostedt@...dmis.org>

When the trace_pipe_raw file is closed, if there are readers still
blocked on it, they need to be woken up. Currently a wait_index is used.
When the readers need to be woken, the index is updated and they are all
woken up.

But there is a race where a new reader could be coming in just as the file
is being closed, and it could still block if the wake up happens just
before the reader enters the wait.

Add another field called "waking" and protect both waking and wait_index
with a new wait_lock to synchronize them.

When a reader comes in, it will save the current wait_index, but if waking
is set, then it will not block no matter what wait_index is.

After it wakes from the wait, if either waking is set or the wait_index
is not the same as what it read before, then it will not block again.

The waker will set waking and increment the wait_index. For the .flush()
function, it will not clear waking so that all new readers must not block.

There's an ioctl() that kicks all current waiters, but does not care about
new waiters. It will set the waking count back to what it was when it came
in.

There's still a race with the wait_on_pipe() with respect to the
ring_buffer_wait(), but that will be dealt with separately.

Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNSRZfg@mail.gmail.com/

Cc: stable@...r.kernel.org
Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file")
Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
---
Changes since v1: https://lore.kernel.org/linux-trace-kernel/20240308184007.805898590@goodmis.org

- My tests triggered a warning about calling a mutex_lock() after a
  prepare_to_wait() that changed the task's state. Convert the affected
  mutex over to a spinlock.

 include/linux/trace_events.h |   3 +-
 kernel/trace/trace.c         | 104 +++++++++++++++++++++++++++++------
 2 files changed, 89 insertions(+), 18 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index d68ff9b1247f..adf8e163a7be 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -103,7 +103,8 @@ struct trace_iterator {
 	unsigned int		temp_size;
 	char			*fmt;	/* modified format holder */
 	unsigned int		fmt_size;
-	long			wait_index;
+	int			wait_index;
+	int			waking;	/* set by a waker */
 
 	/* trace_seq for __print_flags() and __print_symbolic() etc. */
 	struct trace_seq	tmp_seq;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c9c898307348..a184dbdf8e91 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1955,6 +1955,68 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
+/*
+ * In order to wake up readers and have them return back to user space,
+ * the iterator has two counters:
+ *
+ *  wait_index - always increases every time a waker wakes up the readers.
+ *  waking - Set by the waker when waking and cleared afterward.
+ *
+ * Both are protected together with the wait_lock.
+ * When waking, the lock is taken and both counters are incremented.
+ * The reader will first prepare the wait by taking the lock;
+ * if waking is set, it will not sleep regardless of what wait_index is.
+ * Then after it sleeps it checks if waking is set or wait_index has
+ * been updated, and if so, it will not sleep again.
+ *
+ * Note, if wait_woken_clear() is not called, then all new readers
+ * will not sleep (this happens in closing the file).
+ *
+ * wait_lock needs to be a spinlock and not a mutex as it can be called
+ * after prepare_to_wait(), which changes the task's state.
+ */
+static DEFINE_SPINLOCK(wait_lock);
+
+static bool wait_woken_prepare(struct trace_iterator *iter, int *wait_index)
+{
+	bool woken = false;
+
+	spin_lock(&wait_lock);
+	if (iter->waking)
+		woken = true;
+	*wait_index = iter->wait_index;
+	spin_unlock(&wait_lock);
+
+	return woken;
+}
+
+static bool wait_woken_check(struct trace_iterator *iter, int *wait_index)
+{
+	bool woken = false;
+
+	spin_lock(&wait_lock);
+	if (iter->waking || *wait_index != iter->wait_index)
+		woken = true;
+	spin_unlock(&wait_lock);
+
+	return woken;
+}
+
+static void wait_woken_set(struct trace_iterator *iter)
+{
+	spin_lock(&wait_lock);
+	iter->waking++;
+	iter->wait_index++;
+	spin_unlock(&wait_lock);
+}
+
+static void wait_woken_clear(struct trace_iterator *iter)
+{
+	spin_lock(&wait_lock);
+	iter->waking--;
+	spin_unlock(&wait_lock);
+}
+
 static int wait_on_pipe(struct trace_iterator *iter, int full)
 {
 	int ret;
@@ -8312,9 +8374,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 	struct ftrace_buffer_info *info = filp->private_data;
 	struct trace_iterator *iter = &info->iter;
 	void *trace_data;
+	int wait_index;
 	int page_size;
 	ssize_t ret = 0;
 	ssize_t size;
+	bool woken;
 
 	if (!count)
 		return 0;
@@ -8353,6 +8417,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 	if (info->read < page_size)
 		goto read;
 
+	woken = wait_woken_prepare(iter, &wait_index);
  again:
 	trace_access_lock(iter->cpu_file);
 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
@@ -8362,7 +8427,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 	trace_access_unlock(iter->cpu_file);
 
 	if (ret < 0) {
-		if (trace_empty(iter)) {
+		if (trace_empty(iter) && !woken) {
 			if ((filp->f_flags & O_NONBLOCK))
 				return -EAGAIN;
 
@@ -8370,6 +8435,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 			if (ret)
 				return ret;
 
+			woken = wait_woken_check(iter, &wait_index);
+
 			goto again;
 		}
 		return 0;
@@ -8398,12 +8465,14 @@ static int tracing_buffers_flush(struct file *file, fl_owner_t id)
 	struct ftrace_buffer_info *info = file->private_data;
 	struct trace_iterator *iter = &info->iter;
 
-	iter->wait_index++;
-	/* Make sure the waiters see the new wait_index */
-	smp_wmb();
+	wait_woken_set(iter);
 
 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
 
+	/*
+	 * Do not call wait_woken_clear(), as the file is being closed.
+	 * Leaving waking set prevents any new readers from sleeping.
+	 */
 	return 0;
 }
 
@@ -8500,9 +8569,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		.spd_release	= buffer_spd_release,
 	};
 	struct buffer_ref *ref;
+	int wait_index;
 	int page_size;
 	int entries, i;
 	ssize_t ret = 0;
+	bool woken = false;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
@@ -8522,6 +8593,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 	if (splice_grow_spd(pipe, &spd))
 		return -ENOMEM;
 
+	woken = wait_woken_prepare(iter, &wait_index);
  again:
 	trace_access_lock(iter->cpu_file);
 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
@@ -8573,17 +8645,17 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 
 	/* did we read anything? */
 	if (!spd.nr_pages) {
-		long wait_index;
 
 		if (ret)
 			goto out;
 
+		if (woken)
+			goto out;
+
 		ret = -EAGAIN;
 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
 			goto out;
 
-		wait_index = READ_ONCE(iter->wait_index);
-
 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
 		if (ret)
 			goto out;
@@ -8592,10 +8664,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		if (!tracer_tracing_is_on(iter->tr))
 			goto out;
 
-		/* Make sure we see the new wait_index */
-		smp_rmb();
-		if (wait_index != iter->wait_index)
-			goto out;
+		woken = wait_woken_check(iter, &wait_index);
 
 		goto again;
 	}
@@ -8616,15 +8685,16 @@ static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned
 	if (cmd)
 		return -ENOIOCTLCMD;
 
-	mutex_lock(&trace_types_lock);
-
-	iter->wait_index++;
-	/* Make sure the waiters see the new wait_index */
-	smp_wmb();
+	wait_woken_set(iter);
 
 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
 
-	mutex_unlock(&trace_types_lock);
+	/*
+	 * This just kicks existing readers; a new reader coming in may
+	 * still sleep.
+	 */
+	wait_woken_clear(iter);
+
 	return 0;
 }
 
-- 
2.43.0



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ