Message-Id: <20250813023044.2121943-3-wutengda@huaweicloud.com>
Date: Wed, 13 Aug 2025 02:30:44 +0000
From: Tengda Wu <wutengda@...weicloud.com>
To: Steven Rostedt <rostedt@...dmis.org>,
Masami Hiramatsu <mhiramat@...nel.org>
Cc: Mark Rutland <mark.rutland@....com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
linux-trace-kernel@...r.kernel.org,
linux-kernel@...r.kernel.org,
Tengda Wu <wutengda@...weicloud.com>
Subject: [PATCH -next 2/2] ftrace: Fix potential use-after-free for set_ftrace_{notrace,filter} files
Concurrent read and write operations on the set_ftrace_{notrace,filter}
files can intermittently trigger the following page fault:
[ 2715.745293] BUG: unable to handle page fault for address: 00000003da393970
[ 2715.753736] CPU: 1 UID: 0 PID: 1324 Comm: read Not tainted 6.16.0-next-20250808 #1 PREEMPT(full)
[ 2715.755292] RIP: 0010:ftrace_lookup_ip+0x40/0x70
[ 2715.761114] Call Trace:
[ 2715.761462] <TASK>
[ 2715.761705] t_func_next.isra.0+0xaa/0xd0
[ 2715.762049] t_start+0xa3/0x140
[ 2715.762207] seq_read_iter+0xe8/0x4a0
[ 2715.762564] seq_read+0x101/0x140
[ 2715.762769] vfs_read+0xbd/0x340
[ 2715.763014] ? preempt_count_add+0x4b/0xa0
[ 2715.763311] ? do_sys_openat2+0x8c/0xd0
[ 2715.763623] ksys_read+0x65/0xe0
[ 2715.763797] do_syscall_64+0x4e/0x1c0
[ 2715.764049] entry_SYSCALL_64_after_hwframe+0x76/0x7e
The issue can be reproduced with the following script (using the
set_ftrace_notrace file as an example):
while true; do
echo __probestub_initcall_level > /sys/kernel/tracing/set_ftrace_notrace &
cat /sys/kernel/tracing/set_ftrace_notrace &
done
The root cause is that ftrace_regex_open() and ftrace_regex_release()
do not properly synchronize concurrent access to notrace_hash.
Consider a race between a reader and a writer:
1. The reader first obtains the value of notrace_hash via
ftrace_regex_open().
2. The writer then updates notrace_hash via ftrace_regex_release()
and frees the memory pointed to by the old notrace_hash.
3. Later, the reader dereferences the freed notrace_hash memory in
   ftrace_hash_empty() and ftrace_lookup_ip(), leading to a use-after-free.
CPU 1 (read)                                  CPU 2 (write)
ftrace_regex_open
  hash = ops->func_hash->notrace_hash;
  iter->hash = hash;
                                              ftrace_regex_open
                                              ftrace_regex_release
                                                orig_hash = &iter->ops->func_hash->notrace_hash;
                                                old_hash = *orig_hash;
                                                free_ftrace_hash_rcu(old_hash);
t_start
  ftrace_hash_empty(iter->hash)
t_func_next
  !ftrace_lookup_ip(iter->hash, rec->ip)
Since the reader's hash is tied to its own file descriptor, the writer
has no way to update or invalidate the reader's reference directly.
To fix this, introduce a refcount for ftrace_hash, initialized to 1.
The count is incremented when a reader opens the hash and decremented
when either the reader or the writer releases it, so the hash is only
freed once its last user has dropped its reference.
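For illustration only, here is a minimal userspace analogue of that
lifetime scheme (hypothetical names, a plain counter under a mutex
instead of refcount_t plus RCU, not the kernel ftrace code). It models
iter->hash as a pointer published in current_hash and shows why the
reader's cached pointer stays valid until the reader itself drops its
reference:

/*
 * Illustrative userspace sketch only -- not the kernel ftrace code.
 * current_hash plays the role of ops->func_hash->notrace_hash.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct hash {
	int refcount;	/* starts at 1, owned by the publisher */
	int data;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct hash *current_hash;

static struct hash *get_hash(void)	/* reader side of "open" */
{
	pthread_mutex_lock(&lock);
	struct hash *h = current_hash;
	h->refcount++;			/* reader takes its own reference */
	pthread_mutex_unlock(&lock);
	return h;
}

static void put_hash(struct hash *h)	/* reader/writer side of "release" */
{
	pthread_mutex_lock(&lock);
	int last = (--h->refcount == 0);
	pthread_mutex_unlock(&lock);
	if (last)
		free(h);		/* freed only by the last user */
}

static void writer_replace(int data)	/* writer swapping in a new hash */
{
	struct hash *new = malloc(sizeof(*new));
	new->refcount = 1;
	new->data = data;

	pthread_mutex_lock(&lock);
	struct hash *old = current_hash;
	current_hash = new;
	pthread_mutex_unlock(&lock);

	put_hash(old);			/* drop the publisher's reference */
}

int main(void)
{
	current_hash = malloc(sizeof(*current_hash));
	current_hash->refcount = 1;
	current_hash->data = 0;

	struct hash *reader = get_hash();	/* reader opens the file */
	writer_replace(1);			/* concurrent write + release */
	printf("reader still sees data=%d\n", reader->data);	/* no UAF */
	put_hash(reader);			/* reader closes the file */

	put_hash(current_hash);			/* final teardown */
	return 0;
}

With this pairing, the writer's put can no longer free a hash that a
concurrent reader still holds; the patch below achieves the same with
refcount_inc_not_zero()/refcount_dec_and_test() and defers the final
free through call_rcu().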
Fixes: c20489dad156 ("ftrace: Assign iter->hash to filter or notrace hashes on seq read")
Signed-off-by: Tengda Wu <wutengda@...weicloud.com>
---
kernel/trace/ftrace.c | 27 ++++++++++++++++++++++++---
kernel/trace/trace.h | 2 ++
2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cade13595b08..be4842054254 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1060,6 +1060,7 @@ struct ftrace_func_probe {
static const struct hlist_head empty_buckets[1];
static const struct ftrace_hash empty_hash = {
.buckets = (struct hlist_head *)empty_buckets,
+ .refcount = REFCOUNT_INIT(1),
};
#define EMPTY_HASH ((struct ftrace_hash *)&empty_hash)
@@ -1282,6 +1283,22 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
}
+static void get_ftrace_hash(struct ftrace_hash *hash)
+{
+ if (!hash || hash == EMPTY_HASH)
+ return;
+ if (!refcount_inc_not_zero(&hash->refcount))
+ WARN_ON(1);
+}
+
+static void put_ftrace_hash_rcu(struct ftrace_hash *hash)
+{
+ if (!hash || hash == EMPTY_HASH)
+ return;
+ if (refcount_dec_and_test(&hash->refcount))
+ call_rcu(&hash->rcu, __free_ftrace_hash_rcu);
+}
+
/**
* ftrace_free_filter - remove all filters for an ftrace_ops
* @ops: the ops to remove the filters from
@@ -1316,6 +1333,7 @@ static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
}
hash->size_bits = size_bits;
+ refcount_set(&hash->refcount, 1);
return hash;
}
@@ -3362,7 +3380,7 @@ static int __ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
ret = ftrace_hash_move(ops, enable, orig_hash, hash);
if (!ret) {
ftrace_ops_update_code(ops, &old_hash_ops);
- free_ftrace_hash_rcu(old_hash);
+ put_ftrace_hash_rcu(old_hash);
}
return ret;
}
@@ -3714,7 +3732,7 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops,
*orig_subhash = save_hash;
free_ftrace_hash_rcu(new_hash);
} else {
- free_ftrace_hash_rcu(save_hash);
+ put_ftrace_hash_rcu(save_hash);
}
return ret;
}
@@ -4666,8 +4684,10 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
trace_parser_put(&iter->parser);
goto out_unlock;
}
- } else
+ } else {
iter->hash = hash;
+ get_ftrace_hash(iter->hash);
+ }
ret = 0;
@@ -6544,6 +6564,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
mutex_unlock(&ftrace_lock);
} else {
/* For read only, the hash is the ops hash */
+ put_ftrace_hash_rcu(iter->hash);
iter->hash = NULL;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 1dbf1d3cf2f1..4936cd218c36 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -22,6 +22,7 @@
#include <linux/ctype.h>
#include <linux/once_lite.h>
#include <linux/ftrace_regs.h>
+#include <linux/refcount.h>
#include "pid_list.h"
@@ -905,6 +906,7 @@ struct ftrace_hash {
unsigned long count;
unsigned long flags;
struct rcu_head rcu;
+ refcount_t refcount;
};
struct ftrace_func_entry *
--
2.34.1