lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241105133405.2703607-7-jolsa@kernel.org>
Date: Tue,  5 Nov 2024 14:34:00 +0100
From: Jiri Olsa <jolsa@...nel.org>
To: Oleg Nesterov <oleg@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Andrii Nakryiko <andrii@...nel.org>
Cc: bpf@...r.kernel.org,
	Song Liu <songliubraving@...com>,
	Yonghong Song <yhs@...com>,
	John Fastabend <john.fastabend@...il.com>,
	Hao Luo <haoluo@...gle.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Masami Hiramatsu <mhiramat@...nel.org>,
	Alan Maguire <alan.maguire@...cle.com>,
	linux-kernel@...r.kernel.org,
	linux-trace-kernel@...r.kernel.org
Subject: [RFC perf/core 06/11] uprobes: Add uprobe syscall to speed up uprobe

Adding new uprobe syscall that calls uprobe handlers for given
'breakpoint' address.

The idea is that the 'breakpoint' address calls the user space
trampoline which executes the uprobe syscall.

The syscall handler reads the return address of the initiall call
to retrieve the original 'breakpoint' address.

With this address we find the related uprobe object and call its
consumers.

TODO allow to call uprobe syscall only from uprobe trampoline.

Signed-off-by: Jiri Olsa <jolsa@...nel.org>
---
 arch/x86/entry/syscalls/syscall_64.tbl |  1 +
 arch/x86/kernel/uprobes.c              | 48 ++++++++++++++++++++++++++
 include/linux/syscalls.h               |  2 ++
 include/linux/uprobes.h                |  2 ++
 kernel/events/uprobes.c                | 35 +++++++++++++++++++
 kernel/sys_ni.c                        |  1 +
 6 files changed, 89 insertions(+)

diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 7093ee21c0d1..f6299d57afe5 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,7 @@
 333	common	io_pgetevents		sys_io_pgetevents
 334	common	rseq			sys_rseq
 335	common	uretprobe		sys_uretprobe
+336	common	uprobe			sys_uprobe
 # don't use numbers 387 through 423, add new calls after the last
 # 'common' entry
 424	common	pidfd_send_signal	sys_pidfd_send_signal
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 22a17c149a55..02aa4519b677 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -425,6 +425,54 @@ SYSCALL_DEFINE0(uretprobe)
 	return -1;
 }
 
+SYSCALL_DEFINE0(uprobe)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+	unsigned long bp_vaddr;
+	int err;
+
+	err = copy_from_user(&bp_vaddr, (void __user *)regs->sp + 3*8, sizeof(bp_vaddr));
+	if (err) {
+		force_sig(SIGILL);
+		return -1;
+	}
+
+	handle_syscall_uprobe(regs, bp_vaddr - 5);
+	return 0;
+}
+
+asm (
+	".pushsection .rodata\n"
+	".global uprobe_trampoline_entry\n"
+	"uprobe_trampoline_entry:\n"
+	"push %rcx\n"
+	"push %r11\n"
+	"push %rax\n"
+	"movq $" __stringify(__NR_uprobe) ", %rax\n"
+	"syscall\n"
+	"pop %rax\n"
+	"pop %r11\n"
+	"pop %rcx\n"
+	"ret\n"
+	".global uprobe_trampoline_end\n"
+	"uprobe_trampoline_end:\n"
+	".popsection\n"
+);
+
+extern __visible u8 uprobe_trampoline_entry[];
+extern __visible u8 uprobe_trampoline_end[];
+
+void *arch_uprobe_trampoline(unsigned long *psize)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	if (user_64bit_mode(regs)) {
+		*psize = uprobe_trampoline_end - uprobe_trampoline_entry;
+		return uprobe_trampoline_entry;
+	}
+	return NULL;
+}
+
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately.  Otherwise, rewrite the instruction so that it accesses
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5758104921e6..a2573f9dd248 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -981,6 +981,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on);
 
 asmlinkage long sys_uretprobe(void);
 
+asmlinkage long sys_uprobe(void);
+
 /* pciconfig: alpha, arm, arm64, ia64, sparc */
 asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn,
 				unsigned long off, unsigned long len,
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 222d8e82cee2..4024e6ea52a4 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -231,6 +231,8 @@ extern bool arch_uprobe_is_register(uprobe_opcode_t *insn, int len, void *data);
 struct tramp_area *get_tramp_area(unsigned long vaddr);
 void put_tramp_area(struct tramp_area *area);
 bool arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr);
+extern void *arch_uprobe_trampoline(unsigned long *psize);
+extern void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a44305c559a4..b8399684231c 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -621,6 +621,11 @@ bool __weak arch_uprobe_is_callable(unsigned long vtramp, unsigned long vaddr)
 	return false;
 }
 
+void * __weak arch_uprobe_trampoline(unsigned long *psize)
+{
+	return NULL;
+}
+
 static unsigned long find_nearest_page(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
@@ -673,7 +678,13 @@ static struct tramp_area *create_tramp_area(unsigned long vaddr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
+	unsigned long tramp_size;
 	struct tramp_area *area;
+	void *tramp;
+
+	tramp = arch_uprobe_trampoline(&tramp_size);
+	if (!tramp)
+		return NULL;
 
 	vaddr = find_nearest_page(vaddr);
 	if (!vaddr)
@@ -690,6 +701,8 @@ static struct tramp_area *create_tramp_area(unsigned long vaddr)
 	refcount_set(&area->ref, 1);
 	area->vaddr = vaddr;
 
+	arch_uprobe_copy_ixol(area->page, 0, tramp, tramp_size);
+
 	vma = _install_special_mapping(mm, area->vaddr, PAGE_SIZE,
 				VM_READ|VM_EXEC|VM_MAYEXEC|VM_MAYREAD|VM_DONTCOPY|VM_IO,
 				&tramp_mapping);
@@ -2757,6 +2770,28 @@ static void handle_swbp(struct pt_regs *regs)
 	rcu_read_unlock_trace();
 }
 
+void handle_syscall_uprobe(struct pt_regs *regs, unsigned long bp_vaddr)
+{
+	struct uprobe *uprobe;
+	int is_swbp;
+
+	rcu_read_lock_trace();
+	uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
+	if (!uprobe)
+		goto unlock;
+
+	if (!get_utask())
+		goto unlock;
+
+	if (arch_uprobe_ignore(&uprobe->arch, regs))
+		goto unlock;
+
+	handler_chain(uprobe, regs);
+
+unlock:
+	rcu_read_unlock_trace();
+}
+
 /*
  * Perform required fix-ups and disable singlestep.
  * Allow pending signals to take effect.
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index c00a86931f8c..bf5d05c635ff 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -392,3 +392,4 @@ COND_SYSCALL(setuid16);
 COND_SYSCALL(rseq);
 
 COND_SYSCALL(uretprobe);
+COND_SYSCALL(uprobe);
-- 
2.47.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ