Message-ID: <20250905082447.GQ4068168@noisy.programming.kicks-ass.net>
Date: Fri, 5 Sep 2025 10:24:47 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Andrii Nakryiko <andrii.nakryiko@...il.com>
Cc: Jiri Olsa <olsajiri@...il.com>, Oleg Nesterov <oleg@...hat.com>,
Andrii Nakryiko <andrii@...nel.org>, bpf <bpf@...r.kernel.org>,
open list <linux-kernel@...r.kernel.org>,
Linux trace kernel <linux-trace-kernel@...r.kernel.org>,
X86 ML <x86@...nel.org>, Song Liu <songliubraving@...com>,
Yonghong Song <yhs@...com>,
John Fastabend <john.fastabend@...il.com>,
Hao Luo <haoluo@...gle.com>, Steven Rostedt <rostedt@...dmis.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Alan Maguire <alan.maguire@...cle.com>,
David Laight <David.Laight@...lab.com>,
Thomas Weißschuh <thomas@...ch.de>,
Ingo Molnar <mingo@...nel.org>
Subject: Re: nop5-optimized USDTs WAS: Re: [PATCHv6 perf/core 09/22]
uprobes/x86: Add uprobe syscall to speed up uprobe
On Thu, Sep 04, 2025 at 11:58:26PM +0200, Peter Zijlstra wrote:
> On Thu, Sep 04, 2025 at 11:56:17PM +0200, Peter Zijlstra wrote:
>
> > Ooh, that suggests we do something like so:
>
> N/m, I need to go sleep, that doesn't work right for the 32bit nops that
> use lea instead of nopl. I'll see if I can come up with something more
> sensible.
Something like this. Can someone please look very critically at this fancy
insn_is_nop()?
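(To make the review concrete, here is a rough, entirely untested self-test
sketch against the in-kernel insn_decode() API; the byte patterns below are
from memory, so treat the table as an assumption rather than a reference:)

/*
 * Untested sketch: decode a few known 5-byte nop encodings and check that
 * insn_is_nop() classifies them the way we expect.  Byte patterns are from
 * memory and should be double-checked.
 */
#include <linux/kernel.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>

static const struct {
	u8 bytes[MAX_INSN_SIZE];
	bool x86_64;
	bool nop;
} nop_tests[] = {
	{ { 0x0f, 0x1f, 0x44, 0x00, 0x00 }, true,  true  }, /* nopl 0x0(%rax,%rax,1) */
	{ { 0x66, 0x66, 0x66, 0x66, 0x90 }, true,  true  }, /* prefixed nop */
	{ { 0xe9, 0x00, 0x00, 0x00, 0x00 }, true,  true  }, /* jmp.d32 +0 */
	{ { 0x3e, 0x8d, 0x74, 0x26, 0x00 }, false, true  }, /* lea %ds:0x0(%esi,%eiz,1),%esi */
	{ { 0xf3, 0x90 },                   true,  false }, /* rep nop (PAUSE), must be rejected */
};

static void insn_is_nop_selftest(void)
{
	for (int i = 0; i < ARRAY_SIZE(nop_tests); i++) {
		struct insn insn;

		if (insn_decode(&insn, nop_tests[i].bytes, MAX_INSN_SIZE,
				nop_tests[i].x86_64 ? INSN_MODE_64 : INSN_MODE_32) < 0) {
			pr_warn("nop selftest %d: decode failed\n", i);
			continue;
		}

		if (insn_is_nop(&insn) != nop_tests[i].nop)
			pr_warn("nop selftest %d: unexpected result\n", i);
	}
}

The 32-bit LEA entry is the GAS style 5-byte nop that tripped up the earlier
attempt.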
---
arch/x86/include/asm/insn-eval.h | 2 +
arch/x86/kernel/alternative.c | 20 +--------
arch/x86/kernel/uprobes.c | 32 ++------------
arch/x86/lib/insn-eval.c | 92 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 98 insertions(+), 48 deletions(-)
diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 54368a43abf6..4733e9064ee5 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -44,4 +44,6 @@ enum insn_mmio_type {
enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
+bool insn_is_nop(struct insn *insn);
+
#endif /* _ASM_X86_INSN_EVAL_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 7bde68247b5f..e1f98189fe77 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -9,6 +9,7 @@
#include <asm/text-patching.h>
#include <asm/insn.h>
+#include <asm/insn-eval.h>
#include <asm/ibt.h>
#include <asm/set_memory.h>
#include <asm/nmi.h>
@@ -345,25 +346,6 @@ static void add_nop(u8 *buf, unsigned int len)
*buf = INT3_INSN_OPCODE;
}
-/*
- * Matches NOP and NOPL, not any of the other possible NOPs.
- */
-static bool insn_is_nop(struct insn *insn)
-{
- /* Anything NOP, but no REP NOP */
- if (insn->opcode.bytes[0] == 0x90 &&
- (!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3))
- return true;
-
- /* NOPL */
- if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F)
- return true;
-
- /* TODO: more nops */
-
- return false;
-}
-
/*
* Find the offset of the first non-NOP instruction starting at @offset
* but no further than @len.
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 0a8c0a4a5423..32bc583e6fd4 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -17,6 +17,7 @@
#include <linux/kdebug.h>
#include <asm/processor.h>
#include <asm/insn.h>
+#include <asm/insn-eval.h>
#include <asm/mmu_context.h>
#include <asm/nops.h>
@@ -1158,35 +1159,12 @@ void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
mmap_write_unlock(mm);
}
-static bool insn_is_nop(struct insn *insn)
-{
- return insn->opcode.nbytes == 1 && insn->opcode.bytes[0] == 0x90;
-}
-
-static bool insn_is_nopl(struct insn *insn)
-{
- if (insn->opcode.nbytes != 2)
- return false;
-
- if (insn->opcode.bytes[0] != 0x0f || insn->opcode.bytes[1] != 0x1f)
- return false;
-
- if (!insn->modrm.nbytes)
- return false;
-
- if (X86_MODRM_REG(insn->modrm.bytes[0]) != 0)
- return false;
-
- /* 0f 1f /0 - NOPL */
- return true;
-}
-
static bool can_optimize(struct insn *insn, unsigned long vaddr)
{
if (!insn->x86_64 || insn->length != 5)
return false;
- if (!insn_is_nop(insn) && !insn_is_nopl(insn))
+ if (!insn_is_nop(insn))
return false;
/* We can't do cross page atomic writes yet. */
@@ -1428,17 +1406,13 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
insn_byte_t p;
int i;
- /* x86_nops[insn->length]; same as jmp with .offs = 0 */
- if (insn->length <= ASM_NOP_MAX &&
- !memcmp(insn->kaddr, x86_nops[insn->length], insn->length))
+ if (insn_is_nop(insn))
goto setup;
switch (opc1) {
case 0xeb: /* jmp 8 */
case 0xe9: /* jmp 32 */
break;
- case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
- goto setup;
case 0xe8: /* call relative */
branch_clear_offset(auprobe, insn);
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 4e385cbfd444..3a67f1c5582c 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1676,3 +1676,95 @@ enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
return type;
}
+
+bool insn_is_nop(struct insn *insn)
+{
+ u8 rex, rex_b = 0, rex_x = 0, rex_r = 0, rex_w = 0;
+ u8 modrm, modrm_mod, modrm_reg, modrm_rm;
+ u8 sib = 0, sib_scale, sib_index, sib_base;
+
+ if (insn->rex_prefix.nbytes) {
+ rex = insn->rex_prefix.bytes[0];
+ rex_w = !!X86_REX_W(rex);
+ rex_r = !!X86_REX_R(rex);
+ rex_x = !!X86_REX_X(rex);
+ rex_b = !!X86_REX_B(rex);
+ }
+
+ if (insn->modrm.nbytes) {
+ modrm = insn->modrm.bytes[0];
+ modrm_mod = X86_MODRM_MOD(modrm);
+ modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
+ modrm_rm = X86_MODRM_RM(modrm) + 8*rex_b;
+ }
+
+ if (insn->sib.nbytes) {
+ sib = insn->sib.bytes[0];
+ sib_scale = X86_SIB_SCALE(sib);
+ sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
+ sib_base = X86_SIB_BASE(sib) + 8*rex_b;
+
+ modrm_rm = sib_base;
+ }
+
+ switch (insn->opcode.bytes[0]) {
+ case 0x0f: /* 2nd byte */
+ break;
+
+ case 0x89: /* MOV */
+ if (modrm_mod != 3) /* register-direct */
+ return false;
+
+ if (insn->x86_64 && !rex_w) /* native size */
+ return false;
+
+ for (int i = 0; i < insn->prefixes.nbytes; i++) {
+ if (insn->prefixes.bytes[i] == 0x66) /* OSP */
+ return false;
+ }
+
+ return modrm_reg == modrm_rm; /* MOV %reg, %reg */
+
+ case 0x8d: /* LEA */
+ if (modrm_mod == 0 || modrm_mod == 3) /* register-indirect with disp */
+ return false;
+
+ if (insn->x86_64 && !rex_w) /* native size */
+ return false;
+
+ if (insn->displacement.value != 0)
+ return false;
+
+	if (sib && (sib_scale != 0 || sib_index != 4)) /* (%reg, %eiz, 1) */
+ return false;
+
+ for (int i = 0; i < insn->prefixes.nbytes; i++) {
+ if (insn->prefixes.bytes[i] != 0x3e) /* DS */
+ return false;
+ }
+
+ return modrm_reg == modrm_rm; /* LEA 0(%reg), %reg */
+
+ case 0x90: /* NOP */
+ for (int i = 0; i < insn->prefixes.nbytes; i++) {
+ if (insn->prefixes.bytes[i] == 0xf3) /* REP */
+ return false; /* REP NOP -- PAUSE */
+ }
+ return true;
+
+ case 0xe9: /* JMP.d32 */
+ case 0xeb: /* JMP.d8 */
+ return insn->immediate.value == 0; /* JMP +0 */
+
+ default:
+ return false;
+ }
+
+ switch (insn->opcode.bytes[1]) {
+ case 0x1f:
+ return modrm_reg == 0; /* 0f 1f /0 -- NOPL */
+
+ default:
+ return false;
+ }
+}
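For the LEA case, the interesting encoding is the 5-byte 32-bit nop that GAS
emits, 3e 8d 74 26 00 (lea %ds:0x0(%esi,%eiz,1),%esi). Assuming I read the
tables right: ModRM 0x74 is mod=1, reg=6 (%esi), rm=4 (SIB follows); SIB 0x26
is scale=0, index=4 (none, i.e. %eiz), base=6 (%esi); disp8 is 0 and the only
prefix is DS (0x3e). That lands in the 0x8d branch above with
modrm_reg == modrm_rm == 6 and zero displacement, which is exactly the form
it accepts.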