lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 23 Jan 2023 17:44:31 -0500
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     mingo@...nel.org, tglx@...utronix.de, bp@...en8.de,
        dave.hansen@...ux.intel.com, x86@...nel.org, hpa@...or.com,
        jpoimboe@...nel.org, jbaron@...mai.com, ardb@...nel.org,
        linux-kernel@...r.kernel.org, erhard_f@...lbox.org,
        ndesaulniers@...gle.com, mhiramat@...nel.org, sandipan.das@....com
Subject: Re: [PATCH 3/3] x86/static_call: Add support for Jcc tail-calls

On Mon, 23 Jan 2023 21:59:18 +0100
Peter Zijlstra <peterz@...radead.org> wrote:

> Clang likes to create conditional tail calls like:
> 
> 0000000000000350 <amd_pmu_add_event>:
> 350:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1) 351: R_X86_64_NONE      __fentry__-0x4
> 355:       48 83 bf 20 01 00 00 00         cmpq   $0x0,0x120(%rdi)
> 35d:       0f 85 00 00 00 00       jne    363 <amd_pmu_add_event+0x13>     35f: R_X86_64_PLT32     __SCT__amd_pmu_branch_add-0x4
> 363:       e9 00 00 00 00          jmp    368 <amd_pmu_add_event+0x18>     364: R_X86_64_PLT32     __x86_return_thunk-0x4
> 

Just to confirm, as it's not clear if this is the static call site or one
of the functions that is being called.

I'm guessing that this is an issue because clang optimizes the static call
site, right?

> Teach the in-line static call text patching about this.
> 
> Notably, since there is no conditional-ret, in that caes patch the Jcc

					      "in that case"

-- Steve

> to point at an empty stub function that does the ret -- or the return
> thunk when needed.
> 
> Reported-by: "Erhard F." <erhard_f@...lbox.org>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> Reviewed-by: Masami Hiramatsu (Google) <mhiramat@...nel.org>
> ---
>  arch/x86/kernel/static_call.c |   50 +++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 47 insertions(+), 3 deletions(-)
> 
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -9,6 +9,7 @@ enum insn_type {
>  	NOP = 1,  /* site cond-call */
>  	JMP = 2,  /* tramp / site tail-call */
>  	RET = 3,  /* tramp / site cond-tail-call */
> +	JCC = 4,
>  };
>  
>  /*
> @@ -25,12 +26,40 @@ static const u8 xor5rax[] = { 0x2e, 0x2e
>  
>  static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
>  
> +static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */
> +{
> +	u8 ret = 0;
> +
> +	if (insn[0] == 0x0f) {
> +		u8 tmp = insn[1];
> +		if ((tmp & 0xf0) == 0x80)
> +			ret = tmp;
> +	}
> +
> +	return ret;
> +}
> +
> +extern void __static_call_return(void);
> +
> +asm (".global __static_call_return\n\t"
> +     ".type __static_call_return, @function\n\t"
> +     ASM_FUNC_ALIGN "\n\t"
> +     "__static_call_return:\n\t"
> +     ANNOTATE_NOENDBR
> +     ANNOTATE_RETPOLINE_SAFE
> +     "ret; int3\n\t"
> +     ".size __static_call_return, . - __static_call_return \n\t");
> +
>  static void __ref __static_call_transform(void *insn, enum insn_type type,
>  					  void *func, bool modinit)
>  {
>  	const void *emulate = NULL;
>  	int size = CALL_INSN_SIZE;
>  	const void *code;
> +	u8 op, buf[6];
> +
> +	if ((type == JMP || type == RET) && (op = __is_Jcc(insn)))
> +		type = JCC;
>  
>  	switch (type) {
>  	case CALL:
> @@ -57,6 +86,20 @@ static void __ref __static_call_transfor
>  		else
>  			code = &retinsn;
>  		break;
> +
> +	case JCC:
> +		if (!func) {
> +			func = __static_call_return;
> +			if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
> +				func = x86_return_thunk;
> +		}
> +
> +		buf[0] = 0x0f;
> +		__text_gen_insn(buf+1, op, insn+1, func, 5);
> +		code = buf;
> +		size = 6;
> +
> +		break;
>  	}
>  
>  	if (memcmp(insn, code, size) == 0)
> @@ -68,9 +111,9 @@ static void __ref __static_call_transfor
>  	text_poke_bp(insn, code, size, emulate);
>  }
>  
> -static void __static_call_validate(void *insn, bool tail, bool tramp)
> +static void __static_call_validate(u8 *insn, bool tail, bool tramp)
>  {
> -	u8 opcode = *(u8 *)insn;
> +	u8 opcode = insn[0];
>  
>  	if (tramp && memcmp(insn+5, tramp_ud, 3)) {
>  		pr_err("trampoline signature fail");
> @@ -79,7 +122,8 @@ static void __static_call_validate(void
>  
>  	if (tail) {
>  		if (opcode == JMP32_INSN_OPCODE ||
> -		    opcode == RET_INSN_OPCODE)
> +		    opcode == RET_INSN_OPCODE ||
> +		    __is_Jcc(insn))
>  			return;
>  	} else {
>  		if (opcode == CALL_INSN_OPCODE ||
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ