lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230120162018.4e48f78c@yea>
Date:   Fri, 20 Jan 2023 16:20:18 +0100
From:   "Erhard F." <erhard_f@...lbox.org>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     Sandipan Das <sandipan.das@....com>, linux-kernel@...r.kernel.org,
        Nick Desaulniers <ndesaulniers@...gle.com>,
        Joao Moreira <joao@...rdrivepizza.com>,
        Masami Hiramatsu <mhiramat@...nel.org>, x86@...nel.org,
        Josh Poimboeuf <jpoimboe@...hat.com>
Subject: Re: [bisected] clang 15 built kernel fails to boot, stuck at
 "Loading Linux 6.1.1 ...", gcc 12 built kernel with same config boots fine

On Fri, 20 Jan 2023 15:57:47 +0100
Peter Zijlstra <peterz@...radead.org> wrote:

> diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
> index f4b87f08f5c5..29832c338cdc 100644
> --- a/arch/x86/include/asm/text-patching.h
> +++ b/arch/x86/include/asm/text-patching.h
> @@ -184,6 +184,37 @@ void int3_emulate_ret(struct pt_regs *regs)
>  	unsigned long ip = int3_emulate_pop(regs);
>  	int3_emulate_jmp(regs, ip);
>  }
> +
> +static __always_inline
> +void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
> +{
> +	static const unsigned long jcc_mask[6] = {
> +		[0] = X86_EFLAGS_OF,
> +		[1] = X86_EFLAGS_CF,
> +		[2] = X86_EFLAGS_ZF,
> +		[3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
> +		[4] = X86_EFLAGS_SF,
> +		[5] = X86_EFLAGS_PF,
> +	};
> +
> +	bool invert = cc & 1;
> +	bool match;
> +
> +	if (cc < 0xc) {
> +		match = regs->flags & jcc_mask[cc >> 1];
> +	} else {
> +		match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
> +			((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
> +		if (cc >= 0xe)
> +			match = match || (regs->flags & X86_EFLAGS_ZF);
> +	}
> +
> +	if ((match && !invert) || (!match && invert))
> +		ip += disp;
> +
> +	int3_emulate_jmp(regs, ip);
> +}
> +
>  #endif /* !CONFIG_UML_X86 */
>  
>  #endif /* _ASM_X86_TEXT_PATCHING_H */
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 7d8c3cbde368..cd632a0171b4 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -1772,6 +1772,11 @@ void text_poke_sync(void)
>  	on_each_cpu(do_sync_core, NULL, 1);
>  }
>  
> +/*
> + * NOTE: crazy scheme to allow patching Jcc.d32 but not increase the size of
> + * this thing. When len == 6 everything is prefixed with 0x0f and we map
> + * opcode to Jcc.d8, using len to distinguish.
> + */
>  struct text_poke_loc {
>  	/* addr := _stext + rel_addr */
>  	s32 rel_addr;
> @@ -1893,6 +1898,10 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
>  		int3_emulate_jmp(regs, (long)ip + tp->disp);
>  		break;
>  
> +	case 0x70 ... 0x7f: /* Jcc */
> +		int3_emulate_jcc(regs, tp->opcode & 0xf, (long)ip, tp->disp);
> +		break;
> +
>  	default:
>  		BUG();
>  	}
> @@ -1966,16 +1975,26 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
>  	 * Second step: update all but the first byte of the patched range.
>  	 */
>  	for (do_sync = 0, i = 0; i < nr_entries; i++) {
> -		u8 old[POKE_MAX_OPCODE_SIZE] = { tp[i].old, };
> +		u8 old[POKE_MAX_OPCODE_SIZE+1] = { tp[i].old, };
> +		u8 _new[POKE_MAX_OPCODE_SIZE+1];
> +		const u8 *new = tp[i].text;
>  		int len = tp[i].len;
>  
>  		if (len - INT3_INSN_SIZE > 0) {
>  			memcpy(old + INT3_INSN_SIZE,
>  			       text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
>  			       len - INT3_INSN_SIZE);
> +
> +			if (len == 6) {
> +				_new[0] = 0x0f;
> +				memcpy(_new + 1, new, 5);
> +				new = _new;
> +			}
> +
>  			text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
> -				  (const char *)tp[i].text + INT3_INSN_SIZE,
> +				  new + INT3_INSN_SIZE,
>  				  len - INT3_INSN_SIZE);
> +
>  			do_sync++;
>  		}
>  
> @@ -2003,8 +2022,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
>  		 * The old instruction is recorded so that the event can be
>  		 * processed forwards or backwards.
>  		 */
> -		perf_event_text_poke(text_poke_addr(&tp[i]), old, len,
> -				     tp[i].text, len);
> +		perf_event_text_poke(text_poke_addr(&tp[i]), old, len, new, len);
>  	}
>  
>  	if (do_sync) {
> @@ -2021,10 +2039,15 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
>  	 * replacing opcode.
>  	 */
>  	for (do_sync = 0, i = 0; i < nr_entries; i++) {
> -		if (tp[i].text[0] == INT3_INSN_OPCODE)
> +		u8 byte = tp[i].text[0];
> +
> +		if (tp[i].len == 6)
> +			byte = 0x0f;
> +
> +		if (byte == INT3_INSN_OPCODE)
>  			continue;
>  
> -		text_poke(text_poke_addr(&tp[i]), tp[i].text, INT3_INSN_SIZE);
> +		text_poke(text_poke_addr(&tp[i]), &byte, INT3_INSN_SIZE);
>  		do_sync++;
>  	}
>  
> @@ -2042,9 +2065,11 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
>  			       const void *opcode, size_t len, const void *emulate)
>  {
>  	struct insn insn;
> -	int ret, i;
> +	int ret, i = 0;
>  
> -	memcpy((void *)tp->text, opcode, len);
> +	if (len == 6)
> +		i = 1;
> +	memcpy((void *)tp->text, opcode+i, len-i);
>  	if (!emulate)
>  		emulate = opcode;
>  
> @@ -2055,6 +2080,13 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
>  	tp->len = len;
>  	tp->opcode = insn.opcode.bytes[0];
>  
> +	if (is_jcc32(&insn)) {
> +		/*
> +		 * Map Jcc.d32 onto Jcc.d8 and use len to distinguish.
> +		 */
> +		tp->opcode = insn.opcode.bytes[1] - 0x10;
> +	}
> +
>  	switch (tp->opcode) {
>  	case RET_INSN_OPCODE:
>  	case JMP32_INSN_OPCODE:
> @@ -2071,7 +2103,6 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
>  		BUG_ON(len != insn.length);
>  	}
>  
> -
>  	switch (tp->opcode) {
>  	case INT3_INSN_OPCODE:
>  	case RET_INSN_OPCODE:
> @@ -2080,6 +2111,7 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
>  	case CALL_INSN_OPCODE:
>  	case JMP32_INSN_OPCODE:
>  	case JMP8_INSN_OPCODE:
> +	case 0x70 ... 0x7f: /* Jcc */
>  		tp->disp = insn.immediate.value;
>  		break;
>  
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index b36f3c367cb2..f2b2f7545ecb 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -464,50 +464,26 @@ static void kprobe_emulate_call(struct kprobe *p, struct pt_regs *regs)
>  }
>  NOKPROBE_SYMBOL(kprobe_emulate_call);
>  
> -static nokprobe_inline
> -void __kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs, bool cond)
> +static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
>  {
>  	unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
>  
> -	if (cond)
> -		ip += p->ainsn.rel32;
> +	ip += p->ainsn.rel32;
>  	int3_emulate_jmp(regs, ip);
>  }
> -
> -static void kprobe_emulate_jmp(struct kprobe *p, struct pt_regs *regs)
> -{
> -	__kprobe_emulate_jmp(p, regs, true);
> -}
>  NOKPROBE_SYMBOL(kprobe_emulate_jmp);
>  
> -static const unsigned long jcc_mask[6] = {
> -	[0] = X86_EFLAGS_OF,
> -	[1] = X86_EFLAGS_CF,
> -	[2] = X86_EFLAGS_ZF,
> -	[3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
> -	[4] = X86_EFLAGS_SF,
> -	[5] = X86_EFLAGS_PF,
> -};
> -
>  static void kprobe_emulate_jcc(struct kprobe *p, struct pt_regs *regs)
>  {
> -	bool invert = p->ainsn.jcc.type & 1;
> -	bool match;
> -
> -	if (p->ainsn.jcc.type < 0xc) {
> -		match = regs->flags & jcc_mask[p->ainsn.jcc.type >> 1];
> -	} else {
> -		match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
> -			((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
> -		if (p->ainsn.jcc.type >= 0xe)
> -			match = match || (regs->flags & X86_EFLAGS_ZF);
> -	}
> -	__kprobe_emulate_jmp(p, regs, (match && !invert) || (!match && invert));
> +	int3_emulate_jcc(regs, p->ainsn.jcc.type,
> +			 regs->ip - INT3_INSN_SIZE + p->ainsn.size,
> +			 p->ainsn.rel32);
>  }
>  NOKPROBE_SYMBOL(kprobe_emulate_jcc);
>  
>  static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
>  {
> +	unsigned long ip = regs->ip - INT3_INSN_SIZE + p->ainsn.size;
>  	bool match;
>  
>  	if (p->ainsn.loop.type != 3) {	/* LOOP* */
> @@ -535,7 +511,9 @@ static void kprobe_emulate_loop(struct kprobe *p, struct pt_regs *regs)
>  	else if (p->ainsn.loop.type == 1)	/* LOOPE */
>  		match = match && (regs->flags & X86_EFLAGS_ZF);
>  
> -	__kprobe_emulate_jmp(p, regs, match);
> +	if (match)
> +		ip += p->ainsn.rel32;
> +	int3_emulate_jmp(regs, ip);
>  }
>  NOKPROBE_SYMBOL(kprobe_emulate_loop);
>  
> diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
> index 2ebc338980bc..deb909f2b62f 100644
> --- a/arch/x86/kernel/static_call.c
> +++ b/arch/x86/kernel/static_call.c
> @@ -9,6 +9,7 @@ enum insn_type {
>  	NOP = 1,  /* site cond-call */
>  	JMP = 2,  /* tramp / site tail-call */
>  	RET = 3,  /* tramp / site cond-tail-call */
> +	JCC = 4,
>  };
>  
>  /*
> @@ -25,12 +26,38 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 };
>  
>  static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
>  
> +static u8 __is_Jcc(u8 *insn) /* Jcc.d32 */
> +{
> +	u8 ret = 0;
> +
> +	if (insn[0] == 0x0f) {
> +		u8 tmp = insn[1];
> +		if ((tmp & 0xf0) == 0x80)
> +			ret = tmp;
> +	}
> +
> +	return ret;
> +}
> +
> +extern void __static_call_return(void);
> +
> +asm (".global __static_call_return\n\t"
> +     ".type __static_call_return, @function\n\t"
> +     ASM_FUNC_ALIGN "\n\t"
> +     "__static_call_return:\n\t"
> +     "ret; int3\n\t"
> +     ".size __static_call_return, . - __static_call_return \n\t");
> +
>  static void __ref __static_call_transform(void *insn, enum insn_type type,
>  					  void *func, bool modinit)
>  {
>  	const void *emulate = NULL;
>  	int size = CALL_INSN_SIZE;
>  	const void *code;
> +	u8 op, buf[6];
> +
> +	if ((type == JMP || type == RET) && (op = __is_Jcc(insn)))
> +		type = JCC;
>  
>  	switch (type) {
>  	case CALL:
> @@ -57,6 +84,20 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
>  		else
>  			code = &retinsn;
>  		break;
> +
> +	case JCC:
> +		if (!func) {
> +			func = __static_call_return;
> +			if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
> +				func = x86_return_thunk;
> +		}
> +
> +		buf[0] = 0x0f;
> +		__text_gen_insn(buf+1, op, insn+1, func, 5);
> +		code = buf;
> +		size = 6;
> +
> +		break;
>  	}
>  
>  	if (memcmp(insn, code, size) == 0)
> @@ -79,7 +120,8 @@ static void __static_call_validate(void *insn, bool tail, bool tramp)
>  
>  	if (tail) {
>  		if (opcode == JMP32_INSN_OPCODE ||
> -		    opcode == RET_INSN_OPCODE)
> +		    opcode == RET_INSN_OPCODE ||
> +		    __is_Jcc(insn))
>  			return;
>  	} else {
>  		if (opcode == CALL_INSN_OPCODE ||

Success! You nailed it.

Applied your patch on top of v6.2-rc4 and with that I got ye olde Thinkpad A275 with its borked custom config + clang -Os (stupid) booting again. No side effects to be seen so far.

Many thanks!

Regards,
Erhard

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ