lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 7 Feb 2023 14:38:13 +0300
From:   "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     x86@...nel.org, Borislav Petkov <bp@...en8.de>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Andrew Cooper <Andrew.Cooper3@...rix.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        linux-kernel@...r.kernel.org
Subject: Re: [PATCH] x86/alternative: Support relocations in alternatives

On Mon, Feb 06, 2023 at 04:05:38PM +0100, Peter Zijlstra wrote:
> 
> A little while ago someone (Kirill) ran into the whole 'alternatives don't
> do relocations nonsense' again and I got annoyed enough to actually look
> at the code.
> 
> Since the whole alternative machinery already fully decodes the
> instructions it is simple enough to adjust immediates and displacement
> when needed. Specifically, the immediates for IP modifying instructions
> (JMP, CALL, Jcc) and the displacement for RIP-relative instructions.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
>  arch/x86/kernel/alternative.c    |  248 +++++++++++++++++++++++++--------------
>  tools/objtool/arch/x86/special.c |    8 -
>  2 files changed, 161 insertions(+), 95 deletions(-)
> 
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -123,71 +123,6 @@ extern s32 __smp_locks[], __smp_locks_en
>  void text_poke_early(void *addr, const void *opcode, size_t len);
>  
>  /*
> - * Are we looking at a near JMP with a 1 or 4-byte displacement.
> - */
> -static inline bool is_jmp(const u8 opcode)
> -{
> -	return opcode == 0xeb || opcode == 0xe9;
> -}
> -
> -static void __init_or_module
> -recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
> -{
> -	u8 *next_rip, *tgt_rip;
> -	s32 n_dspl, o_dspl;
> -	int repl_len;
> -
> -	if (a->replacementlen != 5)
> -		return;
> -
> -	o_dspl = *(s32 *)(insn_buff + 1);
> -
> -	/* next_rip of the replacement JMP */
> -	next_rip = repl_insn + a->replacementlen;
> -	/* target rip of the replacement JMP */
> -	tgt_rip  = next_rip + o_dspl;
> -	n_dspl = tgt_rip - orig_insn;
> -
> -	DPRINTK("target RIP: %px, new_displ: 0x%x", tgt_rip, n_dspl);
> -
> -	if (tgt_rip - orig_insn >= 0) {
> -		if (n_dspl - 2 <= 127)
> -			goto two_byte_jmp;
> -		else
> -			goto five_byte_jmp;
> -	/* negative offset */
> -	} else {
> -		if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
> -			goto two_byte_jmp;
> -		else
> -			goto five_byte_jmp;
> -	}
> -
> -two_byte_jmp:
> -	n_dspl -= 2;
> -
> -	insn_buff[0] = 0xeb;
> -	insn_buff[1] = (s8)n_dspl;
> -	add_nops(insn_buff + 2, 3);
> -
> -	repl_len = 2;
> -	goto done;
> -
> -five_byte_jmp:
> -	n_dspl -= 5;
> -
> -	insn_buff[0] = 0xe9;
> -	*(s32 *)&insn_buff[1] = n_dspl;
> -
> -	repl_len = 5;
> -
> -done:
> -
> -	DPRINTK("final displ: 0x%08x, JMP 0x%lx",
> -		n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
> -}
> -
> -/*
>   * optimize_nops_range() - Optimize a sequence of single byte NOPs (0x90)
>   *
>   * @instr: instruction byte stream
> @@ -254,6 +189,127 @@ static void __init_or_module noinline op
>  }
>  
>  /*
> + * What we start with is:
> + *
> + *   src_imm = target - src_next_ip                  (1)
> + *
> + * what we want is:
> + *
> + *   dst_imm = target - dst_next_ip                  (2)
> + *
> + * so what we do is rework (1) as an expression for target like:
> + *
> + *   target = src_imm + src_next_ip                  (1a)
> + *
> + * and substitute in (2) to get:
> + *
> + *   dst_imm = (src_imm + src_next_ip) - dst_next_ip (3)
> + *
> + * Now, since the instruction stream is 'identical' at src and dst (we copy
> + * after all) we can state that:
> + *
> + *   src_next_ip = src + ip_offset
> + *   dst_next_ip = dst + ip_offset                   (4)
> + *
> + * Substitute (4) in (3) and observe ip_offset being cancelled out to
> + * obtain:
> + *
> + *   dst_imm = src_imm + (src + ip_offset) - (dst + ip_offset)
> + *           = src_imm + src - dst + ip_offset - ip_offset
> + *           = src_imm + src - dst                   (5)
> + *
> + * IOW, only the relative displacement of the code block matters.
> + */
> +
> +#define apply_reloc_n(n_, p_, d_)				\
> +	do {							\
> +		s32 v = *(s##n_ *)(p_);				\
> +		v += (d_);					\
> +		BUG_ON((v >> 31) != (v >> (n_-1)));		\
> +		*(s##n_ *)(p_) = (s##n_)v;			\
> +	} while (0)
> +
> +
> +static __always_inline
> +void apply_reloc(int n, void *ptr, uintptr_t diff)
> +{
> +	switch (n) {
> +	case 1: apply_reloc_n(8, ptr, diff); break;
> +	case 2: apply_reloc_n(16, ptr, diff); break;
> +	case 4: apply_reloc_n(32, ptr, diff); break;
> +	default: BUG();
> +	}
> +}
> +
> +static __always_inline
> +bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
> +{
> +	u8 *target = src + offset;
> +	/*
> +	 * If the target is inside the patched block, it's relative to the
> +	 * block itself and does not need relocation.
> +	 */
> +	return (target < src || target > src + src_len);
> +}
> +
> +static void __init_or_module noinline
> +apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
> +{
> +	for (int next, i = 0; i < len; len = next) {

'len = next'? I guess it suppose to be 'i = next', right? Otherwise it
hangs for me.

And if I fix this, I get this crash later on:

	BUG: unable to handle page fault for address: ffffffff53fb2f2e
	#PF: supervisor instruction fetch in kernel mode
	#PF: error_code(0x0010) - not-present page
	PGD 283b067 P4D 283b067 PUD 0
	Oops: 0010 [#1] PREEMPT SMP PTI
	CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.2.0-rc7-00012-ga7fd2d79efc5-dirty #1736
	Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
	RIP: 0010:0xffffffff53fb2f2e
	Code: Unable to access opcode bytes at 0xffffffff53fb2f04.
	RSP: 0000:ffffffff82803d58 EFLAGS: 00010206
	RAX: 0000000000000bc9 RBX: 0000000000000005 RCX: 0000000000000000
	RDX: 0000000000000000 RSI: ffffffff826be1f9 RDI: ffffffff82662566
	RBP: ffffffff82803d8a R08: 0000000000000001 R09: 0000000000000001
	R10: 0000000000000005 R11: 00000000ffffffff R12: ffffffff836f1a37
	R13: ffffffff8388c65d R14: ffffffff8388bef0 R15: ffffffff82803d8a
	FS:  0000000000000000(0000) GS:ffff8881f9c00000(0000) knlGS:0000000000000000
	CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
	CR2: ffffffff53fb2f04 CR3: 0000000002836001 CR4: 0000000000370ef0
	DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
	DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
	Call Trace:
	 <TASK>
	 ? text_poke_early+0x46/0x5d
	 ? text_poke_early+0x3a/0x5d
	 ? apply_alternatives+0x258/0x287
	 ? lock_release+0x1d8/0x2e0
	 ? _raw_spin_unlock_irqrestore+0x2b/0x50
	 ? synchronize_rcu+0x1b0/0x1d0
	 ? synchronize_rcu+0x16c/0x1d0
	 ? synchronize_rcu+0x16c/0x1d0
	 ? lockdep_hardirqs_on+0x79/0x100
	 ? synchronize_rcu+0x16c/0x1d0
	 ? lock_release+0x13a/0x2e0
	 ? _raw_spin_unlock_irqrestore+0x2b/0x50
	 ? _raw_spin_unlock_irqrestore+0x2b/0x50
	 ? lockdep_hardirqs_on+0x79/0x100
	 ? alternative_instructions+0x35/0xd1
	 ? check_bugs+0xd6f/0xdc3
	 ? start_kernel+0x674/0x6a8
	 ? secondary_startup_64_no_verify+0xe0/0xeb
	 </TASK>
	CR2: ffffffff53fb2f2e
	---[ end trace 0000000000000000 ]---

It happens when applied on top of current Linus' tree. My config is
attached just in case.

Maybe you forgot to fold in some fixup, I donno.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

View attachment ".config" of type "text/plain" (143743 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ