Date:	Sat, 07 Feb 2015 19:08:04 +0900
From:	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>
To:	Wang Nan <wangnan0@...wei.com>
Cc:	Ingo Molnar <mingo@...hat.com>, hpa@...or.com, x86@...nel.org,
	linux-kernel@...r.kernel.org, linux@....linux.org.uk,
	anil.s.keshavamurthy@...el.com, davem@...emloft.net,
	ananth@...ibm.com, dave.long@...aro.org, tixy@...aro.org,
	lizefan@...wei.com, linux-arm-kernel@...ts.infradead.org,
	rostedt@...dmis.org
Subject: Re: [RFC PATCH] x86: kprobes: enable optimizing relative call insn

(2015/02/06 19:30), Wang Nan wrote:
> In reply to Masami Hiramatsu's question on my previous early kprobe
> patch series at:
> 
> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-January/315771.html
> 
> that on x86, early kprobes' range of application is limited by the set of
> optimizable instructions, I made this patch, which enables optimizing
> relative call instructions by introducing a specific template for them.
> Such instructions make up about 7% of the kernel. In addition, when
> ftrace is enabled, every function entry becomes such an instruction, so
> early kprobes will be much more useful than before.

Sorry, I couldn't understand this part. If you put a kprobe on an ftrace
site after ftrace is enabled, it uses ftrace directly, instead of an int3
or a jump. In any case, the ftrace-site instruction should be controlled by
ftrace, not by kprobes.
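
As background for other readers: a 5-byte relative call is the opcode 0xe8
followed by a signed 32-bit displacement, taken relative to the end of the
instruction. Below is a minimal user-space sketch of the same decoding that
the patch's is_relcall()/get_relcall_target() helpers perform; the encoded
bytes here are made up for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RELATIVECALL_OPCODE 0xe8
#define RELATIVECALL_SIZE   5

/* Decode the target of "call rel32": next-instruction address plus the
 * signed displacement (assumes a little-endian host, as on x86). */
static void *relcall_target(uint8_t *addr)
{
	int32_t raddr;

	if (addr[0] != RELATIVECALL_OPCODE)
		return NULL;		/* not a relative call */
	memcpy(&raddr, addr + 1, sizeof(raddr));
	return (void *)((uintptr_t)addr + RELATIVECALL_SIZE + raddr);
}

int main(void)
{
	/* "call +0x10": e8 10 00 00 00 */
	uint8_t insn[RELATIVECALL_SIZE] = { 0xe8, 0x10, 0x00, 0x00, 0x00 };

	/* prints the address of insn plus 5 + 0x10 */
	printf("target: %p\n", relcall_target(insn));
	return 0;
}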

> 
> The relationship between ftrace and kprobes is interesting. Under normal
> circumstances, kprobes utilizes ftrace. However, in the early case,
> there's no way to tell whether the probed instruction is an ftrace
> entry. Another possible approach would be to move part of the ftrace
> initialization earlier. However, allowing more instructions to be
> optimized should also be good for performance.
> 
> Masami, I'd like to hear your thoughts on this. Do you think this patch is
> also useful in the normal case?

Expanding "optimizability" is good, but I don't like add new asm-templates
which reduces maintainability less. Perhaps, we'd better start with reviewing
can_boost table again...
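
For other readers: "boosting" means executing the copied instruction out of
line and then jumping straight back, without an int3 single-step, so it is
only safe for instructions that do not themselves redirect control flow.
Here is a deliberately simplified stand-in for the kind of per-opcode test
involved; the kernel's real can_boost() uses bitmap tables and also handles
prefixes and two-byte opcodes:

#include <stdbool.h>
#include <stdint.h>

/* Simplified illustration only, not the kernel's can_boost(): an
 * instruction can be "boosted" (run out of line and followed by an
 * unconditional jump back) only if it does not redirect control flow
 * itself. */
static bool opcode_is_boostable(uint8_t opcode)
{
	switch (opcode) {
	case 0x70 ... 0x7f:		/* Jcc rel8 */
	case 0x9a:			/* far call */
	case 0xc2: case 0xc3:		/* ret */
	case 0xca: case 0xcb:		/* far ret */
	case 0xcc: case 0xcd:		/* int3, int */
	case 0xcf:			/* iret */
	case 0xe8: case 0xe9:		/* call/jmp rel32 */
	case 0xeb:			/* jmp rel8 */
		return false;		/* the jump back would be skipped */
	default:
		return true;		/* straight-line instruction */
	}
}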

Thank you,

> 
> Signed-off-by: Wang Nan <wangnan0@...wei.com>
> ---
>  arch/x86/include/asm/kprobes.h | 17 +++++++--
>  arch/x86/kernel/kprobes/opt.c  | 82 ++++++++++++++++++++++++++++++++++++++++--
>  2 files changed, 94 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
> index 017f4bb..3627694 100644
> --- a/arch/x86/include/asm/kprobes.h
> +++ b/arch/x86/include/asm/kprobes.h
> @@ -31,6 +31,7 @@
>  #define RELATIVEJUMP_OPCODE 0xe9
>  #define RELATIVEJUMP_SIZE 5
>  #define RELATIVECALL_OPCODE 0xe8
> +#define RELATIVECALL_SIZE 5
>  #define RELATIVE_ADDR_SIZE 4
>  #define MAX_STACK_SIZE 64
>  #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE)
> @@ -38,8 +39,10 @@
>  #ifdef __ASSEMBLY__
>  
>  #define KPROBE_OPCODE_SIZE     1
> +#define OPT_CALL_TEMPLATE_SIZE (optprobe_call_template_end - \
> +		optprobe_call_template_entry)
>  #define MAX_OPTINSN_SIZE ((optprobe_template_end - optprobe_template_entry) + \
> -	MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
> +	MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE + OPT_CALL_TEMPLATE_SIZE)
>  
>  #ifdef CONFIG_EARLY_KPROBES
>  # define EARLY_KPROBES_CODES_AREA					\
> @@ -81,10 +84,20 @@ extern __visible kprobe_opcode_t optprobe_template_entry;
>  extern __visible kprobe_opcode_t optprobe_template_val;
>  extern __visible kprobe_opcode_t optprobe_template_call;
>  extern __visible kprobe_opcode_t optprobe_template_end;
> +
> +extern __visible kprobe_opcode_t optprobe_call_template_entry;
> +extern __visible kprobe_opcode_t optprobe_call_template_val_destaddr;
> +extern __visible kprobe_opcode_t optprobe_call_template_val_retaddr;
> +extern __visible kprobe_opcode_t optprobe_call_template_end;
> +
> +#define OPT_CALL_TEMPLATE_SIZE				\
> +	((unsigned long)&optprobe_call_template_end -	\
> +	 (unsigned long)&optprobe_call_template_entry)
>  #define MAX_OPTINSN_SIZE 				\
>  	(((unsigned long)&optprobe_template_end -	\
>  	  (unsigned long)&optprobe_template_entry) +	\
> -	 MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE)
> +	 MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE +	\
> +	 OPT_CALL_TEMPLATE_SIZE)
>  
>  extern const int kretprobe_blacklist_size;
>  
> diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
> index dc5fccb..05dd06f 100644
> --- a/arch/x86/kernel/kprobes/opt.c
> +++ b/arch/x86/kernel/kprobes/opt.c
> @@ -39,6 +39,23 @@
>  
>  #include "common.h"
>  
> +static inline bool
> +is_relcall(u8 *addr)
> +{
> +	return (*(u8 *)(addr) == RELATIVECALL_OPCODE);
> +}
> +
> +static inline void *
> +get_relcall_target(u8 *addr)
> +{
> +	struct __arch_relative_insn {
> +		u8 op;
> +		s32 raddr;
> +	} __packed *insn;
> +	insn = (struct __arch_relative_insn *)addr;
> +	return (void *)((unsigned long)addr + RELATIVECALL_SIZE + insn->raddr);
> +}
> +
>  unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
>  {
>  	struct optimized_kprobe *op;
> @@ -89,6 +106,48 @@ static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val)
>  }
>  
>  asm (
> +#ifdef CONFIG_X86_64
> +			".global optprobe_call_template_entry\n"
> +			"optprobe_call_template_entry:"
> +			"pushq %rdi\n"
> +			".global optprobe_call_template_val_destaddr\n"
> +			"optprobe_call_template_val_destaddr:"
> +			ASM_NOP5
> +			ASM_NOP5
> +			"pushq %rdi\n"
> +			".global optprobe_call_template_val_retaddr\n"
> +			"optprobe_call_template_val_retaddr:"
> +			ASM_NOP5
> +			ASM_NOP5
> +			"xchgq %rdi, 8(%rsp)\n"
> +			"retq\n"
> +#else /* CONFIG_X86_32 */
> +			".global optprobe_call_template_entry\n"
> +			"optprobe_call_template_entry:"
> +			"push %edi\n"
> +			".global optprobe_call_template_val_destaddr\n"
> +			"optprobe_call_template_val_destaddr:"
> +			ASM_NOP5
> +			"push %edi\n"
> +			".global optprobe_call_template_val_retaddr\n"
> +			"optprobe_call_template_val_retaddr:"
> +			ASM_NOP5
> +			"xchg %edi, 4(%esp)\n"
> +			"ret\n"
> +#endif
> +			".global optprobe_call_template_end\n"
> +			"optprobe_call_template_end:\n"
> +);
> +
> +#define __OPTCALL_TMPL_MOVE_DESTADDR_IDX \
> +	((long)&optprobe_call_template_val_destaddr - (long)&optprobe_call_template_entry)
> +#define __OPTCALL_TMPL_MOVE_RETADDR_IDX \
> +	((long)&optprobe_call_template_val_retaddr - (long)&optprobe_call_template_entry)
> +#define __OPTCALL_TMPL_END_IDX \
> +	((long)&optprobe_call_template_end - (long)&optprobe_call_template_entry)
> +#define OPTCALL_TMPL_SIZE	__OPTCALL_TMPL_END_IDX
> +
> +asm (
>  			".global optprobe_template_entry\n"
>  			"optprobe_template_entry:\n"
>  #ifdef CONFIG_X86_64
> @@ -135,6 +194,10 @@ asm (
>  #define TMPL_END_IDX \
>  	((long)&optprobe_template_end - (long)&optprobe_template_entry)
>  
> +#define TMPL_OPTCALL_MOVE_DESTADDR_IDX	(TMPL_END_IDX + __OPTCALL_TMPL_MOVE_DESTADDR_IDX)
> +#define TMPL_OPTCALL_MOVE_RETADDR_IDX	(TMPL_END_IDX + __OPTCALL_TMPL_MOVE_RETADDR_IDX)
> +#define TMPL_OPTCALL_END_IDX	(TMPL_END_IDX + __OPTCALL_TMPL_END_IDX)
> +
>  #define INT3_SIZE sizeof(kprobe_opcode_t)
>  
>  /* Optimized kprobe call back function: called from optinsn */
> @@ -175,6 +238,12 @@ static int copy_optimized_instructions(u8 *dest, u8 *src)
>  {
>  	int len = 0, ret;
>  
> +	if (is_relcall(src)) {
> +		memcpy(dest, &optprobe_call_template_entry,
> +				OPTCALL_TMPL_SIZE);
> +		return OPTCALL_TMPL_SIZE;
> +	}
> +
>  	while (len < RELATIVEJUMP_SIZE) {
>  		ret = __copy_instruction(dest + len, src + len);
>  		if (!ret || !can_boost(dest + len))
> @@ -365,9 +434,16 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
>  	/* Set probe function call */
>  	synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback);
>  
> -	/* Set returning jmp instruction at the tail of out-of-line buffer */
> -	synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
> -			   (u8 *)op->kp.addr + op->optinsn.size);
> +	if (!is_relcall(op->kp.addr)) {
> +		/* Set returning jmp instruction at the tail of out-of-line buffer */
> +		synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size,
> +				(u8 *)op->kp.addr + op->optinsn.size);
> +	} else {
> +		synthesize_set_arg1(buf + TMPL_OPTCALL_MOVE_DESTADDR_IDX,
> +				(unsigned long)(get_relcall_target(op->kp.addr)));
> +		synthesize_set_arg1(buf + TMPL_OPTCALL_MOVE_RETADDR_IDX,
> +				(unsigned long)(op->kp.addr + RELATIVECALL_SIZE));
> +	}
>  
>  	flush_icache_range((unsigned long) buf,
>  			   (unsigned long) buf + TMPL_END_IDX +
> 
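
By the way, for other readers the trick in the template above may be worth
spelling out. On x86-64, synthesize_set_arg1() patches each pair of
ASM_NOP5s into a 10-byte "movabs $imm64, %rdi", so the sequence that
actually runs is roughly:

	pushq  %rdi			/* save the probed context's %rdi */
	movabs $destaddr, %rdi		/* patched in: the original call's target */
	pushq  %rdi			/* push the target */
	movabs $retaddr, %rdi		/* patched in: call site + RELATIVECALL_SIZE */
	xchgq  %rdi, 8(%rsp)		/* swap retaddr with the saved %rdi */
	retq				/* pop the target and jump to it */

After the xchg, the call's target sits on top of the stack with the return
address just below it, and %rdi holds its original value again; the retq
then consumes the target, leaving the stack exactly as the original call
instruction would have.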


-- 
Masami HIRAMATSU
Software Platform Research Dept. Linux Technology Research Center
Hitachi, Ltd., Yokohama Research Laboratory
E-mail: masami.hiramatsu.pt@...achi.com


