lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 23 Jun 2014 12:54:46 -0700
From:	Andy Lutomirski <luto@...capital.net>
To:	Qiaowei Ren <qiaowei.ren@...el.com>,
	"H. Peter Anvin" <hpa@...or.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	Dave Hansen <dave.hansen@...el.com>
CC:	x86@...nel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v6 04/10] x86, mpx: hook #BR exception handler to allocate
 bound tables

On 06/18/2014 02:44 AM, Qiaowei Ren wrote:
> This patch handles a #BR exception for non-existent tables by
> carving the space out of the normal processes address space
> (essentially calling mmap() from inside the kernel) and then
> pointing the bounds-directory over to it.
> 
> The tables need to be accessed and controlled by userspace
> because the compiler generates instructions for MPX-enabled
> code which frequently store and retrieve entries from the bounds
> tables. Any direct kernel involvement (like a syscall) to access
> the tables would destroy performance since these are so frequent.
> 
> The tables are carved out of userspace because we have no better
> spot to put them. For each pointer which is being tracked by MPX,
> the bounds tables contain 4 longs worth of data, and the tables
> are indexed virtually. If we were to preallocate the tables, we
> would theoretically need to allocate 4x the virtual space that
> we have available for userspace somewhere else. We don't have
> that room in the kernel address space.
> 
> Signed-off-by: Qiaowei Ren <qiaowei.ren@...el.com>
> ---
>  arch/x86/include/asm/mpx.h |   20 ++++++++++++++
>  arch/x86/kernel/Makefile   |    1 +
>  arch/x86/kernel/mpx.c      |   63 ++++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kernel/traps.c    |   56 ++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 139 insertions(+), 1 deletions(-)
>  create mode 100644 arch/x86/kernel/mpx.c
> 
> diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
> index 5725ac4..b7598ac 100644
> --- a/arch/x86/include/asm/mpx.h
> +++ b/arch/x86/include/asm/mpx.h
> @@ -18,6 +18,8 @@
>  #define MPX_BT_ENTRY_SHIFT	5
>  #define MPX_IGN_BITS		3
>  
> +#define MPX_BD_ENTRY_TAIL	3
> +
>  #else
>  
>  #define MPX_BD_ENTRY_OFFSET	20
> @@ -26,13 +28,31 @@
>  #define MPX_BT_ENTRY_SHIFT	4
>  #define MPX_IGN_BITS		2
>  
> +#define MPX_BD_ENTRY_TAIL	2
> +
>  #endif
>  
> +#define MPX_BNDSTA_TAIL		2
> +#define MPX_BNDCFG_TAIL		12
> +#define MPX_BNDSTA_ADDR_MASK	(~((1UL<<MPX_BNDSTA_TAIL)-1))
> +#define MPX_BNDCFG_ADDR_MASK	(~((1UL<<MPX_BNDCFG_TAIL)-1))
> +#define MPX_BT_ADDR_MASK	(~((1UL<<MPX_BD_ENTRY_TAIL)-1))
> +
>  #define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT))
>  #define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT))
>  
>  #define MPX_BNDSTA_ERROR_CODE	0x3
> +#define MPX_BD_ENTRY_VALID_FLAG	0x1
>  
>  unsigned long mpx_mmap(unsigned long len);
>  
> +#ifdef CONFIG_X86_INTEL_MPX
> +int do_mpx_bt_fault(struct xsave_struct *xsave_buf);
> +#else
> +static inline int do_mpx_bt_fault(struct xsave_struct *xsave_buf)
> +{
> +	return -EINVAL;
> +}
> +#endif /* CONFIG_X86_INTEL_MPX */
> +
>  #endif /* _ASM_X86_MPX_H */
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index f4d9600..3e81aed 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -41,6 +41,7 @@ obj-$(CONFIG_PREEMPT)	+= preempt.o
>  
>  obj-y				+= process.o
>  obj-y				+= i387.o xsave.o
> +obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
>  obj-y				+= ptrace.o
>  obj-$(CONFIG_X86_32)		+= tls.o
>  obj-$(CONFIG_IA32_EMULATION)	+= tls.o
> diff --git a/arch/x86/kernel/mpx.c b/arch/x86/kernel/mpx.c
> new file mode 100644
> index 0000000..4230c7b
> --- /dev/null
> +++ b/arch/x86/kernel/mpx.c
> @@ -0,0 +1,63 @@
> +#include <linux/kernel.h>
> +#include <linux/syscalls.h>
> +#include <asm/mpx.h>
> +
> +static int allocate_bt(long __user *bd_entry)
> +{
> +	unsigned long bt_addr, old_val = 0;
> +	int ret = 0;
> +
> +	bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES);
> +	if (IS_ERR((void *)bt_addr)) {
> +		pr_err("Bounds table allocation failed at entry addr %p\n",
> +				bd_entry);
> +		return bt_addr;
> +	}
> +	bt_addr = (bt_addr & MPX_BT_ADDR_MASK) | MPX_BD_ENTRY_VALID_FLAG;
> +
> +	ret = user_atomic_cmpxchg_inatomic(&old_val, bd_entry, 0, bt_addr);
> +	if (ret)
> +		goto out;
> +
> +	/*
> +	 * there is a existing bounds table pointed at this bounds
> +	 * directory entry, and so we need to free the bounds table
> +	 * allocated just now.
> +	 */
> +	if (old_val)
> +		goto out;
> +
> +	pr_debug("Allocate bounds table %lx at entry %p\n",
> +			bt_addr, bd_entry);
> +	return 0;
> +
> +out:
> +	vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES);
> +	return ret;
> +}
> +
> +/*
> + * When a BNDSTX instruction attempts to save bounds to a BD entry
> + * with the lack of the valid bit being set, a #BR is generated.
> + * This is an indication that no BT exists for this entry. In this
> + * case the fault handler will allocate a new BT.
> + *
> + * With 32-bit mode, the size of BD is 4MB, and the size of each
> + * bound table is 16KB. With 64-bit mode, the size of BD is 2GB,
> + * and the size of each bound table is 4MB.
> + */
> +int do_mpx_bt_fault(struct xsave_struct *xsave_buf)
> +{
> +	unsigned long status;
> +	unsigned long bd_entry, bd_base;
> +
> +	bd_base = xsave_buf->bndcsr.cfg_reg_u & MPX_BNDCFG_ADDR_MASK;
> +	status = xsave_buf->bndcsr.status_reg;
> +
> +	bd_entry = status & MPX_BNDSTA_ADDR_MASK;
> +	if ((bd_entry < bd_base) ||
> +		(bd_entry >= bd_base + MPX_BD_SIZE_BYTES))
> +		return -EINVAL;
> +
> +	return allocate_bt((long __user *)bd_entry);
> +}
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index f73b5d4..35b9b29 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -59,6 +59,7 @@
>  #include <asm/fixmap.h>
>  #include <asm/mach_traps.h>
>  #include <asm/alternative.h>
> +#include <asm/mpx.h>
>  
>  #ifdef CONFIG_X86_64
>  #include <asm/x86_init.h>
> @@ -213,7 +214,6 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
>  
>  DO_ERROR_INFO(X86_TRAP_DE,     SIGFPE,  "divide error",			divide_error,		     FPE_INTDIV, regs->ip )
>  DO_ERROR     (X86_TRAP_OF,     SIGSEGV, "overflow",			overflow					  )
> -DO_ERROR     (X86_TRAP_BR,     SIGSEGV, "bounds",			bounds						  )
>  DO_ERROR_INFO(X86_TRAP_UD,     SIGILL,  "invalid opcode",		invalid_op,		     ILL_ILLOPN, regs->ip )
>  DO_ERROR     (X86_TRAP_OLD_MF, SIGFPE,  "coprocessor segment overrun",	coprocessor_segment_overrun			  )
>  DO_ERROR     (X86_TRAP_TS,     SIGSEGV, "invalid TSS",			invalid_TSS					  )
> @@ -263,6 +263,60 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
>  }
>  #endif
>  
> +dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
> +{
> +	enum ctx_state prev_state;
> +	unsigned long status;
> +	struct xsave_struct *xsave_buf;
> +	struct task_struct *tsk = current;
> +
> +	prev_state = exception_enter();
> +	if (notify_die(DIE_TRAP, "bounds", regs, error_code,
> +			X86_TRAP_BR, SIGSEGV) == NOTIFY_STOP)
> +		goto exit;
> +	conditional_sti(regs);
> +
> +	if (!user_mode(regs))
> +		die("bounds", regs, error_code);

Does this need to be user_mode_vm?

> +
> +	if (!cpu_has_mpx) {
> +		/* The exception is not from Intel MPX */
> +		do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, NULL);
> +		goto exit;
> +	}
> +
> +	fpu_xsave(&tsk->thread.fpu);
> +	xsave_buf = &(tsk->thread.fpu.state->xsave);
> +	status = xsave_buf->bndcsr.status_reg;
> +
> +	/*
> +	 * The error code field of the BNDSTATUS register communicates status
> +	 * information of a bound range exception #BR or operation involving
> +	 * bound directory.
> +	 */
> +	switch (status & MPX_BNDSTA_ERROR_CODE) {
> +	case 2:
> +		/*
> +		 * Bound directory has invalid entry.
> +		 * No signal will be sent to the user space.

This comment is a lie.

> +		 */
> +		if (do_mpx_bt_fault(xsave_buf))
> +			force_sig(SIGBUS, tsk);

Would it make sense to assign and use a new si_code value here?

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ