lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190402154329.scp7i7uqevubgwrz@treble>
Date:   Tue, 2 Apr 2019 10:43:29 -0500
From:   Josh Poimboeuf <jpoimboe@...hat.com>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     LKML <linux-kernel@...r.kernel.org>, x86@...nel.org,
        Andy Lutomirski <luto@...nel.org>
Subject: Re: [patch 15/14] x86/dumpstack/64: Speedup in_exception_stack()

On Tue, Apr 02, 2019 at 12:19:46PM +0200, Thomas Gleixner wrote:
> The current implementation of in_exception_stack() iterates over the
> exception stacks array. Most of the time this is an useless exercise, but
> even for the actual use cases (perf and ftrace) it takes at least 2
> iterations to get to the NMI stack.
> 
> As the exception stacks and the guard pages are page aligned the loop can
> be avoided completely.
> 
> Add a initial check whether the stack pointer is inside the full exception
> stack area and leave early if not.
> 
> Create a lookup table which describes the stack area. The table index is
> the page offset from the beginning of the exception stacks. So for any
> given stack pointer the page offset is computed and a lookup in the
> description table is performed. If it is inside a guard page, return. If
> not, use the descriptor to fill in the info structure.
> 
> The table is filled at compile time with nasty macro magic and for the
> !KASAN case the interesting page descriptors exactly fit into a single
> cache line. Just the last guard page descriptor is in the next cacheline,
> but that should not be accessed in the regular case.
> 
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> ---
>  arch/x86/kernel/dumpstack_64.c |   97 +++++++++++++++++++++++++++--------------
>  1 file changed, 66 insertions(+), 31 deletions(-)
> 
> --- a/arch/x86/kernel/dumpstack_64.c
> +++ b/arch/x86/kernel/dumpstack_64.c
> @@ -48,50 +48,85 @@ const char *stack_type_name(enum stack_t
>  	return NULL;
>  }
>  
> -struct estack_layout {
> -	unsigned int	begin;
> -	unsigned int	end;
> +#define ESTACK_S(st)						\
> +	(offsetof(struct cea_exception_stacks, st## _stack))
> +
> +#define ESTACK_E(st)							\
> +	(offsetof(struct cea_exception_stacks, st## _stack_guard))
> +
> +#define PAGENR(offs)	((offs) / PAGE_SIZE)
> +#define PAGERANGE(st)	PAGENR(ESTACK_S(st)) ... PAGENR(ESTACK_E(st) - 1)
> +
> +#if EXCEPTION_STKSZ == PAGE_SIZE
> +# define CONDRANGE(st)	PAGENR(ESTACK_S(st))
> +#else
> +# define CONDRANGE(st)	PAGERANGE(st)
> +#endif
> +
> +/**
> + * struct estack_pages - Page descriptor for exception stacks
> + * @offs:	Offset from the start of the exception stack area
> + * @size:	Size of the exception stack
> + * @type:	Type to store in the stack_info struct
> + */
> +struct estack_pages {
> +	u32	offs;
> +	u16	size;
> +	u16	type;
>  };
>  
> -#define	ESTACK_ENTRY(x)	{						  \
> -	.begin	= offsetof(struct cea_exception_stacks, x## _stack),	  \
> -	.end	= offsetof(struct cea_exception_stacks, x## _stack_guard) \
> +#define ESTACK_PAGE(ist, est) {				\
> +	.offs	= ESTACK_S(est),			\
> +	.size	= ESTACK_E(est) - ESTACK_S(est),	\
> +	.type	= STACK_TYPE_EXCEPTION + ist,		\
>  	}
>  
> -static const struct estack_layout layout[N_EXCEPTION_STACKS] = {
> -	[ DOUBLEFAULT_IST	]	= ESTACK_ENTRY(DF),
> -	[ NMI_IST		]	= ESTACK_ENTRY(NMI),
> -	[ DEBUG_IST		]	= ESTACK_ENTRY(DB),
> -	[ MCE_IST		]	= ESTACK_ENTRY(MCE),
> +#define ESTACK_PAGES	(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
> +
> +/*
> + * Array of exception stack page descriptors. If the stack is larger than
> + * PAGE_SIZE, all pages covering a particular stack will have the same
> + * info.
> + */
> +static const struct estack_pages estack_pages[ESTACK_PAGES] ____cacheline_aligned = {
> +	[CONDRANGE(DF)]		= ESTACK_PAGE(DOUBLEFAULT_IST, DF),
> +	[CONDRANGE(NMI)]	= ESTACK_PAGE(NMI_IST, NMI),
> +	[PAGERANGE(DB)]		= ESTACK_PAGE(DEBUG_IST, DB),
> +	[CONDRANGE(MCE)]	= ESTACK_PAGE(MCE_IST, MCE),

It would be nice if the *_IST macro naming aligned with the struct
cea_exception_stacks field naming.  Then you could just do, e.g.
ESTACKPAGE(DF).

Also it's a bit unfortunate that some of the stack size knowledge is
hard-coded here, i.e #DB always being > 1 page and non-#DB being
sometimes 1 page.

>  };
>  
>  static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
>  {
> -	unsigned long estacks, begin, end, stk = (unsigned long)stack;
> +	unsigned long begin, end, stk = (unsigned long)stack;
> +	const struct estack_pages *ep;
>  	struct pt_regs *regs;
>  	unsigned int k;
>  
>  	BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
>  
> -	estacks = (unsigned long)__this_cpu_read(cea_exception_stacks);
> -
> -	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
> -		begin = estacks + layout[k].begin;
> -		end   = estacks + layout[k].end;
> -		regs  = (struct pt_regs *)end - 1;
> -
> -		if (stk <= begin || stk >= end)
> -			continue;
> -
> -		info->type	= STACK_TYPE_EXCEPTION + k;
> -		info->begin	= (unsigned long *)begin;
> -		info->end	= (unsigned long *)end;
> -		info->next_sp	= (unsigned long *)regs->sp;
> -
> -		return true;
> -	}
> -
> -	return false;
> +	begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
> +	end = begin + sizeof(struct cea_exception_stacks);
> +	/* Bail if @stack is outside the exception stack area. */
> +	if (stk <= begin || stk >= end)
> +		return false;

This check is the most important piece.  Exception stack dumps are quite
rare, so this ensures an early exit in most cases regardless of whether
there's a loop below.

> +
> +	/* Calc page offset from start of exception stacks */
> +	k = (stk - begin) >> PAGE_SHIFT;
> +	/* Lookup the page descriptor */
> +	ep = &estack_pages[k];
> +	/* Guard page? */
> +	if (unlikely(!ep->size))
> +		return false;
> +
> +	begin += (unsigned long)ep->offs;
> +	end = begin + (unsigned long)ep->size;
> +	regs = (struct pt_regs *)end - 1;
> +
> +	info->type	= ep->type;
> +	info->begin	= (unsigned long *)begin;
> +	info->end	= (unsigned long *)end;
> +	info->next_sp	= (unsigned long *)regs->sp;
> +	return true;

With the above "(stk <= begin || stk >= end)" check, removing the loop
becomes not all that important since exception stack dumps are quite
rare and not performance sensitive.  With all the macros this code
becomes a little more obtuse, so I'm not sure whether removal of the
loop is a net positive.

>  }
>  
>  static bool in_irq_stack(unsigned long *stack, struct stack_info *info)

-- 
Josh

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ