lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 17 Oct 2013 12:04:39 -0400
From:	Don Zickus <dzickus@...hat.com>
To:	Peter Zijlstra <peterz@...radead.org>
Cc:	Andi Kleen <ak@...ux.intel.com>, dave.hansen@...ux.intel.com,
	eranian@...gle.com, jmario@...hat.com,
	linux-kernel@...r.kernel.org, acme@...radead.org, mingo@...nel.org
Subject: Re: [PATCH] perf, x86: Optimize intel_pmu_pebs_fixup_ip()

On Thu, Oct 17, 2013 at 12:00:34PM -0400, Don Zickus wrote:
> On Thu, Oct 17, 2013 at 11:41:45AM +0200, Peter Zijlstra wrote:
> > On Thu, Oct 17, 2013 at 01:07:12AM +0200, Peter Zijlstra wrote:
> > > On Wed, Oct 16, 2013 at 11:03:19PM +0200, Peter Zijlstra wrote:
> > > > Anyway; if you want to have a go at this, feel free.
> > > 
> > > OK, couldn't help myself; completely untested patch below.
> > > 
> > > I think the full once copy is best for the decode as even with the below
> > > interface you'd end up doing a lot of duplicate copying due to the
> > > variable size insn mess.
> > 
> > Duh, a very small tweak would make it work for that and avoid most of
> > the memcpy()s.
> 
> Hmm, for some reason, even though copy_from_user_nmi_iter is super fast
> now, the while(to < ip) count increased dramatically and so did my
> latency. :-(

I take that back: copy_from_user_nmi_iter is not super fast; I just had
a bug in how I accumulate total time.  So somehow this approach is slower
than yesterday's.

Cheers,
Don

> 
> Not sure what happened between your pretty patch yesterday and this
> direction.
> 
> Cheers,
> Don
> 
> > 
> > ---
> >  arch/x86/include/asm/uaccess.h            | 13 +++++
> >  arch/x86/kernel/cpu/perf_event.c          | 32 +++++------
> >  arch/x86/kernel/cpu/perf_event_intel_ds.c | 21 ++++---
> >  arch/x86/lib/usercopy.c                   | 91 ++++++++++++++++++++++++++++++-
> >  arch/x86/mm/gup.c                         | 63 +++++++++++++--------
> >  5 files changed, 165 insertions(+), 55 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
> > index 5838fa911aa0..a341de0eadd1 100644
> > --- a/arch/x86/include/asm/uaccess.h
> > +++ b/arch/x86/include/asm/uaccess.h
> > @@ -516,6 +516,19 @@ struct __large_struct { unsigned long buf[100]; };
> >  
> >  extern unsigned long
> >  copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
> > +
> > +struct copy_from_user_nmi_state {
> > +	void *map;
> > +	unsigned long address;
> > +	unsigned long flags;
> > +};
> > +
> > +extern void *
> > +copy_from_user_nmi_iter(void *to, const void __user *from,
> > +			unsigned long n, struct copy_from_user_nmi_state *state);
> > +extern void
> > +copy_from_user_nmi_end(struct copy_from_user_nmi_state *state);
> > +
> >  extern __must_check long
> >  strncpy_from_user(char *dst, const char __user *src, long count);
> >  
> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > index 19c9d86d2f04..c917fe470861 100644
> > --- a/arch/x86/kernel/cpu/perf_event.c
> > +++ b/arch/x86/kernel/cpu/perf_event.c
> > @@ -1979,8 +1979,9 @@ static inline int
> >  perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
> >  {
> >  	/* 32-bit process in 64-bit kernel. */
> > +	struct copy_from_user_nmi_state state = { NULL };
> >  	unsigned long ss_base, cs_base;
> > -	struct stack_frame_ia32 frame;
> > +	struct stack_frame_ia32 frame, *f;
> >  	const void __user *fp;
> >  
> >  	if (!test_thread_flag(TIF_IA32))
> > @@ -1991,20 +1992,17 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
> >  
> >  	fp = compat_ptr(ss_base + regs->bp);
> >  	while (entry->nr < PERF_MAX_STACK_DEPTH) {
> > -		unsigned long bytes;
> > -		frame.next_frame     = 0;
> > -		frame.return_address = 0;
> > -
> > -		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
> > -		if (bytes != sizeof(frame))
> > +		f = copy_from_user_nmi_iter(&frame, fp, sizeof(frame), &state);
> > +		if (!f)
> >  			break;
> >  
> >  		if (!valid_user_frame(fp, sizeof(frame)))
> >  			break;
> >  
> > -		perf_callchain_store(entry, cs_base + frame.return_address);
> > -		fp = compat_ptr(ss_base + frame.next_frame);
> > +		perf_callchain_store(entry, cs_base + f->return_address);
> > +		fp = compat_ptr(ss_base + f->next_frame);
> >  	}
> > +	copy_from_user_nmi_end(&state);
> >  	return 1;
> >  }
> >  #else
> > @@ -2018,7 +2016,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
> >  void
> >  perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
> >  {
> > -	struct stack_frame frame;
> > +	struct copy_from_user_nmi_state state = { NULL };
> > +	struct stack_frame frame, *f;
> >  	const void __user *fp;
> >  
> >  	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
> > @@ -2043,20 +2042,17 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
> >  		return;
> >  
> >  	while (entry->nr < PERF_MAX_STACK_DEPTH) {
> > -		unsigned long bytes;
> > -		frame.next_frame	     = NULL;
> > -		frame.return_address = 0;
> > -
> > -		bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
> > -		if (bytes != sizeof(frame))
> > +		f = copy_from_user_nmi_iter(&frame, fp, sizeof(frame), &state);
> > +		if (!f)
> >  			break;
> >  
> >  		if (!valid_user_frame(fp, sizeof(frame)))
> >  			break;
> >  
> > -		perf_callchain_store(entry, frame.return_address);
> > -		fp = frame.next_frame;
> > +		perf_callchain_store(entry, f->return_address);
> > +		fp = f->next_frame;
> >  	}
> > +	copy_from_user_nmi_end(&state);
> >  }
> >  
> >  /*
> > diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> > index 32e9ed81cd00..5bd3f2091da9 100644
> > --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> > +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> > @@ -725,10 +725,14 @@ void intel_pmu_pebs_disable_all(void)
> >  static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
> >  {
> >  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> > +	struct copy_from_user_nmi_state state = { NULL };
> >  	unsigned long from = cpuc->lbr_entries[0].from;
> >  	unsigned long old_to, to = cpuc->lbr_entries[0].to;
> >  	unsigned long ip = regs->ip;
> > +	u8 buf[MAX_INSN_SIZE];
> > +	struct insn insn;
> >  	int is_64bit = 0;
> > +	void *kaddr;
> >  
> >  	/*
> >  	 * We don't need to fixup if the PEBS assist is fault like
> > @@ -764,19 +768,12 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
> >  	}
> >  
> >  	do {
> > -		struct insn insn;
> > -		u8 buf[MAX_INSN_SIZE];
> > -		void *kaddr;
> > -
> >  		old_to = to;
> >  		if (!kernel_ip(ip)) {
> > -			int bytes, size = MAX_INSN_SIZE;
> > -
> > -			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
> > -			if (bytes != size)
> > -				return 0;
> > -
> > -			kaddr = buf;
> > +			kaddr = copy_from_user_nmi_iter(buf, (void __user *)to,
> > +							MAX_INSN_SIZE, &state);
> > +			if (!kaddr)
> > +				break;
> >  		} else
> >  			kaddr = (void *)to;
> >  
> > @@ -788,6 +785,8 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
> >  		to += insn.length;
> >  	} while (to < ip);
> >  
> > +	copy_from_user_nmi_end(&state);
> > +
> >  	if (to == ip) {
> >  		set_linear_ip(regs, old_to);
> >  		return 1;
> > diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
> > index 4f74d94c8d97..da6c36a8b842 100644
> > --- a/arch/x86/lib/usercopy.c
> > +++ b/arch/x86/lib/usercopy.c
> > @@ -10,6 +10,8 @@
> >  #include <asm/word-at-a-time.h>
> >  #include <linux/sched.h>
> >  
> > +extern int ___get_user_pages_fast(unsigned long start, int nr_pages, int flags,
> > +			  struct page **pages);
> >  /*
> >   * best effort, GUP based copy_from_user() that is NMI-safe
> >   */
> > @@ -18,6 +20,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
> >  {
> >  	unsigned long offset, addr = (unsigned long)from;
> >  	unsigned long size, len = 0;
> > +	unsigned long flags;
> >  	struct page *page;
> >  	void *map;
> >  	int ret;
> > @@ -26,9 +29,12 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
> >  		return len;
> >  
> >  	do {
> > -		ret = __get_user_pages_fast(addr, 1, 0, &page);
> > -		if (!ret)
> > +		local_irq_save(flags);
> > +		ret = ___get_user_pages_fast(addr, 1, 0, &page);
> > +		if (!ret) {
> > +			local_irq_restore(flags);
> >  			break;
> > +		}
> >  
> >  		offset = addr & (PAGE_SIZE - 1);
> >  		size = min(PAGE_SIZE - offset, n - len);
> > @@ -36,7 +42,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
> >  		map = kmap_atomic(page);
> >  		memcpy(to, map+offset, size);
> >  		kunmap_atomic(map);
> > -		put_page(page);
> > +		local_irq_restore(flags);
> >  
> >  		len  += size;
> >  		to   += size;
> > @@ -47,3 +53,82 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
> >  	return len;
> >  }
> >  EXPORT_SYMBOL_GPL(copy_from_user_nmi);
> > +
> > +void *copy_from_user_nmi_iter(void *to, const void __user *from,
> > +		unsigned long n, struct copy_from_user_nmi_state *state)
> > +{
> > +	unsigned long offset, addr = (unsigned long)from;
> > +	unsigned long size, len = 0;
> > +	unsigned long flags;
> > +	struct page *page;
> > +	void *map, *_to = to;
> > +	int ret;
> > +
> > +	if (__range_not_ok(from, n, TASK_SIZE))
> > +		return NULL;
> > +
> > +	if (state->map) {
> > +		if ((state->address >> PAGE_SHIFT) ==
> > +		    (addr >> PAGE_SHIFT)) {
> > +			flags = state->flags;
> > +			map = state->map;
> > +			goto got_page;
> > +		}
> > +		kunmap_atomic(state->map);
> > +		local_irq_restore(state->flags);
> > +	}
> > +
> > +	for (;;) {
> > +		local_irq_save(flags);
> > +		ret = ___get_user_pages_fast(addr, 1, 0, &page);
> > +		if (!ret) {
> > +			local_irq_restore(flags);
> > +			state->map = NULL;
> > +			return NULL;
> > +		}
> > +
> > +		map = kmap_atomic(page);
> > +
> > +got_page:
> > +		offset = addr & (PAGE_SIZE - 1);
> > +		size = min(PAGE_SIZE - offset, n - len);
> > +
> > +		/*
> > +		 * If the entire desired range falls within the one page
> > +		 * avoid the copy and return a pointer into the kmap.
> > +		 */
> > +		if (size == n) {
> > +			_to = map + offset;
> > +			break;
> > +		}
> > +
> > +		memcpy(to, map+offset, size);
> > +		len += size;
> > +
> > +		if (len == n)
> > +			break;
> > +
> > +		to   += size;
> > +		addr += size;
> > +
> > +		kunmap_atomic(map);
> > +		local_irq_restore(flags);
> > +	}
> > +
> > +	state->address = addr;
> > +	state->flags = flags;
> > +	state->map = map;
> > +
> > +	return _to;
> > +}
> > +EXPORT_SYMBOL_GPL(copy_from_user_nmi_iter);
> > +
> > +void copy_from_user_nmi_end(struct copy_from_user_nmi_state *state)
> > +{
> > +	if (state->map) {
> > +		kunmap_atomic(state->map);
> > +		local_irq_restore(state->flags);
> > +		state->map = NULL;
> > +	}
> > +}
> > +EXPORT_SYMBOL_GPL(copy_from_user_nmi_end);
> > diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
> > index dd74e46828c0..e383caf323e4 100644
> > --- a/arch/x86/mm/gup.c
> > +++ b/arch/x86/mm/gup.c
> > @@ -63,19 +63,22 @@ static inline pte_t gup_get_pte(pte_t *ptep)
> >  #endif
> >  }
> >  
> > +#define GUPF_GET	0x01
> > +#define GUPF_WRITE	0x02
> > +
> >  /*
> >   * The performance critical leaf functions are made noinline otherwise gcc
> >   * inlines everything into a single function which results in too much
> >   * register pressure.
> >   */
> >  static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
> > -		unsigned long end, int write, struct page **pages, int *nr)
> > +		unsigned long end, int flags, struct page **pages, int *nr)
> >  {
> >  	unsigned long mask;
> >  	pte_t *ptep;
> >  
> >  	mask = _PAGE_PRESENT|_PAGE_USER;
> > -	if (write)
> > +	if (flags & GUPF_WRITE)
> >  		mask |= _PAGE_RW;
> >  
> >  	ptep = pte_offset_map(&pmd, addr);
> > @@ -89,7 +92,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
> >  		}
> >  		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
> >  		page = pte_page(pte);
> > -		get_page(page);
> > +		if (flags & GUPF_GET)
> > +			get_page(page);
> >  		SetPageReferenced(page);
> >  		pages[*nr] = page;
> >  		(*nr)++;
> > @@ -109,7 +113,7 @@ static inline void get_head_page_multiple(struct page *page, int nr)
> >  }
> >  
> >  static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
> > -		unsigned long end, int write, struct page **pages, int *nr)
> > +		unsigned long end, int flags, struct page **pages, int *nr)
> >  {
> >  	unsigned long mask;
> >  	pte_t pte = *(pte_t *)&pmd;
> > @@ -117,7 +121,7 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
> >  	int refs;
> >  
> >  	mask = _PAGE_PRESENT|_PAGE_USER;
> > -	if (write)
> > +	if (flags & GUPF_WRITE)
> >  		mask |= _PAGE_RW;
> >  	if ((pte_flags(pte) & mask) != mask)
> >  		return 0;
> > @@ -131,19 +135,20 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
> >  	do {
> >  		VM_BUG_ON(compound_head(page) != head);
> >  		pages[*nr] = page;
> > -		if (PageTail(page))
> > +		if ((flags & GUPF_GET) && PageTail(page))
> >  			get_huge_page_tail(page);
> >  		(*nr)++;
> >  		page++;
> >  		refs++;
> >  	} while (addr += PAGE_SIZE, addr != end);
> > -	get_head_page_multiple(head, refs);
> > +	if (flags & GUPF_GET)
> > +		get_head_page_multiple(head, refs);
> >  
> >  	return 1;
> >  }
> >  
> >  static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
> > -		int write, struct page **pages, int *nr)
> > +		int flags, struct page **pages, int *nr)
> >  {
> >  	unsigned long next;
> >  	pmd_t *pmdp;
> > @@ -167,10 +172,10 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
> >  		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
> >  			return 0;
> >  		if (unlikely(pmd_large(pmd))) {
> > -			if (!gup_huge_pmd(pmd, addr, next, write, pages, nr))
> > +			if (!gup_huge_pmd(pmd, addr, next, flags, pages, nr))
> >  				return 0;
> >  		} else {
> > -			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
> > +			if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
> >  				return 0;
> >  		}
> >  	} while (pmdp++, addr = next, addr != end);
> > @@ -179,7 +184,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
> >  }
> >  
> >  static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
> > -		unsigned long end, int write, struct page **pages, int *nr)
> > +		unsigned long end, int flags, struct page **pages, int *nr)
> >  {
> >  	unsigned long mask;
> >  	pte_t pte = *(pte_t *)&pud;
> > @@ -187,7 +192,7 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
> >  	int refs;
> >  
> >  	mask = _PAGE_PRESENT|_PAGE_USER;
> > -	if (write)
> > +	if (flags & GUPF_WRITE)
> >  		mask |= _PAGE_RW;
> >  	if ((pte_flags(pte) & mask) != mask)
> >  		return 0;
> > @@ -201,19 +206,20 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
> >  	do {
> >  		VM_BUG_ON(compound_head(page) != head);
> >  		pages[*nr] = page;
> > -		if (PageTail(page))
> > +		if ((flags & GUPF_GET) && PageTail(page))
> >  			get_huge_page_tail(page);
> >  		(*nr)++;
> >  		page++;
> >  		refs++;
> >  	} while (addr += PAGE_SIZE, addr != end);
> > -	get_head_page_multiple(head, refs);
> > +	if (flags & GUPF_GET)
> > +		get_head_page_multiple(head, refs);
> >  
> >  	return 1;
> >  }
> >  
> >  static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
> > -			int write, struct page **pages, int *nr)
> > +			int flags, struct page **pages, int *nr)
> >  {
> >  	unsigned long next;
> >  	pud_t *pudp;
> > @@ -226,10 +232,10 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
> >  		if (pud_none(pud))
> >  			return 0;
> >  		if (unlikely(pud_large(pud))) {
> > -			if (!gup_huge_pud(pud, addr, next, write, pages, nr))
> > +			if (!gup_huge_pud(pud, addr, next, flags, pages, nr))
> >  				return 0;
> >  		} else {
> > -			if (!gup_pmd_range(pud, addr, next, write, pages, nr))
> > +			if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
> >  				return 0;
> >  		}
> >  	} while (pudp++, addr = next, addr != end);
> > @@ -241,13 +247,12 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
> >   * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
> >   * back to the regular GUP.
> >   */
> > -int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> > +int ___get_user_pages_fast(unsigned long start, int nr_pages, int flags,
> >  			  struct page **pages)
> >  {
> >  	struct mm_struct *mm = current->mm;
> >  	unsigned long addr, len, end;
> >  	unsigned long next;
> > -	unsigned long flags;
> >  	pgd_t *pgdp;
> >  	int nr = 0;
> >  
> > @@ -255,7 +260,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> >  	addr = start;
> >  	len = (unsigned long) nr_pages << PAGE_SHIFT;
> >  	end = start + len;
> > -	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
> > +	if (unlikely(!access_ok((flags & GUPF_WRITE) ? VERIFY_WRITE : VERIFY_READ,
> >  					(void __user *)start, len)))
> >  		return 0;
> >  
> > @@ -277,7 +282,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> >  	 * (which we do on x86, with the above PAE exception), we can follow the
> >  	 * address down to the the page and take a ref on it.
> >  	 */
> > -	local_irq_save(flags);
> >  	pgdp = pgd_offset(mm, addr);
> >  	do {
> >  		pgd_t pgd = *pgdp;
> > @@ -285,14 +289,27 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> >  		next = pgd_addr_end(addr, end);
> >  		if (pgd_none(pgd))
> >  			break;
> > -		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
> > +		if (!gup_pud_range(pgd, addr, next, flags, pages, &nr))
> >  			break;
> >  	} while (pgdp++, addr = next, addr != end);
> > -	local_irq_restore(flags);
> >  
> >  	return nr;
> >  }
> >  
> > +int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> > +			  struct page **pages)
> > +{
> > +	unsigned long flags;
> > +	int ret;
> > +
> > +	local_irq_save(flags);
> > +	ret = ___get_user_pages_fast(start, nr_pages,
> > +			GUPF_GET | (write ? GUPF_WRITE : 0), pages);
> > +	local_irq_restore(flags);
> > +
> > +	return ret;
> > +}
> > +
> >  /**
> >   * get_user_pages_fast() - pin user pages in memory
> >   * @start:	starting user address
> > 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ