Message-ID: <20190201103433.5clveqiciw2m2emb@kshutemo-mobl1>
Date:   Fri, 1 Feb 2019 13:34:33 +0300
From:   "Kirill A. Shutemov" <kirill@...temov.name>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     kan.liang@...ux.intel.com, acme@...nel.org, tglx@...utronix.de,
        mingo@...hat.com, linux-kernel@...r.kernel.org, eranian@...gle.com,
        jolsa@...hat.com, namhyung@...nel.org, ak@...ux.intel.com,
        luto@...capital.net, Vlastimil Babka <vbabka@...e.cz>
Subject: Re: [PATCH V4 01/13] perf/core, x86: Add PERF_SAMPLE_DATA_PAGE_SIZE

On Fri, Feb 01, 2019 at 10:22:40AM +0100, Peter Zijlstra wrote:
> On Thu, Jan 31, 2019 at 12:27:54PM -0800, kan.liang@...ux.intel.com wrote:
> > diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> > index 374a197..229a73b 100644
> > --- a/arch/x86/events/core.c
> > +++ b/arch/x86/events/core.c
> > @@ -2578,3 +2578,34 @@ void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
> >  	cap->events_mask_len	= x86_pmu.events_mask_len;
> >  }
> >  EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
> > +
> > +u64 perf_get_page_size(u64 virt)
> > +{
> > +	unsigned long flags;
> > +	unsigned int level;
> > +	pte_t *pte;
> > +
> > +	if (!virt)
> > +		return 0;
> > +
> > +	/*
> > +	 * Interrupts are disabled, so it prevents any tear down
> > +	 * of the page tables.
> > +	 * See the comment near struct mmu_table_batch.
> > +	 */
> > +	local_irq_save(flags);
> > +	if (virt >= TASK_SIZE)
> > +		pte = lookup_address(virt, &level);
> > +	else {
> > +		if (current->mm) {
> > +			pte = lookup_address_in_pgd(pgd_offset(current->mm, virt),
> > +						    virt, &level);
> > +		} else
> > +			level = PG_LEVEL_NUM;
> > +	}
> > +	local_irq_restore(flags);
> > +	if (level >= PG_LEVEL_NUM)
> > +		return 0;
> > +
> > +	return (u64)page_level_size(level);
> > +}
> 
> *sigh* there really isn't anything x86 specific there.
> 
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index 236bb8d..d233f45 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -6352,6 +6358,12 @@ static u64 perf_virt_to_phys(u64 virt)
> >  	return phys_addr;
> >  }
> >  
> > +/* Return page size of given virtual address. IRQ-safe required. */
> > +u64 __weak perf_get_page_size(u64 virt)
> > +{
> > +	return 0;
> > +}
> > +
> >  static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
> >  
> >  struct perf_callchain_entry *
> 
> How about something like so instead?
> 
> (completely untested, will likely make your grandma eat puppies)
> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -6357,10 +6357,72 @@ static u64 perf_virt_to_phys(u64 virt)
>  	return phys_addr;
>  }
>  
> -/* Return page size of given virtual address. IRQ-safe required. */
> -u64 __weak perf_get_page_size(u64 virt)
> +static u64 __perf_get_page_size(struct mm_struct *mm, unsigned long addr)
>  {
> -	return 0;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +
> +	pgd = pgd_offset(mm, addr);
> +	if (pgd_none(*pgd))
> +		return 0;
> +
> +	p4d = p4d_offset(pgd, addr);
> +	if (p4d_none(*p4d))
> +		return 0;
> +
> +	if (p4d_large(*p4d));

We don't have 512GiB pages yet.

> +		return 1ULL << P4D_SHIFT;

		return P4D_SIZE;

And the same with P?D_SIZE at the other levels below.

> +
> +	if (!p4d_present(*p4d))
> +		return 0;

No need to check p4d_none() *and* p4d_present(). Just p4d_present() should
be enough. A large entry is still supposed to be present. The same goes for
the other levels.
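
Roughly, per level with both changes applied (untested sketch for the pud
case; the pmd level would look the same with PMD_SIZE):

	pud = pud_offset(p4d, addr);
	if (!pud_present(*pud))
		return 0;

	if (pud_large(*pud))
		return PUD_SIZE;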

> +
> +	pud = pud_offset(p4d, addr);
> +	if (pud_none(*pud))
> +		return 0;
> +
> +	if (pud_large(*pud))
> +		return 1ULL << PUD_SHIFT;
> +
> +	if (!pud_present(*pud))
> +		return 0;
> +
> +	pmd = pmd_offset(pud, addr);
> +	if (pmd_none(*pmd))
> +		return 0;
> +
> +	if (pmd_large(*pmd))
> +		return 1ULL << PMD_SHIFT;
> +
> +	if (!pmd_present(*pmd))
> +		return 0;
> +
> +	return 1ULL << PAGE_SHIFT;
> +}
> +
> +static u64 perf_get_page_size(unsigned long addr)
> +{
> +	struct mm_struct *mm;
> +	unsigned long flags;
> +	u64 ret;
> +
> +	/*
> +	 * Software page-table walkers must disable IRQs, see asm-generic/tlb.h.
> +	 */
> +	local_irq_save(flags);
> +	mm = current->mm;
> +	if (!mm) {
> +		/*
> +		 * For kernel threads and the like, use init_mm so that
> +		 * we can find kernel memory.
> +		 */
> +		mm = &init_mm;
> +	}
> +	ret = __perf_get_page_size(mm, addr);
> +	local_irq_restore(flags);
> +
> +	return ret;
>  }
>  
>  static struct perf_callchain_entry __empty_callchain = { .nr = 0, };

-- 
 Kirill A. Shutemov
