lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1437F525-1510-45CD-A67F-13DF525083B4@nvidia.com>
Date:   Fri, 27 Mar 2020 13:23:07 -0400
From:   Zi Yan <ziy@...dia.com>
To:     "Kirill A. Shutemov" <kirill@...temov.name>
CC:     <akpm@...ux-foundation.org>, <linux-mm@...ck.org>,
        <linux-kernel@...r.kernel.org>,
        "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: Re: [PATCH] thp: Simplify splitting PMD mapping huge zero page

On 27 Mar 2020, at 13:03, Kirill A. Shutemov wrote:

> Splitting PMD mapping huge zero page can be simplified a lot: we can
> just unmap it and fallback to PTE handling.

So we will have an extra page fault for the first read to each subpage, but nothing changes if the first access to a subpage is a write, right? BTW, what is the motivation for this code simplification?

Thanks.

>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
> ---
>  mm/huge_memory.c | 57 ++++--------------------------------------------
>  1 file changed, 4 insertions(+), 53 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 42407e16bd80..ef6a6bcb291f 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2114,40 +2114,6 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
>  }
>  #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
>
> -static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
> -		unsigned long haddr, pmd_t *pmd)
> -{
> -	struct mm_struct *mm = vma->vm_mm;
> -	pgtable_t pgtable;
> -	pmd_t _pmd;
> -	int i;
> -
> -	/*
> -	 * Leave pmd empty until pte is filled note that it is fine to delay
> -	 * notification until mmu_notifier_invalidate_range_end() as we are
> -	 * replacing a zero pmd write protected page with a zero pte write
> -	 * protected page.
> -	 *
> -	 * See Documentation/vm/mmu_notifier.rst
> -	 */
> -	pmdp_huge_clear_flush(vma, haddr, pmd);
> -
> -	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
> -	pmd_populate(mm, &_pmd, pgtable);
> -
> -	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
> -		pte_t *pte, entry;
> -		entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
> -		entry = pte_mkspecial(entry);
> -		pte = pte_offset_map(&_pmd, haddr);
> -		VM_BUG_ON(!pte_none(*pte));
> -		set_pte_at(mm, haddr, pte, entry);
> -		pte_unmap(pte);
> -	}
> -	smp_wmb(); /* make pte visible before pmd */
> -	pmd_populate(mm, pmd, pgtable);
> -}
> -
>  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  		unsigned long haddr, bool freeze)
>  {
> @@ -2167,7 +2133,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>
>  	count_vm_event(THP_SPLIT_PMD);
>
> -	if (!vma_is_anonymous(vma)) {
> +	if (!vma_is_anonymous(vma) || is_huge_zero_pmd(*pmd)) {
>  		_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
>  		/*
>  		 * We are going to unmap this huge page. So
> @@ -2175,7 +2141,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  		 */
>  		if (arch_needs_pgtable_deposit())
>  			zap_deposited_table(mm, pmd);
> -		if (vma_is_dax(vma))
> +		if (vma_is_dax(vma) || is_huge_zero_pmd(*pmd))
>  			return;
>  		page = pmd_page(_pmd);
>  		if (!PageDirty(page) && pmd_dirty(_pmd))
> @@ -2186,17 +2152,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
>  		put_page(page);
>  		add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
>  		return;
> -	} else if (is_huge_zero_pmd(*pmd)) {
> -		/*
> -		 * FIXME: Do we want to invalidate secondary mmu by calling
> -		 * mmu_notifier_invalidate_range() see comments below inside
> -		 * __split_huge_pmd() ?
> -		 *
> -		 * We are going from a zero huge page write protected to zero
> -		 * small page also write protected so it does not seems useful
> -		 * to invalidate secondary mmu at this time.
> -		 */
> -		return __split_huge_zero_page_pmd(vma, haddr, pmd);
>  	}
>
>  	/*
> @@ -2339,13 +2294,9 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
>  	spin_unlock(ptl);
>  	/*
>  	 * No need to double call mmu_notifier->invalidate_range() callback.
> -	 * They are 3 cases to consider inside __split_huge_pmd_locked():
> +	 * They are 2 cases to consider inside __split_huge_pmd_locked():
>  	 *  1) pmdp_huge_clear_flush_notify() call invalidate_range() obvious
> -	 *  2) __split_huge_zero_page_pmd() read only zero page and any write
> -	 *    fault will trigger a flush_notify before pointing to a new page
> -	 *    (it is fine if the secondary mmu keeps pointing to the old zero
> -	 *    page in the meantime)
> -	 *  3) Split a huge pmd into pte pointing to the same page. No need
> +	 *  2) Split a huge pmd into pte pointing to the same page. No need
>  	 *     to invalidate secondary tlb entry they are all still valid.
>  	 *     any further changes to individual pte will notify. So no need
>  	 *     to call mmu_notifier->invalidate_range()
> -- 
> 2.26.0


—
Best Regards,
Yan Zi

Download attachment "signature.asc" of type "application/pgp-signature" (855 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ