lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <54F9CEA2.90402@redhat.com>
Date:	Fri, 06 Mar 2015 16:58:26 +0100
From:	Jerome Marchand <jmarchan@...hat.com>
To:	"Kirill A. Shutemov" <kirill@...temov.name>
CC:	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Andrea Arcangeli <aarcange@...hat.com>,
	Dave Hansen <dave.hansen@...el.com>,
	Hugh Dickins <hughd@...gle.com>, Mel Gorman <mgorman@...e.de>,
	Rik van Riel <riel@...hat.com>,
	Vlastimil Babka <vbabka@...e.cz>,
	Christoph Lameter <cl@...two.org>,
	Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
	Steve Capper <steve.capper@...aro.org>,
	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>,
	Johannes Weiner <hannes@...xchg.org>,
	Michal Hocko <mhocko@...e.cz>, linux-kernel@...r.kernel.org,
	linux-mm@...ck.org
Subject: Re: [PATCHv4 00/24] THP refcounting redesign

On 03/06/2015 01:18 PM, Kirill A. Shutemov wrote:
> On Thu, Mar 05, 2015 at 01:55:15PM +0100, Jerome Marchand wrote:
>> On 03/04/2015 05:32 PM, Kirill A. Shutemov wrote:
>>> Hello everybody,
>>>
>>> It's bug-fix update of my thp refcounting work.
>>>
>>> The goal of patchset is to make refcounting on THP pages cheaper with
>>> simpler semantics and allow the same THP compound page to be mapped with
>>> PMD and PTEs. This is required to get reasonable THP-pagecache
>>> implementation.
>>>
>>> With the new refcounting design it's much easier to protect against
>>> split_huge_page(): simple reference on a page will make you the deal.
>>> It makes gup_fast() implementation simpler and doesn't require
>>> special-case in futex code to handle tail THP pages.
>>>
>>> It should improve THP utilization over the system since splitting THP in
>>> one process doesn't necessary lead to splitting the page in all other
>>> processes have the page mapped.
>>>
>> [...]
>>> I believe all known bugs have been fixed, but I'm sure Sasha will bring more
>>> reports.
>>>
>>> The patchset also available on git:
>>>
>>> git://git.kernel.org/pub/scm/linux/kernel/git/kas/linux.git thp/refcounting/v4
>>>
>>
>> Hi Kirill,
>>
>> I ran some ltp tests and it triggered two bugs:
>>
> 
> Could you test with the patch below?

It seems to fix the issue. I can't reproduce the bugs anymore.

Thanks,
Jerome

> 
> diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
> index 8c3a3e02ba92..1baa4d23314b 100644
> --- a/arch/arc/mm/cache_arc700.c
> +++ b/arch/arc/mm/cache_arc700.c
> @@ -490,7 +490,7 @@ void flush_dcache_page(struct page *page)
>  	 */
>  	if (!mapping_mapped(mapping)) {
>  		clear_bit(PG_dc_clean, &page->flags);
> -	} else if (page_mapped(page)) {
> +	} else if (page_mapcount(page)) {
>  
>  		/* kernel reading from page with U-mapping */
>  		void *paddr = page_address(page);
> @@ -675,7 +675,7 @@ void copy_user_highpage(struct page *to, struct page *from,
>  	 * Note that while @u_vaddr refers to DST page's userspace vaddr, it is
>  	 * equally valid for SRC page as well
>  	 */
> -	if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
> +	if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
>  		__flush_dcache_page(kfrom, u_vaddr);
>  		clean_src_k_mappings = 1;
>  	}
> diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
> index 34b66af516ea..8f972fc8933d 100644
> --- a/arch/arm/mm/flush.c
> +++ b/arch/arm/mm/flush.c
> @@ -315,7 +315,7 @@ void flush_dcache_page(struct page *page)
>  	mapping = page_mapping(page);
>  
>  	if (!cache_ops_need_broadcast() &&
> -	    mapping && !page_mapped(page))
> +	    mapping && !page_mapcount(page))
>  		clear_bit(PG_dcache_clean, &page->flags);
>  	else {
>  		__flush_dcache_page(mapping, page);
> diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
> index 3f8059602765..b28bf185ef77 100644
> --- a/arch/mips/mm/c-r4k.c
> +++ b/arch/mips/mm/c-r4k.c
> @@ -578,7 +578,8 @@ static inline void local_r4k_flush_cache_page(void *args)
>  		 * another ASID than the current one.
>  		 */
>  		map_coherent = (cpu_has_dc_aliases &&
> -				page_mapped(page) && !Page_dcache_dirty(page));
> +				page_mapcount(page) &&
> +				!Page_dcache_dirty(page));
>  		if (map_coherent)
>  			vaddr = kmap_coherent(page, addr);
>  		else
> diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
> index 7e3ea7766822..e695b28dc32c 100644
> --- a/arch/mips/mm/cache.c
> +++ b/arch/mips/mm/cache.c
> @@ -106,7 +106,7 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr)
>  	unsigned long addr = (unsigned long) page_address(page);
>  
>  	if (pages_do_alias(addr, vmaddr)) {
> -		if (page_mapped(page) && !Page_dcache_dirty(page)) {
> +		if (page_mapcount(page) && !Page_dcache_dirty(page)) {
>  			void *kaddr;
>  
>  			kaddr = kmap_coherent(page, vmaddr);
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index 448cde372af0..2c8e44aa536e 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -156,7 +156,7 @@ void copy_user_highpage(struct page *to, struct page *from,
>  
>  	vto = kmap_atomic(to);
>  	if (cpu_has_dc_aliases &&
> -	    page_mapped(from) && !Page_dcache_dirty(from)) {
> +	    page_mapcount(from) && !Page_dcache_dirty(from)) {
>  		vfrom = kmap_coherent(from, vaddr);
>  		copy_page(vto, vfrom);
>  		kunmap_coherent();
> @@ -178,7 +178,7 @@ void copy_to_user_page(struct vm_area_struct *vma,
>  	unsigned long len)
>  {
>  	if (cpu_has_dc_aliases &&
> -	    page_mapped(page) && !Page_dcache_dirty(page)) {
> +	    page_mapcount(page) && !Page_dcache_dirty(page)) {
>  		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
>  		memcpy(vto, src, len);
>  		kunmap_coherent();
> @@ -196,7 +196,7 @@ void copy_from_user_page(struct vm_area_struct *vma,
>  	unsigned long len)
>  {
>  	if (cpu_has_dc_aliases &&
> -	    page_mapped(page) && !Page_dcache_dirty(page)) {
> +	    page_mapcount(page) && !Page_dcache_dirty(page)) {
>  		void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
>  		memcpy(dst, vfrom, len);
>  		kunmap_coherent();
> diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
> index 51d8f7f31d1d..58aaa4f33b81 100644
> --- a/arch/sh/mm/cache-sh4.c
> +++ b/arch/sh/mm/cache-sh4.c
> @@ -241,7 +241,7 @@ static void sh4_flush_cache_page(void *args)
>  		 */
>  		map_coherent = (current_cpu_data.dcache.n_aliases &&
>  			test_bit(PG_dcache_clean, &page->flags) &&
> -			page_mapped(page));
> +			page_mapcount(page));
>  		if (map_coherent)
>  			vaddr = kmap_coherent(page, address);
>  		else
> diff --git a/arch/sh/mm/cache.c b/arch/sh/mm/cache.c
> index f770e3992620..e58cfbf45150 100644
> --- a/arch/sh/mm/cache.c
> +++ b/arch/sh/mm/cache.c
> @@ -59,7 +59,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
>  		       unsigned long vaddr, void *dst, const void *src,
>  		       unsigned long len)
>  {
> -	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
> +	if (boot_cpu_data.dcache.n_aliases && page_mapcount(page) &&
>  	    test_bit(PG_dcache_clean, &page->flags)) {
>  		void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
>  		memcpy(vto, src, len);
> @@ -78,7 +78,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
>  			 unsigned long vaddr, void *dst, const void *src,
>  			 unsigned long len)
>  {
> -	if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
> +	if (boot_cpu_data.dcache.n_aliases && page_mapcount(page) &&
>  	    test_bit(PG_dcache_clean, &page->flags)) {
>  		void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK);
>  		memcpy(dst, vfrom, len);
> @@ -97,7 +97,7 @@ void copy_user_highpage(struct page *to, struct page *from,
>  
>  	vto = kmap_atomic(to);
>  
> -	if (boot_cpu_data.dcache.n_aliases && page_mapped(from) &&
> +	if (boot_cpu_data.dcache.n_aliases && page_mapcount(from) &&
>  	    test_bit(PG_dcache_clean, &from->flags)) {
>  		vfrom = kmap_coherent(from, vaddr);
>  		copy_page(vto, vfrom);
> @@ -153,7 +153,7 @@ void __flush_anon_page(struct page *page, unsigned long vmaddr)
>  	unsigned long addr = (unsigned long) page_address(page);
>  
>  	if (pages_do_alias(addr, vmaddr)) {
> -		if (boot_cpu_data.dcache.n_aliases && page_mapped(page) &&
> +		if (boot_cpu_data.dcache.n_aliases && page_mapcount(page) &&
>  		    test_bit(PG_dcache_clean, &page->flags)) {
>  			void *kaddr;
>  
> diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c
> index 5ece856c5725..35c822286bbe 100644
> --- a/arch/xtensa/mm/tlb.c
> +++ b/arch/xtensa/mm/tlb.c
> @@ -245,7 +245,7 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb)
>  						page_mapcount(p));
>  				if (!page_count(p))
>  					rc |= TLB_INSANE;
> -				else if (page_mapped(p))
> +				else if (page_mapcount(p))
>  					rc |= TLB_SUSPICIOUS;
>  			} else {
>  				rc |= TLB_INSANE;
> diff --git a/fs/proc/page.c b/fs/proc/page.c
> index 7eee2d8b97d9..e99c059339f6 100644
> --- a/fs/proc/page.c
> +++ b/fs/proc/page.c
> @@ -97,9 +97,9 @@ u64 stable_page_flags(struct page *page)
>  	 * pseudo flags for the well known (anonymous) memory mapped pages
>  	 *
>  	 * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
> -	 * simple test in page_mapped() is not enough.
> +	 * simple test in page_mapcount() is not enough.
>  	 */
> -	if (!PageSlab(page) && page_mapped(page))
> +	if (!PageSlab(page) && page_mapcount(page))
>  		u |= 1 << KPF_MMAP;
>  	if (PageAnon(page))
>  		u |= 1 << KPF_ANON;
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 25bec2c3e7a3..da8f66067670 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -200,7 +200,7 @@ void __delete_from_page_cache(struct page *page, void *shadow)
>  	__dec_zone_page_state(page, NR_FILE_PAGES);
>  	if (PageSwapBacked(page))
>  		__dec_zone_page_state(page, NR_SHMEM);
> -	BUG_ON(page_mapped(page));
> +	VM_BUG_ON_PAGE(page_mapped(page), page);
>  
>  	/*
>  	 * At this point page must be either written or cleaned by truncate.
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 7b8838f2c5d0..3361fe78fbe2 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1141,12 +1141,15 @@ static void page_remove_file_rmap(struct page *page)
>  
>  	memcg = mem_cgroup_begin_page_stat(page);
>  
> -	/* page still mapped by someone else? */
> -	if (!atomic_add_negative(-1, &page->_mapcount))
> +	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
> +	if (unlikely(PageHuge(page))) {
> +		/* hugetlb pages are always mapped with pmds */
> +		atomic_dec(compound_mapcount_ptr(page));
>  		goto out;
> +	}
>  
> -	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
> -	if (unlikely(PageHuge(page)))
> +	/* page still mapped by someone else? */
> +	if (!atomic_add_negative(-1, &page->_mapcount))
>  		goto out;
>  
>  	/*
> 



Download attachment "signature.asc" of type "application/pgp-signature" (474 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ