lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 8 Feb 2017 19:57:27 -0800
From:   Matthew Wilcox <willy@...radead.org>
To:     "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Cc:     Theodore Ts'o <tytso@....edu>,
        Andreas Dilger <adilger.kernel@...ger.ca>,
        Jan Kara <jack@...e.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Alexander Viro <viro@...iv.linux.org.uk>,
        Hugh Dickins <hughd@...gle.com>,
        Andrea Arcangeli <aarcange@...hat.com>,
        Dave Hansen <dave.hansen@...el.com>,
        Vlastimil Babka <vbabka@...e.cz>,
        Ross Zwisler <ross.zwisler@...ux.intel.com>,
        linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-block@...r.kernel.org
Subject: Re: [PATCHv6 01/37] mm, shmem: swich huge tmpfs to multi-order
 radix-tree entries

On Thu, Jan 26, 2017 at 02:57:43PM +0300, Kirill A. Shutemov wrote:
> +++ b/include/linux/pagemap.h
> @@ -332,6 +332,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
>  			mapping_gfp_mask(mapping));
>  }
>  
> +static inline struct page *find_subpage(struct page *page, pgoff_t offset)
> +{
> +	VM_BUG_ON_PAGE(PageTail(page), page);
> +	VM_BUG_ON_PAGE(page->index > offset, page);
> +	VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) < offset,
> +			page);
> +	return page - page->index + offset;
> +}

What would you think to:

static inline void check_page_index(struct page *page, pgoff_t offset)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page->index > offset, page);
	VM_BUG_ON_PAGE(page->index + (1 << compound_order(page)) <= offset,
			page);
}

(I think I fixed an off-by-one error up there ...  if
index + (1 << order) == offset, this is also a bug, right?
because offset would then refer to the next page, not this page)

static inline struct page *find_subpage(struct page *page, pgoff_t offset)
{
	check_page_index(page, offset);
	return page + (offset - page->index);
}

... then you can use check_page_index down ...

> @@ -1250,7 +1233,6 @@ struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
>  			put_page(page);
>  			goto repeat;
>  		}
> -		VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);

... here?

> @@ -1472,25 +1451,35 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
>  			goto repeat;
>  		}
>  
> +		/* For multi-order entries, find relevant subpage */
> +		if (PageTransHuge(page)) {
> +			VM_BUG_ON(index - page->index < 0);
> +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> +			page += index - page->index;
> +		}

Use find_subpage() here?

>  		pages[ret] = page;
>  		if (++ret == nr_pages)
>  			break;
> +		if (!PageTransCompound(page))
> +			continue;
> +		for (refs = 0; ret < nr_pages &&
> +				(index + 1) % HPAGE_PMD_NR;
> +				ret++, refs++, index++)
> +			pages[ret] = ++page;
> +		if (refs)
> +			page_ref_add(compound_head(page), refs);
> +		if (ret == nr_pages)
> +			break;

Can we avoid referencing huge pages specifically in the page cache?  I'd
like us to get to the point where we can put arbitrary compound pages into
the page cache.  For example, I think this can be written as:

		if (!PageCompound(page))
			continue;
		for (refs = 0; ret < nr_pages; refs++, index++) {
			if (index > page->index + (1 << compound_order(page)))
				break;
			pages[ret++] = ++page;
		}
		if (refs)
			page_ref_add(compound_head(page), refs);
		if (ret == nr_pages)
			break;

> @@ -1541,19 +1533,12 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
>  
> +		/* For multi-order entries, find relevant subpage */
> +		if (PageTransHuge(page)) {
> +			VM_BUG_ON(index - page->index < 0);
> +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> +			page += index - page->index;
> +		}
> +
>  		pages[ret] = page;
>  		if (++ret == nr_pages)
>  			break;
> +		if (!PageTransCompound(page))
> +			continue;
> +		for (refs = 0; ret < nr_pages &&
> +				(index + 1) % HPAGE_PMD_NR;
> +				ret++, refs++, index++)
> +			pages[ret] = ++page;
> +		if (refs)
> +			page_ref_add(compound_head(page), refs);
> +		if (ret == nr_pages)
> +			break;
>  	}
>  	rcu_read_unlock();
>  	return ret;

Ugh, the same code again.  Hmm ... we only need to modify 'ret' as a result
of this ... so could we split it out like this?

static unsigned populate_pages(struct page **pages, unsigned i, unsigned max,
                                struct page *page)
{
        unsigned refs = 0;
        for (;;) {
                pages[i++] = page;
                if (i == max)
                        break;
                if (PageHead(page + 1))
                        break;
                page++;
                refs++;
        }
        if (refs)
                page_ref_add(compound_head(page), refs);
        return i;
}

> +unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *indexp,
>  			int tag, unsigned int nr_pages, struct page **pages)

>  			break;
> +		if (!PageTransCompound(page))
> +			continue;
> +		for (refs = 0; ret < nr_pages &&
> +				(index + 1) % HPAGE_PMD_NR;
> +				ret++, refs++, index++)
> +			pages[ret] = ++page;
> +		if (refs)
> +			page_ref_add(compound_head(page), refs);
> +		if (ret == nr_pages)
> +			break;
>  	}

... and again!

> @@ -2326,25 +2337,26 @@ void filemap_map_pages(struct vm_fault *vmf,
> +		/* For multi-order entries, find relevant subpage */
> +		if (PageTransHuge(page)) {
> +			VM_BUG_ON(index - page->index < 0);
> +			VM_BUG_ON(index - page->index >= HPAGE_PMD_NR);
> +			page += index - page->index;
> +		}
> +
>  		if (!PageUptodate(page) ||
>  				PageReadahead(page) ||

Readahead is PF_NO_COMPOUND ... so I don't see why this works?

> @@ -2378,8 +2390,14 @@ void filemap_map_pages(struct vm_fault *vmf,
>  		/* Huge page is mapped? No need to proceed. */
>  		if (pmd_trans_huge(*vmf->pmd))
>  			break;
> -		if (iter.index == end_pgoff)
> +		if (index == end_pgoff)
>  			break;
> +		if (page && PageTransCompound(page) &&
> +				(index & (HPAGE_PMD_NR - 1)) !=
> +				HPAGE_PMD_NR - 1) {
> +			index++;
> +			goto repeat;

Do we really have to go all the way back to the beginning of the loop?  It'd
be nice to be able to insert all of the relevant PTEs in a shorter loop here.
We'd need to bump the reference count N more times, and I think we do need
to check HWPoison for each subpage.

Powered by blists - more mailing lists