lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <60a1a526-52dd-478a-9ed4-79e6428743ac@redhat.com>
Date: Tue, 8 Jul 2025 12:59:07 -0400
From: Luiz Capitulino <luizcap@...hat.com>
To: Shivank Garg <shivankg@....com>, david@...hat.com, willy@...radead.org,
 akpm@...ux-foundation.org
Cc: linux-kernel@...r.kernel.org, linux-mm@...ck.org, sj@...nel.org
Subject: Re: [PATCH v2 2/4] mm/util: introduce snapshot_page()

On 2025-07-08 01:49, Shivank Garg wrote:
> 
> 
> On 7/8/2025 12:20 AM, Luiz Capitulino wrote:
>> This commit refactors __dump_page() into snapshot_page().
>>
>> snapshot_page() tries to take a faithful snapshot of a page and its
>> folio representation. The snapshot is returned in the struct
>> page_snapshot parameter along with additional flags that are best
>> retrieved at snapshot creation time to reduce race windows.
>>
>> This function is intended to be used by callers that need a stable
>> representation of a struct page and struct folio so that pointers
>> or page information doesn't change while working on a page.
>>
>> The idea and original implementation of snapshot_page() comes from
>> Matthew Wilcox with suggestions for improvements from David Hildenbrand.
>> All bugs and misconceptions are mine.
>>
>> Signed-off-by: Luiz Capitulino <luizcap@...hat.com>
>> ---
>>   include/linux/mm.h | 19 ++++++++++++
>>   mm/debug.c         | 42 +++----------------------
>>   mm/util.c          | 77 ++++++++++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 100 insertions(+), 38 deletions(-)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 0ef2ba0c667a..090968c6eebb 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -4184,4 +4184,23 @@ static inline bool page_pool_page_is_pp(struct page *page)
>>   }
>>   #endif
>>   
>> +#define PAGE_SNAPSHOT_FAITHFUL     (1 << 0)
>> +#define PAGE_SNAPSHOT_PG_FREE      (1 << 1)
>> +#define PAGE_SNAPSHOT_PG_IDLE      (1 << 2)
>> +
>> +struct page_snapshot {
>> +	struct folio folio_snapshot;
>> +	struct page page_snapshot;
>> +	unsigned long pfn;
>> +	unsigned long idx;
>> +	unsigned long flags;
>> +};
>> +
>> +static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps)
>> +{
>> +	return ps->flags & PAGE_SNAPSHOT_FAITHFUL;
>> +}
>> +
>> +void snapshot_page(struct page_snapshot *ps, const struct page *page);
>> +
>>   #endif /* _LINUX_MM_H */
>> diff --git a/mm/debug.c b/mm/debug.c
>> index 907382257062..7349330ea506 100644
>> --- a/mm/debug.c
>> +++ b/mm/debug.c
>> @@ -129,47 +129,13 @@ static void __dump_folio(struct folio *folio, struct page *page,
>>   
>>   static void __dump_page(const struct page *page)
>>   {
>> -	struct folio *foliop, folio;
>> -	struct page precise;
>> -	unsigned long head;
>> -	unsigned long pfn = page_to_pfn(page);
>> -	unsigned long idx, nr_pages = 1;
>> -	int loops = 5;
>> -
>> -again:
>> -	memcpy(&precise, page, sizeof(*page));
>> -	head = precise.compound_head;
>> -	if ((head & 1) == 0) {
>> -		foliop = (struct folio *)&precise;
>> -		idx = 0;
>> -		if (!folio_test_large(foliop))
>> -			goto dump;
>> -		foliop = (struct folio *)page;
>> -	} else {
>> -		foliop = (struct folio *)(head - 1);
>> -		idx = folio_page_idx(foliop, page);
>> -	}
>> +	struct page_snapshot ps;
>>   
>> -	if (idx < MAX_FOLIO_NR_PAGES) {
>> -		memcpy(&folio, foliop, 2 * sizeof(struct page));
>> -		nr_pages = folio_nr_pages(&folio);
>> -		if (nr_pages > 1)
>> -			memcpy(&folio.__page_2, &foliop->__page_2,
>> -			       sizeof(struct page));
>> -		foliop = &folio;
>> -	}
>> -
>> -	if (idx > nr_pages) {
>> -		if (loops-- > 0)
>> -			goto again;
>> +	snapshot_page(&ps, page);
>> +	if (!snapshot_page_is_faithful(&ps))
>>   		pr_warn("page does not match folio\n");
>> -		precise.compound_head &= ~1UL;
>> -		foliop = (struct folio *)&precise;
>> -		idx = 0;
>> -	}
>>   
>> -dump:
>> -	__dump_folio(foliop, &precise, pfn, idx);
>> +	__dump_folio(&ps.folio_snapshot, &ps.page_snapshot, ps.pfn, ps.idx);
>>   }
>>   
>>   void dump_page(const struct page *page, const char *reason)
>> diff --git a/mm/util.c b/mm/util.c
>> index 0b270c43d7d1..c38d213be83f 100644
>> --- a/mm/util.c
>> +++ b/mm/util.c
>> @@ -25,6 +25,7 @@
>>   #include <linux/sizes.h>
>>   #include <linux/compat.h>
>>   #include <linux/fsnotify.h>
>> +#include <linux/page_idle.h>
>>   
>>   #include <linux/uaccess.h>
>>   
>> @@ -1171,3 +1172,79 @@ int compat_vma_mmap_prepare(struct file *file, struct vm_area_struct *vma)
>>   	return 0;
>>   }
>>   EXPORT_SYMBOL(compat_vma_mmap_prepare);
>> +
>> +static void set_flags(struct page_snapshot *ps, const struct folio *folio,
>> +		      const struct page *page)
>> +{
>> +	/*
>> +	 * Caveats on high order pages: PG_buddy and PG_slab will only be set
>> +	 * on the head page.
>> +	 */
>> +	if (PageBuddy(page))
>> +		ps->flags |= PAGE_SNAPSHOT_PG_FREE;
>> +	else if (page_count(page) == 0 && is_free_buddy_page(page))
>> +		ps->flags |= PAGE_SNAPSHOT_PG_FREE;
>> +
>> +	if (folio_test_idle(folio))
>> +		ps->flags |= PAGE_SNAPSHOT_PG_IDLE;
>> +}
>> +
>> +/*
>> + * Create a snapshot of a page and store its struct page and struct folio
>> + * representations in a struct page_snapshot.
>> + *
>> + * @ps: struct page_snapshot to store the page snapshot
>> + * @page: the page we want to snapshot
>> + *
>> + * Note that creating a faithful snapshot of a page may fail if the page
>> + * compound keeps changing (eg. due to folio split). In this case we set
>> + * ps->faithful to false and the snapshot will assume that @page refers
>> + * to a single page.
>> + */
>> +void snapshot_page(struct page_snapshot *ps, const struct page *page)
>> +{
>> +	unsigned long head, nr_pages = 1;
>> +	struct folio *foliop, folio;
>> +	int loops = 5;
>> +
>> +	ps->pfn = page_to_pfn(page);
>> +	ps->flags = PAGE_SNAPSHOT_FAITHFUL;
>> +
>> +again:
>> +	memcpy(&ps->page_snapshot, page, sizeof(*page));
>> +	head = ps->page_snapshot.compound_head;
>> +	if ((head & 1) == 0) {
>> +		foliop = (struct folio *)&ps->page_snapshot;
>> +		ps->idx = 0;
>> +		if (!folio_test_large(foliop)) {
>> +			set_flags(ps, page_folio(page), page);
>> +			goto out;
>> +		}
>> +		foliop = (struct folio *)page;
>> +	} else {
>> +		foliop = (struct folio *)(page->compound_head - 1);
> 					  ^^^^
> should we use ps->page_snapshot here?
> IIUC, page may change due to race.

You're right the race exists, but we can't make foliop point to the
snapshot because we do pointer arithmetic and other operations that need
the real page memory address to work properly.

>> +		ps->idx = folio_page_idx(foliop, page);
>> +	}
>> +
>> +	if (ps->idx < MAX_FOLIO_NR_PAGES) {
>> +		memcpy(&folio, foliop, 2 * sizeof(struct page));
>> +		nr_pages = folio_nr_pages(&folio);
>> +		if (nr_pages > 1)
>> +			memcpy(&folio.__page_2, &foliop->__page_2,
>> +			       sizeof(struct page));
>> +		set_flags(ps, foliop, page);
>> +		foliop = &folio;
>> +	}
>> +
>> +	if (ps->idx > nr_pages) {
>> +		if (loops-- > 0)
>> +			goto again;
>> +		ps->page_snapshot.compound_head &= ~1UL;
> 
> Should we use clear_compound_head() for clarity?

Yes, we can do that.

Andrew, since you applied this series already, can you squash the change
below into this patch? If not, I can send v3.

diff --git a/mm/util.c b/mm/util.c
index c38d213be83f..ce4200529772 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1239,7 +1239,7 @@ void snapshot_page(struct page_snapshot *ps, const struct page *page)
  	if (ps->idx > nr_pages) {
  		if (loops-- > 0)
  			goto again;
-		ps->page_snapshot.compound_head &= ~1UL;
+		clear_compound_head(&ps->page_snapshot);
  		foliop = (struct folio *)&ps->page_snapshot;
  		ps->flags = 0;
  		ps->idx = 0;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ