lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <76537454-272b-4fbb-b073-5387bbaaf28d@redhat.com>
Date: Wed, 12 Feb 2025 11:03:48 +0100
From: David Hildenbrand <david@...hat.com>
To: Shivank Garg <shivankg@....com>, akpm@...ux-foundation.org,
 willy@...radead.org, pbonzini@...hat.com
Cc: linux-fsdevel@...r.kernel.org, linux-mm@...ck.org,
 linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
 linux-coco@...ts.linux.dev, chao.gao@...el.com, seanjc@...gle.com,
 ackerleytng@...gle.com, vbabka@...e.cz, bharata@....com, nikunj@....com,
 michael.day@....com, Neeraj.Upadhyay@....com, thomas.lendacky@....com,
 michael.roth@....com, Shivansh Dhiman <shivansh.dhiman@....com>
Subject: Re: [RFC PATCH v4 1/3] mm/filemap: add mempolicy support to the
 filemap layer

On 10.02.25 07:32, Shivank Garg wrote:
> From: Shivansh Dhiman <shivansh.dhiman@....com>
> 
> Add NUMA mempolicy support to the filemap allocation path by introducing
> new APIs that take a mempolicy argument:
> - filemap_grab_folio_mpol()
> - filemap_alloc_folio_mpol()
> - __filemap_get_folio_mpol()
> 
> These APIs allow callers to specify a NUMA policy during page cache
> allocations, enabling fine-grained control over memory placement. This is
> particularly needed by KVM when using guest-memfd memory backends, where
> the guest memory needs to be allocated according to the NUMA policy
> specified by VMM.
> 

shmem handles this using custom shmem_alloc_folio()->folio_alloc_mpol().

I'm curious, is there

(1) A way to make shmem also use this new API?
(2) Handle it in guest_memfd manually, like shmem does?

> The existing non-mempolicy APIs remain unchanged and continue to use the
> default allocation behavior.
> 
> Signed-off-by: Shivansh Dhiman <shivansh.dhiman@....com>
> Signed-off-by: Shivank Garg <shivankg@....com>
> ---
>   include/linux/pagemap.h | 40 ++++++++++++++++++++++++++++++++++++++++
>   mm/filemap.c            | 30 +++++++++++++++++++++++++-----
>   2 files changed, 65 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index 47bfc6b1b632..4ae7fa63cb26 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -662,15 +662,25 @@ static inline void *detach_page_private(struct page *page)
>   
>   #ifdef CONFIG_NUMA
>   struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
> +struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
> +						struct mempolicy *mpol);

Two tabs indent on second parameter line, please.

>   #else
>   static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>   {
>   	return folio_alloc_noprof(gfp, order);
>   }
> +static inline struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp,
> +						unsigned int order,
> +						struct mempolicy *mpol)
> +{
> +	return filemap_alloc_folio_noprof(gfp, order);
> +}

Dito.

>   #endif
>   
>   #define filemap_alloc_folio(...)				\
>   	alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
> +#define filemap_alloc_folio_mpol(...)				\
> +	alloc_hooks(filemap_alloc_folio_mpol_noprof(__VA_ARGS__))
>   
>   static inline struct page *__page_cache_alloc(gfp_t gfp)
>   {
> @@ -762,6 +772,8 @@ static inline fgf_t fgf_set_order(size_t size)
>   void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
>   struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>   		fgf_t fgp_flags, gfp_t gfp);
> +struct folio *__filemap_get_folio_mpol(struct address_space *mapping,
> +		pgoff_t index, fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol);
>   struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
>   		fgf_t fgp_flags, gfp_t gfp);
>   
> @@ -820,6 +832,34 @@ static inline struct folio *filemap_grab_folio(struct address_space *mapping,
>   			mapping_gfp_mask(mapping));
>   }
>   
> +/**
> + * filemap_grab_folio_mpol - grab a folio from the page cache
> + * @mapping: The address space to search
> + * @index: The page index
> + * @mpol: The mempolicy to apply

"The mempolicy to apply when allocating a new folio." ?

> + *
> + * Same as filemap_grab_folio(), except that it allocates the folio using
> + * given memory policy.
> + *
> + * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found
> + * and failed to create a folio.
> + */
> +#ifdef CONFIG_NUMA
> +static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
> +					pgoff_t index, struct mempolicy *mpol)
> +{
> +	return __filemap_get_folio_mpol(mapping, index,
> +			FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
> +			mapping_gfp_mask(mapping), mpol);
> +}
> +#else
> +static inline struct folio *filemap_grab_folio_mpol(struct address_space *mapping,
> +					pgoff_t index, struct mempolicy *mpol)
> +{
> +	return filemap_grab_folio(mapping, index);
> +}
> +#endif /* CONFIG_NUMA */
> +
>   /**
>    * find_get_page - find and get a page reference
>    * @mapping: the address_space to search
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 804d7365680c..c5ea32702774 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1001,8 +1001,13 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
>   EXPORT_SYMBOL_GPL(filemap_add_folio);
>   
>   #ifdef CONFIG_NUMA
> -struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> +struct folio *filemap_alloc_folio_mpol_noprof(gfp_t gfp, unsigned int order,
> +			struct mempolicy *mpol)
>   {
> +	if (mpol)
> +		return folio_alloc_mpol_noprof(gfp, order, mpol,
> +				NO_INTERLEAVE_INDEX, numa_node_id());
> +

This should go below the variable declaration. (and indentation on 
second parameter line should align with the first parameter)

>   	int n;
>   	struct folio *folio;
>   
> @@ -1018,6 +1023,12 @@ struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
>   	}
>   	return folio_alloc_noprof(gfp, order);
>   }
> +EXPORT_SYMBOL(filemap_alloc_folio_mpol_noprof);
> +
> +struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
> +{
> +	return filemap_alloc_folio_mpol_noprof(gfp, order, NULL);
> +}
>   EXPORT_SYMBOL(filemap_alloc_folio_noprof);
>   #endif
>   
> @@ -1881,11 +1892,12 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
>   }
>   
>   /**
> - * __filemap_get_folio - Find and get a reference to a folio.
> + * __filemap_get_folio_mpol - Find and get a reference to a folio.
>    * @mapping: The address_space to search.
>    * @index: The page index.
>    * @fgp_flags: %FGP flags modify how the folio is returned.
>    * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
> + * @mpol: The mempolicy to apply.

"The mempolicy to apply when allocating a new folio." ?

>    *
>    * Looks up the page cache entry at @mapping & @index.
>    *
> @@ -1896,8 +1908,8 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
>    *
>    * Return: The found folio or an ERR_PTR() otherwise.
>    */
> -struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> -		fgf_t fgp_flags, gfp_t gfp)
> +struct folio *__filemap_get_folio_mpol(struct address_space *mapping, pgoff_t index,
> +		fgf_t fgp_flags, gfp_t gfp, struct mempolicy *mpol)
>   {
>   	struct folio *folio;
>   
> @@ -1967,7 +1979,7 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>   			err = -ENOMEM;
>   			if (order > min_order)
>   				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
> -			folio = filemap_alloc_folio(alloc_gfp, order);
> +			folio = filemap_alloc_folio_mpol(alloc_gfp, order, mpol);
>   			if (!folio)
>   				continue;
>   
> @@ -2003,6 +2015,14 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
>   		folio_clear_dropbehind(folio);
>   	return folio;
>   }
> +EXPORT_SYMBOL(__filemap_get_folio_mpol);
> +
> +struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
> +		fgf_t fgp_flags, gfp_t gfp)
> +{
> +	return __filemap_get_folio_mpol(mapping, index,
> +			fgp_flags, gfp, NULL);
> +}
>   EXPORT_SYMBOL(__filemap_get_folio);
>   
>   static inline struct folio *find_get_entry(struct xa_state *xas, pgoff_t max,


For guest_memfd, where pages are un-movable and un-swappable, the memory 
policy will never change later.

shmem seems to handle the swap-in case, because it keeps care of 
allocating pages in that case itself.

For ordinary pagecache pages (movable), page migration would likely not 
be aware of the specified mpol; I assume the same applies to shmem?

alloc_migration_target() seems to prefer the current nid (nid = 
folio_nid(src)), but apart from that, does not lookup any mempolicy.

compaction likely handles this by comapcting within a node/zone.

Maybe migration to the right target node on misplacement is handled on a 
higher level lagter (numa hinting faults -> migrate_misplaced_folio). 
Likely at least for anon memory, not sure about unmapped shmem.


-- 
Cheers,

David / dhildenb


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ