lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <66ef6c5ebd068_109ae294a3@dwillia2-mobl3.amr.corp.intel.com.notmuch>
Date: Sun, 22 Sep 2024 03:01:18 +0200
From: Dan Williams <dan.j.williams@...el.com>
To: Alistair Popple <apopple@...dia.com>, <dan.j.williams@...el.com>,
	<linux-mm@...ck.org>
CC: Alistair Popple <apopple@...dia.com>, <vishal.l.verma@...el.com>,
	<dave.jiang@...el.com>, <logang@...tatee.com>, <bhelgaas@...gle.com>,
	<jack@...e.cz>, <jgg@...pe.ca>, <catalin.marinas@....com>, <will@...nel.org>,
	<mpe@...erman.id.au>, <npiggin@...il.com>, <dave.hansen@...ux.intel.com>,
	<ira.weiny@...el.com>, <willy@...radead.org>, <djwong@...nel.org>,
	<tytso@....edu>, <linmiaohe@...wei.com>, <david@...hat.com>,
	<peterx@...hat.com>, <linux-doc@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, <linux-arm-kernel@...ts.infradead.org>,
	<linuxppc-dev@...ts.ozlabs.org>, <nvdimm@...ts.linux.dev>,
	<linux-cxl@...r.kernel.org>, <linux-fsdevel@...r.kernel.org>,
	<linux-ext4@...r.kernel.org>, <linux-xfs@...r.kernel.org>,
	<jhubbard@...dia.com>, <hch@....de>, <david@...morbit.com>, Jason Gunthorpe
	<jgg@...dia.com>
Subject: Re: [PATCH 04/12] mm: Allow compound zone device pages

Alistair Popple wrote:
> Zone device pages are used to represent various type of device memory
> managed by device drivers. Currently compound zone device pages are
> not supported. This is because MEMORY_DEVICE_FS_DAX pages are the only
> user of higher order zone device pages and have their own page
> reference counting.
> 
> A future change will unify FS DAX reference counting with normal page
> reference counting rules and remove the special FS DAX reference
> counting. Supporting that requires compound zone device pages.
> 
> Supporting compound zone device pages requires compound_head() to
> distinguish between head and tail pages whilst still preserving the
> special struct page fields that are specific to zone device pages.
> 
> A tail page is distinguished by having bit zero being set in
> page->compound_head, with the remaining bits pointing to the head
> page. For zone device pages page->compound_head is shared with
> page->pgmap.
> 
> The page->pgmap field is common to all pages within a memory section.
> Therefore pgmap is the same for both head and tail pages and can be
> moved into the folio and we can use the standard scheme to find
> compound_head from a tail page.
> 
> Signed-off-by: Alistair Popple <apopple@...dia.com>
> Reviewed-by: Jason Gunthorpe <jgg@...dia.com>
> 
> ---
> 
> Changes since v1:
> 
>  - Move pgmap to the folio as suggested by Matthew Wilcox
> ---
>  drivers/gpu/drm/nouveau/nouveau_dmem.c |  3 ++-
>  drivers/pci/p2pdma.c                   |  6 +++---
>  include/linux/memremap.h               |  6 +++---
>  include/linux/migrate.h                |  4 ++--
>  include/linux/mm_types.h               |  9 +++++++--
>  include/linux/mmzone.h                 |  8 +++++++-
>  lib/test_hmm.c                         |  3 ++-
>  mm/hmm.c                               |  2 +-
>  mm/memory.c                            |  4 +++-
>  mm/memremap.c                          | 14 +++++++-------
>  mm/migrate_device.c                    |  7 +++++--
>  mm/mm_init.c                           |  2 +-
>  12 files changed, 43 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index 6fb65b0..58d308c 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -88,7 +88,8 @@ struct nouveau_dmem {
>  
>  static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
>  {
> -	return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
> +	return container_of(page_dev_pagemap(page), struct nouveau_dmem_chunk,

page_dev_pagemap() feels like a mouthful. I would be ok with
page_pgmap() since "pgmap" is the most common identifier for
struct dev_pagemap instances.

> +			    pagemap);
>  }
>  
>  static struct nouveau_drm *page_to_drm(struct page *page)
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index 210b9f4..a58f2c1 100644
> --- a/drivers/pci/p2pdma.c
> +++ b/drivers/pci/p2pdma.c
> @@ -199,7 +199,7 @@ static const struct attribute_group p2pmem_group = {
>  
>  static void p2pdma_page_free(struct page *page)
>  {
> -	struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
> +	struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_dev_pagemap(page));
>  	/* safe to dereference while a reference is held to the percpu ref */
>  	struct pci_p2pdma *p2pdma =
>  		rcu_dereference_protected(pgmap->provider->p2pdma, 1);
> @@ -1022,8 +1022,8 @@ enum pci_p2pdma_map_type
>  pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
>  		       struct scatterlist *sg)
>  {
> -	if (state->pgmap != sg_page(sg)->pgmap) {
> -		state->pgmap = sg_page(sg)->pgmap;
> +	if (state->pgmap != page_dev_pagemap(sg_page(sg))) {
> +		state->pgmap = page_dev_pagemap(sg_page(sg));
>  		state->map = pci_p2pdma_map_type(state->pgmap, dev);
>  		state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
>  	}
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 3f7143a..14273e6 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct page *page)
>  {
>  	return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
>  		is_zone_device_page(page) &&
> -		page->pgmap->type == MEMORY_DEVICE_PRIVATE;
> +		page_dev_pagemap(page)->type == MEMORY_DEVICE_PRIVATE;
>  }
>  
>  static inline bool folio_is_device_private(const struct folio *folio)
> @@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
>  {
>  	return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
>  		is_zone_device_page(page) &&
> -		page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
> +		page_dev_pagemap(page)->type == MEMORY_DEVICE_PCI_P2PDMA;
>  }
>  
>  static inline bool is_device_coherent_page(const struct page *page)
>  {
>  	return is_zone_device_page(page) &&
> -		page->pgmap->type == MEMORY_DEVICE_COHERENT;
> +		page_dev_pagemap(page)->type == MEMORY_DEVICE_COHERENT;
>  }
>  
>  static inline bool folio_is_device_coherent(const struct folio *folio)
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 002e49b..9a85a82 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -207,8 +207,8 @@ struct migrate_vma {
>  	unsigned long		end;
>  
>  	/*
> -	 * Set to the owner value also stored in page->pgmap->owner for
> -	 * migrating out of device private memory. The flags also need to
> +	 * Set to the owner value also stored in page_dev_pagemap(page)->owner
> +	 * for migrating out of device private memory. The flags also need to
>  	 * be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE.
>  	 * The caller should always set this field when using mmu notifier
>  	 * callbacks to avoid device MMU invalidations for device private
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6e3bdf8..c2f1d53 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -129,8 +129,11 @@ struct page {
>  			unsigned long compound_head;	/* Bit zero is set */
>  		};
>  		struct {	/* ZONE_DEVICE pages */
> -			/** @pgmap: Points to the hosting device page map. */
> -			struct dev_pagemap *pgmap;
> +			/*
> +			 * The first word is used for compound_head or folio
> +			 * pgmap
> +			 */
> +			void *_unused;

I would feel better with "_unused_pgmap_compound_head", similar to how
_unused_slab_obj_exts in 'struct folio' indicates the placeholder
contents.

>  			void *zone_device_data;
>  			/*
>  			 * ZONE_DEVICE private pages are counted as being
> @@ -299,6 +302,7 @@ typedef struct {
>   * @_refcount: Do not access this member directly.  Use folio_ref_count()
>   *    to find how many references there are to this folio.
>   * @memcg_data: Memory Control Group data.
> + * @pgmap: Metadata for ZONE_DEVICE mappings
>   * @virtual: Virtual address in the kernel direct map.
>   * @_last_cpupid: IDs of last CPU and last process that accessed the folio.
>   * @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
> @@ -337,6 +341,7 @@ struct folio {
>  	/* private: */
>  				};
>  	/* public: */
> +			struct dev_pagemap *pgmap;
>  			};
>  			struct address_space *mapping;
>  			pgoff_t index;
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 17506e4..e191434 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1134,6 +1134,12 @@ static inline bool is_zone_device_page(const struct page *page)
>  	return page_zonenum(page) == ZONE_DEVICE;
>  }
>  
> +static inline struct dev_pagemap *page_dev_pagemap(const struct page *page)
> +{
> +	WARN_ON(!is_zone_device_page(page));

VM_WARN_ON()?

With the above fixups:

Reviewed-by: Dan Williams <dan.j.williams@...el.com>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ