[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <66ef6c5ebd068_109ae294a3@dwillia2-mobl3.amr.corp.intel.com.notmuch>
Date: Sun, 22 Sep 2024 03:01:18 +0200
From: Dan Williams <dan.j.williams@...el.com>
To: Alistair Popple <apopple@...dia.com>, <dan.j.williams@...el.com>,
<linux-mm@...ck.org>
CC: Alistair Popple <apopple@...dia.com>, <vishal.l.verma@...el.com>,
<dave.jiang@...el.com>, <logang@...tatee.com>, <bhelgaas@...gle.com>,
<jack@...e.cz>, <jgg@...pe.ca>, <catalin.marinas@....com>, <will@...nel.org>,
<mpe@...erman.id.au>, <npiggin@...il.com>, <dave.hansen@...ux.intel.com>,
<ira.weiny@...el.com>, <willy@...radead.org>, <djwong@...nel.org>,
<tytso@....edu>, <linmiaohe@...wei.com>, <david@...hat.com>,
<peterx@...hat.com>, <linux-doc@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <linux-arm-kernel@...ts.infradead.org>,
<linuxppc-dev@...ts.ozlabs.org>, <nvdimm@...ts.linux.dev>,
<linux-cxl@...r.kernel.org>, <linux-fsdevel@...r.kernel.org>,
<linux-ext4@...r.kernel.org>, <linux-xfs@...r.kernel.org>,
<jhubbard@...dia.com>, <hch@....de>, <david@...morbit.com>, Jason Gunthorpe
<jgg@...dia.com>
Subject: Re: [PATCH 04/12] mm: Allow compound zone device pages
Alistair Popple wrote:
> Zone device pages are used to represent various type of device memory
> managed by device drivers. Currently compound zone device pages are
> not supported. This is because MEMORY_DEVICE_FS_DAX pages are the only
> user of higher order zone device pages and have their own page
> reference counting.
>
> A future change will unify FS DAX reference counting with normal page
> reference counting rules and remove the special FS DAX reference
> counting. Supporting that requires compound zone device pages.
>
> Supporting compound zone device pages requires compound_head() to
> distinguish between head and tail pages whilst still preserving the
> special struct page fields that are specific to zone device pages.
>
> A tail page is distinguished by having bit zero being set in
> page->compound_head, with the remaining bits pointing to the head
> page. For zone device pages page->compound_head is shared with
> page->pgmap.
>
> The page->pgmap field is common to all pages within a memory section.
> Therefore pgmap is the same for both head and tail pages and can be
> moved into the folio and we can use the standard scheme to find
> compound_head from a tail page.
>
> Signed-off-by: Alistair Popple <apopple@...dia.com>
> Reviewed-by: Jason Gunthorpe <jgg@...dia.com>
>
> ---
>
> Changes since v1:
>
> - Move pgmap to the folio as suggested by Matthew Wilcox
> ---
> drivers/gpu/drm/nouveau/nouveau_dmem.c | 3 ++-
> drivers/pci/p2pdma.c | 6 +++---
> include/linux/memremap.h | 6 +++---
> include/linux/migrate.h | 4 ++--
> include/linux/mm_types.h | 9 +++++++--
> include/linux/mmzone.h | 8 +++++++-
> lib/test_hmm.c | 3 ++-
> mm/hmm.c | 2 +-
> mm/memory.c | 4 +++-
> mm/memremap.c | 14 +++++++-------
> mm/migrate_device.c | 7 +++++--
> mm/mm_init.c | 2 +-
> 12 files changed, 43 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> index 6fb65b0..58d308c 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
> @@ -88,7 +88,8 @@ struct nouveau_dmem {
>
> static struct nouveau_dmem_chunk *nouveau_page_to_chunk(struct page *page)
> {
> - return container_of(page->pgmap, struct nouveau_dmem_chunk, pagemap);
> + return container_of(page_dev_pagemap(page), struct nouveau_dmem_chunk,
page_dev_pagemap() feels like a mouthful. I would be ok with
page_pgmap() since that is the most common identifier for struct
dev_pagemap instances.
> + pagemap);
> }
>
> static struct nouveau_drm *page_to_drm(struct page *page)
> diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
> index 210b9f4..a58f2c1 100644
> --- a/drivers/pci/p2pdma.c
> +++ b/drivers/pci/p2pdma.c
> @@ -199,7 +199,7 @@ static const struct attribute_group p2pmem_group = {
>
> static void p2pdma_page_free(struct page *page)
> {
> - struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page->pgmap);
> + struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_dev_pagemap(page));
> /* safe to dereference while a reference is held to the percpu ref */
> struct pci_p2pdma *p2pdma =
> rcu_dereference_protected(pgmap->provider->p2pdma, 1);
> @@ -1022,8 +1022,8 @@ enum pci_p2pdma_map_type
> pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev,
> struct scatterlist *sg)
> {
> - if (state->pgmap != sg_page(sg)->pgmap) {
> - state->pgmap = sg_page(sg)->pgmap;
> + if (state->pgmap != page_dev_pagemap(sg_page(sg))) {
> + state->pgmap = page_dev_pagemap(sg_page(sg));
> state->map = pci_p2pdma_map_type(state->pgmap, dev);
> state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset;
> }
> diff --git a/include/linux/memremap.h b/include/linux/memremap.h
> index 3f7143a..14273e6 100644
> --- a/include/linux/memremap.h
> +++ b/include/linux/memremap.h
> @@ -161,7 +161,7 @@ static inline bool is_device_private_page(const struct page *page)
> {
> return IS_ENABLED(CONFIG_DEVICE_PRIVATE) &&
> is_zone_device_page(page) &&
> - page->pgmap->type == MEMORY_DEVICE_PRIVATE;
> + page_dev_pagemap(page)->type == MEMORY_DEVICE_PRIVATE;
> }
>
> static inline bool folio_is_device_private(const struct folio *folio)
> @@ -173,13 +173,13 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
> {
> return IS_ENABLED(CONFIG_PCI_P2PDMA) &&
> is_zone_device_page(page) &&
> - page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
> + page_dev_pagemap(page)->type == MEMORY_DEVICE_PCI_P2PDMA;
> }
>
> static inline bool is_device_coherent_page(const struct page *page)
> {
> return is_zone_device_page(page) &&
> - page->pgmap->type == MEMORY_DEVICE_COHERENT;
> + page_dev_pagemap(page)->type == MEMORY_DEVICE_COHERENT;
> }
>
> static inline bool folio_is_device_coherent(const struct folio *folio)
> diff --git a/include/linux/migrate.h b/include/linux/migrate.h
> index 002e49b..9a85a82 100644
> --- a/include/linux/migrate.h
> +++ b/include/linux/migrate.h
> @@ -207,8 +207,8 @@ struct migrate_vma {
> unsigned long end;
>
> /*
> - * Set to the owner value also stored in page->pgmap->owner for
> - * migrating out of device private memory. The flags also need to
> + * Set to the owner value also stored in page_dev_pagemap(page)->owner
> + * for migrating out of device private memory. The flags also need to
> * be set to MIGRATE_VMA_SELECT_DEVICE_PRIVATE.
> * The caller should always set this field when using mmu notifier
> * callbacks to avoid device MMU invalidations for device private
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 6e3bdf8..c2f1d53 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -129,8 +129,11 @@ struct page {
> unsigned long compound_head; /* Bit zero is set */
> };
> struct { /* ZONE_DEVICE pages */
> - /** @pgmap: Points to the hosting device page map. */
> - struct dev_pagemap *pgmap;
> + /*
> + * The first word is used for compound_head or folio
> + * pgmap
> + */
> + void *_unused;
I would feel better with "_unused_pgmap_compound_head", similar to how
_unused_slab_obj_exts in 'struct folio' indicates the placeholder
contents.
> void *zone_device_data;
> /*
> * ZONE_DEVICE private pages are counted as being
> @@ -299,6 +302,7 @@ typedef struct {
> * @_refcount: Do not access this member directly. Use folio_ref_count()
> * to find how many references there are to this folio.
> * @memcg_data: Memory Control Group data.
> + * @pgmap: Metadata for ZONE_DEVICE mappings
> * @virtual: Virtual address in the kernel direct map.
> * @_last_cpupid: IDs of last CPU and last process that accessed the folio.
> * @_entire_mapcount: Do not use directly, call folio_entire_mapcount().
> @@ -337,6 +341,7 @@ struct folio {
> /* private: */
> };
> /* public: */
> + struct dev_pagemap *pgmap;
> };
> struct address_space *mapping;
> pgoff_t index;
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 17506e4..e191434 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1134,6 +1134,12 @@ static inline bool is_zone_device_page(const struct page *page)
> return page_zonenum(page) == ZONE_DEVICE;
> }
>
> +static inline struct dev_pagemap *page_dev_pagemap(const struct page *page)
> +{
> + WARN_ON(!is_zone_device_page(page));
VM_WARN_ON()?
With the above fixups:
Reviewed-by: Dan Williams <dan.j.williams@...el.com>
Powered by blists - more mailing lists