[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250512123424.637989-3-david@redhat.com>
Date: Mon, 12 May 2025 14:34:15 +0200
From: David Hildenbrand <david@...hat.com>
To: linux-kernel@...r.kernel.org
Cc: linux-mm@...ck.org,
x86@...nel.org,
intel-gfx@...ts.freedesktop.org,
dri-devel@...ts.freedesktop.org,
linux-trace-kernel@...r.kernel.org,
David Hildenbrand <david@...hat.com>,
Dave Hansen <dave.hansen@...ux.intel.com>,
Andy Lutomirski <luto@...nel.org>,
Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
Borislav Petkov <bp@...en8.de>,
"H. Peter Anvin" <hpa@...or.com>,
Jani Nikula <jani.nikula@...ux.intel.com>,
Joonas Lahtinen <joonas.lahtinen@...ux.intel.com>,
Rodrigo Vivi <rodrigo.vivi@...el.com>,
Tvrtko Ursulin <tursulin@...ulin.net>,
David Airlie <airlied@...il.com>,
Simona Vetter <simona@...ll.ch>,
Andrew Morton <akpm@...ux-foundation.org>,
Steven Rostedt <rostedt@...dmis.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
"Liam R. Howlett" <Liam.Howlett@...cle.com>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>,
Jann Horn <jannh@...gle.com>,
Pedro Falcato <pfalcato@...e.de>,
Peter Xu <peterx@...hat.com>,
Ingo Molnar <mingo@...nel.org>
Subject: [PATCH v2 02/11] mm: convert track_pfn_insert() to pfnmap_setup_cachemode*()
... by factoring it out from track_pfn_remap() into
pfnmap_setup_cachemode() and providing pfnmap_setup_cachemode_pfn() as
a replacement for track_pfn_insert().
For PMDs/PUDs, we keep checking a single pfn only. Add some documentation,
and also document why it is valid to not check the whole pfn range.
We'll reuse pfnmap_setup_cachemode() from core MM next.
Acked-by: Ingo Molnar <mingo@...nel.org> # x86 bits
Signed-off-by: David Hildenbrand <david@...hat.com>
---
arch/x86/mm/pat/memtype.c | 24 ++++++------------
include/linux/pgtable.h | 52 +++++++++++++++++++++++++++++++++------
mm/huge_memory.c | 5 ++--
mm/memory.c | 4 +--
4 files changed, 57 insertions(+), 28 deletions(-)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index edec5859651d6..fa78facc6f633 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -1031,7 +1031,6 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr, unsigned long size)
{
resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
- enum page_cache_mode pcm;
/* reserve the whole chunk starting from paddr */
if (!vma || (addr == vma->vm_start
@@ -1044,13 +1043,17 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return ret;
}
+ return pfnmap_setup_cachemode(pfn, size, prot);
+}
+
+int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size, pgprot_t *prot)
+{
+ resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
+ enum page_cache_mode pcm;
+
if (!pat_enabled())
return 0;
- /*
- * For anything smaller than the vma size we set prot based on the
- * lookup.
- */
pcm = lookup_memtype(paddr);
/* Check memtype for the remaining pages */
@@ -1065,17 +1068,6 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return 0;
}
-void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
-{
- enum page_cache_mode pcm;
-
- if (!pat_enabled())
- return;
-
- pcm = lookup_memtype(pfn_t_to_phys(pfn));
- pgprot_set_cachemode(prot, pcm);
-}
-
/*
* untrack_pfn is called while unmapping a pfnmap for a region.
* untrack can be called for a specific region indicated by pfn and size or
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index f1e890b604609..be1745839871c 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1496,13 +1496,10 @@ static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
return 0;
}
-/*
- * track_pfn_insert is called when a _new_ single pfn is established
- * by vmf_insert_pfn().
- */
-static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- pfn_t pfn)
+static inline int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
+ pgprot_t *prot)
{
+ return 0;
}
/*
@@ -1552,8 +1549,32 @@ static inline void untrack_pfn_clear(struct vm_area_struct *vma)
extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long addr,
unsigned long size);
-extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
- pfn_t pfn);
+
+/**
+ * pfnmap_setup_cachemode - setup the cachemode in the pgprot for a pfn range
+ * @pfn: the start of the pfn range
+ * @size: the size of the pfn range in bytes
+ * @prot: the pgprot to modify
+ *
+ * Look up the cachemode for the pfn range starting at @pfn with the size
+ * @size and store it in @prot, leaving other data in @prot unchanged.
+ *
+ * This allows for a hardware implementation to have fine-grained control of
+ * memory cache behavior at page level granularity. Without a hardware
+ * implementation, this function does nothing.
+ *
+ * Currently there is only one implementation for this - x86 Page Attribute
+ * Table (PAT). See Documentation/arch/x86/pat.rst for more details.
+ *
+ * This function can fail if the pfn range spans pfns that require differing
+ * cachemodes. If the pfn range was previously verified to have a single
+ * cachemode, it is sufficient to query only a single pfn. The assumption is
+ * that this is the case for drivers using the vmf_insert_pfn*() interface.
+ *
+ * Returns 0 on success and -EINVAL on error.
+ */
+int pfnmap_setup_cachemode(unsigned long pfn, unsigned long size,
+ pgprot_t *prot);
extern int track_pfn_copy(struct vm_area_struct *dst_vma,
struct vm_area_struct *src_vma, unsigned long *pfn);
extern void untrack_pfn_copy(struct vm_area_struct *dst_vma,
@@ -1563,6 +1584,21 @@ extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
extern void untrack_pfn_clear(struct vm_area_struct *vma);
#endif
+/**
+ * pfnmap_setup_cachemode_pfn - setup the cachemode in the pgprot for a pfn
+ * @pfn: the pfn
+ * @prot: the pgprot to modify
+ *
+ * Look up the cachemode for @pfn and store it in @prot, leaving other
+ * data in @prot unchanged.
+ *
+ * See pfnmap_setup_cachemode() for details.
+ */
+static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
+{
+ pfnmap_setup_cachemode(pfn, PAGE_SIZE, prot);
+}
+
#ifdef CONFIG_MMU
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2780a12b25f01..d3e66136e41a3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1455,7 +1455,8 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write)
return VM_FAULT_OOM;
}
- track_pfn_insert(vma, &pgprot, pfn);
+ pfnmap_setup_cachemode_pfn(pfn_t_to_pfn(pfn), &pgprot);
+
ptl = pmd_lock(vma->vm_mm, vmf->pmd);
error = insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write,
pgtable);
@@ -1577,7 +1578,7 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write)
if (addr < vma->vm_start || addr >= vma->vm_end)
return VM_FAULT_SIGBUS;
- track_pfn_insert(vma, &pgprot, pfn);
+ pfnmap_setup_cachemode_pfn(pfn_t_to_pfn(pfn), &pgprot);
ptl = pud_lock(vma->vm_mm, vmf->pud);
insert_pfn_pud(vma, addr, vmf->pud, pfn, write);
diff --git a/mm/memory.c b/mm/memory.c
index 99af83434e7c5..064fc55d8eab9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2564,7 +2564,7 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
if (!pfn_modify_allowed(pfn, pgprot))
return VM_FAULT_SIGBUS;
- track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV));
+ pfnmap_setup_cachemode_pfn(pfn, &pgprot);
return insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot,
false);
@@ -2627,7 +2627,7 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma,
if (addr < vma->vm_start || addr >= vma->vm_end)
return VM_FAULT_SIGBUS;
- track_pfn_insert(vma, &pgprot, pfn);
+ pfnmap_setup_cachemode_pfn(pfn_t_to_pfn(pfn), &pgprot);
if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
return VM_FAULT_SIGBUS;
--
2.49.0
Powered by blists - more mailing lists