Message-ID: <20251009015839.3460231-6-samuel.holland@sifive.com>
Date: Wed, 8 Oct 2025 18:57:41 -0700
From: Samuel Holland <samuel.holland@...ive.com>
To: Palmer Dabbelt <palmer@...belt.com>,
Paul Walmsley <pjw@...nel.org>,
linux-riscv@...ts.infradead.org
Cc: devicetree@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
Conor Dooley <conor@...nel.org>,
Alexandre Ghiti <alex@...ti.fr>,
Emil Renner Berthing <kernel@...il.dk>,
Andrew Morton <akpm@...ux-foundation.org>,
Rob Herring <robh+dt@...nel.org>,
Krzysztof Kozlowski <krzk+dt@...nel.org>,
Samuel Holland <samuel.holland@...ive.com>
Subject: [PATCH v2 05/18] mm: Allow page table accessors to be non-idempotent

Currently, some functions such as pte_offset_map() are passed both
pointers to hardware page tables and pointers to previously-read PMD
entries on the stack. To ensure correctness in the first case, these
functions must use the page table accessor function (pmdp_get()) to
dereference the supplied pointer. In the second case, however, the
transformation inside pmdp_get() ends up applied twice: once when the
entry was first read from the page table, and again when the callee
dereferences the on-stack copy. This double application must be avoided
if pmdp_get() applies some non-idempotent transformation to the value.

Avoid the double transformation by calling set_pmd() on the stack
variables where necessary, keeping set_pmd()/pmdp_get() calls balanced.

Signed-off-by: Samuel Holland <samuel.holland@...ive.com>
---
Changes in v2:
- New patch for v2
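
Not part of the patch, just an illustration for reviewers: a minimal,
standalone userspace sketch of why balanced set_pmd()/pmdp_get() calls
matter when the accessors are non-idempotent. The XOR "cookie" below is
a hypothetical stand-in for whatever transformation an architecture
might apply to in-memory entries; the names mirror the kernel accessors,
but none of this is kernel code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t val; } pmd_t;

#define PMD_COOKIE 0xf0f0f0f0f0f0f0f0ULL

/* Writing an entry applies the (invented) transformation... */
static void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	pmdp->val = pmd.val ^ PMD_COOKIE;
}

/* ...and reading one back undoes it: one get() per set() is safe. */
static pmd_t pmdp_get(pmd_t *pmdp)
{
	return (pmd_t){ .val = pmdp->val ^ PMD_COOKIE };
}

/*
 * Stands in for pte_offset_map(): it may be handed a pointer into a
 * real page table, so it must dereference via pmdp_get().
 */
static uint64_t pte_table_base(pmd_t *pmdp)
{
	return pmdp_get(pmdp).val;
}

int main(void)
{
	pmd_t logical = { .val = 0x123456789000ULL };
	pmd_t table_entry, onstack;

	/* Case 1: pointer into a (simulated) hardware page table. */
	set_pmd(&table_entry, logical);
	assert(pte_table_base(&table_entry) == logical.val);

	/*
	 * Case 2: a previously-read value on the stack. Passing its
	 * address directly would make pte_table_base() untransform a
	 * value that was never transformed. Rebalance with set_pmd().
	 */
	onstack = logical;
	set_pmd(&onstack, onstack);
	assert(pte_table_base(&onstack) == logical.val);

	puts("balanced accessors: both cases agree");
	return 0;
}

Drop the second set_pmd() and the second assert fails; that imbalance
is exactly what the hunks below repair in each caller.
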
 kernel/events/core.c  | 2 ++
 mm/gup.c              | 3 +++
 mm/khugepaged.c       | 6 ++++--
 mm/page_table_check.c | 3 +++
 mm/pgtable-generic.c  | 2 ++
 5 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 952ba4e3d8815..d75be3d9e0405 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8142,6 +8142,8 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
 	if (pmd_leaf(pmd))
 		return pmd_leaf_size(pmd);
 
+	/* transform pmd as if &pmd pointed to a hardware page table */
+	set_pmd(&pmd, pmd);
 	ptep = pte_offset_map(&pmd, addr);
 	if (!ptep)
 		goto again;
diff --git a/mm/gup.c b/mm/gup.c
index f5676a3aa525d..34d1b59bd59c1 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2865,7 +2865,10 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
 	int ret = 0;
 	pte_t *ptep, *ptem;
 
+	/* transform pmd as if &pmd pointed to a hardware page table */
+	set_pmd(&pmd, pmd);
 	ptem = ptep = pte_offset_map(&pmd, addr);
+	pmd = pmdp_get(&pmd);
 	if (!ptep)
 		return 0;
 	do {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index acc620158696e..d8c22ef8406bd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1731,7 +1731,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 	struct mmu_notifier_range range;
 	struct mm_struct *mm;
 	unsigned long addr;
-	pmd_t *pmd, pgt_pmd;
+	pmd_t *pmd, pgt_pmd, pmdval;
 	spinlock_t *pml;
 	spinlock_t *ptl;
 	bool success = false;
@@ -1784,7 +1784,9 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 		 */
 		if (check_pmd_state(pmd) != SCAN_SUCCEED)
 			goto drop_pml;
-		ptl = pte_lockptr(mm, pmd);
+		/* pte_lockptr() needs a value, not a pointer to a page table */
+		pmdval = pmdp_get(pmd);
+		ptl = pte_lockptr(mm, &pmdval);
 		if (ptl != pml)
 			spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
 
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 31f4c39d20ef9..77d6688db0de9 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -260,7 +260,10 @@ void __page_table_check_pte_clear_range(struct mm_struct *mm,
 		return;
 
 	if (!pmd_bad(pmd) && !pmd_leaf(pmd)) {
+		/* transform pmd as if &pmd pointed to a hardware page table */
+		set_pmd(&pmd, pmd);
 		pte_t *ptep = pte_offset_map(&pmd, addr);
+		pmd = pmdp_get(&pmd);
 		unsigned long i;
 
 		if (WARN_ON(!ptep))
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 63a573306bfa2..6602deb002f10 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -299,6 +299,8 @@ pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
 		pmd_clear_bad(pmd);
 		goto nomap;
 	}
+	/* transform pmdval as if &pmdval pointed to a hardware page table */
+	set_pmd(&pmdval, pmdval);
 	return __pte_map(&pmdval, addr);
 nomap:
 	rcu_read_unlock();
--
2.47.2