Message-ID: <20240803094715.23900-3-gourry@gourry.net>
Date: Sat, 3 Aug 2024 05:47:14 -0400
From: Gregory Price <gourry@...rry.net>
To: linux-mm@...ck.org
Cc: linux-kernel@...r.kernel.org,
akpm@...ux-foundation.org,
david@...hat.com,
ying.huang@...el.com,
nphamcs@...il.com,
nehagholkar@...a.com,
abhishekd@...a.com
Subject: [PATCH 2/3] memory: allow non-fault migration in numa_migrate_prep path
numa_migrate_prep() and mpol_misplaced() presume their callers are in
the fault path. To enable migrations from the pagecache, it is
preferable to reuse the same migration-prep logic from non-faulting
paths.

Mildly refactor numa_migrate_prep() and mpol_misplaced() so that they
may be called with (vmf = NULL) from non-faulting paths.

Count non-fault events as page-cache NUMA hints instead of fault
hints.
Signed-off-by: Gregory Price <gourry@...rry.net>
---
include/linux/vm_event_item.h | 1 +
mm/memory.c | 19 ++++++++++---------
mm/mempolicy.c | 25 +++++++++++++++++--------
mm/vmstat.c | 1 +
4 files changed, 29 insertions(+), 17 deletions(-)
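For illustration only (not part of the diff below), a minimal sketch of
how a non-fault path might use the relaxed prototype. The function name
pagecache_check_misplaced() is hypothetical; only the numa_migrate_prep()
signature from the mm/memory.c hunk is assumed:

static int pagecache_check_misplaced(struct folio *folio)
{
	int flags = 0;

	/*
	 * vmf == NULL: mpol_misplaced() falls back to get_task_policy(),
	 * the addr argument is ignored, and the event is counted as
	 * NUMA_HINT_PAGE_CACHE rather than a hint fault.
	 */
	return numa_migrate_prep(folio, NULL, 0, folio_nid(folio), &flags);
}

The return value is NUMA_NO_NODE if the folio is already on an allowed
node, or a target node id otherwise; the new counter should show up in
/proc/vmstat as "numa_hint_page_cache".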
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 747943bc8cc2..b917bcfffe6d 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -62,6 +62,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
NUMA_HUGE_PTE_UPDATES,
NUMA_HINT_FAULTS,
NUMA_HINT_FAULTS_LOCAL,
+ NUMA_HINT_PAGE_CACHE,
NUMA_PAGE_MIGRATE,
#endif
#ifdef CONFIG_MIGRATION
diff --git a/mm/memory.c b/mm/memory.c
index 1d97bdfd0ed6..8b664b08915c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5205,15 +5205,16 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
int numa_migrate_prep(struct folio *folio, struct vm_fault *vmf,
unsigned long addr, int page_nid, int *flags)
{
- struct vm_area_struct *vma = vmf->vma;
-
- /* Record the current PID acceesing VMA */
- vma_set_access_pid_bit(vma);
-
- count_vm_numa_event(NUMA_HINT_FAULTS);
- if (page_nid == numa_node_id()) {
- count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
- *flags |= TNF_FAULT_LOCAL;
+ /* If called from the fault path, record the current PID accessing the VMA */
+ if (vmf) {
+ vma_set_access_pid_bit(vmf->vma);
+ count_vm_numa_event(NUMA_HINT_FAULTS);
+ if (page_nid == numa_node_id()) {
+ count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+ *flags |= TNF_FAULT_LOCAL;
+ }
+ } else {
+ count_vm_numa_event(NUMA_HINT_PAGE_CACHE);
}
return mpol_misplaced(folio, vmf, addr);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b858e22b259d..0f654aff477a 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2726,12 +2726,16 @@ static void sp_free(struct sp_node *n)
* mpol_misplaced - check whether current folio node is valid in policy
*
* @folio: folio to be checked
- * @vmf: structure describing the fault
+ * @vmf: structure describing the fault (NULL if called outside the fault path)
* @addr: virtual address in @vma for shared policy lookup and interleave policy
+ * Ignored if vmf is NULL.
*
* Lookup current policy node id for vma,addr and "compare to" folio's
- * node id. Policy determination "mimics" alloc_page_vma().
- * Called from fault path where we know the vma and faulting address.
+ * node id - or the task's policy node id if vmf is NULL. Policy
+ * determination "mimics" alloc_page_vma().
+ *
+ * vmf must be non-NULL when called from the fault path, where the vma
+ * and faulting address are known; the caller must then hold the PTL.
*
* Return: NUMA_NO_NODE if the page is in a node that is valid for this
* policy, or a suitable node ID to allocate a replacement folio from.
@@ -2743,7 +2747,6 @@ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
pgoff_t ilx;
struct zoneref *z;
int curnid = folio_nid(folio);
- struct vm_area_struct *vma = vmf->vma;
int thiscpu = raw_smp_processor_id();
int thisnid = numa_node_id();
int polnid = NUMA_NO_NODE;
@@ -2753,18 +2756,24 @@ int mpol_misplaced(struct folio *folio, struct vm_fault *vmf,
* Make sure ptl is held so that we don't preempt and we
* have a stable smp processor id
*/
- lockdep_assert_held(vmf->ptl);
- pol = get_vma_policy(vma, addr, folio_order(folio), &ilx);
+ if (vmf) {
+ lockdep_assert_held(vmf->ptl);
+ pol = get_vma_policy(vmf->vma, addr, folio_order(folio), &ilx);
+ } else
+ pol = get_task_policy(current);
+
if (!(pol->flags & MPOL_F_MOF))
goto out;
switch (pol->mode) {
case MPOL_INTERLEAVE:
- polnid = interleave_nid(pol, ilx);
+ polnid = vmf ? interleave_nid(pol, ilx) :
+ interleave_nodes(pol);
break;
case MPOL_WEIGHTED_INTERLEAVE:
- polnid = weighted_interleave_nid(pol, ilx);
+ polnid = vmf ? weighted_interleave_nid(pol, ilx) :
+ weighted_interleave_nodes(pol);
break;
case MPOL_PREFERRED:
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 04a1cb6cc636..5a02e6ff043d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1316,6 +1316,7 @@ const char * const vmstat_text[] = {
"numa_huge_pte_updates",
"numa_hint_faults",
"numa_hint_faults_local",
+ "numa_hint_page_cache",
"numa_pages_migrated",
#endif
#ifdef CONFIG_MIGRATION
--
2.43.0