Message-ID: <20250612181330.31236-5-bijan311@gmail.com>
Date: Thu, 12 Jun 2025 13:13:30 -0500
From: Bijan Tabatabai <bijan311@...il.com>
To: damon@...ts.linux.com,
linux-mm@...ck.org,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: sj@...nel.org,
akpm@...ux-foundation.org,
corbet@....net,
david@...hat.com,
ziy@...dia.com,
matthew.brost@...el.com,
joshua.hahnjy@...il.com,
rakie.kim@...com,
byungchul@...com,
gourry@...rry.net,
ying.huang@...ux.alibaba.com,
apopple@...dia.com,
bijantabatab@...ron.com,
venkataravis@...ron.com,
emirakhur@...ron.com,
ajayjoshi@...ron.com,
vtavarespetr@...ron.com
Subject: [RFC PATCH 4/4] mm/damon/vaddr: Add vaddr version of DAMOS_INTERLEAVE

From: Bijan Tabatabai <bijantabatab@...ron.com>

This patch adds a vaddr implementation of the DAMOS_INTERLEAVE action.
Below is an example of its usage, where pages are initially interleaved at
a 1:1 ratio and then re-interleaved at a 2:1 ratio. The alloc_data program
simply allocates 1GB of data and then sleeps.

$ echo 1 | sudo tee /sys/kernel/mm/mempolicy/weighted_interleave/node0
$ echo 1 | sudo tee /sys/kernel/mm/mempolicy/weighted_interleave/node1
$ numactl -w 0,1 ./alloc_data 1G&
[1] 11447
$ cat interleave_vaddr.yaml
kdamonds:
- state: null
  pid: null
  contexts:
  - ops: vaddr
    targets:
    - pid: 11447
      regions: []
    intervals:
      sample_us: 200 ms
      aggr_us: 5 s
      ops_update_us: 10 s
    nr_regions:
      min: 200
      max: 500
    schemes:
    - action: interleave
      access_pattern:
        sz_bytes:
          min: 0 B
          max: max
        nr_accesses:
          min: 0 %
          max: 100 %
        age:
          min: 0 ns
          max: max
$ sudo ./damo/damo start interleave_vaddr.yaml
$ numastat -c -p 11447
Per-node process memory usage (in MBs) for PID 11447 (alloc_data)
         Node 0 Node 1 Total
         ------ ------ -----
Huge          0      0     0
Heap          0      0     0
Stack         0      0     0
Private     514    514  1027
-------  ------ ------ -----
Total       514    514  1027
$ echo 2 | sudo tee /sys/kernel/mm/mempolicy/weighted_interleave/node0
$ numastat -c -p 11447
Per-node process memory usage (in MBs) for PID 11447 (alloc_data)
         Node 0 Node 1 Total
         ------ ------ -----
Huge          0      0     0
Heap          0      0     0
Stack         0      0     0
Private     684    343  1027
-------  ------ ------ -----
Total       684    343  1027
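
For reference, the resulting 684 MB : 343 MB split is roughly 2:1, matching
the updated weighted interleave weights.
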
Signed-off-by: Bijan Tabatabai <bijantabatab@...ron.com>
---
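
A note for reviewers on the target node calculation shared with the paddr
implementation: the user-space sketch below mirrors the interleave index
(ilx) computation used by damon_interleave_target_nid() and pairs it with a
weighted round-robin node selection. The 2:1 weights and the
modulo-over-total-weight selection here are assumptions about
MPOL_WEIGHTED_INTERLEAVE behavior, used purely for illustration; in the
kernel the node is resolved by policy_nodemask().

/*
 * Illustrative only, not part of the patch.  Shows how a folio-aligned
 * interleave index maps a virtual address to a node under assumed
 * weighted-interleave semantics (node weights 2:1, as in the example
 * above).
 */
#include <stdio.h>

#define PAGE_SHIFT 12

static int weighted_interleave_node(unsigned long ilx,
				    const unsigned int *weights, int nr_nodes)
{
	unsigned int total = 0, pos;
	int nid;

	/* Reduce the index modulo the total weight, then walk the nodes. */
	for (nid = 0; nid < nr_nodes; nid++)
		total += weights[nid];

	pos = ilx % total;
	for (nid = 0; nid < nr_nodes; nid++) {
		if (pos < weights[nid])
			return nid;
		pos -= weights[nid];
	}
	return 0;
}

int main(void)
{
	unsigned int weights[2] = { 2, 1 };	/* node0:node1 = 2:1 */
	unsigned long vm_start = 0x7f0000000000UL;	/* hypothetical VMA */
	unsigned long vm_pgoff = 0;		/* anonymous mapping */
	int order = 0;				/* 4 KiB folios */
	unsigned long addr;

	for (addr = vm_start; addr < vm_start + 6 * 4096; addr += 4096) {
		/* Same index calculation as the helper added below. */
		unsigned long ilx = (vm_pgoff >> order) +
			((addr - vm_start) >> (PAGE_SHIFT + order));

		printf("addr %#lx -> ilx %lu -> node %d\n",
		       addr, ilx, weighted_interleave_node(ilx, weights, 2));
	}
	return 0;
}
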
 Documentation/mm/damon/design.rst |   2 +-
 mm/damon/ops-common.c             |  13 +++
 mm/damon/ops-common.h             |   2 +
 mm/damon/paddr.c                  |  11 +--
 mm/damon/vaddr.c                  | 135 ++++++++++++++++++++++++++++++
 5 files changed, 155 insertions(+), 8 deletions(-)

diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst
index c50d2105cea0..a79ba62f820b 100644
--- a/Documentation/mm/damon/design.rst
+++ b/Documentation/mm/damon/design.rst
@@ -456,7 +456,7 @@ that supports each action are as below.
 - ``migrate_cold``: Migrate the regions prioritizing colder regions.
   Supported by ``paddr`` operations set.
 - ``interleave``: Interleave the regions according to the weighted interleave weights.
-  Supported by ``paddr`` operations set.
+  Supported by ``vaddr``, ``fvaddr`` and ``paddr`` operations set.
 - ``stat``: Do nothing but count the statistics.
   Supported by all operations sets.
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index 2c4fb274b7f6..59d92404fc8f 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -261,3 +261,16 @@ unsigned long damon_migrate_pages(struct list_head *folio_list,
 	return nr_migrated;
 }
+
+int damon_interleave_target_nid(unsigned long addr, struct vm_area_struct *vma,
+		struct mempolicy *pol, struct folio *folio)
+{
+	pgoff_t ilx;
+	int target_nid;
+
+	ilx = vma->vm_pgoff >> folio_order(folio);
+	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + folio_order(folio));
+	policy_nodemask(0, pol, ilx, &target_nid);
+
+	return target_nid;
+}
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
index 54209a7e70e6..bacb4de92dc9 100644
--- a/mm/damon/ops-common.h
+++ b/mm/damon/ops-common.h
@@ -18,3 +18,5 @@ int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
 			struct damos *s);
 unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid);
+int damon_interleave_target_nid(unsigned long addr, struct vm_area_struct *vma,
+		struct mempolicy *pol, struct folio *folio);
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 722d69f26e37..93e3c72b54c7 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -415,7 +415,7 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
 }
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_NUMA)
-struct damos_interleave_private {
+struct damos_pa_interleave_private {
 	struct list_head *folio_migration_list;
 	bool putback_lru;
 };
@@ -425,9 +425,8 @@ static bool damon_pa_interleave_rmap(struct folio *folio, struct vm_area_struct
 {
 	struct mempolicy *pol;
 	struct task_struct *task;
-	pgoff_t ilx;
 	int target_nid;
-	struct damos_interleave_private *priv = arg;
+	struct damos_pa_interleave_private *priv = arg;
 
 	task = rcu_dereference(vma->vm_mm->owner);
 	if (!task)
@@ -443,9 +442,7 @@ static bool damon_pa_interleave_rmap(struct folio *folio, struct vm_area_struct
 		return true;
 	}
 
-	ilx = vma->vm_pgoff >> folio_order(folio);
-	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + folio_order(folio));
-	policy_nodemask(0, pol, ilx, &target_nid);
+	target_nid = damon_interleave_target_nid(addr, vma, pol, folio);
 
 	if (target_nid != NUMA_NO_NODE && folio_nid(folio) != target_nid) {
 		list_add(&folio->lru, &priv->folio_migration_list[target_nid]);
@@ -459,7 +456,7 @@ static bool damon_pa_interleave_rmap(struct folio *folio, struct vm_area_struct
 static unsigned long damon_pa_interleave(struct damon_region *r, struct damos *s,
 		unsigned long *sz_filter_passed)
 {
-	struct damos_interleave_private priv;
+	struct damos_pa_interleave_private priv;
 	struct rmap_walk_control rwc;
 	unsigned long addr, applied;
 	struct folio *folio;
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 46554e49a478..1d1170f49317 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -9,12 +9,14 @@
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
+#include <linux/mempolicy.h>
 #include <linux/mman.h>
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
 #include <linux/pagewalk.h>
 #include <linux/sched/mm.h>
 
+#include "../internal.h"
 #include "ops-common.h"
 
 #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
@@ -653,6 +655,137 @@ static unsigned long damos_madvise(struct damon_target *target,
 }
 #endif /* CONFIG_ADVISE_SYSCALLS */
 
+#ifdef CONFIG_NUMA
+struct damos_va_interleave_private {
+	struct list_head *folio_migration_list;
+	struct mempolicy *pol;
+};
+
+static void damos_va_interleave_folio(unsigned long addr, struct folio *folio,
+		struct vm_area_struct *vma, struct damos_va_interleave_private *priv)
+{
+	int target_nid;
+
+	if (!folio_isolate_lru(folio))
+		return;
+
+	target_nid = damon_interleave_target_nid(addr, vma, priv->pol, folio);
+
+	if (target_nid != NUMA_NO_NODE && folio_nid(folio) != target_nid)
+		list_add(&folio->lru, &priv->folio_migration_list[target_nid]);
+	else
+		folio_putback_lru(folio);
+
+}
+
+static int damos_va_interleave_pmd(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damos_va_interleave_private *priv = walk->private;
+	struct folio *folio;
+	spinlock_t *ptl;
+	pmd_t pmde;
+
+	ptl = pmd_lock(walk->mm, pmd);
+	pmde = pmdp_get(pmd);
+
+	if (!pmd_present(pmde) || !pmd_trans_huge(pmde))
+		goto unlock;
+
+	folio = damon_get_folio(pmd_pfn(pmde));
+	if (!folio)
+		goto unlock;
+
+	damos_va_interleave_folio(addr, folio, walk->vma, priv);
+
+	folio_put(folio);
+unlock:
+	spin_unlock(ptl);
+	return 0;
+}
+
+static int damos_va_interleave_pte(pte_t *pte, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damos_va_interleave_private *priv = walk->private;
+	struct folio *folio;
+
+	if (pte_none(*pte) || !pte_present(*pte))
+		return 0;
+
+	folio = vm_normal_folio(walk->vma, addr, *pte);
+	if (!folio)
+		return 0;
+	folio_get(folio);
+
+	damos_va_interleave_folio(addr, folio, walk->vma, priv);
+
+	folio_put(folio);
+	return 0;
+}
+
+static unsigned long damos_va_interleave(struct damon_target *target,
+		struct damon_region *r, struct damos *s)
+{
+	struct damos_va_interleave_private priv;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	int ret;
+	unsigned long applied = 0;
+	struct mm_walk_ops walk_ops = {
+		.pmd_entry = damos_va_interleave_pmd,
+		.pte_entry = damos_va_interleave_pte,
+	};
+
+	task = damon_get_task_struct(target);
+	if (!task)
+		return 0;
+
+	priv.pol = get_task_policy(task);
+	if (!priv.pol)
+		goto put_task;
+
+	if (priv.pol->mode != MPOL_WEIGHTED_INTERLEAVE)
+		goto put_pol;
+
+	priv.folio_migration_list = kmalloc_array(nr_node_ids, sizeof(struct list_head),
+			GFP_KERNEL);
+	if (!priv.folio_migration_list)
+		goto put_pol;
+
+	for (int i = 0; i < nr_node_ids; i++)
+		INIT_LIST_HEAD(&priv.folio_migration_list[i]);
+
+	mm = damon_get_mm(target);
+	if (!mm)
+		goto free_folio_list;
+
+	mmap_read_lock(mm);
+	ret = walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	for (int i = 0; i < nr_node_ids; i++) {
+		applied += damon_migrate_pages(&priv.folio_migration_list[i], i);
+		cond_resched();
+	}
+
+free_folio_list:
+	kfree(priv.folio_migration_list);
+put_pol:
+	mpol_cond_put(priv.pol);
+put_task:
+	put_task_struct(task);
+	return applied * PAGE_SIZE;
+}
+#else
+static unsigned long damos_va_interleave(struct damon_target *target,
+		struct damon_region *r, struct damos *s)
+{
+	return 0;
+}
+#endif /* CONFIG_NUMA */
+
 static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
 		struct damon_target *t, struct damon_region *r,
 		struct damos *scheme, unsigned long *sz_filter_passed)
@@ -675,6 +808,8 @@ static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
 	case DAMOS_NOHUGEPAGE:
 		madv_action = MADV_NOHUGEPAGE;
 		break;
+	case DAMOS_INTERLEAVE:
+		return damos_va_interleave(t, r, scheme);
 	case DAMOS_STAT:
 		return 0;
 	default:
--
2.43.5