Message-ID: <20250612181330.31236-5-bijan311@gmail.com>
Date: Thu, 12 Jun 2025 13:13:30 -0500
From: Bijan Tabatabai <bijan311@...il.com>
To: damon@...ts.linux.com,
	linux-mm@...ck.org,
	linux-doc@...r.kernel.org,
	linux-kernel@...r.kernel.org
Cc: sj@...nel.org,
	akpm@...ux-foundation.org,
	corbet@....net,
	david@...hat.com,
	ziy@...dia.com,
	matthew.brost@...el.com,
	joshua.hahnjy@...il.com,
	rakie.kim@...com,
	byungchul@...com,
	gourry@...rry.net,
	ying.huang@...ux.alibaba.com,
	apopple@...dia.com,
	bijantabatab@...ron.com,
	venkataravis@...ron.com,
	emirakhur@...ron.com,
	ajayjoshi@...ron.com,
	vtavarespetr@...ron.com
Subject: [RFC PATCH 4/4] mm/damon/vaddr: Add vaddr version of DAMOS_INTERLEAVE

From: Bijan Tabatabai <bijantabatab@...ron.com>

Add a vaddr implementation of the DAMOS_INTERLEAVE action. Unlike the paddr
implementation, which relies on an rmap walk, the vaddr implementation walks
the target region's page tables and uses the target task's weighted
interleave policy to pick a destination node for each folio.

Below is an example of its usage, where pages are initially interleaved at
a 1:1 ratio and then changed to a 2:1 ratio. The alloc_data program simply
allocates 1GB of data and then sleeps.
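
The exact alloc_data source isn't included in this patch; a minimal sketch
of such a program (hypothetical, for illustration only) could be:

  /* alloc_data.c: allocate ~1GiB, fault it in, then sleep. */
  /* (Parsing of the "1G" size argument is omitted for brevity.) */
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
          size_t sz = 1UL << 30;          /* 1 GiB */
          char *buf = malloc(sz);

          if (!buf)
                  return 1;
          memset(buf, 1, sz);             /* touch every page so it is allocated */
          pause();                        /* sleep until signalled */
          return 0;
  }
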
  $ echo 1 | sudo tee /sys/kernel/mm/mempolicy/weighted_interleave/node0
  $ echo 1 | sudo tee /sys/kernel/mm/mempolicy/weighted_interleave/node1
  $ numactl -w 0,1 ./alloc_data 1G&
  [1] 11447
  $ cat interleave_vaddr.yaml
  kdamonds:
  - state: null
    pid: null
    contexts:
    - ops: vaddr
      targets:
      - pid: 11447
        regions: []
      intervals:
        sample_us: 200 ms
        aggr_us: 5 s
        ops_update_us: 10 s
      nr_regions:
        min: 200
        max: 500
      schemes:
      - action: interleave
        access_pattern:
          sz_bytes:
            min: 0 B
            max: max
          nr_accesses:
            min: 0 %
            max: 100 %
          age:
            min: 0 ns
            max: max

  $ sudo ./damo/damo start interleave_vaddr.yaml
  $ numastat -c -p 11447

  Per-node process memory usage (in MBs) for PID 11447 (alloc_data)
           Node 0 Node 1 Total
           ------ ------ -----
  Huge          0      0     0
  Heap          0      0     0
  Stack         0      0     0
  Private     514    514  1027
  -------  ------ ------ -----
  Total       514    514  1027
  $ echo 2 | sudo tee /sys/kernel/mm/mempolicy/weighted_interleave/node0
  $ numastat -c -p 11447

  Per-node process memory usage (in MBs) for PID 11447 (alloc_data)
           Node 0 Node 1 Total
           ------ ------ -----
  Huge          0      0     0
  Heap          0      0     0
  Stack         0      0     0
  Private     684    343  1027
  -------  ------ ------ -----
  Total       684    343  1027
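
  For reference, the post-change split is consistent with the configured 2:1
  weights: 1027 MB * 2/3 ~= 685 MB on node 0 and 1027 MB * 1/3 ~= 342 MB on
  node 1, which lines up with the observed 684/343 split.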

Signed-off-by: Bijan Tabatabai <bijantabatab@...ron.com>
---
 Documentation/mm/damon/design.rst |   2 +-
 mm/damon/ops-common.c             |  13 +++
 mm/damon/ops-common.h             |   2 +
 mm/damon/paddr.c                  |  11 +--
 mm/damon/vaddr.c                  | 135 ++++++++++++++++++++++++++++++
 5 files changed, 155 insertions(+), 8 deletions(-)

diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst
index c50d2105cea0..a79ba62f820b 100644
--- a/Documentation/mm/damon/design.rst
+++ b/Documentation/mm/damon/design.rst
@@ -456,7 +456,7 @@ that supports each action are as below.
  - ``migrate_cold``: Migrate the regions prioritizing colder regions.
    Supported by ``paddr`` operations set.
  - ``interleave``: Interleave the regions according to the weighted interleave weights.
-   Supported by ``paddr`` operations set.
+   Supported by ``vaddr``, ``fvaddr`` and ``paddr`` operations set.
  - ``stat``: Do nothing but count the statistics.
    Supported by all operations sets.
 
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index 2c4fb274b7f6..59d92404fc8f 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -261,3 +261,16 @@ unsigned long damon_migrate_pages(struct list_head *folio_list,
 
 	return nr_migrated;
 }
+
+int damon_interleave_target_nid(unsigned long addr, struct vm_area_struct *vma,
+		struct mempolicy *pol, struct folio *folio)
+{
+	pgoff_t ilx;
+	int target_nid;
+
+	ilx = vma->vm_pgoff >> folio_order(folio);
+	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + folio_order(folio));
+	policy_nodemask(0, pol, ilx, &target_nid);
+
+	return target_nid;
+}
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
index 54209a7e70e6..bacb4de92dc9 100644
--- a/mm/damon/ops-common.h
+++ b/mm/damon/ops-common.h
@@ -18,3 +18,5 @@ int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
 			struct damos *s);
 
 unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid);
+int damon_interleave_target_nid(unsigned long addr, struct vm_area_struct *vma,
+			struct mempolicy *pol, struct folio *folio);
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 722d69f26e37..93e3c72b54c7 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -415,7 +415,7 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s,
 }
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_NUMA)
-struct damos_interleave_private {
+struct damos_pa_interleave_private {
 	struct list_head *folio_migration_list;
 	bool putback_lru;
 };
@@ -425,9 +425,8 @@ static bool damon_pa_interleave_rmap(struct folio *folio, struct vm_area_struct
 {
 	struct mempolicy *pol;
 	struct task_struct *task;
-	pgoff_t ilx;
 	int target_nid;
-	struct damos_interleave_private *priv = arg;
+	struct damos_pa_interleave_private *priv = arg;
 
 	task = rcu_dereference(vma->vm_mm->owner);
 	if (!task)
@@ -443,9 +442,7 @@ static bool damon_pa_interleave_rmap(struct folio *folio, struct vm_area_struct
 		return true;
 	}
 
-	ilx = vma->vm_pgoff >> folio_order(folio);
-	ilx += (addr - vma->vm_start) >> (PAGE_SHIFT + folio_order(folio));
-	policy_nodemask(0, pol, ilx, &target_nid);
+	target_nid = damon_interleave_target_nid(addr, vma, pol, folio);
 
 	if (target_nid != NUMA_NO_NODE && folio_nid(folio) != target_nid) {
 		list_add(&folio->lru, &priv->folio_migration_list[target_nid]);
@@ -459,7 +456,7 @@ static bool damon_pa_interleave_rmap(struct folio *folio, struct vm_area_struct
 static unsigned long damon_pa_interleave(struct damon_region *r, struct damos *s,
 		unsigned long *sz_filter_passed)
 {
-	struct damos_interleave_private priv;
+	struct damos_pa_interleave_private priv;
 	struct rmap_walk_control rwc;
 	unsigned long addr, applied;
 	struct folio *folio;
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 46554e49a478..1d1170f49317 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -9,12 +9,14 @@
 
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
+#include <linux/mempolicy.h>
 #include <linux/mman.h>
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
 #include <linux/pagewalk.h>
 #include <linux/sched/mm.h>
 
+#include "../internal.h"
 #include "ops-common.h"
 
 #ifdef CONFIG_DAMON_VADDR_KUNIT_TEST
@@ -653,6 +655,137 @@ static unsigned long damos_madvise(struct damon_target *target,
 }
 #endif	/* CONFIG_ADVISE_SYSCALLS */
 
+#ifdef CONFIG_NUMA
+struct damos_va_interleave_private {
+	struct list_head *folio_migration_list;
+	struct mempolicy *pol;
+};
+
+static void damos_va_interleave_folio(unsigned long addr, struct folio *folio,
+	struct vm_area_struct *vma, struct damos_va_interleave_private *priv)
+{
+	int target_nid;
+
+	if (!folio_isolate_lru(folio))
+		return;
+
+	target_nid = damon_interleave_target_nid(addr, vma, priv->pol, folio);
+
+	if (target_nid != NUMA_NO_NODE && folio_nid(folio) != target_nid)
+		list_add(&folio->lru, &priv->folio_migration_list[target_nid]);
+	else
+		folio_putback_lru(folio);
+
+}
+
+static int damos_va_interleave_pmd(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damos_va_interleave_private *priv = walk->private;
+	struct folio *folio;
+	spinlock_t *ptl;
+	pmd_t pmde;
+
+	ptl = pmd_lock(walk->mm, pmd);
+	pmde = pmdp_get(pmd);
+
+	if (!pmd_present(pmde) || !pmd_trans_huge(pmde))
+		goto unlock;
+
+	folio = damon_get_folio(pmd_pfn(pmde));
+	if (!folio)
+		goto unlock;
+
+	damos_va_interleave_folio(addr, folio, walk->vma, priv);
+
+	folio_put(folio);
+unlock:
+	spin_unlock(ptl);
+	return 0;
+}
+
+static int damos_va_interleave_pte(pte_t *pte, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damos_va_interleave_private *priv = walk->private;
+	struct folio *folio;
+
+	if (pte_none(*pte) || !pte_present(*pte))
+		return 0;
+
+	folio = vm_normal_folio(walk->vma, addr, *pte);
+	if (!folio)
+		return 0;
+	folio_get(folio);
+
+	damos_va_interleave_folio(addr, folio, walk->vma, priv);
+
+	folio_put(folio);
+	return 0;
+}
+
+static unsigned long damos_va_interleave(struct damon_target *target,
+		struct damon_region *r, struct damos *s)
+{
+	struct damos_va_interleave_private priv;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	int ret;
+	unsigned long applied = 0;
+	struct mm_walk_ops walk_ops = {
+		.pmd_entry = damos_va_interleave_pmd,
+		.pte_entry = damos_va_interleave_pte,
+	};
+
+	task = damon_get_task_struct(target);
+	if (!task)
+		return 0;
+
+	priv.pol = get_task_policy(task);
+	if (!priv.pol)
+		goto put_task;
+
+	if (priv.pol->mode != MPOL_WEIGHTED_INTERLEAVE)
+		goto put_pol;
+
+	priv.folio_migration_list = kmalloc_array(nr_node_ids, sizeof(struct list_head),
+		GFP_KERNEL);
+	if (!priv.folio_migration_list)
+		goto put_pol;
+
+	for (int i = 0; i < nr_node_ids; i++)
+		INIT_LIST_HEAD(&priv.folio_migration_list[i]);
+
+	mm = damon_get_mm(target);
+	if (!mm)
+		goto free_folio_list;
+
+	mmap_read_lock(mm);
+	ret = walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	for (int i = 0; i < nr_node_ids; i++) {
+		applied += damon_migrate_pages(&priv.folio_migration_list[i], i);
+		cond_resched();
+	}
+
+free_folio_list:
+	kfree(priv.folio_migration_list);
+put_pol:
+	mpol_cond_put(priv.pol);
+put_task:
+	put_task_struct(task);
+	return applied * PAGE_SIZE;
+}
+#else
+static unsigned long damos_va_interleave(struct damon_target *target,
+		struct damon_region *r, struct damos *s)
+{
+	return 0;
+}
+#endif /* CONFIG_NUMA */
+
 static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
 		struct damon_target *t, struct damon_region *r,
 		struct damos *scheme, unsigned long *sz_filter_passed)
@@ -675,6 +808,8 @@ static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
 	case DAMOS_NOHUGEPAGE:
 		madv_action = MADV_NOHUGEPAGE;
 		break;
+	case DAMOS_INTERLEAVE:
+		return damos_va_interleave(t, r, scheme);
 	case DAMOS_STAT:
 		return 0;
 	default:
-- 
2.43.5

