[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260206151424734QIyWL_pA-1QeJPbJlUxsO@zte.com.cn>
Date: Fri, 6 Feb 2026 15:14:24 +0800 (CST)
From: <xu.xin16@....com.cn>
To: <david@...nel.org>
Cc: <akpm@...ux-foundation.org>, <chengming.zhou@...ux.dev>,
<hughd@...gle.com>, <wang.yaxin@....com.cn>, <yang.yang29@....com.cn>,
<linux-mm@...ck.org>, <linux-kernel@...r.kernel.org>
Subject: [Reproducer]: [PATCH 2/2] ksm: Optimize rmap_walk_ksm by passing a suitable address range
Hi,
This is a simple demo reproducer for the high delay of rmap_walk_ksm. It uses mprotect()
to split a large VMA into many small VMAs, all of which share the same anon_vma.
Reproducing steps:
On a Linux machine with 1 GB or 4 GB of memory, do the following:
1 Compile:
gcc test_ksm_rmap.c -o test_ksm_rmap -lpthread
2 Configure Swap Space, for example we use CONFIG_ZRAM=y:
echo 300M > /sys/block/zram0/disksize;
mkswap /dev/zram0;
swapon /dev/zram0;
echo 150 > /proc/sys/vm/swappiness;
3 Running this test program:
./test_ksm_rmap
4 There are two ways to monitor the rmap_walk_ksm delay.
1) Before running test program (./test_ksm_rmap), you can use Ftrace's function_graph to monitor.
2) you can apply a monitoring sample patch at the end. You can acquire the following data by:
"cat /proc/rmap_walk/delay_max"
/*
* KSM rmap_walk delay reproducer.
*
* The main idea is to make KSM pages scanned by kswapd or kcompactd,
* or swapped out by kswapd. So do the following steps:
*
* 1) Alloc some same-content pages and trigger ksmd to merge them
* 2) Create another thread and alloc memory gradually to increase memory
* pressure.
* 3) Wait 1 minute at maximum.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <pthread.h>
#include <errno.h>
#include <time.h>
#include <signal.h>
#define PAGE_SIZE 4096
#define KSM_PAGES 50001
#define TEST_PATTERN 0xAA
#define WAIT_PRESSURE_TIME 60
#define SWAP_THRESHHOLD_KB 100
#define LOW_MEMORY_THRESH_KB (15 * 1024)
#define KSM_PATH "/sys/kernel/mm/ksm/"
#define KSM_RUN KSM_PATH "run"
#define KSM_PAGES_TO_SCAN KSM_PATH "pages_to_scan"
#define KSM_SLEEP_MILLISECONDS KSM_PATH "sleep_millisecs"
#define KSM_MAX_SHARING KSM_PATH "max_page_sharing"
#define KSM_PAGES_SHARED KSM_PATH "pages_shared"
#define KSM_PAGES_SHARING KSM_PATH "pages_sharing"
/*
 * Read a single unsigned long from a sysfs-style text file.
 * Returns 0 on success, -1 if the file cannot be opened or parsed.
 */
static int read_sysfs(const char *path, unsigned long *value)
{
	int rc = -1;
	FILE *fp = fopen(path, "r");

	if (!fp) {
		perror("fopen");
		return rc;
	}
	if (fscanf(fp, "%lu", value) == 1)
		rc = 0;
	fclose(fp);
	return rc;
}
/*
 * Write a string verbatim to a sysfs-style file.
 * Returns 0 on success, -1 if the file cannot be opened or written.
 */
static int write_sysfs(const char *path, const char *value)
{
	int rc = 0;
	FILE *fp = fopen(path, "w");

	if (!fp) {
		perror("fopen");
		return -1;
	}
	if (fputs(value, fp) == EOF)
		rc = -1;
	fclose(fp);
	return rc;
}
/*
 * Return the system's total memory expressed in 4 kB pages
 * (MemTotal from /proc/meminfo divided by 4), or 0 on error.
 *
 * Fix: declare the parameter list as (void) — an empty () declares a
 * function with unspecified arguments in pre-C23 C.
 */
static unsigned long get_system_memory_pages(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	unsigned long mem_total_kb = 0;
	char line[256];

	if (!f) {
		perror("fopen /proc/meminfo");
		return 0;
	}
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "MemTotal:")) {
			/* If parsing fails, mem_total_kb stays 0 and we report 0 pages. */
			sscanf(line, "MemTotal: %lu kB", &mem_total_kb);
			break;
		}
	}
	fclose(f);
	return mem_total_kb / 4;
}
/*
 * Turn KSM on and speed up its scanning via sysfs knobs.
 * A failure to set max_page_sharing is reported but not fatal;
 * any other failure aborts with -1. Returns 0 on success.
 */
static int configure_ksm(void)
{
	static const struct {
		const char *path;
		const char *value;
		const char *err_msg;
		int fatal;
	} knobs[] = {
		{ KSM_RUN, "1", "Failed to start KSM\n", 1 },
		{ KSM_MAX_SHARING, "10", "Failed to set max_page_sharing\n", 0 },
		{ KSM_PAGES_TO_SCAN, "2000", "Failed to set pages_to_scan\n", 1 },
		{ KSM_SLEEP_MILLISECONDS, "10", "Failed to set sleep_millisecs\n", 1 },
	};
	size_t k;

	printf("Configuring KSM parameters...\n");
	for (k = 0; k < sizeof(knobs) / sizeof(knobs[0]); k++) {
		if (write_sysfs(knobs[k].path, knobs[k].value) < 0) {
			fprintf(stderr, "%s", knobs[k].err_msg);
			if (knobs[k].fatal)
				return -1;
		}
	}
	printf("KSM started, scan speed increased\n");
	return 0;
}
/*
 * Map an anonymous region of ksm_pages_number pages, mark it MADV_MERGEABLE,
 * fill it with page-repeating content so ksmd can merge pages, and then
 * mprotect() every even page to PROT_READ so the single large VMA is split
 * into many small VMAs that all share one anon_vma (the scenario being
 * reproduced). Returns the region base (callers treat it as opaque and
 * pass it to free_ksm_pages), or NULL on mmap failure.
 *
 * Fixes vs. the original:
 *  - mmap() failure is signalled by MAP_FAILED, not NULL; the old
 *    `if (!ksm_region)` check could never fire and a failed map would
 *    have been written through.
 *  - pointer arithmetic is done on a char * instead of void * (a GNU
 *    extension, not standard C).
 *  - the size_t loop index is printed with %zu, not %ld.
 */
static void **allocate_ksm_pages(size_t ksm_pages_number)
{
	void *ksm_region;
	char *base;
	size_t i;

	printf("Allocating %zu KSM pages (%.2f MB)...\n",
	       ksm_pages_number, (ksm_pages_number * PAGE_SIZE) / (1024.0 * 1024.0));
	ksm_region = mmap(NULL, PAGE_SIZE * ksm_pages_number, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (ksm_region == MAP_FAILED) {
		perror("mmap ksm region pages");
		return NULL;
	}
	/* MADV_MERGEABLE may fail (e.g. KSM not built in); non-fatal by design. */
	if (madvise(ksm_region, PAGE_SIZE * ksm_pages_number, MADV_MERGEABLE) != 0)
		fprintf(stderr, "madvise failed: %s\n", strerror(errno));

	base = ksm_region;
	for (i = 0; i < ksm_pages_number; i++) {
		/* Byte value wraps at 256, so every 256th page is identical
		 * (mergeable); the first byte is overwritten with a marker. */
		memset(base + i * PAGE_SIZE, (int)i, PAGE_SIZE);
		base[i * PAGE_SIZE] = TEST_PATTERN;
	}
	/* Use mprotect to split many VMAs, one VMA per page. */
	for (i = 0; i < ksm_pages_number; i += 2) {
		if (mprotect(base + i * PAGE_SIZE, PAGE_SIZE, PROT_READ) == -1) {
			printf("seq:%zu\n", i);
			perror("mprotect failed");
		}
	}
	return ksm_region;
}
/* Unmap the KSM test region; a NULL pointer is silently ignored. */
static void free_ksm_pages(void *pages, size_t ksm_pages_number)
{
	if (pages)
		munmap(pages, ksm_pages_number * PAGE_SIZE);
}
/*
 * Return MemAvailable from /proc/meminfo in kB, or 0 on error.
 *
 * Fix: declare the parameter list as (void) — an empty () declares a
 * function with unspecified arguments in pre-C23 C.
 */
static unsigned long get_available_memory_kb(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	unsigned long mem_available_kb = 0;
	char line[256];

	if (!f) {
		perror("fopen /proc/meminfo");
		return 0;
	}
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "MemAvailable:")) {
			/* On parse failure mem_available_kb stays 0. */
			sscanf(line, "MemAvailable: %lu kB", &mem_available_kb);
			break;
		}
	}
	fclose(f);
	return mem_available_kb;
}
/*
 * Return the amount of swap currently in use (SwapTotal - SwapFree)
 * in kB, read from /proc/meminfo; returns 0 on open failure.
 *
 * Fix: declare the parameter list as (void) — an empty () declares a
 * function with unspecified arguments in pre-C23 C.
 */
static unsigned long get_swap_used_memory_kb(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	unsigned long swap_free_kb = 0;
	unsigned long swap_total_kb = 0;
	char line[256];

	if (!f) {
		perror("fopen /proc/meminfo when get swap");
		return 0;
	}
	/* SwapTotal appears before SwapFree in /proc/meminfo, so we can
	 * stop scanning once SwapFree has been seen. */
	while (fgets(line, sizeof(line), f)) {
		if (strstr(line, "SwapTotal"))
			sscanf(line, "SwapTotal: %lu kB", &swap_total_kb);
		if (strstr(line, "SwapFree")) {
			sscanf(line, "SwapFree: %lu kB", &swap_free_kb);
			break;
		}
	}
	fclose(f);
	return swap_total_kb - swap_free_kb;
}
/*
 * Arguments shared between the main thread and the memory-pressure thread.
 * The main thread sets .running = 0 to request shutdown; the pressure
 * thread also clears it itself once swap use is detected.
 */
typedef struct {
	size_t max_alloc_times;        /* capacity of the pressure_memory array */
	void ***pressure_memory_ptr;   /* out: receives the thread's allocation array */
	volatile int running;          /* stop flag, written from both threads.
	                                * NOTE(review): volatile is not a substitute for
	                                * C11 atomics; presumably acceptable for this
	                                * best-effort test — confirm. */
	size_t *allocated_pages;       /* out: total pages actually allocated */
} pressure_args_t;
/*
 * Pressure-thread body: keep allocating anonymous memory until swap usage
 * exceeds SWAP_THRESHHOLD_KB, the allocation budget (max_alloc_times) is
 * exhausted, or the main thread clears args->running.
 *
 * Strategy: while memory is plentiful, grab one large chunk that brings
 * availability down to LOW_MEMORY_THRESH_KB; once memory is low, allocate
 * page by page (touching each page) so swap-out is triggered gradually.
 * The allocation array is handed back through args->pressure_memory_ptr.
 *
 * Fixes vs. the original:
 *  - mmap() results were used unchecked; memset on MAP_FAILED would crash.
 *  - available_memory_kb could be printed uninitialized when the outer
 *    loop never ran.
 *  - allocated_times/allocated_pages were advanced by the full
 *    pages_to_alloc even when the inner loop broke early (over-count).
 *  - size_t values were printed with %ld instead of %zu; message typos
 *    ("kbused", "excced") fixed; the redundant fall-through "Maybe
 *    timeout" branch (which did not stop the loop) now breaks.
 */
static void *memory_pressure_thread(void *arg)
{
	pressure_args_t *args = (pressure_args_t *)arg;
	void **pressure_memory = malloc(args->max_alloc_times * sizeof(void *));
	size_t allocated_times = 0;
	size_t allocated_pages = 0;
	unsigned long available_memory_kb = 0;
	unsigned long current_swap_used;
	size_t pages_to_alloc;

	if (!pressure_memory) {
		perror("malloc pressure pages array");
		return NULL;
	}
	while (allocated_times < args->max_alloc_times && args->running) {
		available_memory_kb = get_available_memory_kb();
		if (available_memory_kb <= LOW_MEMORY_THRESH_KB) {
			/* Low memory: allocate page by page to nudge the system into swap. */
			size_t i;

			pages_to_alloc = available_memory_kb / 4;
			printf("Now available_memory_kb (%lu) is low, allocation %zu page by page\n",
			       available_memory_kb, pages_to_alloc);
			for (i = 0; i < pages_to_alloc; i++) {
				/* If swap has been triggered, then the task is completed! */
				if ((current_swap_used = get_swap_used_memory_kb()) > SWAP_THRESHHOLD_KB) {
					printf("Swap space %lu kb used, now pressure thread quit\n",
					       current_swap_used);
					args->running = 0;
					break;
				} else if (allocated_times + i >= args->max_alloc_times) {
					printf("\n The index allocated_times:%zu, i:%zu exceed the limit\n\n",
					       allocated_times, i);
					args->running = 0;
					break;
				} else if (args->running == 0) {
					printf("Maybe timeout, pressure thread should quit\n");
					break;
				}
				void *page = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
						  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
				if (page == MAP_FAILED) {
					perror("mmap pressure page");
					args->running = 0;
					break;
				}
				pressure_memory[allocated_times + i] = page;
				/* Touch the page so the kernel backs it with real memory. */
				memset(page, (int)((allocated_times + i) % 256), PAGE_SIZE);
				if (i % 100 == 0) {
					printf("Now available_memory_kb:%lu, Swap used kb: %lu\n",
					       get_available_memory_kb(), get_swap_used_memory_kb());
					usleep(200000);
				}
			}
			/* i holds the number of pages actually allocated this round. */
			allocated_times += i;
			allocated_pages += i;
		} else {
			/* Memory is plentiful: allocate one large area down to the threshold. */
			pages_to_alloc = (available_memory_kb - LOW_MEMORY_THRESH_KB) / 4 + 1;
			void *chunk = mmap(NULL, pages_to_alloc * PAGE_SIZE,
					   PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
			if (chunk == MAP_FAILED) {
				perror("mmap pressure chunk");
				break;
			}
			pressure_memory[allocated_times] = chunk;
			/* Force the kernel to allocate physical memory. */
			memset(chunk, (int)(allocated_times % 256), pages_to_alloc * PAGE_SIZE);
			allocated_times++;
			allocated_pages += pages_to_alloc;
			printf(" Allocated %zu pressure pages, available memory: %lu KB\n",
			       allocated_pages, available_memory_kb);
		}
	}
	printf(" Allocated %zu pressure pages, available memory: %lu KB\n",
	       allocated_pages, available_memory_kb);
	*args->pressure_memory_ptr = pressure_memory;
	*args->allocated_pages = allocated_pages;
	printf("Memory pressure thread completed allocation, actually allocated %zu pages\n",
	       allocated_pages);
	return NULL;
}
/*
 * Poll the KSM pages_shared / pages_sharing counters once per second
 * until pages_shared is stable for two consecutive reads, or 60 seconds
 * have elapsed. On success the final pages_shared value is stored through
 * initial_shared (if non-NULL) and 0 is returned; -1 on a sysfs read error.
 */
static int monitor_ksm_merging(unsigned long *initial_shared)
{
	unsigned long pages_shared = 0;
	unsigned long pages_sharing = 0;
	unsigned long prev_shared = 0;
	int stable_seconds = 0;
	int tick;

	printf("Waiting for KSM page merging...\n");
	for (tick = 0; tick < 60; tick++) {
		if (read_sysfs(KSM_PAGES_SHARED, &pages_shared) < 0)
			return -1;
		if (read_sysfs(KSM_PAGES_SHARING, &pages_sharing) < 0)
			return -1;
		printf(" Second %2d: pages_shared = %lu pages_sharing = %lu\n",
		       tick, pages_shared, pages_sharing);
		if (pages_shared == prev_shared) {
			if (++stable_seconds >= 2)
				break;
		} else {
			stable_seconds = 0;
			prev_shared = pages_shared;
		}
		sleep(1);
	}
	if (initial_shared)
		*initial_shared = pages_shared;
	printf("KSM merging completed, shared pages: %lu\n", pages_shared);
	return 0;
}
/*
 * Drive the full reproducer scenario:
 *   1) allocate the mergeable region and let ksmd merge it,
 *   2) spawn the memory-pressure thread,
 *   3) wait until swap is in use or WAIT_PRESSURE_TIME seconds elapse,
 *   4) join the pressure thread.
 * Returns 0 on success, -1 on any setup failure.
 *
 * NOTE(review): on the success path neither ksm_pages nor the pressure
 * thread's allocations are freed before returning — presumably intentional
 * (the process exits right after and the pages must stay mapped for the
 * rmap walk being measured); confirm.
 */
static int test_rmap_walk()
{
	void **ksm_pages = allocate_ksm_pages(KSM_PAGES);
	if (!ksm_pages)
		return -1;
	unsigned long shared_before_pressure;
	if (monitor_ksm_merging(&shared_before_pressure) < 0) {
		free_ksm_pages(ksm_pages, KSM_PAGES);
		return -1;
	}
	if (shared_before_pressure == 0) {
		printf("Warning: No KSM merging detected!\n");
		/* Give ksmd a little extra time before bailing out. */
		sleep(15);
		free_ksm_pages(ksm_pages, KSM_PAGES);
		return -1;
	}
	printf("\nStarting to create memory pressure to trigger swap or compact...\n");
	void **pressure_memory = NULL;
	size_t allocated_pressure_memory = 0;
	/* Shared state for the pressure thread; running doubles as the stop flag. */
	pressure_args_t pressure_args = {
		.max_alloc_times = 10000,
		.pressure_memory_ptr = &pressure_memory,
		.running = 1,
		.allocated_pages = &allocated_pressure_memory
	};
	pthread_t pressure_thread;
	if (pthread_create(&pressure_thread, NULL,
	    memory_pressure_thread, &pressure_args) != 0) {
		perror("pthread_create");
		free_ksm_pages(ksm_pages, KSM_PAGES);
		return -1;
	}
	/* Poll swap usage once a second; stop early once swap is clearly used. */
	int wait_time = WAIT_PRESSURE_TIME;
	unsigned long swap_used;
	while (wait_time > 0 && pressure_args.running) {
		if ((swap_used = get_swap_used_memory_kb()) > SWAP_THRESHHOLD_KB) {
			printf("Swap space used (%lu) is > %d kb\n", swap_used, SWAP_THRESHHOLD_KB);
			break;
		}
		sleep(1);
		wait_time--;
	}
	if (!wait_time)
		printf("Timeout now quit\n");
	/* Ask the pressure thread to stop (it also checks this flag itself). */
	pressure_args.running = 0;
	printf("Wait pressure_thread exit.\n");
	pthread_join(pressure_thread, NULL);
	printf("\nDone. Please check ftrace trace result to see how long rmap_walk_ksm...\n");
	return 0;
}
/* Print the memory/swap summary lines from /proc/meminfo. */
static void print_system_memory_info(void)
{
	static const char *const keys[] = {
		"MemTotal:", "MemFree:", "MemAvailable:", "SwapTotal:", "SwapFree:",
	};
	char line[256];
	FILE *fp;
	size_t k;

	printf("System memory information:\n");
	fp = fopen("/proc/meminfo", "r");
	if (!fp) {
		perror("fopen /proc/meminfo");
		return;
	}
	while (fgets(line, sizeof(line), fp)) {
		for (k = 0; k < sizeof(keys) / sizeof(keys[0]); k++) {
			if (strstr(line, keys[k])) {
				printf(" %s", line);
				break;
			}
		}
	}
	fclose(fp);
}
/* Print the reclaim/KSM/swap-related counters from /proc/vmstat. */
static void print_vmstat_info(void)
{
	static const char *const tags[] = { "pgscan", "pgsteal", "ksm", "swap" };
	char line[256];
	FILE *fp;
	size_t t;

	printf("VM statistics (relevant items):\n");
	fp = fopen("/proc/vmstat", "r");
	if (!fp) {
		perror("fopen /proc/vmstat");
		return;
	}
	while (fgets(line, sizeof(line), fp)) {
		for (t = 0; t < sizeof(tags) / sizeof(tags[0]); t++) {
			if (strstr(line, tags[t])) {
				printf(" %s", line);
				break;
			}
		}
	}
	fclose(fp);
}
/*
 * Entry point: verify root privileges, print baseline memory/vmstat data,
 * configure KSM, run the reproducer, then restore default KSM settings.
 */
int main(int argc, char *argv[])
{
	printf("\n========================================\n"
	       "KSM rmap_walk Feature Test Program\n"
	       "========================================\n\n");
	/* Root is needed for the KSM sysfs knobs. */
	if (geteuid() != 0) {
		fprintf(stderr, "Error: Root privileges required to run this test program\n");
		fprintf(stderr, "Please use: sudo %s\n", argv[0]);
		return 1;
	}
	print_system_memory_info();
	print_vmstat_info();
	if (configure_ksm() < 0)
		return 1;
	if (test_rmap_walk() < 0) {
		fprintf(stderr, "Test 1 failed\n");
		return 1;
	}
	/* Best-effort restore; failures here are not fatal. */
	printf("\nRestoring KSM default settings...\n");
	write_sysfs(KSM_PAGES_TO_SCAN, "100");
	write_sysfs(KSM_SLEEP_MILLISECONDS, "20");
	printf("\nTest completed!\n");
	return 0;
}
====================================================================
Subject: [PATCH] Sample monitoring: monitor rmap_walk_ksm() delay
This is a sample patch to monitor rmap_walk_ksm() metrics as shown at
https://lore.kernel.org/all/20260112220143497dgs9w3S7sfdTUNRbflDtb@zte.com.cn/
You can acquire the following data by:
cat /proc/rmap_walk/delay_max
1) Time_ms: Max time for holding anon_vma lock in a single rmap_walk_ksm.
2) Nr_iteration_total: The max times of iterations in a loop of anon_vma_interval_tree_foreach
3) Skip_addr_out_of_range: The max times of skipping due to the first check (vma->vm_start
and vma->vm_end) in a loop of anon_vma_interval_tree_foreach.
4) Skip_mm_mismatch: The max times of skipping due to the second check (rmap_item->mm == vma->vm_mm)
in a loop of anon_vma_interval_tree_foreach.
---
include/linux/delayacct.h | 26 +++++++++
kernel/delayacct.c | 112 ++++++++++++++++++++++++++++++++++++++
mm/ksm.c | 25 ++++++++-
3 files changed, 160 insertions(+), 3 deletions(-)
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index ecb06f16d22c..398df73dbe75 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -107,6 +107,18 @@ extern void __delayacct_compact_end(void);
extern void __delayacct_wpcopy_start(void);
extern void __delayacct_wpcopy_end(void);
extern void __delayacct_irq(struct task_struct *task, u32 delta);
+struct rmap_walk_call_stats {
+ u64 skip_addr_out_of_range;
+ u64 skip_mm_mismatch;
+ u64 skip_invalid_vma;
+ u64 rmap_one_false;
+ u64 done_true;
+ u64 complete_processed;
+ u64 interval_tree_total;
+};
+
+extern void __delayacct_rmap_start(u64 *start_time);
+extern void __delayacct_rmap_end(u64 start_time, struct rmap_walk_call_stats *stats);
static inline void delayacct_tsk_init(struct task_struct *tsk)
{
@@ -250,6 +262,16 @@ static inline void delayacct_irq(struct task_struct *task, u32 delta)
__delayacct_irq(task, delta);
}
+static inline void delayacct_rmap_start(u64 *start_time)
+{
+ __delayacct_rmap_start(start_time);
+}
+
+static inline void delayacct_rmap_end(u64 start_time, struct rmap_walk_call_stats *stats)
+{
+ __delayacct_rmap_end(start_time, stats);
+}
+
#else
static inline void delayacct_init(void)
{}
@@ -290,6 +312,10 @@ static inline void delayacct_wpcopy_end(void)
{}
static inline void delayacct_irq(struct task_struct *task, u32 delta)
{}
+static inline void delayacct_rmap_start(u64 *start_time)
+{}
+static inline void delayacct_rmap_end(u64 start_time, struct rmap_walk_call_stats *stats)
+{}
#endif /* CONFIG_TASK_DELAY_ACCT */
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 2e55c493c98b..77d0f362d336 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -10,9 +10,14 @@
#include <linux/sched/clock.h>
#include <linux/slab.h>
#include <linux/taskstats.h>
+#include <linux/time.h>
+#include <linux/time64.h>
#include <linux/sysctl.h>
#include <linux/delayacct.h>
#include <linux/module.h>
+#include <linux/sched/debug.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#define UPDATE_DELAY(type) \
do { \
@@ -29,6 +34,16 @@ DEFINE_STATIC_KEY_FALSE(delayacct_key);
int delayacct_on __read_mostly; /* Delay accounting turned on/off */
struct kmem_cache *delayacct_cache;
+/* Global statistics for rmap_walk_ksm lock delay */
+static DEFINE_RAW_SPINLOCK(rmap_stats_lock);
+
+/* Maximum delay statistics */
+static u64 rmap_delay_max __read_mostly = 0;
+static struct timespec64 rmap_delay_max_ts;
+static char rmap_delay_max_comm[TASK_COMM_LEN];
+static struct rmap_walk_call_stats rmap_delay_max_stats;
+
+
static void set_delayacct(bool enabled)
{
if (enabled) {
@@ -318,3 +333,100 @@ void __delayacct_irq(struct task_struct *task, u32 delta)
raw_spin_unlock_irqrestore(&task->delays->lock, flags);
}
+void __delayacct_rmap_start(u64 *start_time)
+{
+ *start_time = ktime_get_ns();
+}
+
+void __delayacct_rmap_end(u64 start_time, struct rmap_walk_call_stats *stats)
+{
+ unsigned long flags;
+ s64 ns;
+ u64 delay_ns;
+
+ if (start_time == 0)
+ return;
+
+ ns = ktime_get_ns() - start_time;
+ if (ns <= 0)
+ return;
+
+ delay_ns = (u64)ns;
+
+ raw_spin_lock_irqsave(&rmap_stats_lock, flags);
+
+ /* Update maximum delay */
+ if (delay_ns > rmap_delay_max) {
+ rmap_delay_max = delay_ns;
+ ktime_get_real_ts64(&rmap_delay_max_ts);
+ memcpy(rmap_delay_max_comm, current->comm, TASK_COMM_LEN);
+ /* Save statistics for this call that produced the max delay */
+ if (stats)
+ rmap_delay_max_stats = *stats;
+ }
+
+ raw_spin_unlock_irqrestore(&rmap_stats_lock, flags);
+}
+
+
+#ifdef CONFIG_PROC_FS
+
+/* Show maximum delay information */
+static int proc_rmap_delay_max_show(struct seq_file *m, void *v)
+{
+ unsigned long flags;
+ u64 max_delay;
+ struct timespec64 ts;
+ char comm[TASK_COMM_LEN];
+ struct rmap_walk_call_stats stats;
+ struct tm tm;
+
+ raw_spin_lock_irqsave(&rmap_stats_lock, flags);
+ max_delay = rmap_delay_max;
+ ts = rmap_delay_max_ts;
+ memcpy(comm, rmap_delay_max_comm, TASK_COMM_LEN);
+ stats = rmap_delay_max_stats;
+ raw_spin_unlock_irqrestore(&rmap_stats_lock, flags);
+
+ /* Convert timestamp to hour:minute:second format */
+ time64_to_tm(ts.tv_sec, 0, &tm);
+
+ seq_printf(m, "max_delay_ns: %llu\n", max_delay);
+ seq_printf(m, "max_delay_ms: %llu\n", max_delay / 1000000ULL);
+ seq_printf(m, "max_delay_ts: %04ld-%02d-%02d %02d:%02d:%02d\n",
+ (long)(tm.tm_year + 1900), tm.tm_mon + 1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+ seq_printf(m, "max_delay_comm: %s\n", comm);
+ seq_printf(m, "\n");
+ seq_printf(m, "=== Statistics for the call that produced max_delay ===\n");
+ seq_printf(m, "interval_tree_total: %llu\n", stats.interval_tree_total);
+ seq_printf(m, "skip_addr_out_of_range: %llu\n", stats.skip_addr_out_of_range);
+ seq_printf(m, "skip_mm_mismatch: %llu\n", stats.skip_mm_mismatch);
+ seq_printf(m, "skip_invalid_vma: %llu\n", stats.skip_invalid_vma);
+ seq_printf(m, "rmap_one_false: %llu\n", stats.rmap_one_false);
+ seq_printf(m, "done_true: %llu\n", stats.done_true);
+ seq_printf(m, "complete_processed: %llu\n", stats.complete_processed);
+
+ return 0;
+}
+
+static struct proc_dir_entry *rmap_walk_dir;
+
+static int __init proc_rmap_stats_init(void)
+{
+ /* Create /proc/rmap_walk directory */
+ rmap_walk_dir = proc_mkdir("rmap_walk", NULL);
+ if (!rmap_walk_dir) {
+ pr_err("Failed to create /proc/rmap_walk directory\n");
+ return -ENOMEM;
+ }
+
+ /* Create proc files under /proc/rmap_walk/ */
+ proc_create_single("delay_max", 0444, rmap_walk_dir, proc_rmap_delay_max_show);
+
+ return 0;
+}
+fs_initcall(proc_rmap_stats_init);
+
+#endif /* CONFIG_PROC_FS */
+
diff --git a/mm/ksm.c b/mm/ksm.c
index 031c17e4ada6..0f45a8ea9006 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -39,6 +39,7 @@
#include <linux/freezer.h>
#include <linux/oom.h>
#include <linux/numa.h>
+#include <linux/delayacct.h>
#include <linux/pagewalk.h>
#include <asm/tlbflush.h>
@@ -3154,6 +3155,7 @@ void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc)
struct ksm_stable_node *stable_node;
struct ksm_rmap_item *rmap_item;
int search_new_forks = 0;
+ u64 lock_start_time = 0;
VM_BUG_ON_FOLIO(!folio_test_ksm(folio), folio);
@@ -3173,6 +3175,7 @@ void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc)
struct vm_area_struct *vma;
unsigned long addr;
pgoff_t pgoff_start, pgoff_end;
+ struct rmap_walk_call_stats call_stats = {0};
cond_resched();
if (!anon_vma_trylock_read(anon_vma)) {
@@ -3189,35 +3192,51 @@ void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc)
pgoff_start = rmap_item->address >> PAGE_SHIFT;
pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
+ delayacct_rmap_start(&lock_start_time);
anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root,
pgoff_start, pgoff_end) {
+
+ call_stats.interval_tree_total++;
cond_resched();
vma = vmac->vma;
- if (addr < vma->vm_start || addr >= vma->vm_end)
+ if (addr < vma->vm_start || addr >= vma->vm_end) {
+ call_stats.skip_addr_out_of_range++;
continue;
+ }
/*
* Initially we examine only the vma which covers this
* rmap_item; but later, if there is still work to do,
* we examine covering vmas in other mms: in case they
* were forked from the original since ksmd passed.
*/
- if ((rmap_item->mm == vma->vm_mm) == search_new_forks)
+ if ((rmap_item->mm == vma->vm_mm) == search_new_forks) {
+ call_stats.skip_mm_mismatch++;
continue;
+ }
- if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+ if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) {
+ call_stats.skip_invalid_vma++;
+ delayacct_rmap_end(lock_start_time, &call_stats);
continue;
+ }
if (!rwc->rmap_one(folio, vma, addr, rwc->arg)) {
+ call_stats.rmap_one_false++;
+ delayacct_rmap_end(lock_start_time, &call_stats);
anon_vma_unlock_read(anon_vma);
return;
}
if (rwc->done && rwc->done(folio)) {
+ call_stats.done_true++;
+ delayacct_rmap_end(lock_start_time, &call_stats);
anon_vma_unlock_read(anon_vma);
return;
}
+ call_stats.complete_processed++;
}
+ delayacct_rmap_end(lock_start_time, &call_stats);
anon_vma_unlock_read(anon_vma);
}
if (!search_new_forks++)
--
2.25.1
Powered by blists - more mailing lists