[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250321173729.3175898-7-souravpanda@google.com>
Date: Fri, 21 Mar 2025 17:37:29 +0000
From: Sourav Panda <souravpanda@...gle.com>
To: mathieu.desnoyers@...icios.com, willy@...radead.org, david@...hat.com,
pasha.tatashin@...een.com, rientjes@...gle.com, akpm@...ux-foundation.org,
linux-mm@...ck.org, linux-kernel@...r.kernel.org, weixugc@...gle.com,
gthelen@...gle.com, souravpanda@...gle.com, surenb@...gle.com
Subject: [RFC PATCH 6/6] mm: syscall alternative for SELECTIVE_KSM
A partition can be created or opened using:
int ksm_fd = ksm_open(ksm_name, flags);
ksm_name specifies the KSM partition to be created or opened.
flags:
O_CREAT
Create the ksm partition object if it does not exist.
O_EXCL
If O_CREAT was also specified, and a ksm partition object
with the given name already exists, return an error.
Trigger the merge using:
ksm_merge(ksm_fd, pid, start_addr, size);
Limitation: only the x86-64 syscall table (syscall_64.tbl) is wired up so far.
Signed-off-by: Sourav Panda <souravpanda@...gle.com>
---
arch/x86/entry/syscalls/syscall_64.tbl | 3 +-
include/linux/ksm.h | 4 +
mm/ksm.c | 156 ++++++++++++++++++++++++-
3 files changed, 161 insertions(+), 2 deletions(-)
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 5eb708bff1c7..352d747dbe33 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -390,7 +390,8 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
-
+467 common ksm_open sys_ksm_open
+468 common ksm_merge sys_ksm_merge
#
# Due to a historical design error, certain syscalls are numbered differently
# in x32 as compared to native x86_64. These syscalls have numbers 512-547.
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index d73095b5cd96..a94c89403c29 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -14,6 +14,10 @@
#include <linux/rmap.h>
#include <linux/sched.h>
+#include <linux/anon_inodes.h>
+#include <linux/syscalls.h>
+#define MAX_KSM_NAME_LEN 128
+
#ifdef CONFIG_KSM
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags);
diff --git a/mm/ksm.c b/mm/ksm.c
index fd7626d5d8c9..71558120b034 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -147,7 +147,8 @@ struct ksm_scan {
static struct kobject *ksm_base_kobj;
struct partition_kobj {
- struct kobject *kobj;
+ struct kobject *kobj; /* Not required for the syscall interface */
+ char name[MAX_KSM_NAME_LEN];
struct list_head list;
struct rb_root *root_stable_tree;
struct rb_root *root_unstable_tree;
@@ -166,6 +167,106 @@ static struct partition_kobj *find_partition_by_kobj(struct kobject *kobj)
return NULL;
}
+/*
+ * find_ksm_partition - look up a KSM partition by name.
+ * @partition_name: NUL-terminated name to search for; it is only read.
+ *
+ * Walks partition_list and returns the first entry whose name compares equal
+ * to @partition_name, or NULL when no entry matches.
+ *
+ * This function takes no lock itself.  The list is modified under
+ * ksm_partition_lock elsewhere in this file, so callers presumably need to
+ * hold that mutex across the lookup and any use of the result.
+ */
+static struct partition_kobj *find_ksm_partition(const char *partition_name)
+{
+	struct partition_kobj *partition;
+
+	list_for_each_entry(partition, &partition_list, list) {
+		if (!strcmp(partition->name, partition_name))
+			return partition;
+	}
+
+	return NULL;
+}
+
+static DEFINE_MUTEX(ksm_partition_lock); /* held around partition_list add/del by the syscall interface */
+
+/*
+ * ksm_release - ->release handler for a KSM partition file.
+ * @inode: unused.
+ * @file: file whose private_data points at the partition.
+ *
+ * Unlinks the partition from partition_list under ksm_partition_lock and
+ * frees it, together with the rb_root array that ksm_create_partition()
+ * allocated for its stable/unstable trees.
+ *
+ * NOTE(review): ksm_open() creates a new file for a partition that already
+ * exists, so several files can share one partition_kobj and every release
+ * would free it again.  Fixing that needs a reference count on the
+ * partition and is not addressed here.  Nodes that may still be linked in
+ * the trees are not torn down either.
+ *
+ * Return: always 0.
+ */
+static int ksm_release(struct inode *inode, struct file *file)
+{
+	struct partition_kobj *ksm = file->private_data;
+
+	mutex_lock(&ksm_partition_lock);
+	list_del(&ksm->list);
+	mutex_unlock(&ksm_partition_lock);
+
+	/*
+	 * root_stable_tree is the base of the single kcalloc() that also backs
+	 * root_unstable_tree, so one kfree() releases both arrays.
+	 */
+	kfree(ksm->root_stable_tree);
+	kfree(ksm);
+	return 0;
+}
+
+static const struct file_operations ksm_fops = { /* file operations for fds returned by ksm_open() */
+ .release = ksm_release, /* unlinks and frees the partition stored in private_data */
+};
+
+/*
+ * ksm_create_partition - allocate a named partition and add it to partition_list.
+ * @ksm_name: name for the new partition; copied into the partition, and
+ *            truncated to MAX_KSM_NAME_LEN - 1 characters if longer.
+ *
+ * One array of 2 * nr_node_ids rb_roots is allocated: the first half backs
+ * the stable trees, the second half the unstable trees.
+ *
+ * The list insertion is not locked here; ksm_open() calls this with
+ * ksm_partition_lock held.
+ *
+ * Return: the new partition, or NULL if an allocation failed.
+ */
+static struct partition_kobj *ksm_create_partition(char *ksm_name)
+{
+	struct partition_kobj *partition;
+	struct rb_root *tree_root;
+
+	partition = kzalloc(sizeof(*partition), GFP_KERNEL);
+	if (!partition)
+		return NULL;
+
+	tree_root = kcalloc(nr_node_ids + nr_node_ids, sizeof(*tree_root),
+			    GFP_KERNEL);
+	if (!tree_root) {
+		/* Do not leak the partition when the tree array cannot be allocated. */
+		kfree(partition);
+		return NULL;
+	}
+
+	partition->root_stable_tree = tree_root;
+	partition->root_unstable_tree = tree_root + nr_node_ids;
+	/* strscpy() always NUL-terminates, unlike strncpy() on a full-length name. */
+	strscpy(partition->name, ksm_name, sizeof(partition->name));
+
+	list_add(&partition->list, &partition_list);
+
+	return partition;
+}
+
+/*
+ * ksm_partition_fd - wrap a partition in an anonymous-inode file and install an fd.
+ * @partition: partition to store in the new file's private_data.
+ *
+ * Return: the new file descriptor, or a negative errno if either the file or
+ * the descriptor could not be obtained.  When only the descriptor allocation
+ * fails, the freshly created file is dropped again with fput().
+ */
+static int ksm_partition_fd(struct partition_kobj *partition)
+{
+	struct file *filp;
+	int new_fd;
+
+	filp = anon_inode_getfile("ksm_partition", &ksm_fops, partition, O_RDWR);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	new_fd = get_unused_fd_flags(O_RDWR);
+	if (new_fd >= 0)
+		fd_install(new_fd, filp);
+	else
+		fput(filp);
+
+	return new_fd;
+}
+
+/*
+ * ksm_open - create or open a named KSM partition and return an fd for it.
+ * @ksm_name: user pointer to the NUL-terminated partition name.
+ * @flags: O_CREAT creates the partition if it does not exist; O_CREAT | O_EXCL
+ *         additionally fails if it already exists.
+ *
+ * Return: a new file descriptor on success, otherwise
+ *   -EFAULT        the name could not be read from user space,
+ *   -EINVAL        the name is empty,
+ *   -ENAMETOOLONG  the name does not fit in MAX_KSM_NAME_LEN including its NUL,
+ *   -EEXIST        O_CREAT | O_EXCL was given and the partition exists,
+ *   -ENOENT        the partition does not exist and O_CREAT was not given,
+ *   -ENOMEM        the partition could not be allocated,
+ *   or the error from ksm_partition_fd().
+ *
+ * NOTE(review): the partition is used after ksm_partition_lock is dropped and
+ * is shared between every file opened on it, while ksm_release() frees it on
+ * each release; a reference count is needed to make that safe.  A partition
+ * created here also stays on partition_list if ksm_partition_fd() fails
+ * before a file exists.
+ */
+SYSCALL_DEFINE2(ksm_open, const char __user *, ksm_name, int, flags)
+{
+	char name[MAX_KSM_NAME_LEN];
+	struct partition_kobj *partition;
+	long len;
+	int err = 0;
+
+	len = strncpy_from_user(name, ksm_name, sizeof(name));
+	if (len < 0)
+		return -EFAULT;
+	/* A return equal to the buffer size means no NUL terminator was copied. */
+	if (len == sizeof(name))
+		return -ENAMETOOLONG;
+	/*
+	 * Partitions that never had a name assigned keep a zeroed name, so an
+	 * empty string would match them -- presumably the sysfs-created ones.
+	 */
+	if (!len)
+		return -EINVAL;
+
+	/* Look up and create under one lock so two openers cannot both create. */
+	mutex_lock(&ksm_partition_lock);
+	partition = find_ksm_partition(name);
+	if (partition) {
+		if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+			err = -EEXIST;
+	} else if (flags & O_CREAT) {
+		partition = ksm_create_partition(name);
+		if (!partition)
+			err = -ENOMEM;
+	} else {
+		err = -ENOENT;
+	}
+	mutex_unlock(&ksm_partition_lock);
+
+	if (err)
+		return err;
+
+	return ksm_partition_fd(partition);
+}
+
/**
* struct ksm_stable_node - node of the stable rbtree
* @node: rb node of this ksm page in the stable tree
@@ -4324,6 +4425,59 @@ static int __init ksm_thread_sysfs_init(void)
}
#endif /* CONFIG_SELECTIVE_KSM */
+/*
+ * ksm_merge - run a synchronous KSM merge over a range of another task's memory.
+ * @ksm_fd: file descriptor obtained from ksm_open().
+ * @pid: target task, looked up in the caller's pid namespace.
+ * @start: first address of the range.
+ * @size: length of the range in bytes; must be non-zero.
+ *
+ * Return: 0 once ksm_sync_merge() has been called (its result is not
+ * propagated), otherwise
+ *   -EBADF   @ksm_fd is not an open file descriptor,
+ *   -EINVAL  @ksm_fd is not a KSM partition file, the range is empty or
+ *            wraps around, or the task has no mm,
+ *   -ESRCH   no task with @pid was found.
+ *
+ * NOTE(review): no permission check is made on the target task.  Comparable
+ * cross-process interfaces gate access with a ptrace-mode check before
+ * touching another task's mm -- confirm what policy is intended here.
+ */
+SYSCALL_DEFINE4(ksm_merge, int, ksm_fd, pid_t, pid, unsigned long, start, size_t, size)
+{
+	unsigned long end = start + size;
+	struct partition_kobj *partition;
+	struct task_struct *task;
+	struct mm_struct *mm;
+	struct file *file;
+	int ret = 0;
+
+	file = fget(ksm_fd);
+	if (!file)
+		return -EBADF;
+
+	/*
+	 * Only files created through ksm_fops carry a partition_kobj in
+	 * private_data; any other fd would be misinterpreted.
+	 */
+	if (file->f_op != &ksm_fops) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	partition = file->private_data;
+	if (!partition) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	/* Rejects size == 0 as well as start + size wrapping around. */
+	if (start >= end) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	/* Pin the task under RCU so it can be used after the read section. */
+	rcu_read_lock();
+	task = find_task_by_vpid(pid);
+	if (task)
+		get_task_struct(task);
+	rcu_read_unlock();
+
+	if (!task) {
+		ret = -ESRCH;
+		goto out_fput;
+	}
+
+	mm = get_task_mm(task);
+	put_task_struct(task);
+	if (!mm) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	mutex_lock(&ksm_thread_mutex);
+	wait_while_offlining();
+	ksm_sync_merge(mm, start, end, partition);
+	mutex_unlock(&ksm_thread_mutex);
+
+	mmput(mm);
+
+out_fput:
+	fput(file);
+	return ret;
+}
+
static int __init ksm_init(void)
{
int err;
--
2.49.0.395.g12beb8f557-goog
Powered by blists - more mailing lists