Message-Id: <20190630075650.8516-6-lpf.vector@gmail.com>
Date:   Sun, 30 Jun 2019 15:56:50 +0800
From:   Pengfei Li <lpf.vector@...il.com>
To:     akpm@...ux-foundation.org, peterz@...radead.org, urezki@...il.com
Cc:     rpenyaev@...e.de, mhocko@...e.com, guro@...com,
        aryabinin@...tuozzo.com, rppt@...ux.ibm.com, mingo@...nel.org,
        rick.p.edgecombe@...el.com, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org, Pengfei Li <lpf.vector@...il.com>
Subject: [PATCH 5/5] mm/vmalloc.c: Rewrite struct vmap_area to reduce its size

Not all members of struct vmap_area are used at the same time.
For example,

(1) members @flags, @purge_list and @vm are unused when the vmap area
is in the *FREE* tree;

(2) members @subtree_max_size and @purge_list are unused when the vmap
area is in the *BUSY* tree and not in the purge list;

(3) members @subtree_max_size and @vm are unused when the vmap area is
in the *BUSY* tree and in the purge list.

Since @subtree_max_size, @purge_list and @vm are never used at the
same time, they can be placed in a union to reduce the size of
struct vmap_area (see the size sketch below).
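
For illustration only (not part of the patch): a minimal userspace
sketch with stand-in types matching the kernel's sizes on an LP64
build. It shows the expected shrink of struct vmap_area once the three
mutually exclusive members share one slot; the struct and type names
here are hypothetical stand-ins, not the kernel definitions.

#include <stdio.h>

/* Userspace stand-ins with the same LP64 sizes as the kernel types. */
struct rb_node { unsigned long __rb_parent_color; void *rb_right, *rb_left; };
struct list_head { void *next, *prev; };
struct llist_node { void *next; };
struct vm_struct;

/* Layout before the patch. */
struct vmap_area_old {
	unsigned long va_start;
	unsigned long va_end;
	unsigned long subtree_max_size;
	unsigned long flags;
	struct rb_node rb_node;
	struct list_head list;
	struct llist_node purge_list;
	struct vm_struct *vm;
};

/* Layout after the patch: the three members share one union slot. */
struct vmap_area_new {
	unsigned long va_start;
	unsigned long va_end;
	union {
		unsigned long subtree_max_size;
		struct llist_node purge_list;
		struct vm_struct *vm;
	};
	struct rb_node rb_node;
	struct list_head list;
};

int main(void)
{
	/* Typically prints "old: 88, new: 64" on an LP64 build. */
	printf("old: %zu, new: %zu\n",
	       sizeof(struct vmap_area_old), sizeof(struct vmap_area_new));
	return 0;
}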

In addition, rename @flags to @_vm_valid to indicate whether @vm is
valid. @_vm_valid can share a union with @va_start because, whenever
@vm is valid, @va_start can be recovered from @vm->addr (a small demo
of this overlap follows below).
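
Again purely illustrative (userspace stand-ins, hypothetical names),
a small demo of the @va_start/@_vm_valid overlap: setting @_vm_valid
clobbers @va_start, and unsetting it recovers @va_start from
@vm->addr, mirroring what __va_set_vm()/__va_unset_vm() below do.

#include <assert.h>
#include <stdio.h>

#define VA_VM_VALID	0x01UL

struct vm_struct { void *addr; };

struct area {
	union {
		unsigned long va_start;
		unsigned long _vm_valid;
	};
	struct vm_struct *vm;	/* stands in for the union holding @vm */
};

int main(void)
{
	static char backing[4096];
	struct vm_struct vm = { .addr = backing };
	struct area a = { .va_start = (unsigned long)backing };

	/* Like __va_set_vm(): mark @vm valid; this overwrites va_start. */
	a._vm_valid = VA_VM_VALID;
	a.vm = &vm;
	assert(a._vm_valid == VA_VM_VALID);

	/* Like __va_unset_vm(): recover va_start from vm->addr. */
	a.va_start = (unsigned long)a.vm->addr;
	assert(a.va_start == (unsigned long)backing);

	printf("restored va_start = %#lx\n", a.va_start);
	return 0;
}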

Signed-off-by: Pengfei Li <lpf.vector@...il.com>
---
 include/linux/vmalloc.h | 28 ++++++++++----
 mm/vmalloc.c            | 85 +++++++++++++++++++++++++++++++----------
 2 files changed, 85 insertions(+), 28 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 51e131245379..7b99de5ccbec 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -48,18 +48,30 @@ struct vm_struct {
 };
 
 struct vmap_area {
-	unsigned long va_start;
+	union {
+		unsigned long va_start;
+		/*
+		 * The value of _vm_valid tells whether
+		 * @vm is valid
+		 */
+		unsigned long _vm_valid;
+	};
+
 	unsigned long va_end;
 
-	/*
-	 * Largest available free size in subtree.
-	 */
-	unsigned long subtree_max_size;
-	unsigned long flags;
+	union {
+		/* Only used when the vmap area is in the *FREE* vmap_area tree */
+		unsigned long subtree_max_size;
+
+		/* Only used when the vmap area is in vmap_purge_list */
+		struct llist_node purge_list;
+
+		/* Only used when va_vm_is_valid() returns true */
+		struct vm_struct *vm;
+	};
+
 	struct rb_node rb_node;         /* address sorted rbtree */
 	struct list_head list;          /* address sorted list */
-	struct llist_node purge_list;    /* "lazy purge" list */
-	struct vm_struct *vm;
 };
 
 /*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4148d6fdfb6d..89b93ee0ec04 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -329,8 +329,28 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
 
-#define VM_LAZY_FREE	0x02
-#define VM_VM_AREA	0x04
+#define VA_VM_VALID	0x01UL
+
+static __always_inline void
+__va_set_vm(struct vmap_area *va, struct vm_struct *vm)
+{
+	/* Overwrite va->va_start with va->_vm_valid */
+	va->_vm_valid = VA_VM_VALID;
+	va->vm = vm;
+}
+
+static __always_inline void
+__va_unset_vm(struct vmap_area *va)
+{
+	/* Restore va->va_start and overwrite va->_vm_valid */
+	va->va_start = (unsigned long)va->vm->addr;
+}
+
+static __always_inline bool
+va_vm_is_valid(struct vmap_area *va)
+{
+	return (va->_vm_valid == VA_VM_VALID);
+}
 
 static DEFINE_SPINLOCK(vmap_area_lock);
 /* Export for kexec only */
@@ -399,15 +419,26 @@ static void purge_vmap_area_lazy(void);
 static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
 static unsigned long lazy_max_pages(void);
 
+/*
+ * Search the *BUSY* vmap_area tree. If va_vm_is_valid() returns true,
+ * then va->va_start has been overwritten by va->_vm_valid,
+ * otherwise va->va_start retains its original value.
+ */
 static struct vmap_area *__search_va_from_busy_tree(unsigned long addr)
 {
 	struct rb_node *n = vmap_area_root.rb_node;
 
 	while (n) {
 		struct vmap_area *va;
+		unsigned long start;
 
 		va = rb_entry(n, struct vmap_area, rb_node);
-		if (addr < va->va_start)
+		if (va_vm_is_valid(va))
+			start = (unsigned long)va->vm->addr;
+		else
+			start = va->va_start;
+
+		if (addr < start)
 			n = n->rb_left;
 		else if (addr >= va->va_end)
 			n = n->rb_right;
@@ -429,8 +460,13 @@ find_va_links(struct vmap_area *va,
 {
 	struct vmap_area *tmp_va;
 	struct rb_node **link;
+	unsigned long start;
+	bool is_busy_va_tree = false;
 
 	if (root) {
+		if (root == &vmap_area_root)
+			is_busy_va_tree = true;
+
 		link = &root->rb_node;
 		if (unlikely(!*link)) {
 			*parent = NULL;
@@ -447,6 +483,10 @@ find_va_links(struct vmap_area *va,
 	 */
 	do {
 		tmp_va = rb_entry(*link, struct vmap_area, rb_node);
+		if (is_busy_va_tree && va_vm_is_valid(tmp_va))
+			start = (unsigned long)tmp_va->vm->addr;
+		else
+			start = tmp_va->va_start;
 
 		/*
 		 * During the traversal we also do some sanity check.
@@ -454,9 +494,9 @@ find_va_links(struct vmap_area *va,
 		 * or full overlaps.
 		 */
 		if (va->va_start < tmp_va->va_end &&
-				va->va_end <= tmp_va->va_start)
+				va->va_end <= start)
 			link = &(*link)->rb_left;
-		else if (va->va_end > tmp_va->va_start &&
+		else if (va->va_end > start &&
 				va->va_start >= tmp_va->va_end)
 			link = &(*link)->rb_right;
 		else
@@ -1079,8 +1119,8 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 
 	va->va_start = addr;
 	va->va_end = addr + size;
-	va->flags = 0;
 	insert_va_to_busy_tree(va);
+	va->vm = NULL;
 
 	spin_unlock(&vmap_area_lock);
 
@@ -1872,11 +1912,11 @@ void __init vmalloc_init(void)
 		if (WARN_ON_ONCE(!va))
 			continue;
 
-		va->flags = VM_VM_AREA;
 		va->va_start = (unsigned long)tmp->addr;
 		va->va_end = va->va_start + tmp->size;
-		va->vm = tmp;
 		insert_va_to_busy_tree(va);
+
+		__va_set_vm(va, tmp);
 	}
 
 	/*
@@ -1969,8 +2009,9 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
 	vm->addr = (void *)va->va_start;
 	vm->size = va->va_end - va->va_start;
 	vm->caller = caller;
-	va->vm = vm;
-	va->flags |= VM_VM_AREA;
+
+	__va_set_vm(va, vm);
+
 	spin_unlock(&vmap_area_lock);
 }
 
@@ -2075,7 +2116,7 @@ struct vm_struct *find_vm_area(const void *addr)
 	struct vmap_area *va;
 
 	va = find_vmap_area((unsigned long)addr);
-	if (va && va->flags & VM_VM_AREA)
+	if (va && va_vm_is_valid(va))
 		return va->vm;
 
 	return NULL;
@@ -2098,13 +2139,15 @@ struct vm_struct *remove_vm_area(const void *addr)
 	might_sleep();
 
 	va = find_vmap_area((unsigned long)addr);
-	if (va && va->flags & VM_VM_AREA) {
+	if (va && va_vm_is_valid(va)) {
 		struct vm_struct *vm = va->vm;
 
 		spin_lock(&vmap_area_lock);
-		va->vm = NULL;
-		va->flags &= ~VM_VM_AREA;
-		va->flags |= VM_LAZY_FREE;
+		/*
+		 * Call __va_unset_vm() to restore the value of va->va_start
+	 * before calling free_unmap_vmap_area() to add it to the purge list
+		 */
+		__va_unset_vm(va);
 		spin_unlock(&vmap_area_lock);
 
 		kasan_free_shadow(vm);
@@ -2813,7 +2856,7 @@ long vread(char *buf, char *addr, unsigned long count)
 		if (!count)
 			break;
 
-		if (!(va->flags & VM_VM_AREA))
+		if (!va_vm_is_valid(va))
 			continue;
 
 		vm = va->vm;
@@ -2893,7 +2936,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
 		if (!count)
 			break;
 
-		if (!(va->flags & VM_VM_AREA))
+		if (!va_vm_is_valid(va))
 			continue;
 
 		vm = va->vm;
@@ -3407,14 +3450,16 @@ static int s_show(struct seq_file *m, void *p)
 	va = list_entry(p, struct vmap_area, list);
 
 	/*
-	 * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
+	 * s_show can encounter race with remove_vm_area, !va_vm_is_valid() on
 	 * behalf of vmap area is being tear down or vm_map_ram allocation.
+	 * If va->vm != NULL then the vmap area is being torn down,
+	 * otherwise it was allocated by vm_map_ram().
 	 */
-	if (!(va->flags & VM_VM_AREA)) {
+	if (!va_vm_is_valid(va)) {
 		seq_printf(m, "0x%pK-0x%pK %7ld %s\n",
 			(void *)va->va_start, (void *)va->va_end,
 			va->va_end - va->va_start,
-			va->flags & VM_LAZY_FREE ? "unpurged vm_area" : "vm_map_ram");
+			va->vm ? "unpurged vm_area" : "vm_map_ram");
 
 		return 0;
 	}
-- 
2.21.0
