lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230522110849.2921-8-urezki@gmail.com>
Date:   Mon, 22 May 2023 13:08:47 +0200
From:   "Uladzislau Rezki (Sony)" <urezki@...il.com>
To:     linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>
Cc:     LKML <linux-kernel@...r.kernel.org>, Baoquan He <bhe@...hat.com>,
        Lorenzo Stoakes <lstoakes@...il.com>,
        Christoph Hellwig <hch@...radead.org>,
        Matthew Wilcox <willy@...radead.org>,
        "Liam R . Howlett" <Liam.Howlett@...cle.com>,
        Dave Chinner <david@...morbit.com>,
        "Paul E . McKenney" <paulmck@...nel.org>,
        Joel Fernandes <joel@...lfernandes.org>,
        Uladzislau Rezki <urezki@...il.com>,
        Oleksiy Avramchenko <oleksiy.avramchenko@...y.com>
Subject: [PATCH 7/9] mm: vmalloc: Insert lazy-VA per-cpu zone

Similar to busy VAs, lazy ones are also stored in a per-CPU
zone. A freed address is mapped to the zone it belongs to,
and the VA resides there until it is handled further.

Such an approach does not require any global locking
primitive; instead, access scales with the number of
CPUs.

Remove the global purge spinlock (purge_vmap_area_lock) together
with the global purge tree and the global purge list.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@...il.com>
---
 mm/vmalloc.c | 127 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 71 insertions(+), 56 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index dd83deb5ef4f..fe993c0561dd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -734,10 +734,6 @@ static DEFINE_SPINLOCK(free_vmap_area_lock);
 LIST_HEAD(vmap_area_list);
 static bool vmap_initialized __read_mostly;
 
-static struct rb_root purge_vmap_area_root = RB_ROOT;
-static LIST_HEAD(purge_vmap_area_list);
-static DEFINE_SPINLOCK(purge_vmap_area_lock);
-
 /*
  * This kmem_cache is used for vmap_area objects. Instead of
  * allocating from slab we reuse an object from this cache to
@@ -1792,39 +1788,17 @@ static DEFINE_MUTEX(vmap_purge_lock);
 /* for per-CPU blocks */
 static void purge_fragmented_blocks_allcpus(void);
 
-/*
- * Purges all lazily-freed vmap areas.
- */
-static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
+static unsigned long
+purge_cpu_vmap_zone(struct cpu_vmap_zone *z)
 {
-	unsigned long resched_threshold;
-	unsigned int num_purged_areas = 0;
-	struct list_head local_purge_list;
+	unsigned long num_purged_areas = 0;
 	struct vmap_area *va, *n_va;
 
-	lockdep_assert_held(&vmap_purge_lock);
-
-	spin_lock(&purge_vmap_area_lock);
-	purge_vmap_area_root = RB_ROOT;
-	list_replace_init(&purge_vmap_area_list, &local_purge_list);
-	spin_unlock(&purge_vmap_area_lock);
-
-	if (unlikely(list_empty(&local_purge_list)))
+	if (list_empty(&z->purge_list))
 		goto out;
 
-	start = min(start,
-		list_first_entry(&local_purge_list,
-			struct vmap_area, list)->va_start);
-
-	end = max(end,
-		list_last_entry(&local_purge_list,
-			struct vmap_area, list)->va_end);
-
-	flush_tlb_kernel_range(start, end);
-	resched_threshold = lazy_max_pages() << 1;
-
 	spin_lock(&free_vmap_area_lock);
-	list_for_each_entry_safe(va, n_va, &local_purge_list, list) {
+	list_for_each_entry_safe(va, n_va, &z->purge_list, list) {
 		unsigned long nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
 		unsigned long orig_start = va->va_start;
 		unsigned long orig_end = va->va_end;
@@ -1846,13 +1820,57 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 
 		atomic_long_sub(nr, &vmap_lazy_nr);
 		num_purged_areas++;
-
-		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
-			cond_resched_lock(&free_vmap_area_lock);
 	}
 	spin_unlock(&free_vmap_area_lock);
 
 out:
+	return num_purged_areas;
+}
+
+/*
+ * Purges all lazily-freed vmap areas.
+ */
+static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
+{
+	unsigned long num_purged_areas = 0;
+	struct cpu_vmap_zone *z;
+	int need_purge = 0;
+	int i;
+
+	lockdep_assert_held(&vmap_purge_lock);
+
+	for_each_possible_cpu(i) {
+		z = per_cpu_ptr(&cpu_vmap_zone, i);
+		INIT_LIST_HEAD(&z->purge_list);
+
+		if (RB_EMPTY_ROOT(&fbl_root(z, LAZY)))
+			continue;
+
+		fbl_lock(z, LAZY);
+		WRITE_ONCE(fbl(z, LAZY, root.rb_node), NULL);
+		list_replace_init(&fbl_head(z, LAZY), &z->purge_list);
+		fbl_unlock(z, LAZY);
+
+		start = min(start,
+			list_first_entry(&z->purge_list,
+				struct vmap_area, list)->va_start);
+
+		end = max(end,
+			list_last_entry(&z->purge_list,
+				struct vmap_area, list)->va_end);
+
+		need_purge++;
+	}
+
+	if (need_purge) {
+		flush_tlb_kernel_range(start, end);
+
+		for_each_possible_cpu(i) {
+			z = per_cpu_ptr(&cpu_vmap_zone, i);
+			num_purged_areas += purge_cpu_vmap_zone(z);
+		}
+	}
+
 	trace_purge_vmap_area_lazy(start, end, num_purged_areas);
 	return num_purged_areas > 0;
 }
@@ -1870,16 +1888,9 @@ static void purge_vmap_area_lazy(void)
 
 static void drain_vmap_area_work(struct work_struct *work)
 {
-	unsigned long nr_lazy;
-
-	do {
-		mutex_lock(&vmap_purge_lock);
-		__purge_vmap_area_lazy(ULONG_MAX, 0);
-		mutex_unlock(&vmap_purge_lock);
-
-		/* Recheck if further work is required. */
-		nr_lazy = atomic_long_read(&vmap_lazy_nr);
-	} while (nr_lazy > lazy_max_pages());
+	mutex_lock(&vmap_purge_lock);
+	__purge_vmap_area_lazy(ULONG_MAX, 0);
+	mutex_unlock(&vmap_purge_lock);
 }
 
 /*
@@ -1889,6 +1900,7 @@ static void drain_vmap_area_work(struct work_struct *work)
  */
 static void free_vmap_area_noflush(struct vmap_area *va)
 {
+	struct cpu_vmap_zone *z = addr_to_cvz(va->va_start);
 	unsigned long nr_lazy_max = lazy_max_pages();
 	unsigned long va_start = va->va_start;
 	unsigned long nr_lazy;
@@ -1902,10 +1914,9 @@ static void free_vmap_area_noflush(struct vmap_area *va)
 	/*
 	 * Merge or place it to the purge tree/list.
 	 */
-	spin_lock(&purge_vmap_area_lock);
-	merge_or_add_vmap_area(va,
-		&purge_vmap_area_root, &purge_vmap_area_list);
-	spin_unlock(&purge_vmap_area_lock);
+	fbl_lock(z, LAZY);
+	merge_or_add_vmap_area(va, &fbl_root(z, LAZY), &fbl_head(z, LAZY));
+	fbl_unlock(z, LAZY);
 
 	trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max);
 
@@ -4199,17 +4210,21 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 
 static void show_purge_info(struct seq_file *m)
 {
+	struct cpu_vmap_zone *z;
 	struct vmap_area *va;
+	int i;
 
-	mutex_lock(&vmap_purge_lock);
-	spin_lock(&purge_vmap_area_lock);
-	list_for_each_entry(va, &purge_vmap_area_list, list) {
-		seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
-			(void *)va->va_start, (void *)va->va_end,
-			va->va_end - va->va_start);
+	for_each_possible_cpu(i) {
+		z = per_cpu_ptr(&cpu_vmap_zone, i);
+
+		fbl_lock(z, LAZY);
+		list_for_each_entry(va, &fbl_head(z, LAZY), list) {
+			seq_printf(m, "0x%pK-0x%pK %7ld unpurged vm_area\n",
+				(void *)va->va_start, (void *)va->va_end,
+				va->va_end - va->va_start);
+		}
+		fbl_unlock(z, LAZY);
 	}
-	spin_unlock(&purge_vmap_area_lock);
-	mutex_unlock(&vmap_purge_lock);
 }
 
 static int s_show(struct seq_file *m, void *p)
-- 
2.30.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ