lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210419225047.3415425-5-dennis@kernel.org>
Date:   Mon, 19 Apr 2021 22:50:47 +0000
From:   Dennis Zhou <dennis@...nel.org>
To:     Tejun Heo <tj@...nel.org>, Christoph Lameter <cl@...ux.com>,
        Roman Gushchin <guro@...com>
Cc:     linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        Dennis Zhou <dennis@...nel.org>
Subject: [PATCH 4/4] percpu: use reclaim threshold instead of running for every page

The last patch implements reclaim by adding 2 additional lists where a
chunk's lifecycle is:
  active_slot -> to_depopulate_slot -> sidelined_slot

This worked great because we're able to nicely converge paths into
isolation. However, it's a bit aggressive to run for every free page.
Let's accumulate a few free pages before we do this. To do this, the new
lifecycle is:
  active_slot -> sidelined_slot -> to_depopulate_slot -> sidelined_slot

The transition from sidelined_slot -> to_depopulate_slot occurs on a
threshold instead of before where it directly went to the
to_depopulate_slot. pcpu_nr_isolated_empty_pop_pages[] is introduced to
aid with this.

Suggested-by: Roman Gushchin <guro@...com>
Signed-off-by: Dennis Zhou <dennis@...nel.org>
---
 mm/percpu-internal.h |  1 +
 mm/percpu-stats.c    |  8 ++++++--
 mm/percpu.c          | 44 +++++++++++++++++++++++++++++++++++++-------
 3 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 10604dce806f..b3e43b016276 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -92,6 +92,7 @@ extern int pcpu_nr_slots;
 extern int pcpu_sidelined_slot;
 extern int pcpu_to_depopulate_slot;
 extern int pcpu_nr_empty_pop_pages[];
+extern int pcpu_nr_isolated_empty_pop_pages[];
 
 extern struct pcpu_chunk *pcpu_first_chunk;
 extern struct pcpu_chunk *pcpu_reserved_chunk;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 2125981acfb9..facc804eb86c 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -145,7 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
 	int slot, max_nr_alloc;
 	int *buffer;
 	enum pcpu_chunk_type type;
-	int nr_empty_pop_pages;
+	int nr_empty_pop_pages, nr_isolated_empty_pop_pages;
 
 alloc_buffer:
 	spin_lock_irq(&pcpu_lock);
@@ -167,8 +167,11 @@ static int percpu_stats_show(struct seq_file *m, void *v)
 	}
 
 	nr_empty_pop_pages = 0;
-	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+	nr_isolated_empty_pop_pages = 0;
+	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
 		nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];
+		nr_isolated_empty_pop_pages += pcpu_nr_isolated_empty_pop_pages[type];
+	}
 
 #define PL(X)								\
 	seq_printf(m, "  %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
@@ -202,6 +205,7 @@ static int percpu_stats_show(struct seq_file *m, void *v)
 	PU(min_alloc_size);
 	PU(max_alloc_size);
 	P("empty_pop_pages", nr_empty_pop_pages);
+	P("iso_empty_pop_pages", nr_isolated_empty_pop_pages);
 	seq_putc(m, '\n');
 
 #undef PU
diff --git a/mm/percpu.c b/mm/percpu.c
index 79eebc80860d..ba13e683d022 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -110,6 +110,9 @@
 #define PCPU_EMPTY_POP_PAGES_LOW	2
 #define PCPU_EMPTY_POP_PAGES_HIGH	4
 
+/* only schedule reclaim if there are at least N empty pop pages sidelined */
+#define PCPU_EMPTY_POP_RECLAIM_THRESHOLD	4
+
 #ifdef CONFIG_SMP
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
 #ifndef __addr_to_pcpu_ptr
@@ -183,6 +186,7 @@ static LIST_HEAD(pcpu_map_extend_chunks);
  * The reserved chunk doesn't contribute to the count.
  */
 int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
+int pcpu_nr_isolated_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
 
 /*
  * The number of populated pages in use by the allocator, protected by
@@ -582,8 +586,10 @@ static void pcpu_isolate_chunk(struct pcpu_chunk *chunk)
 	if (!chunk->isolated) {
 		chunk->isolated = true;
 		pcpu_nr_empty_pop_pages[type] -= chunk->nr_empty_pop_pages;
+		pcpu_nr_isolated_empty_pop_pages[type] +=
+			chunk->nr_empty_pop_pages;
+		list_move(&chunk->list, &pcpu_slot[pcpu_sidelined_slot]);
 	}
-	list_move(&chunk->list, &pcpu_slot[pcpu_to_depopulate_slot]);
 }
 
 static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
@@ -595,6 +601,8 @@ static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
 	if (chunk->isolated) {
 		chunk->isolated = false;
 		pcpu_nr_empty_pop_pages[type] += chunk->nr_empty_pop_pages;
+		pcpu_nr_isolated_empty_pop_pages[type] -=
+			chunk->nr_empty_pop_pages;
 		pcpu_chunk_relocate(chunk, -1);
 	}
 }
@@ -610,9 +618,15 @@ static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
  */
 static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
 {
+	enum pcpu_chunk_type type = pcpu_chunk_type(chunk);
+
 	chunk->nr_empty_pop_pages += nr;
-	if (chunk != pcpu_reserved_chunk && !chunk->isolated)
-		pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr;
+	if (chunk != pcpu_reserved_chunk) {
+		if (chunk->isolated)
+			pcpu_nr_isolated_empty_pop_pages[type] += nr;
+		else
+			pcpu_nr_empty_pop_pages[type] += nr;
+	}
 }
 
 /*
@@ -2138,10 +2152,13 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
 	struct list_head *pcpu_slot = pcpu_chunk_list(type);
 	struct pcpu_chunk *chunk;
 	struct pcpu_block_md *block;
+	LIST_HEAD(to_depopulate);
 	int i, end;
 
 	spin_lock_irq(&pcpu_lock);
 
+	list_splice_init(&pcpu_slot[pcpu_to_depopulate_slot], &to_depopulate);
+
 restart:
 	/*
 	 * Once a chunk is isolated to the to_depopulate list, the chunk is no
@@ -2149,9 +2166,9 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
 	 * other accessor is the free path which only returns area back to the
 	 * allocator not touching the populated bitmap.
 	 */
-	while (!list_empty(&pcpu_slot[pcpu_to_depopulate_slot])) {
-		chunk = list_first_entry(&pcpu_slot[pcpu_to_depopulate_slot],
-					 struct pcpu_chunk, list);
+	while (!list_empty(&to_depopulate)) {
+		chunk = list_first_entry(&to_depopulate, struct pcpu_chunk,
+					 list);
 		WARN_ON(chunk->immutable);
 
 		/*
@@ -2208,6 +2225,13 @@ static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
 				  &pcpu_slot[pcpu_sidelined_slot]);
 	}
 
+	if (pcpu_nr_isolated_empty_pop_pages[type] >=
+	    PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
+		list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot],
+				      &pcpu_slot[pcpu_to_depopulate_slot]);
+		pcpu_schedule_balance_work();
+	}
+
 	spin_unlock_irq(&pcpu_lock);
 }
 
@@ -2291,7 +2315,13 @@ void free_percpu(void __percpu *ptr)
 			}
 	} else if (pcpu_should_reclaim_chunk(chunk)) {
 		pcpu_isolate_chunk(chunk);
-		need_balance = true;
+		if (chunk->free_bytes == pcpu_unit_size ||
+		    pcpu_nr_isolated_empty_pop_pages[pcpu_chunk_type(chunk)] >=
+		    PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
+			list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot],
+					      &pcpu_slot[pcpu_to_depopulate_slot]);
+			need_balance = true;
+		}
 	}
 
 	trace_percpu_free_percpu(chunk->base_addr, off, ptr);
-- 
2.31.1.368.gbe11c130af-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ