lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230817080559.43200-5-byungchul@sk.com>
Date:   Thu, 17 Aug 2023 17:05:57 +0900
From:   Byungchul Park <byungchul@...com>
To:     linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc:     kernel_team@...ynix.com, akpm@...ux-foundation.org,
        ying.huang@...el.com, namit@...are.com, xhao@...ux.alibaba.com,
        mgorman@...hsingularity.net, hughd@...gle.com, willy@...radead.org,
        david@...hat.com, peterz@...radead.org, luto@...nel.org,
        dave.hansen@...ux.intel.com
Subject: [RFC v2 4/6] mm, migrc: Adjust __zone_watermark_ok() with the amount of pending folios

CONFIG_MIGRC duplicates folios that participated in migration to avoid
TLB flushes and to provide a consistent view to CPUs that are still
caching their old mappings in the TLB. However, the duplicated folios
can be freed and made available right away through appropriate TLB
flushes if needed.

Adjust the watermark check routine, __zone_watermark_ok(), to account
for the number of duplicated folios, and make the page allocation
routine perform TLB flushes and free the duplicated folios if it is in
trouble due to memory pressure, even more aggressively for high-order
allocations.

Signed-off-by: Byungchul Park <byungchul@...com>
---
 include/linux/mm.h     |  2 ++
 include/linux/mmzone.h |  3 +++
 mm/migrate.c           | 12 ++++++++++++
 mm/page_alloc.c        | 16 ++++++++++++++++
 4 files changed, 33 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1ceec7f3591e..9df393074e6a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3827,6 +3827,7 @@ bool migrc_try_flush_free_folios(void);
 void migrc_try_flush_free_folios_dirty(void);
 struct migrc_req *fold_ubc_nowr_to_migrc(void);
 void free_migrc_req(struct migrc_req *req);
+int migrc_pending_nr_in_zone(struct zone *z);
 
 extern atomic_t migrc_gen;
 extern struct llist_head migrc_reqs;
@@ -3842,6 +3843,7 @@ static inline bool migrc_try_flush_free_folios(void) { return false; }
 static inline void migrc_try_flush_free_folios_dirty(void) {}
 static inline struct migrc_req *fold_ubc_nowr_to_migrc(void) { return NULL; }
 static inline void free_migrc_req(struct migrc_req *req) {}
+static inline int migrc_pending_nr_in_zone(struct zone *z) { return 0; }
 #endif
 
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6d645beaf7a6..1ec79bb63ba7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -958,6 +958,9 @@ struct zone {
 	/* Zone statistics */
 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 	atomic_long_t		vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
+#ifdef CONFIG_MIGRC
+	atomic_t		migrc_pending_nr;
+#endif
 } ____cacheline_internodealigned_in_smp;
 
 enum pgdat_flags {
diff --git a/mm/migrate.c b/mm/migrate.c
index c7b72d275b2a..badef3d89c6c 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -117,9 +117,12 @@ void migrc_shrink(struct llist_head *h)
 	llist_for_each_entry_safe(p, p2, n, migrc_node) {
 		if (p->migrc_state == MIGRC_SRC_PENDING) {
 			struct pglist_data *node;
+			struct zone *zone;
 
 			node = NODE_DATA(page_to_nid(p));
+			zone = page_zone(p);
 			atomic_dec(&node->migrc_pending_nr);
+			atomic_dec(&zone->migrc_pending_nr);
 		}
 
 		if (WARN_ON(!migrc_pending(page_folio(p))))
@@ -172,6 +175,7 @@ static void migrc_expand_req(struct folio *fsrc, struct folio *fdst)
 {
 	struct migrc_req *req;
 	struct pglist_data *node;
+	struct zone *zone;
 
 	req = fold_ubc_nowr_to_migrc();
 	if (!req)
@@ -190,7 +194,9 @@ static void migrc_expand_req(struct folio *fsrc, struct folio *fdst)
 		req->last = &fsrc->page.migrc_node;
 
 	node = NODE_DATA(folio_nid(fsrc));
+	zone = page_zone(&fsrc->page);
 	atomic_inc(&node->migrc_pending_nr);
+	atomic_inc(&zone->migrc_pending_nr);
 
 	if (migrc_is_full(folio_nid(fsrc)))
 		migrc_try_flush_free_folios();
@@ -275,6 +281,12 @@ bool migrc_req_processing(void)
 {
 	return current->mreq && current->mreq_dirty;
 }
+
+int migrc_pending_nr_in_zone(struct zone *z)
+{
+	return atomic_read(&z->migrc_pending_nr);
+
+}
 #else
 static inline bool migrc_src_pending(struct folio *f) { return false; }
 static inline bool migrc_dst_pending(struct folio *f) { return false; }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c51cbdb45d86..9f791c0fa15d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3179,6 +3179,11 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 	long min = mark;
 	int o;
 
+	/*
+	 * There are pages that can be freed by migrc_try_flush_free_folios().
+	 */
+	free_pages += migrc_pending_nr_in_zone(z);
+
 	/* free_pages may go negative - that's OK */
 	free_pages -= __zone_watermark_unusable_free(z, order, alloc_flags);
 
@@ -4257,6 +4262,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
 
+	migrc_try_flush_free_folios();
 restart:
 	compaction_retries = 0;
 	no_progress_loops = 0;
@@ -4772,6 +4778,16 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 	if (likely(page))
 		goto out;
 
+	if (order && migrc_try_flush_free_folios()) {
+		/*
+		 * Try again after freeing migrc's pending pages in case
+		 * of high order allocation.
+		 */
+		page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
+		if (likely(page))
+			goto out;
+	}
+
 	alloc_gfp = gfp;
 	ac.spread_dirty_pages = false;
 
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ