Message-Id: <20250220052027.58847-9-byungchul@sk.com>
Date: Thu, 20 Feb 2025 14:20:09 +0900
From: Byungchul Park <byungchul@...com>
To: linux-kernel@...r.kernel.org,
linux-mm@...ck.org
Cc: kernel_team@...ynix.com,
akpm@...ux-foundation.org,
ying.huang@...el.com,
vernhao@...cent.com,
mgorman@...hsingularity.net,
hughd@...gle.com,
willy@...radead.org,
david@...hat.com,
peterz@...radead.org,
luto@...nel.org,
tglx@...utronix.de,
mingo@...hat.com,
bp@...en8.de,
dave.hansen@...ux.intel.com,
rjgolo@...il.com
Subject: [RFC PATCH v12 08/26] mm: introduce luf_batch to be used as hash table to store luf meta data
Functionally, no change. This is a preparation for the luf mechanism,
which needs to keep luf metadata per page while the page stays in the
pcp or buddy allocator. The metadata includes the cpumask for tlb
shootdown and luf's request generation number.

Since struct page doesn't have enough room to store the luf metadata,
this patch introduces a hash table to store it and makes each page keep
its hash key instead.

Since all the pages in pcp or buddy share the hash table, collisions
are inevitable, so care must be taken when reading or updating an
entry.
Signed-off-by: Byungchul Park <byungchul@...com>
---
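Not part of the patch, just a sketch for reviewers: one way a later luf
patch might read an entry of the shared table, assuming the caller has
already obtained a page's 16-bit luf_key (the struct page accessor for
the key is not introduced here). The entry's read lock is what guards
against a concurrent fold_luf_batch() updating the same slot.

	/*
	 * Hypothetical read side of the luf_batch hash table: snapshot
	 * the entry's batch and ugen under the read lock so a writer
	 * folding into the same slot cannot be observed half-updated.
	 */
	static void peek_luf_batch(unsigned short luf_key,
				   struct tlbflush_unmap_batch *batch,
				   unsigned long *ugen)
	{
		struct luf_batch *lb = &luf_batch[luf_key];
		unsigned long flags;

		read_lock_irqsave(&lb->lock, flags);
		fold_batch(batch, &lb->batch, false);
		*ugen = lb->ugen;
		read_unlock_irqrestore(&lb->lock, flags);
	}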
include/linux/mm_types.h | 10 ++++
mm/internal.h | 8 +++
mm/rmap.c | 122 +++++++++++++++++++++++++++++++++++++--
3 files changed, 136 insertions(+), 4 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 20d85c4e609de..39a6b5124b01f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -32,6 +32,16 @@
struct address_space;
struct mem_cgroup;
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+struct luf_batch {
+ struct tlbflush_unmap_batch batch;
+ unsigned long ugen;
+ rwlock_t lock;
+};
+#else
+struct luf_batch {};
+#endif
+
/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
diff --git a/mm/internal.h b/mm/internal.h
index e3084d32272e3..b38a9ae9d6993 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1240,6 +1240,8 @@ extern struct workqueue_struct *mm_percpu_wq;
void try_to_unmap_flush(void);
void try_to_unmap_flush_dirty(void);
void flush_tlb_batched_pending(struct mm_struct *mm);
+void fold_batch(struct tlbflush_unmap_batch *dst, struct tlbflush_unmap_batch *src, bool reset);
+void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src);
#else
static inline void try_to_unmap_flush(void)
{
@@ -1250,6 +1252,12 @@ static inline void try_to_unmap_flush_dirty(void)
static inline void flush_tlb_batched_pending(struct mm_struct *mm)
{
}
+static inline void fold_batch(struct tlbflush_unmap_batch *dst, struct tlbflush_unmap_batch *src, bool reset)
+{
+}
+static inline void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src)
+{
+}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
extern const struct trace_print_flags pageflag_names[];
diff --git a/mm/rmap.c b/mm/rmap.c
index ed345503e4f88..74fbf6c2fb3a7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -641,7 +641,7 @@ struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio,
* function, ugen_before(), should be used to evaluate the temporal
* sequence of events because the number is designed to wraparound.
*/
-static atomic_long_t __maybe_unused luf_ugen = ATOMIC_LONG_INIT(LUF_UGEN_INIT);
+static atomic_long_t luf_ugen = ATOMIC_LONG_INIT(LUF_UGEN_INIT);
/*
* Don't return invalid luf_ugen, zero.
@@ -656,6 +656,122 @@ static unsigned long __maybe_unused new_luf_ugen(void)
return ugen;
}
+static void reset_batch(struct tlbflush_unmap_batch *batch)
+{
+ arch_tlbbatch_clear(&batch->arch);
+ batch->flush_required = false;
+ batch->writable = false;
+}
+
+void fold_batch(struct tlbflush_unmap_batch *dst,
+ struct tlbflush_unmap_batch *src, bool reset)
+{
+ if (!src->flush_required)
+ return;
+
+ /*
+ * Fold src into dst.
+ */
+ arch_tlbbatch_fold(&dst->arch, &src->arch);
+ dst->writable = dst->writable || src->writable;
+ dst->flush_required = true;
+
+ if (!reset)
+ return;
+
+ /*
+ * Reset src.
+ */
+ reset_batch(src);
+}
+
+/*
+ * The range that luf_key, which is of 'unsigned short' type, can cover.
+ */
+#define NR_LUF_BATCH (1 << (sizeof(short) * 8))
+
+/*
+ * Use 0th entry as accumulated batch.
+ */
+static struct luf_batch luf_batch[NR_LUF_BATCH];
+
+static void luf_batch_init(struct luf_batch *lb)
+{
+ rwlock_init(&lb->lock);
+ reset_batch(&lb->batch);
+ lb->ugen = atomic_long_read(&luf_ugen) - 1;
+}
+
+static int __init luf_init(void)
+{
+ int i;
+
+ for (i = 0; i < NR_LUF_BATCH; i++)
+ luf_batch_init(&luf_batch[i]);
+
+ return 0;
+}
+early_initcall(luf_init);
+
+/*
+ * key to point to an entry of the luf_batch array
+ *
+ * note: zero means invalid key
+ */
+static atomic_t luf_kgen = ATOMIC_INIT(1);
+
+/*
+ * Don't return invalid luf_key, zero.
+ */
+static unsigned short __maybe_unused new_luf_key(void)
+{
+ unsigned short luf_key = atomic_inc_return(&luf_kgen);
+
+ if (!luf_key)
+ luf_key = atomic_inc_return(&luf_kgen);
+
+ return luf_key;
+}
+
+static void __fold_luf_batch(struct luf_batch *dst_lb,
+ struct tlbflush_unmap_batch *src_batch,
+ unsigned long src_ugen)
+{
+ /*
+ * dst_lb->ugen represents the request that requires tlb
+ * shootdown, that is, a sort of request generation number.
+ * The newer it is, the more tlb shootdowns might be needed to
+ * fulfill the newer request. Conservatively keep the newer one.
+ */
+ if (!dst_lb->ugen || ugen_before(dst_lb->ugen, src_ugen))
+ dst_lb->ugen = src_ugen;
+ fold_batch(&dst_lb->batch, src_batch, false);
+}
+
+void fold_luf_batch(struct luf_batch *dst, struct luf_batch *src)
+{
+ unsigned long flags;
+
+ /*
+ * Exactly the same. Nothing to fold.
+ */
+ if (dst == src)
+ return;
+
+ if (&src->lock < &dst->lock) {
+ read_lock_irqsave(&src->lock, flags);
+ write_lock(&dst->lock);
+ } else {
+ write_lock_irqsave(&dst->lock, flags);
+ read_lock(&src->lock);
+ }
+
+ __fold_luf_batch(dst, &src->batch, src->ugen);
+
+ write_unlock(&dst->lock);
+ read_unlock_irqrestore(&src->lock, flags);
+}
+
/*
* Flush TLB entries for recently unmapped pages from remote CPUs. It is
* important if a PTE was dirty when it was unmapped that it's flushed
@@ -670,9 +786,7 @@ void try_to_unmap_flush(void)
return;
arch_tlbbatch_flush(&tlb_ubc->arch);
- arch_tlbbatch_clear(&tlb_ubc->arch);
- tlb_ubc->flush_required = false;
- tlb_ubc->writable = false;
+ reset_batch(tlb_ubc);
}
/* Flush iff there are potentially writable TLB entries that can race with IO */
--
2.17.1