[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230501165450.15352-8-surenb@google.com>
Date: Mon, 1 May 2023 09:54:17 -0700
From: Suren Baghdasaryan <surenb@...gle.com>
To: akpm@...ux-foundation.org
Cc: kent.overstreet@...ux.dev, mhocko@...e.com, vbabka@...e.cz,
hannes@...xchg.org, roman.gushchin@...ux.dev, mgorman@...e.de,
dave@...olabs.net, willy@...radead.org, liam.howlett@...cle.com,
corbet@....net, void@...ifault.com, peterz@...radead.org,
juri.lelli@...hat.com, ldufour@...ux.ibm.com,
catalin.marinas@....com, will@...nel.org, arnd@...db.de,
tglx@...utronix.de, mingo@...hat.com, dave.hansen@...ux.intel.com,
x86@...nel.org, peterx@...hat.com, david@...hat.com,
axboe@...nel.dk, mcgrof@...nel.org, masahiroy@...nel.org,
nathan@...nel.org, dennis@...nel.org, tj@...nel.org,
muchun.song@...ux.dev, rppt@...nel.org, paulmck@...nel.org,
pasha.tatashin@...een.com, yosryahmed@...gle.com,
yuzhao@...gle.com, dhowells@...hat.com, hughd@...gle.com,
andreyknvl@...il.com, keescook@...omium.org,
ndesaulniers@...gle.com, gregkh@...uxfoundation.org,
ebiggers@...gle.com, ytcoode@...il.com, vincent.guittot@...aro.org,
dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
bristot@...hat.com, vschneid@...hat.com, cl@...ux.com,
penberg@...nel.org, iamjoonsoo.kim@....com, 42.hyeyoo@...il.com,
glider@...gle.com, elver@...gle.com, dvyukov@...gle.com,
shakeelb@...gle.com, songmuchun@...edance.com, jbaron@...mai.com,
rientjes@...gle.com, minchan@...gle.com, kaleshsingh@...gle.com,
surenb@...gle.com, kernel-team@...roid.com,
linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org,
iommu@...ts.linux.dev, linux-arch@...r.kernel.org,
linux-fsdevel@...r.kernel.org, linux-mm@...ck.org,
linux-modules@...r.kernel.org, kasan-dev@...glegroups.com,
cgroups@...r.kernel.org
Subject: [PATCH 07/40] Lazy percpu counters
From: Kent Overstreet <kent.overstreet@...ux.dev>
This patch adds lib/lazy-percpu-counter.c, which implements counters
that start out as atomics, but lazily switch to percpu mode if the
update rate crosses some threshold (arbitrarily set at 256 per second).
Signed-off-by: Kent Overstreet <kent.overstreet@...ux.dev>
Signed-off-by: Suren Baghdasaryan <surenb@...gle.com>
---
include/linux/lazy-percpu-counter.h | 102 ++++++++++++++++++++++
lib/Kconfig | 3 +
lib/Makefile | 2 +
lib/lazy-percpu-counter.c | 127 ++++++++++++++++++++++++++++
4 files changed, 234 insertions(+)
create mode 100644 include/linux/lazy-percpu-counter.h
create mode 100644 lib/lazy-percpu-counter.c
diff --git a/include/linux/lazy-percpu-counter.h b/include/linux/lazy-percpu-counter.h
new file mode 100644
index 000000000000..45ca9e2ce58b
--- /dev/null
+++ b/include/linux/lazy-percpu-counter.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lazy percpu counters:
+ * (C) 2022 Kent Overstreet
+ *
+ * Lazy percpu counters start out in atomic mode, then switch to percpu mode if
+ * the update rate crosses some threshold.
+ *
+ * This means we don't have to decide between low memory overhead atomic
+ * counters and higher performance percpu counters - we can have our cake and
+ * eat it, too!
+ *
+ * Internally we use an atomic64_t, where the low bit indicates whether we're in
+ * percpu mode, and the high 8 bits are a secondary counter that's incremented
+ * when the counter is modified - meaning 55 bits of precision are available for
+ * the counter itself.
+ */
+
+#ifndef _LINUX_LAZY_PERCPU_COUNTER_H
+#define _LINUX_LAZY_PERCPU_COUNTER_H
+
+#include <linux/atomic.h>
+#include <asm/percpu.h>
+
+struct lazy_percpu_counter {
+ atomic64_t v;
+ unsigned long last_wrap;
+};
+
+void lazy_percpu_counter_exit(struct lazy_percpu_counter *c);
+void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i);
+void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i);
+s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c);
+
+/*
+ * We use the high bits of the atomic counter for a secondary counter, which is
+ * incremented every time the counter is touched. When the secondary counter
+ * wraps, we check the time the counter last wrapped, and if it was recent
+ * enough that means the update frequency has crossed our threshold and we
+ * switch to percpu mode:
+ */
+#define COUNTER_MOD_BITS 8
+#define COUNTER_MOD_MASK ~(~0ULL >> COUNTER_MOD_BITS)
+#define COUNTER_MOD_BITS_START (64 - COUNTER_MOD_BITS)
+
+/*
+ * We use the low bit of the counter to indicate whether we're in atomic mode
+ * (low bit clear), or percpu mode (low bit set, counter is a pointer to actual
+ * percpu counters:
+ */
+#define COUNTER_IS_PCPU_BIT 1
+
+static inline u64 __percpu *lazy_percpu_counter_is_pcpu(u64 v)
+{
+ if (!(v & COUNTER_IS_PCPU_BIT))
+ return NULL;
+
+ v ^= COUNTER_IS_PCPU_BIT;
+ return (u64 __percpu *)(unsigned long)v;
+}
+
+/**
+ * lazy_percpu_counter_add: Add a value to a lazy_percpu_counter
+ *
+ * @c: counter to modify
+ * @i: value to add
+ */
+static inline void lazy_percpu_counter_add(struct lazy_percpu_counter *c, s64 i)
+{
+ u64 v = atomic64_read(&c->v);
+ u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
+
+ if (likely(pcpu_v))
+ this_cpu_add(*pcpu_v, i);
+ else
+ lazy_percpu_counter_add_slowpath(c, i);
+}
+
+/**
+ * lazy_percpu_counter_add_noupgrade: Add a value to a lazy_percpu_counter,
+ * without upgrading to percpu mode
+ *
+ * @c: counter to modify
+ * @i: value to add
+ */
+static inline void lazy_percpu_counter_add_noupgrade(struct lazy_percpu_counter *c, s64 i)
+{
+ u64 v = atomic64_read(&c->v);
+ u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
+
+ if (likely(pcpu_v))
+ this_cpu_add(*pcpu_v, i);
+ else
+ lazy_percpu_counter_add_slowpath_noupgrade(c, i);
+}
+
+static inline void lazy_percpu_counter_sub(struct lazy_percpu_counter *c, s64 i)
+{
+ lazy_percpu_counter_add(c, -i);
+}
+
+#endif /* _LINUX_LAZY_PERCPU_COUNTER_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 5c2da561c516..7380292a8fcd 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -505,6 +505,9 @@ config ASSOCIATIVE_ARRAY
for more information.
+config LAZY_PERCPU_COUNTER
+ bool
+
config HAS_IOMEM
bool
depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index 876fcdeae34e..293a0858a3f8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -164,6 +164,8 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
+obj-$(CONFIG_LAZY_PERCPU_COUNTER) += lazy-percpu-counter.o
+
obj-$(CONFIG_BITREVERSE) += bitrev.o
obj-$(CONFIG_LINEAR_RANGES) += linear_ranges.o
obj-$(CONFIG_PACKING) += packing.o
diff --git a/lib/lazy-percpu-counter.c b/lib/lazy-percpu-counter.c
new file mode 100644
index 000000000000..4f4e32c2dc09
--- /dev/null
+++ b/lib/lazy-percpu-counter.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/atomic.h>
+#include <linux/gfp.h>
+#include <linux/jiffies.h>
+#include <linux/lazy-percpu-counter.h>
+#include <linux/percpu.h>
+
+static inline s64 lazy_percpu_counter_atomic_val(s64 v)
+{
+ /* Ensure output is sign extended properly: */
+ return (v << COUNTER_MOD_BITS) >>
+ (COUNTER_MOD_BITS + COUNTER_IS_PCPU_BIT);
+}
+
+static void lazy_percpu_counter_switch_to_pcpu(struct lazy_percpu_counter *c)
+{
+ u64 __percpu *pcpu_v = alloc_percpu_gfp(u64, GFP_ATOMIC|__GFP_NOWARN);
+ u64 old, new, v;
+
+ if (!pcpu_v)
+ return;
+
+ preempt_disable();
+ v = atomic64_read(&c->v);
+ do {
+ if (lazy_percpu_counter_is_pcpu(v)) {
+ free_percpu(pcpu_v);
+ return;
+ }
+
+ old = v;
+ new = (unsigned long)pcpu_v | 1;
+
+ *this_cpu_ptr(pcpu_v) = lazy_percpu_counter_atomic_val(v);
+ } while ((v = atomic64_cmpxchg(&c->v, old, new)) != old);
+ preempt_enable();
+}
+
+/**
+ * lazy_percpu_counter_exit: Free resources associated with a
+ * lazy_percpu_counter
+ *
+ * @c: counter to exit
+ */
+void lazy_percpu_counter_exit(struct lazy_percpu_counter *c)
+{
+ free_percpu(lazy_percpu_counter_is_pcpu(atomic64_read(&c->v)));
+}
+EXPORT_SYMBOL_GPL(lazy_percpu_counter_exit);
+
+/**
+ * lazy_percpu_counter_read: Read current value of a lazy_percpu_counter
+ *
+ * @c: counter to read
+ */
+s64 lazy_percpu_counter_read(struct lazy_percpu_counter *c)
+{
+ s64 v = atomic64_read(&c->v);
+ u64 __percpu *pcpu_v = lazy_percpu_counter_is_pcpu(v);
+
+ if (pcpu_v) {
+ int cpu;
+
+ v = 0;
+ for_each_possible_cpu(cpu)
+ v += *per_cpu_ptr(pcpu_v, cpu);
+ } else {
+ v = lazy_percpu_counter_atomic_val(v);
+ }
+
+ return v;
+}
+EXPORT_SYMBOL_GPL(lazy_percpu_counter_read);
+
+void lazy_percpu_counter_add_slowpath(struct lazy_percpu_counter *c, s64 i)
+{
+ u64 atomic_i;
+ u64 old, v = atomic64_read(&c->v);
+ u64 __percpu *pcpu_v;
+
+ atomic_i = i << COUNTER_IS_PCPU_BIT;
+ atomic_i &= ~COUNTER_MOD_MASK;
+ atomic_i |= 1ULL << COUNTER_MOD_BITS_START;
+
+ do {
+ pcpu_v = lazy_percpu_counter_is_pcpu(v);
+ if (pcpu_v) {
+ this_cpu_add(*pcpu_v, i);
+ return;
+ }
+
+ old = v;
+ } while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old);
+
+ if (unlikely(!(v & COUNTER_MOD_MASK))) {
+ unsigned long now = jiffies;
+
+ if (c->last_wrap &&
+ unlikely(time_after(c->last_wrap + HZ, now)))
+ lazy_percpu_counter_switch_to_pcpu(c);
+ else
+ c->last_wrap = now;
+ }
+}
+EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath);
+
+void lazy_percpu_counter_add_slowpath_noupgrade(struct lazy_percpu_counter *c, s64 i)
+{
+ u64 atomic_i;
+ u64 old, v = atomic64_read(&c->v);
+ u64 __percpu *pcpu_v;
+
+ atomic_i = i << COUNTER_IS_PCPU_BIT;
+ atomic_i &= ~COUNTER_MOD_MASK;
+
+ do {
+ pcpu_v = lazy_percpu_counter_is_pcpu(v);
+ if (pcpu_v) {
+ this_cpu_add(*pcpu_v, i);
+ return;
+ }
+
+ old = v;
+ } while ((v = atomic64_cmpxchg(&c->v, old, old + atomic_i)) != old);
+}
+EXPORT_SYMBOL(lazy_percpu_counter_add_slowpath_noupgrade);
--
2.40.1.495.gc816e09b53d-goog
Powered by blists - more mailing lists