[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251009071512.3952240-2-robertpang@google.com>
Date: Thu, 9 Oct 2025 00:15:13 -0700
From: Robert Pang <robertpang@...gle.com>
To: Coly Li <colyli@...as.com>, Kent Overstreet <kent.overstreet@...ux.dev>
Cc: linux-bcache@...r.kernel.org, linux-kernel@...r.kernel.org,
Robert Pang <robertpang@...gle.com>
Subject: [PATCH v2] bcache: add "clock" cache replacement policy
This new policy extends the FIFO policy to approximate the classic clock policy
(O(n) time complexity) by considering bucket priority, similar to the LRU
policy.
This clock policy addresses the high IO latency (1-2 seconds) experienced on
multi-terabyte cache devices when the free list is empty due to the default LRU
policy. The LRU policy's O(n log n) complexity for sorting priorities for the
entire bucket list causes this delay.
Here are the average execution times (in microseconds) of the LRU and the clock
replacement policies:
SSD Size Bucket Count LRU (us) Clock (us)
375 GB 1536000 58292 1163
750 GB 3072000 121769 1729
1.5 TB 6144000 244012 3862
3 TB 12288000 496569 6428
6 TB 24576000 1067628 14298
9 TB 36864000 1564348 25763
Signed-off-by: Robert Pang <robertpang@...gle.com>
---
Documentation/admin-guide/bcache.rst | 2 +-
drivers/md/bcache/alloc.c | 34 ++++++++++++++++++++++++----
drivers/md/bcache/bcache_ondisk.h | 1 +
drivers/md/bcache/sysfs.c | 1 +
4 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst
index 6fdb495ac466..2be2999c7de4 100644
--- a/Documentation/admin-guide/bcache.rst
+++ b/Documentation/admin-guide/bcache.rst
@@ -616,7 +616,7 @@ bucket_size
Size of buckets
cache_replacement_policy
- One of either lru, fifo or random.
+ One of either lru, fifo, random or clock.
discard
Boolean; if on a discard/TRIM will be issued to each bucket before it is
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 48ce750bf70a..c65c48eab169 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -69,7 +69,8 @@
#include <linux/random.h>
#include <trace/events/bcache.h>
-#define MAX_OPEN_BUCKETS 128
+#define MAX_OPEN_BUCKETS 128
+#define CHECK_PRIO_SLICES 16
/* Bucket heap / gen */
@@ -211,19 +212,41 @@ static void invalidate_buckets_lru(struct cache *ca)
}
}
-static void invalidate_buckets_fifo(struct cache *ca)
+/*
+ * When check_prio is true, this FIFO policy examines the priority of the
+ * buckets and invalidates only the ones below a threshold in the priority
+ * ladder. As it goes, the threshold will be raised if not enough buckets are
+ * invalidated. Empty buckets are also invalidated. This evaulation resembles
+ * the LRU policy, and is used to approximate the classic clock-sweep cache
+ * replacement algorithm.
+ */
+static void invalidate_buckets_fifo(struct cache *ca, bool check_prio)
{
struct bucket *b;
size_t checked = 0;
+ size_t check_quota = 0;
+ uint16_t prio_threshold = ca->set->min_prio;
while (!fifo_full(&ca->free_inc)) {
if (ca->fifo_last_bucket < ca->sb.first_bucket ||
ca->fifo_last_bucket >= ca->sb.nbuckets)
ca->fifo_last_bucket = ca->sb.first_bucket;
+ if (check_prio && checked >= check_quota) {
+ BUG_ON(ca->set->min_prio > INITIAL_PRIO);
+ prio_threshold +=
+ DIV_ROUND_UP(INITIAL_PRIO - ca->set->min_prio,
+ CHECK_PRIO_SLICES);
+ check_quota += DIV_ROUND_UP(ca->sb.nbuckets,
+ CHECK_PRIO_SLICES);
+ }
+
b = ca->buckets + ca->fifo_last_bucket++;
- if (bch_can_invalidate_bucket(ca, b))
+ if (bch_can_invalidate_bucket(ca, b) &&
+ (!check_prio ||
+ b->prio <= prio_threshold ||
+ !GC_SECTORS_USED(b)))
bch_invalidate_one_bucket(ca, b);
if (++checked >= ca->sb.nbuckets) {
@@ -269,11 +292,14 @@ static void invalidate_buckets(struct cache *ca)
invalidate_buckets_lru(ca);
break;
case CACHE_REPLACEMENT_FIFO:
- invalidate_buckets_fifo(ca);
+ invalidate_buckets_fifo(ca, false);
break;
case CACHE_REPLACEMENT_RANDOM:
invalidate_buckets_random(ca);
break;
+ case CACHE_REPLACEMENT_CLOCK:
+ invalidate_buckets_fifo(ca, true);
+ break;
}
}
diff --git a/drivers/md/bcache/bcache_ondisk.h b/drivers/md/bcache/bcache_ondisk.h
index 6620a7f8fffc..d45794e01fe1 100644
--- a/drivers/md/bcache/bcache_ondisk.h
+++ b/drivers/md/bcache/bcache_ondisk.h
@@ -288,6 +288,7 @@ BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
#define CACHE_REPLACEMENT_LRU 0U
#define CACHE_REPLACEMENT_FIFO 1U
#define CACHE_REPLACEMENT_RANDOM 2U
+#define CACHE_REPLACEMENT_CLOCK 3U
BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
#define CACHE_MODE_WRITETHROUGH 0U
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 826b14cae4e5..c8617bad0648 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -45,6 +45,7 @@ static const char * const cache_replacement_policies[] = {
"lru",
"fifo",
"random",
+ "clock",
NULL
};
--
2.51.0.710.ga91ca5db03-goog
Powered by blists - more mailing lists