[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJhEC05VTcS-=jsDUoyybQ5Cc33DbPXqJ=5FxfVxo06xfugwAQ@mail.gmail.com>
Date: Thu, 9 Oct 2025 15:06:47 -0700
From: Robert Pang <robertpang@...gle.com>
To: Coly Li <colyli@...as.com>, Kent Overstreet <kent.overstreet@...ux.dev>
Cc: linux-bcache@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] bcache: add "clock" cache replacement policy
Apology for missing the change log earlier:
v2: Add "clock" cache replacement policy to admin-guide/bcache.rst and
performance results.
v1: initial version.
On Thu, Oct 9, 2025 at 12:17 AM Robert Pang <robertpang@...gle.com> wrote:
>
> This new policy extends the FIFO policy to approximate the classic clock policy
> (O(n) time complexity) by considering bucket priority, similar to the LRU
> policy.
>
> This clock policy addresses the high IO latency (1-2 seconds) experienced on
> multi-terabyte cache devices when the free list is empty due to the default LRU
> policy. The LRU policy's O(n log n) complexity for sorting priorities for the
> entire bucket list causes this delay.
>
> Here are the average execution times (in microseconds) of the LRU and the clock
> replacement policies:
>
> SSD Size Bucket Count LRU (us) Clock (us)
> 375 GB 1536000 58292 1163
> 750 GB 3072000 121769 1729
> 1.5 TB 6144000 244012 3862
> 3 TB 12288000 496569 6428
> 6 TB 24576000 1067628 14298
> 9 TB 36864000 1564348 25763
>
> Signed-off-by: Robert Pang <robertpang@...gle.com>
> ---
> Documentation/admin-guide/bcache.rst | 2 +-
> drivers/md/bcache/alloc.c | 34 ++++++++++++++++++++++++----
> drivers/md/bcache/bcache_ondisk.h | 1 +
> drivers/md/bcache/sysfs.c | 1 +
> 4 files changed, 33 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst
> index 6fdb495ac466..2be2999c7de4 100644
> --- a/Documentation/admin-guide/bcache.rst
> +++ b/Documentation/admin-guide/bcache.rst
> @@ -616,7 +616,7 @@ bucket_size
> Size of buckets
>
> cache_replacement_policy
> - One of either lru, fifo or random.
> + One of either lru, fifo, random or clock.
>
> discard
> Boolean; if on a discard/TRIM will be issued to each bucket before it is
> diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
> index 48ce750bf70a..c65c48eab169 100644
> --- a/drivers/md/bcache/alloc.c
> +++ b/drivers/md/bcache/alloc.c
> @@ -69,7 +69,8 @@
> #include <linux/random.h>
> #include <trace/events/bcache.h>
>
> -#define MAX_OPEN_BUCKETS 128
> +#define MAX_OPEN_BUCKETS 128
> +#define CHECK_PRIO_SLICES 16
>
> /* Bucket heap / gen */
>
> @@ -211,19 +212,41 @@ static void invalidate_buckets_lru(struct cache *ca)
> }
> }
>
> -static void invalidate_buckets_fifo(struct cache *ca)
> +/*
> + * When check_prio is true, this FIFO policy examines the priority of the
> + * buckets and invalidates only the ones below a threshold in the priority
> + * ladder. As it goes, the threshold will be raised if not enough buckets are
> + * invalidated. Empty buckets are also invalidated. This evaulation resembles
> + * the LRU policy, and is used to approximate the classic clock-sweep cache
> + * replacement algorithm.
> + */
> +static void invalidate_buckets_fifo(struct cache *ca, bool check_prio)
> {
> struct bucket *b;
> size_t checked = 0;
> + size_t check_quota = 0;
> + uint16_t prio_threshold = ca->set->min_prio;
>
> while (!fifo_full(&ca->free_inc)) {
> if (ca->fifo_last_bucket < ca->sb.first_bucket ||
> ca->fifo_last_bucket >= ca->sb.nbuckets)
> ca->fifo_last_bucket = ca->sb.first_bucket;
>
> + if (check_prio && checked >= check_quota) {
> + BUG_ON(ca->set->min_prio > INITIAL_PRIO);
> + prio_threshold +=
> + DIV_ROUND_UP(INITIAL_PRIO - ca->set->min_prio,
> + CHECK_PRIO_SLICES);
> + check_quota += DIV_ROUND_UP(ca->sb.nbuckets,
> + CHECK_PRIO_SLICES);
> + }
> +
> b = ca->buckets + ca->fifo_last_bucket++;
>
> - if (bch_can_invalidate_bucket(ca, b))
> + if (bch_can_invalidate_bucket(ca, b) &&
> + (!check_prio ||
> + b->prio <= prio_threshold ||
> + !GC_SECTORS_USED(b)))
> bch_invalidate_one_bucket(ca, b);
>
> if (++checked >= ca->sb.nbuckets) {
> @@ -269,11 +292,14 @@ static void invalidate_buckets(struct cache *ca)
> invalidate_buckets_lru(ca);
> break;
> case CACHE_REPLACEMENT_FIFO:
> - invalidate_buckets_fifo(ca);
> + invalidate_buckets_fifo(ca, false);
> break;
> case CACHE_REPLACEMENT_RANDOM:
> invalidate_buckets_random(ca);
> break;
> + case CACHE_REPLACEMENT_CLOCK:
> + invalidate_buckets_fifo(ca, true);
> + break;
> }
> }
>
> diff --git a/drivers/md/bcache/bcache_ondisk.h b/drivers/md/bcache/bcache_ondisk.h
> index 6620a7f8fffc..d45794e01fe1 100644
> --- a/drivers/md/bcache/bcache_ondisk.h
> +++ b/drivers/md/bcache/bcache_ondisk.h
> @@ -288,6 +288,7 @@ BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
> #define CACHE_REPLACEMENT_LRU 0U
> #define CACHE_REPLACEMENT_FIFO 1U
> #define CACHE_REPLACEMENT_RANDOM 2U
> +#define CACHE_REPLACEMENT_CLOCK 3U
>
> BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
> #define CACHE_MODE_WRITETHROUGH 0U
> diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
> index 826b14cae4e5..c8617bad0648 100644
> --- a/drivers/md/bcache/sysfs.c
> +++ b/drivers/md/bcache/sysfs.c
> @@ -45,6 +45,7 @@ static const char * const cache_replacement_policies[] = {
> "lru",
> "fifo",
> "random",
> + "clock",
> NULL
> };
>
> --
> 2.51.0.710.ga91ca5db03-goog
>
Powered by blists - more mailing lists