lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAJhEC05VTcS-=jsDUoyybQ5Cc33DbPXqJ=5FxfVxo06xfugwAQ@mail.gmail.com>
Date: Thu, 9 Oct 2025 15:06:47 -0700
From: Robert Pang <robertpang@...gle.com>
To: Coly Li <colyli@...as.com>, Kent Overstreet <kent.overstreet@...ux.dev>
Cc: linux-bcache@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] bcache: add "clock" cache replacement policy

Apology for missing the change log earlier:
v2: Add "clock" cache replacement policy to admin-guide/bcache.rst and
performance results.
v1: initial version.

On Thu, Oct 9, 2025 at 12:17 AM Robert Pang <robertpang@...gle.com> wrote:
>
> This new policy extends the FIFO policy to approximate the classic clock policy
> (O(n) time complexity) by considering bucket priority, similar to the LRU
> policy.
>
> This clock policy addresses the high IO latency (1-2 seconds) experienced on
> multi-terabyte cache devices when the free list is empty due to the default LRU
> policy. The LRU policy's O(n log n) complexity for sorting priorities for the
> entire bucket list causes this delay.
>
> Here are the average execution times (in microseconds) of the LRU and the clock
> replacement policies:
>
> SSD Size  Bucket Count  LRU (us)  Clock (us)
> 375 GB      1536000       58292        1163
> 750 GB      3072000      121769        1729
> 1.5 TB      6144000      244012        3862
> 3 TB       12288000      496569        6428
> 6 TB       24576000     1067628       14298
> 9 TB       36864000     1564348       25763
>
> Signed-off-by: Robert Pang <robertpang@...gle.com>
> ---
>  Documentation/admin-guide/bcache.rst |  2 +-
>  drivers/md/bcache/alloc.c            | 34 ++++++++++++++++++++++++----
>  drivers/md/bcache/bcache_ondisk.h    |  1 +
>  drivers/md/bcache/sysfs.c            |  1 +
>  4 files changed, 33 insertions(+), 5 deletions(-)
>
> diff --git a/Documentation/admin-guide/bcache.rst b/Documentation/admin-guide/bcache.rst
> index 6fdb495ac466..2be2999c7de4 100644
> --- a/Documentation/admin-guide/bcache.rst
> +++ b/Documentation/admin-guide/bcache.rst
> @@ -616,7 +616,7 @@ bucket_size
>    Size of buckets
>
>  cache_replacement_policy
> -  One of either lru, fifo or random.
> +  One of either lru, fifo, random or clock.
>
>  discard
>    Boolean; if on a discard/TRIM will be issued to each bucket before it is
> diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
> index 48ce750bf70a..c65c48eab169 100644
> --- a/drivers/md/bcache/alloc.c
> +++ b/drivers/md/bcache/alloc.c
> @@ -69,7 +69,8 @@
>  #include <linux/random.h>
>  #include <trace/events/bcache.h>
>
> -#define MAX_OPEN_BUCKETS 128
> +#define MAX_OPEN_BUCKETS       128
> +#define CHECK_PRIO_SLICES      16
>
>  /* Bucket heap / gen */
>
> @@ -211,19 +212,41 @@ static void invalidate_buckets_lru(struct cache *ca)
>         }
>  }
>
> -static void invalidate_buckets_fifo(struct cache *ca)
> +/*
> + * When check_prio is true, this FIFO policy examines the priority of the
> + * buckets and invalidates only the ones below a threshold in the priority
> + * ladder. As it goes, the threshold will be raised if not enough buckets are
> + * invalidated. Empty buckets are also invalidated. This evaulation resembles
> + * the LRU policy, and is used to approximate the classic clock-sweep cache
> + * replacement algorithm.
> + */
> +static void invalidate_buckets_fifo(struct cache *ca, bool check_prio)
>  {
>         struct bucket *b;
>         size_t checked = 0;
> +       size_t check_quota = 0;
> +       uint16_t prio_threshold = ca->set->min_prio;
>
>         while (!fifo_full(&ca->free_inc)) {
>                 if (ca->fifo_last_bucket <  ca->sb.first_bucket ||
>                     ca->fifo_last_bucket >= ca->sb.nbuckets)
>                         ca->fifo_last_bucket = ca->sb.first_bucket;
>
> +               if (check_prio && checked >= check_quota) {
> +                       BUG_ON(ca->set->min_prio > INITIAL_PRIO);
> +                       prio_threshold +=
> +                               DIV_ROUND_UP(INITIAL_PRIO - ca->set->min_prio,
> +                                            CHECK_PRIO_SLICES);
> +                       check_quota += DIV_ROUND_UP(ca->sb.nbuckets,
> +                                                   CHECK_PRIO_SLICES);
> +               }
> +
>                 b = ca->buckets + ca->fifo_last_bucket++;
>
> -               if (bch_can_invalidate_bucket(ca, b))
> +               if (bch_can_invalidate_bucket(ca, b) &&
> +                   (!check_prio ||
> +                    b->prio <= prio_threshold ||
> +                    !GC_SECTORS_USED(b)))
>                         bch_invalidate_one_bucket(ca, b);
>
>                 if (++checked >= ca->sb.nbuckets) {
> @@ -269,11 +292,14 @@ static void invalidate_buckets(struct cache *ca)
>                 invalidate_buckets_lru(ca);
>                 break;
>         case CACHE_REPLACEMENT_FIFO:
> -               invalidate_buckets_fifo(ca);
> +               invalidate_buckets_fifo(ca, false);
>                 break;
>         case CACHE_REPLACEMENT_RANDOM:
>                 invalidate_buckets_random(ca);
>                 break;
> +       case CACHE_REPLACEMENT_CLOCK:
> +               invalidate_buckets_fifo(ca, true);
> +               break;
>         }
>  }
>
> diff --git a/drivers/md/bcache/bcache_ondisk.h b/drivers/md/bcache/bcache_ondisk.h
> index 6620a7f8fffc..d45794e01fe1 100644
> --- a/drivers/md/bcache/bcache_ondisk.h
> +++ b/drivers/md/bcache/bcache_ondisk.h
> @@ -288,6 +288,7 @@ BITMASK(CACHE_REPLACEMENT,          struct cache_sb, flags, 2, 3);
>  #define CACHE_REPLACEMENT_LRU          0U
>  #define CACHE_REPLACEMENT_FIFO         1U
>  #define CACHE_REPLACEMENT_RANDOM       2U
> +#define CACHE_REPLACEMENT_CLOCK                3U
>
>  BITMASK(BDEV_CACHE_MODE,               struct cache_sb, flags, 0, 4);
>  #define CACHE_MODE_WRITETHROUGH                0U
> diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
> index 826b14cae4e5..c8617bad0648 100644
> --- a/drivers/md/bcache/sysfs.c
> +++ b/drivers/md/bcache/sysfs.c
> @@ -45,6 +45,7 @@ static const char * const cache_replacement_policies[] = {
>         "lru",
>         "fifo",
>         "random",
> +       "clock",
>         NULL
>  };
>
> --
> 2.51.0.710.ga91ca5db03-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ