lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAPjX3FdxA46s=JZONj=Y3tBu7ugD8i5mWFhvvAoB1oTY+8vW7Q@mail.gmail.com>
Date: Tue, 28 Jan 2025 15:40:45 +0100
From: Daniel Vacek <neelx@...e.com>
To: Chris Mason <clm@...com>, Josef Bacik <josef@...icpanda.com>, David Sterba <dsterba@...e.com>, 
	Nick Terrell <terrelln@...com>
Cc: linux-btrfs@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] btrfs/zstd: enable negative compression levels mount option

On Tue, 28 Jan 2025 at 14:22, Daniel Vacek <neelx@...e.com> wrote:
>
> This patch allows using the fast modes (negative compression levels) of zstd
> as a mount option.
>
> As per the results, the compression ratio is lower:
>
> for level in {-10..-1} 1 2 3; \
> do printf "level %3d\n" $level; \
>   mount -o compress=zstd:$level /dev/sdb /mnt/test/; \
>   grep sdb /proc/mounts; \
>   cp -r /usr/bin       /mnt/test/; sync; compsize /mnt/test/bin; \
>   cp -r /usr/share/doc /mnt/test/; sync; compsize /mnt/test/doc; \
>   cp    enwik9         /mnt/test/; sync; compsize /mnt/test/enwik9; \
>   cp    linux-6.13.tar /mnt/test/; sync; compsize /mnt/test/linux-6.13.tar; \
>   rm -r /mnt/test/{bin,doc,enwik9,linux-6.13.tar}; \
>   umount /mnt/test/; \
> done |& tee results | \
> awk '/^level/{print}/^TOTAL/{print$3"\t"$2"  |"}' | paste - - - - -
>
>                 266M    bin  |  45M     doc  |  953M    wiki |  1.4G    source
> =============================+===============+===============+===============+
> level -10       171M    64%  |  28M     62%  |  631M    66%  |  512M    34%  |
> level  -9       165M    62%  |  27M     61%  |  615M    64%  |  493M    33%  |
> level  -8       161M    60%  |  27M     59%  |  598M    62%  |  475M    32%  |
> level  -7       155M    58%  |  26M     58%  |  582M    61%  |  457M    30%  |
> level  -6       151M    56%  |  25M     56%  |  565M    59%  |  437M    29%  |
> level  -5       145M    54%  |  24M     55%  |  545M    57%  |  417M    28%  |
> level  -4       139M    52%  |  23M     52%  |  520M    54%  |  391M    26%  |
> level  -3       135M    50%  |  22M     50%  |  495M    51%  |  369M    24%  |
> level  -2       127M    47%  |  22M     48%  |  470M    49%  |  349M    23%  |
> level  -1       120M    45%  |  21M     47%  |  452M    47%  |  332M    22%  |
> level   1       110M    41%  |  17M     39%  |  362M    38%  |  290M    19%  |
> level   2       106M    40%  |  17M     38%  |  349M    36%  |  288M    19%  |
> level   3       104M    39%  |  16M     37%  |  340M    35%  |  276M    18%  |
>
> Signed-off-by: Daniel Vacek <neelx@...e.com>
> ---
> Changes in v2:
>  * Set the minimal level to -10 and add a `clip_level()` helper as suggested
>    by Dave.
>  * Add more filetypes to comparison in commit message.
>
> ---
>  fs/btrfs/compression.c | 18 +++++++---------
>  fs/btrfs/compression.h | 25 +++++++---------------
>  fs/btrfs/fs.h          |  2 +-
>  fs/btrfs/inode.c       |  2 +-
>  fs/btrfs/super.c       |  2 +-
>  fs/btrfs/zlib.c        |  1 +
>  fs/btrfs/zstd.c        | 48 +++++++++++++++++++++++++-----------------
>  7 files changed, 49 insertions(+), 49 deletions(-)
>
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 0c4d486c3048d..6d073e69af4e3 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -740,7 +740,7 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
>         &btrfs_zstd_compress,
>  };
>
> -static struct list_head *alloc_workspace(int type, unsigned int level)
> +static struct list_head *alloc_workspace(int type, int level)
>  {
>         switch (type) {
>         case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws();
> @@ -818,7 +818,7 @@ static void btrfs_cleanup_workspace_manager(int type)
>   * Preallocation makes a forward progress guarantees and we do not return
>   * errors.
>   */
> -struct list_head *btrfs_get_workspace(int type, unsigned int level)
> +struct list_head *btrfs_get_workspace(int type, int level)
>  {
>         struct workspace_manager *wsm;
>         struct list_head *workspace;
> @@ -968,14 +968,14 @@ static void put_workspace(int type, struct list_head *ws)
>   * Adjust @level according to the limits of the compression algorithm or
>   * fallback to default
>   */
> -static unsigned int btrfs_compress_set_level(int type, unsigned level)
> +static int btrfs_compress_set_level(unsigned int type, int level)
>  {
>         const struct btrfs_compress_op *ops = btrfs_compress_op[type];
>
>         if (level == 0)
>                 level = ops->default_level;
>         else
> -               level = min(level, ops->max_level);
> +               level = min(max(level, ops->min_level), ops->max_level);
>
>         return level;
>  }
> @@ -1023,12 +1023,10 @@ int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
>   * @total_out is an in/out parameter, must be set to the input length and will
>   * be also used to return the total number of compressed bytes
>   */
> -int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
> +int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
>                          u64 start, struct folio **folios, unsigned long *out_folios,
>                          unsigned long *total_in, unsigned long *total_out)
>  {
> -       int type = btrfs_compress_type(type_level);
> -       int level = btrfs_compress_level(type_level);
>         const unsigned long orig_len = *total_out;
>         struct list_head *workspace;
>         int ret;
> @@ -1592,16 +1590,16 @@ int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
>   * Convert the compression suffix (eg. after "zlib" starting with ":") to
>   * level, unrecognized string will set the default level
>   */
> -unsigned int btrfs_compress_str2level(unsigned int type, const char *str)
> +int btrfs_compress_str2level(unsigned int type, const char *str)
>  {
> -       unsigned int level = 0;
> +       int level = 0;
>         int ret;
>
>         if (!type)
>                 return 0;
>
>         if (str[0] == ':') {
> -               ret = kstrtouint(str + 1, 10, &level);
> +               ret = kstrtoint(str + 1, 10, &level);
>                 if (ret)
>                         level = 0;
>         }
> diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
> index 954034086d0d4..933178f03d8f8 100644
> --- a/fs/btrfs/compression.h
> +++ b/fs/btrfs/compression.h
> @@ -72,16 +72,6 @@ struct compressed_bio {
>         struct btrfs_bio bbio;
>  };
>
> -static inline unsigned int btrfs_compress_type(unsigned int type_level)
> -{
> -       return (type_level & 0xF);
> -}
> -
> -static inline unsigned int btrfs_compress_level(unsigned int type_level)
> -{
> -       return ((type_level & 0xF0) >> 4);
> -}
> -
>  /* @range_end must be exclusive. */
>  static inline u32 btrfs_calc_input_length(u64 range_end, u64 cur)
>  {
> @@ -93,7 +83,7 @@ static inline u32 btrfs_calc_input_length(u64 range_end, u64 cur)
>  int __init btrfs_init_compress(void);
>  void __cold btrfs_exit_compress(void);
>
> -int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
> +int btrfs_compress_folios(unsigned int type, int level, struct address_space *mapping,
>                           u64 start, struct folio **folios, unsigned long *out_folios,
>                          unsigned long *total_in, unsigned long *total_out);
>  int btrfs_decompress(int type, const u8 *data_in, struct folio *dest_folio,
> @@ -107,7 +97,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
>                                    bool writeback);
>  void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
>
> -unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
> +int btrfs_compress_str2level(unsigned int type, const char *str);
>
>  struct folio *btrfs_alloc_compr_folio(void);
>  void btrfs_free_compr_folio(struct folio *folio);
> @@ -131,14 +121,15 @@ struct workspace_manager {
>         wait_queue_head_t ws_wait;
>  };
>
> -struct list_head *btrfs_get_workspace(int type, unsigned int level);
> +struct list_head *btrfs_get_workspace(int type, int level);
>  void btrfs_put_workspace(int type, struct list_head *ws);
>
>  struct btrfs_compress_op {
>         struct workspace_manager *workspace_manager;
>         /* Maximum level supported by the compression algorithm */
> -       unsigned int max_level;
> -       unsigned int default_level;
> +       int min_level;
> +       int max_level;
> +       int default_level;
>  };
>
>  /* The heuristic workspaces are managed via the 0th workspace manager */
> @@ -187,9 +178,9 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
>                 size_t destlen);
>  void zstd_init_workspace_manager(void);
>  void zstd_cleanup_workspace_manager(void);
> -struct list_head *zstd_alloc_workspace(unsigned int level);
> +struct list_head *zstd_alloc_workspace(int level);
>  void zstd_free_workspace(struct list_head *ws);
> -struct list_head *zstd_get_workspace(unsigned int level);
> +struct list_head *zstd_get_workspace(int level);
>  void zstd_put_workspace(struct list_head *ws);
>
>  #endif
> diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
> index 79a1a3d6f04d1..be6d5a24bd4e6 100644
> --- a/fs/btrfs/fs.h
> +++ b/fs/btrfs/fs.h
> @@ -486,7 +486,7 @@ struct btrfs_fs_info {
>         unsigned long long mount_opt;
>
>         unsigned long compress_type:4;
> -       unsigned int compress_level;
> +       int compress_level;
>         u32 commit_interval;
>         /*
>          * It is a suggestive number, the read side is safe even it gets a
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 27b2fe7f735d5..fa04b027d53ac 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -1013,7 +1013,7 @@ static void compress_file_range(struct btrfs_work *work)
>                 compress_type = inode->prop_compress;
>
>         /* Compression level is applied here. */
> -       ret = btrfs_compress_folios(compress_type | (fs_info->compress_level << 4),
> +       ret = btrfs_compress_folios(compress_type, fs_info->compress_level,
>                                     mapping, start, folios, &nr_folios, &total_in,
>                                     &total_compressed);
>         if (ret)
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 7dfe5005129a1..cebbb0890c37e 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -84,7 +84,7 @@ struct btrfs_fs_context {
>         u32 thread_pool_size;
>         unsigned long long mount_opt;
>         unsigned long compress_type:4;
> -       unsigned int compress_level;
> +       int compress_level;
>         refcount_t refs;
>  };
>
> diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
> index c9e92c6941ec4..047d30d20ff16 100644
> --- a/fs/btrfs/zlib.c
> +++ b/fs/btrfs/zlib.c
> @@ -463,6 +463,7 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
>
>  const struct btrfs_compress_op btrfs_zlib_compress = {
>         .workspace_manager      = &wsm,
> +       .min_level              = 1,
>         .max_level              = 9,
>         .default_level          = BTRFS_ZLIB_DEFAULT_LEVEL,
>  };
> diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
> index 5232b56d58925..a31975833725e 100644
> --- a/fs/btrfs/zstd.c
> +++ b/fs/btrfs/zstd.c
> @@ -26,11 +26,12 @@
>  #define ZSTD_BTRFS_MAX_WINDOWLOG 17
>  #define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
>  #define ZSTD_BTRFS_DEFAULT_LEVEL 3
> +#define ZSTD_BTRFS_MIN_LEVEL -10
>  #define ZSTD_BTRFS_MAX_LEVEL 15
>  /* 307s to avoid pathologically clashing with transaction commit */
>  #define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
>
> -static zstd_parameters zstd_get_btrfs_parameters(unsigned int level,
> +static zstd_parameters zstd_get_btrfs_parameters(int level,
>                                                  size_t src_len)
>  {
>         zstd_parameters params = zstd_get_params(level, src_len);
> @@ -45,8 +46,8 @@ struct workspace {
>         void *mem;
>         size_t size;
>         char *buf;
> -       unsigned int level;
> -       unsigned int req_level;
> +       int level;
> +       int req_level;
>         unsigned long last_used; /* jiffies */
>         struct list_head list;
>         struct list_head lru_list;
> @@ -93,8 +94,10 @@ static inline struct workspace *list_to_workspace(struct list_head *list)
>         return container_of(list, struct workspace, list);
>  }
>
> -void zstd_free_workspace(struct list_head *ws);
> -struct list_head *zstd_alloc_workspace(unsigned int level);
> +static inline int clip_level(int level)
> +{
> +       return clip_level(level);

Sigh. I've accidentally sent a wrong patch file. This should obviously be

return max(0, level - 1);

Please disregard, I'll send a v3 shortly.

> +}
>
>  /*
>   * Timer callback to free unused workspaces.
> @@ -123,7 +126,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
>         list_for_each_prev_safe(pos, next, &wsm.lru_list) {
>                 struct workspace *victim = container_of(pos, struct workspace,
>                                                         lru_list);
> -               unsigned int level;
> +               int level;
>
>                 if (time_after(victim->last_used, reclaim_threshold))
>                         break;
> @@ -132,13 +135,13 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
>                 if (victim->req_level)
>                         continue;
>
> -               level = victim->level;
> +               level = clip_level(victim->level);
>                 list_del(&victim->lru_list);
>                 list_del(&victim->list);
>                 zstd_free_workspace(&victim->list);
>
> -               if (list_empty(&wsm.idle_ws[level - 1]))
> -                       clear_bit(level - 1, &wsm.active_map);
> +               if (list_empty(&wsm.idle_ws[level]))
> +                       clear_bit(level, &wsm.active_map);
>
>         }
>
> @@ -160,9 +163,11 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
>  static void zstd_calc_ws_mem_sizes(void)
>  {
>         size_t max_size = 0;
> -       unsigned int level;
> +       int level;
>
> -       for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
> +       for (level = ZSTD_BTRFS_MIN_LEVEL; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
> +               if (level == 0)
> +                       continue;
>                 zstd_parameters params =
>                         zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
>                 size_t level_size =
> @@ -171,7 +176,8 @@ static void zstd_calc_ws_mem_sizes(void)
>                               zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
>
>                 max_size = max_t(size_t, max_size, level_size);
> -               zstd_ws_mem_sizes[level - 1] = max_size;
> +               /* Use level 1 workspace size for all the fast mode negative levels. */
> +               zstd_ws_mem_sizes[clip_level(level)] = max_size;
>         }
>  }
>
> @@ -233,11 +239,11 @@ void zstd_cleanup_workspace_manager(void)
>   * offer the opportunity to reclaim the workspace in favor of allocating an
>   * appropriately sized one in the future.
>   */
> -static struct list_head *zstd_find_workspace(unsigned int level)
> +static struct list_head *zstd_find_workspace(int level)
>  {
>         struct list_head *ws;
>         struct workspace *workspace;
> -       int i = level - 1;
> +       int i = clip_level(level);
>
>         spin_lock_bh(&wsm.lock);
>         for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
> @@ -270,7 +276,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
>   * attempt to allocate a new workspace.  If we fail to allocate one due to
>   * memory pressure, go to sleep waiting for the max level workspace to free up.
>   */
> -struct list_head *zstd_get_workspace(unsigned int level)
> +struct list_head *zstd_get_workspace(int level)
>  {
>         struct list_head *ws;
>         unsigned int nofs_flag;
> @@ -315,6 +321,7 @@ struct list_head *zstd_get_workspace(unsigned int level)
>  void zstd_put_workspace(struct list_head *ws)
>  {
>         struct workspace *workspace = list_to_workspace(ws);
> +       int level;
>
>         spin_lock_bh(&wsm.lock);
>
> @@ -332,8 +339,9 @@ void zstd_put_workspace(struct list_head *ws)
>                 }
>         }
>
> -       set_bit(workspace->level - 1, &wsm.active_map);
> -       list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
> +       level = clip_level(workspace->level);
> +       set_bit(level, &wsm.active_map);
> +       list_add(&workspace->list, &wsm.idle_ws[level]);
>         workspace->req_level = 0;
>
>         spin_unlock_bh(&wsm.lock);
> @@ -351,7 +359,7 @@ void zstd_free_workspace(struct list_head *ws)
>         kfree(workspace);
>  }
>
> -struct list_head *zstd_alloc_workspace(unsigned int level)
> +struct list_head *zstd_alloc_workspace(int level)
>  {
>         struct workspace *workspace;
>
> @@ -359,7 +367,8 @@ struct list_head *zstd_alloc_workspace(unsigned int level)
>         if (!workspace)
>                 return ERR_PTR(-ENOMEM);
>
> -       workspace->size = zstd_ws_mem_sizes[level - 1];
> +       /* Use level 1 workspace size for all the fast mode negative levels. */
> +       workspace->size = zstd_ws_mem_sizes[clip_level(level)];
>         workspace->level = level;
>         workspace->req_level = level;
>         workspace->last_used = jiffies;
> @@ -717,6 +726,7 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
>  const struct btrfs_compress_op btrfs_zstd_compress = {
>         /* ZSTD uses own workspace manager */
>         .workspace_manager = NULL,
> +       .min_level      = ZSTD_BTRFS_MIN_LEVEL,
>         .max_level      = ZSTD_BTRFS_MAX_LEVEL,
>         .default_level  = ZSTD_BTRFS_DEFAULT_LEVEL,
>  };
> --
> 2.45.2
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ