lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <3617ba19-e721-4e70-bb94-8a207d2aa8a6@linux.alibaba.com>
Date: Tue, 13 May 2025 13:47:16 +0800
From: Gao Xiang <hsiangkao@...ux.alibaba.com>
To: Bo Liu <liubo03@...pur.com>, xiang@...nel.org, chao@...nel.org
Cc: linux-erofs@...ts.ozlabs.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] erofs: support deflate decompress by using Intel QAT

Hi Bo,

On 2025/4/10 12:20, Bo Liu wrote:
> This patch introdueces the use of the Intel QAT to decompress compressed
> data in the EROFS filesystem, aiming to improve the decompression speed
> of compressed datea.
> 
> We created a 285MiB compressed file and then used the following command to
> create EROFS images with different cluster size.
>       # mkfs.erofs -zdeflate,level=9 -C16384
> 
> fio command was used to test random read and small random read(~5%) and
> sequential read performance.
>       # fio -filename=testfile  -bs=4k -rw=read -name=job1
>       # fio -filename=testfile  -bs=4k -rw=randread -name=job1
>       # fio -filename=testfile  -bs=4k -rw=randread --io_size=14m -name=job1
> 
> Here are some performance numbers for reference:
> 
> Processors: Intel(R) Xeon(R) 6766E(144 core)
> Memory:     521 GiB
> 
> |-----------------------------------------------------------------------------|
> |           | Cluster size | sequential read | randread  | small randread(5%) |
> |-----------|--------------|-----------------|-----------|--------------------|
> | Intel QAT |    4096      |    538  MiB/s   | 112 MiB/s |     20.76 MiB/s    |
> | Intel QAT |    16384     |    699  MiB/s   | 158 MiB/s |     21.02 MiB/s    |
> | Intel QAT |    65536     |    917  MiB/s   | 278 MiB/s |     20.90 MiB/s    |
> | Intel QAT |    131072    |    1056 MiB/s   | 351 MiB/s |     23.36 MiB/s    |
> | Intel QAT |    262144    |    1145 MiB/s   | 431 MiB/s |     26.66 MiB/s    |
> | deflate   |    4096      |    499  MiB/s   | 108 MiB/s |     21.50 MiB/s    |
> | deflate   |    16384     |    422  MiB/s   | 125 MiB/s |     18.94 MiB/s    |
> | deflate   |    65536     |    452  MiB/s   | 159 MiB/s |     13.02 MiB/s    |
> | deflate   |    65536     |    452  MiB/s   | 177 MiB/s |     11.44 MiB/s    |
> | deflate   |    262144    |    466  MiB/s   | 194 MiB/s |     10.60 MiB/s    |
> 
> Signed-off-by: Bo Liu <liubo03@...pur.com>

Sorry for late reply due to internal stuffs.

> ---
>   fs/erofs/decompressor_deflate.c | 145 +++++++++++++++++++++++++++++++-
>   fs/erofs/internal.h             |   1 +
>   fs/erofs/sysfs.c                |  30 +++++++
>   3 files changed, 175 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
> index c6908a487054..a293c44e86d2 100644
> --- a/fs/erofs/decompressor_deflate.c
> +++ b/fs/erofs/decompressor_deflate.c
> @@ -1,5 +1,8 @@
>   // SPDX-License-Identifier: GPL-2.0-or-later
>   #include <linux/zlib.h>
> +#include <linux/scatterlist.h>
> +#include <crypto/acompress.h>

I guess we could move this feature into a new file called
"decompressor_crypto.c"?

> +
>   #include "compress.h"
>   
>   struct z_erofs_deflate {
> @@ -97,7 +100,7 @@ static int z_erofs_load_deflate_config(struct super_block *sb,
>   	return -ENOMEM;
>   }
>   
> -static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
> +static int __z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
>   				      struct page **pgpl)
>   {
>   	struct super_block *sb = rq->sb;
> @@ -178,6 +181,146 @@ static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
>   	return err;
>   }
>   
> +static int z_erofs_crypto_decompress_mem(struct z_erofs_decompress_req *rq)
> +{
> +	struct erofs_sb_info *sbi = EROFS_SB(rq->sb);
> +	unsigned int nrpages_out =
> +				PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
> +	unsigned int nrpages_in = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;

I've optimized out those fields in
commit 0243cc257ffa ("erofs: move {in,out}pages into struct z_erofs_decompress_req")

so we don't need to recalculate here again.

> +	struct sg_table st_src, st_dst;
> +	struct scatterlist *sg_src, *sg_dst;
> +	struct acomp_req *req;
> +	struct crypto_wait wait;
> +	u8 *headpage;
> +	int ret, i;
> +
> +	WARN_ON(!*rq->in);
> +	headpage = kmap_local_page(*rq->in);
> +
> +	ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
> +				min_t(unsigned int, rq->inputsize,
> +							rq->sb->s_blocksize - rq->pageofs_in));
> +
> +	kunmap_local(headpage);
> +	if (ret) {
> +		return ret;
> +	}

Unnecessary brace.

> +
> +	req = acomp_request_alloc(sbi->erofs_tfm);
> +	if (!req) {
> +		erofs_err(rq->sb, "failed to alloc decompress request");
> +		return -ENOMEM;
> +	}
> +
> +	if (sg_alloc_table(&st_src, nrpages_in, GFP_KERNEL)) {
> +		acomp_request_free(req);
> +		return -ENOMEM;
> +	}
> +
> +	if (sg_alloc_table(&st_dst, nrpages_out, GFP_KERNEL)) {
> +		acomp_request_free(req);
> +		sg_free_table(&st_src);
> +		return -ENOMEM;
> +	}
> +
> +	for_each_sg(st_src.sgl, sg_src, nrpages_in, i) {
> +		if (i == 0)
> +			sg_set_page(sg_src, rq->in[0],
> +					PAGE_SIZE - rq->pageofs_in, rq->pageofs_in);
> +		else
> +			sg_set_page(sg_src, rq->in[i], PAGE_SIZE, 0);

^ should we consider rq->inputsize here?

> +	}
> +
> +	i = 0;
> +	for_each_sg(st_dst.sgl, sg_dst, nrpages_out, i) {
> +		if (i == 0)
> +			sg_set_page(sg_dst, rq->out[0],
> +					PAGE_SIZE - rq->pageofs_out, rq->pageofs_out);
> +		 else
> +			sg_set_page(sg_dst, rq->out[i], PAGE_SIZE, 0);

^ should we consider rq->outputsize here?

> +	}
> +
> +	acomp_request_set_params(req, st_src.sgl,
> +				st_dst.sgl, rq->inputsize, rq->outputsize);
> +
> +	crypto_init_wait(&wait);
> +	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
> +						crypto_req_done, &wait);
> +
> +	ret = crypto_wait_req(crypto_acomp_decompress(req), &wait);
> +	if (ret < 0) {
> +		if (ret == -EBADMSG && rq->partial_decoding) {

Does QAT support partial decompression?

> +			ret = 0;
> +		} else {
> +			erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
> +					ret, rq->inputsize, rq->pageofs_in, rq->outputsize);
> +			ret = -EIO;
> +		}
> +	} else {
> +		ret = 0;
> +	}
> +
> +	acomp_request_free(req);
> +	sg_free_table(&st_src);
> +	sg_free_table(&st_dst);
> +	return ret;
> +}
> +
> +static int z_erofs_crypto_prepare_dstpages(struct z_erofs_decompress_req *rq,
> +							struct page **pagepool)
> +{
> +	const unsigned int nr =
> +				PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;

same here.

Also I suggest fold this `z_erofs_crypto_prepare_dstpages()` into
__z_erofs_deflate_crypto_decompress().

> +	unsigned int i;
> +
> +	for (i = 0; i < nr; ++i) {
> +		struct page *const page = rq->out[i];
> +		struct page *victim;
> +
> +		if (!page) {
> +			victim = __erofs_allocpage(pagepool, rq->gfp, true);
> +			if (!victim)
> +				return -ENOMEM;
> +			set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
> +			rq->out[i] = victim;
> +		}
> +	}
> +	return 0;
> +}
> +
> +static int __z_erofs_deflate_crypto_decompress(struct z_erofs_decompress_req *rq,
> +									 struct page **pgpl)

It seems this function can be used for other hardware
accelerators, so I guess z_erofs_crypto_decompress is
enough.

> +{
> +	const unsigned int nrpages_out =
> +			PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
> +	int ret;
> +
> +	/* one optimized fast path only for non bigpcluster cases yet */
> +	if (rq->inputsize <= PAGE_SIZE && nrpages_out == 1 && !rq->inplace_io) {
> +		WARN_ON(!*rq->out);
> +		goto dstmap_out;
> +	}
> +
> +	ret = z_erofs_crypto_prepare_dstpages(rq, pgpl);
> +	if (ret < 0)
> +		return ret;
> +
> +dstmap_out:
> +	return z_erofs_crypto_decompress_mem(rq);
> +}
> +
> +static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
> +				struct page **pgpl)

I wonder if it's possible to add a hardware acceleralor list
such as

struct z_erofs_crypto_engine {
	char *name;
	struct z_erofs_decompressor *decomp;
	bool enabled;
};

struct z_erofs_crypto_engine eng = {
	{ "qat_deflate", &z_erofs_deflate_decomp },
	...

};

so that we could add more hardware accelerators easily.

> +{
> +	struct super_block *sb = rq->sb;
> +	struct erofs_sb_info *sbi = EROFS_SB(sb);
> +
> +	if (sbi->erofs_tfm)
> +		return __z_erofs_deflate_crypto_decompress(rq, pgpl);
> +	else
> +		return __z_erofs_deflate_decompress(rq, pgpl);
> +}
> +
>   const struct z_erofs_decompressor z_erofs_deflate_decomp = {
>   	.config = z_erofs_load_deflate_config,
>   	.decompress = z_erofs_deflate_decompress,
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h> index 4ac188d5d894..96fcee07d353 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -122,6 +122,7 @@ struct erofs_sb_info {
>   	/* pseudo inode to manage cached pages */
>   	struct inode *managed_cache;
>   
> +	struct crypto_acomp *erofs_tfm;
>   	struct erofs_sb_lz4_info lz4;
>   #endif	/* CONFIG_EROFS_FS_ZIP */
>   	struct inode *packed_inode;
> diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
> index dad4e6c6c155..d4630697dafd 100644
> --- a/fs/erofs/sysfs.c
> +++ b/fs/erofs/sysfs.c
> @@ -5,6 +5,7 @@
>    */
>   #include <linux/sysfs.h>
>   #include <linux/kobject.h>
> +#include <crypto/acompress.h>
>   
>   #include "internal.h"
>   
> @@ -13,6 +14,7 @@ enum {
>   	attr_drop_caches,
>   	attr_pointer_ui,
>   	attr_pointer_bool,
> +	attr_comp_crypto,
>   };
>   
>   enum {
> @@ -59,12 +61,14 @@ static struct erofs_attr erofs_attr_##_name = {			\
>   #ifdef CONFIG_EROFS_FS_ZIP
>   EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts);
>   EROFS_ATTR_FUNC(drop_caches, 0200);
> +EROFS_ATTR_FUNC(comp_crypto, 0644);
>   #endif
>   
>   static struct attribute *erofs_attrs[] = {
>   #ifdef CONFIG_EROFS_FS_ZIP
>   	ATTR_LIST(sync_decompress),
>   	ATTR_LIST(drop_caches),
> +	ATTR_LIST(comp_crypto),
>   #endif
>   	NULL,
>   };
> @@ -128,6 +132,12 @@ static ssize_t erofs_attr_show(struct kobject *kobj,
>   		if (!ptr)
>   			return 0;
>   		return sysfs_emit(buf, "%d\n", *(bool *)ptr);
> +	case attr_comp_crypto:
> +		if (sbi->erofs_tfm)
> +			return sysfs_emit(buf, "%s\n",
> +				crypto_comp_alg_common(sbi->erofs_tfm)->base.cra_driver_name);
> +		else
> +			return sysfs_emit(buf, "NONE\n");
>   	}
>   	return 0;
>   }
> @@ -181,6 +191,26 @@ static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
>   		if (t & 1)
>   			invalidate_mapping_pages(MNGD_MAPPING(sbi), 0, -1);
>   		return len;
> +	case attr_comp_crypto:

Then I'd like to make this sysfs to enable accelerators.

Thanks,
Gao XIang

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ