[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <631728e2-2808-47af-8db7-28cd8ae17622@linux.alibaba.com>
Date: Thu, 17 Jul 2025 16:26:44 +0800
From: Gao Xiang <hsiangkao@...ux.alibaba.com>
To: Chao Yu <chao@...nel.org>, xiang@...nel.org
Cc: linux-erofs@...ts.ozlabs.org, linux-kernel@...r.kernel.org,
Yue Hu <zbestahu@...il.com>, Jeffle Xu <jefflexu@...ux.alibaba.com>,
Sandeep Dhavale <dhavale@...gle.com>, Hongbo Li <lihongbo22@...wei.com>
Subject: Re: [PATCH v3] erofs: support to readahead dirent blocks in
erofs_readdir()
Hi Chao,
On 2025/7/14 17:39, Chao Yu wrote:
> This patch supports to readahead more blocks in erofs_readdir(), it can
> enhance readdir performance in large directories.
>
> readdir test in a large directory which contains 12000 sub-files.
>
> files_per_second
> Before: 926385.54
> After: 2380435.562
>
> Meanwhile, let's introduce a new sysfs entry to control readahead
> bytes to provide a more flexible policy for readahead of readdir().
> - location: /sys/fs/erofs/<disk>/dir_ra_bytes
> - default value: 16384
> - disable readahead: set the value to 0
>
> Signed-off-by: Chao Yu <chao@...nel.org>
> ---
> v3:
> - add EROFS prefix for macro
> - update new sysfs interface to 1) use bytes instead of pages
> 2) remove upper boundary limitation
> - fix bug of pageidx calculation
> Documentation/ABI/testing/sysfs-fs-erofs | 8 ++++++++
> fs/erofs/dir.c | 13 +++++++++++++
> fs/erofs/internal.h | 4 ++++
> fs/erofs/super.c | 2 ++
> fs/erofs/sysfs.c | 2 ++
> 5 files changed, 29 insertions(+)
>
> diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs
> index bf3b6299c15e..85fa56ca092c 100644
> --- a/Documentation/ABI/testing/sysfs-fs-erofs
> +++ b/Documentation/ABI/testing/sysfs-fs-erofs
> @@ -35,3 +35,11 @@ Description: Used to set or show hardware accelerators in effect
> and multiple accelerators are separated by '\n'.
> Supported accelerator(s): qat_deflate.
> Disable all accelerators with an empty string (echo > accel).
> +
> +What: /sys/fs/erofs/<disk>/dir_ra_bytes
> +Date: July 2025
> +Contact: "Chao Yu" <chao@...nel.org>
> +Description: Used to set or show readahead bytes during readdir(), by
> + default the value is 16384.
> +
> + - 0: disable readahead.
> diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
> index 3e4b38bec0aa..950d6b0046f4 100644
> --- a/fs/erofs/dir.c
> +++ b/fs/erofs/dir.c
> @@ -47,8 +47,10 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
> struct inode *dir = file_inode(f);
> struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
> struct super_block *sb = dir->i_sb;
> + struct file_ra_state *ra = &f->f_ra;
> unsigned long bsz = sb->s_blocksize;
> unsigned int ofs = erofs_blkoff(sb, ctx->pos);
> + unsigned long nr_pages = DIV_ROUND_UP_POW2(dir->i_size, PAGE_SIZE);
pgoff_t ra_pages = PAGE_ALIGN(EROFS_SB(dir)->dir_ra_bytes);
> int err = 0;
> bool initial = true;
>
> @@ -63,6 +65,17 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
> break;
> }
>
> + /* readahead blocks to enhance performance in large directory */
> + if (EROFS_I_SB(dir)->dir_ra_bytes) {
if (ra_pages) {
> + unsigned long idx = DIV_ROUND_UP(ctx->pos, PAGE_SIZE);
> + pgoff_t ra_pages = DIV_ROUND_UP(
> + EROFS_I_SB(dir)->dir_ra_bytes, PAGE_SIZE);
pgoff_t idx = PAGE_ALIGN(ctx->pos);
pgoff_t pages = min(nr_pages - idx, ra_pages);
> +
> + if (nr_pages - idx > 1 && !ra_has_index(ra, idx))
if (pages > 1 && !ra_has_index(ra, idx))
page_cache_sync_readahead(dir->i_mapping, ra,
f, idx, pages)?
> + page_cache_sync_readahead(dir->i_mapping, ra,
> + f, idx, min(nr_pages - idx, ra_pages));
> + }
> +
> de = erofs_bread(&buf, dbstart, true);
> if (IS_ERR(de)) {
> erofs_err(sb, "failed to readdir of logical block %llu of nid %llu",
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 0d19bde8c094..4399b9332307 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -157,6 +157,7 @@ struct erofs_sb_info {
> /* sysfs support */
> struct kobject s_kobj; /* /sys/fs/erofs/<devname> */
> struct completion s_kobj_unregister;
> + erofs_off_t dir_ra_bytes;
>
> /* fscache support */
> struct fscache_volume *volume;
> @@ -238,6 +239,9 @@ EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
> #define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1)
> #define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2)
>
> +/* default readahead size of directory */
/* default readahead size of directories */
Otherwise it looks good to me.
Thanks,
Gao Xiang
> +#define EROFS_DIR_RA_BYTES 16384
Powered by blists - more mailing lists