[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <37fafc56-af2f-4a73-a5b7-2041049b8c71@linux.alibaba.com>
Date: Mon, 22 Dec 2025 16:59:52 +0800
From: Gao Xiang <hsiangkao@...ux.alibaba.com>
To: Hongbo Li <lihongbo22@...wei.com>, chao@...nel.org, brauner@...nel.org,
djwong@...nel.org, amir73il@...il.com, joannelkoong@...il.com
Cc: linux-fsdevel@...r.kernel.org, linux-erofs@...ts.ozlabs.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v9 07/10] erofs: introduce the page cache share feature
Hi Hongbo,
On 2025/11/17 21:25, Hongbo Li wrote:
> From: Hongzhen Luo <hongzhen@...ux.alibaba.com>
>
> Currently, reading files with different paths (or names) but the same
> content will consume multiple copies of the page cache, even if the
> content of these page caches is the same. For example, reading
> identical files (e.g., *.so files) from two different minor versions of
> container images will cost multiple copies of the same page cache,
> since different containers have different mount points. Therefore,
> sharing the page cache for files with the same content can save memory.
>
> This introduces the page cache share feature in erofs. It allocates a
> deduplicated inode and uses its page cache as shared. Reads for files
> with identical content will ultimately be routed to the page cache of
> the deduplicated inode. In this way, a single page cache satisfies
> multiple read requests for different files with the same contents.
>
> Signed-off-by: Hongzhen Luo <hongzhen@...ux.alibaba.com>
> Signed-off-by: Hongbo Li <lihongbo22@...wei.com>
> ---
> fs/erofs/Makefile | 1 +
> fs/erofs/internal.h | 29 ++++++
> fs/erofs/ishare.c | 241 ++++++++++++++++++++++++++++++++++++++++++++
> fs/erofs/super.c | 31 +++++-
> 4 files changed, 300 insertions(+), 2 deletions(-)
> create mode 100644 fs/erofs/ishare.c
>
> diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
> index 549abc424763..a80e1762b607 100644
> --- a/fs/erofs/Makefile
> +++ b/fs/erofs/Makefile
> @@ -10,3 +10,4 @@ erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
> erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
> erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
> erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
> +erofs-$(CONFIG_EROFS_FS_PAGE_CACHE_SHARE) += ishare.o
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 3033252211ba..93ad34f2b488 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -304,6 +304,22 @@ struct erofs_inode {
> };
> #endif /* CONFIG_EROFS_FS_ZIP */
> };
> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> + union {
> + /* internal dedup inode */
> + struct {
> + char *fingerprint;
> + spinlock_t lock;
> + /* all backing inodes */
> + struct list_head backing_head;
> + };
> +
> + struct {
> + struct inode *ishare;
> + struct list_head backing_link;
> + };
> + };
> +#endif
> /* the corresponding vfs inode */
> struct inode vfs_inode;
> };
> @@ -410,6 +426,7 @@ extern const struct inode_operations erofs_dir_iops;
>
> extern const struct file_operations erofs_file_fops;
> extern const struct file_operations erofs_dir_fops;
> +extern const struct file_operations erofs_ishare_fops;
>
> extern const struct iomap_ops z_erofs_iomap_report_ops;
>
> @@ -541,6 +558,18 @@ static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) {
> static inline void erofs_fscache_submit_bio(struct bio *bio) {}
> #endif
>
> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +int erofs_ishare_init(struct super_block *sb);
> +void erofs_ishare_exit(struct super_block *sb);
> +bool erofs_ishare_fill_inode(struct inode *inode);
> +void erofs_ishare_free_inode(struct inode *inode);
> +#else
> +static inline int erofs_ishare_init(struct super_block *sb) { return 0; }
> +static inline void erofs_ishare_exit(struct super_block *sb) {}
> +static inline bool erofs_ishare_fill_inode(struct inode *inode) { return false; }
> +static inline void erofs_ishare_free_inode(struct inode *inode) {}
> +#endif // CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +
> long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
> long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
> unsigned long arg);
> diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
> new file mode 100644
> index 000000000000..f386efb260da
> --- /dev/null
> +++ b/fs/erofs/ishare.c
> @@ -0,0 +1,241 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024, Alibaba Cloud
> + */
> +#include <linux/xxhash.h>
> +#include <linux/refcount.h>
> +#include <linux/mount.h>
> +#include <linux/mutex.h>
> +#include <linux/ramfs.h>
> +#include "internal.h"
> +#include "xattr.h"
> +
> +#include "../internal.h"
> +
> +static DEFINE_MUTEX(erofs_ishare_lock);
> +static struct vfsmount *erofs_ishare_mnt;
> +static refcount_t erofs_ishare_supers;
> +
> +int erofs_ishare_init(struct super_block *sb)
> +{
> + struct vfsmount *mnt = NULL;
> + struct erofs_sb_info *sbi = EROFS_SB(sb);
> +
> + if (!erofs_sb_has_ishare_key(sbi))
> + return 0;
> +
> + mutex_lock(&erofs_ishare_lock);
> + if (erofs_ishare_mnt) {
> + refcount_inc(&erofs_ishare_supers);
> + } else {
> + mnt = kern_mount(&erofs_anon_fs_type);
> + if (!IS_ERR(mnt)) {
> + erofs_ishare_mnt = mnt;
> + refcount_set(&erofs_ishare_supers, 1);
> + }
> + }
> + mutex_unlock(&erofs_ishare_lock);
This part seems too complex; we could just call kern_mount() once
and kern_unmount() before unregistering the module.
And since `erofs_anon_fs_type` is an internal fstype, I think we
could drop the ".owner" field so that this internal mount does not
prevent the fs module from being unloaded.
> + return IS_ERR(mnt) ? PTR_ERR(mnt) : 0;
> +}
> +
> +void erofs_ishare_exit(struct super_block *sb)
> +{
> + struct erofs_sb_info *sbi = EROFS_SB(sb);
> + struct vfsmount *tmp;
> +
> + if (!erofs_sb_has_ishare_key(sbi) || !erofs_ishare_mnt)
> + return;
> +
> + mutex_lock(&erofs_ishare_lock);
> + if (refcount_dec_and_test(&erofs_ishare_supers)) {
> + tmp = erofs_ishare_mnt;
> + erofs_ishare_mnt = NULL;
> + mutex_unlock(&erofs_ishare_lock);
> + kern_unmount(tmp);
> + mutex_lock(&erofs_ishare_lock);
> + }
> + mutex_unlock(&erofs_ishare_lock);
Same here.
Thanks,
Gao Xiang
Powered by blists - more mailing lists