lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <37fafc56-af2f-4a73-a5b7-2041049b8c71@linux.alibaba.com>
Date: Mon, 22 Dec 2025 16:59:52 +0800
From: Gao Xiang <hsiangkao@...ux.alibaba.com>
To: Hongbo Li <lihongbo22@...wei.com>, chao@...nel.org, brauner@...nel.org,
 djwong@...nel.org, amir73il@...il.com, joannelkoong@...il.com
Cc: linux-fsdevel@...r.kernel.org, linux-erofs@...ts.ozlabs.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v9 07/10] erofs: introduce the page cache share feature

Hi Hongbo,

On 2025/11/17 21:25, Hongbo Li wrote:
> From: Hongzhen Luo <hongzhen@...ux.alibaba.com>
> 
> Currently, reading files with different paths (or names) but the same
> content will consume multiple copies of the page cache, even if the
> content of these page caches is the same. For example, reading
> identical files (e.g., *.so files) from two different minor versions of
> container images will cost multiple copies of the same page cache,
> since different containers have different mount points. Therefore,
> sharing the page cache for files with the same content can save memory.
> 
> This introduces the page cache share feature in erofs. It allocates a
> deduplicated inode and uses its page cache as shared. Reads for files
> with identical content will ultimately be routed to the page cache of
> the deduplicated inode. In this way, a single page cache satisfies
> multiple read requests for different files with the same contents.
> 
> Signed-off-by: Hongzhen Luo <hongzhen@...ux.alibaba.com>
> Signed-off-by: Hongbo Li <lihongbo22@...wei.com>
> ---
>   fs/erofs/Makefile   |   1 +
>   fs/erofs/internal.h |  29 ++++++
>   fs/erofs/ishare.c   | 241 ++++++++++++++++++++++++++++++++++++++++++++
>   fs/erofs/super.c    |  31 +++++-
>   4 files changed, 300 insertions(+), 2 deletions(-)
>   create mode 100644 fs/erofs/ishare.c
> 
> diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
> index 549abc424763..a80e1762b607 100644
> --- a/fs/erofs/Makefile
> +++ b/fs/erofs/Makefile
> @@ -10,3 +10,4 @@ erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
>   erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
>   erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
>   erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
> +erofs-$(CONFIG_EROFS_FS_PAGE_CACHE_SHARE) += ishare.o
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 3033252211ba..93ad34f2b488 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -304,6 +304,22 @@ struct erofs_inode {
>   		};
>   #endif	/* CONFIG_EROFS_FS_ZIP */
>   	};
> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +	union {
> +		/* internal dedup inode */
> +		struct {
> +			char *fingerprint;
> +			spinlock_t lock;
> +			/* all backing inodes */
> +			struct list_head backing_head;
> +		};
> +
> +		struct {
> +			struct inode *ishare;
> +			struct list_head backing_link;
> +		};
> +	};
> +#endif
>   	/* the corresponding vfs inode */
>   	struct inode vfs_inode;
>   };
> @@ -410,6 +426,7 @@ extern const struct inode_operations erofs_dir_iops;
>   
>   extern const struct file_operations erofs_file_fops;
>   extern const struct file_operations erofs_dir_fops;
> +extern const struct file_operations erofs_ishare_fops;
>   
>   extern const struct iomap_ops z_erofs_iomap_report_ops;
>   
> @@ -541,6 +558,18 @@ static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) {
>   static inline void erofs_fscache_submit_bio(struct bio *bio) {}
>   #endif
>   
> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +int erofs_ishare_init(struct super_block *sb);
> +void erofs_ishare_exit(struct super_block *sb);
> +bool erofs_ishare_fill_inode(struct inode *inode);
> +void erofs_ishare_free_inode(struct inode *inode);
> +#else
> +static inline int erofs_ishare_init(struct super_block *sb) { return 0; }
> +static inline void erofs_ishare_exit(struct super_block *sb) {}
> +static inline bool erofs_ishare_fill_inode(struct inode *inode) { return false; }
> +static inline void erofs_ishare_free_inode(struct inode *inode) {}
> +#endif // CONFIG_EROFS_FS_PAGE_CACHE_SHARE
> +
>   long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
>   long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
>   			unsigned long arg);
> diff --git a/fs/erofs/ishare.c b/fs/erofs/ishare.c
> new file mode 100644
> index 000000000000..f386efb260da
> --- /dev/null
> +++ b/fs/erofs/ishare.c
> @@ -0,0 +1,241 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024, Alibaba Cloud
> + */
> +#include <linux/xxhash.h>
> +#include <linux/refcount.h>
> +#include <linux/mount.h>
> +#include <linux/mutex.h>
> +#include <linux/ramfs.h>
> +#include "internal.h"
> +#include "xattr.h"
> +
> +#include "../internal.h"
> +
> +static DEFINE_MUTEX(erofs_ishare_lock);
> +static struct vfsmount *erofs_ishare_mnt;
> +static refcount_t erofs_ishare_supers;
> +
> +int erofs_ishare_init(struct super_block *sb)
> +{
> +	struct vfsmount *mnt = NULL;
> +	struct erofs_sb_info *sbi = EROFS_SB(sb);
> +
> +	if (!erofs_sb_has_ishare_key(sbi))
> +		return 0;
> +
> +	mutex_lock(&erofs_ishare_lock);
> +	if (erofs_ishare_mnt) {
> +		refcount_inc(&erofs_ishare_supers);
> +	} else {
> +		mnt = kern_mount(&erofs_anon_fs_type);
> +		if (!IS_ERR(mnt)) {
> +			erofs_ishare_mnt = mnt;
> +			refcount_set(&erofs_ishare_supers, 1);
> +		}
> +	}
> +	mutex_unlock(&erofs_ishare_lock);

It seems this part is too complex; we could just call kern_mount()
once, and call kern_unmount() before unregistering the module.

And since `erofs_anon_fs_type` is an internal fstype, I think we
could drop its ".owner" field so that the internal mount doesn't
prevent the fs module from being unloaded.

> +	return IS_ERR(mnt) ? PTR_ERR(mnt) : 0;
> +}
> +
> +void erofs_ishare_exit(struct super_block *sb)
> +{
> +	struct erofs_sb_info *sbi = EROFS_SB(sb);
> +	struct vfsmount *tmp;
> +
> +	if (!erofs_sb_has_ishare_key(sbi) || !erofs_ishare_mnt)
> +		return;
> +
> +	mutex_lock(&erofs_ishare_lock);
> +	if (refcount_dec_and_test(&erofs_ishare_supers)) {
> +		tmp = erofs_ishare_mnt;
> +		erofs_ishare_mnt = NULL;
> +		mutex_unlock(&erofs_ishare_lock);
> +		kern_unmount(tmp);
> +		mutex_lock(&erofs_ishare_lock);
> +	}
> +	mutex_unlock(&erofs_ishare_lock);

Same here.

Thanks,
Gao Xiang

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ