[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <06419d60-1fe9-4fcc-9d14-2751e12b6f7a@huawei.com>
Date: Wed, 7 Jan 2026 14:50:42 +0800
From: Hongbo Li <lihongbo22@...wei.com>
To: Gao Xiang <hsiangkao@...ux.alibaba.com>
CC: <djwong@...nel.org>, <amir73il@...il.com>, <hch@....de>,
<linux-fsdevel@...r.kernel.org>, <linux-erofs@...ts.ozlabs.org>,
<linux-kernel@...r.kernel.org>, Chao Yu <chao@...nel.org>, Christian Brauner
<brauner@...nel.org>
Subject: Re: [PATCH v12 08/10] erofs: support unencoded inodes for page cache
share
On 2026/1/7 14:12, Gao Xiang wrote:
>
>
> On 2025/12/31 17:01, Hongbo Li wrote:
>> This patch adds inode page cache sharing functionality for unencoded
>> files.
>>
>> I conducted experiments in the container environment. Below is the
>> memory usage for reading all files in two different minor versions
>> of container images:
>>
>> +-------------------+------------------+-------------+---------------+
>> | Image | Page Cache Share | Memory (MB) | Memory |
>> | | | | Reduction (%) |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 241 | - |
>> | redis +------------------+-------------+---------------+
>> | 7.2.4 & 7.2.5 | Yes | 163 | 33% |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 872 | - |
>> | postgres +------------------+-------------+---------------+
>> | 16.1 & 16.2 | Yes | 630 | 28% |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 2771 | - |
>> | tensorflow +------------------+-------------+---------------+
>> | 2.11.0 & 2.11.1 | Yes | 2340 | 16% |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 926 | - |
>> | mysql +------------------+-------------+---------------+
>> | 8.0.11 & 8.0.12 | Yes | 735 | 21% |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 390 | - |
>> | nginx +------------------+-------------+---------------+
>> | 7.2.4 & 7.2.5 | Yes | 219 | 44% |
>> +-------------------+------------------+-------------+---------------+
>> | tomcat | No | 924 | - |
>> | 10.1.25 & 10.1.26 +------------------+-------------+---------------+
>> | | Yes | 474 | 49% |
>> +-------------------+------------------+-------------+---------------+
>>
>> Additionally, the table below shows the runtime memory usage of the
>> container:
>>
>> +-------------------+------------------+-------------+---------------+
>> | Image | Page Cache Share | Memory (MB) | Memory |
>> | | | | Reduction (%) |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 35 | - |
>> | redis +------------------+-------------+---------------+
>> | 7.2.4 & 7.2.5 | Yes | 28 | 20% |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 149 | - |
>> | postgres +------------------+-------------+---------------+
>> | 16.1 & 16.2 | Yes | 95 | 37% |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 1028 | - |
>> | tensorflow +------------------+-------------+---------------+
>> | 2.11.0 & 2.11.1 | Yes | 930 | 10% |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 155 | - |
>> | mysql +------------------+-------------+---------------+
>> | 8.0.11 & 8.0.12 | Yes | 132 | 15% |
>> +-------------------+------------------+-------------+---------------+
>> | | No | 25 | - |
>> | nginx +------------------+-------------+---------------+
>> | 7.2.4 & 7.2.5 | Yes | 20 | 20% |
>> +-------------------+------------------+-------------+---------------+
>> | tomcat | No | 186 | - |
>> | 10.1.25 & 10.1.26 +------------------+-------------+---------------+
>> | | Yes | 98 | 48% |
>> +-------------------+------------------+-------------+---------------+
>>
>> Co-developed-by: Hongzhen Luo <hongzhen@...ux.alibaba.com>
>> Signed-off-by: Hongzhen Luo <hongzhen@...ux.alibaba.com>
>> Signed-off-by: Hongbo Li <lihongbo22@...wei.com>
>> ---
>> fs/erofs/data.c | 30 +++++++++++++++++++++++-------
>> fs/erofs/inode.c | 4 ++++
>> fs/erofs/internal.h | 6 ++++++
>> fs/erofs/ishare.c | 32 ++++++++++++++++++++++++++++++++
>> 4 files changed, 65 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
>> index 71e23d91123d..5fc8e3ce0d9e 100644
>> --- a/fs/erofs/data.c
>> +++ b/fs/erofs/data.c
>> @@ -269,6 +269,7 @@ void erofs_onlinefolio_end(struct folio *folio,
>> int err, bool dirty)
>> struct erofs_iomap_iter_ctx {
>> struct page *page;
>> void *base;
>> + struct inode *realinode;
>> };
>> static int erofs_iomap_begin(struct inode *inode, loff_t offset,
>> loff_t length,
>> @@ -276,14 +277,15 @@ static int erofs_iomap_begin(struct inode
>> *inode, loff_t offset, loff_t length,
>> {
>> struct iomap_iter *iter = container_of(iomap, struct iomap_iter,
>> iomap);
>> struct erofs_iomap_iter_ctx *ctx = iter->private;
>> - struct super_block *sb = inode->i_sb;
>> + struct inode *realinode = ctx ? ctx->realinode : inode;
>> + struct super_block *sb = realinode->i_sb;
>> struct erofs_map_blocks map;
>> struct erofs_map_dev mdev;
>> int ret;
>> map.m_la = offset;
>> map.m_llen = length;
>> - ret = erofs_map_blocks(inode, &map);
>> + ret = erofs_map_blocks(realinode, &map);
>> if (ret < 0)
>> return ret;
>> @@ -296,7 +298,7 @@ static int erofs_iomap_begin(struct inode *inode,
>> loff_t offset, loff_t length,
>> return 0;
>> }
>> - if (!(map.m_flags & EROFS_MAP_META) ||
>> !erofs_inode_in_metabox(inode)) {
>> + if (!(map.m_flags & EROFS_MAP_META) ||
>> !erofs_inode_in_metabox(realinode)) {
>> mdev = (struct erofs_map_dev) {
>> .m_deviceid = map.m_deviceid,
>> .m_pa = map.m_pa,
>> @@ -322,7 +324,7 @@ static int erofs_iomap_begin(struct inode *inode,
>> loff_t offset, loff_t length,
>> void *ptr;
>> ptr = erofs_read_metabuf(&buf, sb, map.m_pa,
>> - erofs_inode_in_metabox(inode));
>> + erofs_inode_in_metabox(realinode));
>> if (IS_ERR(ptr))
>> return PTR_ERR(ptr);
>> iomap->inline_data = ptr;
>> @@ -379,30 +381,42 @@ int erofs_fiemap(struct inode *inode, struct
>> fiemap_extent_info *fieinfo,
>> */
>> static int erofs_read_folio(struct file *file, struct folio *folio)
>> {
>> + struct inode *inode = folio_inode(folio);
>> struct iomap_read_folio_ctx read_ctx = {
>> .ops = &iomap_bio_read_ops,
>> .cur_folio = folio,
>> };
>> - struct erofs_iomap_iter_ctx iter_ctx = {};
>> + bool need_iput;
>> + struct erofs_iomap_iter_ctx iter_ctx = {
>> + .realinode = erofs_real_inode(inode, &need_iput),
>> + };
>> trace_erofs_read_folio(folio, true);
>> iomap_read_folio(&erofs_iomap_ops, &read_ctx, &iter_ctx);
>> + if (need_iput)
>> + iput(iter_ctx.realinode);
>> return 0;
>> }
>> static void erofs_readahead(struct readahead_control *rac)
>> {
>> + struct inode *inode = rac->mapping->host;
>> struct iomap_read_folio_ctx read_ctx = {
>> .ops = &iomap_bio_read_ops,
>> .rac = rac,
>> };
>> - struct erofs_iomap_iter_ctx iter_ctx = {};
>> + bool need_iput;
>> + struct erofs_iomap_iter_ctx iter_ctx = {
>> + .realinode = erofs_real_inode(inode, &need_iput),
>> + };
>> trace_erofs_readahead(rac->mapping->host, readahead_index(rac),
>> readahead_count(rac), true);
>> iomap_readahead(&erofs_iomap_ops, &read_ctx, &iter_ctx);
>> + if (need_iput)
>> + iput(iter_ctx.realinode);
>> }
>> static sector_t erofs_bmap(struct address_space *mapping, sector_t
>> block)
>> @@ -423,7 +437,9 @@ static ssize_t erofs_file_read_iter(struct kiocb
>> *iocb, struct iov_iter *to)
>> return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
>> #endif
>> if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_sb->s_bdev) {
>> - struct erofs_iomap_iter_ctx iter_ctx = {};
>> + struct erofs_iomap_iter_ctx iter_ctx = {
>> + .realinode = inode,
>> + };
>> return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
>> NULL, 0, &iter_ctx, 0);
>> diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
>> index bce98c845a18..8116738fe432 100644
>> --- a/fs/erofs/inode.c
>> +++ b/fs/erofs/inode.c
>> @@ -215,6 +215,10 @@ static int erofs_fill_inode(struct inode *inode)
>> case S_IFREG:
>> inode->i_op = &erofs_generic_iops;
>> inode->i_fop = &erofs_file_fops;
>> +#ifdef CONFIG_EROFS_FS_PAGE_CACHE_SHARE
>
> Is that unnecessary?
>
Yeah, I will remove it in the next version.
Thanks,
Hongbo
> It seems erofs_ishare_fill_inode() will return false if
> CONFIG_EROFS_FS_PAGE_CACHE_SHARE is undefined.
>
> Otherwise it looks good to me,
> Reviewed-by: Gao Xiang <hsiangkao@...ux.alibaba.com>
>
> Thanks,
> Gao Xiang
Powered by blists - more mailing lists