[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6389156c-c6df-4e02-ab46-3aaf6230ef76@linux.alibaba.com>
Date: Thu, 8 May 2025 12:09:27 +0800
From: Gao Xiang <hsiangkao@...ux.alibaba.com>
To: Sheng Yong <shengyong2021@...il.com>, xiang@...nel.org, chao@...nel.org,
zbestahu@...il.com, jefflexu@...ux.alibaba.com, dhavale@...gle.com,
kzak@...hat.com
Cc: linux-erofs@...ts.ozlabs.org, linux-kernel@...r.kernel.org,
wangshuai12@...omi.com, Sheng Yong <shengyong1@...omi.com>
Subject: Re: [PATCH v4 2/2] erofs: add 'fsoffset' mount option for file-backed
& bdev-based mounts
Hi Yong,
On 2025/4/8 20:23, Sheng Yong wrote:
> From: Sheng Yong <shengyong1@...omi.com>
>
> When attempting to use an archive file, such as APEX on android,
> as a file-backed mount source, it fails because EROFS image within
> the archive file does not start at offset 0. As a result, a loop
> device is still needed to attach the image file at an appropriate
> offset first. Similarly, if an EROFS image within a block device
> does not start at offset 0, it cannot be mounted directly either.
>
> To address this issue, this patch adds a new mount option `fsoffset=x'
> to accept a start offset for both file-backed and bdev-based mounts.
> The offset should be aligned to block size. EROFS will add this offset
> before performing read requests.
>
> Signed-off-by: Sheng Yong <shengyong1@...omi.com>
> Signed-off-by: Wang Shuai <wangshuai12@...omi.com>
Sorry for the late reply. I was busy with other stuff, but
since it's for the next cycle I guess it's not too late.
> ---
> Documentation/filesystems/erofs.rst | 1 +
> fs/erofs/data.c | 8 ++++++--
> fs/erofs/fileio.c | 4 +++-
> fs/erofs/internal.h | 2 ++
> fs/erofs/super.c | 24 +++++++++++++++++++++++-
> fs/erofs/zdata.c | 22 ++++++++++++++--------
> 6 files changed, 49 insertions(+), 12 deletions(-)
> ---
> v4: * change mount option `offset=x' to `fsoffset=x'
>
> v3: * rename `offs' to `off'
> * parse offset using fsparam_u64 and validate it in fill_super
> * update bi_sector inline
> https://lore.kernel.org/linux-erofs/98585dd8-d0b6-4000-b46d-a08c64eae44d@linux.alibaba.com
>
> v2: * add a new mount option `offset=X' for start offset, and offset
> should be aligned to PAGE_SIZE
> * add start offset for both file-backed and bdev-based mounts
> https://lore.kernel.org/linux-erofs/0725c2ec-528c-42a8-9557-4713e7e35153@linux.alibaba.com
>
> v1: https://lore.kernel.org/all/20250324022849.2715578-1-shengyong1@xiaomi.com/
>
> diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst
> index c293f8e37468..0fa4c7826203 100644
> --- a/Documentation/filesystems/erofs.rst
> +++ b/Documentation/filesystems/erofs.rst
> @@ -128,6 +128,7 @@ device=%s Specify a path to an extra device to be used together.
> fsid=%s Specify a filesystem image ID for Fscache back-end.
> domain_id=%s Specify a domain ID in fscache mode so that different images
> with the same blobs under a given domain ID can share storage.
> +fsoffset=%s Specify image offset for file-backed or bdev-based mounts.
> =================== =========================================================
>
> Sysfs Entries
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index 2409d2ab0c28..7da503480f4d 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -27,9 +27,12 @@ void erofs_put_metabuf(struct erofs_buf *buf)
>
> void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
> {
> - pgoff_t index = offset >> PAGE_SHIFT;
> + pgoff_t index;
> struct folio *folio = NULL;
>
> + offset += buf->off;
> + index = offset >> PAGE_SHIFT;
> +
> if (buf->page) {
> folio = page_folio(buf->page);
> if (folio_file_page(folio, index) != buf->page)
> @@ -54,6 +57,7 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
> struct erofs_sb_info *sbi = EROFS_SB(sb);
>
> buf->file = NULL;
> + buf->off = sbi->dif0.off;
> if (erofs_is_fileio_mode(sbi)) {
> buf->file = sbi->dif0.file; /* some fs like FUSE needs it */
> buf->mapping = buf->file->f_mapping;
> @@ -299,7 +303,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> iomap->private = buf.base;
> } else {
> iomap->type = IOMAP_MAPPED;
> - iomap->addr = mdev.m_pa;
> + iomap->addr = EROFS_SB(sb)->dif0.off + mdev.m_pa;
> if (flags & IOMAP_DAX)
> iomap->addr += mdev.m_dif->dax_part_off;
> }
> diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
> index 4fa0a0121288..2c003cbb0fbb 100644
> --- a/fs/erofs/fileio.c
> +++ b/fs/erofs/fileio.c
> @@ -52,7 +52,9 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq)
>
> if (!rq)
> return;
> - rq->iocb.ki_pos = rq->bio.bi_iter.bi_sector << SECTOR_SHIFT;
> +
> + rq->iocb.ki_pos = EROFS_SB(rq->sb)->dif0.off +
> + (rq->bio.bi_iter.bi_sector << SECTOR_SHIFT);
> rq->iocb.ki_ioprio = get_current_ioprio();
> rq->iocb.ki_complete = erofs_fileio_ki_complete;
> if (test_opt(&EROFS_SB(rq->sb)->opt, DIRECT_IO) &&
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 4ac188d5d894..10656bd986bd 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -43,6 +43,7 @@ struct erofs_device_info {
> char *path;
> struct erofs_fscache *fscache;
> struct file *file;
> + loff_t off;
> struct dax_device *dax_dev;
> u64 dax_part_off;
I wonder if it's possible to combine `off` with `dax_part_off`, since
they provide the same functionality...
>
> @@ -199,6 +200,7 @@ enum {
> struct erofs_buf {
> struct address_space *mapping;
> struct file *file;
> + loff_t off;
I wonder if there is some other way to check
whether it's a metabuf, so that we could just use sbi->dif0.off
directly, but I'm not sure.
> struct page *page;
> void *base;
> };
..
> + if (sb->s_bdev)
> + devsz = bdev_nr_bytes(sb->s_bdev);
> + else if (erofs_is_fileio_mode(sbi))
> + devsz = i_size_read(file_inode(sbi->dif0.file));
> + else
> + return invalfc(fc, "fsoffset only supports file or bdev backing");
> + if (sbi->dif0.off + (1 << sbi->blkszbits) > devsz)
> + return invalfc(fc, "fsoffset exceeds device size");
I wonder if those checks are really necessary? Even if it exceeds
the device size, it just won't find a valid on-disk superblock then.
> + }
> +
> err = erofs_read_superblock(sb);
> if (err)
> return err;
> @@ -948,6 +968,8 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
> if (sbi->domain_id)
> seq_printf(seq, ",domain_id=%s", sbi->domain_id);
> #endif
> + if (sbi->dif0.off)
> + seq_printf(seq, ",fsoffset=%lld", sbi->dif0.off);
> return 0;
> }
>
> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> index 0671184d9cf1..671527b63c6d 100644
> --- a/fs/erofs/zdata.c
> +++ b/fs/erofs/zdata.c
> @@ -1624,7 +1624,8 @@ static void z_erofs_submit_queue(struct z_erofs_frontend *f,
> bool *force_fg, bool readahead)
> {
> struct super_block *sb = f->inode->i_sb;
> - struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
> + struct erofs_sb_info *sbi = EROFS_SB(sb);
> + struct address_space *mc = MNGD_MAPPING(sbi);
> struct z_erofs_pcluster **qtail[NR_JOBQUEUES];
> struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
> struct z_erofs_pcluster *pcl, *next;
> @@ -1673,12 +1674,15 @@ static void z_erofs_submit_queue(struct z_erofs_frontend *f,
> if (bio && (cur != last_pa ||
> bio->bi_bdev != mdev.m_bdev)) {
> drain_io:
> - if (erofs_is_fileio_mode(EROFS_SB(sb)))
> + if (erofs_is_fileio_mode(sbi)) {
> erofs_fileio_submit_bio(bio);
> - else if (erofs_is_fscache_mode(sb))
> + } else if (erofs_is_fscache_mode(sb)) {
> erofs_fscache_submit_bio(bio);
> - else
> + } else {
> + bio->bi_iter.bi_sector +=
> + sbi->dif0.off >> SECTOR_SHIFT;
What about multi-device? I guess we should modify
erofs_map_dev() directly rather than its callers.
Thanks,
Gao Xiang
Powered by blists - more mailing lists