[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4de79cbb-6977-1172-4e69-5e9040caadf2@kernel.org>
Date: Wed, 4 Aug 2021 15:14:34 +0800
From: Chao Yu <chao@...nel.org>
To: Gao Xiang <hsiangkao@...ux.alibaba.com>,
linux-erofs@...ts.ozlabs.org
Cc: linux-fsdevel@...r.kernel.org, nvdimm@...ts.linux.dev,
LKML <linux-kernel@...r.kernel.org>,
"Darrick J. Wong" <djwong@...nel.org>,
Liu Bo <bo.liu@...ux.alibaba.com>,
Joseph Qi <joseph.qi@...ux.alibaba.com>,
Liu Jiang <gerry@...ux.alibaba.com>,
Huang Jianan <huangjianan@...o.com>,
Tao Ma <boyu.mt@...bao.com>
Subject: Re: [PATCH v2 2/3] erofs: dax support for non-tailpacking regular file
On 2021/7/31 3:46, Gao Xiang wrote:
> DAX is quite useful for some VM use cases in order to save guest
> memory extremely with minimal lightweight EROFS.
>
> In order to prepare for such use cases, add preliminary dax support
> for non-tailpacking regular files for now.
>
> Tested with the DRAM-emulated PMEM and the EROFS image generated by
> "mkfs.erofs -Enoinline_data enwik9.fsdax.img enwik9"
>
> Cc: nvdimm@...ts.linux.dev
> Cc: linux-fsdevel@...r.kernel.org
> Signed-off-by: Gao Xiang <hsiangkao@...ux.alibaba.com>
> ---
> fs/erofs/data.c | 42 +++++++++++++++++++++++++++++--
> fs/erofs/inode.c | 4 +++
> fs/erofs/internal.h | 3 +++
> fs/erofs/super.c | 60 +++++++++++++++++++++++++++++++++++++++++++--
> 4 files changed, 105 insertions(+), 4 deletions(-)
>
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index 1f97151a9f90..911521293b20 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -6,7 +6,7 @@
> #include "internal.h"
> #include <linux/prefetch.h>
> #include <linux/iomap.h>
> -
> +#include <linux/dax.h>
> #include <trace/events/erofs.h>
>
> static void erofs_readendio(struct bio *bio)
> @@ -323,6 +323,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> return ret;
>
> iomap->bdev = inode->i_sb->s_bdev;
> + iomap->dax_dev = EROFS_I_SB(inode)->dax_dev;
> iomap->offset = map.m_la;
> iomap->length = map.m_llen;
> iomap->flags = 0;
> @@ -382,6 +383,10 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
> if (!iov_iter_count(to))
> return 0;
>
> +#ifdef CONFIG_FS_DAX
> + if (IS_DAX(iocb->ki_filp->f_mapping->host))
> + return dax_iomap_rw(iocb, to, &erofs_iomap_ops);
> +#endif
> if (iocb->ki_flags & IOCB_DIRECT) {
> int err = erofs_prepare_dio(iocb, to);
>
> @@ -410,9 +415,42 @@ const struct address_space_operations erofs_raw_access_aops = {
> .direct_IO = noop_direct_IO,
> };
>
> +#ifdef CONFIG_FS_DAX
> +static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf,
> + enum page_entry_size pe_size)
> +{
> + return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops);
> +}
> +
> +static vm_fault_t erofs_dax_fault(struct vm_fault *vmf)
> +{
> + return erofs_dax_huge_fault(vmf, PE_SIZE_PTE);
> +}
> +
> +static const struct vm_operations_struct erofs_dax_vm_ops = {
> + .fault = erofs_dax_fault,
> + .huge_fault = erofs_dax_huge_fault,
> +};
> +
> +static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> + if (!IS_DAX(file_inode(file)))
> + return generic_file_readonly_mmap(file, vma);
> +
> + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
> + return -EINVAL;
> +
> + vma->vm_ops = &erofs_dax_vm_ops;
> + vma->vm_flags |= VM_HUGEPAGE;
> + return 0;
> +}
> +#else
> +#define erofs_file_mmap generic_file_readonly_mmap
> +#endif
> +
> const struct file_operations erofs_file_fops = {
> .llseek = generic_file_llseek,
> .read_iter = erofs_file_read_iter,
> - .mmap = generic_file_readonly_mmap,
> + .mmap = erofs_file_mmap,
> .splice_read = generic_file_splice_read,
> };
> diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
> index 00edb7562fea..e875fba18159 100644
> --- a/fs/erofs/inode.c
> +++ b/fs/erofs/inode.c
> @@ -174,6 +174,10 @@ static struct page *erofs_read_inode(struct inode *inode,
> inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
> inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
>
> + inode->i_flags &= ~S_DAX;
> + if (test_opt(&sbi->ctx, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
> + vi->datalayout == EROFS_INODE_FLAT_PLAIN)
> + inode->i_flags |= S_DAX;
> if (!nblks)
> /* measure inode.i_blocks as generic filesystems */
> inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 2669c785d548..7c9abfc93109 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -83,6 +83,7 @@ struct erofs_sb_info {
>
> struct erofs_sb_lz4_info lz4;
> #endif /* CONFIG_EROFS_FS_ZIP */
> + struct dax_device *dax_dev;
> u32 blocks;
> u32 meta_blkaddr;
> #ifdef CONFIG_EROFS_FS_XATTR
> @@ -115,6 +116,8 @@ struct erofs_sb_info {
> /* Mount flags set via mount options or defaults */
> #define EROFS_MOUNT_XATTR_USER 0x00000010
> #define EROFS_MOUNT_POSIX_ACL 0x00000020
> +#define EROFS_MOUNT_DAX_ALWAYS 0x00000040
> +#define EROFS_MOUNT_DAX_NEVER 0x00000080
>
> #define clear_opt(ctx, option) ((ctx)->mount_opt &= ~EROFS_MOUNT_##option)
> #define set_opt(ctx, option) ((ctx)->mount_opt |= EROFS_MOUNT_##option)
> diff --git a/fs/erofs/super.c b/fs/erofs/super.c
> index 8fc6c04b54f4..d5b110fd365d 100644
> --- a/fs/erofs/super.c
> +++ b/fs/erofs/super.c
> @@ -11,6 +11,7 @@
> #include <linux/crc32c.h>
> #include <linux/fs_context.h>
> #include <linux/fs_parser.h>
> +#include <linux/dax.h>
> #include "xattr.h"
>
> #define CREATE_TRACE_POINTS
> @@ -355,6 +356,8 @@ enum {
> Opt_user_xattr,
> Opt_acl,
> Opt_cache_strategy,
> + Opt_dax,
> + Opt_dax_enum,
We need to update the documentation for these new dax mount options.
> Opt_err
> };
>
> @@ -365,14 +368,47 @@ static const struct constant_table erofs_param_cache_strategy[] = {
> {}
> };
>
> +static const struct constant_table erofs_dax_param_enums[] = {
> + {"always", EROFS_MOUNT_DAX_ALWAYS},
> + {"never", EROFS_MOUNT_DAX_NEVER},
> + {}
> +};
> +
> static const struct fs_parameter_spec erofs_fs_parameters[] = {
> fsparam_flag_no("user_xattr", Opt_user_xattr),
> fsparam_flag_no("acl", Opt_acl),
> fsparam_enum("cache_strategy", Opt_cache_strategy,
> erofs_param_cache_strategy),
> + fsparam_flag("dax", Opt_dax),
> + fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums),
> {}
> };
>
> +static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
> +{
> +#ifdef CONFIG_FS_DAX
> + struct erofs_fs_context *ctx = fc->fs_private;
> +
> + switch (mode) {
> + case EROFS_MOUNT_DAX_ALWAYS:
> + warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
> + set_opt(ctx, DAX_ALWAYS);
> + clear_opt(ctx, DAX_NEVER);
> + return true;
> + case EROFS_MOUNT_DAX_NEVER:
> + set_opt(ctx, DAX_NEVER);
> + clear_opt(ctx, DAX_ALWAYS);
> + return true;
> + default:
> + DBG_BUGON(1);
> + return false;
> + }
> +#else
> + errorfc(fc, "dax options not supported");
> + return false;
> +#endif
> +}
> +
> static int erofs_fc_parse_param(struct fs_context *fc,
> struct fs_parameter *param)
> {
> @@ -412,6 +448,14 @@ static int erofs_fc_parse_param(struct fs_context *fc,
> errorfc(fc, "compression not supported, cache_strategy ignored");
> #endif
> break;
> + case Opt_dax:
> + if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
> + return -EINVAL;
> + break;
> + case Opt_dax_enum:
> + if (!erofs_fc_set_dax_mode(fc, result.uint_32))
> + return -EINVAL;
> + break;
> default:
> return -ENOPARAM;
> }
> @@ -496,10 +540,16 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
> return -ENOMEM;
>
> sb->s_fs_info = sbi;
> + sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
> err = erofs_read_superblock(sb);
> if (err)
> return err;
>
> + if (test_opt(ctx, DAX_ALWAYS) &&
> + !bdev_dax_supported(sb->s_bdev, EROFS_BLKSIZ)) {
> + errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
> + clear_opt(ctx, DAX_ALWAYS);
> + }
> sb->s_flags |= SB_RDONLY | SB_NOATIME;
> sb->s_maxbytes = MAX_LFS_FILESIZE;
> sb->s_time_gran = 1;
> @@ -609,6 +659,8 @@ static void erofs_kill_sb(struct super_block *sb)
> sbi = EROFS_SB(sb);
> if (!sbi)
> return;
> + if (sbi->dax_dev)
> + fs_put_dax(sbi->dax_dev);
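The NULL check above is unnecessary, since fs_put_dax() already handles a NULL dax_dev; this can simply be:
fs_put_dax(sbi->dax_dev);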
Thanks,
> kfree(sbi);
> sb->s_fs_info = NULL;
> }
> @@ -711,8 +763,8 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
>
> static int erofs_show_options(struct seq_file *seq, struct dentry *root)
> {
> - struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
> - struct erofs_fs_context *ctx __maybe_unused = &sbi->ctx;
> + struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
> + struct erofs_fs_context *ctx = &sbi->ctx;
>
> #ifdef CONFIG_EROFS_FS_XATTR
> if (test_opt(ctx, XATTR_USER))
> @@ -734,6 +786,10 @@ static int erofs_show_options(struct seq_file *seq, struct dentry *root)
> else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
> seq_puts(seq, ",cache_strategy=readaround");
> #endif
> + if (test_opt(ctx, DAX_ALWAYS))
> + seq_puts(seq, ",dax=always");
> + if (test_opt(ctx, DAX_NEVER))
> + seq_puts(seq, ",dax=never");
> return 0;
> }
>
>
Powered by blists - more mailing lists