[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210920181159.GA570565@magnolia>
Date: Mon, 20 Sep 2021 11:11:59 -0700
From: "Darrick J. Wong" <djwong@...nel.org>
To: riteshh <riteshh@...ux.ibm.com>
Cc: jane.chu@...cle.com, linux-xfs@...r.kernel.org, hch@...radead.org,
dan.j.williams@...el.com, linux-fsdevel@...r.kernel.org,
linux-ext4 <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH 5/5] ext4: implement FALLOC_FL_ZEROINIT_RANGE
On Sat, Sep 18, 2021 at 10:37:57PM +0530, riteshh wrote:
> +cc linux-ext4
>
> [Thread]: https://lore.kernel.org/linux-xfs/163192864476.417973.143014658064006895.stgit@magnolia/T/#t
>
> On 21/09/17 06:31PM, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@...nel.org>
> >
> > Implement this new fallocate mode so that persistent memory users can,
> > upon receipt of a pmem poison notification, cause the pmem to be
> > reinitialized to a known value (zero) and clear any hardware poison
> > state that might be lurking.
> >
> > Signed-off-by: Darrick J. Wong <djwong@...nel.org>
> > ---
> > fs/ext4/extents.c | 93 +++++++++++++++++++++++++++++++++++++++++++
> > include/trace/events/ext4.h | 7 +++
> > 2 files changed, 99 insertions(+), 1 deletion(-)
> >
> >
> > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> > index c0de30f25185..c345002e2da6 100644
> > --- a/fs/ext4/extents.c
> > +++ b/fs/ext4/extents.c
> > @@ -29,6 +29,7 @@
> > #include <linux/fiemap.h>
> > #include <linux/backing-dev.h>
> > #include <linux/iomap.h>
> > +#include <linux/dax.h>
> > #include "ext4_jbd2.h"
> > #include "ext4_extents.h"
> > #include "xattr.h"
> > @@ -4475,6 +4476,90 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
> >
> > static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
> >
> > +static long ext4_zeroinit_range(struct file *file, loff_t offset, loff_t len)
> > +{
> > + struct inode *inode = file_inode(file);
> > + struct address_space *mapping = inode->i_mapping;
> > + handle_t *handle = NULL;
> > + loff_t end = offset + len;
> > + long ret;
> > +
> > + trace_ext4_zeroinit_range(inode, offset, len,
> > + FALLOC_FL_ZEROINIT_RANGE | FALLOC_FL_KEEP_SIZE);
> > +
> > + /* We don't support data=journal mode */
> > + if (ext4_should_journal_data(inode))
> > + return -EOPNOTSUPP;
> > +
> > + inode_lock(inode);
> > +
> > + /*
> > + * Indirect files do not support unwritten extents
> > + */
> > + if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
> > + ret = -EOPNOTSUPP;
> > + goto out_mutex;
> > + }
> > +
> > + /* Wait all existing dio workers, newcomers will block on i_mutex */
> > + inode_dio_wait(inode);
> > +
> > + /*
> > + * Prevent page faults from reinstantiating pages we have released from
> > + * page cache.
> > + */
> > + filemap_invalidate_lock(mapping);
> > +
> > + ret = ext4_break_layouts(inode);
> > + if (ret)
> > + goto out_mmap;
> > +
> > + /* Now release the pages and zero block aligned part of pages */
> > + truncate_pagecache_range(inode, offset, end - 1);
> > + inode->i_mtime = inode->i_ctime = current_time(inode);
> > +
> > + if (IS_DAX(inode))
> > + ret = dax_zeroinit_range(inode, offset, len,
> > + &ext4_iomap_report_ops);
> > + else
> > + ret = iomap_zeroout_range(inode, offset, len,
> > + &ext4_iomap_report_ops);
> > + if (ret == -ECANCELED)
> > + ret = -EOPNOTSUPP;
> > + if (ret)
> > + goto out_mmap;
> > +
> > + /*
> > + * In worst case we have to writeout two nonadjacent unwritten
> > + * blocks and update the inode
> > + */
>
> Is this comment true? We are not actually touching IOMAP_UNWRITTEN blocks, are we?
> So is there any need for a journal transaction for this?
> We are essentially only writing to blocks which are already allocated on disk
> and zeroing them out in both dax_zeroinit_range() and iomap_zeroout_range().
Oops. Yeah, the comment is wrong. Deleted.
> > + handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
>
> I guess the credit count is 1 here since only the inode is getting modified.
Yep.
>
> > + if (IS_ERR(handle)) {
> > + ret = PTR_ERR(handle);
> > + ext4_std_error(inode->i_sb, ret);
> > + goto out_mmap;
> > + }
> > +
> > + inode->i_mtime = inode->i_ctime = current_time(inode);
> > + ret = ext4_mark_inode_dirty(handle, inode);
> > + if (unlikely(ret))
> > + goto out_handle;
> > + ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits,
> > + (offset + len - 1) >> inode->i_sb->s_blocksize_bits);
>
> I am not sure whether we need ext4_fc_track_range() here.
> We are not doing any metadata operation except maybe updating the inode
> timestamps, right?
I wasn't sure what fastcommit needs to track about the range. Is it
/only/ tracking changes to the file mapping?
/me is sadly falling further and further behind on where ext4 is these
days... :/
--D
>
> -ritesh
>
> > + ext4_update_inode_fsync_trans(handle, inode, 1);
> > +
> > + if (file->f_flags & O_SYNC)
> > + ext4_handle_sync(handle);
> > +
> > +out_handle:
> > + ext4_journal_stop(handle);
> > +out_mmap:
> > + filemap_invalidate_unlock(mapping);
> > +out_mutex:
> > + inode_unlock(inode);
> > + return ret;
> > +}
> > +
> > static long ext4_zero_range(struct file *file, loff_t offset,
> > loff_t len, int mode)
> > {
> > @@ -4659,7 +4744,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
> > /* Return error if mode is not supported */
> > if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
> > FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
> > - FALLOC_FL_INSERT_RANGE))
> > + FALLOC_FL_INSERT_RANGE | FALLOC_FL_ZEROINIT_RANGE))
> > return -EOPNOTSUPP;
> >
> > ext4_fc_start_update(inode);
> > @@ -4687,6 +4772,12 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
> > ret = ext4_zero_range(file, offset, len, mode);
> > goto exit;
> > }
> > +
> > + if (mode & FALLOC_FL_ZEROINIT_RANGE) {
> > + ret = ext4_zeroinit_range(file, offset, len);
> > + goto exit;
> > + }
> > +
> > trace_ext4_fallocate_enter(inode, offset, len, mode);
> > lblk = offset >> blkbits;
> >
> > diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> > index 0ea36b2b0662..282f1208067f 100644
> > --- a/include/trace/events/ext4.h
> > +++ b/include/trace/events/ext4.h
> > @@ -1407,6 +1407,13 @@ DEFINE_EVENT(ext4__fallocate_mode, ext4_zero_range,
> > TP_ARGS(inode, offset, len, mode)
> > );
> >
> > +DEFINE_EVENT(ext4__fallocate_mode, ext4_zeroinit_range,
> > +
> > + TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
> > +
> > + TP_ARGS(inode, offset, len, mode)
> > +);
> > +
> > TRACE_EVENT(ext4_fallocate_exit,
> > TP_PROTO(struct inode *inode, loff_t offset,
> > unsigned int max_blocks, int ret),
> >
Powered by blists - more mailing lists