[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5df78e1d0909232227y2cb52abew827d7732a3bc9040@mail.gmail.com>
Date: Wed, 23 Sep 2009 22:27:17 -0700
From: Jiaying Zhang <jiayingz@...gle.com>
To: Andreas Dilger <adilger@....com>
Cc: Theodore Tso <tytso@....edu>, Frank Mayhar <fmayhar@...gle.com>,
Eric Sandeen <sandeen@...hat.com>,
Curt Wohlgemuth <curtw@...gle.com>,
ext4 development <linux-ext4@...r.kernel.org>
Subject: Re: Question on fallocate/ftruncate sequence
Sorry for taking so long to finish this. Here is the new patch based on
Andreas's suggestions. Now the patch clears the EXT4_EOFBLOCKS_FL
flag when we allocate beyond the maximum allocated block. I also
made the EOFBLOCKS flag user visible and added the handling
in ext4_ioctl as Andreas suggested.
Index: linux-2.6.30.5/fs/ext4/inode.c
===================================================================
--- linux-2.6.30.5.orig/fs/ext4/inode.c 2009-08-31 12:08:10.000000000 -0700
+++ linux-2.6.30.5/fs/ext4/inode.c 2009-09-23 21:42:33.000000000 -0700
@@ -3973,6 +3973,8 @@ void ext4_truncate(struct inode *inode)
if (!ext4_can_truncate(inode))
return;
+ inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
+
if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
@@ -4285,8 +4287,8 @@ void ext4_get_inode_flags(struct ext4_in
{
unsigned int flags = ei->vfs_inode.i_flags;
- ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
- EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
+ ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|EXT4_IMMUTABLE_FL|
+ EXT4_NOATIME_FL|EXT4_DIRSYNC_FL|EXT4_EOFBLOCKS_FL);
if (flags & S_SYNC)
ei->i_flags |= EXT4_SYNC_FL;
if (flags & S_APPEND)
@@ -4297,6 +4299,8 @@ void ext4_get_inode_flags(struct ext4_in
ei->i_flags |= EXT4_NOATIME_FL;
if (flags & S_DIRSYNC)
ei->i_flags |= EXT4_DIRSYNC_FL;
+ if (flags & FS_EOFBLOCKS_FL)
+ ei->i_flags |= EXT4_EOFBLOCKS_FL;
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
@@ -4807,7 +4811,9 @@ int ext4_setattr(struct dentry *dentry,
}
if (S_ISREG(inode->i_mode) &&
- attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
+ attr->ia_valid & ATTR_SIZE &&
+ (attr->ia_size < inode->i_size ||
+ (inode->i_flags & EXT4_EOFBLOCKS_FL))) {
handle_t *handle;
handle = ext4_journal_start(inode, 3);
@@ -4838,6 +4844,11 @@ int ext4_setattr(struct dentry *dentry,
goto err_out;
}
}
+ if ((inode->i_flags & EXT4_EOFBLOCKS_FL)) {
+ rc = vmtruncate(inode, attr->ia_size);
+ if (rc)
+ goto err_out;
+ }
}
rc = inode_setattr(inode, attr);
Index: linux-2.6.30.5/include/linux/fs.h
===================================================================
--- linux-2.6.30.5.orig/include/linux/fs.h 2009-08-31 12:08:10.000000000 -0700
+++ linux-2.6.30.5/include/linux/fs.h 2009-09-10 21:27:30.000000000 -0700
@@ -343,9 +343,10 @@ struct inodes_stat_t {
#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define FS_EXTENT_FL 0x00080000 /* Extents */
#define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */
+#define FS_EOFBLOCKS_FL 0x00200000 /* Blocks allocated beyond EOF */
#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
-#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
+#define FS_FL_USER_VISIBLE 0x0023DFFF /* User visible flags */
#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
Index: linux-2.6.30.5/fs/ext4/ext4.h
===================================================================
--- linux-2.6.30.5.orig/fs/ext4/ext4.h 2009-08-31 12:08:10.000000000 -0700
+++ linux-2.6.30.5/fs/ext4/ext4.h 2009-09-10 21:28:14.000000000 -0700
@@ -235,9 +235,10 @@ struct flex_groups {
#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
+#define EXT4_EOFBLOCKS_FL 0x00200000 /* Blocks allocated beyond EOF (bit reserved in fs.h) */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
+#define EXT4_FL_USER_VISIBLE 0x002BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */
/* Flags that should be inherited by new inodes from their parent. */
Index: linux-2.6.30.5/fs/ext4/extents.c
===================================================================
--- linux-2.6.30.5.orig/fs/ext4/extents.c 2009-09-01 18:14:58.000000000 -0700
+++ linux-2.6.30.5/fs/ext4/extents.c 2009-09-23 22:12:22.000000000 -0700
@@ -2788,7 +2788,7 @@ int ext4_ext_get_blocks(handle_t *handle
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
- struct ext4_extent newex, *ex;
+ struct ext4_extent newex, *ex, *last_ex;
ext4_fsblk_t newblock;
int err = 0, depth, ret, cache_type;
unsigned int allocated = 0;
@@ -2968,6 +2968,14 @@ int ext4_ext_get_blocks(handle_t *handle
newex.ee_len = cpu_to_le16(ar.len);
if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
ext4_ext_mark_uninitialized(&newex);
+
+ if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
+ BUG_ON(!eh->eh_entries);
+ last_ex = EXT_LAST_EXTENT(eh);
+ if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+ + ext4_ext_get_actual_len(last_ex))
+ inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
+ }
err = ext4_ext_insert_extent(handle, inode, path, &newex);
if (err) {
/* free data blocks we just allocated */
@@ -3095,6 +3103,13 @@ static void ext4_falloc_update_inode(str
i_size_write(inode, new_size);
if (new_size > EXT4_I(inode)->i_disksize)
ext4_update_i_disksize(inode, new_size);
+ } else {
+ /*
+ * Mark that we allocate beyond EOF so the subsequent truncate
+ * can proceed even if the new size is the same as i_size.
+ */
+ if (new_size > i_size_read(inode))
+ inode->i_flags |= EXT4_EOFBLOCKS_FL;
}
}
Index: linux-2.6.30.5/fs/ext4/ioctl.c
===================================================================
--- linux-2.6.30.5.orig/fs/ext4/ioctl.c 2009-08-16 14:19:38.000000000 -0700
+++ linux-2.6.30.5/fs/ext4/ioctl.c 2009-09-23 22:04:47.000000000 -0700
@@ -92,6 +92,16 @@ long ext4_ioctl(struct file *filp, unsig
flags &= ~EXT4_EXTENTS_FL;
}
+ if (flags & EXT4_EOFBLOCKS_FL) {
+ /* we don't support adding EOFBLOCKS flag */
+ if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+ err = -EOPNOTSUPP;
+ goto flags_out;
+ }
+ } else if (oldflags & EXT4_EOFBLOCKS_FL)
+ /* free the space reserved with fallocate KEEPSIZE */
+ vmtruncate(inode, inode->i_size);
+
handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
Jiaying
On Wed, Sep 2, 2009 at 10:20 PM, Jiaying Zhang <jiayingz@...gle.com> wrote:
>
> On Wed, Sep 2, 2009 at 1:41 AM, Andreas Dilger<adilger@....com> wrote:
> > On Aug 31, 2009 16:33 -0700, Jiaying Zhang wrote:
> >> > EXT4_KEEPSIZE_FL should only be cleared if there were writes to
> >> > the end of the fallocated space. In that regard, I think the name
> >> > of this flag should be changed to something like "EXT4_EOFBLOCKS_FL"
> >> > to indicate that blocks are allocated beyond the end of file (i_size).
> >>
> >> Thanks for catching this! I changed the patch to only clear the flag
> >> when the new_size is larger than i_size and changed the flag name
> >> as you suggested. It would be nice if we only clear the flag when we
> >> write beyond the fallocated space, but this seems hard to detect
> >> because we no longer have the allocated size once that keepsize
> >> fallocate call returns.
> >
> > The problem is that if e2fsck depends on the EXT4_EOFBLOCKS_FL set
> > for fallocate-beyond-EOF then it is worse to clear it than to leave
> > it set. At worst, leaving the flag set results in too many truncates
> > on the file. Clearing the flag when not correct may result in user
> > visible data corruption if the file size is extended...
> >
> >> Here is the new patch:
> >>
> >> --- .pc/fallocate_keepsizse.patch/fs/ext4/extents.c 2009-08-31 12:08:10.000000000 -0700
> >> +++ fs/ext4/extents.c 2009-08-31 15:51:13.000000000 -0700
> >> @@ -3091,11 +3091,19 @@ static void ext4_falloc_update_inode(str
> >> * the file size.
> >> */
> >> if (!(mode & FALLOC_FL_KEEP_SIZE)) {
> >> + if (new_size > i_size_read(inode)) {
> >> i_size_write(inode, new_size);
> >> + inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
> >
> > This again isn't quite correct, since the EOFBLOCKS_FL shouldn't
> > be cleared unless new_size is beyond the allocated size. The
> > allocation code itself might be a better place to clear this,
> > since it knows whether there were new blocks being added beyond
> > the current max allocated block.
>
> We were thinking to clear this flag when we need to allocate new
> blocks, but I was not sure how to get the current max allocated
> block -- that is mostly because I just started working on the ext4
> code. After digging into the ext4 allocation code today, I think we
> can put the check&clear in ext4_ext_get_blocks:
>
> @@ -2968,6 +2968,14 @@ int ext4_ext_get_blocks(handle_t *handle
> newex.ee_len = cpu_to_le16(ar.len);
> if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
> ext4_ext_mark_uninitialized(&newex);
> +
> + if (unlikely(inode->i_flags & EXT4_EOFBLOCKS_FL)) {
> + BUG_ON(!eh->eh_entries);
> + last_ex = EXT_LAST_EXTENT(eh);
> + if (iblock + max_blocks > le32_to_cpu(last_ex->ee_block)
> + + ext4_ext_get_actual_len(last_ex))
> + inode->i_flags &= ~EXT4_EOFBLOCKS_FL;
> + }
> err = ext4_ext_insert_extent(handle, inode, path, &newex);
> if (err) {
> /* free data blocks we just allocated */
>
> Again, I just started looking at this part of code, so please let me know
> if I am in the right direction.
>
> Another thing I am not sure about is whether we can allocate a non-data block,
> like extended attributes, beyond the current max block without changing
> the i_size. In that case, clearing the EOFBLOCKS flag will be wrong.
>
> >> #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
> >
> > It probably isn't a bad idea to make this flag user-visible, since it
> > would allow scanning for files that have excess space reserved (e.g.
> > if the filesystem is getting full). Making it user-settable (i.e.
> > clearable) would essentially mean truncating the file to i_size without
> > updating the timestamps so that the reserved space is discarded. I
> > don't think there is any value in allowing a user to turn this flag on
> > for a file.
>
> So to make it user-settable, we need to add the handling in ext4_ioctl
> that calls vmtruncate when the flag is to be cleared. But how can we get
> the right size to truncate in that case? Can we just set that to the
> max initialized block shift with block size? But that may also truncate
> the blocks reserved without the KEEP_SIZE flag.
>
> Jiaying
>
> >
> > Cheers, Andreas
> > --
> > Andreas Dilger
> > Sr. Staff Engineer, Lustre Group
> > Sun Microsystems of Canada, Inc.
> >
> >
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists