[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20170805225908.g2lcjzsdrkde4qse@thunk.org>
Date: Sat, 5 Aug 2017 18:59:08 -0400
From: Theodore Ts'o <tytso@....edu>
To: Tahsin Erdogan <tahsin@...gle.com>
Cc: Andreas Dilger <adilger@...ger.ca>,
Emoly Liu <emoly.liu@...el.com>, linux-ext4@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH] ext4: backward compatibility support for Lustre ea_inode
implementation
Andreas, Emoly,
I'd appreciate your thoughts; what do you think about Tahsin's patch.
Could you give it a try on a file system created with Lustre and let
me know how it works for you?
Many thanks!!
- Ted
On Mon, Jul 24, 2017 at 01:13:08PM -0700, Tahsin Erdogan wrote:
> Original Lustre ea_inode feature did not have ref counts on xattr inodes
> because there was always one parent that referenced it. New
> implementation expects ref count to be initialized which is not true for
> Lustre case. Handle this by detecting Lustre created xattr inode and set
> its ref count to 1.
>
> The quota handling of xattr inodes have also changed with deduplication
> support. New implementation manually manages quotas to support sharing
> across multiple users. A consequence is that, a referencing inode
> incorporates the blocks of xattr inode into its own i_block field.
>
> We need to know how a xattr inode was created so that we can reverse the
> block charges during reference removal. This is handled by introducing a
> EXT4_STATE_LUSTRE_EA_INODE flag. The flag is set on a xattr inode if
> inode appears to have been created by Lustre. During xattr inode reference
> removal, the manual quota uncharge is skipped if the flag is set.
>
> Signed-off-by: Tahsin Erdogan <tahsin@...gle.com>
> ---
> fs/ext4/ext4.h | 1 +
> fs/ext4/inode.c | 8 ----
> fs/ext4/xattr.c | 141 +++++++++++++++++++++++++++++++++++++-------------------
> 3 files changed, 94 insertions(+), 56 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index e9440ed605c0..21e8b1dea958 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1569,6 +1569,7 @@ enum {
> nolocking */
> EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
> EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
> + EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
> };
>
> #define EXT4_INODE_BIT_FNS(name, field, offset) \
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 70699940e20d..cebb6e60a8af 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4897,14 +4897,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
> brelse(iloc.bh);
> ext4_set_inode_flags(inode);
>
> - if (ei->i_flags & EXT4_EA_INODE_FL) {
> - ext4_xattr_inode_set_class(inode);
> -
> - inode_lock(inode);
> - inode->i_flags |= S_NOQUOTA;
> - inode_unlock(inode);
> - }
> -
> unlock_new_inode(inode);
> return inode;
>
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 949b4ea3ff58..415be4a88cc3 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -354,8 +354,10 @@ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
> return ret;
> }
>
> +#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
> +
> static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
> - struct inode **ea_inode)
> + u32 ea_inode_hash, struct inode **ea_inode)
> {
> struct inode *inode;
> int err;
> @@ -385,6 +387,24 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
> goto error;
> }
>
> + ext4_xattr_inode_set_class(inode);
> +
> + /*
> + * Check whether this is an old Lustre-style xattr inode. Lustre
> + * implementation does not have hash validation, rather it has a
> + * backpointer from ea_inode to the parent inode.
> + */
> + if (ea_inode_hash != ext4_xattr_inode_get_hash(inode) &&
> + EXT4_XATTR_INODE_GET_PARENT(inode) == parent->i_ino &&
> + inode->i_generation == parent->i_generation) {
> + ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
> + ext4_xattr_inode_set_ref(inode, 1);
> + } else {
> + inode_lock(inode);
> + inode->i_flags |= S_NOQUOTA;
> + inode_unlock(inode);
> + }
> +
> *ea_inode = inode;
> return 0;
> error:
> @@ -417,8 +437,6 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
> return 0;
> }
>
> -#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
> -
> /*
> * Read xattr value from the EA inode.
> */
> @@ -431,7 +449,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
> int err;
>
> err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum),
> - &ea_inode);
> + le32_to_cpu(entry->e_hash), &ea_inode);
> if (err) {
> ea_inode = NULL;
> goto out;
> @@ -449,29 +467,20 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
> if (err)
> goto out;
>
> - err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size);
> - /*
> - * Compatibility check for old Lustre ea_inode implementation. Old
> - * version does not have hash validation, but it has a backpointer
> - * from ea_inode to the parent inode.
> - */
> - if (err == -EFSCORRUPTED) {
> - if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino ||
> - ea_inode->i_generation != inode->i_generation) {
> + if (!ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) {
> + err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer,
> + size);
> + if (err) {
> ext4_warning_inode(ea_inode,
> "EA inode hash validation failed");
> goto out;
> }
> - /* Do not add ea_inode to the cache. */
> - ea_inode_cache = NULL;
> - err = 0;
> - } else if (err)
> - goto out;
>
> - if (ea_inode_cache)
> - mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
> - ext4_xattr_inode_get_hash(ea_inode),
> - ea_inode->i_ino, true /* reusable */);
> + if (ea_inode_cache)
> + mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
> + ext4_xattr_inode_get_hash(ea_inode),
> + ea_inode->i_ino, true /* reusable */);
> + }
> out:
> iput(ea_inode);
> return err;
> @@ -838,10 +847,15 @@ static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
> return err;
> }
>
> -static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
> +static void ext4_xattr_inode_free_quota(struct inode *parent,
> + struct inode *ea_inode,
> + size_t len)
> {
> - dquot_free_space_nodirty(inode, round_up_cluster(inode, len));
> - dquot_free_inode(inode);
> + if (ea_inode &&
> + ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE))
> + return;
> + dquot_free_space_nodirty(parent, round_up_cluster(parent, len));
> + dquot_free_inode(parent);
> }
>
> int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
> @@ -1071,7 +1085,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
> if (!entry->e_value_inum)
> continue;
> ea_ino = le32_to_cpu(entry->e_value_inum);
> - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
> + err = ext4_xattr_inode_iget(parent, ea_ino,
> + le32_to_cpu(entry->e_hash),
> + &ea_inode);
> if (err)
> goto cleanup;
> err = ext4_xattr_inode_inc_ref(handle, ea_inode);
> @@ -1093,7 +1109,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
> if (!entry->e_value_inum)
> continue;
> ea_ino = le32_to_cpu(entry->e_value_inum);
> - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
> + err = ext4_xattr_inode_iget(parent, ea_ino,
> + le32_to_cpu(entry->e_hash),
> + &ea_inode);
> if (err) {
> ext4_warning(parent->i_sb,
> "cleanup ea_ino %u iget error %d", ea_ino,
> @@ -1131,7 +1149,9 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
> if (!entry->e_value_inum)
> continue;
> ea_ino = le32_to_cpu(entry->e_value_inum);
> - err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
> + err = ext4_xattr_inode_iget(parent, ea_ino,
> + le32_to_cpu(entry->e_hash),
> + &ea_inode);
> if (err)
> continue;
>
> @@ -1159,7 +1179,7 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
> }
>
> if (!skip_quota)
> - ext4_xattr_inode_free_quota(parent,
> + ext4_xattr_inode_free_quota(parent, ea_inode,
> le32_to_cpu(entry->e_value_size));
>
> /*
> @@ -1591,6 +1611,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> if (!s->not_found && here->e_value_inum) {
> ret = ext4_xattr_inode_iget(inode,
> le32_to_cpu(here->e_value_inum),
> + le32_to_cpu(here->e_hash),
> &old_ea_inode);
> if (ret) {
> old_ea_inode = NULL;
> @@ -1609,7 +1630,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> &new_ea_inode);
> if (ret) {
> new_ea_inode = NULL;
> - ext4_xattr_inode_free_quota(inode, i->value_len);
> + ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
> goto out;
> }
> }
> @@ -1628,13 +1649,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> ext4_warning_inode(new_ea_inode,
> "dec ref new_ea_inode err=%d",
> err);
> - ext4_xattr_inode_free_quota(inode,
> + ext4_xattr_inode_free_quota(inode, new_ea_inode,
> i->value_len);
> }
> goto out;
> }
>
> - ext4_xattr_inode_free_quota(inode,
> + ext4_xattr_inode_free_quota(inode, old_ea_inode,
> le32_to_cpu(here->e_value_size));
> }
>
> @@ -1803,8 +1824,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> struct mb_cache_entry *ce = NULL;
> int error = 0;
> struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
> - struct inode *ea_inode = NULL;
> - size_t old_ea_inode_size = 0;
> + struct inode *ea_inode = NULL, *tmp_inode;
> + size_t old_ea_inode_quota = 0;
> + unsigned int ea_ino;
> +
>
> #define header(x) ((struct ext4_xattr_header *)(x))
>
> @@ -1866,12 +1889,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> * like it has an empty value.
> */
> if (!s->not_found && s->here->e_value_inum) {
> - /*
> - * Defer quota free call for previous inode
> - * until success is guaranteed.
> - */
> - old_ea_inode_size = le32_to_cpu(
> + ea_ino = le32_to_cpu(s->here->e_value_inum);
> + error = ext4_xattr_inode_iget(inode, ea_ino,
> + le32_to_cpu(s->here->e_hash),
> + &tmp_inode);
> + if (error)
> + goto cleanup;
> +
> + if (!ext4_test_inode_state(tmp_inode,
> + EXT4_STATE_LUSTRE_EA_INODE)) {
> + /*
> + * Defer quota free call for previous
> + * inode until success is guaranteed.
> + */
> + old_ea_inode_quota = le32_to_cpu(
> s->here->e_value_size);
> + }
> + iput(tmp_inode);
> +
> s->here->e_value_inum = 0;
> s->here->e_value_size = 0;
> }
> @@ -1898,8 +1933,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> goto cleanup;
>
> if (i->value && s->here->e_value_inum) {
> - unsigned int ea_ino;
> -
> /*
> * A ref count on ea_inode has been taken as part of the call to
> * ext4_xattr_set_entry() above. We would like to drop this
> @@ -1907,7 +1940,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> * initialized and has its own ref count on the ea_inode.
> */
> ea_ino = le32_to_cpu(s->here->e_value_inum);
> - error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
> + error = ext4_xattr_inode_iget(inode, ea_ino,
> + le32_to_cpu(s->here->e_hash),
> + &ea_inode);
> if (error) {
> ea_inode = NULL;
> goto cleanup;
> @@ -2056,8 +2091,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
> }
> }
>
> - if (old_ea_inode_size)
> - ext4_xattr_inode_free_quota(inode, old_ea_inode_size);
> + if (old_ea_inode_quota)
> + ext4_xattr_inode_free_quota(inode, NULL, old_ea_inode_quota);
>
> /* Update the inode. */
> EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
> @@ -2084,7 +2119,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
>
> /* If there was an error, revert the quota charge. */
> if (error)
> - ext4_xattr_inode_free_quota(inode,
> + ext4_xattr_inode_free_quota(inode, ea_inode,
> i_size_read(ea_inode));
> iput(ea_inode);
> }
> @@ -2807,6 +2842,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
> struct ext4_xattr_ibody_header *header;
> struct ext4_iloc iloc = { .bh = NULL };
> struct ext4_xattr_entry *entry;
> + struct inode *ea_inode;
> int error;
>
> error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
> @@ -2861,10 +2897,19 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
>
> if (ext4_has_feature_ea_inode(inode->i_sb)) {
> for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
> - entry = EXT4_XATTR_NEXT(entry))
> - if (entry->e_value_inum)
> - ext4_xattr_inode_free_quota(inode,
> + entry = EXT4_XATTR_NEXT(entry)) {
> + if (!entry->e_value_inum)
> + continue;
> + error = ext4_xattr_inode_iget(inode,
> + le32_to_cpu(entry->e_value_inum),
> + le32_to_cpu(entry->e_hash),
> + &ea_inode);
> + if (error)
> + continue;
> + ext4_xattr_inode_free_quota(inode, ea_inode,
> le32_to_cpu(entry->e_value_size));
> + iput(ea_inode);
> + }
>
> }
>
> --
> 2.14.0.rc0.284.gd933b75aa4-goog
>
Powered by blists - more mailing lists