lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <537B1353.8060704@hp.com>
Date:	Tue, 20 May 2014 02:33:23 -0600
From:	Thavatchai Makphaibulchoke <thavatchai.makpahibulchoke@...com>
To:	Jan Kara <jack@...e.cz>, linux-ext4@...r.kernel.org
CC:	Ted Tso <tytso@....edu>
Subject: Re: [PATCH 2/2] ext4: Reduce contention on s_orphan_lock

Please see my one comment below.

BTW, I've run aim7 on your before I notice what I commented below.  There are workloads that my patch outperform yours and vice versa.  I will have to redo it over again.

On 05/15/2014 02:17 PM, Jan Kara wrote:
> Shuffle code around in ext4_orphan_add() and ext4_orphan_del() so that
> we avoid taking global s_orphan_lock in some cases and hold it for
> shorter time in other cases.
> 
> Signed-off-by: Jan Kara <jack@...e.cz>
> ---
>  fs/ext4/namei.c | 109 +++++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 65 insertions(+), 44 deletions(-)
> 
> diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
> index 5fcaa85b6dc5..0486fbafb808 100644
> --- a/fs/ext4/namei.c
> +++ b/fs/ext4/namei.c
> @@ -2539,13 +2539,17 @@ static int empty_dir(struct inode *inode)
>  	return 1;
>  }
>  
> -/* ext4_orphan_add() links an unlinked or truncated inode into a list of
> +/*
> + * ext4_orphan_add() links an unlinked or truncated inode into a list of
>   * such inodes, starting at the superblock, in case we crash before the
>   * file is closed/deleted, or in case the inode truncate spans multiple
>   * transactions and the last transaction is not recovered after a crash.
>   *
>   * At filesystem recovery time, we walk this list deleting unlinked
>   * inodes and truncating linked inodes in ext4_orphan_cleanup().
> + *
> + * Orphan list manipulation functions must be called under i_mutex unless
> + * we are just creating the inode or deleting it.
>   */
>  int ext4_orphan_add(handle_t *handle, struct inode *inode)
>  {
> @@ -2553,13 +2557,19 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
>  	struct ext4_sb_info *sbi = EXT4_SB(sb);
>  	struct ext4_iloc iloc;
>  	int err = 0, rc;
> +	bool dirty = false;
>  
>  	if (!sbi->s_journal)
>  		return 0;
>  
> -	mutex_lock(&sbi->s_orphan_lock);
> +	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
> +		     !mutex_is_locked(&inode->i_mutex));
> +	/*
> +	 * Exit early if inode already is on orphan list. This is a big speedup
> +	 * since we don't have to contend on the global s_orphan_lock.
> +	 */
>  	if (!list_empty(&EXT4_I(inode)->i_orphan))
> -		goto out_unlock;
> +		return 0;
>  
>  	/*
>  	 * Orphan handling is only valid for files with data blocks
> @@ -2573,44 +2583,47 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
>  	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
>  	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
>  	if (err)
> -		goto out_unlock;
> +		goto out;
>  
>  	err = ext4_reserve_inode_write(handle, inode, &iloc);
>  	if (err)
> -		goto out_unlock;
> +		goto out;
> +
> +	mutex_lock(&sbi->s_orphan_lock);
>  	/*
>  	 * Due to previous errors inode may be already a part of on-disk
>  	 * orphan list. If so skip on-disk list modification.
>  	 */
> -	if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
> -		(le32_to_cpu(sbi->s_es->s_inodes_count)))
> -			goto mem_insert;
> -
> -	/* Insert this inode at the head of the on-disk orphan list... */
> -	NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
> -	sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
> -	err = ext4_handle_dirty_super(handle, sb);
> -	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
> -	if (!err)
> -		err = rc;
> -
> -	/* Only add to the head of the in-memory list if all the
> -	 * previous operations succeeded.  If the orphan_add is going to
> -	 * fail (possibly taking the journal offline), we can't risk
> -	 * leaving the inode on the orphan list: stray orphan-list
> -	 * entries can cause panics at unmount time.
> -	 *
> -	 * This is safe: on error we're going to ignore the orphan list
> -	 * anyway on the next recovery. */
> -mem_insert:
> -	if (!err)
> -		list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
> +	if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
> +	    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
> +		/* Insert this inode at the head of the on-disk orphan list */
> +		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
> +		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
> +		dirty = true;
> +	}
> +	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
> +	mutex_unlock(&sbi->s_orphan_lock);
>  
> +	if (dirty) {
> +		err = ext4_handle_dirty_super(handle, sb);
> +		rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
> +		if (!err)
> +			err = rc;
> +		if (err) {
> +			/*
> +			 * We have to remove inode from in-memory list if
> + 			 * addition to on disk orphan list failed. Stray orphan
> +			 * list entries can cause panics at unmount time.
> +			 */
> +			mutex_lock(&sbi->s_orphan_lock);
> +			list_del(&EXT4_I(inode)->i_orphan);
> +			mutex_unlock(&sbi->s_orphan_lock);
> +		}
> +	}
>  	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
>  	jbd_debug(4, "orphan inode %lu will point to %d\n",
>  			inode->i_ino, NEXT_ORPHAN(inode));
> -out_unlock:
> -	mutex_unlock(&sbi->s_orphan_lock);
> +out:
>  	ext4_std_error(sb, err);
>  	return err;
>  }
> @@ -2631,13 +2644,18 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
>  	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
>  		return 0;
>  
> -	mutex_lock(&sbi->s_orphan_lock);
> +	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
> +		     !mutex_is_locked(&inode->i_mutex));
> +	/* Do this quick check before taking global s_orphan_lock. */
>  	if (list_empty(&ei->i_orphan))
> -		goto out;
> +		return 0;
>  
> -	ino_next = NEXT_ORPHAN(inode);
> -	prev = ei->i_orphan.prev;
> +	if (handle) {
> +		/* Grab inode buffer early before taking global s_orphan_lock */
> +		err = ext4_reserve_inode_write(handle, inode, &iloc);
> +	}
>  
> +	mutex_lock(&sbi->s_orphan_lock);
>  	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
> 

Should set prev = ei->i_orphan.prev; here, instead of down below where it has already been removed from the list.

Thanks,
Mak.
 
>  	list_del_init(&ei->i_orphan);
> @@ -2646,20 +2664,23 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
>  	 * transaction handle with which to update the orphan list on
>  	 * disk, but we still need to remove the inode from the linked
>  	 * list in memory. */
> -	if (!handle)
> -		goto out;
> -
> -	err = ext4_reserve_inode_write(handle, inode, &iloc);
> -	if (err)
> +	if (!handle || err) {
> +		mutex_unlock(&sbi->s_orphan_lock);
>  		goto out_err;
> +	}
>  
> +	ino_next = NEXT_ORPHAN(inode);
> +	prev = ei->i_orphan.prev;

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists