lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3FF09C2E-2855-457E-8533-50BBD8EECEF1@oracle.com>
Date:   Tue, 7 Sep 2021 15:57:06 +0000
From:   Wengang Wang <wen.gang.wang@...cle.com>
To:     Gang He <ghe@...e.com>
CC:     "mark@...heh.com" <mark@...heh.com>,
        "jlbec@...lplan.org" <jlbec@...lplan.org>,
        Joseph Qi <joseph.qi@...ux.alibaba.com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "ocfs2-devel@....oracle.com" <ocfs2-devel@....oracle.com>
Subject: Re: [Ocfs2-devel] [PATCH] ocfs2: avoid getting dlm lock of the target
 directory multiple times during reflink process

Hi Gang,

Sure, I will look into the problem you are trying to address. Bug fixes and performance improvements are always welcome!
That said, could you please provide an analysis of tcpdump captures between the (two) nodes, covering the reflink operation with and without your patch, to show how the patch saves dlm locking ping-pongs?

And what cases did you test to get better performance?

thanks,
wengang

> On Aug 30, 2021, at 11:25 PM, Gang He <ghe@...e.com> wrote:
> 
> Hello Joseph and Wengang,
> 
> When you have time, please help review this patch.
> Regarding the deadlock problem caused by the ocfs2_downconvert_lock 
> failure, we already have a fix patch, and that fix is critical.
> But I feel this patch is still useful as an optimization. The use
> case is reflinking files to the same directory concurrently: our 
> users usually back up files (via reflink) from the cluster nodes 
> concurrently (via crontab) every day/hour.
> In the current design, during the reflink process, the node 
> acquires/releases the dlm lock of the target directory multiple times,
> which is very inefficient for concurrent reflinks.
> 
> 
> Thanks
> Gang
> 
> On 2021/8/26 15:59, Gang He wrote:
>> During the reflink process, we should acquire the target directory
>> inode dlm lock at the beginning, and hold this dlm lock until end
>> of the function.
>> With this patch, we avoid the dlm lock ping-pong effect when cloning
>> files to the same directory simultaneously from multiple nodes.
>> There is a typical user scenario, users regularly back up files
>> to a specified directory through the reflink feature from the
>> multiple nodes.
>> 
>> Signed-off-by: Gang He <ghe@...e.com>
>> ---
>>  fs/ocfs2/namei.c        | 32 +++++++++++++-------------------
>>  fs/ocfs2/namei.h        |  2 ++
>>  fs/ocfs2/refcounttree.c | 15 +++++++++++----
>>  fs/ocfs2/xattr.c        | 12 +-----------
>>  fs/ocfs2/xattr.h        |  1 +
>>  5 files changed, 28 insertions(+), 34 deletions(-)
>> 
>> diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
>> index 2c46ff6ba4ea..f8bbb22cc60b 100644
>> --- a/fs/ocfs2/namei.c
>> +++ b/fs/ocfs2/namei.c
>> @@ -2489,6 +2489,7 @@ static int ocfs2_prep_new_orphaned_file(struct inode *dir,
>>  }
>> 
>>  int ocfs2_create_inode_in_orphan(struct inode *dir,
>> +				 struct buffer_head **dir_bh,
>>  				 int mode,
>>  				 struct inode **new_inode)
>>  {
>> @@ -2597,13 +2598,16 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
>> 
>>  	brelse(new_di_bh);
>> 
>> -	if (!status)
>> -		*new_inode = inode;
>> -
>>  	ocfs2_free_dir_lookup_result(&orphan_insert);
>> 
>> -	ocfs2_inode_unlock(dir, 1);
>> -	brelse(parent_di_bh);
>> +	if (!status) {
>> +		*new_inode = inode;
>> +		*dir_bh = parent_di_bh;
>> +	} else {
>> +		ocfs2_inode_unlock(dir, 1);
>> +		brelse(parent_di_bh);
>> +	}
>> +
>>  	return status;
>>  }
>> 
>> @@ -2760,11 +2764,11 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
>>  }
>> 
>>  int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
>> +				   struct buffer_head *dir_bh,
>>  				   struct inode *inode,
>>  				   struct dentry *dentry)
>>  {
>>  	int status = 0;
>> -	struct buffer_head *parent_di_bh = NULL;
>>  	handle_t *handle = NULL;
>>  	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
>>  	struct ocfs2_dinode *dir_di, *di;
>> @@ -2778,14 +2782,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
>>  				(unsigned long long)OCFS2_I(dir)->ip_blkno,
>>  				(unsigned long long)OCFS2_I(inode)->ip_blkno);
>> 
>> -	status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
>> -	if (status < 0) {
>> -		if (status != -ENOENT)
>> -			mlog_errno(status);
>> -		return status;
>> -	}
>> -
>> -	dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data;
>> +	dir_di = (struct ocfs2_dinode *) dir_bh->b_data;
>>  	if (!dir_di->i_links_count) {
>>  		/* can't make a file in a deleted directory. */
>>  		status = -ENOENT;
>> @@ -2798,7 +2795,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
>>  		goto leave;
>> 
>>  	/* get a spot inside the dir. */
>> -	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh,
>> +	status = ocfs2_prepare_dir_for_insert(osb, dir, dir_bh,
>>  					      dentry->d_name.name,
>>  					      dentry->d_name.len, &lookup);
>>  	if (status < 0) {
>> @@ -2862,7 +2859,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
>>  	ocfs2_journal_dirty(handle, di_bh);
>> 
>>  	status = ocfs2_add_entry(handle, dentry, inode,
>> -				 OCFS2_I(inode)->ip_blkno, parent_di_bh,
>> +				 OCFS2_I(inode)->ip_blkno, dir_bh,
>>  				 &lookup);
>>  	if (status < 0) {
>>  		mlog_errno(status);
>> @@ -2886,10 +2883,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
>>  	iput(orphan_dir_inode);
>>  leave:
>> 
>> -	ocfs2_inode_unlock(dir, 1);
>> -
>>  	brelse(di_bh);
>> -	brelse(parent_di_bh);
>>  	brelse(orphan_dir_bh);
>> 
>>  	ocfs2_free_dir_lookup_result(&lookup);
>> diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
>> index 9cc891eb874e..03a2c526e2c1 100644
>> --- a/fs/ocfs2/namei.h
>> +++ b/fs/ocfs2/namei.h
>> @@ -24,6 +24,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
>>  		     struct buffer_head *orphan_dir_bh,
>>  		     bool dio);
>>  int ocfs2_create_inode_in_orphan(struct inode *dir,
>> +				 struct buffer_head **dir_bh,
>>  				 int mode,
>>  				 struct inode **new_inode);
>>  int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
>> @@ -32,6 +33,7 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
>>  		struct inode *inode, struct buffer_head *di_bh,
>>  		int update_isize, loff_t end);
>>  int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
>> +				   struct buffer_head *dir_bh,
>>  				   struct inode *new_inode,
>>  				   struct dentry *new_dentry);
>> 
>> diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
>> index 7f6355cbb587..a9a0c7c37e8e 100644
>> --- a/fs/ocfs2/refcounttree.c
>> +++ b/fs/ocfs2/refcounttree.c
>> @@ -4250,7 +4250,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
>>  {
>>  	int error, had_lock;
>>  	struct inode *inode = d_inode(old_dentry);
>> -	struct buffer_head *old_bh = NULL;
>> +	struct buffer_head *old_bh = NULL, *dir_bh = NULL;
>>  	struct inode *new_orphan_inode = NULL;
>>  	struct ocfs2_lock_holder oh;
>> 
>> @@ -4258,7 +4258,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
>>  		return -EOPNOTSUPP;
>> 
>> 
>> -	error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
>> +	error = ocfs2_create_inode_in_orphan(dir, &dir_bh, inode->i_mode,
>>  					     &new_orphan_inode);
>>  	if (error) {
>>  		mlog_errno(error);
>> @@ -4304,13 +4304,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
>> 
>>  	/* If the security isn't preserved, we need to re-initialize them. */
>>  	if (!preserve) {
>> -		error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
>> +		error = ocfs2_init_security_and_acl(dir, dir_bh,
>> +						    new_orphan_inode,
>>  						    &new_dentry->d_name);
>>  		if (error)
>>  			mlog_errno(error);
>>  	}
>>  	if (!error) {
>> -		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
>> +		error = ocfs2_mv_orphaned_inode_to_new(dir, dir_bh,
>> +						       new_orphan_inode,
>>  						       new_dentry);
>>  		if (error)
>>  			mlog_errno(error);
>> @@ -4328,6 +4330,11 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
>>  			iput(new_orphan_inode);
>>  	}
>> 
>> +	if (dir_bh) {
>> +		ocfs2_inode_unlock(dir, 1);
>> +		brelse(dir_bh);
>> +	}
>> +
>>  	return error;
>>  }
>> 
>> diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
>> index dd784eb0cd7c..3f23e3a5018c 100644
>> --- a/fs/ocfs2/xattr.c
>> +++ b/fs/ocfs2/xattr.c
>> @@ -7203,16 +7203,13 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
>>  /*
>>   * Initialize security and acl for a already created inode.
>>   * Used for reflink a non-preserve-security file.
>> - *
>> - * It uses common api like ocfs2_xattr_set, so the caller
>> - * must not hold any lock expect i_mutex.
>>   */
>>  int ocfs2_init_security_and_acl(struct inode *dir,
>> +				struct buffer_head *dir_bh,
>>  				struct inode *inode,
>>  				const struct qstr *qstr)
>>  {
>>  	int ret = 0;
>> -	struct buffer_head *dir_bh = NULL;
>> 
>>  	ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
>>  	if (ret) {
>> @@ -7220,17 +7217,10 @@ int ocfs2_init_security_and_acl(struct inode *dir,
>>  		goto leave;
>>  	}
>> 
>> -	ret = ocfs2_inode_lock(dir, &dir_bh, 0);
>> -	if (ret) {
>> -		mlog_errno(ret);
>> -		goto leave;
>> -	}
>>  	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
>>  	if (ret)
>>  		mlog_errno(ret);
>> 
>> -	ocfs2_inode_unlock(dir, 0);
>> -	brelse(dir_bh);
>>  leave:
>>  	return ret;
>>  }
>> diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
>> index 00308b57f64f..b27fd8ba0019 100644
>> --- a/fs/ocfs2/xattr.h
>> +++ b/fs/ocfs2/xattr.h
>> @@ -83,6 +83,7 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
>>  			 struct buffer_head *new_bh,
>>  			 bool preserve_security);
>>  int ocfs2_init_security_and_acl(struct inode *dir,
>> +				struct buffer_head *dir_bh,
>>  				struct inode *inode,
>>  				const struct qstr *qstr);
>>  #endif /* OCFS2_XATTR_H */
>> 
> 
> 
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel@....oracle.com
> https://oss.oracle.com/mailman/listinfo/ocfs2-devel

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ