Message-ID: <99686926-542d-d558-ce22-e4d5c883b838@redhat.com>
Date:   Mon, 14 Nov 2022 21:46:21 +0800
From:   Xiubo Li <xiubli@...hat.com>
To:     Jeff Layton <jlayton@...nel.org>, ceph-devel@...r.kernel.org,
        idryomov@...il.com, viro@...iv.linux.org.uk
Cc:     lhenriques@...e.de, mchangir@...hat.com,
        linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        stable@...r.kernel.org
Subject: Re: [PATCH 2/2 v2] ceph: use an xarray to record all the opened files
 for each inode


On 14/11/2022 21:39, Jeff Layton wrote:
> On Mon, 2022-11-14 at 13:19 +0800, xiubli@...hat.com wrote:
>> From: Xiubo Li <xiubli@...hat.com>
>>
>> When releasing file locks, the fl->fl_file memory may already have
>> been released by another thread in filp_close(), so we can't depend
>> on fl->fl_file to get the inode. Just use an xarray to record the
>> opened files for each inode.
>>
>> Cc: stable@...r.kernel.org
>> URL: https://tracker.ceph.com/issues/57986
>> Signed-off-by: Xiubo Li <xiubli@...hat.com>
>> ---
>>   fs/ceph/file.c  |  9 +++++++++
>>   fs/ceph/inode.c |  4 ++++
>>   fs/ceph/locks.c | 17 ++++++++++++++++-
>>   fs/ceph/super.h |  4 ++++
>>   4 files changed, 33 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 85afcbbb5648..cb4a9c52df27 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -231,6 +231,13 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
>>   			fi->flags |= CEPH_F_SYNC;
>>   
>>   		file->private_data = fi;
>> +
>> +		ret = xa_insert(&ci->i_opened_files, (unsigned long)file,
>> +				CEPH_FILP_AVAILABLE, GFP_KERNEL);
>> +		if (ret) {
>> +			kmem_cache_free(ceph_file_cachep, fi);
>> +			return ret;
>> +		}
>>   	}
>>   
>>   	ceph_get_fmode(ci, fmode, 1);
>> @@ -932,6 +939,8 @@ int ceph_release(struct inode *inode, struct file *file)
>>   		dout("release inode %p regular file %p\n", inode, file);
>>   		WARN_ON(!list_empty(&fi->rw_contexts));
>>   
>> +		xa_erase(&ci->i_opened_files, (unsigned long)file);
>> +
>>   		ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE);
>>   		ceph_put_fmode(ci, fi->fmode, 1);
>>   
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index 77b0cd9af370..554450838e44 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -619,6 +619,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
>>   	INIT_LIST_HEAD(&ci->i_unsafe_iops);
>>   	spin_lock_init(&ci->i_unsafe_lock);
>>   
>> +	xa_init(&ci->i_opened_files);
>> +
>>   	ci->i_snap_realm = NULL;
>>   	INIT_LIST_HEAD(&ci->i_snap_realm_item);
>>   	INIT_LIST_HEAD(&ci->i_snap_flush_item);
>> @@ -637,6 +639,8 @@ void ceph_free_inode(struct inode *inode)
>>   {
>>   	struct ceph_inode_info *ci = ceph_inode(inode);
>>   
>> +	xa_destroy(&ci->i_opened_files);
>> +
>>   	kfree(ci->i_symlink);
>>   #ifdef CONFIG_FS_ENCRYPTION
>>   	kfree(ci->fscrypt_auth);
>> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
>> index d8385dd0076e..a176a30badd0 100644
>> --- a/fs/ceph/locks.c
>> +++ b/fs/ceph/locks.c
>> @@ -42,9 +42,10 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>>   
>>   static void ceph_fl_release_lock(struct file_lock *fl)
>>   {
>> -	struct ceph_file_info *fi = fl->fl_file->private_data;
>>   	struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
>>   	struct ceph_inode_info *ci;
>> +	struct ceph_file_info *fi;
>> +	void *val;
>>   
>>   	/*
>>   	 * If inode is NULL it should be a request file_lock,
>> @@ -54,6 +55,20 @@ static void ceph_fl_release_lock(struct file_lock *fl)
>>   		return;
>>   
>>   	ci = ceph_inode(inode);
>> +
>> +	/*
>> +	 * For POSIX-style locks there may be a race between concurrent
>> +	 * filp_close() calls, and the 'file' memory pointed to by
>> +	 * 'fl->fl_file' may already have been released. If so, just skip it.
>> +	 */
>> +	rcu_read_lock();
>> +	val = xa_load(&ci->i_opened_files, (unsigned long)fl->fl_file);
>> +	if (val == CEPH_FILP_AVAILABLE) {
>> +		fi = fl->fl_file->private_data;
>> +		atomic_dec(&fi->num_locks);
> Don't you need to remove the old atomic_dec from this function if you
> move it here?

Yeah, I thought I had removed that. Not sure why I added it back.

>
>> +	}
>> +	rcu_read_unlock();
>> +
>>   	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
>>   		/* clear error when all locks are released */
>>   		spin_lock(&ci->i_ceph_lock);
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 7b75a84ba48d..b3e89192cbec 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -329,6 +329,8 @@ struct ceph_inode_xattrs_info {
>>   	u64 version, index_version;
>>   };
>>   
>> +#define CEPH_FILP_AVAILABLE         xa_mk_value(1)
>> +
>>   /*
>>    * Ceph inode.
>>    */
>> @@ -434,6 +436,8 @@ struct ceph_inode_info {
>>   	struct list_head i_unsafe_iops;   /* uncommitted mds inode ops */
>>   	spinlock_t i_unsafe_lock;
>>   
>> +	struct xarray		i_opened_files;
>> +
>>   	union {
>>   		struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */
>>   		struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
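
(For reference, the xarray usage above boils down to the pattern below.
This is a minimal standalone sketch, not part of the patch: the file
pointer serves as the index, and xa_mk_value(1) is a tagged integer
entry, so no allocation is needed for the stored value.)

#include <linux/fs.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(opened_files);

/*
 * On open: record the file. xa_insert() returns -EBUSY if the index
 * is already present.
 */
static int track_open(struct file *filp)
{
	return xa_insert(&opened_files, (unsigned long)filp,
			 xa_mk_value(1), GFP_KERNEL);
}

/*
 * On lock release: check whether the file is still open. The lookup
 * itself is lockless, which is why ceph_fl_release_lock() above wraps
 * it in rcu_read_lock()/rcu_read_unlock().
 */
static bool filp_still_open(struct file *filp)
{
	return xa_load(&opened_files, (unsigned long)filp) == xa_mk_value(1);
}

/*
 * On ->release(): drop the entry again.
 */
static void track_release(struct file *filp)
{
	xa_erase(&opened_files, (unsigned long)filp);
}
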
> This looks like it'll work, but it's a lot of extra work, having to
> track this extra xarray just on the off chance that one of these fds
> might have file locks. The num_locks field is only checked in one place,
> in ceph_get_caps.
>
> Here's what I'd recommend instead:
>
> Have ceph_get_caps look at the lists in inode->i_flctx and see whether
> any of its locks have an fl_file that matches the @filp argument in that
> function. Most inodes never get any file locks, so in most cases this
> will turn out to just be a NULL pointer check for i_flctx anyway.
>
> Then you can just remove the num_locks field and call the new helper
> from ceph_get_caps instead. I'll send along a proposed patch for the
> helper in a bit.
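
A rough sketch of the helper being described might look like the
following (the name ceph_filp_has_locks and the locking details are
illustrative guesses based on the pre-6.2 file_lock layout, not Jeff's
actual patch):

static bool ceph_filp_has_locks(struct inode *inode, struct file *filp)
{
	/*
	 * Readers of i_flctx pair smp_load_acquire() with the store in
	 * locks_get_lock_context().
	 */
	struct file_lock_context *ctx = smp_load_acquire(&inode->i_flctx);
	struct file_lock *fl;
	bool found = false;

	/* Most inodes never take file locks, so this is the common case. */
	if (!ctx)
		return false;

	spin_lock(&ctx->flc_lock);
	list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
		if (fl->fl_file == filp) {
			found = true;
			goto out;
		}
	}
	list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
		if (fl->fl_file == filp) {
			found = true;
			goto out;
		}
	}
out:
	spin_unlock(&ctx->flc_lock);
	return found;
}

ceph_get_caps() could then call such a helper with its @filp argument
in place of the fi->num_locks check, and the extra xarray would no
longer be needed.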

Sure, thanks Jeff.

- Xiubo

