[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <42d482dc-be96-a624-2ecf-3d22f5baed1c@bytedance.com>
Date: Thu, 25 Aug 2022 20:11:57 +0800
From: Chengming Zhou <zhouchengming@...edance.com>
To: Tejun Heo <tj@...nel.org>
Cc: linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
Johannes Weiner <hannes@...xchg.org>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Imran Khan <imran.f.khan@...cle.com>, kernel-team@...com
Subject: Re: [PATCH 4/7] kernfs: Skip kernfs_drain_open_files() more
aggressively
On 2022/8/20 08:05, Tejun Heo wrote:
> Track the number of mmapped files and files that need to be released and
> skip kernfs_drain_open_file() if both are zero, which are the precise
> conditions which require draining open_files. The early exit test is
> factored into kernfs_should_drain_open_files() which is now tested by
> kernfs_drain_open_files()'s caller - kernfs_drain().
>
> This isn't a meaningful optimization on its own but will enable future
> stand-alone kernfs_deactivate() implementation.
>
> Signed-off-by: Tejun Heo <tj@...nel.org>
> ---
> fs/kernfs/dir.c | 3 ++-
> fs/kernfs/file.c | 51 +++++++++++++++++++++++++------------
> fs/kernfs/kernfs-internal.h | 1 +
> 3 files changed, 38 insertions(+), 17 deletions(-)
>
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index 1cc88ba6de90..8ae44db920d4 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -489,7 +489,8 @@ static void kernfs_drain(struct kernfs_node *kn)
> rwsem_release(&kn->dep_map, _RET_IP_);
> }
>
> - kernfs_drain_open_files(kn);
> + if (kernfs_should_drain_open_files(kn))
> + kernfs_drain_open_files(kn);
>
> down_write(&root->kernfs_rwsem);
> }
> diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
> index 7060a2a714b8..b510589af427 100644
> --- a/fs/kernfs/file.c
> +++ b/fs/kernfs/file.c
> @@ -23,6 +23,8 @@ struct kernfs_open_node {
> atomic_t event;
> wait_queue_head_t poll;
> struct list_head files; /* goes through kernfs_open_file.list */
> + unsigned int nr_mmapped;
> + unsigned int nr_to_release;
> };
>
> /*
> @@ -527,6 +529,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
>
> rc = 0;
> of->mmapped = true;
> + of_on(of)->nr_mmapped++;
> of->vm_ops = vma->vm_ops;
> vma->vm_ops = &kernfs_vm_ops;
> out_put:
> @@ -574,6 +577,8 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
> }
The current code uses kmalloc() to allocate kernfs_open_node, leaving the new
nr_to_release and nr_mmapped counters uninitialized (kzalloc() or explicit
initialization is needed here).
Found by the stress test below:
```
cd /sys/fs/cgroup
while true; do echo 0 > cgroup.pressure; echo 1 > cgroup.pressure; done
while true; do cat *.pressure; done
```
Thanks.
>
> list_add_tail(&of->list, &on->files);
> + if (kn->flags & KERNFS_HAS_RELEASE)
> + on->nr_to_release++;
>
> mutex_unlock(mutex);
> return 0;
> @@ -606,8 +611,12 @@ static void kernfs_unlink_open_file(struct kernfs_node *kn,
> return;
> }
>
> - if (of)
> + if (of) {
> + WARN_ON_ONCE((kn->flags & KERNFS_HAS_RELEASE) && !of->released);
> + if (of->mmapped)
> + on->nr_mmapped--;
> list_del(&of->list);
> + }
>
> if (list_empty(&on->files)) {
> rcu_assign_pointer(kn->attr.open, NULL);
> @@ -766,6 +775,7 @@ static void kernfs_release_file(struct kernfs_node *kn,
> */
> kn->attr.ops->release(of);
> of->released = true;
> + of_on(of)->nr_to_release--;
> }
> }
>
> @@ -790,25 +800,30 @@ static int kernfs_fop_release(struct inode *inode, struct file *filp)
> return 0;
> }
>
> -void kernfs_drain_open_files(struct kernfs_node *kn)
> +bool kernfs_should_drain_open_files(struct kernfs_node *kn)
> {
> struct kernfs_open_node *on;
> - struct kernfs_open_file *of;
> - struct mutex *mutex;
> -
> - if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
> - return;
> + bool ret;
>
> /*
> - * lockless opportunistic check is safe below because no one is adding to
> - * ->attr.open at this point of time. This check allows early bail out
> - * if ->attr.open is already NULL. kernfs_unlink_open_file makes
> - * ->attr.open NULL only while holding kernfs_open_file_mutex so below
> - * check under kernfs_open_file_mutex_ptr(kn) will ensure bailing out if
> - * ->attr.open became NULL while waiting for the mutex.
> + * @kn being deactivated guarantees that @kn->attr.open can't change
> + * beneath us making the lockless test below safe.
> */
> - if (!rcu_access_pointer(kn->attr.open))
> - return;
> + WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
> +
> + rcu_read_lock();
> + on = rcu_dereference(kn->attr.open);
> + ret = on && (on->nr_mmapped || on->nr_to_release);
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +
> +void kernfs_drain_open_files(struct kernfs_node *kn)
> +{
> + struct kernfs_open_node *on;
> + struct kernfs_open_file *of;
> + struct mutex *mutex;
>
> mutex = kernfs_open_file_mutex_lock(kn);
> on = kernfs_deref_open_node_locked(kn);
> @@ -820,13 +835,17 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
> list_for_each_entry(of, &on->files, list) {
> struct inode *inode = file_inode(of->file);
>
> - if (kn->flags & KERNFS_HAS_MMAP)
> + if (of->mmapped) {
> unmap_mapping_range(inode->i_mapping, 0, 0, 1);
> + of->mmapped = false;
> + on->nr_mmapped--;
> + }
>
> if (kn->flags & KERNFS_HAS_RELEASE)
> kernfs_release_file(kn, of);
> }
>
> + WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
> mutex_unlock(mutex);
> }
>
> diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
> index 3ae214d02d44..fc5821effd97 100644
> --- a/fs/kernfs/kernfs-internal.h
> +++ b/fs/kernfs/kernfs-internal.h
> @@ -157,6 +157,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
> */
> extern const struct file_operations kernfs_file_fops;
>
> +bool kernfs_should_drain_open_files(struct kernfs_node *kn);
> void kernfs_drain_open_files(struct kernfs_node *kn);
>
> /*
Powered by blists - more mailing lists