lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1497405190.2595.3.camel@themaw.net>
Date:   Wed, 14 Jun 2017 09:53:10 +0800
From:   Ian Kent <raven@...maw.net>
To:     Andrei Vagin <avagin@...nvz.org>,
        "Eric W . Biederman" <ebiederm@...ssion.com>
Cc:     Alexander Viro <viro@...iv.linux.org.uk>,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        Linux Containers <containers@...ts.linux-foundation.org>
Subject: Re: [PATCH RFC] mnt: umount mounts one by one in umount_tree()

On Fri, 2017-05-12 at 00:08 -0700, Andrei Vagin wrote:
> With this patch, we don't try to umount all mounts of a tree together.
> Instead of this we umount them one by one. In this case, we see a significant
> improvement in performance for the worst case.

Indeed, umount has been very slow for a while now.
Even a moderately large number of mounts (~10000) become painfully slow.

Are you still pursuing this?
Anything I can do to help?

Eric, what are your thoughts on this latest attempt?

> 
> The reason of this optimization is that umount() can hold namespace_sem
> for a long time, this semaphore is global, so it affects all users.
> Recently Eric W. Biederman added a per mount namespace limit on the
> number of mounts. The default number of mounts allowed per mount
> namespace at 100,000. Currently this value is allowed to construct a tree
> which requires hours to be umounted.
> 
> In the worst case the current complexity of umount_tree() is O(n^3).
> * Enumerate all mounts in a target tree (propagate_umount)
> * Enumerate mounts to find where these changes have to
>   be propagated (mark_umount_candidates)
> * Enumerate mounts to find a required mount by parent and dentry
>   (__lookup_mnt). __lookup_mnt() searches a mount in m_hash, but
>   the number of mounts is much bigger than the size of the hash.
> 
> The worst case is when all mounts from the tree live in the same shared
> group. In this case we have to enumerate all mounts at each step.
> 
> There is CVE-2016-6213 about this case.
> 
> Here are results for the kernel with this patch
> $ for i in `seq 10 15`; do  unshare -m sh ./run.sh $i; done
> umount -l /mnt/1 -> 	0:00.00
> umount -l /mnt/1 -> 	0:00.01
> umount -l /mnt/1 -> 	0:00.01
> umount -l /mnt/1 -> 	0:00.03
> umount -l /mnt/1 -> 	0:00.07
> umount -l /mnt/1 -> 	0:00.14
> 
> Here are results for the kernel without this patch
> $ for i in `seq 10 15`; do  unshare -m sh ./run.sh $i; done
> umount -l /mnt/1 -> 	0:00.04
> umount -l /mnt/1 -> 	0:00.17
> umount -l /mnt/1 -> 	0:00.75
> umount -l /mnt/1 -> 	0:05.96
> umount -l /mnt/1 -> 	0:34.40
> umount -l /mnt/1 -> 	3:46.27
> 
> And here is a test script:
> $ cat run.sh
> set -e -m
> 
> mount -t tmpfs zdtm /mnt
> mkdir -p /mnt/1 /mnt/2
> mount -t tmpfs zdtm /mnt/1
> mount --make-shared /mnt/1
> mkdir /mnt/1/1
> 
> for i in `seq $1`; do
> 	./mount --bind /mnt/1/1 /mnt/1/1
> done
> 
> echo -n "umount -l /mnt/1 -> "
> /usr/bin/time -f '%E' ./umount -l /mnt/1
> 
> And we need these simple mount and umount tools, because the standard
> ones read /proc/self/mountinfo, but this is extremely slow when we have
> thousands of mounts.
> $ cat mount.c
>  #include <sys/mount.h>
>  #include <stdlib.h>
> 
>  int main(int argc, char **argv)
>  {
>  	return mount(argv[2], argv[3], NULL, MS_BIND, NULL);
>  }
> 
> $ cat umount.c
>  #include <sys/mount.h>
> 
>  int main(int argc, char **argv)
>  {
>  	return umount2(argv[2], MNT_DETACH);
>  }
> 
> Here is a previous attempt to optimize this code:
> https://lkml.org/lkml/2016/10/10/495
> 
> Signed-off-by: Andrei Vagin <avagin@...nvz.org>
> ---
>  fs/namespace.c | 81 +++++++++++++++++++++++++++++++------------------------
> ---
>  1 file changed, 43 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 3bf0cd2..4e6f258 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -1474,56 +1474,61 @@ static bool disconnect_mount(struct mount *mnt, enum
> umount_tree_flags how)
>   */
>  static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
>  {
> -	LIST_HEAD(tmp_list);
>  	struct mount *p;
> +	int done = 0;
>  
>  	if (how & UMOUNT_PROPAGATE)
>  		propagate_mount_unlock(mnt);
>  
>  	/* Gather the mounts to umount */
> -	for (p = mnt; p; p = next_mnt(p, mnt)) {
> +	while (!done) {
> +		LIST_HEAD(tmp_list);
> +
> +		p = mnt;
> +		while (!list_empty(&p->mnt_mounts))
> +			p = list_entry(p->mnt_mounts.next, struct mount,
> mnt_child);
> +
>  		p->mnt.mnt_flags |= MNT_UMOUNT;
>  		list_move(&p->mnt_list, &tmp_list);
> -	}
> -
> -	/* Hide the mounts from mnt_mounts */
> -	list_for_each_entry(p, &tmp_list, mnt_list) {
>  		list_del_init(&p->mnt_child);
> -	}
>  
> -	/* Add propogated mounts to the tmp_list */
> -	if (how & UMOUNT_PROPAGATE)
> -		propagate_umount(&tmp_list);
> -
> -	while (!list_empty(&tmp_list)) {
> -		struct mnt_namespace *ns;
> -		bool disconnect;
> -		p = list_first_entry(&tmp_list, struct mount, mnt_list);
> -		list_del_init(&p->mnt_expire);
> -		list_del_init(&p->mnt_list);
> -		ns = p->mnt_ns;
> -		if (ns) {
> -			ns->mounts--;
> -			__touch_mnt_namespace(ns);
> -		}
> -		p->mnt_ns = NULL;
> -		if (how & UMOUNT_SYNC)
> -			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
> -
> -		disconnect = disconnect_mount(p, how);
> -
> -		pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
> -				 disconnect ? &unmounted : NULL);
> -		if (mnt_has_parent(p)) {
> -			mnt_add_count(p->mnt_parent, -1);
> -			if (!disconnect) {
> -				/* Don't forget about p */
> -				list_add_tail(&p->mnt_child, &p->mnt_parent-
> >mnt_mounts);
> -			} else {
> -				umount_mnt(p);
> +		/* Add propogated mounts to the tmp_list */
> +		if (how & UMOUNT_PROPAGATE)
> +			propagate_umount(&tmp_list);
> +
> +		if (p == mnt)
> +			done = 1;
> +
> +		while (!list_empty(&tmp_list)) {
> +			struct mnt_namespace *ns;
> +			bool disconnect;
> +			p = list_first_entry(&tmp_list, struct mount,
> mnt_list);
> +			list_del_init(&p->mnt_expire);
> +			list_del_init(&p->mnt_list);
> +			ns = p->mnt_ns;
> +			if (ns) {
> +				ns->mounts--;
> +				__touch_mnt_namespace(ns);
> +			}
> +			p->mnt_ns = NULL;
> +			if (how & UMOUNT_SYNC)
> +				p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
> +
> +			disconnect = disconnect_mount(p, how);
> +
> +			pin_insert_group(&p->mnt_umount, &p->mnt_parent->mnt,
> +					 disconnect ? &unmounted : NULL);
> +			if (mnt_has_parent(p)) {
> +				mnt_add_count(p->mnt_parent, -1);
> +				if (!disconnect) {
> +					/* Don't forget about p */
> +					list_add_tail(&p->mnt_child, &p-
> >mnt_parent->mnt_mounts);
> +				} else {
> +					umount_mnt(p);
> +				}
>  			}
> +			change_mnt_propagation(p, MS_PRIVATE);
>  		}
> -		change_mnt_propagation(p, MS_PRIVATE);
>  	}
>  }
>  

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ