linux-kernel - Re: [RFC PATCH] shm: extend forced shm destroy to support objects from several IPC nses

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bd0a1f71-4624-d88a-98a8-6550926349b3@colorfullife.com>
Date:   Thu, 22 Jul 2021 20:46:14 +0200
From:   Manfred Spraul <manfred@...orfullife.com>
To:     Alexander Mikhalitsyn <alexander.mikhalitsyn@...tuozzo.com>,
        linux-kernel@...r.kernel.org
Cc:     "Eric W . Biederman" <ebiederm@...ssion.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Davidlohr Bueso <dave@...olabs.net>,
        Johannes Weiner <hannes@...xchg.org>,
        Michal Hocko <mhocko@...e.com>,
        Vladimir Davydov <vdavydov.dev@...il.com>,
        Andrei Vagin <avagin@...il.com>,
        Christian Brauner <christian@...uner.io>,
        Pavel Tikhomirov <ptikhomirov@...tuozzo.com>,
        Alexander Mikhalitsyn <alexander@...alicyn.com>
Subject: Re: [RFC PATCH] shm: extend forced shm destroy to support objects
 from several IPC nses

Hi Alexander,

A few more remarks.

On 7/14/21 7:30 PM, Alexander Mikhalitsyn wrote:
> This is total rework of fix.
> Thanks to Eric Biederman for suggestions (but may be I've misunderstood some of them :))
>
> I've tested it with reproducer of the original problem. But of course it needs
> detailed testing. I hope that I get some general comments about design and implementation.
>
> ToDo: remove unneeded "ns" argument from shm_destroy, shm_rmid and other functions.

What ensures the that shp->ns is not destroyed prematurely?

I did some tests, and it seems that shmat() acquires a namespace 
refcount, and shm_release() puts it again, and the shm_release is late 
enough to ensure that the ns cannot go out of scope.

But I haven't checked all combinations (with/without shm_rmid_forced, 
delete via exit(), shmctl(), shmdt(), mmap()).

And: This should be documented somewhere.


> --- a/ipc/shm.c
> +++ b/ipc/shm.c
> @@ -65,6 +65,7 @@ struct shmid_kernel /* private to the kernel */
>   	/* The task created the shm object.  NULL if the task is dead. */
>   	struct task_struct	*shm_creator;
>   	struct list_head	shm_clist;	/* list by creator */

I think the comments are wrong/outdated.

Some parts of the new code checks with list_empty(shm_clist), not by 
looking at shm_creator.

> +	struct ipc_namespace	*ns;
>   } __randomize_layout;
>   
>   /* shm_mode upper byte flags */
> @@ -115,6 +116,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
>   	struct shmid_kernel *shp;
>   
>   	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
> +	BUG_ON(shp->ns && ns != shp->ns);

Is shp->ns == NULL allowed/possible? From what I see, it is impossible.

I think we should not have  NULL check in a few codepaths, but not in 
other codepaths. Either everywhere, or nowhere.


>   
>   	if (shp->shm_nattch) {
>   		shp->shm_perm.mode |= SHM_DEST;
> @@ -225,9 +227,32 @@ static void shm_rcu_free(struct rcu_head *head)
>   	kfree(shp);
>   }
>   
> +static inline void task_shm_clist_lock(struct task_struct *task)
> +{
> +	spin_lock(&task->sysvshm.shm_clist_lock);
> +}
> +
> +static inline void task_shm_clist_unlock(struct task_struct *task)
> +{
> +	spin_unlock(&task->sysvshm.shm_clist_lock);
> +}
> +
> +void shm_clist_rm(struct shmid_kernel *shp)
> +{
> +	if (!shp->shm_creator)
> +		return;
> +
> +	task_shm_clist_lock(shp->shm_creator);
> +	list_del_init(&shp->shm_clist);
> +	task_shm_clist_unlock(shp->shm_creator);
> +	shp->shm_creator = NULL;
> +}
> +
>   static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
>   {
> -	list_del(&s->shm_clist);
> +	WARN_ON(s->ns && ns != s->ns);
> +	//list_del_init(&s->shm_clist);
> +	shm_clist_rm(s);
>   	ipc_rmid(&shm_ids(ns), &s->shm_perm);
>   }
>   
> @@ -306,10 +331,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
>    *
>    * 2) sysctl kernel.shm_rmid_forced is set to 1.
>    */
> -static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
> +static bool shm_may_destroy(struct shmid_kernel *shp)
>   {
>   	return (shp->shm_nattch == 0) &&
> -	       (ns->shm_rmid_forced ||
> +	       (shp->ns->shm_rmid_forced ||
>   		(shp->shm_perm.mode & SHM_DEST));
>   }
>   
As written before: what ensures that shp->ns->shm_rmid_forced was not 
released already?
> @@ -340,7 +365,7 @@ static void shm_close(struct vm_area_struct *vma)
>   	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
>   	shp->shm_dtim = ktime_get_real_seconds();
>   	shp->shm_nattch--;
> -	if (shm_may_destroy(ns, shp))
> +	if (shm_may_destroy(shp))
>   		shm_destroy(ns, shp);
>   	else
>   		shm_unlock(shp);
> @@ -361,10 +386,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
>   	 *
>   	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
>   	 */
> -	if (shp->shm_creator != NULL)
> +	if (!list_empty(&shp->shm_clist))
>   		return 0;
>   
This collides with the comment above: here, list_empty() is used.
> -	if (shm_may_destroy(ns, shp)) {
> +	if (shm_may_destroy(shp)) {
>   		shm_lock_by_ptr(shp);
>   		shm_destroy(ns, shp);
>   	}
> @@ -379,51 +404,77 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
>   	up_write(&shm_ids(ns).rwsem);
>   }
>   
> +void shm_init_task(struct task_struct *task)
> +{
> +	INIT_LIST_HEAD(&(task)->sysvshm.shm_clist);
> +	spin_lock_init(&task->sysvshm.shm_clist_lock);
> +}
> +
>   /* Locking assumes this will only be called with task == current */
>   void exit_shm(struct task_struct *task)
>   {
> -	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
> +	LIST_HEAD(tmp);
>   	struct shmid_kernel *shp, *n;
>   
>   	if (list_empty(&task->sysvshm.shm_clist))
>   		return;
>   
> -	/*
> -	 * If kernel.shm_rmid_forced is not set then only keep track of
> -	 * which shmids are orphaned, so that a later set of the sysctl
> -	 * can clean them up.
> -	 */
> -	if (!ns->shm_rmid_forced) {
> -		down_read(&shm_ids(ns).rwsem);
> -		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
> -			shp->shm_creator = NULL;
> -		/*
> -		 * Only under read lock but we are only called on current
> -		 * so no entry on the list will be shared.
> -		 */
> -		list_del(&task->sysvshm.shm_clist);
> -		up_read(&shm_ids(ns).rwsem);
> -		return;
> -	}
> +	rcu_read_lock(); /* for refcount_inc_not_zero */
> +	task_shm_clist_lock(task);
>   
> -	/*
> -	 * Destroy all already created segments, that were not yet mapped,
> -	 * and mark any mapped as orphan to cover the sysctl toggling.
> -	 * Destroy is skipped if shm_may_destroy() returns false.
> -	 */
> -	down_write(&shm_ids(ns).rwsem);
>   	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
> +		struct ipc_namespace *ns = shp->ns;
> +
> +		/*
> +		 * Remove shm from task list and nullify shm_creator which
> +		 * marks object as orphaned.
> +		 *
> +		 * If kernel.shm_rmid_forced is not set then only keep track of
> +		 * which shmids are orphaned, so that a later set of the sysctl
> +		 * can clean them up.
> +		 */
> +		list_del_init(&shp->shm_clist);
>   		shp->shm_creator = NULL;
>   
> -		if (shm_may_destroy(ns, shp)) {
> -			shm_lock_by_ptr(shp);
> -			shm_destroy(ns, shp);
> +		printk("exit_shm() %px refcnt=%u, id=%d,key=%x\n", shp,
> +			refcount_read(&shp->shm_perm.refcount),
> +			shp->shm_perm.id, shp->shm_perm.key
> +		);
> +
> +		/*
> +		 * Will destroy all already created segments, that were not yet mapped,
> +		 * and mark any mapped as orphan to cover the sysctl toggling.
> +		 * Destroy is skipped if shm_may_destroy() returns false.
> +		 */
> +		if (shp->ns->shm_rmid_forced && shm_may_destroy(shp)) {
> +			/*
> +			 * We may race with shm_exit_ns() if refcounter
> +			 * already zero. Let's skip shm_destroy() of such
> +			 * shm object as it will be destroyed during shm_exit_ns()
> +			 */
> +			if (!refcount_inc_not_zero(&ns->ns.count)) /* get_ipc_ns */
> +				continue;
> +
> +			list_add(&shp->shm_clist, &tmp);
>   		}
>   	}
>   
> -	/* Remove the list head from any segments still attached. */
>   	list_del(&task->sysvshm.shm_clist);
> -	up_write(&shm_ids(ns).rwsem);
> +	task_shm_clist_unlock(task);
> +	rcu_read_unlock();
> +
> +	list_for_each_entry_safe(shp, n, &tmp, shm_clist) {
> +		struct ipc_namespace *ns = shp->ns;
> +
> +		list_del_init(&shp->shm_clist);
> +
> +		down_write(&shm_ids(ns).rwsem);
> +		shm_lock_by_ptr(shp);
> +		/* will do put_ipc_ns(shp->ns) */
> +		shm_destroy(ns, shp);
> +		up_write(&shm_ids(ns).rwsem);
> +		put_ipc_ns(ns); /* see refcount_inc_not_zero */
> +	}
>   }
>   

I do not see the advantage of first collecting everything in a local 
list, and then destroying the elements.

Attached is my current test case. Feel free to merge whatever you 
consider as useful into your change.


--

     Manfred


View attachment "0003-ipc-shm-locking-questions.patch" of type "text/x-patch" (10630 bytes)