lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <49BA7B60.60607@free.fr>
Date:	Fri, 13 Mar 2009 16:27:28 +0100
From:	Cedric Le Goater <legoater@...e.fr>
To:	Ying Han <yinghan@...gle.com>
CC:	"Serge E. Hallyn" <serue@...ibm.com>, Greg Kurz <gkurz@...ibm.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-api@...r.kernel.org, containers@...ts.linux-foundation.org,
	mpm@...enic.com, linux-kernel@...r.kernel.org,
	Dave Hansen <dave@...ux.vnet.ibm.com>, linux-mm@...ck.org,
	tglx@...utronix.de, viro@...iv.linux.org.uk, hpa@...or.com,
	mingo@...e.hu, torvalds@...ux-foundation.org,
	Alexey Dobriyan <adobriyan@...il.com>, xemul@...nvz.org
Subject: Re: How much of a mess does OpenVZ make? ;) Was: What can OpenVZ
 do?

Ying Han wrote:
> Hi Serge:
> I made a patch based on Oren's tree recently which implement a new
> syscall clone_with_pid. I tested with checkpoint/restart process tree
> and it works as expected.
> This patch has some hack in it which i made a copy of libc's clone and
> made modifications of passing one more argument(pid number). I will
> try to clean up the code and do more testing.

ok. 2 patches would also be interesting. one for the syscall and one
for the kernel support (which might be acceptable)

> New syscall clone_with_pid
> Implement a new syscall which clone a thread with a preselected pid number.

yes this definitely needed to restart a task/thread. we maintain an ugly 
hack which stores a pid in the current task that will be used by the next 
clone() call. 

> clone_with_pid(child_func, child_stack + CHILD_STACK - 16,
> 			CLONE_WITH_PID|SIGCHLD, pid, NULL);

since you're introducing a new syscall, I don't see why you need a 
CLONE_WITH_PID flag ?

(FYI, attached is my old attempt of clone_with_pid but that's too old)

[ ... ]

> +#define DEBUG
>  #include <linux/slab.h>
>  #include <linux/init.h>
>  #include <linux/unistd.h>
> @@ -959,10 +959,19 @@ static struct task_struct *copy_process(unsigned long cl
>  	int retval;
>  	struct task_struct *p;
>  	int cgroup_callbacks_done = 0;
> +	pid_t clone_pid = stack_size;
> 
>  	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
>  		return ERR_PTR(-EINVAL);
> 
> +	/* We only allow the clone_with_pid when a new pid namespace is
> +	 * created. FIXME: how to restrict it.
> +	 */
> +	if ((clone_flags & CLONE_NEWPID) && (clone_flags & CLONE_WITH_PID))
> +		return ERR_PTR(-EINVAL);
> +	if ((clone_flags & CLONE_WITH_PID) && (clone_pid <= 1))
> +		return ERR_PTR(-EINVAL);

I would let alloc_pid() handle the error.

>  	/*
>  	 * Thread groups must share signals as well, and detached threads
>  	 * can only be started up within the thread group.
> @@ -1135,7 +1144,10 @@ static struct task_struct *copy_process(unsigned long c
> 
>  	if (pid != &init_struct_pid) {
>  		retval = -ENOMEM;
> -		pid = alloc_pid(task_active_pid_ns(p));
> +		if (clone_flags & CLONE_WITH_PID)
> +			pid = alloc_pid(task_active_pid_ns(p), clone_pid);
> +		else
> +			pid = alloc_pid(task_active_pid_ns(p), 0);

this is overkill IMO.

[ ... ]

> -static int alloc_pidmap(struct pid_namespace *pid_ns)
> +static int alloc_pidmap(struct pid_namespace *pid_ns, pid_t pid_nr)
>  {
>  	int i, offset, max_scan, pid, last = pid_ns->last_pid;
>  	struct pidmap *map;
> 
> -	pid = last + 1;
> +	if (pid_nr)
> +		pid = pid_nr;
> +	else
> +		pid = last + 1;
>
>  	if (pid >= pid_max)
>  		pid = RESERVED_PIDS;
>  	offset = pid & BITS_PER_PAGE_MASK;
> @@ -153,9 +156,12 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
>  			do {
>  				if (!test_and_set_bit(offset, map->page)) {
>  					atomic_dec(&map->nr_free);
> -					pid_ns->last_pid = pid;
> +					if (!pid_nr)
> +						pid_ns->last_pid = pid;
>  					return pid;
>  				}
> +				if (pid_nr)
> +					return -1;
>  				offset = find_next_offset(map, offset);
>  				pid = mk_pid(pid_ns, map, offset);
>  			/*
> @@ -239,21 +245,25 @@ void free_pid(struct pid *pid)
>  	call_rcu(&pid->rcu, delayed_put_pid);
>  }
> 
> -struct pid *alloc_pid(struct pid_namespace *ns)
> +struct pid *alloc_pid(struct pid_namespace *ns, pid_t pid_nr)
>  {
>  	struct pid *pid;
>  	enum pid_type type;
>  	int i, nr;
>  	struct pid_namespace *tmp;
>  	struct upid *upid;
> +	int level = ns->level;
> +
> +	if (pid_nr >= pid_max)
> +		return NULL;

let alloc_pidmap() handle it ? 

> 
>  	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
>  	if (!pid)
>  		goto out;
> 
> -	tmp = ns;
> -	for (i = ns->level; i >= 0; i--) {
> -		nr = alloc_pidmap(tmp);
> +	tmp = ns->parent;
> +	for (i = level-1; i >= 0; i--) {
> +		nr = alloc_pidmap(tmp, 0);
>  		if (nr < 0)
>  			goto out_free;
> 
> @@ -262,6 +272,14 @@ struct pid *alloc_pid(struct pid_namespace *ns)
>  		tmp = tmp->parent;
>  	}
> 
> +	nr = alloc_pidmap(ns, pid_nr);
> +	if (nr < 0)
> +		goto out_free;
> +	pid->numbers[level].nr = nr;
> +	pid->numbers[level].ns = ns;
> +	if (nr == pid_nr)
> +		pr_debug("nr == pid_nr == %d\n", nr);
> +
>  	get_pid_ns(ns);
>  	pid->level = ns->level;
>  	atomic_set(&pid->count, 1);
> 
> 
> 
> 
> 
> 
> 
> 
> On Thu, Mar 12, 2009 at 2:21 PM, Serge E. Hallyn <serue@...ibm.com> wrote:
>> Quoting Greg Kurz (gkurz@...ibm.com):
>>> On Thu, 2009-03-12 at 09:53 -0500, Serge E. Hallyn wrote:
>>>> Or are you suggesting that you'll do a dummy clone of (5594,2) so that
>>>> the next clone(CLONE_NEWPID) will be expected to be (5594,3,1)?
>>>>
>>> Of course not
>> Ok - someone *did* argue that at some point I think...
>>
>>> but one should be able to tell clone() to pick a specific
>>> pid.
>> Can you explain exactly how?  I must be missing something clever.
>>
>> -serge
>>
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majordomo@...ck.org.  For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Don't email: <a href=mailto:"dont@...ck.org"> email@...ck.org </a>


View attachment "clone_with_pid.patch" of type "text/plain" (7144 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ