lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20111013140216.GD11230@moon>
Date:	Thu, 13 Oct 2011 18:02:16 +0400
From:	Cyrill Gorcunov <gorcunov@...nvz.org>
To:	LKML <linux-kernel@...r.kernel.org>, Nathan Lynch <ntl@...ox.com>,
	Oren Laadan <orenl@...columbia.edu>,
	Daniel Lezcano <dlezcano@...ibm.com>,
	Serge Hallyn <serue@...ibm.com>, Tejun Heo <tj@...nel.org>
Cc:	Pavel Emelyanov <xemul@...allels.com>,
	Glauber Costa <glommer@...allels.com>,
	Linux Containers <containers@...ts.osdl.org>,
	James Bottomley <jbottomley@...allels.com>
Subject: [PATCH] clone: Introduce the CLONE_CHILD_USEPID functionality

It seems no final conclusion was reached at the previous review session,
so this is a second attempt to move forward.

---
From: Pavel Emelyanov <xemul@...nvz.org>
Subject: [PATCH] clone: Introduce the CLONE_CHILD_USEPID functionality

When restoring a task (or a set of tasks) we need to recreate them with
exactly the same pids as they had before. Thus we need the ability to
create a task with a specified pid.

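The proposal is to reuse the clone flag bit (0x02000000) that was freed
when CLONE_STOPPED was removed, as CLONE_CHILD_USEPID. When the flag is
set, the desired pid is read from the location pointed to by child_tidptr.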

Regarding the security implications: this can create problems with pid
wraparound and similar, so the approach can be restricted with a rule such
as "don't allow CLONE_CHILD_USEPID once the current pid namespace has ever
done a real pid allocation". This will work perfectly for checkpoint-restore
and will not give anyone a chance to screw up pids on a live system.

Signed-off-by: Pavel Emelyanov <xemul@...nvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@...nvz.org>
---
 include/linux/pid.h   |    2 -
 include/linux/sched.h |    1 
 kernel/fork.c         |   10 ++++++-
 kernel/pid.c          |   70 ++++++++++++++++++++++++++++++++++++--------------
 4 files changed, 62 insertions(+), 21 deletions(-)

Index: linux-2.6.git/include/linux/pid.h
===================================================================
--- linux-2.6.git.orig/include/linux/pid.h
+++ linux-2.6.git/include/linux/pid.h
@@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
 int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
-extern struct pid *alloc_pid(struct pid_namespace *ns);
+extern struct pid *alloc_pid(struct pid_namespace *ns, int pid);
 extern void free_pid(struct pid *pid);
 
 /*
Index: linux-2.6.git/include/linux/sched.h
===================================================================
--- linux-2.6.git.orig/include/linux/sched.h
+++ linux-2.6.git/include/linux/sched.h
@@ -23,6 +23,7 @@
 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
 /* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
    and is now available for re-use. */
+#define CLONE_CHILD_USEPID	0x02000000	/* use the given pid */
 #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
 #define CLONE_NEWIPC		0x08000000	/* New ipcs */
 #define CLONE_NEWUSER		0x10000000	/* New user namespace */
Index: linux-2.6.git/kernel/fork.c
===================================================================
--- linux-2.6.git.orig/kernel/fork.c
+++ linux-2.6.git/kernel/fork.c
@@ -1239,8 +1239,16 @@ static struct task_struct *copy_process(
 		goto bad_fork_cleanup_io;
 
 	if (pid != &init_struct_pid) {
+		int want_pid = 0;
+
+		if (clone_flags & CLONE_CHILD_USEPID) {
+			retval = get_user(want_pid, child_tidptr);
+			if (retval)
+				goto bad_fork_cleanup_io;
+		}
+
 		retval = -ENOMEM;
-		pid = alloc_pid(p->nsproxy->pid_ns);
+		pid = alloc_pid(p->nsproxy->pid_ns, want_pid);
 		if (!pid)
 			goto bad_fork_cleanup_io;
 	}
Index: linux-2.6.git/kernel/pid.c
===================================================================
--- linux-2.6.git.orig/kernel/pid.c
+++ linux-2.6.git/kernel/pid.c
@@ -159,11 +159,55 @@ static void set_last_pid(struct pid_name
 	} while ((prev != last_write) && (pid_before(base, last_write, pid)));
 }
 
-static int alloc_pidmap(struct pid_namespace *pid_ns)
+static int alloc_pidmap_page(struct pidmap *map)
+{
+	if (unlikely(!map->page)) {
+		void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+		/*
+		 * Free the page if someone raced with us
+		 * installing it:
+		 */
+		spin_lock_irq(&pidmap_lock);
+		if (!map->page) {
+			map->page = page;
+			page = NULL;
+		}
+		spin_unlock_irq(&pidmap_lock);
+		kfree(page);
+		if (unlikely(!map->page))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int set_pidmap(struct pid_namespace *pid_ns, int pid)
+{
+	int offset;
+	struct pidmap *map;
+
+	offset = pid & BITS_PER_PAGE_MASK;
+	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
+
+	if (alloc_pidmap_page(map) < 0)
+		return -ENOMEM;
+
+	if (!test_and_set_bit(offset, map->page)) {
+		atomic_dec(&map->nr_free);
+		return pid;
+	}
+
+	return -EBUSY;
+}
+
+static int alloc_pidmap(struct pid_namespace *pid_ns, int desired_pid)
 {
 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
 	struct pidmap *map;
 
+	if (desired_pid)
+		return set_pidmap(pid_ns, desired_pid);
+
 	pid = last + 1;
 	if (pid >= pid_max)
 		pid = RESERVED_PIDS;
@@ -176,22 +220,9 @@ static int alloc_pidmap(struct pid_names
 	 */
 	max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
 	for (i = 0; i <= max_scan; ++i) {
-		if (unlikely(!map->page)) {
-			void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-			/*
-			 * Free the page if someone raced with us
-			 * installing it:
-			 */
-			spin_lock_irq(&pidmap_lock);
-			if (!map->page) {
-				map->page = page;
-				page = NULL;
-			}
-			spin_unlock_irq(&pidmap_lock);
-			kfree(page);
-			if (unlikely(!map->page))
-				break;
-		}
+		if (alloc_pidmap_page(map) < 0)
+			break;
+
 		if (likely(atomic_read(&map->nr_free))) {
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
@@ -277,7 +308,7 @@ void free_pid(struct pid *pid)
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
 
-struct pid *alloc_pid(struct pid_namespace *ns)
+struct pid *alloc_pid(struct pid_namespace *ns, int this_ns_pid)
 {
 	struct pid *pid;
 	enum pid_type type;
@@ -291,13 +322,14 @@ struct pid *alloc_pid(struct pid_namespa
 
 	tmp = ns;
 	for (i = ns->level; i >= 0; i--) {
-		nr = alloc_pidmap(tmp);
+		nr = alloc_pidmap(tmp, this_ns_pid);
 		if (nr < 0)
 			goto out_free;
 
 		pid->numbers[i].nr = nr;
 		pid->numbers[i].ns = tmp;
 		tmp = tmp->parent;
+		this_ns_pid = 0;
 	}
 
 	get_pid_ns(ns);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ