[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150925135247.27620.37109.stgit@buzz>
Date: Fri, 25 Sep 2015 16:52:47 +0300
From: Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
To: linux-api@...r.kernel.org, containers@...ts.linux-foundation.org,
linux-kernel@...r.kernel.org
Cc: Roman Gushchin <klamm@...dex-team.ru>,
Serge Hallyn <serge.hallyn@...ntu.com>,
Oleg Nesterov <oleg@...hat.com>,
"Eric W. Biederman" <ebiederm@...ssion.com>,
Chen Fan <chen.fan.fnst@...fujitsu.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Stéphane Graber <stgraber@...ntu.com>
Subject: [PATCH RFC v3 2/2] pidns: introduce syscall getvpid
pid_t getvpid(pid_t pid, int source, int target);
This syscall converts a pid from the source pid namespace into the pid
visible in the target pid namespace. If the pid is unreachable from the
target namespace, getvpid() returns zero.
Namespaces are specified by file descriptors referring to entries in
proc (/proc/[pid]/ns/pid). If a namespace argument is negative, the
caller's current pid namespace is used.
If pid is negative, getvpid() returns the pid of the parent of task -pid.
Possible error codes:
ESRCH - task not found
EBADF - closed file descriptor
EINVAL - not pid-namespace file descriptor
Such conversion is required for interaction between processes in
different pid namespaces. For example, a system service on the host
that provides access to a restricted set of privileged operations for
clients in containers has to convert pids back and forth.
Recent kernels expose virtual pids in /proc/[pid]/status:NSpid, but
this interface works in only one direction, and even that is non-trivial.
Another option is passing pids with credentials via a unix socket, but
this solution requires a lot of preparation, and CAP_SYS_ADMIN for
sending arbitrary pids.
This syscall works in both directions, it's fast and simple.
Examples:
getvpid(pid, ns, -1) - get pid in our pid namespace
getvpid(pid, -1, ns) - get pid in container
getvpid(pid, -1, ns) > 0 - is pid reachable from container?
getvpid(1, ns1, ns2) > 0 - is ns1 inside ns2?
getvpid(1, ns1, ns2) == 0 - is ns1 outside ns2?
getvpid(1, ns, -1) - get init task of pid-namespace
getvpid(-1, ns, -1) - get reaper of init task in parent pid-namespace
getvpid(-pid, -1, -1) - get ppid by pid
Signed-off-by: Konstantin Khlebnikov <khlebnikov@...dex-team.ru>
--
v1: https://lkml.org/lkml/2015/9/15/411
v2: https://lkml.org/lkml/2015/9/24/278
v3:
* use proc_ns_fdget()
* update description
* rebase to next-20150925
* fix conflict with mlock2
---
arch/x86/entry/syscalls/syscall_32.tbl | 1 +
arch/x86/entry/syscalls/syscall_64.tbl | 1 +
include/linux/syscalls.h | 1 +
include/uapi/asm-generic/unistd.h | 4 ++-
kernel/sys.c | 51 ++++++++++++++++++++++++++++++++
5 files changed, 57 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 143ef9f37932..c36c2c65d204 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -383,3 +383,4 @@
374 i386 userfaultfd sys_userfaultfd
375 i386 membarrier sys_membarrier
376 i386 mlock2 sys_mlock2
+377 i386 getvpid sys_getvpid
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 314a90bfc09c..90bbbc7fdbe0 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -332,6 +332,7 @@
323 common userfaultfd sys_userfaultfd
324 common membarrier sys_membarrier
325 common mlock2 sys_mlock2
+326 common getvpid sys_getvpid
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a156b82dd14c..dbb5638260b5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -222,6 +222,7 @@ asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __us
asmlinkage long sys_alarm(unsigned int seconds);
asmlinkage long sys_getpid(void);
asmlinkage long sys_getppid(void);
+asmlinkage long sys_getvpid(pid_t pid, int source, int target);
asmlinkage long sys_getuid(void);
asmlinkage long sys_geteuid(void);
asmlinkage long sys_getgid(void);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1324b0292ec2..2c1123130f90 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -715,9 +715,11 @@ __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
__SYSCALL(__NR_membarrier, sys_membarrier)
#define __NR_mlock2 284
__SYSCALL(__NR_mlock2, sys_mlock2)
+#define __NR_getvpid 285
+__SYSCALL(__NR_getvpid, sys_getvpid)
#undef __NR_syscalls
-#define __NR_syscalls 285
+#define __NR_syscalls 286
/*
* All syscalls below here should go away really,
diff --git a/kernel/sys.c b/kernel/sys.c
index fa2f2f671a5c..1e28a36b84fa 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -46,6 +46,7 @@
#include <linux/syscalls.h>
#include <linux/kprobes.h>
#include <linux/user_namespace.h>
+#include <linux/proc_ns.h>
#include <linux/binfmts.h>
#include <linux/sched.h>
@@ -855,6 +856,56 @@ SYSCALL_DEFINE0(getppid)
return pid;
}
+SYSCALL_DEFINE3(getvpid, pid_t, pid, int, source, int, target) /* translate a pid from the source pid namespace into the target one */
+{ /* source/target: pid-namespace fds, or negative for current's active pid namespace; returns the translated pid or a negative errno */
+ struct pid_namespace *source_ns, *target_ns;
+ struct fd source_fd = {}, target_fd = {}; /* both are fdput() unconditionally at out:, even if never filled in */
+ struct pid *struct_pid;
+ struct ns_common *ns;
+ pid_t result;
+
+ if (source >= 0) { /* non-negative: resolve the fd to a pid namespace */
+ ns = proc_ns_fdget(source, CLONE_NEWPID, &source_fd);
+ result = PTR_ERR(ns); /* only returned when IS_ERR(ns); overwritten on the success path */
+ if (IS_ERR(ns))
+ goto out;
+ source_ns = container_of(ns, struct pid_namespace, ns);
+ } else
+ source_ns = task_active_pid_ns(current);
+
+ if (target >= 0) { /* same convention as for source */
+ ns = proc_ns_fdget(target, CLONE_NEWPID, &target_fd);
+ result = PTR_ERR(ns);
+ if (IS_ERR(ns))
+ goto out;
+ target_ns = container_of(ns, struct pid_namespace, ns);
+ } else
+ target_ns = task_active_pid_ns(current);
+
+ rcu_read_lock(); /* read-side section covers the pid/task lookups and the translation */
+ struct_pid = find_pid_ns(abs(pid), source_ns); /* look up |pid| in source_ns; NOTE(review): abs(INT_MIN) overflows, confirm that is harmless */
+
+ if (struct_pid && pid < 0) { /* negative pid: answer for the task's parent instead */
+ struct task_struct *task;
+
+ task = pid_task(struct_pid, PIDTYPE_PID);
+ if (task)
+ task = rcu_dereference(task->real_parent);
+ struct_pid = task ? task_pid(task) : NULL; /* NULL falls through to -ESRCH below */
+ }
+
+ if (struct_pid)
+ result = pid_nr_ns(struct_pid, target_ns); /* per the patch description, 0 when not reachable from target_ns */
+ else
+ result = -ESRCH;
+ rcu_read_unlock();
+
+out: /* common exit, also reached from the fd lookup failures above */
+ fdput(target_fd);
+ fdput(source_fd);
+ return result;
+}
+
SYSCALL_DEFINE0(getuid)
{
/* Only we change this so SMP safe */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists