lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180809160700.mrs6gcdcghrxex7z@krondor>
Date:   Thu, 9 Aug 2018 12:07:00 -0400
From:   Carlos Neira <cneirabustos@...il.com>
To:     Jesper Dangaard Brouer <brouer@...hat.com>
Cc:     Carlos Neira <cneirabustos@...il.com>, netdev@...r.kernel.org,
        ebiederm@...ssion.com, quentin.monnet@...ronome.com,
        ys114321@...il.com
Subject: Re: [PATCH bpf-next] BPF: helpers: New helper to obtain namespace
 data from current task

Jesper,
Here is the updated patch.
 
>From 92633f6819423093932e8d04aa3dc99a5913f6fd Mon Sep 17 00:00:00 2001
From: Carlos Neira <cneirabustos@...il.com>
Date: Thu, 9 Aug 2018 09:55:32 -0400
Subject: [PATCH bpf-next] BPF: helpers: New helper to obtain namespace
 data from current task

This helper obtains the active namespace from current and returns pid, tgid,
device and namespace id as seen from that namespace, allowing to instrument
a process inside a container.
Device is read from /proc/self/ns/pid, as in the future it's possible that
different pid_ns files may belong to different devices, according
to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
conference.

Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
scripts but this helper returns the pid as seen by the root namespace which is
fine when a bcc script is not executed inside a container.
When the process of interest is inside a container, pid filtering will not work
if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
returning the pid as it's seen by the current namespace where the script is
executing.

This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
used to do pid filtering even inside a container.

For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):

	u32 pid = bpf_get_current_pid_tgid() >> 32;
	if (pid != <pid_arg_passed_in>)
		return 0;

Could be modified to use bpf_get_current_pidns_info() as follows:

	struct bpf_pidns pidns;
	bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
	u32 pid = pidns.tgid;
	u32 nsid = pidns.nsid;
	if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
		return 0;

To find out the name PID namespace id of a process, you could use this command:

$ ps -h -o pidns -p <pid_of_interest>

Or this other command:

$ ls -Li /proc/<pid_of_interest>/ns/pid

Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@...il.com>
---
 include/linux/bpf.h                       |  1 +
 include/uapi/linux/bpf.h                  | 24 +++++++++++-
 kernel/bpf/core.c                         |  1 +
 kernel/bpf/helpers.c                      | 64 +++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c                  |  2 +
 samples/bpf/Makefile                      |  3 ++
 samples/bpf/trace_ns_info_user.c          | 35 +++++++++++++++++
 samples/bpf/trace_ns_info_user_kern.c     | 45 ++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h            | 24 +++++++++++-
 tools/testing/selftests/bpf/bpf_helpers.h |  3 ++
 10 files changed, 200 insertions(+), 2 deletions(-)
 create mode 100644 samples/bpf/trace_ns_info_user.c
 create mode 100644 samples/bpf/trace_ns_info_user_kern.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cd8790d2c6ed..3f4b999f7c99 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
 
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dd5758dc35d3..8462f9881465 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2113,6 +2113,18 @@ union bpf_attr {
  *		the shared data.
  *	Return
  *		Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ *	Description
+ *		Copies into *pidns* pid, namespace id and tgid as seen by the
+ *		current namespace and also device from /proc/self/ns/pid.
+ *		*size_of_pidns* must be the size of *pidns*
+ *
+ *		This helper is used when pid filtering is needed inside a
+ *		container as bpf_get_current_tgid() helper returns always the
+ *		pid id as seen by the root namespace.
+ *	Return
+ *		0 on success -EINVAL on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2196,7 +2208,8 @@ union bpf_attr {
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
-	FN(get_local_storage),
+	FN(get_local_storage),		\
+	FN(get_current_pidns_info),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2724,4 +2737,13 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/ns/pid.
+ */
+struct bpf_pidns_info {
+	__u32 dev;
+	__u32 nsid;
+	__u32 tgid;
+	__u32 pid;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4d09e610777f..98ce53ce2ea6 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
 const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_current_pidns_info __weak;
 
 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..d06d723b9cff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -18,6 +18,9 @@
 #include <linux/sched.h>
 #include <linux/uidgid.h>
 #include <linux/filter.h>
+#include <linux/pid_namespace.h>
+#include <linux/major.h>
+#include <linux/stat.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 #endif
+
+BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
+	 size)
+{
+	const char *ppath = "/proc/self/ns/pid";
+	struct pid_namespace *pidns = NULL;
+	mm_segment_t oldsegfs;
+	struct kstat stat;
+	pid_t tgid = 0;
+	pid_t pid = 0;
+	int res = 0;
+
+	if (unlikely(size != sizeof(struct bpf_pidns_info)))
+		goto clear;
+
+	pidns = task_active_pid_ns(current);
+
+	if (unlikely(!pidns))
+		goto clear;
+
+	pidns_info->nsid =  pidns->ns.inum;
+	pid = task_pid_nr_ns(current, pidns);
+
+	if (unlikely(!pid))
+		goto clear;
+
+	tgid = task_tgid_nr_ns(current, pidns);
+
+	if (unlikely(!tgid))
+		goto clear;
+
+	pidns_info->tgid = (u32) tgid;
+	pidns_info->pid = (u32) pid;
+
+	oldsegfs = get_fs();
+	set_fs(KERNEL_DS);
+	res = vfs_stat((const char __user *)ppath, &stat);
+	set_fs(oldsegfs);
+
+	if (unlikely(res))
+		goto clear;
+
+	pidns_info->dev =  stat.dev;
+
+	return 0;
+
+clear:
+	if (pidns_info)
+		memset((void *)pidns, 0, (size_t) size);
+
+	return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
+	.func	= bpf_get_current_pidns_info,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+};
+
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ae6829804bc..f70be29e49ab 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
 #endif
+	case BPF_FUNC_get_current_pidns_info:
+		return &bpf_get_current_pidns_info_proto;
 	default:
 		return NULL;
 	}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index f88d5683d6ee..fdcde00554ce 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -53,6 +53,7 @@ hostprogs-y += xdpsock
 hostprogs-y += xdp_fwd
 hostprogs-y += task_fd_query
 hostprogs-y += xdp_sample_pkts
+hostprogs-y += trace_ns_info
 
 # Libbpf dependencies
 LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
 xdp_fwd-objs := xdp_fwd_user.o
 task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
 xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
+trace_ns_info-objs := bpf_load.o trace_ns_info_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -166,6 +168,7 @@ always += xdpsock_kern.o
 always += xdp_fwd_kern.o
 always += task_fd_query_kern.o
 always += xdp_sample_pkts_kern.o
+always += trace_ns_info_user_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/lib/
diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
new file mode 100644
index 000000000000..e06d08db6f30
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...il.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "bpf/libbpf.h"
+#include "bpf_load.h"
+
+/* This code was taken verbatim from tracex1_user.c, it's used
+ * to exercize bpf_get_current_pidns_info() helper call.
+ */
+int main(int ac, char **argv)
+{
+	FILE *f;
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
+	printf("loading %s\n", filename);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	f = popen("taskset 1 ping  localhost", "r");
+	(void) f;
+	read_trace_pipe();
+	return 0;
+}
diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
new file mode 100644
index 000000000000..ceaf3e83c8e7
--- /dev/null
+++ b/samples/bpf/trace_ns_info_user_kern.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Carlos Neira cneirabustos@...il.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+typedef __u64 u64;
+typedef __u32 u32;
+
+
+/* kprobe is NOT a stable ABI
+ * kernel functions can be removed, renamed or completely change semantics.
+ * Number of arguments and their positions can change, etc.
+ * In such case this bpf+kprobe example will no longer be meaningful
+ */
+
+/* This will call bpf_get_current_pidns_info() to display pid and ns values
+ * as seen by the current namespace, on the far left you will see the pid as
+ * seen as by the root namespace.
+ */
+
+SEC("kprobe/__netif_receive_skb_core")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	char fmt[] = "nsid:%u, dev: %u,  pid:%u\n";
+	struct bpf_pidns_info nsinfo;
+	int ok = 0;
+
+	ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
+	if (ok == 0)
+		bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
+				 (u32) nsinfo.dev, (u32)nsinfo.pid);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..8462f9881465 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2113,6 +2113,18 @@ union bpf_attr {
  *		the shared data.
  *	Return
  *		Pointer to the local storage area.
+ *
+ * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
+ *	Description
+ *		Copies into *pidns* pid, namespace id and tgid as seen by the
+ *		current namespace and also device from /proc/self/ns/pid.
+ *		*size_of_pidns* must be the size of *pidns*
+ *
+ *		This helper is used when pid filtering is needed inside a
+ *		container as bpf_get_current_tgid() helper returns always the
+ *		pid id as seen by the root namespace.
+ *	Return
+ *		0 on success -EINVAL on error.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2196,7 +2208,8 @@ union bpf_attr {
 	FN(rc_keydown),			\
 	FN(skb_cgroup_id),		\
 	FN(get_current_cgroup_id),	\
-	FN(get_local_storage),
+	FN(get_local_storage),		\
+	FN(get_current_pidns_info),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2724,4 +2737,13 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+/* helper bpf_get_current_pidns_info will store the following
+ * data, dev will contain major/minor from /proc/self/ns/pid.
+ */
+struct bpf_pidns_info {
+	__u32 dev;
+	__u32 nsid;
+	__u32 tgid;
+	__u32 pid;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index cb9fcfbc9307..c3060f3284e0 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -137,6 +137,9 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
 	(void *) BPF_FUNC_get_current_cgroup_id;
 static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
 	(void *) BPF_FUNC_get_local_storage;
+static int (*bpf_get_current_pidns_info)(struct bpf_pidns_info *buf,
+					 unsigned int buf_size) =
+	(void *) BPF_FUNC_get_current_pidns_info;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
-- 
2.11.0




On Thu, Aug 09, 2018 at 03:46:53PM +0200, Jesper Dangaard Brouer wrote:
> On Thu,  9 Aug 2018 09:18:00 -0400
> Carlos Neira <cneirabustos@...il.com> wrote:
> 
> > From: cneira <cneirabustos@...il.com>
> > 
> > This helper obtains the active namespace from current and returns pid, tgid,
> > device and namespace id as seen from that namespace, allowing to instrument
> > a process inside a container.
> > Device is read from /proc/self/ns/pid, as in the future it's possible that
> > different pid_ns files may belong to different devices, according
> > to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
> > conference.
> > 
> > Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
> > scripts but this helper returns the pid as seen by the root namespace which is
> > fine when a bcc script is not executed inside a container.
> > When the process of interest is inside a container, pid filtering will not work
> > if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
> > returning the pid as it's seen by the current namespace where the script is
> > executing.
> > 
> > This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
> > used to do pid filtering even inside a container.
> > 
> > For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):
> > 
> > 	u32 pid = bpf_get_current_pid_tgid() >> 32;
> > 	if (pid != <pid_arg_passed_in>)
> > 		return 0;
> > 
> > Could be modified to use bpf_get_current_pidns_info() as follows:
> > 
> > 	struct bpf_pidns pidns;
> > 	bpf_get_current_pid_tgid(&pidns, sizeof(struct bpf_pidns));
>         ^^^^^^^^^^^^^^^^^^^^^^^^
> 
> Shouldn't this be:
>         bpf_get_current_pidns_info(...)
> 
> > 	u32 pid = pidns.tgid;
> > 	u32 nsid = pidns.nsid;
> > 	if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
> > 		return 0;
> > 
> 
> [...]
> 
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index dd5758dc35d3..031e7d9dba09 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -2113,6 +2113,18 @@ union bpf_attr {
> >   *		the shared data.
> >   *	Return
> >   *		Pointer to the local storage area.
> > + *
> > + * int bpf_get_current_pidns(struct bpf_pidns_info *pidns, u32 size_of_pidns)
> 
> Should this be:
>           bpf_get_current_pidns_info(...)
> 
> > + *	Description
> > + *		Copies into *pidns* pid, namespace id and tgid as seen by the
> > + *		current namespace and also device from /proc/self/ns/pid.
> > + *		*size_of_pidns* must be the size of *pidns*
> > + *
> > + *		This helper is used when pid filtering is needed inside a
> > + *		container as bpf_get_current_tgid() helper returns always the
> > + *		pid id as seen by the root namespace.
> > + *	Return
> > + *		0 on success -EINVAL on error.
> >   */
> >  #define __BPF_FUNC_MAPPER(FN)		\
> >  	FN(unspec),			\
> > @@ -2196,7 +2208,8 @@ union bpf_attr {
> >  	FN(rc_keydown),			\
> >  	FN(skb_cgroup_id),		\
> >  	FN(get_current_cgroup_id),	\
> > -	FN(get_local_storage),
> > +	FN(get_local_storage),		\
> > +	FN(get_current_pidns_info),
> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ