[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20180810230753.salbwvjupppfqnvq@ast-mbp.dhcp.thefacebook.com>
Date: Fri, 10 Aug 2018 16:07:55 -0700
From: Alexei Starovoitov <alexei.starovoitov@...il.com>
To: Carlos Neira <cneirabustos@...il.com>
Cc: netdev@...r.kernel.org, ebiederm@...ssion.com,
quentin.monnet@...ronome.com, ys114321@...il.com, brouer@...hat.com
Subject: Re: [PATCH v2 bpf-next] BPF: helpers: New helper to obtain namespace
data from current task
On Fri, Aug 10, 2018 at 08:41:04AM -0400, Carlos Neira wrote:
> This helper obtains the active namespace from current and returns pid, tgid,
> device and namespace id as seen from that namespace, allowing to instrument
> a process inside a container.
> Device is read from /proc/self/ns/pid, as in the future it's possible that
> different pid_ns files may belong to different devices, according
> to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers
> conference.
>
> Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's
> scripts but this helper returns the pid as seen by the root namespace which is
> fine when a bcc script is not executed inside a container.
> When the process of interest is inside a container, pid filtering will not work
> if bpf_get_current_pid_tgid() is used. This helper addresses this limitation
> returning the pid as it's seen by the current namespace where the script is
> executing.
>
> This helper has the same use cases as bpf_get_current_pid_tgid() as it can be
> used to do pid filtering even inside a container.
>
> For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py):
>
> u32 pid = bpf_get_current_pid_tgid() >> 32;
> if (pid != <pid_arg_passed_in>)
> return 0;
>
> Could be modified to use bpf_get_current_pidns_info() as follows:
>
> struct bpf_pidns pidns;
> bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns));
> u32 pid = pidns.tgid;
> u32 nsid = pidns.nsid;
> if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>))
> return 0;
>
> To find out the name PID namespace id of a process, you could use this command:
>
> $ ps -h -o pidns -p <pid_of_interest>
>
> Or this other command:
>
> $ ls -Li /proc/<pid_of_interest>/ns/pid
>
> Signed-off-by: Carlos Antonio Neira Bustos <cneirabustos@...il.com>
> ---
> include/linux/bpf.h | 1 +
> include/uapi/linux/bpf.h | 24 +++++++++++-
> kernel/bpf/core.c | 1 +
> kernel/bpf/helpers.c | 64 +++++++++++++++++++++++++++++++
> kernel/trace/bpf_trace.c | 2 +
> samples/bpf/Makefile | 3 ++
> samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++
> samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++
> tools/include/uapi/linux/bpf.h | 24 +++++++++++-
> tools/testing/selftests/bpf/bpf_helpers.h | 3 ++
> 10 files changed, 200 insertions(+), 2 deletions(-)
> create mode 100644 samples/bpf/trace_ns_info_user.c
> create mode 100644 samples/bpf/trace_ns_info_user_kern.c
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index cd8790d2c6ed..3f4b999f7c99 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
> extern const struct bpf_func_proto bpf_sock_map_update_proto;
> extern const struct bpf_func_proto bpf_sock_hash_update_proto;
> extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
> +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto;
>
> extern const struct bpf_func_proto bpf_get_local_storage_proto;
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index dd5758dc35d3..8462f9881465 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2113,6 +2113,18 @@ union bpf_attr {
> * the shared data.
> * Return
> * Pointer to the local storage area.
> + *
> + * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 size_of_pidns)
> + * Description
> + * Copies into *pidns* pid, namespace id and tgid as seen by the
> + * current namespace and also device from /proc/self/ns/pid.
> + * *size_of_pidns* must be the size of *pidns*
> + *
> + * This helper is used when pid filtering is needed inside a
> + * container as bpf_get_current_tgid() helper returns always the
> + * pid id as seen by the root namespace.
> + * Return
> + * 0 on success -EINVAL on error.
> */
> #define __BPF_FUNC_MAPPER(FN) \
> FN(unspec), \
> @@ -2196,7 +2208,8 @@ union bpf_attr {
> FN(rc_keydown), \
> FN(skb_cgroup_id), \
> FN(get_current_cgroup_id), \
> - FN(get_local_storage),
> + FN(get_local_storage), \
> + FN(get_current_pidns_info),
>
> /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> * function eBPF program intends to call
> @@ -2724,4 +2737,13 @@ enum bpf_task_fd_type {
> BPF_FD_TYPE_URETPROBE, /* filename + offset */
> };
>
> +/* helper bpf_get_current_pidns_info will store the following
> + * data, dev will contain major/minor from /proc/self/ns/pid.
> + */
> +struct bpf_pidns_info {
> + __u32 dev;
> + __u32 nsid;
> + __u32 tgid;
> + __u32 pid;
> +};
> #endif /* _UAPI__LINUX_BPF_H__ */
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 4d09e610777f..98ce53ce2ea6 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto __weak;
> const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
> const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
> const struct bpf_func_proto bpf_get_local_storage_proto __weak;
> +const struct bpf_func_proto bpf_get_current_pidns_info __weak;
>
> const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
> {
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 1991466b8327..d06d723b9cff 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -18,6 +18,9 @@
> #include <linux/sched.h>
> #include <linux/uidgid.h>
> #include <linux/filter.h>
> +#include <linux/pid_namespace.h>
> +#include <linux/major.h>
> +#include <linux/stat.h>
>
> /* If kernel subsystem is allowing eBPF programs to call this function,
> * inside its own verifier_ops->get_func_proto() callback it should return
> @@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
> .arg2_type = ARG_ANYTHING,
> };
> #endif
> +
> +BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, u32,
> + size)
> +{
> + const char *ppath = "/proc/self/ns/pid";
> + struct pid_namespace *pidns = NULL;
> + mm_segment_t oldsegfs;
> + struct kstat stat;
> + pid_t tgid = 0;
> + pid_t pid = 0;
> + int res = 0;
> +
> + if (unlikely(size != sizeof(struct bpf_pidns_info)))
> + goto clear;
> +
> + pidns = task_active_pid_ns(current);
> +
> + if (unlikely(!pidns))
> + goto clear;
> +
> + pidns_info->nsid = pidns->ns.inum;
> + pid = task_pid_nr_ns(current, pidns);
> +
> + if (unlikely(!pid))
> + goto clear;
> +
> + tgid = task_tgid_nr_ns(current, pidns);
> +
> + if (unlikely(!tgid))
> + goto clear;
> +
> + pidns_info->tgid = (u32) tgid;
> + pidns_info->pid = (u32) pid;
> +
> + oldsegfs = get_fs();
> + set_fs(KERNEL_DS);
> + res = vfs_stat((const char __user *)ppath, &stat);
> + set_fs(oldsegfs);
such fs magic cannot be done from the helper.
please find a way to retrieve device differently.
> +
> + if (unlikely(res))
> + goto clear;
> +
> + pidns_info->dev = stat.dev;
> +
> + return 0;
> +
> +clear:
> + if (pidns_info)
> + memset((void *)pidns, 0, (size_t) size);
> +
> + return -EINVAL;
> +}
> +
> +const struct bpf_func_proto bpf_get_current_pidns_info_proto = {
> + .func = bpf_get_current_pidns_info,
> + .gpl_only = false,
> + .ret_type = RET_INTEGER,
> + .arg1_type = ARG_PTR_TO_UNINIT_MEM,
> + .arg2_type = ARG_CONST_SIZE,
> +};
> +
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 0ae6829804bc..f70be29e49ab 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> case BPF_FUNC_get_current_cgroup_id:
> return &bpf_get_current_cgroup_id_proto;
> #endif
> + case BPF_FUNC_get_current_pidns_info:
> + return &bpf_get_current_pidns_info_proto;
> default:
> return NULL;
> }
> diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
> index f88d5683d6ee..fdcde00554ce 100644
> --- a/samples/bpf/Makefile
> +++ b/samples/bpf/Makefile
> @@ -53,6 +53,7 @@ hostprogs-y += xdpsock
> hostprogs-y += xdp_fwd
> hostprogs-y += task_fd_query
> hostprogs-y += xdp_sample_pkts
> +hostprogs-y += trace_ns_info
>
> # Libbpf dependencies
> LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
> @@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o
> xdp_fwd-objs := xdp_fwd_user.o
> task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
> xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS)
> +trace_ns_info-objs := bpf_load.o trace_ns_info_user.o
>
> # Tell kbuild to always build the programs
> always := $(hostprogs-y)
> @@ -166,6 +168,7 @@ always += xdpsock_kern.o
> always += xdp_fwd_kern.o
> always += task_fd_query_kern.o
> always += xdp_sample_pkts_kern.o
> +always += trace_ns_info_user_kern.o
>
> HOSTCFLAGS += -I$(objtree)/usr/include
> HOSTCFLAGS += -I$(srctree)/tools/lib/
> diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c
> new file mode 100644
> index 000000000000..e06d08db6f30
> --- /dev/null
> +++ b/samples/bpf/trace_ns_info_user.c
please split addition of sample code into separate patch
and convert it into selftests.
> @@ -0,0 +1,35 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2018 Carlos Neira cneirabustos@...il.com
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +
> +#include <stdio.h>
> +#include <linux/bpf.h>
> +#include <unistd.h>
> +#include "bpf/libbpf.h"
> +#include "bpf_load.h"
> +
> +/* This code was taken verbatim from tracex1_user.c, it's used
> + * to exercize bpf_get_current_pidns_info() helper call.
> + */
> +int main(int ac, char **argv)
> +{
> + FILE *f;
> + char filename[256];
> +
> + snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]);
> + printf("loading %s\n", filename);
> +
> + if (load_bpf_file(filename)) {
> + printf("%s", bpf_log_buf);
> + return 1;
> + }
> +
> + f = popen("taskset 1 ping localhost", "r");
> + (void) f;
> + read_trace_pipe();
> + return 0;
> +}
> diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c
> new file mode 100644
> index 000000000000..ceaf3e83c8e7
> --- /dev/null
> +++ b/samples/bpf/trace_ns_info_user_kern.c
> @@ -0,0 +1,45 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2018 Carlos Neira cneirabustos@...il.com
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of version 2 of the GNU General Public
> + * License as published by the Free Software Foundation.
> + */
> +#include <linux/skbuff.h>
> +#include <linux/netdevice.h>
> +#include <linux/version.h>
> +#include <uapi/linux/bpf.h>
> +#include "bpf_helpers.h"
> +
> +typedef __u64 u64;
> +typedef __u32 u32;
> +
> +
> +/* kprobe is NOT a stable ABI
> + * kernel functions can be removed, renamed or completely change semantics.
> + * Number of arguments and their positions can change, etc.
> + * In such case this bpf+kprobe example will no longer be meaningful
> + */
> +
> +/* This will call bpf_get_current_pidns_info() to display pid and ns values
> + * as seen by the current namespace, on the far left you will see the pid as
> + * seen as by the root namespace.
> + */
> +
> +SEC("kprobe/__netif_receive_skb_core")
> +int bpf_prog1(struct pt_regs *ctx)
> +{
> + char fmt[] = "nsid:%u, dev: %u, pid:%u\n";
> + struct bpf_pidns_info nsinfo;
> + int ok = 0;
> +
> + ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo));
> + if (ok == 0)
> + bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid,
> + (u32) nsinfo.dev, (u32)nsinfo.pid);
> +
> + return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> +u32 _version SEC("version") = LINUX_VERSION_CODE;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index dd5758dc35d3..8462f9881465 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
update to tools/../bpf.h should be separate patch as well.
At the end it should be:
p1 - feature introduction
p2 - update tools/.../bpf.h
p3 - selftest
Powered by blists - more mailing lists