>From 7b7759029fb2bbf6d65a6ac26bb78121e91a30f1 Mon Sep 17 00:00:00 2001 From: Carlos Date: Tue, 19 Mar 2019 18:00:56 -0300 Subject: [PATCH] [PATCH bpf-next 3/3] BPF: New helper to obtain namespace data from current task This helper obtains the active namespace from current and returns pid, tgid, device and namespace id as seen from that namespace, allowing to instrument a process inside a container. Device is read from /proc/self/ns/pid, as in the future it's possible that different pid_ns files may belong to different devices, according to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers conference. Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's scripts but this helper returns the pid as seen by the root namespace which is fine when a bcc script is not executed inside a container. When the process of interest is inside a container, pid filtering will not work if bpf_get_current_pid_tgid() is used. This helper addresses this limitation returning the pid as it's seen by the current namespace where the script is executing. This helper has the same use cases as bpf_get_current_pid_tgid() as it can be used to do pid filtering even inside a container. For example a bcc script using bpf_get_current_pid_tgid() (tools/funccount.py): u32 pid = bpf_get_current_pid_tgid() >> 32; if (pid != ) return 0; Could be modified to use bpf_get_current_pidns_info() as follows: struct bpf_pidns pidns; bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns)); u32 pid = pidns.tgid; u32 nsid = pidns.nsid; if ((pid != ) && (nsid != )) return 0; To find out the name PID namespace id of a process, you could use this command: $ ps -h -o pidns -p Or this other command: $ ls -Li /proc//ns/pid Signed-off-by: Carlos Antonio Neira Bustos --- --- samples/bpf/Makefile | 3 + samples/bpf/trace_ns_info_user.c | 35 ++++++ samples/bpf/trace_ns_info_user_kern.c | 44 +++++++ tools/testing/selftests/bpf/Makefile | 2 +- .../testing/selftests/bpf/progs/test_pidns_kern.c | 48 +++++++ tools/testing/selftests/bpf/test_pidns.c | 138 +++++++++++++++++++++ 6 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 samples/bpf/trace_ns_info_user.c create mode 100644 samples/bpf/trace_ns_info_user_kern.c create mode 100644 tools/testing/selftests/bpf/progs/test_pidns_kern.c create mode 100644 tools/testing/selftests/bpf/test_pidns.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 65e667bdf979..4af5abc230e5 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -53,6 +53,7 @@ hostprogs-y += xdp_fwd hostprogs-y += task_fd_query hostprogs-y += xdp_sample_pkts hostprogs-y += hbm +hostprogs-y += trace_ns_info # Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -109,6 +110,7 @@ xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) +trace_ns_info-objs := bpf_load.o trace_ns_info_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -167,6 +169,7 @@ always += xdp_fwd_kern.o always += task_fd_query_kern.o always += xdp_sample_pkts_kern.o always += hbm_out_kern.o +always += trace_ns_info_user_kern.o KBUILD_HOSTCFLAGS += -I$(objtree)/usr/include KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ diff --git a/samples/bpf/trace_ns_info_user.c b/samples/bpf/trace_ns_info_user.c new file mode 100644 index 000000000000..e06d08db6f30 --- /dev/null +++ b/samples/bpf/trace_ns_info_user.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include +#include +#include +#include "bpf/libbpf.h" +#include "bpf_load.h" + +/* This code was taken verbatim from tracex1_user.c, it's used + * to exercize bpf_get_current_pidns_info() helper call. + */ +int main(int ac, char **argv) +{ + FILE *f; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]); + printf("loading %s\n", filename); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + f = popen("taskset 1 ping localhost", "r"); + (void) f; + read_trace_pipe(); + return 0; +} diff --git a/samples/bpf/trace_ns_info_user_kern.c b/samples/bpf/trace_ns_info_user_kern.c new file mode 100644 index 000000000000..96675e02b707 --- /dev/null +++ b/samples/bpf/trace_ns_info_user_kern.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include "bpf_helpers.h" + +typedef __u64 u64; +typedef __u32 u32; + + +/* kprobe is NOT a stable ABI + * kernel functions can be removed, renamed or completely change semantics. + * Number of arguments and their positions can change, etc. + * In such case this bpf+kprobe example will no longer be meaningful + */ + +/* This will call bpf_get_current_pidns_info() to display pid and ns values + * as seen by the current namespace, on the far left you will see the pid as + * seen as by the root namespace. + */ + +SEC("kprobe/__netif_receive_skb_core") +int bpf_prog1(struct pt_regs *ctx) +{ + char fmt[] = "nsid:%u, dev: %u, pid:%u\n"; + struct bpf_pidns_info nsinfo; + int ok = 0; + + ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo)); + if (ok == 0) + bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid, + (u32) nsinfo.dev, (u32)nsinfo.pid); + + return 0; +} +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 518cd587cd63..170d0f650318 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,7 +23,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ - test_netcnt test_tcpnotify_user test_sock_fields + test_netcnt test_tcpnotify_user test_sock_fields test_pidns BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) TEST_GEN_FILES = $(BPF_OBJ_FILES) diff --git a/tools/testing/selftests/bpf/progs/test_pidns_kern.c b/tools/testing/selftests/bpf/progs/test_pidns_kern.c new file mode 100644 index 000000000000..524a84f596e9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pidns_kern.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") nsidmap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") pidmap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 1, +}; + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int trace(void *ctx) +{ + struct bpf_pidns_info nsinfo; + __u32 key = 0, *expected_pid, *val; + + if(bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo))) + return EINVAL; + + expected_pid = bpf_map_lookup_elem(&pidmap, &key); + if (!expected_pid || *expected_pid != nsinfo.pid) + return 0; + + val = bpf_map_lookup_elem(&nsidmap, &key); + if (val) + *val = nsinfo.nsid; + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_pidns.c b/tools/testing/selftests/bpf/test_pidns.c new file mode 100644 index 000000000000..5d62f6414397 --- /dev/null +++ b/tools/testing/selftests/bpf/test_pidns.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Carlos Neira cneirabustos@gmail.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "cgroup_helpers.h" +#include "bpf_rlimit.h" + +#define CHECK(condition, tag, format...) ({ \ + int __ret = !!(condition); \ + if (__ret) { \ + printf("%s:FAIL:%s ", __func__, tag); \ + printf(format); \ + } else { \ + printf("%s:PASS:%s\n", __func__, tag); \ + } \ + __ret; \ +}) + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) + return -1; + return bpf_map__fd(map); +} + + +int main(int argc, char **argv) +{ + const char *probe_name = "syscalls/sys_enter_nanosleep"; + const char *file = "test_pidns_kern.o"; + int err, bytes, efd, prog_fd, pmu_fd; + int pidmap_fd, nsidmap_fd; + struct perf_event_attr attr = {}; + struct bpf_object *obj; + __u32 knsid = 0; + __u32 key = 0, pid; + int exit_code = 1; + struct stat st; + char buf[256]; + + err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno)) + goto cleanup_cgroup_env; + + nsidmap_fd = bpf_find_map(__func__, obj, "nsidmap"); + if (CHECK(nsidmap_fd < 0, "bpf_find_map", "err %d errno %d\n", + nsidmap_fd, errno)) + goto close_prog; + + pidmap_fd = bpf_find_map(__func__, obj, "pidmap"); + if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n", + pidmap_fd, errno)) + goto close_prog; + + pid = getpid(); + bpf_map_update_elem(pidmap_fd, &key, &pid, 0); + + snprintf(buf, sizeof(buf), + "/sys/kernel/debug/tracing/events/%s/id", probe_name); + efd = open(buf, O_RDONLY, 0); + if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno)) + goto close_prog; + bytes = read(efd, buf, sizeof(buf)); + close(efd); + if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read", + "bytes %d errno %d\n", bytes, errno)) + goto close_prog; + + attr.config = strtol(buf, NULL, 0); + attr.type = PERF_TYPE_TRACEPOINT; + attr.sample_type = PERF_SAMPLE_RAW; + attr.sample_period = 1; + attr.wakeup_events = 1; + + pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, + errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err, + errno)) + goto close_pmu; + + /* trigger some syscalls */ + sleep(1); + + err = bpf_map_lookup_elem(nsidmap_fd, &key, &knsid); + if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno)) + goto close_pmu; + + if(stat("/proc/self/ns/pid",&st)) + goto close_pmu; + + if (CHECK(knsid != (__u32) st.st_ino, "compare_namespace_id", + "kern knsid %u user unsid %u\n", knsid, (__u32) st.st_ino)) + goto close_pmu; + + exit_code = 0; + printf("%s:PASS\n", argv[0]); + +close_pmu: + close(pmu_fd); +close_prog: + bpf_object__close(obj); +cleanup_cgroup_env: + return exit_code; +} -- 2.11.0