[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <32ad4312197becd442b8bf136ded8859dd593340.1478736685.git.daniel@iogearbox.net>
Date: Thu, 10 Nov 2016 01:20:59 +0100
From: Daniel Borkmann <daniel@...earbox.net>
To: stephen@...workplumber.org
Cc: tgraf@...g.ch, alexei.starovoitov@...il.com,
netdev@...r.kernel.org, Daniel Borkmann <daniel@...earbox.net>
Subject: [PATCH iproute2 -net-next] bpf: make tc's bpf loader generic and move into lib
This work moves the bpf loader into the iproute2 library and reworks
the tc specific parts into generic code. It's useful as we can then
more easily support new program types by just having the same ELF
loader backend. Joint work with Thomas Graf. I hacked together a rough start
of a test suite [1] to make sure nothing breaks, and it all looks good.
[1] https://github.com/borkmann/clsact/blob/master/test_bpf.sh
Signed-off-by: Daniel Borkmann <daniel@...earbox.net>
Signed-off-by: Thomas Graf <tgraf@...g.ch>
---
Makefile | 13 +
configure | 2 +-
include/bpf_api.h | 26 +-
include/bpf_util.h | 95 +++
lib/Makefile | 2 +-
lib/bpf.c | 2262 ++++++++++++++++++++++++++++++++++++++++++++++++++++
tc/Makefile | 7 +-
tc/e_bpf.c | 2 +-
tc/f_bpf.c | 58 +-
tc/m_bpf.c | 47 +-
tc/tc_bpf.c | 2010 ----------------------------------------------
tc/tc_bpf.h | 82 --
12 files changed, 2467 insertions(+), 2139 deletions(-)
create mode 100644 include/bpf_util.h
create mode 100644 lib/bpf.c
delete mode 100644 tc/tc_bpf.c
delete mode 100644 tc/tc_bpf.h
diff --git a/Makefile b/Makefile
index fa200dd..37b68ad 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,8 @@
+# Include "Config" if already generated
+ifneq ($(wildcard Config),)
+include Config
+endif
+
ifndef VERBOSE
MAKEFLAGS += --no-print-directory
endif
@@ -7,6 +12,7 @@ LIBDIR?=$(PREFIX)/lib
SBINDIR?=/sbin
CONFDIR?=/etc/iproute2
DATADIR?=$(PREFIX)/share
+HDRDIR?=$(PREFIX)/include/iproute2
DOCDIR?=$(DATADIR)/doc/iproute2
MANDIR?=$(DATADIR)/man
ARPDDIR?=/var/lib/arpd
@@ -51,6 +57,11 @@ SUBDIRS=lib ip tc bridge misc netem genl tipc devlink man
LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
LDLIBS += $(LIBNETLINK)
+ifeq ($(HAVE_ELF),y)
+CFLAGS += -DHAVE_ELF
+LDLIBS += -lelf
+endif
+
all: Config
@set -e; \
for i in $(SUBDIRS); \
@@ -63,6 +74,7 @@ install: all
install -m 0755 -d $(DESTDIR)$(SBINDIR)
install -m 0755 -d $(DESTDIR)$(CONFDIR)
install -m 0755 -d $(DESTDIR)$(ARPDDIR)
+ install -m 0755 -d $(DESTDIR)$(HDRDIR)
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
install -m 0644 README.iproute2+tc $(shell find examples -maxdepth 1 -type f) \
@@ -73,6 +85,7 @@ install: all
install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
install -m 0755 -d $(DESTDIR)$(BASH_COMPDIR)
install -m 0644 bash-completion/tc $(DESTDIR)$(BASH_COMPDIR)
+ install -m 0644 include/bpf_elf.h $(DESTDIR)$(HDRDIR)
snapshot:
echo "static const char SNAPSHOT[] = \""`date +%y%m%d`"\";" \
diff --git a/configure b/configure
index c978da3..6c431c3 100755
--- a/configure
+++ b/configure
@@ -272,7 +272,7 @@ EOF
if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
then
- echo "TC_CONFIG_ELF:=y" >>Config
+ echo "HAVE_ELF:=y" >>Config
echo "yes"
else
echo "no"
diff --git a/include/bpf_api.h b/include/bpf_api.h
index 1b250d2..7642623 100644
--- a/include/bpf_api.h
+++ b/include/bpf_api.h
@@ -107,9 +107,14 @@
/** BPF helper functions for tc. Individual flags are in linux/bpf.h */
+#ifndef __BPF_FUNC
+# define __BPF_FUNC(NAME, ...) \
+ (* NAME)(__VA_ARGS__) __maybe_unused
+#endif
+
#ifndef BPF_FUNC
# define BPF_FUNC(NAME, ...) \
- (* NAME)(__VA_ARGS__) __maybe_unused = (void *) BPF_FUNC_##NAME
+ __BPF_FUNC(NAME, __VA_ARGS__) = (void *) BPF_FUNC_##NAME
#endif
/* Map access/manipulation */
@@ -147,10 +152,15 @@ static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map,
/* System helpers */
static uint32_t BPF_FUNC(get_smp_processor_id);
+static uint32_t BPF_FUNC(get_numa_node_id);
/* Packet misc meta data */
static uint32_t BPF_FUNC(get_cgroup_classid, struct __sk_buff *skb);
+static int BPF_FUNC(skb_under_cgroup, void *map, uint32_t index);
+
static uint32_t BPF_FUNC(get_route_realm, struct __sk_buff *skb);
+static uint32_t BPF_FUNC(get_hash_recalc, struct __sk_buff *skb);
+static uint32_t BPF_FUNC(set_hash_invalid, struct __sk_buff *skb);
/* Packet redirection */
static int BPF_FUNC(redirect, int ifindex, uint32_t flags);
@@ -169,6 +179,20 @@ static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off,
uint32_t from, uint32_t to, uint32_t flags);
static int BPF_FUNC(csum_diff, const void *from, uint32_t from_size,
const void *to, uint32_t to_size, uint32_t seed);
+static int BPF_FUNC(csum_update, struct __sk_buff *skb, uint32_t wsum);
+
+static int BPF_FUNC(skb_change_type, struct __sk_buff *skb, uint32_t type);
+static int BPF_FUNC(skb_change_proto, struct __sk_buff *skb, uint32_t proto,
+ uint32_t flags);
+static int BPF_FUNC(skb_change_tail, struct __sk_buff *skb, uint32_t nlen,
+ uint32_t flags);
+
+static int BPF_FUNC(skb_pull_data, struct __sk_buff *skb, uint32_t len);
+
+/* Event notification */
+static int __BPF_FUNC(skb_event_output, struct __sk_buff *skb, void *map,
+ uint64_t index, const void *data, uint32_t size) =
+ (void *) BPF_FUNC_perf_event_output;
/* Packet vlan encap/decap */
static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto,
diff --git a/include/bpf_util.h b/include/bpf_util.h
new file mode 100644
index 0000000..05baeec
--- /dev/null
+++ b/include/bpf_util.h
@@ -0,0 +1,95 @@
+/*
+ * bpf_util.h BPF common code
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Daniel Borkmann <daniel@...earbox.net>
+ * Jiri Pirko <jiri@...nulli.us>
+ */
+
+#ifndef __BPF_UTIL__
+#define __BPF_UTIL__
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/magic.h>
+#include <linux/elf-em.h>
+#include <linux/if_alg.h>
+
+#include "utils.h"
+#include "bpf_scm.h"
+
+#define BPF_ENV_UDS "TC_BPF_UDS"
+#define BPF_ENV_MNT "TC_BPF_MNT"
+
+#ifndef BPF_MAX_LOG
+# define BPF_MAX_LOG 4096
+#endif
+
+#define BPF_DIR_GLOBALS "globals"
+
+#ifndef BPF_FS_MAGIC
+# define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+#define BPF_DIR_MNT "/sys/fs/bpf"
+
+#ifndef TRACEFS_MAGIC
+# define TRACEFS_MAGIC 0x74726163
+#endif
+
+#define TRACE_DIR_MNT "/sys/kernel/tracing"
+
+#ifndef AF_ALG
+# define AF_ALG 38
+#endif
+
+#ifndef EM_BPF
+# define EM_BPF 247
+#endif
+
+struct bpf_cfg_ops {
+ void (*cbpf_cb)(void *nl, const struct sock_filter *ops, int ops_len);
+ void (*ebpf_cb)(void *nl, int fd, const char *annotation);
+};
+
+struct bpf_cfg_in {
+ const char *object;
+ const char *section;
+ const char *uds;
+ int argc;
+ char **argv;
+ struct sock_filter *ops;
+};
+
+int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
+ const struct bpf_cfg_ops *ops, void *nl);
+
+const char *bpf_prog_to_default_section(enum bpf_prog_type type);
+
+int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
+int bpf_trace_pipe(void);
+
+void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+
+#ifdef HAVE_ELF
+int bpf_send_map_fds(const char *path, const char *obj);
+int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
+ unsigned int entries);
+#else
+static inline int bpf_send_map_fds(const char *path, const char *obj)
+{
+ return 0;
+}
+
+static inline int bpf_recv_map_fds(const char *path, int *fds,
+ struct bpf_map_aux *aux,
+ unsigned int entries)
+{
+ return -1;
+}
+#endif /* HAVE_ELF */
+#endif /* __BPF_UTIL__ */
diff --git a/lib/Makefile b/lib/Makefile
index 52e016d..5b7ec16 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -fPIC
UTILOBJ = utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o \
inet_proto.o namespace.o json_writer.o \
- names.o color.o
+ names.o color.o bpf.o
NLOBJ=libgenl.o ll_map.o libnetlink.o
diff --git a/lib/bpf.c b/lib/bpf.c
new file mode 100644
index 0000000..8a5b84b
--- /dev/null
+++ b/lib/bpf.c
@@ -0,0 +1,2262 @@
+/*
+ * bpf.c BPF common code
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Daniel Borkmann <daniel@...earbox.net>
+ * Jiri Pirko <jiri@...nulli.us>
+ * Alexei Starovoitov <ast@...nel.org>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <assert.h>
+
+#ifdef HAVE_ELF
+#include <libelf.h>
+#include <gelf.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/un.h>
+#include <sys/vfs.h>
+#include <sys/mount.h>
+#include <sys/syscall.h>
+#include <sys/sendfile.h>
+#include <sys/resource.h>
+
+#include <arpa/inet.h>
+
+#include "utils.h"
+
+#include "bpf_util.h"
+#include "bpf_elf.h"
+#include "bpf_scm.h"
+
+/* Per-program-type metadata; entries live in __bpf_prog_meta[]. */
+struct bpf_prog_meta {
+ /* Short name compared against the argument of "type" in bpf_parse(). */
+ const char *type;
+ /* Directory name handed out by bpf_prog_to_subdir(). */
+ const char *subdir;
+ /* Default ELF section handed out by bpf_prog_to_default_section(). */
+ const char *section;
+ /* When set, bpf_parse() honours BPF_ENV_UDS / the "export" option. */
+ bool may_uds_export;
+};
+
+/*
+ * Program types walked by bpf_gen_hierarchy(): entry 0 is created through
+ * bpf_gen_master(), every further entry through bpf_gen_slave().
+ */
+static const enum bpf_prog_type __bpf_types[] = {
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+};
+
+/*
+ * Metadata table indexed by enum bpf_prog_type (designated initializers).
+ * Slots below the highest listed index that are not named here stay zeroed,
+ * i.e. their type/subdir/section pointers are NULL.
+ */
+static const struct bpf_prog_meta __bpf_prog_meta[] = {
+ [BPF_PROG_TYPE_SCHED_CLS] = {
+ .type = "cls",
+ .subdir = "tc",
+ .section = ELF_SECTION_CLASSIFIER,
+ .may_uds_export = true,
+ },
+ [BPF_PROG_TYPE_SCHED_ACT] = {
+ .type = "act",
+ .subdir = "tc",
+ .section = ELF_SECTION_ACTION,
+ .may_uds_export = true,
+ },
+};
+
+/*
+ * Return the sub-directory name stored for @type in __bpf_prog_meta[].
+ * Asserts that @type indexes into the table and that a subdir is set.
+ */
+static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
+{
+ assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
+ __bpf_prog_meta[type].subdir);
+ return __bpf_prog_meta[type].subdir;
+}
+
+/*
+ * Return the default ELF section name stored for @type in __bpf_prog_meta[].
+ * Asserts that @type indexes into the table and that a section is set.
+ */
+const char *bpf_prog_to_default_section(enum bpf_prog_type type)
+{
+ assert(type < ARRAY_SIZE(__bpf_prog_meta) &&
+ __bpf_prog_meta[type].section);
+ return __bpf_prog_meta[type].section;
+}
+
+/*
+ * ELF object loader entry point. With HAVE_ELF only the prototype is given
+ * here. Without it a stub is compiled in that prints a message, sets errno
+ * to ENOSYS and returns -1.
+ */
+#ifdef HAVE_ELF
+static int bpf_obj_open(const char *path, enum bpf_prog_type type,
+ const char *sec, bool verbose);
+#else
+static int bpf_obj_open(const char *path, enum bpf_prog_type type,
+ const char *sec, bool verbose)
+{
+ fprintf(stderr, "No ELF library support compiled in.\n");
+ errno = ENOSYS;
+ return -1;
+}
+#endif
+
+/* Convert a pointer into the 64-bit integer form stored in union bpf_attr. */
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+	unsigned long addr = (unsigned long)ptr;
+
+	return (__u64)addr;
+}
+
+/*
+ * Thin wrapper around the bpf(2) system call. When the build headers do not
+ * define __NR_bpf, it prints a hint, sets errno to ENOSYS and returns -1.
+ */
+static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+#ifdef __NR_bpf
+ return syscall(__NR_bpf, cmd, attr, size);
+#else
+ fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+/*
+ * Issue BPF_MAP_UPDATE_ELEM on map @fd for @key/@value with @flags.
+ * Returns whatever bpf() returns.
+ */
+static int bpf_map_update(int fd, const void *key, const void *value,
+ uint64_t flags)
+{
+ union bpf_attr attr = {};
+
+ attr.map_fd = fd;
+ attr.key = bpf_ptr_to_u64(key);
+ attr.value = bpf_ptr_to_u64(value);
+ attr.flags = flags;
+
+ return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+/*
+ * Locate the cBPF program text and parse its leading length field.
+ *
+ * @arg:          the program text itself or, with @from_file, the path of a
+ *                file whose first line holds the program text
+ * @from_file:    read the text from the file named by @arg
+ * @bpf_len:      out: number parsed from the "<len><separator>" prefix
+ * @bpf_string:   out: program text; either @arg or a heap buffer
+ * @need_release: out: true when *@bpf_string was allocated here and the
+ *                caller must free() it
+ * @separator:    character that has to follow the length field
+ *
+ * Returns 0 on success or a negative errno value. On failure nothing is left
+ * for the caller to free: *@bpf_string is NULL and *@need_release is false
+ * once they have been touched.
+ */
+static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
+			    char **bpf_string, bool *need_release,
+			    const char separator)
+{
+	char sp;
+
+	if (from_file) {
+		size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
+		size_t cur_len;
+		char *tmp_string;
+		FILE *fp;
+
+		/* Room for the length prefix plus BPF_MAXINSNS full-width ops. */
+		tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
+		tmp_string = calloc(1, tmp_len);
+		if (tmp_string == NULL)
+			return -ENOMEM;
+
+		fp = fopen(arg, "r");
+		if (fp == NULL) {
+			perror("Cannot fopen");
+			free(tmp_string);
+			return -ENOENT;
+		}
+
+		if (!fgets(tmp_string, tmp_len, fp)) {
+			free(tmp_string);
+			fclose(fp);
+			return -EIO;
+		}
+
+		fclose(fp);
+
+		/*
+		 * Drop a trailing newline. The length is checked first so a
+		 * line starting with a NUL byte cannot index before the buffer.
+		 */
+		cur_len = strlen(tmp_string);
+		if (cur_len > 0 && tmp_string[cur_len - 1] == '\n')
+			tmp_string[cur_len - 1] = 0;
+
+		*need_release = true;
+		*bpf_string = tmp_string;
+	} else {
+		*need_release = false;
+		*bpf_string = arg;
+	}
+
+	if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
+	    sp != separator) {
+		if (*need_release)
+			free(*bpf_string);
+		/* Do not hand a dangling pointer back to the caller. */
+		*bpf_string = NULL;
+		*need_release = false;
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse a cBPF program of the form "<len>,<code> <jt> <jf> <k>,..." into
+ * @bpf_ops. The text comes from argv[0] directly or, with @from_file, from
+ * the file named by argv[0].
+ *
+ * Returns the number of instructions parsed, or -EINVAL on any error.
+ */
+static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
+ bool from_file)
+{
+ char *bpf_string, *token, separator = ',';
+ int ret = 0, i = 0;
+ bool need_release;
+ __u16 bpf_len = 0;
+
+ if (argc < 1)
+ return -EINVAL;
+ if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
+ &need_release, separator))
+ return -EINVAL;
+ if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ token = bpf_string;
+ /* Each instruction starts right after a separator; stop at end of text. */
+ while ((token = strchr(token, separator)) && (++token)[0]) {
+ if (i >= bpf_len) {
+ fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (sscanf(token, "%hu %hhu %hhu %u,",
+ &bpf_ops[i].code, &bpf_ops[i].jt,
+ &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
+ fprintf(stderr, "Error at instruction %d!\n", i);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ i++;
+ }
+
+ if (i != bpf_len) {
+ fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = bpf_len;
+out:
+ /* Only a buffer read from file was allocated by bpf_parse_string(). */
+ if (need_release)
+ free(bpf_string);
+
+ return ret;
+}
+
+/*
+ * Print @len cBPF instructions carried in the payload of @bpf_ops to @f as
+ * bytecode '<len>,<code> <jt> <jf> <k>,...'. Prints nothing if @len is 0.
+ */
+void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
+{
+ struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops);
+ int i;
+
+ if (len == 0)
+ return;
+
+ fprintf(f, "bytecode \'%u,", len);
+
+ /* All but the last instruction are followed by a comma. */
+ for (i = 0; i < len - 1; i++)
+ fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt,
+ ops[i].jf, ops[i].k);
+
+ /* The last instruction closes the quote instead. */
+ fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt,
+ ops[i].jf, ops[i].k);
+}
+
+/*
+ * Print to stderr every field (type, key size, value size, max elements,
+ * flags) in which the object's map spec @obj differs from the pinned map
+ * @pin.
+ */
+static void bpf_map_pin_report(const struct bpf_elf_map *pin,
+ const struct bpf_elf_map *obj)
+{
+ fprintf(stderr, "Map specification differs from pinned file!\n");
+
+ if (obj->type != pin->type)
+ fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
+ obj->type, pin->type);
+ if (obj->size_key != pin->size_key)
+ fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
+ obj->size_key, pin->size_key);
+ if (obj->size_value != pin->size_value)
+ fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
+ obj->size_value, pin->size_value);
+ if (obj->max_elem != pin->max_elem)
+ fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
+ obj->max_elem, pin->max_elem);
+ if (obj->flags != pin->flags)
+ fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
+ obj->flags, pin->flags);
+
+ fprintf(stderr, "\n");
+}
+
+/*
+ * Compare the map behind @fd with the expected spec @map.
+ *
+ * The properties of @fd are read from /proc/<pid>/fdinfo/<fd> into a
+ * temporary bpf_elf_map, whose first @length bytes are then compared with
+ * @map.
+ *
+ * Returns 0 if they match or if fdinfo carried no map fields, -EIO if the
+ * fdinfo file cannot be opened, and -EINVAL (after printing a report) on a
+ * mismatch.
+ */
+static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
+ int length)
+{
+ char file[PATH_MAX], buff[4096];
+ struct bpf_elf_map tmp = {}, zero = {};
+ unsigned int val;
+ FILE *fp;
+
+ snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "No procfs support?!\n");
+ return -EIO;
+ }
+
+ /* Pick the known "name:\tvalue" lines out of fdinfo, ignore the rest. */
+ while (fgets(buff, sizeof(buff), fp)) {
+ if (sscanf(buff, "map_type:\t%u", &val) == 1)
+ tmp.type = val;
+ else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+ tmp.size_key = val;
+ else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+ tmp.size_value = val;
+ else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+ tmp.max_elem = val;
+ else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
+ tmp.flags = val;
+ }
+
+ fclose(fp);
+
+ if (!memcmp(&tmp, map, length)) {
+ return 0;
+ } else {
+ /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
+ * so just accept it. We know we do have an eBPF fd and in this
+ * case, everything is 0. It is guaranteed that no such map exists
+ * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
+ */
+ if (!memcmp(&tmp, &zero, length))
+ return 0;
+
+ bpf_map_pin_report(&tmp, map);
+ return -EINVAL;
+ }
+}
+
+/*
+ * Mount a bpf file system (mode=0700) on @target.
+ *
+ * @target is first made a private mount. If that fails with EINVAL, @target
+ * is bind-mounted onto itself once and the attempt is repeated.
+ *
+ * Returns 0 on success, -1 after printing the failing step otherwise.
+ */
+static int bpf_mnt_fs(const char *target)
+{
+ bool bind_done = false;
+
+ while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
+ /* Retry only once, and only for EINVAL. */
+ if (errno != EINVAL || bind_done) {
+ fprintf(stderr, "mount --make-private %s failed: %s\n",
+ target, strerror(errno));
+ return -1;
+ }
+
+ if (mount(target, target, "none", MS_BIND, NULL)) {
+ fprintf(stderr, "mount --bind %s %s failed: %s\n",
+ target, target, strerror(errno));
+ return -1;
+ }
+
+ bind_done = true;
+ }
+
+ if (mount("bpf", target, "bpf", 0, "mode=0700")) {
+ fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
+ target, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Check that @mnt can be statfs()'ed and that its file system type equals
+ * @magic. Returns 0 if so, -ENOENT otherwise.
+ */
+static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
+{
+	struct statfs fs_info;
+	bool matches_magic;
+
+	matches_magic = statfs(mnt, &fs_info) >= 0 &&
+			(unsigned long)fs_info.f_type == magic;
+
+	return matches_magic ? 0 : -ENOENT;
+}
+
+/*
+ * Find a mount point of the file system @fstype / @magic.
+ *
+ * The candidates in the NULL-terminated list @known_mnts (may be NULL) are
+ * probed first with bpf_valid_mntpt(). If none matches and @len equals
+ * PATH_MAX, /proc/mounts is scanned for the first entry whose type string
+ * equals @fstype.
+ *
+ * @mnt/@len: output buffer. It is written only when a mount point is found.
+ *
+ * Returns @mnt on success, NULL if nothing was found.
+ */
+static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
+				  char *mnt, int len,
+				  const char * const *known_mnts)
+{
+	const char * const *ptr;
+	/* "%<PATH_MAX>s" stores up to PATH_MAX characters plus the NUL. */
+	char path[PATH_MAX + 1];
+	char type[100];
+	bool found = false;
+	FILE *fp;
+
+	if (known_mnts) {
+		for (ptr = known_mnts; *ptr; ptr++) {
+			if (bpf_valid_mntpt(*ptr, magic) == 0) {
+				strncpy(mnt, *ptr, len - 1);
+				mnt[len - 1] = 0;
+				return mnt;
+			}
+		}
+	}
+
+	/* Check the size before opening so the stream cannot leak. */
+	if (len != PATH_MAX)
+		return NULL;
+
+	fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		return NULL;
+
+	while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
+		      path, type) == 2) {
+		if (strcmp(type, fstype) == 0) {
+			found = true;
+			break;
+		}
+	}
+
+	fclose(fp);
+	/* Use a flag: type[] holds garbage if no line was ever parsed. */
+	if (!found)
+		return NULL;
+
+	snprintf(mnt, len, "%s", path);
+	return mnt;
+}
+
+/*
+ * Copy the kernel's trace_pipe to stderr until reading or writing fails.
+ *
+ * The tracefs mount point is looked up with bpf_find_mntpt(). Reads that
+ * are interrupted by a signal (EINTR) are retried. Any other read failure,
+ * end of file, or a short/failed write ends the loop.
+ *
+ * Returns -1 in every case where it returns at all: tracefs not found,
+ * trace_pipe not openable, or the copy loop ended.
+ */
+int bpf_trace_pipe(void)
+{
+	char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
+	static const char * const tracefs_known_mnts[] = {
+		TRACE_DIR_MNT,
+		"/sys/kernel/debug/tracing",
+		"/tracing",
+		"/trace",
+		0,
+	};
+	char tpipe[PATH_MAX];
+	const char *mnt;
+	int fd;
+
+	mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
+			     sizeof(tracefs_mnt), tracefs_known_mnts);
+	if (!mnt) {
+		fprintf(stderr, "tracefs not mounted?\n");
+		return -1;
+	}
+
+	snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
+
+	fd = open(tpipe, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	fprintf(stderr, "Running! Hang up with ^C!\n\n");
+	while (1) {
+		static char buff[4096];
+		ssize_t ret;
+
+		ret = read(fd, buff, sizeof(buff));
+		if (ret < 0 && errno == EINTR)
+			continue;
+		/* Stop on error or EOF rather than spinning on a dead fd. */
+		if (ret <= 0)
+			break;
+		if (write(STDERR_FILENO, buff, ret) != ret)
+			break;
+	}
+
+	close(fd);
+	return -1;
+}
+
+/*
+ * Create the "<bpf_sub_dir>/<BPF_DIR_GLOBALS>/" directory. An already
+ * existing directory counts as success. Returns 0, or mkdir()'s error
+ * return after printing a message.
+ */
+static int bpf_gen_global(const char *bpf_sub_dir)
+{
+	char globals_dir[PATH_MAX];
+	int ret;
+
+	snprintf(globals_dir, sizeof(globals_dir), "%s/%s/",
+		 bpf_sub_dir, BPF_DIR_GLOBALS);
+
+	ret = mkdir(globals_dir, S_IRWXU);
+	if (!ret || errno == EEXIST)
+		return 0;
+
+	fprintf(stderr, "mkdir %s failed: %s\n", globals_dir,
+		strerror(errno));
+	return ret;
+}
+
+/*
+ * Create the directory "<base><name>/" (an existing one is fine) and then
+ * its globals directory via bpf_gen_global(). Returns 0 on success, or the
+ * failing step's non-zero return.
+ */
+static int bpf_gen_master(const char *base, const char *name)
+{
+	char master_dir[PATH_MAX];
+	int ret;
+
+	snprintf(master_dir, sizeof(master_dir), "%s%s/", base, name);
+
+	ret = mkdir(master_dir, S_IRWXU);
+	if (!ret || errno == EEXIST)
+		return bpf_gen_global(master_dir);
+
+	fprintf(stderr, "mkdir %s failed: %s\n", master_dir,
+		strerror(errno));
+	return ret;
+}
+
+/*
+ * Create the directory @full_name and bind-mount @full_link onto it. The
+ * directory is removed again if the mount fails.
+ *
+ * Returns 0 on success, or the failing call's non-zero return after
+ * printing a message.
+ */
+static int bpf_slave_via_bind_mnt(const char *full_name,
+				  const char *full_link)
+{
+	int ret;
+
+	ret = mkdir(full_name, S_IRWXU);
+	if (ret) {
+		assert(errno != EEXIST);
+		fprintf(stderr, "mkdir %s failed: %s\n", full_name,
+			strerror(errno));
+		return ret;
+	}
+
+	ret = mount(full_link, full_name, "none", MS_BIND, NULL);
+	if (ret) {
+		/* rmdir() may overwrite errno; keep the mount error to report. */
+		int mount_errno = errno;
+
+		rmdir(full_name);
+		fprintf(stderr, "mount --bind %s %s failed: %s\n",
+			full_link, full_name, strerror(mount_errno));
+	}
+
+	return ret;
+}
+
+/*
+ * Make "<base><name>" refer to "<base><link>/".
+ *
+ * A symlink is tried first. If symlink() fails with EPERM, the function
+ * falls back to a directory plus bind mount (bpf_slave_via_bind_mnt()). If
+ * "<base><name>" already exists and is not a symlink, only its globals
+ * directory is ensured through bpf_gen_global().
+ *
+ * Returns 0 on success, or the failing step's non-zero return.
+ */
+static int bpf_gen_slave(const char *base, const char *name,
+ const char *link)
+{
+ char bpf_lnk_dir[PATH_MAX];
+ char bpf_sub_dir[PATH_MAX];
+ struct stat sb = {};
+ int ret;
+
+ snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link);
+ snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name);
+
+ ret = symlink(bpf_lnk_dir, bpf_sub_dir);
+ if (ret) {
+ if (errno != EEXIST) {
+ if (errno != EPERM) {
+ fprintf(stderr, "symlink %s failed: %s\n",
+ bpf_sub_dir, strerror(errno));
+ return ret;
+ }
+
+ /* EPERM: fall back to mkdir + bind mount. */
+ return bpf_slave_via_bind_mnt(bpf_sub_dir,
+ bpf_lnk_dir);
+ }
+
+ /* EEXIST: find out what is already there. */
+ ret = lstat(bpf_sub_dir, &sb);
+ if (ret) {
+ fprintf(stderr, "lstat %s failed: %s\n",
+ bpf_sub_dir, strerror(errno));
+ return ret;
+ }
+
+ if ((sb.st_mode & S_IFMT) != S_IFLNK)
+ return bpf_gen_global(bpf_sub_dir);
+ }
+
+ return 0;
+}
+
+/*
+ * Build the directory layout below @base. The sub-directory of
+ * __bpf_types[0] is created as master; the sub-directory of every further
+ * type is set up as a slave of it. Stops at the first failure and returns
+ * that step's return value, 0 otherwise.
+ */
+static int bpf_gen_hierarchy(const char *base)
+{
+	int ret, i = 1;
+
+	ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0]));
+	while (i < ARRAY_SIZE(__bpf_types) && !ret) {
+		ret = bpf_gen_slave(base,
+				    bpf_prog_to_subdir(__bpf_types[i]),
+				    bpf_prog_to_subdir(__bpf_types[0]));
+		i++;
+	}
+
+	return ret;
+}
+
+/*
+ * Return the bpf fs working directory. The result is cached in static
+ * storage.
+ *
+ * First call: look for an existing bpf fs mount with bpf_find_mntpt(). If
+ * none is found, mount one at $BPF_ENV_MNT or BPF_DIR_MNT. Then create the
+ * directory hierarchy below it. This call returns "<mnt>/" without a
+ * per-type sub-directory and does not use @type.
+ *
+ * Later calls: return "<mnt>/<subdir of @type>/", built in a static buffer
+ * that the next call overwrites.
+ *
+ * Returns NULL, also on all later calls, if mounting or creating the
+ * hierarchy failed.
+ */
+static const char *bpf_get_work_dir(enum bpf_prog_type type)
+{
+ static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
+ static char bpf_wrk_dir[PATH_MAX];
+ static const char *mnt;
+ static bool bpf_mnt_cached;
+ static const char * const bpf_known_mnts[] = {
+ BPF_DIR_MNT,
+ "/bpf",
+ 0,
+ };
+ int ret;
+
+ if (bpf_mnt_cached) {
+ const char *out = mnt;
+
+ /* mnt already ends in '/', hence no separator before the subdir. */
+ if (out) {
+ snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/",
+ out, bpf_prog_to_subdir(type));
+ out = bpf_tmp;
+ }
+ return out;
+ }
+
+ mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, sizeof(bpf_tmp),
+ bpf_known_mnts);
+ if (!mnt) {
+ mnt = getenv(BPF_ENV_MNT);
+ if (!mnt)
+ mnt = BPF_DIR_MNT;
+ ret = bpf_mnt_fs(mnt);
+ if (ret) {
+ mnt = NULL;
+ goto out;
+ }
+ }
+
+ snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt);
+
+ ret = bpf_gen_hierarchy(bpf_wrk_dir);
+ if (ret) {
+ mnt = NULL;
+ goto out;
+ }
+
+ mnt = bpf_wrk_dir;
+out:
+ /* Failures are cached too, so the setup is attempted only once. */
+ bpf_mnt_cached = true;
+ return mnt;
+}
+
+/*
+ * Issue BPF_OBJ_GET for @pathname and return what bpf() returns.
+ *
+ * A path of the form "m:<rest>" is resolved relative to
+ * bpf_get_work_dir(@type), provided a working directory is available.
+ */
+static int bpf_obj_get(const char *pathname, enum bpf_prog_type type)
+{
+ union bpf_attr attr = {};
+ char tmp[PATH_MAX];
+
+ if (strlen(pathname) > 2 && pathname[0] == 'm' &&
+ pathname[1] == ':' && bpf_get_work_dir(type)) {
+ /* Skip the "m:" prefix. */
+ snprintf(tmp, sizeof(tmp), "%s/%s",
+ bpf_get_work_dir(type), pathname + 2);
+ pathname = tmp;
+ }
+
+ attr.pathname = bpf_ptr_to_u64(pathname);
+
+ return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
+/* Input modes recognised by bpf_parse(); also the index into its opt_tbl. */
+enum bpf_mode {
+ CBPF_BYTECODE, /* "bytecode" / "bc": cBPF ops given on the command line */
+ CBPF_FILE, /* "bytecode-file" / "bcf": cBPF ops read from a file */
+ EBPF_OBJECT, /* "object-file" / "obj": program opened with bpf_obj_open() */
+ EBPF_PINNED, /* "object-pinned" / "pinned" / "fd": fetched with bpf_obj_get() */
+ BPF_MODE_MAX, /* number of modes, used to size opt_tbl arrays */
+};
+
+/*
+ * Parse the bpf related command line in @cfg->argv and load the program.
+ *
+ * @type:    in/out program type. If it is BPF_PROG_TYPE_UNSPEC on entry
+ *           (eBPF modes only), it is taken from an optional "type" argument
+ *           and defaults to BPF_PROG_TYPE_SCHED_CLS.
+ * @mode:    out: the input mode that was selected
+ * @cfg:     in: argc/argv and, for cBPF, the ops buffer; out: object,
+ *           section, uds and the advanced argc/argv
+ * @opt_tbl: per-mode switch table; a mode is only accepted when its entry
+ *           is true
+ *
+ * For eBPF modes the optional arguments are read in this fixed order:
+ * "type", "section", "export" (only if the type allows it and BPF_ENV_UDS
+ * is unset), "verbose".
+ *
+ * Returns the number of cBPF instructions (cBPF modes), the value returned
+ * by bpf_obj_open()/bpf_obj_get() (eBPF modes), or a negative value on
+ * error.
+ *
+ * NOTE(review): NEXT_ARG(), NEXT_ARG_FWD() and PREV_ARG() are defined
+ * outside this file and operate on the local argc/argv - confirm how they
+ * behave when arguments run out.
+ */
+static int bpf_parse(enum bpf_prog_type *type, enum bpf_mode *mode,
+ struct bpf_cfg_in *cfg, const bool *opt_tbl)
+{
+ const char *file, *section, *uds_name;
+ bool verbose = false;
+ int i, ret, argc;
+ char **argv;
+
+ argv = cfg->argv;
+ argc = cfg->argc;
+
+ if (opt_tbl[CBPF_BYTECODE] &&
+ (matches(*argv, "bytecode") == 0 ||
+ strcmp(*argv, "bc") == 0)) {
+ *mode = CBPF_BYTECODE;
+ } else if (opt_tbl[CBPF_FILE] &&
+ (matches(*argv, "bytecode-file") == 0 ||
+ strcmp(*argv, "bcf") == 0)) {
+ *mode = CBPF_FILE;
+ } else if (opt_tbl[EBPF_OBJECT] &&
+ (matches(*argv, "object-file") == 0 ||
+ strcmp(*argv, "obj") == 0)) {
+ *mode = EBPF_OBJECT;
+ } else if (opt_tbl[EBPF_PINNED] &&
+ (matches(*argv, "object-pinned") == 0 ||
+ matches(*argv, "pinned") == 0 ||
+ matches(*argv, "fd") == 0)) {
+ *mode = EBPF_PINNED;
+ } else {
+ fprintf(stderr, "What mode is \"%s\"?\n", *argv);
+ return -1;
+ }
+
+ NEXT_ARG();
+ file = section = uds_name = NULL;
+ if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
+ file = *argv;
+ NEXT_ARG_FWD();
+
+ if (*type == BPF_PROG_TYPE_UNSPEC) {
+ if (argc > 0 && matches(*argv, "type") == 0) {
+ NEXT_ARG();
+ /* Look the given name up in the metadata table. */
+ for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta);
+ i++) {
+ if (!__bpf_prog_meta[i].type)
+ continue;
+ if (!matches(*argv,
+ __bpf_prog_meta[i].type)) {
+ *type = i;
+ break;
+ }
+ }
+
+ if (*type == BPF_PROG_TYPE_UNSPEC) {
+ fprintf(stderr, "What type is \"%s\"?\n",
+ *argv);
+ return -1;
+ }
+ NEXT_ARG_FWD();
+ } else {
+ *type = BPF_PROG_TYPE_SCHED_CLS;
+ }
+ }
+
+ section = bpf_prog_to_default_section(*type);
+ if (argc > 0 && matches(*argv, "section") == 0) {
+ NEXT_ARG();
+ section = *argv;
+ NEXT_ARG_FWD();
+ }
+
+ if (__bpf_prog_meta[*type].may_uds_export) {
+ /* The environment variable wins over the "export" option. */
+ uds_name = getenv(BPF_ENV_UDS);
+ if (argc > 0 && !uds_name &&
+ matches(*argv, "export") == 0) {
+ NEXT_ARG();
+ uds_name = *argv;
+ NEXT_ARG_FWD();
+ }
+ }
+
+ if (argc > 0 && matches(*argv, "verbose") == 0) {
+ verbose = true;
+ NEXT_ARG_FWD();
+ }
+
+ PREV_ARG();
+ }
+
+ if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
+ ret = bpf_ops_parse(argc, argv, cfg->ops, *mode == CBPF_FILE);
+ else if (*mode == EBPF_OBJECT)
+ ret = bpf_obj_open(file, *type, section, verbose);
+ else if (*mode == EBPF_PINNED)
+ ret = bpf_obj_get(file, *type);
+ else
+ return -1;
+
+ /* Report what was consumed back to the caller, even if loading failed. */
+ cfg->object = file;
+ cfg->section = section;
+ cfg->uds = uds_name;
+ cfg->argc = argc;
+ cfg->argv = argv;
+
+ return ret;
+}
+
+/*
+ * Run bpf_parse() with the mode table @opt_tbl and pass the result to the
+ * matching callback in @ops: cbpf_cb() gets the parsed instructions and
+ * their count, ebpf_cb() gets the value bpf_parse() returned plus an
+ * annotation "<object basename>:[<section>]" ("*fsobj" replaces the section
+ * for pinned objects). @nl is handed to the callbacks unchanged.
+ *
+ * Returns 0 on success or bpf_parse()'s negative return.
+ */
+static int bpf_parse_opt_tbl(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
+ const struct bpf_cfg_ops *ops, void *nl,
+ const bool *opt_tbl)
+{
+ struct sock_filter opcodes[BPF_MAXINSNS];
+ char annotation[256];
+ enum bpf_mode mode;
+ int ret;
+
+ /* Lend the on-stack buffer to bpf_parse() for the call only. */
+ cfg->ops = opcodes;
+ ret = bpf_parse(&type, &mode, cfg, opt_tbl);
+ cfg->ops = NULL;
+ if (ret < 0)
+ return ret;
+
+ if (mode == CBPF_BYTECODE || mode == CBPF_FILE)
+ ops->cbpf_cb(nl, opcodes, ret);
+ if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
+ snprintf(annotation, sizeof(annotation), "%s:[%s]",
+ basename(cfg->object), mode == EBPF_PINNED ?
+ "*fsobj" : cfg->section);
+ ops->ebpf_cb(nl, ret, annotation);
+ }
+
+ return 0;
+}
+
+/*
+ * Public entry point for parsing and loading a bpf program from the
+ * command line in @cfg. The cBPF modes are enabled only if @ops provides
+ * cbpf_cb, the eBPF modes only if it provides ebpf_cb. The work is done by
+ * bpf_parse_opt_tbl(), whose return value is passed through.
+ */
+int bpf_parse_common(enum bpf_prog_type type, struct bpf_cfg_in *cfg,
+		     const struct bpf_cfg_ops *ops, void *nl)
+{
+	const bool want_cbpf = ops->cbpf_cb != NULL;
+	const bool want_ebpf = ops->ebpf_cb != NULL;
+	bool opt_tbl[BPF_MODE_MAX] = {
+		[CBPF_BYTECODE]	= want_cbpf,
+		[CBPF_FILE]	= want_cbpf,
+		[EBPF_OBJECT]	= want_ebpf,
+		[EBPF_PINNED]	= want_ebpf,
+	};
+
+	return bpf_parse_opt_tbl(type, cfg, ops, nl, opt_tbl);
+}
+
+/*
+ * Load an eBPF program described by @argc/@argv and store its fd in the
+ * pinned map at @map_path.
+ *
+ * @key: slot to update. If NULL, the key is taken from the section name,
+ *       which then has to look like "<number>/<key>".
+ *
+ * The pinned map is checked to be a BPF_MAP_TYPE_PROG_ARRAY with int-sized
+ * key and value before it is updated.
+ *
+ * Returns the map update result (negative on failure), or a negative value
+ * from an earlier failing step.
+ */
+int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
+{
+ enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
+ const bool opt_tbl[BPF_MODE_MAX] = {
+ [EBPF_OBJECT] = true,
+ [EBPF_PINNED] = true,
+ };
+ const struct bpf_elf_map test = {
+ .type = BPF_MAP_TYPE_PROG_ARRAY,
+ .size_key = sizeof(int),
+ .size_value = sizeof(int),
+ };
+ struct bpf_cfg_in cfg = {
+ .argc = argc,
+ .argv = argv,
+ };
+ int ret, prog_fd, map_fd;
+ enum bpf_mode mode;
+ uint32_t map_key;
+
+ prog_fd = bpf_parse(&type, &mode, &cfg, opt_tbl);
+ if (prog_fd < 0)
+ return prog_fd;
+ if (key) {
+ map_key = *key;
+ } else {
+ /* Skip the first number, take the one after the slash as key. */
+ ret = sscanf(cfg.section, "%*i/%i", &map_key);
+ if (ret != 1) {
+ fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
+ ret = -EINVAL;
+ goto out_prog;
+ }
+ }
+
+ map_fd = bpf_obj_get(map_path, type);
+ if (map_fd < 0) {
+ fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
+ map_path, strerror(errno));
+ ret = map_fd;
+ goto out_prog;
+ }
+
+ /* Only the fields in front of max_elem take part in the comparison. */
+ ret = bpf_map_selfcheck_pinned(map_fd, &test,
+ offsetof(struct bpf_elf_map, max_elem));
+ if (ret < 0) {
+ fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
+ goto out_map;
+ }
+
+ ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
+ if (ret < 0)
+ fprintf(stderr, "Map update failed: %s\n", strerror(errno));
+out_map:
+ close(map_fd);
+out_prog:
+ close(prog_fd);
+ return ret;
+}
+
+#ifdef HAVE_ELF
+struct bpf_elf_prog {
+ enum bpf_prog_type type;
+ const struct bpf_insn *insns;
+ size_t size;
+ const char *license;
+};
+
+/*
+ * Node of a bucket chain in bpf_elf_ctx::ht. bpf_custom_pinning() looks an
+ * entry up by its pinning id and returns the stored subpath.
+ */
+struct bpf_hash_entry {
+ unsigned int pinning;
+ const char *subpath;
+ struct bpf_hash_entry *next;
+};
+
+struct bpf_elf_ctx {
+ Elf *elf_fd;
+ GElf_Ehdr elf_hdr;
+ Elf_Data *sym_tab;
+ Elf_Data *str_tab;
+ int obj_fd;
+ int map_fds[ELF_MAX_MAPS];
+ struct bpf_elf_map maps[ELF_MAX_MAPS];
+ int sym_num;
+ int map_num;
+ int map_len;
+ bool *sec_done;
+ int sec_maps;
+ char license[ELF_MAX_LICENSE_LEN];
+ enum bpf_prog_type type;
+ bool verbose;
+ struct bpf_elf_st stat;
+ struct bpf_hash_entry *ht[256];
+ char *log;
+ size_t log_size;
+};
+
+struct bpf_elf_sec_data {
+ GElf_Shdr sec_hdr;
+ Elf_Data *sec_data;
+ const char *sec_name;
+};
+
+struct bpf_map_data {
+ int *fds;
+ const char *obj;
+ struct bpf_elf_st *st;
+ struct bpf_elf_map *ent;
+};
+
+/*
+ * Print a printf-style error message to stderr, followed by the contents
+ * of ctx->log if it is non-empty. Unless ctx->verbose is set, only the last
+ * BPF_MAX_LOG bytes of the log are printed. The log buffer is cleared
+ * afterwards.
+ */
+static __check_format_string(2, 3) void
+bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
+{
+ va_list vl;
+
+ va_start(vl, format);
+ vfprintf(stderr, format, vl);
+ va_end(vl);
+
+ if (ctx->log && ctx->log[0]) {
+ if (ctx->verbose) {
+ fprintf(stderr, "%s\n", ctx->log);
+ } else {
+ unsigned int off = 0, len = strlen(ctx->log);
+
+ /* Print only the tail and say how much was skipped. */
+ if (len > BPF_MAX_LOG) {
+ off = len - BPF_MAX_LOG;
+ fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
+ off);
+ }
+ fprintf(stderr, "%s\n", ctx->log + off);
+ }
+
+ memset(ctx->log, 0, ctx->log_size);
+ }
+}
+
+/*
+ * Allocate or grow the log buffer in @ctx.
+ *
+ * The first call allocates 65536 bytes; every further call doubles the
+ * size. Growing beyond UINT_MAX >> 8 bytes is refused.
+ *
+ * Returns 0 on success, -EINVAL if the size limit would be exceeded, or
+ * -ENOMEM if realloc() fails. On failure ctx->log and ctx->log_size are
+ * left unchanged.
+ */
+static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
+{
+	const bool first_alloc = !ctx->log;
+	size_t log_size = ctx->log_size;
+	char *ptr;
+
+	if (first_alloc) {
+		log_size = 65536;
+	} else {
+		log_size <<= 1;
+		if (log_size > (UINT_MAX >> 8))
+			return -EINVAL;
+	}
+
+	ptr = realloc(ctx->log, log_size);
+	if (!ptr)
+		return -ENOMEM;
+
+	/*
+	 * realloc() leaves new memory uninitialised, but bpf_dump_error()
+	 * tests log[0]; make a fresh buffer read as an empty string.
+	 */
+	if (first_alloc)
+		ptr[0] = 0;
+
+	ctx->log = ptr;
+	ctx->log_size = log_size;
+
+	return 0;
+}
+
+/*
+ * Issue BPF_MAP_CREATE with the given map type, key/value sizes, maximum
+ * number of entries and flags. Returns whatever bpf() returns.
+ */
+static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
+ uint32_t size_value, uint32_t max_elem,
+ uint32_t flags)
+{
+ union bpf_attr attr = {};
+
+ attr.map_type = type;
+ attr.key_size = size_key;
+ attr.value_size = size_value;
+ attr.max_entries = max_elem;
+ attr.map_flags = flags;
+
+ return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+/*
+ * Issue BPF_PROG_LOAD for the instructions at @insns.
+ *
+ * @size_insns: size of the instruction array in bytes
+ * @log/@size_log: log buffer passed to the kernel; logging (level 1) is
+ *                 requested only if @size_log is greater than 0
+ *
+ * Returns whatever bpf() returns.
+ */
+static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t size_insns, const char *license, char *log,
+ size_t size_log)
+{
+ union bpf_attr attr = {};
+
+ attr.prog_type = type;
+ attr.insns = bpf_ptr_to_u64(insns);
+ /* The kernel expects an instruction count, not a byte size. */
+ attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
+ attr.license = bpf_ptr_to_u64(license);
+
+ if (size_log > 0) {
+ attr.log_buf = bpf_ptr_to_u64(log);
+ attr.log_size = size_log;
+ attr.log_level = 1;
+ }
+
+ return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+/* Issue BPF_OBJ_PIN to pin @fd at @pathname. Returns whatever bpf() returns. */
+static int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr = {};
+
+ attr.pathname = bpf_ptr_to_u64(pathname);
+ attr.bpf_fd = fd;
+
+ return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+/*
+ * Compute the SHA-1 digest of the file @object through an AF_ALG "hash"
+ * socket and store it in @out.
+ *
+ * @len must be 20, the size of the digest.
+ *
+ * Returns 0 on success, -EINVAL for bad arguments, or a negative value
+ * after printing a message if any socket or file operation fails.
+ */
+static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
+{
+	struct sockaddr_alg alg = {
+		.salg_family	= AF_ALG,
+		.salg_type	= "hash",
+		.salg_name	= "sha1",
+	};
+	int ret, cfd, ofd, ffd;
+	struct stat stbuff;
+	ssize_t size;
+
+	if (!object || len != 20)
+		return -EINVAL;
+
+	cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+	if (cfd < 0) {
+		fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
+			strerror(errno));
+		return cfd;
+	}
+
+	ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
+	if (ret < 0) {
+		fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
+		goto out_cfd;
+	}
+
+	/* The accepted socket is the one data is written to and read from. */
+	ofd = accept(cfd, NULL, 0);
+	if (ofd < 0) {
+		fprintf(stderr, "Error accepting socket: %s\n",
+			strerror(errno));
+		ret = ofd;
+		goto out_cfd;
+	}
+
+	ffd = open(object, O_RDONLY);
+	if (ffd < 0) {
+		fprintf(stderr, "Error opening object %s: %s\n",
+			object, strerror(errno));
+		ret = ffd;
+		goto out_ofd;
+	}
+
+	ret = fstat(ffd, &stbuff);
+	if (ret < 0) {
+		fprintf(stderr, "Error doing fstat: %s\n",
+			strerror(errno));
+		goto out_ffd;
+	}
+
+	size = sendfile(ofd, ffd, NULL, stbuff.st_size);
+	if (size != stbuff.st_size) {
+		/* st_size is an off_t, which need not have size_t's width. */
+		fprintf(stderr, "Error from sendfile (%zd vs %lld bytes): %s\n",
+			size, (long long)stbuff.st_size, strerror(errno));
+		ret = -1;
+		goto out_ffd;
+	}
+
+	size = read(ofd, out, len);
+	if (size < 0 || (size_t)size != len) {
+		fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
+			size, len, strerror(errno));
+		ret = -1;
+	} else {
+		ret = 0;
+	}
+out_ffd:
+	close(ffd);
+out_ofd:
+	close(ofd);
+out_cfd:
+	close(cfd);
+	return ret;
+}
+
+/*
+ * Return the hex string of the object file's hash, used as its unique id.
+ *
+ * The hash of @pathname is computed on the first successful call and kept
+ * in static storage. Once cached, @pathname is ignored. Returns NULL if
+ * hashing fails.
+ */
+static const char *bpf_get_obj_uid(const char *pathname)
+{
+	static bool bpf_uid_cached;
+	static char bpf_uid[64];
+
+	if (!bpf_uid_cached) {
+		uint8_t digest[20];
+
+		if (bpf_obj_hash(pathname, digest, sizeof(digest))) {
+			fprintf(stderr, "Object hashing failed!\n");
+			return NULL;
+		}
+
+		hexstring_n2a(digest, sizeof(digest), bpf_uid,
+			      sizeof(bpf_uid));
+		bpf_uid_cached = true;
+	}
+
+	return bpf_uid;
+}
+
+/*
+ * Prepare the environment for loading the object at @pathname: try to lift
+ * the locked-memory limit, set up the bpf fs working directory and compute
+ * the object's unique id.
+ *
+ * Returns 0 on success or when no working directory is available (a
+ * warning is printed then), -1 if the object id cannot be computed.
+ */
+static int bpf_init_env(const char *pathname)
+{
+	struct rlimit unlimited = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	/* Best effort only; the return value is deliberately not checked. */
+	setrlimit(RLIMIT_MEMLOCK, &unlimited);
+
+	if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) {
+		fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
+		return 0;
+	}
+
+	return bpf_get_obj_uid(pathname) ? 0 : -1;
+}
+
+/* Look up the sub path registered for a custom pinning id.
+ *
+ * Walks the hash bucket selected by the low bits of pinning and returns
+ * the subpath of the matching entry, or NULL when there is none.
+ */
+static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
+				      uint32_t pinning)
+{
+	const struct bpf_hash_entry *pos;
+
+	for (pos = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; pos;
+	     pos = pos->next) {
+		if (pos->pinning == pinning)
+			return pos->subpath;
+	}
+
+	return NULL;
+}
+
+/* Tell whether an object with this pinning id is left unpinned.
+ *
+ * PIN_NONE is never pinned, PIN_OBJECT_NS and PIN_GLOBAL_NS always are;
+ * any other id is pinned only if a custom mapping exists for it.
+ */
+static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
+			   uint32_t pinning)
+{
+	if (pinning == PIN_NONE)
+		return true;
+	if (pinning == PIN_OBJECT_NS || pinning == PIN_GLOBAL_NS)
+		return false;
+
+	return !bpf_custom_pinning(ctx, pinning);
+}
+
+/* Format the pin location of object `name` into pathname (len bytes).
+ *
+ * The middle path component depends on pinning: the object uid for
+ * PIN_OBJECT_NS, BPF_DIR_GLOBALS for PIN_GLOBAL_NS, and otherwise the
+ * custom sub path, which is placed one level above the work dir.
+ */
+static void bpf_make_pathname(char *pathname, size_t len, const char *name,
+			      const struct bpf_elf_ctx *ctx, uint32_t pinning)
+{
+	const char *root = bpf_get_work_dir(ctx->type);
+
+	if (pinning == PIN_OBJECT_NS)
+		snprintf(pathname, len, "%s/%s/%s", root,
+			 bpf_get_obj_uid(NULL), name);
+	else if (pinning == PIN_GLOBAL_NS)
+		snprintf(pathname, len, "%s/%s/%s", root,
+			 BPF_DIR_GLOBALS, name);
+	else
+		snprintf(pathname, len, "%s/../%s/%s", root,
+			 bpf_custom_pinning(ctx, pinning), name);
+}
+
+/* Try to fetch an already pinned object called `name`.
+ *
+ * Returns 0 when the pinning id means "not pinned" or no work dir is
+ * available; otherwise returns whatever bpf_obj_get() yields for the
+ * computed pin path.
+ */
+static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
+			    uint32_t pinning)
+{
+	char pinned[PATH_MAX];
+	bool usable = !bpf_no_pinning(ctx, pinning) &&
+		      bpf_get_work_dir(ctx->type);
+
+	if (!usable)
+		return 0;
+
+	bpf_make_pathname(pinned, sizeof(pinned), name, ctx, pinning);
+	return bpf_obj_get(pinned, ctx->type);
+}
+
+/* Create the per-object directory "<work dir>/<object uid>".
+ *
+ * An already existing directory is not an error. Returns 0 on success,
+ * otherwise the mkdir() return value after printing a diagnostic.
+ */
+static int bpf_make_obj_path(const struct bpf_elf_ctx *ctx)
+{
+	char objdir[PATH_MAX];
+	int err;
+
+	snprintf(objdir, sizeof(objdir), "%s/%s", bpf_get_work_dir(ctx->type),
+		 bpf_get_obj_uid(NULL));
+
+	err = mkdir(objdir, S_IRWXU);
+	if (!err || errno == EEXIST)
+		return 0;
+
+	fprintf(stderr, "mkdir %s failed: %s\n", objdir, strerror(errno));
+	return err;
+}
+
+/* Create every directory level of a custom pin sub path.
+ *
+ * `todo` is split at '/' and each component is appended to
+ * "<work dir>/../" and created in turn; existing directories are
+ * accepted. Returns 0 on success, -EINVAL if the path would not fit
+ * into PATH_MAX, or the failing mkdir() return value.
+ */
+static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx,
+				const char *todo)
+{
+	char built[PATH_MAX], parts[PATH_MAX], *comp;
+	int err;
+
+	snprintf(built, sizeof(built), "%s/../", bpf_get_work_dir(ctx->type));
+	snprintf(parts, sizeof(parts), "%s/", todo);
+
+	for (comp = strtok(parts, "/"); comp; comp = strtok(NULL, "/")) {
+		/* Room for the component, the trailing '/' and the NUL. */
+		if (strlen(built) + strlen(comp) + 2 > PATH_MAX)
+			return -EINVAL;
+
+		strcat(built, comp);
+		strcat(built, "/");
+
+		err = mkdir(built, S_IRWXU);
+		if (err && errno != EEXIST) {
+			fprintf(stderr, "mkdir %s failed: %s\n", built,
+				strerror(errno));
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+/* Pin object `fd` under `name` according to its pinning id.
+ *
+ * Does nothing (returns 0) when the id means "not pinned" or no work
+ * dir is available. Otherwise the required directory is created first
+ * (per-object dir or custom sub path) and the result of bpf_obj_pin()
+ * is returned; a negative result of the directory setup is returned
+ * as is.
+ */
+static int bpf_place_pinned(int fd, const char *name,
+			    const struct bpf_elf_ctx *ctx, uint32_t pinning)
+{
+	char target[PATH_MAX];
+	int err = 0;
+
+	if (bpf_no_pinning(ctx, pinning))
+		return 0;
+	if (!bpf_get_work_dir(ctx->type))
+		return 0;
+
+	if (pinning == PIN_OBJECT_NS) {
+		err = bpf_make_obj_path(ctx);
+	} else {
+		const char *subdir = bpf_custom_pinning(ctx, pinning);
+
+		if (subdir)
+			err = bpf_make_custom_path(ctx, subdir);
+	}
+	if (err < 0)
+		return err;
+
+	bpf_make_pathname(target, sizeof(target), name, ctx, pinning);
+	return bpf_obj_pin(fd, target);
+}
+
+/* Print the outcome of a program load attempt to stderr.
+ *
+ * A negative fd is reported as a rejection together with the current
+ * errno; otherwise the fd itself is shown. Program type, instruction
+ * count (and how far it exceeds BPF_MAXINSNS) and license follow, then
+ * the log is dumped through bpf_dump_error().
+ */
+static void bpf_prog_report(int fd, const char *section,
+			    const struct bpf_elf_prog *prog,
+			    struct bpf_elf_ctx *ctx)
+{
+	const bool rejected = fd < 0;
+	unsigned int count = prog->size / sizeof(struct bpf_insn);
+	unsigned int excess = count > BPF_MAXINSNS ? count - BPF_MAXINSNS : 0;
+
+	fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
+		rejected ? "rejected: " : "loaded",
+		rejected ? strerror(errno) : "",
+		rejected ? errno : fd);
+
+	fprintf(stderr, " - Type: %u\n", prog->type);
+	fprintf(stderr, " - Instructions: %u (%u over limit)\n",
+		count, excess);
+	fprintf(stderr, " - License: %s\n\n", prog->license);
+
+	bpf_dump_error(ctx, "Verifier analysis:\n\n");
+}
+
+/* Load one program section into the kernel through bpf_prog_load().
+ *
+ * Returns the value bpf_prog_load() produced: the program fd, or a
+ * negative value on failure. The outcome is printed via
+ * bpf_prog_report() on failure, and also on success when ctx->verbose
+ * is set.
+ */
+static int bpf_prog_attach(const char *section,
+ const struct bpf_elf_prog *prog,
+ struct bpf_elf_ctx *ctx)
+{
+ int tries = 0, fd;
+retry:
+ /* Start from a clean errno, it is inspected right after the load. */
+ errno = 0;
+ fd = bpf_prog_load(prog->type, prog->insns, prog->size,
+ prog->license, ctx->log, ctx->log_size);
+ if (fd < 0 || ctx->verbose) {
+ /* The verifier log is pretty chatty, sometimes so chatty
+ * on larger programs, that we could fail to dump everything
+ * into our buffer. Still, try to give a debuggable error
+ * log for the user, so enlarge it and re-fail.
+ */
+ if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
+ /* At most six retries, each only after bpf_log_realloc()
+ * returned 0.
+ */
+ if (tries++ < 6 && !bpf_log_realloc(ctx))
+ goto retry;
+
+ fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
+ ctx->log_size, tries);
+ return fd;
+ }
+
+ bpf_prog_report(fd, section, prog, ctx);
+ }
+
+ return fd;
+}
+
+/* Print the outcome of a map creation attempt to stderr.
+ *
+ * A negative fd is reported as a rejection together with the current
+ * errno; otherwise the fd itself is shown, followed by the map's
+ * attributes.
+ */
+static void bpf_map_report(int fd, const char *name,
+			   const struct bpf_elf_map *map,
+			   struct bpf_elf_ctx *ctx)
+{
+	const bool rejected = fd < 0;
+
+	fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
+		rejected ? "rejected: " : "loaded",
+		rejected ? strerror(errno) : "",
+		rejected ? errno : fd);
+
+	fprintf(stderr, " - Type: %u\n", map->type);
+	fprintf(stderr, " - Identifier: %u\n", map->id);
+	fprintf(stderr, " - Pinning: %u\n", map->pinning);
+	fprintf(stderr, " - Size key: %u\n", map->size_key);
+	fprintf(stderr, " - Size value: %u\n", map->size_value);
+	fprintf(stderr, " - Max elems: %u\n", map->max_elem);
+	fprintf(stderr, " - Flags: %#x\n\n", map->flags);
+}
+
+/* Obtain an fd for map `name`, reusing a pinned instance when present.
+ *
+ * Returns the map fd on success. On failure returns the negative result
+ * of the failing step (self-check, creation or pinning).
+ */
+static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
+ struct bpf_elf_ctx *ctx)
+{
+ int fd, ret;
+
+ /* A positive value means a pinned map was found; it must pass
+ * bpf_map_selfcheck_pinned() before being reused.
+ */
+ fd = bpf_probe_pinned(name, ctx, map->pinning);
+ if (fd > 0) {
+ ret = bpf_map_selfcheck_pinned(fd, map,
+ offsetof(struct bpf_elf_map,
+ id));
+ if (ret < 0) {
+ close(fd);
+ fprintf(stderr, "Map \'%s\' self-check failed!\n",
+ name);
+ return ret;
+ }
+ if (ctx->verbose)
+ fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
+ name);
+ return fd;
+ }
+
+ /* No pinned map: create a fresh one. errno is cleared first as
+ * bpf_map_report() prints it on failure.
+ */
+ errno = 0;
+ fd = bpf_map_create(map->type, map->size_key, map->size_value,
+ map->max_elem, map->flags);
+ if (fd < 0 || ctx->verbose) {
+ bpf_map_report(fd, name, map, ctx);
+ if (fd < 0)
+ return fd;
+ }
+
+ /* A pinning failure with errno EEXIST is tolerated, the fresh fd is
+ * still returned in that case.
+ */
+ ret = bpf_place_pinned(fd, name, ctx, map->pinning);
+ if (ret < 0 && errno != EEXIST) {
+ fprintf(stderr, "Could not pin %s map: %s\n", name,
+ strerror(errno));
+ close(fd);
+ return ret;
+ }
+
+ return fd;
+}
+
+/* Resolve a symbol's name: its st_name is used as a byte offset into
+ * the string table data recorded in ctx->str_tab.
+ */
+static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
+				    const GElf_Sym *sym)
+{
+	const char *strings = ctx->str_tab->d_buf;
+
+	return strings + sym->st_name;
+}
+
+/* Find the symbol name of the map at index `which` of the maps section.
+ *
+ * Only global, untyped symbols located in the maps section are
+ * considered; a symbol's index is its st_value divided by the per-map
+ * length. Returns NULL when no symbol matches.
+ */
+static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
+{
+	GElf_Sym sym;
+	int idx;
+
+	for (idx = 0; idx < ctx->sym_num; idx++) {
+		if (gelf_getsym(ctx->sym_tab, idx, &sym) != &sym)
+			continue;
+
+		if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL &&
+		    GELF_ST_TYPE(sym.st_info) == STT_NOTYPE &&
+		    sym.st_shndx == ctx->sec_maps &&
+		    sym.st_value / ctx->map_len == which)
+			return bpf_str_tab_name(ctx, &sym);
+	}
+
+	return NULL;
+}
+
+/* Attach every map of the object and record its fd in ctx->map_fds.
+ *
+ * Returns 0 on success, -EIO when a map has no symbol name, or the
+ * negative result of bpf_map_attach() for the first map that fails.
+ */
+static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
+{
+	int slot;
+
+	for (slot = 0; slot < ctx->map_num; slot++) {
+		const char *label = bpf_map_fetch_name(ctx, slot);
+		int map_fd;
+
+		if (!label)
+			return -EIO;
+
+		map_fd = bpf_map_attach(label, &ctx->maps[slot], ctx);
+		if (map_fd < 0)
+			return map_fd;
+
+		ctx->map_fds[slot] = map_fd;
+	}
+
+	return 0;
+}
+
+/* Count the global, untyped symbols that live in the maps section. */
+static int bpf_map_num_sym(struct bpf_elf_ctx *ctx)
+{
+	int idx, total = 0;
+	GElf_Sym sym;
+
+	for (idx = 0; idx < ctx->sym_num; idx++) {
+		if (gelf_getsym(ctx->sym_tab, idx, &sym) != &sym)
+			continue;
+
+		if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL &&
+		    GELF_ST_TYPE(sym.st_info) == STT_NOTYPE &&
+		    sym.st_shndx == ctx->sec_maps)
+			total++;
+	}
+
+	return total;
+}
+
+/* Collect header, name and data descriptor of ELF section `section`.
+ *
+ * `data` is zeroed first and only filled in on success. Returns 0 on
+ * success, -EINVAL if the section cannot be looked up, -EIO if its
+ * header or data cannot be read, and -ENOENT if it has no name or a
+ * size of zero.
+ */
+static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
+ struct bpf_elf_sec_data *data)
+{
+ Elf_Data *sec_edata;
+ GElf_Shdr sec_hdr;
+ Elf_Scn *sec_fd;
+ char *sec_name;
+
+ memset(data, 0, sizeof(*data));
+
+ sec_fd = elf_getscn(ctx->elf_fd, section);
+ if (!sec_fd)
+ return -EINVAL;
+ if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
+ return -EIO;
+
+ sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
+ sec_hdr.sh_name);
+ if (!sec_name || !sec_hdr.sh_size)
+ return -ENOENT;
+
+ /* A second elf_getdata() call that yields another descriptor is
+ * treated as an error - presumably to insist on a single data
+ * chunk per section.
+ */
+ sec_edata = elf_getdata(sec_fd, NULL);
+ if (!sec_edata || elf_getdata(sec_fd, sec_edata))
+ return -EIO;
+
+ memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
+
+ data->sec_name = sec_name;
+ data->sec_data = sec_edata;
+ return 0;
+}
+
+/* Smallest accepted per-map layout in the maps section:
+ * bpf_fetch_maps_end() rejects entries shorter than this structure.
+ */
+struct bpf_elf_map_min {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+};
+
+/* Take a raw copy of the maps section into ctx->maps.
+ *
+ * At this stage ctx->map_num holds the section's size in bytes. The
+ * section is remembered in ctx->sec_maps and marked as done. Returns 0
+ * on success, -ENOMEM if the section is larger than ctx->maps.
+ */
+static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section,
+				struct bpf_elf_sec_data *data)
+{
+	const Elf_Data *raw = data->sec_data;
+
+	ctx->sec_done[section] = true;
+	ctx->sec_maps = section;
+	ctx->map_num = raw->d_size;
+
+	if (ctx->map_num > sizeof(ctx->maps)) {
+		fprintf(stderr, "Too many BPF maps in ELF section!\n");
+		return -ENOMEM;
+	}
+
+	memcpy(ctx->maps, raw->d_buf, ctx->map_num);
+	return 0;
+}
+
+/* Verify that each map_len sized slot in [0, end) of the maps section
+ * starts at the address of a map symbol.
+ *
+ * Returns 0 when every offset 0, map_len, 2 * map_len, ... below `end`
+ * has a global, untyped symbol in the maps section and the walk stops
+ * exactly at `end`; -1 otherwise.
+ */
+static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end)
+{
+	GElf_Sym sym;
+	int off, i;
+
+	for (off = 0; off < end; off += ctx->map_len) {
+		bool found = false;
+
+		/* Order doesn't need to be linear here, hence we walk
+		 * the table again.
+		 */
+		for (i = 0; i < ctx->sym_num && !found; i++) {
+			if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
+				continue;
+			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+			    GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
+			    sym.st_shndx != ctx->sec_maps)
+				continue;
+
+			found = sym.st_value == off;
+		}
+
+		/* Decide after the walk: a check inside the loop is never
+		 * reached when the last symbol table entry is filtered out
+		 * above, which would let a missing offset go unnoticed.
+		 */
+		if (!found)
+			return -1;
+	}
+
+	return off == end ? 0 : -1;
+}
+
+/* Turn the raw maps section copy into an array of struct bpf_elf_map.
+ *
+ * On entry ctx->map_num is the byte size of the maps section. The
+ * per-map length is derived by dividing it by the number of map
+ * symbols. Entries shorter than struct bpf_elf_map (but at least
+ * struct bpf_elf_map_min) are expanded with zeroed trailing members.
+ * On success ctx->map_num holds the number of maps and 0 is returned;
+ * -EINVAL is returned for any unsupported layout.
+ */
+static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx)
+{
+ struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {};
+ int i, sym_num = bpf_map_num_sym(ctx);
+ __u8 *buff;
+
+ if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) {
+ fprintf(stderr, "%u maps not supported in current map section!\n",
+ sym_num);
+ return -EINVAL;
+ }
+
+ /* The section must split evenly across the symbols and be a
+ * multiple of 4 bytes.
+ */
+ if (ctx->map_num % sym_num != 0 ||
+ ctx->map_num % sizeof(__u32) != 0) {
+ fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n");
+ return -EINVAL;
+ }
+
+ ctx->map_len = ctx->map_num / sym_num;
+ if (bpf_map_verify_all_offs(ctx, ctx->map_num)) {
+ fprintf(stderr, "Different struct bpf_elf_map in use!\n");
+ return -EINVAL;
+ }
+
+ /* Same layout as ours: the raw copy is usable as is. */
+ if (ctx->map_len == sizeof(struct bpf_elf_map)) {
+ ctx->map_num = sym_num;
+ return 0;
+ } else if (ctx->map_len > sizeof(struct bpf_elf_map)) {
+ fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n");
+ return -EINVAL;
+ } else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) {
+ fprintf(stderr, "struct bpf_elf_map too small, not supported!\n");
+ return -EINVAL;
+ }
+
+ /* Shorter layout: copy each entry into a zeroed full-size slot. */
+ ctx->map_num = sym_num;
+ for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num;
+ i++, buff += ctx->map_len) {
+ /* The fixup leaves the rest of the members as zero, which
+ * is fine currently, but option exist to set some other
+ * default value as well when needed in future.
+ */
+ memcpy(&fixup[i], buff, ctx->map_len);
+ }
+
+ memcpy(ctx->maps, fixup, sizeof(fixup));
+
+ printf("Note: %zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n",
+ sizeof(struct bpf_elf_map) - ctx->map_len);
+ return 0;
+}
+
+/* Copy the license section into ctx->license and mark it as done.
+ *
+ * Returns 0 on success, -ENOMEM if the section does not fit.
+ */
+static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
+			     struct bpf_elf_sec_data *data)
+{
+	const Elf_Data *lic = data->sec_data;
+
+	if (lic->d_size > sizeof(ctx->license))
+		return -ENOMEM;
+
+	memcpy(ctx->license, lic->d_buf, lic->d_size);
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/* Remember the symbol table data and the number of entries it holds.
+ *
+ * Returns 0 on success and marks the section as done. Returns -EINVAL
+ * for a section header with a zero entry size, which a malformed object
+ * could carry and which would otherwise cause a division by zero.
+ */
+static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
+			    struct bpf_elf_sec_data *data)
+{
+	if (!data->sec_hdr.sh_entsize)
+		return -EINVAL;
+
+	ctx->sym_tab = data->sec_data;
+	ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
+	ctx->sec_done[section] = true;
+	return 0;
+}
+
+/* Remember the string table data and mark its section as done.
+ * Always returns 0.
+ */
+static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
+			    struct bpf_elf_sec_data *data)
+{
+	ctx->sec_done[section] = true;
+	ctx->str_tab = data->sec_data;
+
+	return 0;
+}
+
+/* True once symbol table, string table and maps section are all known. */
+static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
+{
+	if (!ctx->sym_tab || !ctx->str_tab)
+		return false;
+
+	return ctx->sec_maps != 0;
+}
+
+/* Collect all non-program data from the object: maps, license, symbol
+ * table and string table. When map data is present, the maps are fixed
+ * up and attached as well.
+ *
+ * Returns a negative value if no section could be read at all, or if
+ * parsing a recognized section, the map fixup or the map attachment
+ * fails; 0 otherwise.
+ */
+static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
+{
+	struct bpf_elf_sec_data data;
+	int i, ret = -1;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		/* Sections that cannot be read (e.g. empty ones) are simply
+		 * skipped. Their error code must not be kept in ret, or a
+		 * skipped trailing section would fail the whole load.
+		 */
+		if (bpf_fill_section_data(ctx, i, &data) < 0)
+			continue;
+
+		ret = 0;
+		if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+		    !strcmp(data.sec_name, ELF_SECTION_MAPS))
+			ret = bpf_fetch_maps_begin(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
+			 !strcmp(data.sec_name, ELF_SECTION_LICENSE))
+			ret = bpf_fetch_license(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
+			 !strcmp(data.sec_name, ".symtab"))
+			ret = bpf_fetch_symtab(ctx, i, &data);
+		else if (data.sec_hdr.sh_type == SHT_STRTAB &&
+			 !strcmp(data.sec_name, ".strtab"))
+			ret = bpf_fetch_strtab(ctx, i, &data);
+		if (ret < 0) {
+			fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
+				i);
+			return ret;
+		}
+	}
+
+	if (bpf_has_map_data(ctx)) {
+		ret = bpf_fetch_maps_end(ctx);
+		if (ret < 0) {
+			fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n");
+			return ret;
+		}
+
+		ret = bpf_maps_attach_all(ctx);
+		if (ret < 0) {
+			fprintf(stderr, "Error loading maps into kernel!\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+/* Load program section `section` without applying relocations.
+ *
+ * Searches the not yet processed sections for an executable PROGBITS
+ * section with the given name. *sseen is set once such a section is
+ * found. Returns the program fd, the negative result of
+ * bpf_prog_attach(), or -1 when no section matched.
+ */
+static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section,
+			  bool *sseen)
+{
+	struct bpf_elf_sec_data data;
+	int i, fd;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		if (ctx->sec_done[i])
+			continue;
+		if (bpf_fill_section_data(ctx, i, &data) < 0)
+			continue;
+		if (data.sec_hdr.sh_type != SHT_PROGBITS)
+			continue;
+		if (!(data.sec_hdr.sh_flags & SHF_EXECINSTR))
+			continue;
+		if (strcmp(data.sec_name, section))
+			continue;
+
+		struct bpf_elf_prog prog = {
+			.type		= ctx->type,
+			.insns		= data.sec_data->d_buf,
+			.size		= data.sec_data->d_size,
+			.license	= ctx->license,
+		};
+
+		*sseen = true;
+
+		fd = bpf_prog_attach(section, &prog, ctx);
+		if (fd >= 0)
+			ctx->sec_done[i] = true;
+		return fd;
+	}
+
+	return -1;
+}
+
+/* Patch map file descriptors into the instructions of a program section.
+ *
+ * Every entry of the relocation section must refer to a 64 bit
+ * immediate load instruction and to a symbol in the maps section. The
+ * instruction is rewritten to carry the fd of the referenced map with
+ * src_reg set to BPF_PSEUDO_MAP_FD.
+ *
+ * Returns 0 on success, -EIO or -EINVAL on malformed relocation data
+ * (including a relocation section header with a zero entry size).
+ */
+static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
+			       struct bpf_elf_sec_data *data_relo,
+			       struct bpf_elf_sec_data *data_insn)
+{
+	Elf_Data *idata = data_insn->sec_data;
+	GElf_Shdr *rhdr = &data_relo->sec_hdr;
+	struct bpf_insn *insns = idata->d_buf;
+	unsigned int num_insns = idata->d_size / sizeof(*insns);
+	int relo_ent, relo_num;
+
+	/* A malformed object must not trigger a division by zero. */
+	if (!rhdr->sh_entsize)
+		return -EINVAL;
+
+	relo_num = rhdr->sh_size / rhdr->sh_entsize;
+
+	for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
+		unsigned int ioff, rmap;
+		GElf_Rel relo;
+		GElf_Sym sym;
+
+		if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
+			return -EIO;
+
+		ioff = relo.r_offset / sizeof(struct bpf_insn);
+		if (ioff >= num_insns ||
+		    insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+			fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
+				ioff);
+			if (ioff < num_insns &&
+			    insns[ioff].code == (BPF_JMP | BPF_CALL))
+				fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
+			return -EINVAL;
+		}
+
+		if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
+			return -EIO;
+		if (sym.st_shndx != ctx->sec_maps) {
+			fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
+				relo_ent, sym.st_shndx);
+			return -EIO;
+		}
+
+		/* Map index within the maps section; it must refer to a
+		 * map that has an fd recorded.
+		 */
+		rmap = sym.st_value / ctx->map_len;
+		if (rmap >= ARRAY_SIZE(ctx->map_fds))
+			return -EINVAL;
+		if (!ctx->map_fds[rmap])
+			return -EINVAL;
+
+		if (ctx->verbose)
+			fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
+				bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
+				data_insn->sec_name, ioff);
+
+		insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
+		insns[ioff].imm = ctx->map_fds[rmap];
+	}
+
+	return 0;
+}
+
+/* Load program section `section` after applying its map relocations.
+ *
+ * Looks for a relocation section whose sh_info refers to an executable
+ * PROGBITS section with the given name. *sseen is set once such a pair
+ * is found. *lderr is set when relocating or loading that section
+ * failed, which tells the caller not to retry the same section without
+ * relocations.
+ *
+ * Returns the program fd, a negative error, or -1 when no matching
+ * relocation section exists.
+ */
+static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
+			       bool *lderr, bool *sseen)
+{
+	struct bpf_elf_sec_data data_relo, data_insn;
+	struct bpf_elf_prog prog;
+	int ret, idx, i, fd = -1;
+
+	for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+		ret = bpf_fill_section_data(ctx, i, &data_relo);
+		if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
+			continue;
+
+		idx = data_relo.sec_hdr.sh_info;
+
+		ret = bpf_fill_section_data(ctx, idx, &data_insn);
+		if (ret < 0 ||
+		    !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
+		      data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
+		      !strcmp(data_insn.sec_name, section)))
+			continue;
+
+		*sseen = true;
+
+		ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
+		if (ret < 0) {
+			/* Without this the caller would go on to load the
+			 * section with its relocations left unapplied.
+			 */
+			*lderr = true;
+			return ret;
+		}
+
+		memset(&prog, 0, sizeof(prog));
+		prog.type = ctx->type;
+		prog.insns = data_insn.sec_data->d_buf;
+		prog.size = data_insn.sec_data->d_size;
+		prog.license = ctx->license;
+
+		fd = bpf_prog_attach(section, &prog, ctx);
+		if (fd < 0) {
+			*lderr = true;
+			return fd;
+		}
+
+		ctx->sec_done[i] = true;
+		ctx->sec_done[idx] = true;
+		break;
+	}
+
+	return fd;
+}
+
+/* Load program section `section`, with relocations when map data exists.
+ *
+ * The relocating loader is tried first if map data is available. Unless
+ * it flagged a load error, the plain loader is tried next. A diagnostic
+ * is printed when the section was never seen. Returns the program fd or
+ * a negative value.
+ */
+static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
+{
+	bool load_failed = false, found = false;
+	int fd = -1;
+
+	if (bpf_has_map_data(ctx)) {
+		fd = bpf_fetch_prog_relo(ctx, section, &load_failed, &found);
+		if (fd >= 0)
+			return fd;
+	}
+
+	if (!load_failed)
+		fd = bpf_fetch_prog(ctx, section, &found);
+	if (fd >= 0 || found)
+		return fd;
+
+	fprintf(stderr, "Program section \'%s\' not found in ELF file!\n",
+		section);
+	return fd;
+}
+
+/* Return the index of the attached program array map carrying `id`,
+ * or -1 when there is none.
+ */
+static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
+{
+	int slot;
+
+	for (slot = 0; slot < ARRAY_SIZE(ctx->map_fds); slot++) {
+		const struct bpf_elf_map *map = &ctx->maps[slot];
+
+		if (!ctx->map_fds[slot])
+			continue;
+		if (map->id == id && map->type == BPF_MAP_TYPE_PROG_ARRAY)
+			return slot;
+	}
+
+	return -1;
+}
+
+/* Populate program array maps from sections named "<map id>/<key>".
+ *
+ * Every not yet processed section whose name parses as two integers
+ * separated by '/' and whose first number matches an attached program
+ * array map is loaded as a program; its fd is then stored in that map
+ * under the second number as key.
+ *
+ * Returns 0 on success, -EIO if loading such a section fails, or
+ * -errno if updating the map fails.
+ */
+static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
+{
+ struct bpf_elf_sec_data data;
+ uint32_t map_id, key_id;
+ int fd, i, ret, idx;
+
+ for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
+ if (ctx->sec_done[i])
+ continue;
+
+ ret = bpf_fill_section_data(ctx, i, &data);
+ if (ret < 0)
+ continue;
+
+ /* Sections not following the naming scheme are skipped. */
+ ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
+ if (ret != 2)
+ continue;
+
+ idx = bpf_find_map_by_id(ctx, map_id);
+ if (idx < 0)
+ continue;
+
+ fd = bpf_fetch_prog_sec(ctx, data.sec_name);
+ if (fd < 0)
+ return -EIO;
+
+ ret = bpf_map_update(ctx->map_fds[idx], &key_id,
+ &fd, BPF_ANY);
+ if (ret < 0) {
+ if (errno == E2BIG)
+ fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
+ key_id, map_id);
+ return -errno;
+ }
+
+ ctx->sec_done[i] = true;
+ }
+
+ return 0;
+}
+
+/* Record device and inode number of the object file in ctx->stat.
+ *
+ * ctx->stat is zeroed first and stays zeroed when fstat() fails, in
+ * which case a diagnostic is printed.
+ */
+static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
+{
+	struct stat info;
+
+	memset(&ctx->stat, 0, sizeof(ctx->stat));
+
+	if (fstat(ctx->obj_fd, &info) < 0) {
+		fprintf(stderr, "Stat of elf file failed: %s\n",
+			strerror(errno));
+		return;
+	}
+
+	ctx->stat.st_dev = info.st_dev;
+	ctx->stat.st_ino = info.st_ino;
+}
+
+/* Read the next "<id> <path>" entry from the pinning database.
+ *
+ * Leading blanks are skipped; empty lines and lines starting with '#'
+ * are ignored. Returns 1 with *id and path filled in when an entry was
+ * parsed, 0 at end of file, and -1 on a malformed line, in which case
+ * the offending text is copied into path.
+ *
+ * The caller must provide at least PATH_MAX bytes for path: lines are
+ * read in chunks of at most PATH_MAX - 1 characters and copied without
+ * any further length check.
+ */
+static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
+{
+ char buff[PATH_MAX];
+
+ while (fgets(buff, sizeof(buff), fp)) {
+ char *ptr = buff;
+
+ while (*ptr == ' ' || *ptr == '\t')
+ ptr++;
+
+ if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
+ continue;
+
+ /* Second form: entry followed by a trailing '#' comment. */
+ if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
+ sscanf(ptr, "%i %s #", id, path) != 2) {
+ strcpy(path, ptr);
+ return -1;
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+/* True for the built-in pinning ids, which a database entry must not
+ * redefine.
+ */
+static bool bpf_pinning_reserved(uint32_t pinning)
+{
+	return pinning == PIN_NONE ||
+	       pinning == PIN_OBJECT_NS ||
+	       pinning == PIN_GLOBAL_NS;
+}
+
+/* Fill ctx->ht with the custom pinning entries read from db_file.
+ *
+ * A database that cannot be opened is silently ignored. Reading stops
+ * at the first malformed line; entries using a reserved id and entries
+ * for which memory allocation fails are skipped with a message.
+ */
+static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
+{
+ struct bpf_hash_entry *entry;
+ char subpath[PATH_MAX] = {};
+ uint32_t pinning;
+ FILE *fp;
+ int ret;
+
+ fp = fopen(db_file, "r");
+ if (!fp)
+ return;
+
+ while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
+ /* On -1, subpath holds the text of the offending line. */
+ if (ret == -1) {
+ fprintf(stderr, "Database %s is corrupted at: %s\n",
+ db_file, subpath);
+ fclose(fp);
+ return;
+ }
+
+ if (bpf_pinning_reserved(pinning)) {
+ fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
+ db_file, pinning);
+ continue;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (!entry) {
+ fprintf(stderr, "No memory left for db entry!\n");
+ continue;
+ }
+
+ entry->pinning = pinning;
+ entry->subpath = strdup(subpath);
+ if (!entry->subpath) {
+ fprintf(stderr, "No memory left for db entry!\n");
+ free(entry);
+ continue;
+ }
+
+ /* Push onto the head of the bucket chosen by the low id bits. */
+ entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
+ ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
+ }
+
+ fclose(fp);
+}
+
+/* Free every entry of the custom pinning hash table, leaving all
+ * buckets empty.
+ */
+static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < ARRAY_SIZE(ctx->ht); bucket++) {
+		struct bpf_hash_entry *cur = ctx->ht[bucket], *next;
+
+		while (cur) {
+			next = cur->next;
+			free((char *)cur->subpath);
+			free(cur);
+			cur = next;
+		}
+
+		ctx->ht[bucket] = NULL;
+	}
+}
+
+/* Validate the ELF header of the object.
+ *
+ * The file must be a relocatable object of the current ELF version for
+ * machine EM_NONE or EM_BPF (-EINVAL otherwise), and its byte order
+ * must match the host's (-EIO otherwise). Returns 0 when acceptable.
+ */
+static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
+{
+	const GElf_Ehdr *hdr = &ctx->elf_hdr;
+	const bool host_big_endian = htons(1) == 1;
+	bool machine_ok = hdr->e_machine == EM_NONE ||
+			  hdr->e_machine == EM_BPF;
+
+	if (hdr->e_type != ET_REL || !machine_ok ||
+	    hdr->e_version != EV_CURRENT) {
+		fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
+		return -EINVAL;
+	}
+
+	switch (hdr->e_ident[EI_DATA]) {
+	case ELFDATA2LSB:
+		if (host_big_endian) {
+			fprintf(stderr,
+				"We are big endian, eBPF object is little endian!\n");
+			return -EIO;
+		}
+		break;
+	case ELFDATA2MSB:
+		if (!host_big_endian) {
+			fprintf(stderr,
+				"We are little endian, eBPF object is big endian!\n");
+			return -EIO;
+		}
+		break;
+	default:
+		fprintf(stderr, "ELF format error, wrong endianness info?\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Set up the loader context for the ELF object at pathname.
+ *
+ * Initializes the environment, opens the file, validates its ELF
+ * header, allocates the per-section bookkeeping (and the log buffer
+ * when verbose), and loads the custom pinning database.
+ *
+ * Returns 0 on success. On failure a negative value is returned and
+ * everything acquired so far is released again.
+ */
+static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
+			    enum bpf_prog_type type, bool verbose)
+{
+	int ret = -EINVAL;
+
+	if (elf_version(EV_CURRENT) == EV_NONE ||
+	    bpf_init_env(pathname))
+		return ret;
+
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->verbose = verbose;
+	ctx->type = type;
+
+	ctx->obj_fd = open(pathname, O_RDONLY);
+	if (ctx->obj_fd < 0)
+		return ctx->obj_fd;
+
+	ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
+	if (!ctx->elf_fd) {
+		ret = -EINVAL;
+		goto out_fd;
+	}
+
+	/* The Elf handle exists from here on, so every later failure has
+	 * to go through out_elf to release it.
+	 */
+	if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
+		ret = -EINVAL;
+		goto out_elf;
+	}
+
+	if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
+	    &ctx->elf_hdr) {
+		ret = -EIO;
+		goto out_elf;
+	}
+
+	ret = bpf_elf_check_ehdr(ctx);
+	if (ret < 0)
+		goto out_elf;
+
+	ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
+			       sizeof(*(ctx->sec_done)));
+	if (!ctx->sec_done) {
+		ret = -ENOMEM;
+		goto out_elf;
+	}
+
+	if (ctx->verbose && bpf_log_realloc(ctx)) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
+	bpf_save_finfo(ctx);
+	bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
+
+	return 0;
+out_free:
+	free(ctx->sec_done);
+out_elf:
+	elf_end(ctx->elf_fd);
+out_fd:
+	close(ctx->obj_fd);
+	return ret;
+}
+
+/* Count the leading non-zero entries of ctx->map_fds. */
+static int bpf_maps_count(struct bpf_elf_ctx *ctx)
+{
+	int count = 0;
+
+	while (count < ARRAY_SIZE(ctx->map_fds) && ctx->map_fds[count])
+		count++;
+
+	return count;
+}
+
+/* Close every map fd recorded in ctx->map_fds; zero slots are skipped. */
+static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
+{
+	int slot;
+
+	for (slot = 0; slot < ARRAY_SIZE(ctx->map_fds); slot++) {
+		int map_fd = ctx->map_fds[slot];
+
+		if (!map_fd)
+			continue;
+
+		close(map_fd);
+	}
+}
+
+/* Release everything held by the loader context.
+ *
+ * The map fds are closed only when `failure` is set; otherwise they are
+ * left open.
+ */
+static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
+{
+	if (failure)
+		bpf_maps_teardown(ctx);
+
+	bpf_hash_destroy(ctx);
+
+	free(ctx->log);
+	free(ctx->sec_done);
+
+	elf_end(ctx->elf_fd);
+	close(ctx->obj_fd);
+}
+
+/* Single file-scope loader context, used by bpf_obj_open() and
+ * bpf_send_map_fds().
+ */
+static struct bpf_elf_ctx __ctx;
+
+/* Load program section `section` of the ELF object at pathname.
+ *
+ * Sets up the file-scope context, fetches maps and other ancillary
+ * data, loads the requested section and fills the program arrays. The
+ * context is destroyed again before returning; map fds are closed only
+ * on failure.
+ *
+ * Returns the program fd on success, a negative value on failure.
+ */
+static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
+			const char *section, bool verbose)
+{
+	struct bpf_elf_ctx *ctx = &__ctx;
+	int fd = 0, ret;
+
+	ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
+	if (ret < 0) {
+		fprintf(stderr, "Cannot initialize ELF context!\n");
+		return ret;
+	}
+
+	ret = bpf_fetch_ancillary(ctx);
+	if (ret < 0) {
+		fprintf(stderr, "Error fetching ELF ancillary data!\n");
+		goto out;
+	}
+
+	fd = bpf_fetch_prog_sec(ctx, section);
+	if (fd < 0) {
+		fprintf(stderr, "Error fetching program/map!\n");
+		ret = fd;
+		goto out;
+	}
+
+	ret = bpf_fill_prog_arrays(ctx);
+	if (ret < 0)
+		fprintf(stderr, "Error filling program arrays!\n");
+out:
+	bpf_elf_ctx_destroy(ctx, ret < 0);
+	if (ret < 0) {
+		/* fd is 0 when no program was loaded and negative when the
+		 * load itself failed; only a real fd is closed.
+		 */
+		if (fd > 0)
+			close(fd);
+		return ret;
+	}
+
+	return fd;
+}
+
+/* Send `entries` map fds and their descriptions over socket `fd`.
+ *
+ * The data goes out in batches of at most BPF_SCM_MAX_FDS entries, one
+ * sendmsg() per batch. Returns 0 on success; on failure the negative
+ * sendmsg() result, or -1 if sendmsg() returned 0.
+ */
+static int
+bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
+ const struct bpf_map_data *aux, unsigned int entries)
+{
+ struct bpf_map_set_msg msg = {
+ .aux.uds_ver = BPF_SCM_AUX_VER,
+ .aux.num_ent = entries,
+ };
+ int *cmsg_buf, min_fd;
+ char *amsg_buf;
+ int i;
+
+ /* NOTE(review): strncpy() does not terminate obj_name when aux->obj
+ * fills the whole field - confirm receivers cope with that.
+ */
+ strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
+ memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
+
+ cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
+ amsg_buf = (char *)msg.aux.ent;
+
+ for (i = 0; i < entries; i += min_fd) {
+ int ret;
+
+ /* Size of this batch. */
+ min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
+ bpf_map_set_init_single(&msg, min_fd);
+
+ memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
+ memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
+
+ ret = sendmsg(fd, &msg.hdr, 0);
+ if (ret <= 0)
+ return ret ? : -1;
+ }
+
+ return 0;
+}
+
+/* Receive map fds and their descriptions from socket `fd`.
+ *
+ * Messages are read until min(entries, announced number of entries)
+ * fds have been collected; the announced number is taken from the
+ * num_ent field of each received message. Received fds are stored in
+ * `fds`, the descriptions and the message header data in `aux`.
+ *
+ * Returns 0 on success. On failure returns the negative recvmsg()
+ * result (or -1 if it returned 0), -EINVAL for a missing or malformed
+ * SCM_RIGHTS control message, -EIO for truncated control data, or
+ * -ENOSYS for an unexpected message version.
+ */
+static int
+bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
+ unsigned int entries)
+{
+ struct bpf_map_set_msg msg;
+ int *cmsg_buf, min_fd;
+ char *amsg_buf, *mmsg_buf;
+ unsigned int needed = 1;
+ int i;
+
+ cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
+ amsg_buf = (char *)msg.aux.ent;
+ mmsg_buf = (char *)&msg.aux;
+
+ for (i = 0; i < min(entries, needed); i += min_fd) {
+ struct cmsghdr *cmsg;
+ int ret;
+
+ /* Prepare the message for the entries still outstanding. */
+ min_fd = min(entries, entries - i);
+ bpf_map_set_init_single(&msg, min_fd);
+
+ ret = recvmsg(fd, &msg.hdr, 0);
+ if (ret <= 0)
+ return ret ? : -1;
+
+ cmsg = CMSG_FIRSTHDR(&msg.hdr);
+ if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
+ return -EINVAL;
+ if (msg.hdr.msg_flags & MSG_CTRUNC)
+ return -EIO;
+ if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
+ return -ENOSYS;
+
+ /* Number of fds carried by this message. */
+ min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
+ if (min_fd > entries || min_fd <= 0)
+ return -EINVAL;
+
+ memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
+ memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
+ memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
+
+ needed = aux->num_ent;
+ }
+
+ return 0;
+}
+
+/* Hand the map fds of the last loaded object over to the unix domain
+ * socket at `path`.
+ *
+ * `obj` is transmitted as the object name. Once the socket is
+ * connected, the local map fds are closed whether or not sending
+ * succeeded.
+ *
+ * Returns 0 on success. Returns -1 if the socket cannot be created or
+ * connected, otherwise the negative result of the send step.
+ */
+int bpf_send_map_fds(const char *path, const char *obj)
+{
+	struct bpf_elf_ctx *ctx = &__ctx;
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	struct bpf_map_data bpf_aux = {
+		.fds = ctx->map_fds,
+		.ent = ctx->maps,
+		.st = &ctx->stat,
+		.obj = obj,
+	};
+	int fd, ret;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open socket: %s\n",
+			strerror(errno));
+		return -1;
+	}
+
+	strncpy(addr.sun_path, path, sizeof(addr.sun_path));
+
+	ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0) {
+		fprintf(stderr, "Cannot connect to %s: %s\n",
+			path, strerror(errno));
+		/* Do not leak the socket on this early exit. */
+		close(fd);
+		return -1;
+	}
+
+	ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
+			       bpf_maps_count(ctx));
+	if (ret < 0)
+		fprintf(stderr, "Cannot send fds to %s: %s\n",
+			path, strerror(errno));
+
+	bpf_maps_teardown(ctx);
+	close(fd);
+	return ret;
+}
+
+/* Receive map fds on a unix domain socket bound to `path`.
+ *
+ * Up to `entries` fds are stored in `fds`, their descriptions in `aux`.
+ * The socket file is unlinked again once receiving has finished.
+ *
+ * Returns 0 on success. Returns -1 if the socket cannot be created or
+ * bound, otherwise the negative result of the receive step.
+ */
+int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
+		     unsigned int entries)
+{
+	struct sockaddr_un addr = { .sun_family = AF_UNIX };
+	int fd, ret;
+
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot open socket: %s\n",
+			strerror(errno));
+		return -1;
+	}
+
+	strncpy(addr.sun_path, path, sizeof(addr.sun_path));
+
+	ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (ret < 0) {
+		fprintf(stderr, "Cannot bind to socket: %s\n",
+			strerror(errno));
+		/* Do not leak the socket on this early exit. */
+		close(fd);
+		return -1;
+	}
+
+	ret = bpf_map_set_recv(fd, fds, aux, entries);
+	if (ret < 0)
+		fprintf(stderr, "Cannot recv fds from %s: %s\n",
+			path, strerror(errno));
+
+	unlink(addr.sun_path);
+	close(fd);
+	return ret;
+}
+#endif /* HAVE_ELF */
diff --git a/tc/Makefile b/tc/Makefile
index dfa875b..f986fcb 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -1,5 +1,5 @@
TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o tc_monitor.o \
- tc_exec.o tc_bpf.o m_police.o m_estimator.o m_action.o m_ematch.o \
+ tc_exec.o m_police.o m_estimator.o m_action.o m_ematch.o \
emp_ematch.yacc.o emp_ematch.lex.o
include ../Config
@@ -94,11 +94,6 @@ ifneq ($(TC_CONFIG_NO_XT),y)
endif
endif
-ifeq ($(TC_CONFIG_ELF),y)
- CFLAGS += -DHAVE_ELF
- LDLIBS += -lelf
-endif
-
TCOBJ += $(TCMODULES)
LDLIBS += -L. -ltc -lm
diff --git a/tc/e_bpf.c b/tc/e_bpf.c
index d1f5d87..84f43e6 100644
--- a/tc/e_bpf.c
+++ b/tc/e_bpf.c
@@ -15,8 +15,8 @@
#include "utils.h"
#include "tc_util.h"
-#include "tc_bpf.h"
+#include "bpf_util.h"
#include "bpf_elf.h"
#include "bpf_scm.h"
diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index 665bc66..c4764d8 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -6,7 +6,7 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Authors: Daniel Borkmann <dborkman@...hat.com>
+ * Authors: Daniel Borkmann <daniel@...earbox.net>
*/
#include <stdio.h>
@@ -15,18 +15,12 @@
#include <linux/bpf.h>
#include "utils.h"
+
#include "tc_util.h"
-#include "tc_bpf.h"
+#include "bpf_util.h"
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_CLS;
-static const int nla_tbl[BPF_NLA_MAX] = {
- [BPF_NLA_OPS_LEN] = TCA_BPF_OPS_LEN,
- [BPF_NLA_OPS] = TCA_BPF_OPS,
- [BPF_NLA_FD] = TCA_BPF_FD,
- [BPF_NLA_NAME] = TCA_BPF_NAME,
-};
-
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ...\n");
@@ -52,7 +46,7 @@ static void explain(void)
fprintf(stderr, "pinned eBPF program.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where CLS_NAME refers to the section name containing the\n");
- fprintf(stderr, "classifier (default \'%s\').\n", bpf_default_section(bpf_type));
+ fprintf(stderr, "classifier (default \'%s\').\n", bpf_prog_to_default_section(bpf_type));
fprintf(stderr, "\n");
fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
@@ -61,6 +55,24 @@ static void explain(void)
fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n");
}
+/* Add a classic BPF program to the message as TCA_BPF_OPS_LEN and
+ * TCA_BPF_OPS attributes.
+ */
+static void bpf_cbpf_cb(void *nl, const struct sock_filter *ops, int ops_len)
+{
+	const size_t ops_bytes = ops_len * sizeof(struct sock_filter);
+
+	addattr16(nl, MAX_MSG, TCA_BPF_OPS_LEN, ops_len);
+	addattr_l(nl, MAX_MSG, TCA_BPF_OPS, ops, ops_bytes);
+}
+
+/* Add an eBPF program to the message: its fd as TCA_BPF_FD and the
+ * annotation string as TCA_BPF_NAME.
+ */
+static void bpf_ebpf_cb(void *nl, int fd, const char *annotation)
+{
+ addattr32(nl, MAX_MSG, TCA_BPF_FD, fd);
+ addattrstrz(nl, MAX_MSG, TCA_BPF_NAME, annotation);
+}
+
+/* Callbacks handed to bpf_parse_common() for the classifier. */
+static const struct bpf_cfg_ops bpf_cb_ops = {
+ .cbpf_cb = bpf_cbpf_cb,
+ .ebpf_cb = bpf_ebpf_cb,
+};
+
static int bpf_parse_opt(struct filter_util *qu, char *handle,
int argc, char **argv, struct nlmsghdr *n)
{
@@ -68,6 +80,7 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
struct tcmsg *t = NLMSG_DATA(n);
unsigned int bpf_gen_flags = 0;
unsigned int bpf_flags = 0;
+ struct bpf_cfg_in cfg = {};
bool seen_run = false;
struct rtattr *tail;
int ret = 0;
@@ -90,11 +103,17 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
NEXT_ARG();
opt_bpf:
seen_run = true;
- if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
- &bpf_obj, &bpf_uds_name, n)) {
- fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
+ cfg.argc = argc;
+ cfg.argv = argv;
+
+ if (bpf_parse_common(bpf_type, &cfg, &bpf_cb_ops, n))
return -1;
- }
+
+ argc = cfg.argc;
+ argv = cfg.argv;
+
+ bpf_obj = cfg.object;
+ bpf_uds_name = cfg.uds;
} else if (matches(*argv, "classid") == 0 ||
matches(*argv, "flowid") == 0) {
unsigned int handle;
@@ -143,7 +162,7 @@ opt_bpf:
if (bpf_gen_flags)
addattr32(n, MAX_MSG, TCA_BPF_FLAGS_GEN, bpf_gen_flags);
- if (bpf_obj && bpf_flags)
+ if (bpf_flags)
addattr32(n, MAX_MSG, TCA_BPF_FLAGS, bpf_flags);
tail->rta_len = (((void *)n) + n->nlmsg_len) - (void *)tail;
@@ -175,8 +194,6 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
if (tb[TCA_BPF_NAME])
fprintf(f, "%s ", rta_getattr_str(tb[TCA_BPF_NAME]));
- else if (tb[TCA_BPF_FD])
- fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD]));
if (tb[TCA_BPF_FLAGS]) {
unsigned int flags = rta_getattr_u32(tb[TCA_BPF_FLAGS]);
@@ -195,20 +212,17 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
fprintf(f, "skip_sw ");
}
- if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN]) {
+ if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN])
bpf_print_ops(f, tb[TCA_BPF_OPS],
rta_getattr_u16(tb[TCA_BPF_OPS_LEN]));
- fprintf(f, "\n");
- }
if (tb[TCA_BPF_POLICE]) {
fprintf(f, "\n");
tc_print_police(f, tb[TCA_BPF_POLICE]);
}
- if (tb[TCA_BPF_ACT]) {
+ if (tb[TCA_BPF_ACT])
tc_print_action(f, tb[TCA_BPF_ACT]);
- }
return 0;
}
diff --git a/tc/m_bpf.c b/tc/m_bpf.c
index 9bf2a85..e26b85d 100644
--- a/tc/m_bpf.c
+++ b/tc/m_bpf.c
@@ -17,18 +17,12 @@
#include <linux/tc_act/tc_bpf.h>
#include "utils.h"
+
#include "tc_util.h"
-#include "tc_bpf.h"
+#include "bpf_util.h"
static const enum bpf_prog_type bpf_type = BPF_PROG_TYPE_SCHED_ACT;
-static const int nla_tbl[BPF_NLA_MAX] = {
- [BPF_NLA_OPS_LEN] = TCA_ACT_BPF_OPS_LEN,
- [BPF_NLA_OPS] = TCA_ACT_BPF_OPS,
- [BPF_NLA_FD] = TCA_ACT_BPF_FD,
- [BPF_NLA_NAME] = TCA_ACT_BPF_NAME,
-};
-
static void explain(void)
{
fprintf(stderr, "Usage: ... bpf ... [ index INDEX ]\n");
@@ -50,7 +44,7 @@ static void explain(void)
fprintf(stderr, "pinned eBPF program.\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where ACT_NAME refers to the section name containing the\n");
- fprintf(stderr, "action (default \'%s\').\n", bpf_default_section(bpf_type));
+ fprintf(stderr, "action (default \'%s\').\n", bpf_prog_to_default_section(bpf_type));
fprintf(stderr, "\n");
fprintf(stderr, "Where UDS_FILE points to a unix domain socket file in order\n");
fprintf(stderr, "to hand off control of all created eBPF maps to an agent.\n");
@@ -59,11 +53,30 @@ static void explain(void)
fprintf(stderr, "explicitly specifies an action index upon creation.\n");
}
+static void bpf_cbpf_cb(void *nl, const struct sock_filter *ops, int ops_len)
+{
+ addattr16(nl, MAX_MSG, TCA_ACT_BPF_OPS_LEN, ops_len);
+ addattr_l(nl, MAX_MSG, TCA_ACT_BPF_OPS, ops,
+ ops_len * sizeof(struct sock_filter));
+}
+
+static void bpf_ebpf_cb(void *nl, int fd, const char *annotation)
+{
+ addattr32(nl, MAX_MSG, TCA_ACT_BPF_FD, fd);
+ addattrstrz(nl, MAX_MSG, TCA_ACT_BPF_NAME, annotation);
+}
+
+static const struct bpf_cfg_ops bpf_cb_ops = {
+ .cbpf_cb = bpf_cbpf_cb,
+ .ebpf_cb = bpf_ebpf_cb,
+};
+
static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv,
int tca_id, struct nlmsghdr *n)
{
const char *bpf_obj = NULL, *bpf_uds_name = NULL;
struct tc_act_bpf parm = { .action = TC_ACT_PIPE };
+ struct bpf_cfg_in cfg = {};
bool seen_run = false;
struct rtattr *tail;
int argc, ret = 0;
@@ -85,11 +98,17 @@ static int bpf_parse_opt(struct action_util *a, int *ptr_argc, char ***ptr_argv,
NEXT_ARG();
opt_bpf:
seen_run = true;
- if (bpf_parse_common(&argc, &argv, nla_tbl, bpf_type,
- &bpf_obj, &bpf_uds_name, n)) {
- fprintf(stderr, "Failed to retrieve (e)BPF data!\n");
+ cfg.argc = argc;
+ cfg.argv = argv;
+
+ if (bpf_parse_common(bpf_type, &cfg, &bpf_cb_ops, n))
return -1;
- }
+
+ argc = cfg.argc;
+ argv = cfg.argv;
+
+ bpf_obj = cfg.object;
+ bpf_uds_name = cfg.uds;
} else if (matches(*argv, "help") == 0) {
explain();
return -1;
@@ -151,8 +170,6 @@ static int bpf_print_opt(struct action_util *au, FILE *f, struct rtattr *arg)
if (tb[TCA_ACT_BPF_NAME])
fprintf(f, "%s ", rta_getattr_str(tb[TCA_ACT_BPF_NAME]));
- else if (tb[TCA_ACT_BPF_FD])
- fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_ACT_BPF_FD]));
if (tb[TCA_ACT_BPF_OPS] && tb[TCA_ACT_BPF_OPS_LEN]) {
bpf_print_ops(f, tb[TCA_ACT_BPF_OPS],
diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c
deleted file mode 100644
index b390f7e..0000000
--- a/tc/tc_bpf.c
+++ /dev/null
@@ -1,2010 +0,0 @@
-/*
- * tc_bpf.c BPF common code
- *
- * This program is free software; you can distribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Daniel Borkmann <dborkman@...hat.com>
- * Jiri Pirko <jiri@...nulli.us>
- * Alexei Starovoitov <ast@...mgrid.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <stdarg.h>
-#include <limits.h>
-
-#ifdef HAVE_ELF
-#include <libelf.h>
-#include <gelf.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/un.h>
-#include <sys/vfs.h>
-#include <sys/mount.h>
-#include <sys/syscall.h>
-#include <sys/sendfile.h>
-#include <sys/resource.h>
-
-#include <linux/bpf.h>
-#include <linux/filter.h>
-#include <linux/if_alg.h>
-
-#include <arpa/inet.h>
-
-#include "utils.h"
-
-#include "bpf_elf.h"
-#include "bpf_scm.h"
-
-#include "tc_util.h"
-#include "tc_bpf.h"
-
-#ifndef AF_ALG
-#define AF_ALG 38
-#endif
-
-#ifndef EM_BPF
-#define EM_BPF 247
-#endif
-
-#ifdef HAVE_ELF
-static int bpf_obj_open(const char *path, enum bpf_prog_type type,
- const char *sec, bool verbose);
-#else
-static int bpf_obj_open(const char *path, enum bpf_prog_type type,
- const char *sec, bool verbose)
-{
- fprintf(stderr, "No ELF library support compiled in.\n");
- errno = ENOSYS;
- return -1;
-}
-#endif
-
-static inline __u64 bpf_ptr_to_u64(const void *ptr)
-{
- return (__u64)(unsigned long)ptr;
-}
-
-static int bpf(int cmd, union bpf_attr *attr, unsigned int size)
-{
-#ifdef __NR_bpf
- return syscall(__NR_bpf, cmd, attr, size);
-#else
- fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
- errno = ENOSYS;
- return -1;
-#endif
-}
-
-static int bpf_map_update(int fd, const void *key, const void *value,
- uint64_t flags)
-{
- union bpf_attr attr = {};
-
- attr.map_fd = fd;
- attr.key = bpf_ptr_to_u64(key);
- attr.value = bpf_ptr_to_u64(value);
- attr.flags = flags;
-
- return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
-}
-
-static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
- char **bpf_string, bool *need_release,
- const char separator)
-{
- char sp;
-
- if (from_file) {
- size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,");
- char *tmp_string;
- FILE *fp;
-
- tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len;
- tmp_string = calloc(1, tmp_len);
- if (tmp_string == NULL)
- return -ENOMEM;
-
- fp = fopen(arg, "r");
- if (fp == NULL) {
- perror("Cannot fopen");
- free(tmp_string);
- return -ENOENT;
- }
-
- if (!fgets(tmp_string, tmp_len, fp)) {
- free(tmp_string);
- fclose(fp);
- return -EIO;
- }
-
- fclose(fp);
-
- *need_release = true;
- *bpf_string = tmp_string;
- } else {
- *need_release = false;
- *bpf_string = arg;
- }
-
- if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 ||
- sp != separator) {
- if (*need_release)
- free(*bpf_string);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int bpf_ops_parse(int argc, char **argv, struct sock_filter *bpf_ops,
- bool from_file)
-{
- char *bpf_string, *token, separator = ',';
- int ret = 0, i = 0;
- bool need_release;
- __u16 bpf_len = 0;
-
- if (argc < 1)
- return -EINVAL;
- if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string,
- &need_release, separator))
- return -EINVAL;
- if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) {
- ret = -EINVAL;
- goto out;
- }
-
- token = bpf_string;
- while ((token = strchr(token, separator)) && (++token)[0]) {
- if (i >= bpf_len) {
- fprintf(stderr, "Real program length exceeds encoded length parameter!\n");
- ret = -EINVAL;
- goto out;
- }
-
- if (sscanf(token, "%hu %hhu %hhu %u,",
- &bpf_ops[i].code, &bpf_ops[i].jt,
- &bpf_ops[i].jf, &bpf_ops[i].k) != 4) {
- fprintf(stderr, "Error at instruction %d!\n", i);
- ret = -EINVAL;
- goto out;
- }
-
- i++;
- }
-
- if (i != bpf_len) {
- fprintf(stderr, "Parsed program length is less than encoded length parameter!\n");
- ret = -EINVAL;
- goto out;
- }
- ret = bpf_len;
-out:
- if (need_release)
- free(bpf_string);
-
- return ret;
-}
-
-void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
-{
- struct sock_filter *ops = (struct sock_filter *) RTA_DATA(bpf_ops);
- int i;
-
- if (len == 0)
- return;
-
- fprintf(f, "bytecode \'%u,", len);
-
- for (i = 0; i < len - 1; i++)
- fprintf(f, "%hu %hhu %hhu %u,", ops[i].code, ops[i].jt,
- ops[i].jf, ops[i].k);
-
- fprintf(f, "%hu %hhu %hhu %u\'", ops[i].code, ops[i].jt,
- ops[i].jf, ops[i].k);
-}
-
-static void bpf_map_pin_report(const struct bpf_elf_map *pin,
- const struct bpf_elf_map *obj)
-{
- fprintf(stderr, "Map specification differs from pinned file!\n");
-
- if (obj->type != pin->type)
- fprintf(stderr, " - Type: %u (obj) != %u (pin)\n",
- obj->type, pin->type);
- if (obj->size_key != pin->size_key)
- fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n",
- obj->size_key, pin->size_key);
- if (obj->size_value != pin->size_value)
- fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n",
- obj->size_value, pin->size_value);
- if (obj->max_elem != pin->max_elem)
- fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n",
- obj->max_elem, pin->max_elem);
- if (obj->flags != pin->flags)
- fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n",
- obj->flags, pin->flags);
-
- fprintf(stderr, "\n");
-}
-
-static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map,
- int length)
-{
- char file[PATH_MAX], buff[4096];
- struct bpf_elf_map tmp = {}, zero = {};
- unsigned int val;
- FILE *fp;
-
- snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
-
- fp = fopen(file, "r");
- if (!fp) {
- fprintf(stderr, "No procfs support?!\n");
- return -EIO;
- }
-
- while (fgets(buff, sizeof(buff), fp)) {
- if (sscanf(buff, "map_type:\t%u", &val) == 1)
- tmp.type = val;
- else if (sscanf(buff, "key_size:\t%u", &val) == 1)
- tmp.size_key = val;
- else if (sscanf(buff, "value_size:\t%u", &val) == 1)
- tmp.size_value = val;
- else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
- tmp.max_elem = val;
- else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
- tmp.flags = val;
- }
-
- fclose(fp);
-
- if (!memcmp(&tmp, map, length)) {
- return 0;
- } else {
- /* If kernel doesn't have eBPF-related fdinfo, we cannot do much,
- * so just accept it. We know we do have an eBPF fd and in this
- * case, everything is 0. It is guaranteed that no such map exists
- * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC.
- */
- if (!memcmp(&tmp, &zero, length))
- return 0;
-
- bpf_map_pin_report(&tmp, map);
- return -EINVAL;
- }
-}
-
-static int bpf_mnt_fs(const char *target)
-{
- bool bind_done = false;
-
- while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
- if (errno != EINVAL || bind_done) {
- fprintf(stderr, "mount --make-private %s failed: %s\n",
- target, strerror(errno));
- return -1;
- }
-
- if (mount(target, target, "none", MS_BIND, NULL)) {
- fprintf(stderr, "mount --bind %s %s failed: %s\n",
- target, target, strerror(errno));
- return -1;
- }
-
- bind_done = true;
- }
-
- if (mount("bpf", target, "bpf", 0, NULL)) {
- fprintf(stderr, "mount -t bpf bpf %s failed: %s\n",
- target, strerror(errno));
- return -1;
- }
-
- return 0;
-}
-
-static int bpf_valid_mntpt(const char *mnt, unsigned long magic)
-{
- struct statfs st_fs;
-
- if (statfs(mnt, &st_fs) < 0)
- return -ENOENT;
- if ((unsigned long)st_fs.f_type != magic)
- return -ENOENT;
-
- return 0;
-}
-
-static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
- char *mnt, int len,
- const char * const *known_mnts)
-{
- const char * const *ptr;
- char type[100];
- FILE *fp;
-
- if (known_mnts) {
- ptr = known_mnts;
- while (*ptr) {
- if (bpf_valid_mntpt(*ptr, magic) == 0) {
- strncpy(mnt, *ptr, len - 1);
- mnt[len - 1] = 0;
- return mnt;
- }
- ptr++;
- }
- }
-
- fp = fopen("/proc/mounts", "r");
- if (fp == NULL || len != PATH_MAX)
- return NULL;
-
- while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n",
- mnt, type) == 2) {
- if (strcmp(type, fstype) == 0)
- break;
- }
-
- fclose(fp);
- if (strcmp(type, fstype) != 0)
- return NULL;
-
- return mnt;
-}
-
-int bpf_trace_pipe(void)
-{
- char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT;
- static const char * const tracefs_known_mnts[] = {
- TRACE_DIR_MNT,
- "/sys/kernel/debug/tracing",
- "/tracing",
- "/trace",
- 0,
- };
- char tpipe[PATH_MAX];
- const char *mnt;
- int fd;
-
- mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt,
- sizeof(tracefs_mnt), tracefs_known_mnts);
- if (!mnt) {
- fprintf(stderr, "tracefs not mounted?\n");
- return -1;
- }
-
- snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt);
-
- fd = open(tpipe, O_RDONLY);
- if (fd < 0)
- return -1;
-
- fprintf(stderr, "Running! Hang up with ^C!\n\n");
- while (1) {
- static char buff[4096];
- ssize_t ret;
-
- ret = read(fd, buff, sizeof(buff) - 1);
- if (ret > 0) {
- write(2, buff, ret);
- fflush(stderr);
- }
- }
-
- return 0;
-}
-
-static const char *bpf_get_tc_dir(void)
-{
- static bool bpf_mnt_cached;
- static char bpf_tc_dir[PATH_MAX];
- static const char *mnt;
- static const char * const bpf_known_mnts[] = {
- BPF_DIR_MNT,
- 0,
- };
- char bpf_mnt[PATH_MAX] = BPF_DIR_MNT;
- char bpf_glo_dir[PATH_MAX];
- int ret;
-
- if (bpf_mnt_cached)
- goto done;
-
- mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_mnt, sizeof(bpf_mnt),
- bpf_known_mnts);
- if (!mnt) {
- mnt = getenv(BPF_ENV_MNT);
- if (!mnt)
- mnt = BPF_DIR_MNT;
- ret = bpf_mnt_fs(mnt);
- if (ret) {
- mnt = NULL;
- goto out;
- }
- }
-
- snprintf(bpf_tc_dir, sizeof(bpf_tc_dir), "%s/%s", mnt, BPF_DIR_TC);
- ret = mkdir(bpf_tc_dir, S_IRWXU);
- if (ret && errno != EEXIST) {
- fprintf(stderr, "mkdir %s failed: %s\n", bpf_tc_dir,
- strerror(errno));
- mnt = NULL;
- goto out;
- }
-
- snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s",
- bpf_tc_dir, BPF_DIR_GLOBALS);
- ret = mkdir(bpf_glo_dir, S_IRWXU);
- if (ret && errno != EEXIST) {
- fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir,
- strerror(errno));
- mnt = NULL;
- goto out;
- }
-
- mnt = bpf_tc_dir;
-out:
- bpf_mnt_cached = true;
-done:
- return mnt;
-}
-
-static int bpf_obj_get(const char *pathname)
-{
- union bpf_attr attr = {};
- char tmp[PATH_MAX];
-
- if (strlen(pathname) > 2 && pathname[0] == 'm' &&
- pathname[1] == ':' && bpf_get_tc_dir()) {
- snprintf(tmp, sizeof(tmp), "%s/%s",
- bpf_get_tc_dir(), pathname + 2);
- pathname = tmp;
- }
-
- attr.pathname = bpf_ptr_to_u64(pathname);
-
- return bpf(BPF_OBJ_GET, &attr, sizeof(attr));
-}
-
-const char *bpf_default_section(const enum bpf_prog_type type)
-{
- switch (type) {
- case BPF_PROG_TYPE_SCHED_CLS:
- return ELF_SECTION_CLASSIFIER;
- case BPF_PROG_TYPE_SCHED_ACT:
- return ELF_SECTION_ACTION;
- default:
- return NULL;
- }
-}
-
-enum bpf_mode {
- CBPF_BYTECODE = 0,
- CBPF_FILE,
- EBPF_OBJECT,
- EBPF_PINNED,
- __BPF_MODE_MAX,
-#define BPF_MODE_MAX __BPF_MODE_MAX
-};
-
-static int bpf_parse(int *ptr_argc, char ***ptr_argv, const bool *opt_tbl,
- enum bpf_prog_type *type, enum bpf_mode *mode,
- const char **ptr_object, const char **ptr_section,
- const char **ptr_uds_name, struct sock_filter *opcodes)
-{
- const char *file, *section, *uds_name;
- bool verbose = false;
- int ret, argc;
- char **argv;
-
- argv = *ptr_argv;
- argc = *ptr_argc;
-
- if (opt_tbl[CBPF_BYTECODE] &&
- (matches(*argv, "bytecode") == 0 ||
- strcmp(*argv, "bc") == 0)) {
- *mode = CBPF_BYTECODE;
- } else if (opt_tbl[CBPF_FILE] &&
- (matches(*argv, "bytecode-file") == 0 ||
- strcmp(*argv, "bcf") == 0)) {
- *mode = CBPF_FILE;
- } else if (opt_tbl[EBPF_OBJECT] &&
- (matches(*argv, "object-file") == 0 ||
- strcmp(*argv, "obj") == 0)) {
- *mode = EBPF_OBJECT;
- } else if (opt_tbl[EBPF_PINNED] &&
- (matches(*argv, "object-pinned") == 0 ||
- matches(*argv, "pinned") == 0 ||
- matches(*argv, "fd") == 0)) {
- *mode = EBPF_PINNED;
- } else {
- fprintf(stderr, "What mode is \"%s\"?\n", *argv);
- return -1;
- }
-
- NEXT_ARG();
- file = section = uds_name = NULL;
- if (*mode == EBPF_OBJECT || *mode == EBPF_PINNED) {
- file = *argv;
- NEXT_ARG_FWD();
-
- if (*type == BPF_PROG_TYPE_UNSPEC) {
- if (argc > 0 && matches(*argv, "type") == 0) {
- NEXT_ARG();
- if (matches(*argv, "cls") == 0) {
- *type = BPF_PROG_TYPE_SCHED_CLS;
- } else if (matches(*argv, "act") == 0) {
- *type = BPF_PROG_TYPE_SCHED_ACT;
- } else {
- fprintf(stderr, "What type is \"%s\"?\n",
- *argv);
- return -1;
- }
- NEXT_ARG_FWD();
- } else {
- *type = BPF_PROG_TYPE_SCHED_CLS;
- }
- }
-
- section = bpf_default_section(*type);
- if (argc > 0 && matches(*argv, "section") == 0) {
- NEXT_ARG();
- section = *argv;
- NEXT_ARG_FWD();
- }
-
- uds_name = getenv(BPF_ENV_UDS);
- if (argc > 0 && !uds_name &&
- matches(*argv, "export") == 0) {
- NEXT_ARG();
- uds_name = *argv;
- NEXT_ARG_FWD();
- }
-
- if (argc > 0 && matches(*argv, "verbose") == 0) {
- verbose = true;
- NEXT_ARG_FWD();
- }
-
- PREV_ARG();
- }
-
- if (*mode == CBPF_BYTECODE || *mode == CBPF_FILE)
- ret = bpf_ops_parse(argc, argv, opcodes, *mode == CBPF_FILE);
- else if (*mode == EBPF_OBJECT)
- ret = bpf_obj_open(file, *type, section, verbose);
- else if (*mode == EBPF_PINNED)
- ret = bpf_obj_get(file);
- else
- return -1;
-
- if (ptr_object)
- *ptr_object = file;
- if (ptr_section)
- *ptr_section = section;
- if (ptr_uds_name)
- *ptr_uds_name = uds_name;
-
- *ptr_argc = argc;
- *ptr_argv = argv;
-
- return ret;
-}
-
-int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
- enum bpf_prog_type type, const char **ptr_object,
- const char **ptr_uds_name, struct nlmsghdr *n)
-{
- struct sock_filter opcodes[BPF_MAXINSNS];
- const bool opt_tbl[BPF_MODE_MAX] = {
- [CBPF_BYTECODE] = true,
- [CBPF_FILE] = true,
- [EBPF_OBJECT] = true,
- [EBPF_PINNED] = true,
- };
- char annotation[256];
- const char *section;
- enum bpf_mode mode;
- int ret;
-
- ret = bpf_parse(ptr_argc, ptr_argv, opt_tbl, &type, &mode,
- ptr_object, &section, ptr_uds_name, opcodes);
- if (ret < 0)
- return ret;
-
- if (mode == CBPF_BYTECODE || mode == CBPF_FILE) {
- addattr16(n, MAX_MSG, nla_tbl[BPF_NLA_OPS_LEN], ret);
- addattr_l(n, MAX_MSG, nla_tbl[BPF_NLA_OPS], opcodes,
- ret * sizeof(struct sock_filter));
- }
-
- if (mode == EBPF_OBJECT || mode == EBPF_PINNED) {
- snprintf(annotation, sizeof(annotation), "%s:[%s]",
- basename(*ptr_object), mode == EBPF_PINNED ?
- "*fsobj" : section);
-
- addattr32(n, MAX_MSG, nla_tbl[BPF_NLA_FD], ret);
- addattrstrz(n, MAX_MSG, nla_tbl[BPF_NLA_NAME], annotation);
- }
-
- return 0;
-}
-
-int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv)
-{
- enum bpf_prog_type type = BPF_PROG_TYPE_UNSPEC;
- const bool opt_tbl[BPF_MODE_MAX] = {
- [CBPF_BYTECODE] = false,
- [CBPF_FILE] = false,
- [EBPF_OBJECT] = true,
- [EBPF_PINNED] = true,
- };
- const struct bpf_elf_map test = {
- .type = BPF_MAP_TYPE_PROG_ARRAY,
- .size_key = sizeof(int),
- .size_value = sizeof(int),
- };
- int ret, prog_fd, map_fd;
- const char *section;
- enum bpf_mode mode;
- uint32_t map_key;
-
- prog_fd = bpf_parse(&argc, &argv, opt_tbl, &type, &mode,
- NULL, &section, NULL, NULL);
- if (prog_fd < 0)
- return prog_fd;
- if (key) {
- map_key = *key;
- } else {
- ret = sscanf(section, "%*i/%i", &map_key);
- if (ret != 1) {
- fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n");
- ret = -EINVAL;
- goto out_prog;
- }
- }
-
- map_fd = bpf_obj_get(map_path);
- if (map_fd < 0) {
- fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n",
- map_path, strerror(errno));
- ret = map_fd;
- goto out_prog;
- }
-
- ret = bpf_map_selfcheck_pinned(map_fd, &test,
- offsetof(struct bpf_elf_map, max_elem));
- if (ret < 0) {
- fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path);
- goto out_map;
- }
-
- ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY);
- if (ret < 0)
- fprintf(stderr, "Map update failed: %s\n", strerror(errno));
-out_map:
- close(map_fd);
-out_prog:
- close(prog_fd);
- return ret;
-}
-
-#ifdef HAVE_ELF
-struct bpf_elf_prog {
- enum bpf_prog_type type;
- const struct bpf_insn *insns;
- size_t size;
- const char *license;
-};
-
-struct bpf_hash_entry {
- unsigned int pinning;
- const char *subpath;
- struct bpf_hash_entry *next;
-};
-
-struct bpf_elf_ctx {
- Elf *elf_fd;
- GElf_Ehdr elf_hdr;
- Elf_Data *sym_tab;
- Elf_Data *str_tab;
- int obj_fd;
- int map_fds[ELF_MAX_MAPS];
- struct bpf_elf_map maps[ELF_MAX_MAPS];
- int sym_num;
- int map_num;
- bool *sec_done;
- int sec_maps;
- char license[ELF_MAX_LICENSE_LEN];
- enum bpf_prog_type type;
- bool verbose;
- struct bpf_elf_st stat;
- struct bpf_hash_entry *ht[256];
- char *log;
- size_t log_size;
-};
-
-struct bpf_elf_sec_data {
- GElf_Shdr sec_hdr;
- Elf_Data *sec_data;
- const char *sec_name;
-};
-
-struct bpf_map_data {
- int *fds;
- const char *obj;
- struct bpf_elf_st *st;
- struct bpf_elf_map *ent;
-};
-
-static __check_format_string(2, 3) void
-bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...)
-{
- va_list vl;
-
- va_start(vl, format);
- vfprintf(stderr, format, vl);
- va_end(vl);
-
- if (ctx->log && ctx->log[0]) {
- if (ctx->verbose) {
- fprintf(stderr, "%s\n", ctx->log);
- } else {
- unsigned int off = 0, len = strlen(ctx->log);
-
- if (len > BPF_MAX_LOG) {
- off = len - BPF_MAX_LOG;
- fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n",
- off);
- }
- fprintf(stderr, "%s\n", ctx->log + off);
- }
-
- memset(ctx->log, 0, ctx->log_size);
- }
-}
-
-static int bpf_log_realloc(struct bpf_elf_ctx *ctx)
-{
- size_t log_size = ctx->log_size;
- void *ptr;
-
- if (!ctx->log) {
- log_size = 65536;
- } else {
- log_size <<= 1;
- if (log_size > (UINT_MAX >> 8))
- return -EINVAL;
- }
-
- ptr = realloc(ctx->log, log_size);
- if (!ptr)
- return -ENOMEM;
-
- ctx->log = ptr;
- ctx->log_size = log_size;
-
- return 0;
-}
-
-static int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
- uint32_t size_value, uint32_t max_elem,
- uint32_t flags)
-{
- union bpf_attr attr = {};
-
- attr.map_type = type;
- attr.key_size = size_key;
- attr.value_size = size_value;
- attr.max_entries = max_elem;
- attr.map_flags = flags;
-
- return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
-}
-
-static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t size_insns, const char *license, char *log,
- size_t size_log)
-{
- union bpf_attr attr = {};
-
- attr.prog_type = type;
- attr.insns = bpf_ptr_to_u64(insns);
- attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
- attr.license = bpf_ptr_to_u64(license);
-
- if (size_log > 0) {
- attr.log_buf = bpf_ptr_to_u64(log);
- attr.log_size = size_log;
- attr.log_level = 1;
- }
-
- return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
-}
-
-static int bpf_obj_pin(int fd, const char *pathname)
-{
- union bpf_attr attr = {};
-
- attr.pathname = bpf_ptr_to_u64(pathname);
- attr.bpf_fd = fd;
-
- return bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
-}
-
-static int bpf_obj_hash(const char *object, uint8_t *out, size_t len)
-{
- struct sockaddr_alg alg = {
- .salg_family = AF_ALG,
- .salg_type = "hash",
- .salg_name = "sha1",
- };
- int ret, cfd, ofd, ffd;
- struct stat stbuff;
- ssize_t size;
-
- if (!object || len != 20)
- return -EINVAL;
-
- cfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
- if (cfd < 0) {
- fprintf(stderr, "Cannot get AF_ALG socket: %s\n",
- strerror(errno));
- return cfd;
- }
-
- ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg));
- if (ret < 0) {
- fprintf(stderr, "Error binding socket: %s\n", strerror(errno));
- goto out_cfd;
- }
-
- ofd = accept(cfd, NULL, 0);
- if (ofd < 0) {
- fprintf(stderr, "Error accepting socket: %s\n",
- strerror(errno));
- ret = ofd;
- goto out_cfd;
- }
-
- ffd = open(object, O_RDONLY);
- if (ffd < 0) {
- fprintf(stderr, "Error opening object %s: %s\n",
- object, strerror(errno));
- ret = ffd;
- goto out_ofd;
- }
-
- ret = fstat(ffd, &stbuff);
- if (ret < 0) {
- fprintf(stderr, "Error doing fstat: %s\n",
- strerror(errno));
- goto out_ffd;
- }
-
- size = sendfile(ofd, ffd, NULL, stbuff.st_size);
- if (size != stbuff.st_size) {
- fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n",
- size, stbuff.st_size, strerror(errno));
- ret = -1;
- goto out_ffd;
- }
-
- size = read(ofd, out, len);
- if (size != len) {
- fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n",
- size, len, strerror(errno));
- ret = -1;
- } else {
- ret = 0;
- }
-out_ffd:
- close(ffd);
-out_ofd:
- close(ofd);
-out_cfd:
- close(cfd);
- return ret;
-}
-
-static const char *bpf_get_obj_uid(const char *pathname)
-{
- static bool bpf_uid_cached;
- static char bpf_uid[64];
- uint8_t tmp[20];
- int ret;
-
- if (bpf_uid_cached)
- goto done;
-
- ret = bpf_obj_hash(pathname, tmp, sizeof(tmp));
- if (ret) {
- fprintf(stderr, "Object hashing failed!\n");
- return NULL;
- }
-
- hexstring_n2a(tmp, sizeof(tmp), bpf_uid, sizeof(bpf_uid));
- bpf_uid_cached = true;
-done:
- return bpf_uid;
-}
-
-static int bpf_init_env(const char *pathname)
-{
- struct rlimit limit = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
-
- /* Don't bother in case we fail! */
- setrlimit(RLIMIT_MEMLOCK, &limit);
-
- if (!bpf_get_tc_dir()) {
- fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n");
- return 0;
- }
-
- if (!bpf_get_obj_uid(pathname))
- return -1;
-
- return 0;
-}
-
-static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx,
- uint32_t pinning)
-{
- struct bpf_hash_entry *entry;
-
- entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
- while (entry && entry->pinning != pinning)
- entry = entry->next;
-
- return entry ? entry->subpath : NULL;
-}
-
-static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx,
- uint32_t pinning)
-{
- switch (pinning) {
- case PIN_OBJECT_NS:
- case PIN_GLOBAL_NS:
- return false;
- case PIN_NONE:
- return true;
- default:
- return !bpf_custom_pinning(ctx, pinning);
- }
-}
-
-static void bpf_make_pathname(char *pathname, size_t len, const char *name,
- const struct bpf_elf_ctx *ctx, uint32_t pinning)
-{
- switch (pinning) {
- case PIN_OBJECT_NS:
- snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
- bpf_get_obj_uid(NULL), name);
- break;
- case PIN_GLOBAL_NS:
- snprintf(pathname, len, "%s/%s/%s", bpf_get_tc_dir(),
- BPF_DIR_GLOBALS, name);
- break;
- default:
- snprintf(pathname, len, "%s/../%s/%s", bpf_get_tc_dir(),
- bpf_custom_pinning(ctx, pinning), name);
- break;
- }
-}
-
-static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx,
- uint32_t pinning)
-{
- char pathname[PATH_MAX];
-
- if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
- return 0;
-
- bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
- return bpf_obj_get(pathname);
-}
-
-static int bpf_make_obj_path(void)
-{
- char tmp[PATH_MAX];
- int ret;
-
- snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_tc_dir(),
- bpf_get_obj_uid(NULL));
-
- ret = mkdir(tmp, S_IRWXU);
- if (ret && errno != EEXIST) {
- fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno));
- return ret;
- }
-
- return 0;
-}
-
-static int bpf_make_custom_path(const char *todo)
-{
- char tmp[PATH_MAX], rem[PATH_MAX], *sub;
- int ret;
-
- snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_tc_dir());
- snprintf(rem, sizeof(rem), "%s/", todo);
- sub = strtok(rem, "/");
-
- while (sub) {
- if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX)
- return -EINVAL;
-
- strcat(tmp, sub);
- strcat(tmp, "/");
-
- ret = mkdir(tmp, S_IRWXU);
- if (ret && errno != EEXIST) {
- fprintf(stderr, "mkdir %s failed: %s\n", tmp,
- strerror(errno));
- return ret;
- }
-
- sub = strtok(NULL, "/");
- }
-
- return 0;
-}
-
-static int bpf_place_pinned(int fd, const char *name,
- const struct bpf_elf_ctx *ctx, uint32_t pinning)
-{
- char pathname[PATH_MAX];
- const char *tmp;
- int ret = 0;
-
- if (bpf_no_pinning(ctx, pinning) || !bpf_get_tc_dir())
- return 0;
-
- if (pinning == PIN_OBJECT_NS)
- ret = bpf_make_obj_path();
- else if ((tmp = bpf_custom_pinning(ctx, pinning)))
- ret = bpf_make_custom_path(tmp);
- if (ret < 0)
- return ret;
-
- bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning);
- return bpf_obj_pin(fd, pathname);
-}
-
-static void bpf_prog_report(int fd, const char *section,
- const struct bpf_elf_prog *prog,
- struct bpf_elf_ctx *ctx)
-{
- unsigned int insns = prog->size / sizeof(struct bpf_insn);
-
- fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section,
- fd < 0 ? "rejected: " : "loaded",
- fd < 0 ? strerror(errno) : "",
- fd < 0 ? errno : fd);
-
- fprintf(stderr, " - Type: %u\n", prog->type);
- fprintf(stderr, " - Instructions: %u (%u over limit)\n",
- insns, insns > BPF_MAXINSNS ? insns - BPF_MAXINSNS : 0);
- fprintf(stderr, " - License: %s\n\n", prog->license);
-
- bpf_dump_error(ctx, "Verifier analysis:\n\n");
-}
-
-static int bpf_prog_attach(const char *section,
- const struct bpf_elf_prog *prog,
- struct bpf_elf_ctx *ctx)
-{
- int tries = 0, fd;
-retry:
- errno = 0;
- fd = bpf_prog_load(prog->type, prog->insns, prog->size,
- prog->license, ctx->log, ctx->log_size);
- if (fd < 0 || ctx->verbose) {
- /* The verifier log is pretty chatty, sometimes so chatty
- * on larger programs, that we could fail to dump everything
- * into our buffer. Still, try to give a debuggable error
- * log for the user, so enlarge it and re-fail.
- */
- if (fd < 0 && (errno == ENOSPC || !ctx->log_size)) {
- if (tries++ < 6 && !bpf_log_realloc(ctx))
- goto retry;
-
- fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n",
- ctx->log_size, tries);
- return fd;
- }
-
- bpf_prog_report(fd, section, prog, ctx);
- }
-
- return fd;
-}
-
-static void bpf_map_report(int fd, const char *name,
- const struct bpf_elf_map *map,
- struct bpf_elf_ctx *ctx)
-{
- fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name,
- fd < 0 ? "rejected: " : "loaded",
- fd < 0 ? strerror(errno) : "",
- fd < 0 ? errno : fd);
-
- fprintf(stderr, " - Type: %u\n", map->type);
- fprintf(stderr, " - Identifier: %u\n", map->id);
- fprintf(stderr, " - Pinning: %u\n", map->pinning);
- fprintf(stderr, " - Size key: %u\n", map->size_key);
- fprintf(stderr, " - Size value: %u\n", map->size_value);
- fprintf(stderr, " - Max elems: %u\n", map->max_elem);
- fprintf(stderr, " - Flags: %#x\n\n", map->flags);
-}
-
-static int bpf_map_attach(const char *name, const struct bpf_elf_map *map,
- struct bpf_elf_ctx *ctx)
-{
- int fd, ret;
-
- fd = bpf_probe_pinned(name, ctx, map->pinning);
- if (fd > 0) {
- ret = bpf_map_selfcheck_pinned(fd, map,
- offsetof(struct bpf_elf_map,
- id));
- if (ret < 0) {
- close(fd);
- fprintf(stderr, "Map \'%s\' self-check failed!\n",
- name);
- return ret;
- }
- if (ctx->verbose)
- fprintf(stderr, "Map \'%s\' loaded as pinned!\n",
- name);
- return fd;
- }
-
- errno = 0;
- fd = bpf_map_create(map->type, map->size_key, map->size_value,
- map->max_elem, map->flags);
- if (fd < 0 || ctx->verbose) {
- bpf_map_report(fd, name, map, ctx);
- if (fd < 0)
- return fd;
- }
-
- ret = bpf_place_pinned(fd, name, ctx, map->pinning);
- if (ret < 0 && errno != EEXIST) {
- fprintf(stderr, "Could not pin %s map: %s\n", name,
- strerror(errno));
- close(fd);
- return ret;
- }
-
- return fd;
-}
-
-static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx,
- const GElf_Sym *sym)
-{
- return ctx->str_tab->d_buf + sym->st_name;
-}
-
-static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which)
-{
- GElf_Sym sym;
- int i;
-
- for (i = 0; i < ctx->sym_num; i++) {
- if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym)
- continue;
-
- if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
- GELF_ST_TYPE(sym.st_info) != STT_NOTYPE ||
- sym.st_shndx != ctx->sec_maps ||
- sym.st_value / sizeof(struct bpf_elf_map) != which)
- continue;
-
- return bpf_str_tab_name(ctx, &sym);
- }
-
- return NULL;
-}
-
-static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx)
-{
- const char *map_name;
- int i, fd;
-
- for (i = 0; i < ctx->map_num; i++) {
- map_name = bpf_map_fetch_name(ctx, i);
- if (!map_name)
- return -EIO;
-
- fd = bpf_map_attach(map_name, &ctx->maps[i], ctx);
- if (fd < 0)
- return fd;
-
- ctx->map_fds[i] = fd;
- }
-
- return 0;
-}
-
-static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- Elf_Data *sec_edata;
- GElf_Shdr sec_hdr;
- Elf_Scn *sec_fd;
- char *sec_name;
-
- memset(data, 0, sizeof(*data));
-
- sec_fd = elf_getscn(ctx->elf_fd, section);
- if (!sec_fd)
- return -EINVAL;
- if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
- return -EIO;
-
- sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx,
- sec_hdr.sh_name);
- if (!sec_name || !sec_hdr.sh_size)
- return -ENOENT;
-
- sec_edata = elf_getdata(sec_fd, NULL);
- if (!sec_edata || elf_getdata(sec_fd, sec_edata))
- return -EIO;
-
- memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
-
- data->sec_name = sec_name;
- data->sec_data = sec_edata;
- return 0;
-}
-
-static int bpf_fetch_maps(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- if (data->sec_data->d_size % sizeof(struct bpf_elf_map) != 0)
- return -EINVAL;
-
- ctx->map_num = data->sec_data->d_size / sizeof(struct bpf_elf_map);
- ctx->sec_maps = section;
- ctx->sec_done[section] = true;
-
- if (ctx->map_num > ARRAY_SIZE(ctx->map_fds)) {
- fprintf(stderr, "Too many BPF maps in ELF section!\n");
- return -ENOMEM;
- }
-
- memcpy(ctx->maps, data->sec_data->d_buf, data->sec_data->d_size);
- return 0;
-}
-
-static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- if (data->sec_data->d_size > sizeof(ctx->license))
- return -ENOMEM;
-
- memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size);
- ctx->sec_done[section] = true;
- return 0;
-}
-
-static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- ctx->sym_tab = data->sec_data;
- ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize;
- ctx->sec_done[section] = true;
- return 0;
-}
-
-static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section,
- struct bpf_elf_sec_data *data)
-{
- ctx->str_tab = data->sec_data;
- ctx->sec_done[section] = true;
- return 0;
-}
-
-static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx)
-{
- return ctx->sym_tab && ctx->str_tab && ctx->sec_maps;
-}
-
-static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx)
-{
- struct bpf_elf_sec_data data;
- int i, ret = -1;
-
- for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
- ret = bpf_fill_section_data(ctx, i, &data);
- if (ret < 0)
- continue;
-
- if (data.sec_hdr.sh_type == SHT_PROGBITS &&
- !strcmp(data.sec_name, ELF_SECTION_MAPS))
- ret = bpf_fetch_maps(ctx, i, &data);
- else if (data.sec_hdr.sh_type == SHT_PROGBITS &&
- !strcmp(data.sec_name, ELF_SECTION_LICENSE))
- ret = bpf_fetch_license(ctx, i, &data);
- else if (data.sec_hdr.sh_type == SHT_SYMTAB &&
- !strcmp(data.sec_name, ".symtab"))
- ret = bpf_fetch_symtab(ctx, i, &data);
- else if (data.sec_hdr.sh_type == SHT_STRTAB &&
- !strcmp(data.sec_name, ".strtab"))
- ret = bpf_fetch_strtab(ctx, i, &data);
- if (ret < 0) {
- fprintf(stderr, "Error parsing section %d! Perhaps check with readelf -a?\n",
- i);
- break;
- }
- }
-
- if (bpf_has_map_data(ctx)) {
- ret = bpf_maps_attach_all(ctx);
- if (ret < 0) {
- fprintf(stderr, "Error loading maps into kernel!\n");
- return ret;
- }
- }
-
- return ret;
-}
-
-static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section)
-{
- struct bpf_elf_sec_data data;
- struct bpf_elf_prog prog;
- int ret, i, fd = -1;
-
- for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
- if (ctx->sec_done[i])
- continue;
-
- ret = bpf_fill_section_data(ctx, i, &data);
- if (ret < 0 ||
- !(data.sec_hdr.sh_type == SHT_PROGBITS &&
- data.sec_hdr.sh_flags & SHF_EXECINSTR &&
- !strcmp(data.sec_name, section)))
- continue;
-
- memset(&prog, 0, sizeof(prog));
- prog.type = ctx->type;
- prog.insns = data.sec_data->d_buf;
- prog.size = data.sec_data->d_size;
- prog.license = ctx->license;
-
- fd = bpf_prog_attach(section, &prog, ctx);
- if (fd < 0)
- break;
-
- ctx->sec_done[i] = true;
- break;
- }
-
- return fd;
-}
-
-static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx,
- struct bpf_elf_sec_data *data_relo,
- struct bpf_elf_sec_data *data_insn)
-{
- Elf_Data *idata = data_insn->sec_data;
- GElf_Shdr *rhdr = &data_relo->sec_hdr;
- int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
- struct bpf_insn *insns = idata->d_buf;
- unsigned int num_insns = idata->d_size / sizeof(*insns);
-
- for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
- unsigned int ioff, rmap;
- GElf_Rel relo;
- GElf_Sym sym;
-
- if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
- return -EIO;
-
- ioff = relo.r_offset / sizeof(struct bpf_insn);
- if (ioff >= num_insns ||
- insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW)) {
- fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! Compiler bug?!\n",
- ioff);
- if (ioff < num_insns &&
- insns[ioff].code == (BPF_JMP | BPF_CALL))
- fprintf(stderr, " - Try to annotate functions with always_inline attribute!\n");
- return -EINVAL;
- }
-
- if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
- return -EIO;
- if (sym.st_shndx != ctx->sec_maps) {
- fprintf(stderr, "ELF contains non-map related relo data in entry %u pointing to section %u! Compiler bug?!\n",
- relo_ent, sym.st_shndx);
- return -EIO;
- }
-
- rmap = sym.st_value / sizeof(struct bpf_elf_map);
- if (rmap >= ARRAY_SIZE(ctx->map_fds))
- return -EINVAL;
- if (!ctx->map_fds[rmap])
- return -EINVAL;
-
- if (ctx->verbose)
- fprintf(stderr, "Map \'%s\' (%d) injected into prog section \'%s\' at offset %u!\n",
- bpf_str_tab_name(ctx, &sym), ctx->map_fds[rmap],
- data_insn->sec_name, ioff);
-
- insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
- insns[ioff].imm = ctx->map_fds[rmap];
- }
-
- return 0;
-}
-
-static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section,
- bool *lderr)
-{
- struct bpf_elf_sec_data data_relo, data_insn;
- struct bpf_elf_prog prog;
- int ret, idx, i, fd = -1;
-
- for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
- ret = bpf_fill_section_data(ctx, i, &data_relo);
- if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
- continue;
-
- idx = data_relo.sec_hdr.sh_info;
- ret = bpf_fill_section_data(ctx, idx, &data_insn);
- if (ret < 0 ||
- !(data_insn.sec_hdr.sh_type == SHT_PROGBITS &&
- data_insn.sec_hdr.sh_flags & SHF_EXECINSTR &&
- !strcmp(data_insn.sec_name, section)))
- continue;
-
- ret = bpf_apply_relo_data(ctx, &data_relo, &data_insn);
- if (ret < 0)
- continue;
-
- memset(&prog, 0, sizeof(prog));
- prog.type = ctx->type;
- prog.insns = data_insn.sec_data->d_buf;
- prog.size = data_insn.sec_data->d_size;
- prog.license = ctx->license;
-
- fd = bpf_prog_attach(section, &prog, ctx);
- if (fd < 0) {
- *lderr = true;
- break;
- }
-
- ctx->sec_done[i] = true;
- ctx->sec_done[idx] = true;
- break;
- }
-
- return fd;
-}
-
-static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section)
-{
- bool lderr = false;
- int ret = -1;
-
- if (bpf_has_map_data(ctx))
- ret = bpf_fetch_prog_relo(ctx, section, &lderr);
- if (ret < 0 && !lderr)
- ret = bpf_fetch_prog(ctx, section);
-
- return ret;
-}
-
-static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++)
- if (ctx->map_fds[i] && ctx->maps[i].id == id &&
- ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
- return i;
- return -1;
-}
-
-static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx)
-{
- struct bpf_elf_sec_data data;
- uint32_t map_id, key_id;
- int fd, i, ret, idx;
-
- for (i = 1; i < ctx->elf_hdr.e_shnum; i++) {
- if (ctx->sec_done[i])
- continue;
-
- ret = bpf_fill_section_data(ctx, i, &data);
- if (ret < 0)
- continue;
-
- ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id);
- if (ret != 2)
- continue;
-
- idx = bpf_find_map_by_id(ctx, map_id);
- if (idx < 0)
- continue;
-
- fd = bpf_fetch_prog_sec(ctx, data.sec_name);
- if (fd < 0)
- return -EIO;
-
- ret = bpf_map_update(ctx->map_fds[idx], &key_id,
- &fd, BPF_ANY);
- if (ret < 0) {
- if (errno == E2BIG)
- fprintf(stderr, "Tail call key %u for map %u out of bounds?\n",
- key_id, map_id);
- return -errno;
- }
-
- ctx->sec_done[i] = true;
- }
-
- return 0;
-}
-
-static void bpf_save_finfo(struct bpf_elf_ctx *ctx)
-{
- struct stat st;
- int ret;
-
- memset(&ctx->stat, 0, sizeof(ctx->stat));
-
- ret = fstat(ctx->obj_fd, &st);
- if (ret < 0) {
- fprintf(stderr, "Stat of elf file failed: %s\n",
- strerror(errno));
- return;
- }
-
- ctx->stat.st_dev = st.st_dev;
- ctx->stat.st_ino = st.st_ino;
-}
-
-static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path)
-{
- char buff[PATH_MAX];
-
- while (fgets(buff, sizeof(buff), fp)) {
- char *ptr = buff;
-
- while (*ptr == ' ' || *ptr == '\t')
- ptr++;
-
- if (*ptr == '#' || *ptr == '\n' || *ptr == 0)
- continue;
-
- if (sscanf(ptr, "%i %s\n", id, path) != 2 &&
- sscanf(ptr, "%i %s #", id, path) != 2) {
- strcpy(path, ptr);
- return -1;
- }
-
- return 1;
- }
-
- return 0;
-}
-
-static bool bpf_pinning_reserved(uint32_t pinning)
-{
- switch (pinning) {
- case PIN_NONE:
- case PIN_OBJECT_NS:
- case PIN_GLOBAL_NS:
- return true;
- default:
- return false;
- }
-}
-
-static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file)
-{
- struct bpf_hash_entry *entry;
- char subpath[PATH_MAX] = {};
- uint32_t pinning;
- FILE *fp;
- int ret;
-
- fp = fopen(db_file, "r");
- if (!fp)
- return;
-
- while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) {
- if (ret == -1) {
- fprintf(stderr, "Database %s is corrupted at: %s\n",
- db_file, subpath);
- fclose(fp);
- return;
- }
-
- if (bpf_pinning_reserved(pinning)) {
- fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n",
- db_file, pinning);
- continue;
- }
-
- entry = malloc(sizeof(*entry));
- if (!entry) {
- fprintf(stderr, "No memory left for db entry!\n");
- continue;
- }
-
- entry->pinning = pinning;
- entry->subpath = strdup(subpath);
- if (!entry->subpath) {
- fprintf(stderr, "No memory left for db entry!\n");
- free(entry);
- continue;
- }
-
- entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)];
- ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)] = entry;
- }
-
- fclose(fp);
-}
-
-static void bpf_hash_destroy(struct bpf_elf_ctx *ctx)
-{
- struct bpf_hash_entry *entry;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) {
- while ((entry = ctx->ht[i]) != NULL) {
- ctx->ht[i] = entry->next;
- free((char *)entry->subpath);
- free(entry);
- }
- }
-}
-
-static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx)
-{
- if (ctx->elf_hdr.e_type != ET_REL ||
- (ctx->elf_hdr.e_machine != EM_NONE &&
- ctx->elf_hdr.e_machine != EM_BPF) ||
- ctx->elf_hdr.e_version != EV_CURRENT) {
- fprintf(stderr, "ELF format error, ELF file not for eBPF?\n");
- return -EINVAL;
- }
-
- switch (ctx->elf_hdr.e_ident[EI_DATA]) {
- default:
- fprintf(stderr, "ELF format error, wrong endianness info?\n");
- return -EINVAL;
- case ELFDATA2LSB:
- if (htons(1) == 1) {
- fprintf(stderr,
- "We are big endian, eBPF object is little endian!\n");
- return -EIO;
- }
- break;
- case ELFDATA2MSB:
- if (htons(1) != 1) {
- fprintf(stderr,
- "We are little endian, eBPF object is big endian!\n");
- return -EIO;
- }
- break;
- }
-
- return 0;
-}
-
-static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname,
- enum bpf_prog_type type, bool verbose)
-{
- int ret = -EINVAL;
-
- if (elf_version(EV_CURRENT) == EV_NONE ||
- bpf_init_env(pathname))
- return ret;
-
- memset(ctx, 0, sizeof(*ctx));
- ctx->verbose = verbose;
- ctx->type = type;
-
- ctx->obj_fd = open(pathname, O_RDONLY);
- if (ctx->obj_fd < 0)
- return ctx->obj_fd;
-
- ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL);
- if (!ctx->elf_fd) {
- ret = -EINVAL;
- goto out_fd;
- }
-
- if (elf_kind(ctx->elf_fd) != ELF_K_ELF) {
- ret = -EINVAL;
- goto out_fd;
- }
-
- if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) !=
- &ctx->elf_hdr) {
- ret = -EIO;
- goto out_elf;
- }
-
- ret = bpf_elf_check_ehdr(ctx);
- if (ret < 0)
- goto out_elf;
-
- ctx->sec_done = calloc(ctx->elf_hdr.e_shnum,
- sizeof(*(ctx->sec_done)));
- if (!ctx->sec_done) {
- ret = -ENOMEM;
- goto out_elf;
- }
-
- if (ctx->verbose && bpf_log_realloc(ctx)) {
- ret = -ENOMEM;
- goto out_free;
- }
-
- bpf_save_finfo(ctx);
- bpf_hash_init(ctx, CONFDIR "/bpf_pinning");
-
- return 0;
-out_free:
- free(ctx->sec_done);
-out_elf:
- elf_end(ctx->elf_fd);
-out_fd:
- close(ctx->obj_fd);
- return ret;
-}
-
-static int bpf_maps_count(struct bpf_elf_ctx *ctx)
-{
- int i, count = 0;
-
- for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
- if (!ctx->map_fds[i])
- break;
- count++;
- }
-
- return count;
-}
-
-static void bpf_maps_teardown(struct bpf_elf_ctx *ctx)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) {
- if (ctx->map_fds[i])
- close(ctx->map_fds[i]);
- }
-}
-
-static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure)
-{
- if (failure)
- bpf_maps_teardown(ctx);
-
- bpf_hash_destroy(ctx);
-
- free(ctx->sec_done);
- free(ctx->log);
-
- elf_end(ctx->elf_fd);
- close(ctx->obj_fd);
-}
-
-static struct bpf_elf_ctx __ctx;
-
-static int bpf_obj_open(const char *pathname, enum bpf_prog_type type,
- const char *section, bool verbose)
-{
- struct bpf_elf_ctx *ctx = &__ctx;
- int fd = 0, ret;
-
- ret = bpf_elf_ctx_init(ctx, pathname, type, verbose);
- if (ret < 0) {
- fprintf(stderr, "Cannot initialize ELF context!\n");
- return ret;
- }
-
- ret = bpf_fetch_ancillary(ctx);
- if (ret < 0) {
- fprintf(stderr, "Error fetching ELF ancillary data!\n");
- goto out;
- }
-
- fd = bpf_fetch_prog_sec(ctx, section);
- if (fd < 0) {
- fprintf(stderr, "Error fetching program/map!\n");
- ret = fd;
- goto out;
- }
-
- ret = bpf_fill_prog_arrays(ctx);
- if (ret < 0)
- fprintf(stderr, "Error filling program arrays!\n");
-out:
- bpf_elf_ctx_destroy(ctx, ret < 0);
- if (ret < 0) {
- if (fd)
- close(fd);
- return ret;
- }
-
- return fd;
-}
-
-static int
-bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len,
- const struct bpf_map_data *aux, unsigned int entries)
-{
- struct bpf_map_set_msg msg = {
- .aux.uds_ver = BPF_SCM_AUX_VER,
- .aux.num_ent = entries,
- };
- int *cmsg_buf, min_fd;
- char *amsg_buf;
- int i;
-
- strncpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name));
- memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st));
-
- cmsg_buf = bpf_map_set_init(&msg, addr, addr_len);
- amsg_buf = (char *)msg.aux.ent;
-
- for (i = 0; i < entries; i += min_fd) {
- int ret;
-
- min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i);
- bpf_map_set_init_single(&msg, min_fd);
-
- memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd);
- memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd);
-
- ret = sendmsg(fd, &msg.hdr, 0);
- if (ret <= 0)
- return ret ? : -1;
- }
-
- return 0;
-}
-
-static int
-bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux,
- unsigned int entries)
-{
- struct bpf_map_set_msg msg;
- int *cmsg_buf, min_fd;
- char *amsg_buf, *mmsg_buf;
- unsigned int needed = 1;
- int i;
-
- cmsg_buf = bpf_map_set_init(&msg, NULL, 0);
- amsg_buf = (char *)msg.aux.ent;
- mmsg_buf = (char *)&msg.aux;
-
- for (i = 0; i < min(entries, needed); i += min_fd) {
- struct cmsghdr *cmsg;
- int ret;
-
- min_fd = min(entries, entries - i);
- bpf_map_set_init_single(&msg, min_fd);
-
- ret = recvmsg(fd, &msg.hdr, 0);
- if (ret <= 0)
- return ret ? : -1;
-
- cmsg = CMSG_FIRSTHDR(&msg.hdr);
- if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS)
- return -EINVAL;
- if (msg.hdr.msg_flags & MSG_CTRUNC)
- return -EIO;
- if (msg.aux.uds_ver != BPF_SCM_AUX_VER)
- return -ENOSYS;
-
- min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd);
- if (min_fd > entries || min_fd <= 0)
- return -EINVAL;
-
- memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd);
- memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd);
- memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent));
-
- needed = aux->num_ent;
- }
-
- return 0;
-}
-
-int bpf_send_map_fds(const char *path, const char *obj)
-{
- struct bpf_elf_ctx *ctx = &__ctx;
- struct sockaddr_un addr = { .sun_family = AF_UNIX };
- struct bpf_map_data bpf_aux = {
- .fds = ctx->map_fds,
- .ent = ctx->maps,
- .st = &ctx->stat,
- .obj = obj,
- };
- int fd, ret;
-
- fd = socket(AF_UNIX, SOCK_DGRAM, 0);
- if (fd < 0) {
- fprintf(stderr, "Cannot open socket: %s\n",
- strerror(errno));
- return -1;
- }
-
- strncpy(addr.sun_path, path, sizeof(addr.sun_path));
-
- ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr));
- if (ret < 0) {
- fprintf(stderr, "Cannot connect to %s: %s\n",
- path, strerror(errno));
- return -1;
- }
-
- ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux,
- bpf_maps_count(ctx));
- if (ret < 0)
- fprintf(stderr, "Cannot send fds to %s: %s\n",
- path, strerror(errno));
-
- bpf_maps_teardown(ctx);
- close(fd);
- return ret;
-}
-
-int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
- unsigned int entries)
-{
- struct sockaddr_un addr = { .sun_family = AF_UNIX };
- int fd, ret;
-
- fd = socket(AF_UNIX, SOCK_DGRAM, 0);
- if (fd < 0) {
- fprintf(stderr, "Cannot open socket: %s\n",
- strerror(errno));
- return -1;
- }
-
- strncpy(addr.sun_path, path, sizeof(addr.sun_path));
-
- ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
- if (ret < 0) {
- fprintf(stderr, "Cannot bind to socket: %s\n",
- strerror(errno));
- return -1;
- }
-
- ret = bpf_map_set_recv(fd, fds, aux, entries);
- if (ret < 0)
- fprintf(stderr, "Cannot recv fds from %s: %s\n",
- path, strerror(errno));
-
- unlink(addr.sun_path);
- close(fd);
- return ret;
-}
-#endif /* HAVE_ELF */
diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h
deleted file mode 100644
index 30306de..0000000
--- a/tc/tc_bpf.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * tc_bpf.h BPF common code
- *
- * This program is free software; you can distribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Daniel Borkmann <dborkman@...hat.com>
- * Jiri Pirko <jiri@...nulli.us>
- */
-
-#ifndef _TC_BPF_H_
-#define _TC_BPF_H_ 1
-
-#include <linux/netlink.h>
-#include <linux/bpf.h>
-#include <linux/magic.h>
-
-#include "utils.h"
-#include "bpf_scm.h"
-
-enum {
- BPF_NLA_OPS_LEN = 0,
- BPF_NLA_OPS,
- BPF_NLA_FD,
- BPF_NLA_NAME,
- __BPF_NLA_MAX,
-};
-
-#define BPF_NLA_MAX __BPF_NLA_MAX
-
-#define BPF_ENV_UDS "TC_BPF_UDS"
-#define BPF_ENV_MNT "TC_BPF_MNT"
-
-#ifndef BPF_MAX_LOG
-# define BPF_MAX_LOG 4096
-#endif
-
-#ifndef BPF_FS_MAGIC
-# define BPF_FS_MAGIC 0xcafe4a11
-#endif
-
-#define BPF_DIR_MNT "/sys/fs/bpf"
-
-#define BPF_DIR_TC "tc"
-#define BPF_DIR_GLOBALS "globals"
-
-#ifndef TRACEFS_MAGIC
-# define TRACEFS_MAGIC 0x74726163
-#endif
-
-#define TRACE_DIR_MNT "/sys/kernel/tracing"
-
-int bpf_trace_pipe(void);
-const char *bpf_default_section(const enum bpf_prog_type type);
-
-int bpf_parse_common(int *ptr_argc, char ***ptr_argv, const int *nla_tbl,
- enum bpf_prog_type type, const char **ptr_object,
- const char **ptr_uds_name, struct nlmsghdr *n);
-int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv);
-
-void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
-
-#ifdef HAVE_ELF
-int bpf_send_map_fds(const char *path, const char *obj);
-int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux,
- unsigned int entries);
-#else
-static inline int bpf_send_map_fds(const char *path, const char *obj)
-{
- return 0;
-}
-
-static inline int bpf_recv_map_fds(const char *path, int *fds,
- struct bpf_map_aux *aux,
- unsigned int entries)
-{
- return -1;
-}
-#endif /* HAVE_ELF */
-#endif /* _TC_BPF_H_ */
--
1.9.3
Powered by blists - more mailing lists