Message-ID: <1450329794-161948-9-git-send-email-wangnan0@huawei.com>
Date: Thu, 17 Dec 2015 05:23:12 +0000
From: Wang Nan <wangnan0@...wei.com>
To: <ast@...nel.org>, <agartrell@...com>, <acme@...hat.com>,
<bblanco@...mgrid.com>, <daniel@...earbox.net>,
<daniel.wagner@...-carit.de>, <davem@...emloft.net>,
<mingo@...nel.org>, <jolsa@...nel.org>, <xiakaixu@...wei.com>,
<holzheu@...ux.vnet.ibm.com>, <yang.shi@...aro.org>
CC: <linux-kernel@...r.kernel.org>, <pi3orama@....com>,
Wang Nan <wangnan0@...wei.com>
Subject: [PATCH 08/10] bpf samples: Add utils.[ch] for using BPF
We are going to use libbpf to replace the old libbpf.[ch] and
bpf_load.[ch]. This is the first patch of that work: it merges
several macros and helpers from libbpf.[ch] and bpf_load.[ch] into
utils.[ch], which relies on libbpf in tools/lib to deal with
BPF-related work. The new files will be compiled once the Makefile
changes later in this series are applied.
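As a rough sketch of the intended usage (modeled on the existing
sockex1 sample; the actual sample conversions follow in later
patches):

    struct bpf_object *obj = load_bpf_file("sockex1_kern.o");
    int sock, prog_fd;

    if (!obj)
        return 1;
    prog_fd = get_prog_fd(obj, 0);
    sock = open_raw_sock("lo");
    setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF,
               &prog_fd, sizeof(prog_fd));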
Signed-off-by: Wang Nan <wangnan0@...wei.com>
Cc: Alexei Starovoitov <ast@...nel.org>
Cc: Alex Gartrell <agartrell@...com>
Cc: Arnaldo Carvalho de Melo <acme@...hat.com>
Cc: Brenden Blanco <bblanco@...mgrid.com>
Cc: Daniel Borkmann <daniel@...earbox.net>
Cc: Daniel Wagner <daniel.wagner@...-carit.de>
Cc: David S. Miller <davem@...emloft.net>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Kaixu Xia <xiakaixu@...wei.com>
Cc: Michael Holzheu <holzheu@...ux.vnet.ibm.com>
Cc: Yang Shi <yang.shi@...aro.org>
---
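Note: utils.h keeps the raw instruction macros so samples that build
programs by hand continue to work, e.g. a minimal sketch:

    struct bpf_insn prog[] = {
        BPF_MOV64_IMM(BPF_REG_0, 0),    /* r0 = 0 */
        BPF_EXIT_INSN(),                /* return r0 */
    };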
samples/bpf/include/linux/err.h |  61 +++++++++
samples/bpf/utils.c             | 301 ++++++++++++++++++++++++++++++++++++++++
samples/bpf/utils.h             | 221 +++++++++++++++++++++++++++++++
3 files changed, 583 insertions(+)
create mode 100644 samples/bpf/include/linux/err.h
create mode 100644 samples/bpf/utils.c
create mode 100644 samples/bpf/utils.h
diff --git a/samples/bpf/include/linux/err.h b/samples/bpf/include/linux/err.h
new file mode 100644
index 0000000..671b874
--- /dev/null
+++ b/samples/bpf/include/linux/err.h
@@ -0,0 +1,61 @@
+#ifndef __TOOLS_LINUX_ERR_H
+#define __TOOLS_LINUX_ERR_H
+
+#include <asm/errno.h>
+#include <stdbool.h> /* 'bool' is used by IS_ERR() below */
+
+#ifndef __must_check
+# define __must_check
+#endif
+#ifndef __force
+# define __force
+#endif
+#ifndef unlikely
+# define unlikely(x) x
+#endif
+
+/*
+ * Original kernel header comment:
+ *
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a normal
+ * pointer with the same return value.
+ *
+ * This should be a per-architecture thing, to allow different
+ * error and pointer decisions.
+ *
+ * Userspace note:
+ * The same principle works for userspace, because 'error' pointers
+ * fall down to the unused hole far from user space, as described
+ * in Documentation/x86/x86_64/mm.txt for x86_64 arch:
+ *
+ * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension
+ * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ *
+ * It should be the same case for other architectures, because
+ * this code is used in generic kernel code.
+ */
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
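+/*
+ * Typical round trip: a function returns e.g. ERR_PTR(-ENOENT) on
+ * failure; callers test with IS_ERR() and recover the code with
+ * PTR_ERR().
+ */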
+static inline void * __must_check ERR_PTR(long error_)
+{
+ return (void *) error_;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+#endif /* __TOOLS_LINUX_ERR_H */
diff --git a/samples/bpf/utils.c b/samples/bpf/utils.c
new file mode 100644
index 0000000..73262a9
--- /dev/null
+++ b/samples/bpf/utils.c
@@ -0,0 +1,301 @@
+/* eBPF mini library */
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <linux/unistd.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <string.h>
+#include <linux/netlink.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <linux/if_packet.h>
+#include <arpa/inet.h>
+#include <linux/perf_event.h>
+#include "utils.h"
+
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
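+/* Create a non-blocking raw packet socket bound to interface 'name' */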
+int open_raw_sock(const char *name)
+{
+ struct sockaddr_ll sll;
+ int sock;
+
+ sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
+ if (sock < 0) {
+ printf("cannot create raw socket\n");
+ return -1;
+ }
+
+ memset(&sll, 0, sizeof(sll));
+ sll.sll_family = AF_PACKET;
+ sll.sll_ifindex = if_nametoindex(name);
+ sll.sll_protocol = htons(ETH_P_ALL);
+ if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
+ printf("bind to %s: %s\n", name, strerror(errno));
+ close(sock);
+ return -1;
+ }
+
+ return sock;
+}
+
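+/*
+ * Dump trace_pipe to stdout forever. Output of bpf_trace_printk()
+ * from BPF programs shows up here. This function never returns.
+ */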
+void read_trace_pipe(void)
+{
+ int trace_fd;
+
+ trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ if (trace_fd < 0)
+ return;
+
+ while (1) {
+ static char buf[4096];
+ ssize_t sz;
+
+ sz = read(trace_fd, buf, sizeof(buf) - 1);
+ if (sz > 0) {
+ buf[sz] = 0;
+ puts(buf);
+ }
+ }
+}
+
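+/* glibc provides no perf_event_open() wrapper, so invoke the syscall directly */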
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+}
+
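+/*
+ * Preprocessing callback for bpf_program__set_prep(): choose the
+ * program type from the section name ("socket", "kprobe/" or
+ * "kretprobe/") and pass the instructions through unmodified.
+ */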
+static int prog_load_prep(struct bpf_program *prog, int n,
+ struct bpf_insn *insns, int insns_cnt,
+ struct bpf_prog_prep_result *res)
+{
+ enum bpf_prog_type prog_type;
+ int is_socket, is_kprobe, is_kretprobe;
+ const char *event = bpf_program__title(prog, false);
+
+ LIBBPF_PTR_ASSERT(event, return -1);
+
+ is_socket = strncmp(event, "socket", 6) == 0;
+ is_kprobe = strncmp(event, "kprobe/", 7) == 0;
+ is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
+
+ if (is_socket) {
+ prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ } else if (is_kprobe || is_kretprobe) {
+ prog_type = BPF_PROG_TYPE_KPROBE;
+ } else {
+ fprintf(stderr, "Unknown event '%s'\n", event);
+ return -1;
+ }
+
+ LIBBPF_ASSERT(bpf_program__set_type(prog, prog_type), return -1);
+ res->new_insn_ptr = insns;
+ res->new_insn_cnt = insns_cnt;
+ return 0;
+}
+
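+/*
+ * Fill a BPF_MAP_TYPE_PROG_ARRAY map: the decimal suffix of each
+ * program's section name is taken as the array index and the
+ * program's fd is stored there, enabling tail calls between programs.
+ */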
+static int populate_prog_array(int map_fd, struct bpf_object *obj)
+{
+ struct bpf_program *prog;
+
+ if (map_fd < 0) {
+ fprintf(stderr, "Invalid map fd\n");
+ return -1;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *event = bpf_program__title(prog, false);
+ int ind, prog_fd;
+ const char *ptr;
+
+ LIBBPF_PTR_ASSERT(event, return -1);
+ ptr = event + strlen(event) - 1;
+ while (isdigit(*ptr))
+ ptr--;
+ ptr++;
+ if (!isdigit(*ptr)) {
+ fprintf(stderr, "Invalid event: %s\n", event);
+ return -1;
+ }
+
+ ind = atoi(ptr);
+
+ __LIBBPF_ASSERT(prog_fd = bpf_program__nth_fd(prog, 0),
+ >= 0, return -1);
+ LIBBPF_ASSERT(bpf_map_update_elem(map_fd, &ind,
+ &prog_fd, BPF_ANY),
+ return -1);
+ }
+ return 0;
+}
+
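+/*
+ * Register a k[ret]probe via debugfs, open it as a tracepoint perf
+ * event and attach BPF program 'fd' to it with PERF_EVENT_IOC_SET_BPF.
+ */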
+static int create_kprobes(int fd, const char *event, bool is_kprobe)
+{
+ char buf[256];
+ int efd, err, id;
+ struct perf_event_attr attr = {};
+
+ if (isdigit(event[0]))
+ return 0;
+
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ snprintf(buf, sizeof(buf),
+ "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+ is_kprobe ? 'p' : 'r', event, event);
+
+ err = system(buf);
+ if (err < 0) {
+ fprintf(stderr, "failed to create kprobe '%s' error '%s'\n",
+ event, strerror(errno));
+ return -1;
+ }
+
+ strcpy(buf, DEBUGFS);
+ strcat(buf, "events/kprobes/");
+ strcat(buf, event);
+ strcat(buf, "/id");
+
+ efd = open(buf, O_RDONLY, 0);
+ if (efd < 0) {
+ fprintf(stderr, "failed to open event %s\n", event);
+ return -1;
+ }
+
+ err = read(efd, buf, sizeof(buf));
+ if (err < 0 || err >= sizeof(buf)) {
+ fprintf(stderr, "read from '%s' failed '%s'\n",
+ event, strerror(errno));
+ return -1;
+ }
+
+ close(efd);
+
+ buf[err] = 0;
+ id = atoi(buf);
+ attr.config = id;
+
+ efd = perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
+ if (efd < 0) {
+ fprintf(stderr, "event %d fd %d err %s\n", id, efd,
+ strerror(errno));
+ return -1;
+ }
+
+ ioctl(efd, PERF_EVENT_IOC_ENABLE, 0);
+ ioctl(efd, PERF_EVENT_IOC_SET_BPF, fd);
+ return 0;
+}
+
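+/*
+ * Open and load the object file at 'path', populate PROG_ARRAY maps
+ * and attach kprobe/kretprobe programs. Returns NULL on failure.
+ */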
+struct bpf_object *load_bpf_file(char *path)
+{
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ int err;
+
+ /* clear all kprobes */
+ err = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+ if (err)
+ fprintf(stderr, "WARNING: clear kprobe_events failed: %s\n", strerror(errno));
+
+ LIBBPF_PTR_ASSERT(obj = bpf_object__open(path), return NULL);
+
+ bpf_object__for_each_program(prog, obj)
+ LIBBPF_ASSERT(bpf_program__set_prep(prog, 1, prog_load_prep),
+ goto errout);
+
+ LIBBPF_ASSERT(bpf_object__load(obj), goto errout);
+
+ bpf_map__for_each(map, obj) {
+ struct bpf_map_def def;
+
+ LIBBPF_ASSERT(bpf_map__get_def(map, &def), goto errout);
+ if (def.type == BPF_MAP_TYPE_PROG_ARRAY) {
+ if (populate_prog_array(bpf_map__get_fd(map), obj)) {
+ fprintf(stderr, "failed to populate program array\n");
+ goto errout;
+ }
+ }
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *event = bpf_program__title(prog, false);
+ int fd, err = 0;
+
+ LIBBPF_PTR_ASSERT(event, goto errout);
+ __LIBBPF_ASSERT(fd = bpf_program__nth_fd(prog, 0),
+ >= 0,
+ goto errout);
+
+ if (strncmp(event, "kprobe/", 7) == 0)
+ err = create_kprobes(fd, event + 7, true);
+ else if (strncmp(event, "kretprobe/", 10) == 0)
+ err = create_kprobes(fd, event + 10, false);
+
+ if (err) {
+ fprintf(stderr, "failed to create kprobes\n");
+ goto errout;
+ }
+ }
+
+ return obj;
+errout:
+ bpf_object__close(obj);
+ return NULL;
+}
+
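+/* Positional accessors: fd of the idx-th program or map, -1 if out of range */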
+int get_prog_fd(struct bpf_object *obj, int idx)
+{
+ int i = 0;
+ struct bpf_program *prog;
+
+ bpf_object__for_each_program(prog, obj)
+ if (i++ == idx)
+ return bpf_program__nth_fd(prog, 0);
+ return -1;
+}
+
+int get_map_fd(struct bpf_object *obj, int idx)
+{
+ int i = 0;
+ struct bpf_map *map;
+
+ bpf_map__for_each(map, obj)
+ if (i++ == idx)
+ return bpf_map__get_fd(map);
+ return -1;
+}
diff --git a/samples/bpf/utils.h b/samples/bpf/utils.h
new file mode 100644
index 0000000..5962a68
--- /dev/null
+++ b/samples/bpf/utils.h
@@ -0,0 +1,221 @@
+#ifndef __SAMPLES_UTILS_H
+#define __SAMPLES_UTILS_H
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <linux/err.h> /* local copy, for IS_ERR()/PTR_ERR() below */
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD 1
+#endif
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
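+/*
+ * Error-checking wrappers for libbpf calls. On failure they decode
+ * the error with libbpf_strerror(), print it to stderr and execute
+ * 'ret' (typically a 'return' or 'goto' statement). LIBBPF_ASSERT()
+ * checks an int result against 0; LIBBPF_PTR_ASSERT() checks a
+ * returned pointer for NULL or IS_ERR().
+ */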
+#define __LIBBPF_ASSERT(stat, cond, ret) do { \
+ char ___errbuf[256]; \
+ int ___err = stat; \
+ \
+ if ((___err) cond) \
+ break; \
+ libbpf_strerror(___err, ___errbuf, sizeof(___errbuf));\
+ fprintf(stderr, "libbpf error: %s\n", ___errbuf);\
+ ret; \
+} while (0)
+
+#define __LIBBPF_PTR_ASSERT(stat, ret) do { \
+ const void *___ptr = stat; \
+ \
+ if (!IS_ERR(___ptr) && ___ptr) \
+ break; \
+ if (!___ptr) \
+ ___ptr = ERR_PTR(-EEXIST); \
+ LIBBPF_ASSERT(PTR_ERR(___ptr), ret); \
+} while (0)
+
+#define LIBBPF_ASSERT(stat, ret) __LIBBPF_ASSERT(stat, == 0, ret)
+#define LIBBPF_PTR_ASSERT(stat, ret) __LIBBPF_PTR_ASSERT(stat, ret)
+
+/* create RAW socket and bind to interface 'name' */
+int open_raw_sock(const char *name);
+void read_trace_pipe(void);
+
+struct perf_event_attr;
+int perf_event_open(struct perf_event_attr *attr, int pid, int cpu,
+ int group_fd, unsigned long flags);
+
+struct bpf_object *load_bpf_file(char *path);
+int get_prog_fd(struct bpf_object *obj, int idx);
+int get_map_fd(struct bpf_object *obj, int idx);
+#endif /* __SAMPLES_UTILS_H */
--
1.8.3.4