Message-Id: <152406545918.3465.14253635905960610284.stgit@localhost.localdomain>
Date: Wed, 18 Apr 2018 17:30:59 +0200
From: Sebastiano Miano <sebastiano.miano@...ito.it>
To: netdev@...r.kernel.org, ast@...nel.org, daniel@...earbox.net
Cc: mingo@...hat.com, rostedt@...dmis.org, brouer@...hat.com,
fulvio.risso@...ito.it
Subject: [bpf-next PATCH 3/3] bpf: add sample program to trace map events
This patch adds a sample program, called trace_map_events,
that shows how to capture map events and filter them based on
the map ID.
The program accepts a list of map IDs via the -i command line
option and reports only the map events (i.e.,
map_create/update/lookup/next_key) related to those IDs.
If no IDs are specified, all map events are listed and no filtering
is performed.
Sample usage:
# trace_map_events -i <map_id1> -i <map_id2> -i <map_id3> ...
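Map IDs can be retrieved, for instance, with "bpftool map list" on
systems where bpftool is available.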
Signed-off-by: Sebastiano Miano <sebastiano.miano@...ito.it>
---
samples/bpf/Makefile | 4
samples/bpf/trace_map_events_kern.c | 225 +++++++++++++++++++++++++
samples/bpf/trace_map_events_user.c | 314 +++++++++++++++++++++++++++++++++++
3 files changed, 543 insertions(+)
create mode 100644 samples/bpf/trace_map_events_kern.c
create mode 100644 samples/bpf/trace_map_events_user.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4d6a6ed..a7d52b6 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -15,6 +15,7 @@ hostprogs-y += tracex6
hostprogs-y += tracex7
hostprogs-y += test_probe_write_user
hostprogs-y += trace_output
+hostprogs-y += trace_map_events
hostprogs-y += lathist
hostprogs-y += offwaketime
hostprogs-y += spintest
@@ -65,6 +66,7 @@ tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o
load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o
test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o
trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o
+trace_map_events-objs := bpf_load.o $(LIBBPF) trace_map_events_user.o
lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o
offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o
spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o
@@ -111,6 +113,7 @@ always += tracex7_kern.o
always += sock_flags_kern.o
always += test_probe_write_user_kern.o
always += trace_output_kern.o
+always += trace_map_events_kern.o
always += tcbpf1_kern.o
always += tcbpf2_kern.o
always += tc_l2_redirect_kern.o
@@ -171,6 +174,7 @@ HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
HOSTLOADLIBES_load_sock_ops += -lelf
HOSTLOADLIBES_test_probe_write_user += -lelf
HOSTLOADLIBES_trace_output += -lelf -lrt
+HOSTLOADLIBES_trace_map_events += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf
HOSTLOADLIBES_offwaketime += -lelf
HOSTLOADLIBES_spintest += -lelf
diff --git a/samples/bpf/trace_map_events_kern.c b/samples/bpf/trace_map_events_kern.c
new file mode 100644
index 0000000..f887b5b
--- /dev/null
+++ b/samples/bpf/trace_map_events_kern.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2018 Politecnico di Torino, Italy
+ *
+ * Author: Sebastiano Miano <sebastiano.miano@...ito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+enum map_event_type {
+ MAP_CREATE = 0,
+ MAP_UPDATE = 1,
+ MAP_LOOKUP = 2,
+ MAP_NEXT_KEY = 3
+};
+
+struct map_event_data {
+ u32 map_id;
+ enum map_event_type evnt_type;
+ u32 map_type;
+};
+
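+/*
+ * Per-CPU perf event array used to push map events to user space
+ * via bpf_perf_event_output().
+ */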
+struct bpf_map_def SEC("maps") map_event_trace = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = 64,
+};
+
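+/* Map IDs to report on; filled from user space via the -i option */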
+struct bpf_map_def SEC("maps") filtered_ids = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 64,
+};
+
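+/* Single-entry flag: true when filtering by map ID is enabled */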
+struct bpf_map_def SEC("maps") filter_events = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(bool),
+ .max_entries = 1,
+};
+
+/*
+ * Tracepoint format: /sys/kernel/debug/tracing/events/bpf/bpf_map_create/format
+ * Code in: kernel/include/trace/events/bpf.h
+ */
+struct bpf_map_create_ctx {
+ u64 pad; // First 8 bytes are not accessible by bpf code
+ u32 type; // offset:8; size:4; signed:0;
+ u32 size_key; // offset:12; size:4; signed:0;
+ u32 size_value; // offset:16; size:4; signed:0;
+ u32 max_entries; // offset:20; size:4; signed:0;
+ u32 flags; // offset:24; size:4; signed:0;
+ int ufd; // offset:28; size:4; signed:1;
+ u32 id; // offset:32; size:4; signed:0;
+};
+
+SEC("tracepoint/bpf/bpf_map_create")
+int trace_bpf_map_create(struct bpf_map_create_ctx *ctx)
+{
+ struct map_event_data data;
+ int cpu = bpf_get_smp_processor_id();
+ bool *filter;
+ u32 key = 0, map_id = ctx->id;
+
+ filter = bpf_map_lookup_elem(&filter_events, &key);
+ if (!filter)
+ return 1;
+
+ if (!*filter)
+ goto send_event;
+
+ /*
+ * If the map_id is not in the list of filtered
+ * ids we immediately return
+ */
+ if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
+ return 0;
+
+send_event:
+ data.map_id = map_id;
+ data.evnt_type = MAP_CREATE;
+ data.map_type = ctx->type;
+
+ bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
+ return 0;
+}
+
+/*
+ * Tracepoint: /sys/kernel/debug/tracing/events/bpf/bpf_map_lookup_elem/format
+ * Tracepoint: /sys/kernel/debug/tracing/events/bpf/bpf_map_update_elem/format
+ * Code in: kernel/include/trace/events/bpf.h
+ */
+struct bpf_map_keyval_ctx {
+ u64 pad; // First 8 bytes are not accessible by bpf code
+ u32 type; // offset:8; size:4; signed:0;
+ u32 key_len; // offset:12; size:4; signed:0;
+ u32 key; // offset:16; size:4; signed:0;
+ bool key_trunc; // offset:20; size:1; signed:0;
+ u32 val_len; // offset:24; size:4; signed:0;
+ u32 val; // offset:28; size:4; signed:0;
+ bool val_trunc; // offset:32; size:1; signed:0;
+ int ufd; // offset:36; size:4; signed:1;
+ u32 id; // offset:40; size:4; signed:0;
+};
+
+SEC("tracepoint/bpf/bpf_map_lookup_elem")
+int trace_bpf_map_lookup_elem(struct bpf_map_keyval_ctx *ctx)
+{
+ struct map_event_data data;
+ int cpu = bpf_get_smp_processor_id();
+ bool *filter;
+ u32 key = 0, map_id = ctx->id;
+
+ filter = bpf_map_lookup_elem(&filter_events, &key);
+ if (!filter)
+ return 1;
+
+ if (!*filter)
+ goto send_event;
+
+ /*
+ * If the map_id is not in the list of filtered
+ * ids we immediately return
+ */
+ if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
+ return 0;
+
+send_event:
+ data.map_id = map_id;
+ data.evnt_type = MAP_LOOKUP;
+ data.map_type = ctx->type;
+
+ bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
+ return 0;
+}
+
+SEC("tracepoint/bpf/bpf_map_update_elem")
+int trace_bpf_map_update_elem(struct bpf_map_keyval_ctx *ctx)
+{
+ struct map_event_data data;
+ int cpu = bpf_get_smp_processor_id();
+ bool *filter;
+ u32 key = 0, map_id = ctx->id;
+
+ filter = bpf_map_lookup_elem(&filter_events, &key);
+ if (!filter)
+ return 1;
+
+ if (!*filter)
+ goto send_event;
+
+ /*
+ * If the map_id is not in the list of filtered
+ * ids we immediately return
+ */
+ if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
+ return 0;
+
+send_event:
+ data.map_id = map_id;
+ data.evnt_type = MAP_UPDATE;
+ data.map_type = ctx->type;
+
+ bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
+ return 0;
+}
+
+/*
+ * Tracepoint: /sys/kernel/debug/tracing/events/bpf/bpf_map_next_key/format
+ * Code in: kernel/include/trace/events/bpf.h
+ */
+struct bpf_map_next_key_ctx {
+ u64 pad; // First 8 bytes are not accessible by bpf code
+ u32 type; // offset:8; size:4; signed:0;
+ u32 key_len; // offset:12; size:4; signed:0;
+ u32 key; // offset:16; size:4; signed:0;
+ u32 nxt; // offset:20; size:4; signed:0;
+ bool key_trunc; // offset:24; size:1; signed:0;
+ bool key_null; // offset:25; size:1; signed:0;
+ int ufd; // offset:28; size:4; signed:1;
+ u32 id; // offset:32; size:4; signed:0;
+};
+
+SEC("tracepoint/bpf/bpf_map_next_key")
+int trace_bpf_map_next_key(struct bpf_map_next_key_ctx *ctx)
+{
+ struct map_event_data data;
+ int cpu = bpf_get_smp_processor_id();
+ bool *filter;
+ u32 key = 0, map_id = ctx->id;
+
+ filter = bpf_map_lookup_elem(&filter_events, &key);
+ if (!filter)
+ return 1;
+
+ if (!*filter)
+ goto send_event;
+
+ /*
+ * If the map_id is not in the list of filtered
+ * ids we immediately return
+ */
+ if (!bpf_map_lookup_elem(&filtered_ids, &map_id))
+ return 0;
+
+send_event:
+ data.map_id = map_id;
+ data.evnt_type = MAP_NEXT_KEY;
+ data.map_type = ctx->type;
+
+ bpf_perf_event_output(ctx, &map_event_trace, cpu, &data, sizeof(data));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/trace_map_events_user.c b/samples/bpf/trace_map_events_user.c
new file mode 100644
index 0000000..bc7447e
--- /dev/null
+++ b/samples/bpf/trace_map_events_user.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2018 Politecnico di Torino, Italy
+ *
+ * Author: Sebastiano Miano <sebastiano.miano@...ito.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+static const char *__desc__ =
+"Sample program to trace map related events\n"
+"The -i option allows to set the id(s) of the map you are interested in.\n"
+"If no ID is specified, all map events are listed.\n";
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/resource.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <signal.h>
+#include <getopt.h>
+
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "bpf_util.h"
+#include "perf-sys.h"
+
+#define MAX_FILTERED_IDS 64
+
+static int *perf_fd;
+
+int epoll_fd;
+int page_size;
+int page_cnt = 8;
+volatile struct perf_event_mmap_page **readers;
+
+typedef void (*event_cb)(void *data, int size);
+
+enum map_event_type {
+ MAP_CREATE = 0,
+ MAP_UPDATE = 1,
+ MAP_LOOKUP = 2,
+ MAP_NEXT_KEY = 3
+};
+
+static void usage(char *argv[])
+{
+ printf("\nDESCRIPTION:\n%s", __desc__);
+ printf("\n");
+ printf(" Usage: %s [-i map_id1] [-i map_id2] ...\n", argv[0]);
+ printf("\n");
+}
+
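+/*
+ * Map one metadata page plus page_cnt data pages of the perf ring
+ * buffer for the given event fd.
+ */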
+static int perf_event_mmap(int fd, int cpu)
+{
+ void *base;
+ int mmap_size;
+
+ page_size = getpagesize();
+ mmap_size = page_size * (page_cnt + 1);
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ printf("mmap err\n");
+ return -1;
+ }
+
+ readers[cpu] = base;
+ return 0;
+}
+
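+/*
+ * Open a PERF_COUNT_SW_BPF_OUTPUT event on the given CPU, mmap its ring
+ * buffer, enable it and store its fd in the perf event array map, so
+ * that bpf_perf_event_output() invocations on that CPU are delivered
+ * to this buffer.
+ */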
+static void init_bpf_perf_event_on_cpu(int cpu)
+{
+ struct perf_event_attr attr = {
+ .sample_type = PERF_SAMPLE_RAW,
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ .sample_period = 1,
+ .wakeup_events = 1,
+ };
+ int key = cpu;
+
+ perf_fd[cpu] = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+
+ assert(perf_fd[cpu] >= 0);
+ assert(perf_event_mmap(perf_fd[cpu], cpu) >= 0);
+ assert(ioctl(perf_fd[cpu], PERF_EVENT_IOC_ENABLE, 0) >= 0);
+ assert(bpf_map_update_elem(map_fd[0], &key, &perf_fd[cpu], 0) == 0);
+
+ struct epoll_event e = { .events = EPOLLIN, .data.u32 = cpu };
+
+ assert(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, perf_fd[cpu], &e) == 0);
+}
+
+static int perf_event_poll(int fd, int num_cpus, struct epoll_event *events)
+{
+ return epoll_wait(fd, events, num_cpus, -1);
+}
+
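+/* Layout of a PERF_RECORD_SAMPLE record with PERF_SAMPLE_RAW */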
+struct perf_event_sample {
+ struct perf_event_header header;
+ __u32 size;
+ char data[];
+};
+
+static void perf_event_read(event_cb fn, __u32 index)
+{
+ __u64 data_tail = readers[index]->data_tail;
+ __u64 data_head = readers[index]->data_head;
+ __u64 buffer_size = page_cnt * page_size;
+ void *base, *begin, *end;
+ char buf[256];
+
+ asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+ if (data_head == data_tail)
+ return;
+
+ base = ((char *)readers[index]) + page_size;
+
+ begin = base + data_tail % buffer_size;
+ end = base + data_head % buffer_size;
+
+ while (begin != end) {
+ struct perf_event_sample *e;
+
+ e = begin;
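+		/*
+		 * The record may wrap around the end of the ring buffer;
+		 * if so, reassemble it into buf from the two halves.
+		 */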
+ if (begin + e->header.size > base + buffer_size) {
+ long len = base + buffer_size - begin;
+
+ assert(len < e->header.size);
+ memcpy(buf, begin, len);
+ memcpy(buf + len, base, e->header.size - len);
+ e = (void *) buf;
+ begin = base + e->header.size - len;
+ } else if (begin + e->header.size == base + buffer_size) {
+ begin = base;
+ } else {
+ begin += e->header.size;
+ }
+
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ fn(e->data, e->size);
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = (void *) e;
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+ }
+
+ __sync_synchronize(); /* smp_mb() */
+ readers[index]->data_tail = data_head;
+}
+
+static const char *get_event_type(enum map_event_type event)
+{
+ switch (event) {
+ case MAP_CREATE:
+ return "CREATE";
+ case MAP_LOOKUP:
+ return "LOOKUP";
+ case MAP_UPDATE:
+ return "UPDATE";
+ case MAP_NEXT_KEY:
+ return "NEXT_KEY";
+ }
+
+ return "UNKNOWN";
+}
+
+
+static void map_event_callback(void *data, int size)
+{
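+	/* Must match struct map_event_data in trace_map_events_kern.c */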
+ struct {
+ __u32 map_id;
+ enum map_event_type event_type;
+ __u32 map_type;
+ } *e = data;
+
+ printf("%s event for map id: %d and type: %d\n",
+ get_event_type(e->event_type), e->map_id, e->map_type);
+}
+
+static bool init_filtered_ids_map(int num_ids, int *filtered_ids)
+{
+ int i, key, value;
+ bool filtering = false;
+	/*
+	 * Put the IDs in the map; only events related to those IDs
+	 * will be shown. The key is the map ID, while the value is
+	 * unused and therefore set to 0.
+	 */
+ for (i = 0; i < num_ids; i++) {
+ key = filtered_ids[i];
+ value = 0;
+ if (bpf_map_update_elem(map_fd[1], &key, &value, 0) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_update_elem failed key:0x%X\n", key);
+ return false;
+ }
+ }
+
+ if (num_ids > 0)
+ filtering = true;
+
+ key = 0;
+ assert(bpf_map_update_elem(map_fd[2], &key, &filtering, BPF_ANY) == 0);
+ return true;
+}
+
+static bool init_perf_buffer_data_structures(int nr_cpus)
+{
+ int i;
+
+ perf_fd = malloc(sizeof(int) * nr_cpus);
+ assert(perf_fd);
+ readers = malloc(sizeof(*readers) * nr_cpus);
+ assert(readers);
+
+	epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	assert(epoll_fd >= 0);
+
+ for (i = 0; i < nr_cpus; i++) {
+ printf("Init bpf_perf_event for cpu:%d\n", i);
+ init_bpf_perf_event_on_cpu(i);
+ }
+
+ return true;
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int i, cnt, opt, ret = EXIT_SUCCESS;
+ char bpf_obj_file[256];
+ int num_ids = 0, nr_cpus = bpf_num_possible_cpus();
+ int filtered_ids[MAX_FILTERED_IDS];
+
+ snprintf(bpf_obj_file, sizeof(bpf_obj_file), "%s_kern.o", argv[0]);
+
+ /* Parse commands line args */
+ while ((opt = getopt(argc, argv, "hi:")) != -1) {
+ switch (opt) {
+ case 'i':
+ if (num_ids == MAX_FILTERED_IDS) {
+				printf("Reached maximum number of IDs\n");
+ return EXIT_FAILURE;
+ }
+ i = atoi(optarg);
+ if (!i)
+				printf("ERROR - Invalid id %s\n", optarg);
+ else
+ filtered_ids[num_ids++] = i;
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return EXIT_FAILURE;
+ }
+
+ if (load_bpf_file(bpf_obj_file)) {
+ printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
+ return EXIT_FAILURE;
+ }
+
+ if (!prog_fd[0]) {
+ printf("ERROR - load_bpf_file: %s\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+	if (!init_filtered_ids_map(num_ids, filtered_ids))
+		return EXIT_FAILURE;
+ init_perf_buffer_data_structures(nr_cpus);
+
+	struct epoll_event *events = calloc(nr_cpus, sizeof(*events));
+
+	assert(events);
+
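+	/* Poll the per-CPU buffers and drain those that became readable */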
+ while (true) {
+ printf("Waiting for map events...\n");
+ cnt = perf_event_poll(epoll_fd, nr_cpus, events);
+ for (i = 0; i < cnt; i++)
+ perf_event_read(map_event_callback, events[i].data.u32);
+ }
+
+ free(perf_fd);
+ free(readers);
+ free(events);
+
+ return ret;
+}