lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1454564373-3185774-1-git-send-email-kafai@fb.com>
Date:	Wed, 3 Feb 2016 21:39:33 -0800
From:	Martin KaFai Lau <kafai@...com>
To:	<linux-kernel@...r.kernel.org>
CC:	Ingo Molnar <mingo@...hat.com>,
	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Alexei Starovoitov <alexei.starovoitov@...il.com>,
	Josef Bacik <jbacik@...com>, Kernel Team <kernel-team@...com>
Subject: [PATCH RESEND] tcp_estats: ebpf hacks

Signed-off-by: Martin KaFai Lau <kafai@...com>
---
 kernel/trace/bpf_trace.c     |  20 ++
 samples/Makefile             |   2 +-
 samples/bpf/Makefile         |  11 +-
 samples/bpf/bpf_helpers.h    |   4 +
 samples/bpf/bpf_load.c       |  44 +++--
 samples/bpf/tcp_trace.h      |  51 +++++
 samples/bpf/tcp_trace_kern.c | 454 +++++++++++++++++++++++++++++++++++++++++++
 samples/bpf/tcp_trace_user.c | 115 +++++++++++
 tools/net/Makefile           |   6 +-
 9 files changed, 689 insertions(+), 18 deletions(-)
 create mode 100644 samples/bpf/tcp_trace.h
 create mode 100644 samples/bpf/tcp_trace_kern.c
 create mode 100644 samples/bpf/tcp_trace_user.c

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 47febbe..977702e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -68,6 +68,7 @@ static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	void *unsafe_ptr = (void *) (long) r3;
 
 	return probe_kernel_read(dst, unsafe_ptr, size);
+	/* return __bpf_probe_read_hack(dst, unsafe_ptr, size); */
 }
 
 static const struct bpf_func_proto bpf_probe_read_proto = {
@@ -79,6 +80,25 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+/* Fault-safe copy of size bytes from untrusted r3 into r1; no direct deref. */
+static u64 bpf_probe_read_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	u32 *dst = (u32 *) (long) r1;
+	int size = (int) r2;
+	u32 *unsafe_ptr = (void *) (long) r3;
+
+	return probe_kernel_read(dst, unsafe_ptr, size);
+}
+
+static const struct bpf_func_proto bpf_probe_read_u32_proto = {
+	.func		= bpf_probe_read_u32,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,	/* status from probe_kernel_read() */
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 /*
  * limited trace_printk()
  * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
diff --git a/samples/Makefile b/samples/Makefile
index f00257b..fb87be5 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,4 +1,4 @@
 # Makefile for Linux samples code
 
 obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ livepatch/ \
-			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
+			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ bpf/
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 97e5243..02885ae 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -14,6 +14,7 @@ hostprogs-y += tracex4
 hostprogs-y += tracex5
 hostprogs-y += trace_output
 hostprogs-y += lathist
+hostprogs-y += tcp_trace
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -28,6 +29,7 @@ tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
 tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
 trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
 lathist-objs := bpf_load.o libbpf.o lathist_user.o
+tcp_trace-objs := bpf_load.o libbpf.o tcp_trace_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -42,6 +44,7 @@ always += tracex5_kern.o
 always += trace_output_kern.o
 always += tcbpf1_kern.o
 always += lathist_kern.o
+always += tcp_trace_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -56,14 +59,16 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt
 HOSTLOADLIBES_tracex5 += -lelf
 HOSTLOADLIBES_trace_output += -lelf -lrt
 HOSTLOADLIBES_lathist += -lelf
+HOSTLOADLIBES_tcp_trace += -lelf
 
 # point this to your LLVM backend with bpf support
-LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
+LLC ?= llc
+CLANG ?= clang
 
 $(obj)/%.o: $(src)/%.c
-	clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+	$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
 		-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
 		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
-	clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+	$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
 		-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
 		-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index e84dd3c..df3f00e 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -33,6 +33,10 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) =
 	(void *) BPF_FUNC_get_current_comm;
 static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
 	(void *) BPF_FUNC_perf_event_output;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+	(void *) BPF_FUNC_get_prandom_u32;
+static unsigned long long (*bpf_probe_read_u32)(void *dst, int size, void *unsafe_ptr) =
+	(void *) BPF_FUNC_probe_read_u32;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index da86a8e..408e429 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -68,12 +68,17 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 		return -1;
 	}
 
+	printf("%s:%d event=%s prog_cnt=%d\n", __FUNCTION__, __LINE__,
+	       event, prog_cnt);
+
 	fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
 	if (fd < 0) {
 		printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
 		return -1;
 	}
 
+	/* printf("bpf_prog_load() fd=%d\n%s", fd, bpf_log_buf); */
+
 	prog_fd[prog_cnt++] = fd;
 
 	if (is_socket) {
@@ -103,8 +108,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 			return populate_prog_array(event, fd);
 
 		snprintf(buf, sizeof(buf),
-			 "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
-			 is_kprobe ? 'p' : 'r', event, event);
+			 "echo '%c:%s%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+			 is_kprobe ? 'p' : 'r', is_kprobe ? "" : "r", event, event);
 		err = system(buf);
 		if (err < 0) {
 			printf("failed to create kprobe '%s' error '%s'\n",
@@ -115,6 +120,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 
 	strcpy(buf, DEBUGFS);
 	strcat(buf, "events/kprobes/");
+	if (is_kretprobe)
+		strcat(buf, "r");
 	strcat(buf, event);
 	strcat(buf, "/id");
 
@@ -229,20 +236,28 @@ int load_bpf_file(char *path)
 	Elf_Data *data, *data_prog, *symbols = NULL;
 	char *shname, *shname_prog;
 
-	if (elf_version(EV_CURRENT) == EV_NONE)
+	if (elf_version(EV_CURRENT) == EV_NONE) {
+		printf("%s:%d\n", __FUNCTION__, __LINE__);
 		return 1;
+	}
 
 	fd = open(path, O_RDONLY, 0);
-	if (fd < 0)
+	if (fd < 0) {
+		printf("%s:%d\n", __FUNCTION__, __LINE__);
 		return 1;
+	}
 
 	elf = elf_begin(fd, ELF_C_READ, NULL);
 
-	if (!elf)
+	if (!elf) {
+		printf("%s:%d\n", __FUNCTION__, __LINE__);
 		return 1;
+	}
 
-	if (gelf_getehdr(elf, &ehdr) != &ehdr)
+	if (gelf_getehdr(elf, &ehdr) != &ehdr) {
+		printf("%s:%d\n", __FUNCTION__, __LINE__);
 		return 1;
+	}
 
 	/* clear all kprobes */
 	i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
@@ -271,8 +286,10 @@ int load_bpf_file(char *path)
 			memcpy(&kern_version, data->d_buf, sizeof(int));
 		} else if (strcmp(shname, "maps") == 0) {
 			processed_sec[i] = true;
-			if (load_maps(data->d_buf, data->d_size))
+			if (load_maps(data->d_buf, data->d_size)) {
+				printf("%s:%d\n", __FUNCTION__, __LINE__);
 				return 1;
+			}
 		} else if (shdr.sh_type == SHT_SYMTAB) {
 			symbols = data;
 		}
@@ -280,7 +297,6 @@ int load_bpf_file(char *path)
 
 	/* load programs that need map fixup (relocations) */
 	for (i = 1; i < ehdr.e_shnum; i++) {
-
 		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
 			continue;
 		if (shdr.sh_type == SHT_REL) {
@@ -290,6 +306,8 @@ int load_bpf_file(char *path)
 				    &shdr_prog, &data_prog))
 				continue;
 
+			/* printf("%s:%d %s\n", __FUNCTION__, __LINE__, shname_prog); */
+
 			insns = (struct bpf_insn *) data_prog->d_buf;
 
 			processed_sec[shdr.sh_info] = true;
@@ -300,24 +318,28 @@ int load_bpf_file(char *path)
 
 			if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
 			    memcmp(shname_prog, "kretprobe/", 10) == 0 ||
-			    memcmp(shname_prog, "socket", 6) == 0)
+			    memcmp(shname_prog, "socket", 6) == 0) {
+				/* printf("%s:%d %s\n", __FUNCTION__, __LINE__, shname_prog); */
 				load_and_attach(shname_prog, insns, data_prog->d_size);
+			}
 		}
 	}
 
 	/* load programs that don't use maps */
 	for (i = 1; i < ehdr.e_shnum; i++) {
-
 		if (processed_sec[i])
 			continue;
 
 		if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
 			continue;
 
+		/* printf("%s:%d %s\n", __FUNCTION__, __LINE__, shname); */
 		if (memcmp(shname, "kprobe/", 7) == 0 ||
 		    memcmp(shname, "kretprobe/", 10) == 0 ||
-		    memcmp(shname, "socket", 6) == 0)
+		    memcmp(shname, "socket", 6) == 0) {
+			/* printf("%s:%d %s\n", __FUNCTION__, __LINE__, shname); */
 			load_and_attach(shname, data->d_buf, data->d_size);
+		}
 	}
 
 	close(fd);
diff --git a/samples/bpf/tcp_trace.h b/samples/bpf/tcp_trace.h
new file mode 100644
index 0000000..d6e7ea4
--- /dev/null
+++ b/samples/bpf/tcp_trace.h
@@ -0,0 +1,51 @@
+#ifndef __TCP_TRACE_H
+#define __TCP_TRACE_H
+
+/*
+struct tcp_trace_flow {
+	u32	dst[1];
+};
+*/
+
+struct tcp_trace_flow4 {
+	__be32	dst;
+};
+
+struct tcp_trace_flow6 {
+	__be32	dst0;
+	__be32	dst1;
+};
+
+struct tcp_estats {
+	u64	data_octets_out;
+	u32	data_segs_out;
+	u32	octets_retrans;
+	u32	fast_retrans;
+	u32	timeouts;
+
+	u32	data_segs_in;
+	u64	data_octets_in;
+	u32	segs_in;
+	u32	dup_acks_in;
+	/* u32	sacks_rcvd; */
+	/* u32	sack_blocks_rcvd */
+	u32	dup_acks_out;
+	u32	dup_ack_episodes;
+	u32	sum_octets_reordered;
+
+	/* u64	sndlim_state_ts; */
+	/* u64	sndlim_time[TCP_ESTATS_SNDLIM_NSTATS]; */
+	/* u64	sndlim_trans[TCP_ESTATS_SNDLIM_NSTATS]; */
+	/* u8	sndlim_state; */
+
+	/* u64	rtt_sample_us; */
+	/* u64	max_rtt_us; */
+	/* u64	min_rtt_us; */
+
+	u32	cong_signals;
+	u32	slow_start;
+	u32	cong_avoid;
+	u64	ts;
+};
+
+#endif
diff --git a/samples/bpf/tcp_trace_kern.c b/samples/bpf/tcp_trace_kern.c
new file mode 100644
index 0000000..fd4039f
--- /dev/null
+++ b/samples/bpf/tcp_trace_kern.c
@@ -0,0 +1,454 @@
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include <net/inet_sock.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+#include "bpf_helpers.h"
+#include "tcp_trace.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+
+#ifdef memset
+#undef memset
+#endif
+
+struct bpf_map_def SEC("maps") tcp_flow_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(void *),
+	.value_size = sizeof(struct tcp_estats),
+	.max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") dst_rack_map4 = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct tcp_trace_flow4),
+	.value_size = sizeof(struct tcp_estats),
+	.max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") dst_rack_map6 = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct tcp_trace_flow6),
+	.value_size = sizeof(struct tcp_estats),
+	.max_entries = 10000,
+};
+
+struct tcphdr_flags {
+	union {
+		__u16 flags;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+		__u16	res1:4,
+			doff:4,
+			fin:1,
+			syn:1,
+			rst:1,
+			psh:1,
+			ack:1,
+			urg:1,
+			ece:1,
+			cwr:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+		__u16	doff:4,
+			res1:4,
+			cwr:1,
+			ece:1,
+			urg:1,
+			ack:1,
+			psh:1,
+			rst:1,
+			syn:1,
+			fin:1;
+#else
+#error	"Adjust your <asm/byteorder.h> defines"
+#endif
+	} u;
+};
+
+static __always_inline unsigned char *__skb_transport_header(struct sk_buff *skb)
+{
+	return _(skb->head) + _(skb->transport_header);
+}
+
+static __always_inline struct tcphdr *__tcp_hdr(struct sk_buff *skb)
+{
+	return (struct tcphdr *)__skb_transport_header(skb);
+}
+
+/* Fetch the 16-bit word that follows ack_seq in the TCP header at th. */
+static __always_inline struct tcphdr_flags  __tcp_hdr_flags(struct tcphdr *th)
+{
+	struct tcphdr_flags f;
+
+	f.u.flags = 0;
+	/* "+ 1" is scaled by sizeof(ack_seq): the address right after it */
+	bpf_probe_read(&f.u.flags, sizeof(f.u.flags), &th->ack_seq + 1);
+	return f;
+}
+
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+static __always_inline unsigned char *__skb_end_pointer(struct sk_buff *skb)
+{
+	return _(skb->head) + _(skb->end);
+}
+
+static __always_inline unsigned int __skb_end_offset(struct sk_buff *skb)
+{
+	return _(skb->end);
+}
+#else
+static __always_inline unsigned char *__skb_end_pointer(struct sk_buff *skb)
+{
+	return _(skb->end);
+}
+
+static __always_inline unsigned int __skb_end_offset(struct sk_buff *skb)
+{
+	return _(skb->end) - _(skb->head);
+}
+#endif
+
+static __always_inline struct skb_shared_info *__skb_shinfo(struct sk_buff *skb)
+{
+	return (struct skb_shared_info *)(__skb_end_pointer(skb));
+}
+
+static __always_inline unsigned int skb_get_data_len(struct sk_buff *skb)
+{
+	return _(TCP_SKB_CB(skb)->end_seq) - _(TCP_SKB_CB(skb)->seq);
+}
+
+static __always_inline u8 inet_csk_get_ca_state(struct sock *sk)
+{
+	/* FIXME: layout-dependent hack. Reads the byte just before
+	 * icsk_retransmits and keeps its low 6 bits (presumably icsk_ca_state).
+	 * How about a bpf helper to access some common sk bit fields?
+	 */
+	u8 s = 0;	/* zeroed like val in _(): defined result if read fails */
+
+	bpf_probe_read(&s, sizeof(u8), &inet_csk(sk)->icsk_retransmits - 1);
+
+	return (s & 0x3F);
+}
+
+static __always_inline u32 __tcp_receive_window(struct tcp_sock *tp)
+{
+	s32 win = _(tp->rcv_wup) + _(tp->rcv_wnd) - _(tp->rcv_nxt);
+
+	if (win < 0)
+		win = 0;
+	return (u32) win;
+}
+
+static __always_inline bool __skb_queue_empty(struct sk_buff_head *list)
+{
+	return _(list->next) == (struct sk_buff *) list;
+}
+
+#if 0
+static __always_inline void tcp_trace_flow_by_dst_rack(struct sock *sk,
+						       struct tcp_trace_flow *ttf)
+{
+	unsigned short family = _(sk->sk_family);
+
+	memset(ttf, 0, sizeof(*ttf));
+
+	if (family == AF_INET) {
+		ttf->family = AF_INET;
+		ttf->dst[0] = _(inet_sk(sk)->inet_daddr);
+	} else {
+		ttf->family = AF_INET6;
+		ttf->dst[0] = _(sk->sk_v6_daddr.s6_addr32[0]);
+		ttf->dst[1] = _(sk->sk_v6_daddr.s6_addr32[1]);
+		ttf->dst[2] = _(sk->sk_v6_daddr.s6_addr32[2]);
+		ttf->dst[3] = _(sk->sk_v6_daddr.s6_addr32[3]);
+	}
+}
+
+static __always_inline void tcp_trace_flow_by_dst_rack(struct sock *sk,
+						       struct tcp_trace_flow *ttf)
+{
+	/* char fmt[] = "%x\n"; */
+
+	unsigned short family = _(sk->sk_family);
+
+	memset(ttf, 0, sizeof(*ttf));
+
+	if (family == AF_INET) {
+		ttf->dst[0] = 0xFFFFFFFF;
+		return;
+	}
+
+	/* ttf->dst[0] = bpf_get_prandom_u32() & 0x07FF; */
+	/* ttf->dst[0] = _(inet_sk(sk)->inet_daddr); */
+	ttf->family = family;
+	/* ttf->dst[0] = _(sk->sk_daddr); */
+	/* ttf->dst[0] = _(sk->sk_txhash); */
+	ttf->dst[0] = 0xFFEEFFEE;
+	/* bpf_probe_read_u32(&ttf->dst[0], sizeof(ttf->dst[0]), &inet_sk(sk)->inet_daddr); */
+	/* bpf_trace_printk(fmt, sizeof(fmt), ttf->dst[0]); */
+
+	/* ttf->dst[1] = 0; */
+}
+
+#endif
+
+static __always_inline struct tcp_estats *tcp_estats_get_by_dst_rack(struct sock *sk)
+{
+	struct tcp_estats *tpes = NULL;
+	unsigned short family;
+
+	family = _(sk->sk_family);
+	if (family == AF_INET) {
+		struct tcp_trace_flow4 ttf;
+
+		memset(&ttf, 0, sizeof(ttf));
+
+		/* bpf_probe_read_u32(&ttf.dst, sizeof(u32), &inet_sk(sk)->inet_daddr); */
+		ttf.dst = _(inet_sk(sk)->inet_daddr);
+
+		tpes = (struct tcp_estats *)bpf_map_lookup_elem(&dst_rack_map4, &ttf);
+
+		if (!tpes) {
+			struct tcp_estats new_tpes;
+
+			memset(&new_tpes, 0, sizeof(new_tpes));
+			if (bpf_map_update_elem(&dst_rack_map4, &ttf, &new_tpes, 0))
+				return NULL;
+			else
+				tpes = bpf_map_lookup_elem(&dst_rack_map4, &ttf);
+
+			if (!tpes)
+				return NULL;
+		}
+	} else if (family == AF_INET6) {
+
+		struct tcp_trace_flow6 ttf;
+
+		memset(&ttf, 0, sizeof(ttf));
+
+/*
+		bpf_probe_read_u32(&ttf.dst0, sizeof(u32), &sk->sk_v6_daddr.s6_addr32[0]);
+		bpf_probe_read_u32(&ttf.dst1, sizeof(u32), &sk->sk_v6_daddr.s6_addr32[0]);
+*/
+
+/* ttf.dst[1] = _(sk->sk_v6_daddr.s6_addr32[1]); */
+
+		tpes = (struct tcp_estats *)bpf_map_lookup_elem(&dst_rack_map6, &ttf);
+
+		if (!tpes) {
+			struct tcp_estats new_tpes;
+
+			memset(&new_tpes, 0, sizeof(new_tpes));
+			if (bpf_map_update_elem(&dst_rack_map6, &ttf, &new_tpes, 0))
+				return NULL;
+			else
+				tpes = bpf_map_lookup_elem(&dst_rack_map6, &ttf);
+
+			if (!tpes)
+				return NULL;
+		}
+	}
+
+	return tpes;
+}
+
+static __always_inline struct tcp_estats *tcp_estats_get_by_sk(struct sock *sk)
+{
+	struct tcp_estats *tpes;
+
+	if (!sk)
+		return NULL;
+
+	tpes = (struct tcp_estats *)bpf_map_lookup_elem(&tcp_flow_map, &sk);
+
+	if (!tpes)
+		return NULL;
+
+	return tpes;
+}
+
+static __always_inline struct tcp_estats *tcp_estats_get(struct sock *sk)
+{
+
+	return tcp_estats_get_by_dst_rack(sk);
+}
+
+SEC("kprobe/tcp_rcv_established")
+int trace_rcv_established(struct pt_regs *ctx)
+{
+	struct skb_shared_info *shinfo;
+	struct tcphdr_flags thflags;
+	struct tcp_estats *tpes;
+	unsigned int data_len;
+	struct sk_buff *skb;
+	struct tcp_sock *tp;
+	struct tcphdr *th;
+	struct sock *sk;
+
+	sk = (struct sock *) PT_REGS_PARM1(ctx);
+	skb = (struct sk_buff *) PT_REGS_PARM2(ctx);
+	th = __tcp_hdr(skb);
+
+	if (!sk || !skb)
+		return 0;
+
+#if 0
+	thflags = __tcp_hdr_flags(th);
+	if (_(skb->len) < thflags.u.doff << 2)
+		return 0;
+#endif
+
+	tpes = tcp_estats_get(sk);
+	if (!tpes)
+		return 0;
+
+	tpes->segs_in++;
+
+
+#if 0
+	shinfo = __skb_shinfo(skb);
+	tp = tcp_sk(sk);
+
+
+	data_len = skb_get_data_len(skb);
+	if (data_len) {
+		tpes->data_segs_in += max_t(u16, 1, _(shinfo->gso_segs));
+		tpes->data_octets_in += data_len;
+
+		/* OOO */
+		if (after(_(TCP_SKB_CB(skb)->seq), _(tp->rcv_nxt)) &&
+		    before(_(TCP_SKB_CB(skb)->seq),
+			   _(tp->rcv_nxt) + __tcp_receive_window(tp))) {
+			tpes->dup_acks_out++;
+
+			if (__skb_queue_empty(&tp->out_of_order_queue))
+			 	tpes->dup_ack_episodes++;
+		}
+	} else {
+		/* Pure Ack */
+		if (_(TCP_SKB_CB(skb)->ack_seq) == _(tp->snd_una))
+			tpes->dup_acks_in++;
+	}
+#endif
+
+#if 0
+	if (inet_csk_get_ca_state(sk) == TCP_CA_Disorder) {
+		u32 prior_snd_una = _(tcp_sk(sk)->snd_una);
+		u32 ack = _(TCP_SKB_CB(skb)->ack_seq);
+
+		if (after(ack, prior_snd_una))
+			tpes->sum_octets_reordered += (ack - prior_snd_una);
+	}
+#endif
+
+	return 0;
+}
+
+SEC("kprobe/tcp_transmit_skb")
+int trace_transmit_skb(struct pt_regs *ctx)
+{
+	struct tcp_estats *tpes;
+	unsigned int data_len;
+	struct sk_buff *skb;
+	struct sock *sk;
+
+	sk = (struct sock *) PT_REGS_PARM1(ctx);
+	tpes = tcp_estats_get(sk);
+	if (!tpes)
+		return 0;
+
+	skb = (struct sk_buff *) PT_REGS_PARM2(ctx);
+
+	data_len = skb_get_data_len(skb);
+#if 0
+	if (unlikely(_(TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_FIN))
+		data_len--;
+#endif
+
+	if (data_len) {
+		tpes->data_segs_out += _(TCP_SKB_CB(skb)->tcp_gso_segs);
+		tpes->data_octets_out += data_len;
+	}
+
+	return 0;
+}
+
+SEC("kprobe/tcp_slow_start")
+int trace_slow_start(struct pt_regs *ctx)
+{
+	struct tcp_estats *tpes;
+	struct sock *sk;
+
+	sk = (struct sock *) PT_REGS_PARM1(ctx);
+	tpes = tcp_estats_get(sk);
+	if (!tpes)
+		return 0;
+
+	tpes->slow_start++;
+	return 0;
+}
+
+SEC("kprobe/tcp_cong_avoid_ai")
+int trace_cong_avoid_ai(struct pt_regs *ctx)
+{
+	struct tcp_estats *tpes;
+	struct sock *sk;
+
+	sk = (struct sock *) PT_REGS_PARM1(ctx);
+	tpes = tcp_estats_get(sk);
+	if (!tpes)
+		return 0;
+
+	tpes->cong_avoid++;
+	return 0;
+}
+
+SEC("kprobe/tcp_cwnd_reduction")
+int trace_cwnd_reduction(struct pt_regs *ctx)
+{
+	struct tcp_estats *tpes;
+	struct sock *sk;
+	int fast_rexmit;
+
+	sk = (struct sock *) PT_REGS_PARM1(ctx);
+	tpes = tcp_estats_get(sk);
+	if (!tpes)
+		return 0;
+
+	fast_rexmit = (int) PT_REGS_PARM3(ctx);
+	if (fast_rexmit)
+		tpes->fast_retrans++;
+
+	tpes->cong_signals++;
+	return 0;
+}
+
+SEC("kprobe/tcp_init_sock")
+int trace_init_sock(struct pt_regs *ctx)
+{
+	struct tcp_estats new_tpes;
+	struct sock *sk;
+
+	sk = (struct sock *) PT_REGS_PARM1(ctx);
+
+	memset(&new_tpes, 0, sizeof(new_tpes));
+	bpf_map_update_elem(&tcp_flow_map, &sk, &new_tpes, BPF_ANY);
+
+	return 0;
+}
+
+SEC("kprobe/tcp_v4_destroy_sock")
+int trace_destroy_sock(struct pt_regs *ctx)
+{
+	struct sock *sk;
+
+	sk = (struct sock *) PT_REGS_PARM1(ctx);
+	bpf_map_delete_elem(&tcp_flow_map, &sk);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tcp_trace_user.c b/samples/bpf/tcp_trace_user.c
new file mode 100644
index 0000000..c4d4752
--- /dev/null
+++ b/samples/bpf/tcp_trace_user.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/bpf.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint8_t u8;
+#include "tcp_trace.h"
+
+static void log_ttf6(const struct tcp_trace_flow6 *ttf)
+{
+	char dst_ip[INET6_ADDRSTRLEN];
+	u32 dst[4];
+
+	dst[0] = ttf->dst0;
+	dst[1] = ttf->dst1;
+	dst[2] = 0;
+	dst[3] = 0;
+
+	inet_ntop(AF_INET6, dst, dst_ip, sizeof(dst_ip));
+
+	printf("family:%d dst: \"[%s]/%d:%d\"\n",
+	       AF_INET6, dst_ip, 0, 0); /*ntohs(ttf->dport));*/
+}
+
+static void log_ttf4(const struct tcp_trace_flow4 *ttf)
+{
+	char dst_ip[INET6_ADDRSTRLEN];
+
+	inet_ntop(AF_INET, &ttf->dst, dst_ip, sizeof(dst_ip));
+
+	printf("family:%d dst: \"[%s]/%d:%d\"\n",
+	       AF_INET, dst_ip, 0, 0); /*ntohs(ttf->dport));*/
+}
+
+/* Print every counter of *s; u64 fields are cast to match %llu portably. */
+static void log_stats(const struct tcp_estats *s)
+{
+	printf("\tslow_start:%u cong_avoid:%u cong_signals:%u "
+	       "\tdata_segs_out:%u data_octets_out:%llu "
+	       "\tdup_acks_out:%u dup_ack_episodes:%u sum_octets_reordered:%u "
+	       "\tfast_retrans:%u octets_retrans:%u "
+	       "\ttimeouts:%u\n"
+	       "\tsegs_in:%u data_segs_in:%u data_octets_in:%llu dup_acks_in:%u\n",
+
+	       s->slow_start, s->cong_avoid, s->cong_signals,
+
+	       s->data_segs_out, (unsigned long long)s->data_octets_out,
+	       s->dup_acks_out, s->dup_ack_episodes, s->sum_octets_reordered,
+	       s->fast_retrans, s->octets_retrans,
+	       s->timeouts,
+
+	       s->segs_in, s->data_segs_in,
+	       (unsigned long long)s->data_octets_in, s->dup_acks_in);
+}
+
+static void tcp_estats_log6(const struct tcp_trace_flow6 *ttf,
+			    const struct tcp_estats *tpes)
+{
+	log_ttf6(ttf);
+	log_stats(tpes);
+}
+
+static void tcp_estats_log4(const struct tcp_trace_flow4 *ttf,
+			    const struct tcp_estats *tpes)
+{
+	log_ttf4(ttf);
+	log_stats(tpes);
+}
+
+/* Load <argv[0]>_kern.o, then dump both per-destination maps every 10s. */
+int main(int ac, char **argv)
+{
+	struct tcp_trace_flow4 ttf4, next_ttf4;
+	struct tcp_trace_flow6 ttf6, next_ttf6;
+	struct tcp_estats tpes;
+	char filename[256];
+	unsigned int c;
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	while (1) {
+		/* Start from an all-ones key, which should never be a real
+		 * dst: starting on an existing key (e.g. zero) would skip it. */
+		memset(&ttf4, 0xff, sizeof(ttf4));
+		c = 0;
+		while (bpf_get_next_key(map_fd[1], &ttf4, &next_ttf4) == 0) {
+			if (!bpf_lookup_elem(map_fd[1], &next_ttf4, &tpes))
+				tcp_estats_log4(&next_ttf4, &tpes);
+			ttf4 = next_ttf4;
+			c++;
+		}
+		memset(&ttf6, 0xff, sizeof(ttf6));
+		while (bpf_get_next_key(map_fd[2], &ttf6, &next_ttf6) == 0) {
+			if (!bpf_lookup_elem(map_fd[2], &next_ttf6, &tpes))
+				tcp_estats_log6(&next_ttf6, &tpes);
+			ttf6 = next_ttf6;
+			c++;
+		}
+		printf("c=%u\n", c);
+		sleep(10);
+	}
+
+	return 0;
+}
diff --git a/tools/net/Makefile b/tools/net/Makefile
index ee577ea..2528d02 100644
--- a/tools/net/Makefile
+++ b/tools/net/Makefile
@@ -12,15 +12,15 @@ YACC = bison
 
 all : bpf_jit_disasm bpf_dbg bpf_asm
 
-bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
+bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'  -I '../../include/uapi' -I '../../include'
 bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
 bpf_jit_disasm : bpf_jit_disasm.o
 
-bpf_dbg : CFLAGS = -Wall -O2
+bpf_dbg : CFLAGS = -Wall -O2 -I '../../include/uapi' -I '../../include'
 bpf_dbg : LDLIBS = -lreadline
 bpf_dbg : bpf_dbg.o
 
-bpf_asm : CFLAGS = -Wall -O2 -I.
+bpf_asm : CFLAGS = -Wall -O2 -I. -I '../../include/uapi' -I '../../include'
 bpf_asm : LDLIBS =
 bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
 bpf_exp.lex.o : bpf_exp.yacc.c
-- 
2.5.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ