Message-ID: <C34620812FCF0F47BBC776FAEF35D3EE5F5C18EB@PRN-MBX01-2.TheFacebook.com>
Date:	Thu, 4 Feb 2016 05:41:02 +0000
From:	Martin Lau <kafai@...com>
To:	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
CC:	Ingo Molnar <mingo@...hat.com>,
	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Alexei Starovoitov <alexei.starovoitov@...il.com>,
	Josef Bacik <jbacik@...com>, Kernel Team <Kernel-team@...com>
Subject: RE: [PATCH RESEND] tcp_estats: ebpf hacks

Please ignore. It was sent by mistake.
________________________________________
From: Martin KaFai Lau [kafai@...com]
Sent: Wednesday, February 03, 2016 9:39 PM
To: linux-kernel@...r.kernel.org
Cc: Ingo Molnar; Masami Hiramatsu; Steven Rostedt; Alexei Starovoitov; Josef Bacik; Kernel Team
Subject: [PATCH RESEND] tcp_estats: ebpf hacks

Signed-off-by: Martin KaFai Lau <kafai@...com>
---
 kernel/trace/bpf_trace.c     |  20 ++
 samples/Makefile             |   2 +-
 samples/bpf/Makefile         |  11 +-
 samples/bpf/bpf_helpers.h    |   4 +
 samples/bpf/bpf_load.c       |  44 +++--
 samples/bpf/tcp_trace.h      |  51 +++++
 samples/bpf/tcp_trace_kern.c | 454 +++++++++++++++++++++++++++++++++++++++++++
 samples/bpf/tcp_trace_user.c | 115 +++++++++++
 tools/net/Makefile           |   6 +-
 9 files changed, 689 insertions(+), 18 deletions(-)
 create mode 100644 samples/bpf/tcp_trace.h
 create mode 100644 samples/bpf/tcp_trace_kern.c
 create mode 100644 samples/bpf/tcp_trace_user.c
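
A note on the access pattern in tcp_trace_kern.c below: the program never
dereferences kernel pointers directly; every field read goes through the
bpf_probe_read() helper (usually via the _() macro), and the
bpf_probe_read_u32() declaration added to bpf_helpers.h follows the same
(dst, size, unsafe_ptr) calling convention. The snippet below is a minimal
sketch of that pattern and is not part of the patch; it assumes the
samples/bpf build environment (SEC, PT_REGS_PARM1 and the helper
declarations come from bpf_helpers.h), and the function name
sketch_read_family is invented for illustration.

#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
#include <net/sock.h>
#include "bpf_helpers.h"

SEC("kprobe/tcp_init_sock")
int sketch_read_family(struct pt_regs *ctx)
{
	struct sock *sk = (struct sock *) PT_REGS_PARM1(ctx);
	unsigned short family = 0;

	/* copy sk->sk_family from kernel memory onto the BPF stack */
	bpf_probe_read(&family, sizeof(family), &sk->sk_family);

	return family == AF_INET;
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;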

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 47febbe..977702e 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -68,6 +68,7 @@ static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
        void *unsafe_ptr = (void *) (long) r3;

        return probe_kernel_read(dst, unsafe_ptr, size);
+       /* return __bpf_probe_read_hack(dst, unsafe_ptr, size); */
 }

 static const struct bpf_func_proto bpf_probe_read_proto = {
@@ -79,6 +80,25 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
        .arg3_type      = ARG_ANYTHING,
 };

+static u64 bpf_probe_read_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+       u32 *dst = (u32 *) (long) r1;
+       int size = (int) r2;
+       u32 *unsafe_ptr = (void *) (long) r3;
+
+       *dst = *unsafe_ptr;
+       return probe_kernel_read(dst, unsafe_ptr, size);
+}
+
+static const struct bpf_func_proto bpf_probe_read_u32_proto = {
+       .func           = bpf_probe_read_u32,
+       .gpl_only       = true,
+       .ret_type       = RET_VOID,
+       .arg1_type      = ARG_PTR_TO_STACK,
+       .arg2_type      = ARG_CONST_STACK_SIZE,
+       .arg3_type      = ARG_ANYTHING,
+};
+
 /*
  * limited trace_printk()
  * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
diff --git a/samples/Makefile b/samples/Makefile
index f00257b..fb87be5 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,4 +1,4 @@
 # Makefile for Linux samples code

 obj-$(CONFIG_SAMPLES)  += kobject/ kprobes/ trace_events/ livepatch/ \
-                          hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/
+                          hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ bpf/
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 97e5243..02885ae 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -14,6 +14,7 @@ hostprogs-y += tracex4
 hostprogs-y += tracex5
 hostprogs-y += trace_output
 hostprogs-y += lathist
+hostprogs-y += tcp_trace

 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -28,6 +29,7 @@ tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
 tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
 trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
 lathist-objs := bpf_load.o libbpf.o lathist_user.o
+tcp_trace-objs := bpf_load.o libbpf.o tcp_trace_user.o

 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -42,6 +44,7 @@ always += tracex5_kern.o
 always += trace_output_kern.o
 always += tcbpf1_kern.o
 always += lathist_kern.o
+always += tcp_trace_kern.o

 HOSTCFLAGS += -I$(objtree)/usr/include

@@ -56,14 +59,16 @@ HOSTLOADLIBES_tracex4 += -lelf -lrt
 HOSTLOADLIBES_tracex5 += -lelf
 HOSTLOADLIBES_trace_output += -lelf -lrt
 HOSTLOADLIBES_lathist += -lelf
+HOSTLOADLIBES_tcp_trace += -lelf

 # point this to your LLVM backend with bpf support
-LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
+LLC=/home/kafai/local/llvm-git-master/bin/llc
+CLANG=/home/kafai/local/llvm-git-master/bin/clang

 $(obj)/%.o: $(src)/%.c
-       clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+       $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
                -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
                -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
-       clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
+       $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
                -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
                -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=asm -o $@.s
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index e84dd3c..df3f00e 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -33,6 +33,10 @@ static int (*bpf_get_current_comm)(void *buf, int buf_size) =
        (void *) BPF_FUNC_get_current_comm;
 static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
        (void *) BPF_FUNC_perf_event_output;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+       (void *) BPF_FUNC_get_prandom_u32;
+static unsigned long long (*bpf_probe_read_u32)(void *dst, int size, void *unsafe_ptr) =
+       (void *) BPF_FUNC_probe_read_u32;

 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index da86a8e..408e429 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -68,12 +68,17 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
                return -1;
        }

+       printf("%s:%d event=%s prog_cnt=%d\n", __FUNCTION__, __LINE__,
+              event, prog_cnt);
+
        fd = bpf_prog_load(prog_type, prog, size, license, kern_version);
        if (fd < 0) {
                printf("bpf_prog_load() err=%d\n%s", errno, bpf_log_buf);
                return -1;
        }

+       /* printf("bpf_prog_load() fd=%d\n%s", fd, bpf_log_buf); */
+
        prog_fd[prog_cnt++] = fd;

        if (is_socket) {
@@ -103,8 +108,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
                        return populate_prog_array(event, fd);

                snprintf(buf, sizeof(buf),
-                        "echo '%c:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
-                        is_kprobe ? 'p' : 'r', event, event);
+                        "echo '%c:%s%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+                        is_kprobe ? 'p' : 'r', is_kprobe ? "" : "r", event, event);
                err = system(buf);
                if (err < 0) {
                        printf("failed to create kprobe '%s' error '%s'\n",
@@ -115,6 +120,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)

        strcpy(buf, DEBUGFS);
        strcat(buf, "events/kprobes/");
+       if (is_kretprobe)
+               strcat(buf, "r");
        strcat(buf, event);
        strcat(buf, "/id");

@@ -229,20 +236,28 @@ int load_bpf_file(char *path)
        Elf_Data *data, *data_prog, *symbols = NULL;
        char *shname, *shname_prog;

-       if (elf_version(EV_CURRENT) == EV_NONE)
+       if (elf_version(EV_CURRENT) == EV_NONE) {
+               printf("%s:%d\n", __FUNCTION__, __LINE__);
                return 1;
+       }

        fd = open(path, O_RDONLY, 0);
-       if (fd < 0)
+       if (fd < 0) {
+               printf("%s:%d\n", __FUNCTION__, __LINE__);
                return 1;
+       }

        elf = elf_begin(fd, ELF_C_READ, NULL);

-       if (!elf)
+       if (!elf) {
+               printf("%s:%d\n", __FUNCTION__, __LINE__);
                return 1;
+       }

-       if (gelf_getehdr(elf, &ehdr) != &ehdr)
+       if (gelf_getehdr(elf, &ehdr) != &ehdr) {
+               printf("%s:%d\n", __FUNCTION__, __LINE__);
                return 1;
+       }

        /* clear all kprobes */
        i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
@@ -271,8 +286,10 @@ int load_bpf_file(char *path)
                        memcpy(&kern_version, data->d_buf, sizeof(int));
                } else if (strcmp(shname, "maps") == 0) {
                        processed_sec[i] = true;
-                       if (load_maps(data->d_buf, data->d_size))
+                       if (load_maps(data->d_buf, data->d_size)) {
+                               printf("%s:%d\n", __FUNCTION__, __LINE__);
                                return 1;
+                       }
                } else if (shdr.sh_type == SHT_SYMTAB) {
                        symbols = data;
                }
@@ -280,7 +297,6 @@ int load_bpf_file(char *path)

        /* load programs that need map fixup (relocations) */
        for (i = 1; i < ehdr.e_shnum; i++) {
-
                if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
                        continue;
                if (shdr.sh_type == SHT_REL) {
@@ -290,6 +306,8 @@ int load_bpf_file(char *path)
                                    &shdr_prog, &data_prog))
                                continue;

+                       /* printf("%s:%d %s\n", __FUNCTION__, __LINE__, shname_prog); */
+
                        insns = (struct bpf_insn *) data_prog->d_buf;

                        processed_sec[shdr.sh_info] = true;
@@ -300,24 +318,28 @@ int load_bpf_file(char *path)

                        if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
                            memcmp(shname_prog, "kretprobe/", 10) == 0 ||
-                           memcmp(shname_prog, "socket", 6) == 0)
+                           memcmp(shname_prog, "socket", 6) == 0) {
+                               /* printf("%s:%d %s\n", __FUNCTION__, __LINE__, shname_prog); */
                                load_and_attach(shname_prog, insns, data_prog->d_size);
+                       }
                }
        }

        /* load programs that don't use maps */
        for (i = 1; i < ehdr.e_shnum; i++) {
-
                if (processed_sec[i])
                        continue;

                if (get_sec(elf, i, &ehdr, &shname, &shdr, &data))
                        continue;

+               /* printf("%s:%d %s\n", __FUNCTION__, __LINE__, shname); */
                if (memcmp(shname, "kprobe/", 7) == 0 ||
                    memcmp(shname, "kretprobe/", 10) == 0 ||
-                   memcmp(shname, "socket", 6) == 0)
+                   memcmp(shname, "socket", 6) == 0) {
+                       /* printf("%s:%d %s\n", __FUNCTION__, __LINE__, shname); */
                        load_and_attach(shname, data->d_buf, data->d_size);
+               }
        }

        close(fd);
diff --git a/samples/bpf/tcp_trace.h b/samples/bpf/tcp_trace.h
new file mode 100644
index 0000000..d6e7ea4
--- /dev/null
+++ b/samples/bpf/tcp_trace.h
@@ -0,0 +1,51 @@
+#ifndef __TCP_TRACE_H
+#define __TCP_TRACE_H
+
+/*
+struct tcp_trace_flow {
+       u32     dst[1];
+};
+*/
+
+struct tcp_trace_flow4 {
+       __be32  dst;
+};
+
+struct tcp_trace_flow6 {
+       __be32  dst0;
+       __be32  dst1;
+};
+
+struct tcp_estats {
+       u64     data_octets_out;
+       u32     data_segs_out;
+       u32     octets_retrans;
+       u32     fast_retrans;
+       u32     timeouts;
+
+       u32     data_segs_in;
+       u64     data_octets_in;
+       u32     segs_in;
+       u32     dup_acks_in;
+       /* u32  sacks_rcvd; */
+       /* u32  sack_blocks_rcvd */
+       u32     dup_acks_out;
+       u32     dup_ack_episodes;
+       u32     sum_octets_reordered;
+
+       /* u64  sndlim_state_ts; */
+       /* u64  sndlim_time[TCP_ESTATS_SNDLIM_NSTATS]; */
+       /* u64  sndlim_trans[TCP_ESTATS_SNDLIM_NSTATS]; */
+       /* u8   sndlim_state; */
+
+       /* u64  rtt_sample_us; */
+       /* u64  max_rtt_us; */
+       /* u64  min_rtt_us; */
+
+       u32     cong_signals;
+       u32     slow_start;
+       u32     cong_avoid;
+       u64     ts;
+};
+
+#endif
diff --git a/samples/bpf/tcp_trace_kern.c b/samples/bpf/tcp_trace_kern.c
new file mode 100644
index 0000000..fd4039f
--- /dev/null
+++ b/samples/bpf/tcp_trace_kern.c
@@ -0,0 +1,454 @@
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include <net/inet_sock.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+#include "bpf_helpers.h"
+#include "tcp_trace.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+
+#ifdef memset
+#undef memset
+#endif
+
+struct bpf_map_def SEC("maps") tcp_flow_map = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(void *),
+       .value_size = sizeof(struct tcp_estats),
+       .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") dst_rack_map4 = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(struct tcp_trace_flow4),
+       .value_size = sizeof(struct tcp_estats),
+       .max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") dst_rack_map6 = {
+       .type = BPF_MAP_TYPE_HASH,
+       .key_size = sizeof(struct tcp_trace_flow6),
+       .value_size = sizeof(struct tcp_estats),
+       .max_entries = 10000,
+};
+
+struct tcphdr_flags {
+       union {
+               __u16 flags;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+               __u16   res1:4,
+                       doff:4,
+                       fin:1,
+                       syn:1,
+                       rst:1,
+                       psh:1,
+                       ack:1,
+                       urg:1,
+                       ece:1,
+                       cwr:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+               __u16   doff:4,
+                       res1:4,
+                       cwr:1,
+                       ece:1,
+                       urg:1,
+                       ack:1,
+                       psh:1,
+                       rst:1,
+                       syn:1,
+                       fin:1;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+       } u;
+};
+
+static __always_inline unsigned char *__skb_transport_header(struct sk_buff *skb)
+{
+       return _(skb->head) + _(skb->transport_header);
+}
+
+static __always_inline struct tcphdr *__tcp_hdr(struct sk_buff *skb)
+{
+       return (struct tcphdr *)__skb_transport_header(skb);
+}
+
+static __always_inline struct tcphdr_flags  __tcp_hdr_flags(struct tcphdr *th)
+{
+       struct tcphdr_flags f;
+
+       f.u.flags = 0;
+
+       bpf_probe_read(&f.u.flags, sizeof(f.u.flags),
+                       (void *)&th->ack_seq + sizeof(th->ack_seq));
+       return f;
+}
+
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+static __always_inline unsigned char *__skb_end_pointer(struct sk_buff *skb)
+{
+       return _(skb->head) + _(skb->end);
+}
+
+static __always_inline unsigned int __skb_end_offset(struct sk_buff *skb)
+{
+       return _(skb->end);
+}
+#else
+static __always_inline unsigned char *__skb_end_pointer(struct sk_buff *skb)
+{
+       return _(skb->end);
+}
+
+static __always_inline unsigned int __skb_end_offset(struct sk_buff *skb)
+{
+       return _(skb->end) - _(skb->head);
+}
+#endif
+
+static __always_inline struct skb_shared_info *__skb_shinfo(struct sk_buff *skb)
+{
+       return (struct skb_shared_info *)(__skb_end_pointer(skb));
+}
+
+static __always_inline unsigned int skb_get_data_len(struct sk_buff *skb)
+{
+       return _(TCP_SKB_CB(skb)->end_seq) - _(TCP_SKB_CB(skb)->seq);
+}
+
+static __always_inline u8 inet_csk_get_ca_state(struct sock *sk)
+{
+       /* FIXME: it is really ugly. We need to find a better solution.
+        * How about a bpf helper to access some common sk bit fields?
+        */
+
+       u8 s;
+
+       bpf_probe_read(&s, sizeof(u8), &inet_csk(sk)->icsk_retransmits - 1);
+
+       return (s & 0x3F);
+}
+
+static __always_inline u32 __tcp_receive_window(struct tcp_sock *tp)
+{
+       s32 win = _(tp->rcv_wup) + _(tp->rcv_wnd) - _(tp->rcv_nxt);
+
+       if (win < 0)
+               win = 0;
+       return (u32) win;
+}
+
+static __always_inline bool __skb_queue_empty(struct sk_buff_head *list)
+{
+       return _(list->next) == (struct sk_buff *) list;
+}
+
+#if 0
+static __always_inline void tcp_trace_flow_by_dst_rack(struct sock *sk,
+                                                      struct tcp_trace_flow *ttf)
+{
+       unsigned short family = _(sk->sk_family);
+
+       memset(ttf, 0, sizeof(*ttf));
+
+       if (family == AF_INET) {
+               ttf->family = AF_INET;
+               ttf->dst[0] = _(inet_sk(sk)->inet_daddr);
+       } else {
+               ttf->family = AF_INET6;
+               ttf->dst[0] = _(sk->sk_v6_daddr.s6_addr32[0]);
+               ttf->dst[1] = _(sk->sk_v6_daddr.s6_addr32[1]);
+               ttf->dst[2] = _(sk->sk_v6_daddr.s6_addr32[2]);
+               ttf->dst[3] = _(sk->sk_v6_daddr.s6_addr32[3]);
+       }
+}
+
+static __always_inline void tcp_trace_flow_by_dst_rack(struct sock *sk,
+                                                      struct tcp_trace_flow *ttf)
+{
+       /* char fmt[] = "%x\n"; */
+
+       unsigned short family = _(sk->sk_family);
+
+       memset(ttf, 0, sizeof(*ttf));
+
+       if (family == AF_INET) {
+               ttf->dst[0] = 0xFFFFFFFF;
+               return;
+       }
+
+       /* ttf->dst[0] = bpf_get_prandom_u32() & 0x07FF; */
+       /* ttf->dst[0] = _(inet_sk(sk)->inet_daddr); */
+       ttf->family = family;
+       /* ttf->dst[0] = _(sk->sk_daddr); */
+       /* ttf->dst[0] = _(sk->sk_txhash); */
+       ttf->dst[0] = 0xFFEEFFEE;
+       /* bpf_probe_read_u32(&ttf->dst[0], sizeof(ttf->dst[0]), &inet_sk(sk)->inet_daddr); */
+       /* bpf_trace_printk(fmt, sizeof(fmt), ttf->dst[0]); */
+
+       /* ttf->dst[1] = 0; */
+}
+
+#endif
+
+static __always_inline struct tcp_estats *tcp_estats_get_by_dst_rack(struct sock *sk)
+{
+       struct tcp_estats *tpes = NULL;
+       unsigned short family;
+
+       family = _(sk->sk_family);
+       if (family == AF_INET) {
+               struct tcp_trace_flow4 ttf;
+
+               memset(&ttf, 0, sizeof(ttf));
+
+               /* bpf_probe_read_u32(&ttf.dst, sizeof(u32), &inet_sk(sk)->inet_daddr); */
+               ttf.dst = _(inet_sk(sk)->inet_daddr);
+
+               tpes = (struct tcp_estats *)bpf_map_lookup_elem(&dst_rack_map4, &ttf);
+
+               if (!tpes) {
+                       struct tcp_estats new_tpes;
+
+                       memset(&new_tpes, 0, sizeof(new_tpes));
+                       if (bpf_map_update_elem(&dst_rack_map4, &ttf, &new_tpes, 0))
+                               return NULL;
+                       else
+                               tpes = bpf_map_lookup_elem(&dst_rack_map4, &ttf);
+
+                       if (!tpes)
+                               return NULL;
+               }
+       } else if (family == AF_INET6) {
+
+               struct tcp_trace_flow6 ttf;
+
+               memset(&ttf, 0, sizeof(ttf));
+
+/*
+               bpf_probe_read_u32(&ttf.dst0, sizeof(u32), &sk->sk_v6_daddr.s6_addr32[0]);
+               bpf_probe_read_u32(&ttf.dst1, sizeof(u32), &sk->sk_v6_daddr.s6_addr32[0]);
+*/
+
+/* ttf.dst[1] = _(sk->sk_v6_daddr.s6_addr32[1]); */
+
+               tpes = (struct tcp_estats *)bpf_map_lookup_elem(&dst_rack_map6, &ttf);
+
+               if (!tpes) {
+                       struct tcp_estats new_tpes;
+
+                       memset(&new_tpes, 0, sizeof(new_tpes));
+                       if (bpf_map_update_elem(&dst_rack_map6, &ttf, &new_tpes, 0))
+                               return NULL;
+                       else
+                               tpes = bpf_map_lookup_elem(&dst_rack_map6, &ttf);
+
+                       if (!tpes)
+                               return NULL;
+               }
+       }
+
+       return tpes;
+}
+
+static __always_inline struct tcp_estats *tcp_estats_get_by_sk(struct sock *sk)
+{
+       struct tcp_estats *tpes;
+
+       if (!sk)
+               return NULL;
+
+       tpes = (struct tcp_estats *)bpf_map_lookup_elem(&tcp_flow_map, &sk);
+
+       if (!tpes)
+               return NULL;
+
+       return tpes;
+}
+
+static __always_inline struct tcp_estats *tcp_estats_get(struct sock *sk)
+{
+
+       return tcp_estats_get_by_dst_rack(sk);
+}
+
+SEC("kprobe/tcp_rcv_established")
+int trace_rcv_established(struct pt_regs *ctx)
+{
+       struct skb_shared_info *shinfo;
+       struct tcphdr_flags thflags;
+       struct tcp_estats *tpes;
+       unsigned int data_len;
+       struct sk_buff *skb;
+       struct tcp_sock *tp;
+       struct tcphdr *th;
+       struct sock *sk;
+
+       sk = (struct sock *) PT_REGS_PARM1(ctx);
+       skb = (struct sk_buff *) PT_REGS_PARM2(ctx);
+       th = __tcp_hdr(skb);
+
+       if (!sk || !skb)
+               return 0;
+
+#if 0
+       thflags = __tcp_hdr_flags(th);
+       if (_(skb->len) < thflags.u.doff << 2)
+               return 0;
+#endif
+
+       tpes = tcp_estats_get(sk);
+       if (!tpes)
+               return 0;
+
+       tpes->segs_in++;
+
+
+#if 0
+       shinfo = __skb_shinfo(skb);
+       tp = tcp_sk(sk);
+
+
+       data_len = skb_get_data_len(skb);
+       if (data_len) {
+               tpes->data_segs_in += max_t(u16, 1, _(shinfo->gso_segs));
+               tpes->data_octets_in += data_len;
+
+               /* OOO */
+               if (after(_(TCP_SKB_CB(skb)->seq), _(tp->rcv_nxt)) &&
+                   before(_(TCP_SKB_CB(skb)->seq),
+                          _(tp->rcv_nxt) + __tcp_receive_window(tp))) {
+                       tpes->dup_acks_out++;
+
+                       if (__skb_queue_empty(&tp->out_of_order_queue))
+                               tpes->dup_ack_episodes++;
+               }
+       } else {
+               /* Pure Ack */
+               if (_(TCP_SKB_CB(skb)->ack_seq) == _(tp->snd_una))
+                       tpes->dup_acks_in++;
+       }
+#endif
+
+#if 0
+       if (inet_csk_get_ca_state(sk) == TCP_CA_Disorder) {
+               u32 prior_snd_una = _(tcp_sk(sk)->snd_una);
+               u32 ack = _(TCP_SKB_CB(skb)->ack_seq);
+
+               if (after(ack, prior_snd_una))
+                       tpes->sum_octets_reordered += (ack - prior_snd_una);
+       }
+#endif
+
+       return 0;
+}
+
+SEC("kprobe/tcp_transmit_skb")
+int trace_transmit_skb(struct pt_regs *ctx)
+{
+       struct tcp_estats *tpes;
+       unsigned int data_len;
+       struct sk_buff *skb;
+       struct sock *sk;
+
+       sk = (struct sock *) PT_REGS_PARM1(ctx);
+       tpes = tcp_estats_get(sk);
+       if (!tpes)
+               return 0;
+
+       skb = (struct sk_buff *) PT_REGS_PARM2(ctx);
+
+       data_len = skb_get_data_len(skb);
+#if 0
+       if (unlikely(_(TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_FIN))
+               data_len--;
+#endif
+
+       if (data_len) {
+               tpes->data_segs_out += _(TCP_SKB_CB(skb)->tcp_gso_segs);
+               tpes->data_octets_out += data_len;
+       }
+
+       return 0;
+}
+
+SEC("kprobe/tcp_slow_start")
+int trace_slow_start(struct pt_regs *ctx)
+{
+       struct tcp_estats *tpes;
+       struct sock *sk;
+
+       sk = (struct sock *) PT_REGS_PARM1(ctx);
+       tpes = tcp_estats_get(sk);
+       if (!tpes)
+               return 0;
+
+       tpes->slow_start++;
+       return 0;
+}
+
+SEC("kprobe/tcp_cong_avoid_ai")
+int trace_cong_avoid_ai(struct pt_regs *ctx)
+{
+       struct tcp_estats *tpes;
+       struct sock *sk;
+
+       sk = (struct sock *) PT_REGS_PARM1(ctx);
+       tpes = tcp_estats_get(sk);
+       if (!tpes)
+               return 0;
+
+       tpes->cong_avoid++;
+       return 0;
+}
+
+SEC("kprobe/tcp_cwnd_reduction")
+int trace_cwnd_reduction(struct pt_regs *ctx)
+{
+       struct tcp_estats *tpes;
+       struct sock *sk;
+       int fast_rexmit;
+
+       sk = (struct sock *) PT_REGS_PARM1(ctx);
+       tpes = tcp_estats_get(sk);
+       if (!tpes)
+               return 0;
+
+       fast_rexmit = (int) PT_REGS_PARM3(ctx);
+       if (fast_rexmit)
+               tpes->fast_retrans++;
+
+       tpes->cong_signals++;
+       return 0;
+}
+
+SEC("kprobe/tcp_init_sock")
+int trace_init_sock(struct pt_regs *ctx)
+{
+       struct tcp_estats new_tpes;
+       struct sock *sk;
+
+       sk = (struct sock *) PT_REGS_PARM1(ctx);
+
+       memset(&new_tpes, 0, sizeof(new_tpes));
+       bpf_map_update_elem(&tcp_flow_map, &sk, &new_tpes, BPF_ANY);
+
+       return 0;
+}
+
+SEC("kprobe/tcp_v4_destroy_sock")
+int trace_destroy_sock(struct pt_regs *ctx)
+{
+       struct sock *sk;
+
+       sk = (struct sock *) PT_REGS_PARM1(ctx);
+       bpf_map_delete_elem(&tcp_flow_map, &sk);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tcp_trace_user.c b/samples/bpf/tcp_trace_user.c
new file mode 100644
index 0000000..c4d4752
--- /dev/null
+++ b/samples/bpf/tcp_trace_user.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/bpf.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint8_t u8;
+#include "tcp_trace.h"
+
+static void log_ttf6(const struct tcp_trace_flow6 *ttf)
+{
+       char dst_ip[INET6_ADDRSTRLEN];
+       u32 dst[4];
+
+       dst[0] = ttf->dst0;
+       dst[1] = ttf->dst1;
+       dst[2] = 0;
+       dst[3] = 0;
+
+       inet_ntop(AF_INET6, dst, dst_ip, sizeof(dst_ip));
+
+       printf("family:%d dst: \"[%s]/%d:%d\"\n",
+              AF_INET6, dst_ip, 0, 0); /*ntohs(ttf->dport));*/
+}
+
+static void log_ttf4(const struct tcp_trace_flow4 *ttf)
+{
+       char dst_ip[INET6_ADDRSTRLEN];
+
+       inet_ntop(AF_INET, &ttf->dst, dst_ip, sizeof(dst_ip));
+
+       printf("family:%d dst: \"[%s]/%d:%d\"\n",
+              AF_INET, dst_ip, 0, 0); /*ntohs(ttf->dport));*/
+}
+
+static void log_stats(const struct tcp_estats *s)
+{
+       printf("\tslow_start:%u cong_avoid:%u cong_signals:%u\n"
+
+              "\tdata_segs_out:%u data_octets_out:%lu "
+              "\tdup_acks_out:%u dup_ack_episodes:%u sum_octets_reordered:%u "
+              "\tfast_retrans:%u octets_retrans:%u "
+              "\ttimeouts:%u\n"
+
+              "\tsegs_in:%u data_segs_in:%u data_octets_in:%lu dup_acks_in:%u\n",
+
+              s->slow_start, s->cong_avoid, s->cong_signals,
+
+              s->data_segs_out, s->data_octets_out,
+              s->dup_acks_out, s->dup_ack_episodes, s->sum_octets_reordered,
+              s->fast_retrans, s->octets_retrans,
+              s->timeouts,
+
+              s->segs_in, s->data_segs_in, s->data_octets_in, s->dup_acks_in);
+}
+
+static void tcp_estats_log6(const struct tcp_trace_flow6 *ttf,
+                           const struct tcp_estats *tpes)
+{
+       log_ttf6(ttf);
+       log_stats(tpes);
+}
+
+static void tcp_estats_log4(const struct tcp_trace_flow4 *ttf,
+                           const struct tcp_estats *tpes)
+{
+       log_ttf4(ttf);
+       log_stats(tpes);
+}
+
+int main(int ac, char **argv)
+{
+       struct tcp_trace_flow4 ttf4, next_ttf4;
+       struct tcp_trace_flow6 ttf6, next_ttf6;
+       struct tcp_estats tpes;
+       char filename[256];
+       void *sk, *next_sk;
+       unsigned int c;
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       if (load_bpf_file(filename)) {
+               printf("%s", bpf_log_buf);
+               return 1;
+       }
+
+       while (1) {
+               memset(&ttf4, 0, sizeof(ttf4));
+               memset(&next_ttf4, 0, sizeof(next_ttf4));
+               c = 0;
+               while (bpf_get_next_key(map_fd[1], &ttf4, &next_ttf4) == 0) {
+                       if (!bpf_lookup_elem(map_fd[1], &next_ttf4, &tpes))
+                               tcp_estats_log4(&next_ttf4, &tpes);
+                       ttf4 = next_ttf4;
+                       c++;
+               }
+               memset(&ttf6, 0, sizeof(ttf6));
+               memset(&next_ttf6, 0, sizeof(next_ttf6));
+               while (bpf_get_next_key(map_fd[2], &ttf6, &next_ttf6) == 0) {
+                       if (!bpf_lookup_elem(map_fd[2], &next_ttf6, &tpes))
+                               tcp_estats_log6(&next_ttf6, &tpes);
+                       ttf6 = next_ttf6;
+                       c++;
+               }
+               printf("c=%u\n", c);
+               sleep(10);
+       }
+
+       return 0;
+}
diff --git a/tools/net/Makefile b/tools/net/Makefile
index ee577ea..2528d02 100644
--- a/tools/net/Makefile
+++ b/tools/net/Makefile
@@ -12,15 +12,15 @@ YACC = bison

 all : bpf_jit_disasm bpf_dbg bpf_asm

-bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
+bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'  -I '../../include/uapi' -I '../../include'
 bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
 bpf_jit_disasm : bpf_jit_disasm.o

-bpf_dbg : CFLAGS = -Wall -O2
+bpf_dbg : CFLAGS = -Wall -O2 -I '../../include/uapi' -I '../../include'
 bpf_dbg : LDLIBS = -lreadline
 bpf_dbg : bpf_dbg.o

-bpf_asm : CFLAGS = -Wall -O2 -I.
+bpf_asm : CFLAGS = -Wall -O2 -I. -I '../../include/uapi' -I '../../include'
 bpf_asm : LDLIBS =
 bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
 bpf_exp.lex.o : bpf_exp.yacc.c
--
2.5.1
