[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1418608606-1569264-5-git-send-email-kafai@fb.com>
Date: Sun, 14 Dec 2014 17:56:45 -0800
From: Martin KaFai Lau <kafai@...com>
To: <netdev@...r.kernel.org>
CC: "David S. Miller" <davem@...emloft.net>,
Hannes Frederic Sowa <hannes@...essinduktion.org>,
Steven Rostedt <rostedt@...dmis.org>,
Lawrence Brakmo <brakmo@...com>, Josef Bacik <jbacik@...com>,
Kernel Team <Kernel-team@...com>
Subject: [RFC PATCH net-next 4/5] tcp: Introduce tcp_sk_trace and related structs.
struct tcp_sk_trace and its related structs define what the TCP
tracer (added in the following patch) will collect and record to
the tracing ring buffer.
Signed-off-by: Martin KaFai Lau <kafai@...com>
---
include/linux/tcp.h | 4 +++
include/net/tcp_trace.h | 18 ++++++++++
include/uapi/linux/tcp_trace.h | 78 ++++++++++++++++++++++++++++++++++++++++++
kernel/trace/Kconfig | 11 ++++++
kernel/trace/Makefile | 1 +
kernel/trace/tcp_trace.c | 37 ++++++++++++++++++++
net/ipv4/tcp.c | 4 +++
7 files changed, 153 insertions(+)
create mode 100644 include/net/tcp_trace.h
create mode 100644 include/uapi/linux/tcp_trace.h
create mode 100644 kernel/trace/tcp_trace.c
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 67309ec..8d25cb3 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -315,6 +315,10 @@ struct tcp_sock {
* socket. Used to retransmit SYNACKs etc.
*/
struct request_sock *fastopen_rsk;
+
+#ifdef CONFIG_TCP_TRACE
+ struct tcp_sk_trace *trace;
+#endif
};
enum tsq_flags {
diff --git a/include/net/tcp_trace.h b/include/net/tcp_trace.h
new file mode 100644
index 0000000..f800cc7
--- /dev/null
+++ b/include/net/tcp_trace.h
@@ -0,0 +1,18 @@
+#ifndef TCP_TRACE_H
+#define TCP_TRACE_H
+
+struct sock;
+
+#ifdef CONFIG_TCP_TRACE
+
+void tcp_sk_trace_init(struct sock *sk); /* called from tcp_set_state() when a socket enters TCP_ESTABLISHED */
+void tcp_sk_trace_destruct(struct sock *sk); /* called from tcp_sock_destruct() */
+
+#else /* CONFIG_TCP_TRACE */
+
+static inline void tcp_sk_trace_init(struct sock *sk) {} /* no-op stub when CONFIG_TCP_TRACE is not set */
+static inline void tcp_sk_trace_destruct(struct sock *sk) {} /* no-op stub when CONFIG_TCP_TRACE is not set */
+
+#endif /* CONFIG_TCP_TRACE */
+
+#endif /* TCP_TRACE_H */
diff --git a/include/uapi/linux/tcp_trace.h b/include/uapi/linux/tcp_trace.h
new file mode 100644
index 0000000..4f91056
--- /dev/null
+++ b/include/uapi/linux/tcp_trace.h
@@ -0,0 +1,78 @@
+#ifndef UAPI_TCP_TRACE_H
+#define UAPI_TCP_TRACE_H
+
+#include <linux/kernel.h>
+
+#define TCP_TRACE_MAGIC 0x54435000 /* ASCII 'T' 'C' 'P' in the upper three bytes; low byte is zero */
+#define TCP_TRACE_VERSION 0x01 /* occupies the zero low byte of TCP_TRACE_MAGIC once OR-ed in */
+#define TCP_TRACE_MAGIC_VERSION (TCP_TRACE_MAGIC | TCP_TRACE_VERSION) /* magic and version in one value: 0x54435001 */
+
+enum tcp_trace_events { /* NOTE(review): presumably the values carried in the 7-bit tcp_trace.event field - confirm */
+ TCP_TRACE_EVENT_ESTABLISHED, /* NOTE(review): presumably emitted on connection establishment - confirm */
+ TCP_TRACE_EVENT_PERIODIC, /* Periodic event every 2s */
+ TCP_TRACE_EVENT_RETRANS, /* Retrans (not in TCP_CA_Loss) */
+ TCP_TRACE_EVENT_RETRANS_LOSS, /* Retrans in TCP_CA_Loss */
+ TCP_TRACE_EVENT_CLOSE, /* Connection close */
+};
+
+struct tcp_stats { /* Counters embedded in struct tcp_sk_trace and struct tcp_trace_stats */
+ /* outgoing packets */
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u64 data_octets_out;
+
+ /* retrans */
+ __u32 other_segs_retrans;
+ __u32 other_octets_retrans;
+ __u32 loss_segs_retrans;
+ __u32 loss_octets_retrans;
+
+ /* incoming packets */
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u64 data_octets_in;
+
+ /* RTT */
+ __u64 rtt_sample_us;
+ __u64 max_rtt_us;
+ __u64 min_rtt_us; /* starts at U64_MAX, see tcp_sk_trace_init() */
+ __u64 sum_rtt_us;
+ __u32 count_rtt;
+
+ /* RTO */
+ __u32 max_rto_ms;
+ __u32 min_rto_ms; /* starts at U32_MAX, see tcp_sk_trace_init() */
+
+ /* OOO or Loss */
+ __u32 dup_acks_in;
+ __u32 sacks_in;
+ __u32 sack_blks_in;
+ __u32 ooo_in;
+} __attribute__((packed)); /* spelled out: the kernel-internal __packed macro is not available to userspace users of this uapi header */
+
+struct tcp_trace { /* Common record header; first member of struct tcp_trace_basic */
+ __u32 magic; /* NOTE(review): presumably TCP_TRACE_MAGIC_VERSION - confirm */
+ __u8 event:7, /* NOTE(review): presumably an enum tcp_trace_events value - confirm */
+ ipv6:1; /* NOTE(review): bit-field placement within the byte is compiler/endian dependent - confirm this is acceptable for a uapi layout */
+ __u32 local_addr[4]; /* 4 x 32 bits, large enough for an IPv6 address */
+ __u32 remote_addr[4]; /* 4 x 32 bits, large enough for an IPv6 address */
+ __u16 local_port; /* NOTE(review): byte order is not visible here - confirm */
+ __u16 remote_port; /* NOTE(review): byte order is not visible here - confirm */
+} __attribute__((packed)); /* spelled out: the kernel-internal __packed macro is not available to userspace users of this uapi header */
+
+struct tcp_trace_basic { /* Record header plus a snapshot of tcp_sock values; first member of struct tcp_trace_stats */
+ struct tcp_trace event; /* common header (magic, event type, addresses, ports) */
+ /* current values from tcp_sock */
+ __u32 snd_cwnd;
+ __u32 mss;
+ __u32 ssthresh;
+ __u64 srtt_us;
+ __u32 rto_ms;
+} __attribute__((packed)); /* spelled out: the kernel-internal __packed macro is not available to userspace users of this uapi header */
+
+struct tcp_trace_stats { /* Basic record followed by the full struct tcp_stats counter set */
+ struct tcp_trace_basic basic; /* header and tcp_sock snapshot */
+ struct tcp_stats stats; /* accumulated counters */
+} __attribute__((packed)); /* spelled out: the kernel-internal __packed macro is not available to userspace users of this uapi header */
+
+#endif /* UAPI_TCP_TRACE_H */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index a5da09c..f30835c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -599,6 +599,17 @@ config RING_BUFFER_STARTUP_TEST
If unsure, say N
+config TCP_TRACE
+ bool "TCP tracing"
+ depends on NET && INET
+ select DEBUG_FS
+ select TRACEPOINTS
+ select GENERIC_TRACER
+ help
+ This tracer collects per-flow statistics and events.
+
+ If unsure, say N.
+
endif # FTRACE
endif # TRACING_SUPPORT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 67d6369..71d008a 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -65,5 +65,6 @@ obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
+obj-$(CONFIG_TCP_TRACE) += tcp_trace.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/tcp_trace.c b/kernel/trace/tcp_trace.c
new file mode 100644
index 0000000..9d09fd0
--- /dev/null
+++ b/kernel/trace/tcp_trace.c
@@ -0,0 +1,37 @@
+#include <net/tcp_trace.h>
+#include <linux/tcp.h>
+#include <uapi/linux/tcp_trace.h>
+
+static bool tcp_trace_enabled __read_mostly; /* gates allocation in tcp_sk_trace_init(); zero-initialised (false) and not set anywhere in this patch */
+
+struct tcp_sk_trace { /* Per-socket trace state, reached through tcp_sock->trace */
+ struct tcp_stats stats; /* zeroed by kzalloc(), except the two minima set in tcp_sk_trace_init() */
+ unsigned long start_ts; /* jiffies value taken in tcp_sk_trace_init() */
+ unsigned long last_ts; /* initialised to the same jiffies value as start_ts */
+};
+
+void tcp_sk_trace_init(struct sock *sk) /* Attach freshly allocated trace state to sk when tracing is enabled */
+{
+ struct tcp_sk_trace *sktr;
+
+ tcp_sk(sk)->trace = NULL; /* NOTE(review): drops any previous pointer without freeing it - confirm a socket can never reach here with a live allocation (e.g. re-established after a disconnect) */
+ if (!tcp_trace_enabled)
+ return;
+
+ sktr = kzalloc(sizeof(*sktr), gfp_any());
+ if (unlikely(!sktr))
+ return; /* allocation failure is silent: the socket is left with trace == NULL */
+
+ tcp_sk(sk)->trace = sktr;
+ sk->sk_destruct = tcp_sock_destruct; /* tcp_sock_destruct() calls tcp_sk_trace_destruct(), which frees sktr */
+
+ sktr->stats.min_rtt_us = U64_MAX; /* minima start at the type maximum; everything else stays zero from kzalloc() */
+ sktr->stats.min_rto_ms = U32_MAX;
+
+ sktr->last_ts = sktr->start_ts = jiffies;
+}
+
+void tcp_sk_trace_destruct(struct sock *sk) /* Free the trace state attached by tcp_sk_trace_init(); called from tcp_sock_destruct() */
+{
+ kfree(tcp_sk(sk)->trace); /* trace may be NULL (tracing disabled or allocation failed); kfree(NULL) is a no-op. The pointer is not cleared afterwards */
+}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3b887fa..41871c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -275,6 +275,7 @@
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/sock.h>
+#include <net/tcp_trace.h>
#include <trace/events/tcp.h>
#include <asm/uaccess.h>
@@ -1904,6 +1905,7 @@ void tcp_set_state(struct sock *sk, int state)
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED) {
TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
+ tcp_sk_trace_init(sk);
trace_tcp_established(sk);
}
break;
@@ -2254,6 +2256,8 @@ EXPORT_SYMBOL(tcp_disconnect);
void tcp_sock_destruct(struct sock *sk)
{
+ tcp_sk_trace_destruct(sk);
+
inet_sock_destruct(sk);
kfree(inet_csk(sk)->icsk_accept_queue.fastopenq);
--
1.8.1
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists