[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240611045830.67640-1-lulie@linux.alibaba.com>
Date: Tue, 11 Jun 2024 12:58:30 +0800
From: Philo Lu <lulie@...ux.alibaba.com>
To: netdev@...r.kernel.org
Cc: edumazet@...gle.com,
rostedt@...dmis.org,
mhiramat@...nel.org,
mathieu.desnoyers@...icios.com,
davem@...emloft.net,
dsahern@...nel.org,
kuba@...nel.org,
pabeni@...hat.com,
xuanzhuo@...ux.alibaba.com,
dust.li@...ux.alibaba.com
Subject: [PATCH net-next] tcp: Add tracepoint for rxtstamp coalescing
During TCP coalescing, the rx timestamps of the former skb ("to" in
tcp_try_coalesce) are lost. This may lead to inaccurate
timestamping results if skbs arrive out of order.
Here is an example.
Assume a message consists of 3 skbs, namely A, B, and C, and that these
skbs are processed by TCP in the following order:
A -(1us)-> C -(1ms)-> B
If C is coalesced into B, the final rx timestamps of the message will be
those of C. That is, the timestamps (both hardware and software) show
that we received the message when C came. However, we actually
received it 1ms later (when B came).
With the added tracepoint, we can recognize such cases and report them
if we want.
Signed-off-by: Philo Lu <lulie@...ux.alibaba.com>
---
include/trace/events/tcp.h | 61 ++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_input.c | 2 ++
2 files changed, 63 insertions(+)
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 49b5ee091cf6..c4219ca2bcf0 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -411,6 +411,67 @@ TRACE_EVENT(tcp_cong_state_set,
__entry->cong_state)
);
+/*
+ * Records rx timestamps of "to" before coalescing overwrites them with those of "from". The caller
+ * guarantees has_rxtstamp for "from" only; for "to" it is checked and zeros are stored if unset.
+ */
+TRACE_EVENT(tcp_rxtstamp_coalesce,
+
+ TP_PROTO(const struct sock *sk, const struct sk_buff *to, const struct sk_buff *from),
+
+ TP_ARGS(sk, to, from),
+
+ TP_STRUCT__entry(
+ __field(__u16, sport)
+ __field(__u16, dport)
+ __field(__u16, family)
+ __array(__u8, saddr, 4)
+ __array(__u8, daddr, 4)
+ __array(__u8, saddr_v6, 16)
+ __array(__u8, daddr_v6, 16)
+ __field(__u64, to_tstamp)
+ __field(__u64, to_hwtstamp)
+ __field(__u64, from_tstamp)
+ __field(__u64, from_hwtstamp)
+ ),
+
+ TP_fast_assign(
+ const struct inet_sock *inet = inet_sk(sk);
+ __be32 *p32;
+
+ __entry->sport = ntohs(inet->inet_sport);
+ __entry->dport = ntohs(inet->inet_dport);
+ __entry->family = sk->sk_family;
+
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = inet->inet_saddr;
+
+ p32 = (__be32 *) __entry->daddr;
+ *p32 = inet->inet_daddr;
+
+ TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+ sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
+
+ if (TCP_SKB_CB(to)->has_rxtstamp) {
+ __entry->to_tstamp = to->tstamp;
+ __entry->to_hwtstamp = skb_shinfo(to)->hwtstamps.hwtstamp;
+ } else {
+ __entry->to_tstamp = 0;
+ __entry->to_hwtstamp = 0;
+ }
+
+ __entry->from_tstamp = from->tstamp;
+ __entry->from_hwtstamp = skb_shinfo(from)->hwtstamps.hwtstamp;
+ ),
+
+ TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c to_tstamp=%llu to_hwtstamp=%llu from_tstamp=%llu from_hwtstamp=%llu",
+ show_family_name(__entry->family),
+ __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
+ __entry->saddr_v6, __entry->daddr_v6,
+ __entry->to_tstamp, __entry->to_hwtstamp,
+ __entry->from_tstamp, __entry->from_hwtstamp)
+);
+
#endif /* _TRACE_TCP_H */
/* This part must be outside protection */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb187450e4d7..7024c6ba20ae 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4827,6 +4827,8 @@ static bool tcp_try_coalesce(struct sock *sk,
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
if (TCP_SKB_CB(from)->has_rxtstamp) {
+ trace_tcp_rxtstamp_coalesce(sk, to, from);
+
TCP_SKB_CB(to)->has_rxtstamp = true;
to->tstamp = from->tstamp;
skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
--
2.32.0.3.g01195cf9f
Powered by blists - more mailing lists