lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240611045830.67640-1-lulie@linux.alibaba.com>
Date: Tue, 11 Jun 2024 12:58:30 +0800
From: Philo Lu <lulie@...ux.alibaba.com>
To: netdev@...r.kernel.org
Cc: edumazet@...gle.com,
	rostedt@...dmis.org,
	mhiramat@...nel.org,
	mathieu.desnoyers@...icios.com,
	davem@...emloft.net,
	dsahern@...nel.org,
	kuba@...nel.org,
	pabeni@...hat.com,
	xuanzhuo@...ux.alibaba.com,
	dust.li@...ux.alibaba.com
Subject: [PATCH net-next] tcp: Add tracepoint for rxtstamp coalescing

During TCP coalescing, the rx timestamps of the earlier skb ("to" in
tcp_try_coalesce()) are overwritten by those of the skb merged into it
("from") and are lost. This may lead to inaccurate timestamping results
if skbs arrive out of order.

Here is an example.
Assume a message consists of 3 skbs, namely A, B, and C. And these skbs
are processed by tcp in the following order:
A -(1us)-> C -(1ms)-> B
If C is coalesced into B, the final rx timestamps of the message will be
those of C. That is, the timestamps (both hardware and software) indicate
that the message was received when C arrived. However, the message was
actually complete only 1ms later, when B arrived.

With the added tracepoint, we can recognize such cases and report them
if we want.

Signed-off-by: Philo Lu <lulie@...ux.alibaba.com>
---
 include/trace/events/tcp.h | 61 ++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_input.c       |  2 ++
 2 files changed, 63 insertions(+)

diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 49b5ee091cf6..c4219ca2bcf0 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -411,6 +411,67 @@ TRACE_EVENT(tcp_cong_state_set,
 		  __entry->cong_state)
 );
 
+/*
+ * When called, TCP_SKB_CB(from)->has_rxtstamp must be true, but TCP_SKB_CB(to)->has_rxtstamp may
+ * not be, so it is checked before reading the timestamps of skb "to" (reported as 0 if unset).
+ */
+TRACE_EVENT(tcp_rxtstamp_coalesce,
+
+	TP_PROTO(const struct sock *sk, const struct sk_buff *to, const struct sk_buff *from),
+
+	TP_ARGS(sk, to, from),
+
+	TP_STRUCT__entry(
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u16, family)
+		__array(__u8, saddr, 4)
+		__array(__u8, daddr, 4)
+		__array(__u8, saddr_v6, 16)
+		__array(__u8, daddr_v6, 16)
+		__field(__u64, to_tstamp)
+		__field(__u64, to_hwtstamp)
+		__field(__u64, from_tstamp)
+		__field(__u64, from_hwtstamp)
+	),
+
+	TP_fast_assign(
+		const struct inet_sock *inet = inet_sk(sk);
+		__be32 *p32;
+
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+		__entry->family = sk->sk_family;
+
+		p32 = (__be32 *) __entry->saddr;
+		*p32 = inet->inet_saddr;
+
+		p32 = (__be32 *) __entry->daddr;
+		*p32 = inet->inet_daddr;
+
+		TP_STORE_ADDRS(__entry, inet->inet_saddr, inet->inet_daddr,
+			       sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
+
+		if (TCP_SKB_CB(to)->has_rxtstamp) {
+			__entry->to_tstamp = to->tstamp;
+			__entry->to_hwtstamp = skb_shinfo(to)->hwtstamps.hwtstamp;
+		} else {
+			__entry->to_tstamp = 0;
+			__entry->to_hwtstamp = 0;
+		}
+
+		__entry->from_tstamp = from->tstamp;
+		__entry->from_hwtstamp = skb_shinfo(from)->hwtstamps.hwtstamp;
+	),
+
+	TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c to_tstamp=%llu to_hwtstamp=%llu from_tstamp=%llu from_hwtstamp=%llu",
+		  show_family_name(__entry->family),
+		  __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
+		  __entry->saddr_v6, __entry->daddr_v6,
+		  __entry->to_tstamp, __entry->to_hwtstamp,
+		  __entry->from_tstamp, __entry->from_hwtstamp)
+);
+
 #endif /* _TRACE_TCP_H */
 
 /* This part must be outside protection */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eb187450e4d7..7024c6ba20ae 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4827,6 +4827,8 @@ static bool tcp_try_coalesce(struct sock *sk,
 	TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
 
 	if (TCP_SKB_CB(from)->has_rxtstamp) {
+		trace_tcp_rxtstamp_coalesce(sk, to, from);
+
 		TCP_SKB_CB(to)->has_rxtstamp = true;
 		to->tstamp = from->tstamp;
 		skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
-- 
2.32.0.3.g01195cf9f


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ