[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190702161403.191066-2-sdf@google.com>
Date: Tue, 2 Jul 2019 09:13:56 -0700
From: Stanislav Fomichev <sdf@...gle.com>
To: netdev@...r.kernel.org, bpf@...r.kernel.org
Cc: davem@...emloft.net, ast@...nel.org, daniel@...earbox.net,
Stanislav Fomichev <sdf@...gle.com>,
Eric Dumazet <edumazet@...gle.com>,
Priyaranjan Jha <priyarjha@...gle.com>,
Yuchung Cheng <ycheng@...gle.com>,
Soheil Hassas Yeganeh <soheil@...gle.com>
Subject: [PATCH bpf-next v2 1/8] bpf: add BPF_CGROUP_SOCK_OPS callback that is
executed on every RTT
Performance impact should be minimal because it's under a new
BPF_SOCK_OPS_RTT_CB_FLAG flag that has to be explicitly enabled.
Suggested-by: Eric Dumazet <edumazet@...gle.com>
Cc: Eric Dumazet <edumazet@...gle.com>
Cc: Priyaranjan Jha <priyarjha@...gle.com>
Cc: Yuchung Cheng <ycheng@...gle.com>
Cc: Soheil Hassas Yeganeh <soheil@...gle.com>
Acked-by: Soheil Hassas Yeganeh <soheil@...gle.com>
Acked-by: Yuchung Cheng <ycheng@...gle.com>
Signed-off-by: Stanislav Fomichev <sdf@...gle.com>
---
include/net/tcp.h | 8 ++++++++
include/uapi/linux/bpf.h | 6 +++++-
net/ipv4/tcp_input.c | 4 ++++
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9d36cc88d043..e16d8a3fd3b4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2221,6 +2221,14 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
+static inline void tcp_bpf_rtt(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTT_CB_FLAG))
+ tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL);
+}
+
#if IS_ENABLED(CONFIG_SMC)
extern struct static_key_false tcp_have_smc;
#endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cffea1826a1f..9cdd0aaeba06 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1770,6 +1770,7 @@ union bpf_attr {
* * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
* * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
* * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
*
* Therefore, this function can be used to clear a callback flag by
* setting the appropriate bit to zero. e.g. to disable the RTO
@@ -3314,7 +3315,8 @@ struct bpf_sock_ops {
#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
* supported cb flags
*/
@@ -3369,6 +3371,8 @@ enum {
BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
* socket transition to LISTEN state.
*/
+ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
+ */
};
/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b71efeb0ae5b..c21e8a22fb3b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -778,6 +778,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rttvar_us -= (tp->rttvar_us - tp->mdev_max_us) >> 2;
tp->rtt_seq = tp->snd_nxt;
tp->mdev_max_us = tcp_rto_min_us(sk);
+
+ tcp_bpf_rtt(sk);
}
} else {
/* no previous measure. */
@@ -786,6 +788,8 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
tp->rttvar_us = max(tp->mdev_us, tcp_rto_min_us(sk));
tp->mdev_max_us = tp->rttvar_us;
tp->rtt_seq = tp->snd_nxt;
+
+ tcp_bpf_rtt(sk);
}
tp->srtt_us = max(1U, srtt);
}
--
2.22.0.410.gd8fdbe21b5-goog
Powered by blists - more mailing lists