[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1448295986-14576-2-git-send-email-bro.devel+kernel@gmail.com>
Date: Mon, 23 Nov 2015 17:26:25 +0100
From: "Bendik Rønning Opstad" <bro.devel@...il.com>
To: "David S. Miller" <davem@...emloft.net>, <netdev@...r.kernel.org>
Cc: Yuchung Cheng <ycheng@...gle.com>,
Eric Dumazet <eric.dumazet@...il.com>,
Neal Cardwell <ncardwell@...gle.com>,
Andreas Petlund <apetlund@...ula.no>,
Carsten Griwodz <griff@...ula.no>,
Pål Halvorsen <paalh@...ula.no>,
Jonas Markussen <jonassm@....uio.no>,
Kristian Evensen <kristian.evensen@...il.com>,
Kenneth Klette Jonassen <kennetkl@....uio.no>,
Bendik Rønning Opstad
<bro.devel+kernel@...il.com>
Subject: [PATCH RFC v2 net-next 1/2] tcp: Add DPIFL thin stream detection mechanism
The existing mechanism for detecting thin streams (tcp_stream_is_thin)
is based on a static limit of less than 4 packets in flight. This treats
streams differently depending on the connection's RTT, such that a stream
on a high RTT link may never be considered thin, whereas the same
application would produce a stream that would always be thin in a low RTT
scenario (e.g. data center).
By calculating a dynamic packets in flight limit (DPIFL), the thin stream
detection will be independent of the RTT and treat streams equally based
on the transmission pattern, i.e. the inter-transmission time (ITT).
Cc: Andreas Petlund <apetlund@...ula.no>
Cc: Carsten Griwodz <griff@...ula.no>
Cc: Pål Halvorsen <paalh@...ula.no>
Cc: Jonas Markussen <jonassm@....uio.no>
Cc: Kristian Evensen <kristian.evensen@...il.com>
Cc: Kenneth Klette Jonassen <kennetkl@....uio.no>
Signed-off-by: Bendik Rønning Opstad <bro.devel+kernel@...il.com>
---
Documentation/networking/ip-sysctl.txt | 8 ++++++++
include/net/tcp.h | 21 +++++++++++++++++++++
net/ipv4/sysctl_net_ipv4.c | 9 +++++++++
net/ipv4/tcp.c | 2 ++
4 files changed, 40 insertions(+)
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 2ea4c45..938ae73 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -700,6 +700,14 @@ tcp_thin_dupack - BOOLEAN
Documentation/networking/tcp-thin.txt
Default: 0
+tcp_thin_dpifl_itt_lower_bound - INTEGER
+ Controls the lower bound inter-transmission time (ITT) threshold
+ for when a stream is considered thin. The value is specified in
+ microseconds, and may not be lower than 10000 (10 ms). Based on
+ this threshold, a dynamic packets in flight limit (DPIFL) is
+ calculated, which is used to classify whether a stream is thin.
+ Default: 10000
+
tcp_limit_output_bytes - INTEGER
Controls TCP Small Queue limit per tcp socket.
TCP bulk sender tends to increase packets in flight until it
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4fc457b..deac96f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -215,6 +215,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
/* TCP thin-stream limits */
#define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */
+/* Lowest possible DPIFL lower bound ITT is 10 ms (10000 usec) */
+#define TCP_THIN_DPIFL_ITT_LOWER_BOUND_MIN 10000
/* TCP initial congestion window as per draft-hkchu-tcpm-initcwnd-01 */
#define TCP_INIT_CWND 10
@@ -274,6 +276,7 @@ extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
+extern int sysctl_tcp_thin_dpifl_itt_lower_bound;
extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
@@ -1631,6 +1634,24 @@ static inline bool tcp_stream_is_thin(struct tcp_sock *tp)
return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
}
+/**
+ * tcp_stream_is_thin_dpifl() - Test if stream is thin per dynamic PIF limit
+ * @tp: the tcp_sock struct
+ *
+ * Return: true if the current packets in flight (PIF) count is lower than
+ * the dynamic PIF limit (tp->srtt_us >> 3 divided by the ITT lower bound
+ * sysctl), else false
+ */
+static inline bool tcp_stream_is_thin_dpifl(const struct tcp_sock *tp)
+{
+ /* Tests if PIF < RTT / ITT-lower-bound. The check is written as
+ * PIF * ITT-lower-bound < RTT so that no division is needed; the
+ * u64 cast makes the multiplication 64 bits wide.
+ */
+ return (u64) tcp_packets_in_flight(tp) *
+ sysctl_tcp_thin_dpifl_itt_lower_bound < (tp->srtt_us >> 3);
+}
+
/* /proc */
enum tcp_seq_states {
TCP_SEQ_STATE_LISTENING,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index a0bd7a5..5b12446 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -42,6 +42,7 @@ static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
+static int tcp_thin_dpifl_itt_lower_bound_min = TCP_THIN_DPIFL_ITT_LOWER_BOUND_MIN;
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
@@ -709,6 +710,14 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_thin_dpifl_itt_lower_bound",
+ .data = &sysctl_tcp_thin_dpifl_itt_lower_bound,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .extra1 = &tcp_thin_dpifl_itt_lower_bound_min,
+ },
+ {
.procname = "tcp_early_retrans",
.data = &sysctl_tcp_early_retrans,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c172877..cb3354d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -287,6 +287,8 @@ int sysctl_tcp_min_tso_segs __read_mostly = 2;
int sysctl_tcp_autocorking __read_mostly = 1;
+int sysctl_tcp_thin_dpifl_itt_lower_bound __read_mostly = TCP_THIN_DPIFL_ITT_LOWER_BOUND_MIN;
+
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists