Message-Id: <1660117763-38322-1-git-send-email-liyonglong@chinatelecom.cn>
Date:   Wed, 10 Aug 2022 15:49:23 +0800
From:   Yonglong Li <liyonglong@...natelecom.cn>
To:     netdev@...r.kernel.org
Cc:     davem@...emloft.net, edumazet@...gle.com, ncardwell@...gle.com,
        ycheng@...gle.com, dsahern@...nel.org, kuba@...nel.org,
        pabeni@...hat.com, liyonglong@...natelecom.cn
Subject: [PATCH v2] tcp: adjust rcvbuf according to the copied rate of user space

Every time data is copied to user space, tcp_rcv_space_adjust() is
called. Currently it adjusts rcvbuf by the length of the data copied to
user space. If the interval at which user space copies data from the
socket is not stable, the length of the copied data does not accurately
reflect the speed at which data is drained from rcvbuf.
So in tcp_rcv_space_adjust() it is more reasonable to adjust rcvbuf by
the copied rate (length of copied data / interval) instead of the copied
data length.
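
For illustration, here is a minimal stand-alone C sketch of the rate
comparison this patch introduces. rcvbuf_should_grow() is a hypothetical
helper written only for this example, not kernel code, and the kernel's
do_div() on u64 values is approximated here by plain 64-bit division:

#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC 1000000ULL

/* Mirrors the patched check in tcp_rcv_space_adjust(): 'copied' is
 * bytes copied to user space since the last measurement, 'time_us'
 * the elapsed interval in microseconds, 'prior_rate' the rate saved
 * by the previous measurement (0 right after init), and 'prior_space'
 * the old rcvq_space.space byte count used to seed the first rate. */
static int rcvbuf_should_grow(uint32_t copied, uint32_t time_us,
                              uint64_t prior_rate, uint32_t prior_space)
{
        uint64_t copied_rate = (uint64_t)copied * USEC_PER_SEC / time_us;
        uint64_t pre_copied_rate = prior_rate;

        if (!prior_rate)        /* first pass: derive a rate from 'space' */
                pre_copied_rate = (uint64_t)prior_space * USEC_PER_SEC / time_us;

        return pre_copied_rate && copied_rate > pre_copied_rate;
}

int main(void)
{
        uint64_t prev = 100000ULL * USEC_PER_SEC / 100000; /* 100 kB over 100 ms */

        /* 120 kB copied, but over 200 ms: more bytes, yet a lower rate.
         * The old length-only check (120000 > 100000) would grow rcvbuf;
         * the rate-based check correctly declines to. */
        printf("grow: %d\n", rcvbuf_should_grow(120000, 200000, prev, 0));
        return 0;
}

With an unstable copy interval, the raw byte count can exceed the
previous 'space' even though the copy rate fell; that is exactly the
case the rate-based check filters out.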

I tested this patch in a simulated environment with Mininet:
with an 80~120ms RTT, 1% loss link, 100 runs
of (netperf -t TCP_STREAM -l 5) gave an average throughput
of 17715 Kbit instead of 17703 Kbit;
with an 80~120ms RTT link without loss, 100 runs of (netperf -t
TCP_STREAM -l 5) gave an average throughput of 18272 Kbit
instead of 18248 Kbit.

Signed-off-by: Yonglong Li <liyonglong@...natelecom.cn>
---
 include/linux/tcp.h  |  1 +
 net/ipv4/tcp_input.c | 16 +++++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index a9fbe22..18e091c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -410,6 +410,7 @@ struct tcp_sock {
 		u32	space;
 		u32	seq;
 		u64	time;
+		u32	prior_rate;
 	} rcvq_space;
 
 /* TCP-specific MTU probe information. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ab5f0ea..b97aa36 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -544,6 +544,7 @@ static void tcp_init_buffer_space(struct sock *sk)
 	tcp_mstamp_refresh(tp);
 	tp->rcvq_space.time = tp->tcp_mstamp;
 	tp->rcvq_space.seq = tp->copied_seq;
+	tp->rcvq_space.prior_rate = 0;
 
 	maxwin = tcp_full_space(sk);
 
@@ -701,6 +702,7 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 void tcp_rcv_space_adjust(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u64 pre_copied_rate, copied_rate;
 	u32 copied;
 	int time;
 
@@ -713,7 +715,14 @@ void tcp_rcv_space_adjust(struct sock *sk)
 
 	/* Number of bytes copied to user in last RTT */
 	copied = tp->copied_seq - tp->rcvq_space.seq;
-	if (copied <= tp->rcvq_space.space)
+	copied_rate = copied * USEC_PER_SEC;
+	do_div(copied_rate, time);
+	pre_copied_rate = tp->rcvq_space.prior_rate;
+	if (!tp->rcvq_space.prior_rate) {
+		pre_copied_rate = tp->rcvq_space.space * USEC_PER_SEC;
+		do_div(pre_copied_rate, time);
+	}
+	if (copied_rate <= pre_copied_rate || !pre_copied_rate)
 		goto new_measure;
 
 	/* A bit of theory :
@@ -736,8 +745,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		rcvwin = ((u64)copied << 1) + 16 * tp->advmss;
 
 		/* Accommodate for sender rate increase (eg. slow start) */
-		grow = rcvwin * (copied - tp->rcvq_space.space);
-		do_div(grow, tp->rcvq_space.space);
+		grow = rcvwin * (copied_rate - pre_copied_rate);
+		do_div(grow, pre_copied_rate);
 		rcvwin += (grow << 1);
 
 		rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
@@ -755,6 +764,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 		}
 	}
 	tp->rcvq_space.space = copied;
+	tp->rcvq_space.prior_rate = copied_rate;
 
 new_measure:
 	tp->rcvq_space.seq = tp->copied_seq;
-- 
1.8.3.1
