[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20201202225349.935284-3-arjunroy.kdev@gmail.com>
Date: Wed, 2 Dec 2020 14:53:43 -0800
From: Arjun Roy <arjunroy.kdev@...il.com>
To: davem@...emloft.net, netdev@...r.kernel.org
Cc: arjunroy@...gle.com, edumazet@...gle.com, soheil@...gle.com
Subject: [net-next v3 2/8] net-tcp: Introduce tcp_recvmsg_locked().
From: Arjun Roy <arjunroy@...gle.com>
Refactor tcp_recvmsg() by splitting it into locked and unlocked
portions. Callers already holding the socket lock and not using
ERRQUEUE/cmsg/busy polling can simply call tcp_recvmsg_locked().
This is in preparation for a short-circuit copy performed by
TCP receive zerocopy for small (< PAGE_SIZE, or otherwise requested
by the user) reads.
Signed-off-by: Arjun Roy <arjunroy@...gle.com>
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@...gle.com>
---
net/ipv4/tcp.c | 69 ++++++++++++++++++++++++++++----------------------
1 file changed, 39 insertions(+), 30 deletions(-)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 887c6e986927..232cb478bacd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2065,36 +2065,28 @@ static int tcp_inq_hint(struct sock *sk)
* Probably, code can be easily improved even more.
*/
-int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
- int flags, int *addr_len)
+static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
+ int nonblock, int flags,
+ struct scm_timestamping_internal *tss,
+ int *cmsg_flags)
{
struct tcp_sock *tp = tcp_sk(sk);
int copied = 0;
u32 peek_seq;
u32 *seq;
unsigned long used;
- int err, inq;
+ int err;
int target; /* Read at least this many bytes */
long timeo;
struct sk_buff *skb, *last;
u32 urg_hole = 0;
- struct scm_timestamping_internal tss;
- int cmsg_flags;
-
- if (unlikely(flags & MSG_ERRQUEUE))
- return inet_recv_error(sk, msg, len, addr_len);
-
- if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
- (sk->sk_state == TCP_ESTABLISHED))
- sk_busy_loop(sk, nonblock);
-
- lock_sock(sk);
err = -ENOTCONN;
if (sk->sk_state == TCP_LISTEN)
goto out;
- cmsg_flags = tp->recvmsg_inq ? 1 : 0;
+ if (tp->recvmsg_inq)
+ *cmsg_flags = 1;
timeo = sock_rcvtimeo(sk, nonblock);
/* Urgent data needs to be handled specially. */
@@ -2274,8 +2266,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
}
if (TCP_SKB_CB(skb)->has_rxtstamp) {
- tcp_update_recv_tstamps(skb, &tss);
- cmsg_flags |= 2;
+ tcp_update_recv_tstamps(skb, tss);
+ *cmsg_flags |= 2;
}
if (used + offset < skb->len)
@@ -2301,22 +2293,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
/* Clean up data we have read: This will do ACK frames. */
tcp_cleanup_rbuf(sk, copied);
-
- release_sock(sk);
-
- if (cmsg_flags) {
- if (cmsg_flags & 2)
- tcp_recv_timestamp(msg, sk, &tss);
- if (cmsg_flags & 1) {
- inq = tcp_inq_hint(sk);
- put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
- }
- }
-
return copied;
out:
- release_sock(sk);
return err;
recv_urg:
@@ -2327,6 +2306,36 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
err = tcp_peek_sndq(sk, msg, len);
goto out;
}
+
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
+ int flags, int *addr_len)
+{
+ int cmsg_flags = 0, ret, inq;
+ struct scm_timestamping_internal tss;
+
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
+ if (sk_can_busy_loop(sk) &&
+ skb_queue_empty_lockless(&sk->sk_receive_queue) &&
+ sk->sk_state == TCP_ESTABLISHED)
+ sk_busy_loop(sk, nonblock);
+
+ lock_sock(sk);
+ ret = tcp_recvmsg_locked(sk, msg, len, nonblock, flags, &tss,
+ &cmsg_flags);
+ release_sock(sk);
+
+ if (cmsg_flags && ret >= 0) {
+ if (cmsg_flags & 2)
+ tcp_recv_timestamp(msg, sk, &tss);
+ if (cmsg_flags & 1) {
+ inq = tcp_inq_hint(sk);
+ put_cmsg(msg, SOL_TCP, TCP_CM_INQ, sizeof(inq), &inq);
+ }
+ }
+ return ret;
+}
EXPORT_SYMBOL(tcp_recvmsg);
void tcp_set_state(struct sock *sk, int state)
--
2.29.2.576.ga3fc446d84-goog
Powered by blists - more mailing lists