[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210331023237.41094-13-xiyou.wangcong@gmail.com>
Date: Tue, 30 Mar 2021 19:32:33 -0700
From: Cong Wang <xiyou.wangcong@...il.com>
To: netdev@...r.kernel.org
Cc: bpf@...r.kernel.org, duanxiongchun@...edance.com,
wangdongdong.6@...edance.com, jiang.wang@...edance.com,
Cong Wang <cong.wang@...edance.com>,
John Fastabend <john.fastabend@...il.com>,
Daniel Borkmann <daniel@...earbox.net>,
Jakub Sitnicki <jakub@...udflare.com>,
Lorenz Bauer <lmb@...udflare.com>
Subject: [Patch bpf-next v8 12/16] skmsg: extract __tcp_bpf_recvmsg() and tcp_bpf_wait_data()
From: Cong Wang <cong.wang@...edance.com>
Although these two functions are only used by TCP, they are not
specific to TCP at all, both operate on skmsg and ingress_msg,
so fit in net/core/skmsg.c very well.
And we will need them for non-TCP, so rename and move them to
skmsg.c and export them to modules.
Cc: John Fastabend <john.fastabend@...il.com>
Cc: Daniel Borkmann <daniel@...earbox.net>
Cc: Jakub Sitnicki <jakub@...udflare.com>
Cc: Lorenz Bauer <lmb@...udflare.com>
Signed-off-by: Cong Wang <cong.wang@...edance.com>
---
include/linux/skmsg.h | 4 ++
include/net/tcp.h | 2 -
net/core/skmsg.c | 98 +++++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_bpf.c | 100 +-----------------------------------------
net/tls/tls_sw.c | 4 +-
5 files changed, 106 insertions(+), 102 deletions(-)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 5e800ddc2dc6..f78e90a04a69 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -125,6 +125,10 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
struct sk_msg *msg, u32 bytes);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+ long timeo, int *err);
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
{
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2efa4e5ea23d..31b1696c62ba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2209,8 +2209,6 @@ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
int flags);
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
- struct msghdr *msg, int len, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9fc83f7cc1a0..92a83c02562a 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -399,6 +399,104 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
}
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
+ long timeo, int *err)
+{
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ int ret = 0;
+
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
+ if (!timeo)
+ return ret;
+
+ add_wait_queue(sk_sleep(sk), &wait);
+ sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ ret = sk_wait_event(sk, &timeo,
+ !list_empty(&psock->ingress_msg) ||
+ !skb_queue_empty(&sk->sk_receive_queue), &wait);
+ sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_wait_data);
+
+/* Receive sk_msg from psock->ingress_msg to @msg. */
+int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+ int len, int flags)
+{
+ struct iov_iter *iter = &msg->msg_iter;
+ int peek = flags & MSG_PEEK;
+ struct sk_msg *msg_rx;
+ int i, copied = 0;
+
+ msg_rx = sk_psock_peek_msg(psock);
+ while (copied != len) {
+ struct scatterlist *sge;
+
+ if (unlikely(!msg_rx))
+ break;
+
+ i = msg_rx->sg.start;
+ do {
+ struct page *page;
+ int copy;
+
+ sge = sk_msg_elem(msg_rx, i);
+ copy = sge->length;
+ page = sg_page(sge);
+ if (copied + copy > len)
+ copy = len - copied;
+ copy = copy_page_to_iter(page, sge->offset, copy, iter);
+ if (!copy)
+ return copied ? copied : -EFAULT;
+
+ copied += copy;
+ if (likely(!peek)) {
+ sge->offset += copy;
+ sge->length -= copy;
+ if (!msg_rx->skb)
+ sk_mem_uncharge(sk, copy);
+ msg_rx->sg.size -= copy;
+
+ if (!sge->length) {
+ sk_msg_iter_var_next(i);
+ if (!msg_rx->skb)
+ put_page(page);
+ }
+ } else {
+ /* Lets not optimize peek case if copy_page_to_iter
+ * didn't copy the entire length lets just break.
+ */
+ if (copy != sge->length)
+ return copied;
+ sk_msg_iter_var_next(i);
+ }
+
+ if (copied == len)
+ break;
+ } while (i != msg_rx->sg.end);
+
+ if (unlikely(peek)) {
+ msg_rx = sk_psock_next_msg(psock, msg_rx);
+ if (!msg_rx)
+ break;
+ continue;
+ }
+
+ msg_rx->sg.start = i;
+ if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+ msg_rx = sk_psock_dequeue_msg(psock);
+ kfree_sk_msg(msg_rx);
+ }
+ msg_rx = sk_psock_peek_msg(psock);
+ }
+
+ return copied;
+}
+EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ac8cfbaeacd2..3d622a0d0753 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -10,80 +10,6 @@
#include <net/inet_common.h>
#include <net/tls.h>
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
- struct msghdr *msg, int len, int flags)
-{
- struct iov_iter *iter = &msg->msg_iter;
- int peek = flags & MSG_PEEK;
- struct sk_msg *msg_rx;
- int i, copied = 0;
-
- msg_rx = sk_psock_peek_msg(psock);
- while (copied != len) {
- struct scatterlist *sge;
-
- if (unlikely(!msg_rx))
- break;
-
- i = msg_rx->sg.start;
- do {
- struct page *page;
- int copy;
-
- sge = sk_msg_elem(msg_rx, i);
- copy = sge->length;
- page = sg_page(sge);
- if (copied + copy > len)
- copy = len - copied;
- copy = copy_page_to_iter(page, sge->offset, copy, iter);
- if (!copy)
- return copied ? copied : -EFAULT;
-
- copied += copy;
- if (likely(!peek)) {
- sge->offset += copy;
- sge->length -= copy;
- if (!msg_rx->skb)
- sk_mem_uncharge(sk, copy);
- msg_rx->sg.size -= copy;
-
- if (!sge->length) {
- sk_msg_iter_var_next(i);
- if (!msg_rx->skb)
- put_page(page);
- }
- } else {
- /* Lets not optimize peek case if copy_page_to_iter
- * didn't copy the entire length lets just break.
- */
- if (copy != sge->length)
- return copied;
- sk_msg_iter_var_next(i);
- }
-
- if (copied == len)
- break;
- } while (i != msg_rx->sg.end);
-
- if (unlikely(peek)) {
- msg_rx = sk_psock_next_msg(psock, msg_rx);
- if (!msg_rx)
- break;
- continue;
- }
-
- msg_rx->sg.start = i;
- if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
- msg_rx = sk_psock_dequeue_msg(psock);
- kfree_sk_msg(msg_rx);
- }
- msg_rx = sk_psock_peek_msg(psock);
- }
-
- return copied;
-}
-EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
-
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
struct sk_msg *msg, u32 apply_bytes, int flags)
{
@@ -237,28 +163,6 @@ static bool tcp_bpf_stream_read(const struct sock *sk)
return !empty;
}
-static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
- int flags, long timeo, int *err)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- int ret = 0;
-
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- return 1;
-
- if (!timeo)
- return ret;
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- ret = sk_wait_event(sk, &timeo,
- !list_empty(&psock->ingress_msg) ||
- !skb_queue_empty(&sk->sk_receive_queue), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- return ret;
-}
-
static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -278,13 +182,13 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
lock_sock(sk);
msg_bytes_ready:
- copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
+ copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
if (!copied) {
int data, err = 0;
long timeo;
timeo = sock_rcvtimeo(sk, nonblock);
- data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
+ data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
if (data) {
if (!sk_psock_queue_empty(psock))
goto msg_bytes_ready;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 01d933ae5f16..1dcb34dfd56b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1789,8 +1789,8 @@ int tls_sw_recvmsg(struct sock *sk,
skb = tls_wait_data(sk, psock, flags, timeo, &err);
if (!skb) {
if (psock) {
- int ret = __tcp_bpf_recvmsg(sk, psock,
- msg, len, flags);
+ int ret = sk_msg_recvmsg(sk, psock, msg, len,
+ flags);
if (ret > 0) {
decrypted += ret;
--
2.25.1
Powered by blists - more mailing lists