[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230316152618.711970-13-dhowells@redhat.com>
Date: Thu, 16 Mar 2023 15:26:02 +0000
From: David Howells <dhowells@...hat.com>
To: Matthew Wilcox <willy@...radead.org>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: David Howells <dhowells@...hat.com>,
Al Viro <viro@...iv.linux.org.uk>,
Christoph Hellwig <hch@...radead.org>,
Jens Axboe <axboe@...nel.dk>, Jeff Layton <jlayton@...nel.org>,
Christian Brauner <brauner@...nel.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
netdev@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: [RFC PATCH 12/28] af_unix: Support MSG_SPLICE_PAGES
Make AF_UNIX sendmsg() support MSG_SPLICE_PAGES, splicing in pages from the
source iterator if the flag is given and the iterator is of ITER_BVEC type,
and copying the data in otherwise.
This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.
Signed-off-by: David Howells <dhowells@...hat.com>
cc: "David S. Miller" <davem@...emloft.net>
cc: Eric Dumazet <edumazet@...gle.com>
cc: Jakub Kicinski <kuba@...nel.org>
cc: Paolo Abeni <pabeni@...hat.com>
cc: Jens Axboe <axboe@...nel.dk>
cc: Matthew Wilcox <willy@...radead.org>
cc: netdev@...r.kernel.org
---
net/unix/af_unix.c | 84 +++++++++++++++++++++++++++++++++++++---------
1 file changed, 68 insertions(+), 16 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 347122c3575e..6f3454db9c53 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2151,6 +2151,44 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
}
#endif
+/*
+ * Extract pages from a BVEC-type iterator and add them to the socket buffer.
+ *
+ * Walks the bio_vec array at iter->bvec (iter->nr_segs entries), skipping the
+ * first iter->iov_offset bytes, and hands each remaining piece to
+ * skb_append_pagefrags() until @maxsize bytes have been added or that call
+ * fails.
+ *
+ * @skb:     socket buffer that receives the page fragments
+ * @iter:    source iterator; advanced by the number of bytes added
+ * @maxsize: upper bound on the number of bytes to add
+ *
+ * Returns the number of bytes added, which may be short (or 0) if
+ * skb_append_pagefrags() refused a fragment.  The length fields of @skb are
+ * not updated here; the stream sendmsg path adds the returned count to
+ * skb->len, skb->data_len and skb->truesize itself.
+ */
+static ssize_t unix_extract_bvec_to_skb(struct sk_buff *skb,
+					struct iov_iter *iter, ssize_t maxsize)
+{
+	const struct bio_vec *bv = iter->bvec;
+	unsigned long start = iter->iov_offset;
+	unsigned int i;
+	ssize_t ret = 0;
+
+	for (i = 0; i < iter->nr_segs; i++) {
+		size_t off, len;
+
+		/* Consume the starting offset, skipping segments it covers. */
+		len = bv[i].bv_len;
+		if (start >= len) {
+			start -= len;
+			continue;
+		}
+
+		len = min_t(size_t, maxsize, len - start);
+		off = bv[i].bv_offset + start;
+
+		/*
+		 * The page must come from the same segment as the offset and
+		 * length; using bv->bv_page here would attach the first
+		 * segment's page for every segment.
+		 */
+		if (skb_append_pagefrags(skb, bv[i].bv_page, off, len) < 0)
+			break;
+
+		ret += len;
+		maxsize -= len;
+		if (maxsize <= 0)
+			break;
+		/* Only the first segment used is entered part-way through. */
+		start = 0;
+	}
+
+	if (ret > 0)
+		iov_iter_advance(iter, ret);
+	return ret;
+}
+
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
@@ -2194,19 +2232,25 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
while (sent < len) {
size = len - sent;
- /* Keep two messages in the pipe so it schedules better */
- size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
+ if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+ skb = sock_alloc_send_pskb(sk, 0, 0,
+ msg->msg_flags & MSG_DONTWAIT,
+ &err, 0);
+ } else {
+ /* Keep two messages in the pipe so it schedules better */
+ size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
- /* allow fallback to order-0 allocations */
- size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
+ /* allow fallback to order-0 allocations */
+ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
- data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
+ data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
- data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
+ data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
- skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
- msg->msg_flags & MSG_DONTWAIT, &err,
- get_order(UNIX_SKB_FRAGS_SZ));
+ skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
+ msg->msg_flags & MSG_DONTWAIT, &err,
+ get_order(UNIX_SKB_FRAGS_SZ));
+ }
if (!skb)
goto out_err;
@@ -2218,13 +2262,21 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
}
fds_sent = true;
- skb_put(skb, size - data_len);
- skb->data_len = data_len;
- skb->len = size;
- err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
- if (err) {
- kfree_skb(skb);
- goto out_err;
+ if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+ size = unix_extract_bvec_to_skb(skb, &msg->msg_iter, size);
+ skb->data_len += size;
+ skb->len += size;
+ skb->truesize += size;
+ refcount_add(size, &sk->sk_wmem_alloc);
+ } else {
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
+ }
}
unix_state_lock(other);
Powered by blists - more mailing lists