[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230329141354.516864-20-dhowells@redhat.com>
Date: Wed, 29 Mar 2023 15:13:25 +0100
From: David Howells <dhowells@...hat.com>
To: Matthew Wilcox <willy@...radead.org>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: David Howells <dhowells@...hat.com>,
Al Viro <viro@...iv.linux.org.uk>,
Christoph Hellwig <hch@...radead.org>,
Jens Axboe <axboe@...nel.dk>, Jeff Layton <jlayton@...nel.org>,
Christian Brauner <brauner@...nel.org>,
Chuck Lever III <chuck.lever@...cle.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
netdev@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: [RFC PATCH v2 19/48] af_unix: Support MSG_SPLICE_PAGES
Make AF_UNIX sendmsg() support MSG_SPLICE_PAGES, splicing in pages from the
source iterator if given and if ITER_BVEC and copying the data in
otherwise.
This allows ->sendpage() to be replaced by something that can handle
multiple multipage folios in a single transaction.
Signed-off-by: David Howells <dhowells@...hat.com>
cc: "David S. Miller" <davem@...emloft.net>
cc: Eric Dumazet <edumazet@...gle.com>
cc: Jakub Kicinski <kuba@...nel.org>
cc: Paolo Abeni <pabeni@...hat.com>
cc: Jens Axboe <axboe@...nel.dk>
cc: Matthew Wilcox <willy@...radead.org>
cc: netdev@...r.kernel.org
---
net/unix/af_unix.c | 93 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 77 insertions(+), 16 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 347122c3575e..84a0d97f1aa4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2151,6 +2151,53 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other
}
#endif
+/*
+ * Extract pages from an iterator and add them to the socket buffer.
+ */
+static ssize_t unix_extract_bvec_to_skb(struct sk_buff *skb,
+ struct iov_iter *iter, ssize_t maxsize)
+{
+ struct page *pages[8], **ppages = pages;
+ unsigned int i, nr;
+ ssize_t ret = 0;
+
+ while (iter->count > 0) {
+ size_t off, len;
+
+ nr = min(MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags,
+ ARRAY_SIZE(pages));
+ if (nr == 0)
+ break;
+
+ len = iov_iter_extract_pages(iter, &ppages, maxsize, nr, 0, &off);
+ if (len <= 0) {
+ if (!ret)
+ ret = len ?: -EIO;
+ break;
+ }
+
+ i = 0;
+ do {
+ size_t part = min(PAGE_SIZE - off, len);
+
+ if (skb_append_pagefrags(skb, pages[i++], off, part) < 0) {
+ if (!ret)
+ ret = -EMSGSIZE;
+ goto out;
+ }
+ off = 0;
+ ret += part;
+ maxsize -= part;
+ len -= part;
+ } while (len > 0);
+ if (maxsize <= 0)
+ break;
+ }
+
+out:
+ return ret;
+}
+
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
@@ -2194,19 +2241,25 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
while (sent < len) {
size = len - sent;
- /* Keep two messages in the pipe so it schedules better */
- size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
+ if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+ skb = sock_alloc_send_pskb(sk, 0, 0,
+ msg->msg_flags & MSG_DONTWAIT,
+ &err, 0);
+ } else {
+ /* Keep two messages in the pipe so it schedules better */
+ size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
- /* allow fallback to order-0 allocations */
- size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
+ /* allow fallback to order-0 allocations */
+ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
- data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
+ data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
- data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
+ data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
- skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
- msg->msg_flags & MSG_DONTWAIT, &err,
- get_order(UNIX_SKB_FRAGS_SZ));
+ skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
+ msg->msg_flags & MSG_DONTWAIT, &err,
+ get_order(UNIX_SKB_FRAGS_SZ));
+ }
if (!skb)
goto out_err;
@@ -2218,13 +2271,21 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
}
fds_sent = true;
- skb_put(skb, size - data_len);
- skb->data_len = data_len;
- skb->len = size;
- err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
- if (err) {
- kfree_skb(skb);
- goto out_err;
+ if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+ size = unix_extract_bvec_to_skb(skb, &msg->msg_iter, size);
+ skb->data_len += size;
+ skb->len += size;
+ skb->truesize += size;
+ refcount_add(size, &sk->sk_wmem_alloc);
+ } else {
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
+ }
}
unix_state_lock(other);
Powered by blists - more mailing lists