Message-ID: <167391073019.2311931.11127613443740355536.stgit@warthog.procyon.org.uk>
Date: Mon, 16 Jan 2023 23:12:10 +0000
From: David Howells <dhowells@...hat.com>
To: Al Viro <viro@...iv.linux.org.uk>
Cc: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org,
dhowells@...hat.com, Christoph Hellwig <hch@...radead.org>,
Matthew Wilcox <willy@...radead.org>,
Jens Axboe <axboe@...nel.dk>, Jan Kara <jack@...e.cz>,
Jeff Layton <jlayton@...nel.org>,
Logan Gunthorpe <logang@...tatee.com>,
linux-fsdevel@...r.kernel.org, linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v6 34/34] net: [RFC][WIP] Make __zerocopy_sg_from_iter()
correctly pin or leave pages unref'd
Make __zerocopy_sg_from_iter() call iov_iter_extract_pages() to get pages
that have been ref'd, pinned or left alone as appropriate. As this is only
used for source buffers, pinning isn't an option, but leaving the pages
unref'd is.
The way __zerocopy_sg_from_iter() merges fragments is also altered:
fragments may only be merged if their cleanup modes match.
An extra helper and wrapper, folio_put_unpin_sub() and page_put_unpin_sub(),
are added to allow multiple refs/pins to be dropped in a single call.
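For illustration, the extract-and-release pattern that this converts
__zerocopy_sg_from_iter() to is roughly the sketch below.  This is
illustrative only, not part of the patch; it assumes the
iov_iter_extract_pages()/iov_iter_extract_mode() API and the
page_put_unpin*() helpers introduced earlier in this series:

#include <linux/mm.h>
#include <linux/skbuff.h>
#include <linux/uio.h>

static int zc_extract_example(struct iov_iter *from, size_t length)
{
	unsigned int cleanup_mode = iov_iter_extract_mode(from, FOLL_SOURCE_BUF);
	struct page *pages[MAX_SKB_FRAGS], **ppages = pages;
	unsigned int i, npages;
	size_t start;
	ssize_t copied;

	/* The pages come back ref'd, pinned or left untouched depending on
	 * the iterator type; cleanup_mode records which of those was done.
	 */
	copied = iov_iter_extract_pages(from, &ppages, length, MAX_SKB_FRAGS,
					FOLL_SOURCE_BUF, &start);
	if (copied < 0)
		return -EFAULT;

	/* ... use the pages here ... */

	/* Release the pages according to the same mode once they are no
	 * longer needed; the patch uses page_put_unpin_sub() to drop
	 * several refs/pins on the same folio in one go.
	 */
	npages = DIV_ROUND_UP(copied + start, PAGE_SIZE);
	for (i = 0; i < npages; i++)
		page_put_unpin(pages[i], cleanup_mode);
	return 0;
}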
Signed-off-by: David Howells <dhowells@...hat.com>
cc: "David S. Miller" <davem@...emloft.net>
cc: Eric Dumazet <edumazet@...gle.com>
cc: Jakub Kicinski <kuba@...nel.org>
cc: Paolo Abeni <pabeni@...hat.com>
cc: netdev@...r.kernel.org
---
include/linux/mm.h | 2 ++
mm/gup.c | 25 +++++++++++++++++++++++++
net/core/datagram.c | 23 +++++++++++++----------
3 files changed, 40 insertions(+), 10 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f14edb192394..e3923b89c75e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1368,7 +1368,9 @@ static inline bool is_cow_mapping(vm_flags_t flags)
#endif
void folio_put_unpin(struct folio *folio, unsigned int flags);
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags, unsigned int refs);
void page_put_unpin(struct page *page, unsigned int flags);
+void page_put_unpin_sub(struct page *page, unsigned int flags, unsigned int refs);
/*
* The identification function is mainly used by the buddy allocator for
diff --git a/mm/gup.c b/mm/gup.c
index 3ee4b4c7e0cb..49dd27ba6c13 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -213,6 +213,31 @@ void page_put_unpin(struct page *page, unsigned int flags)
}
EXPORT_SYMBOL_GPL(page_put_unpin);
+/**
+ * folio_put_unpin_sub - Unpin/put a folio as appropriate
+ * @folio: The folio to release
+ * @flags: gup flags indicating the mode of release (FOLL_*)
+ * @refs: Number of refs/pins to drop
+ *
+ * Release a folio according to the flags. If FOLL_GET is set, the folio has a
+ * ref dropped; if FOLL_PIN is set, it is unpinned; otherwise it is left
+ * unaltered.
+ */
+void folio_put_unpin_sub(struct folio *folio, unsigned int flags,
+ unsigned int refs)
+{
+ if (flags & (FOLL_GET | FOLL_PIN))
+ gup_put_folio(folio, refs, flags);
+}
+EXPORT_SYMBOL_GPL(folio_put_unpin_sub);
+
+void page_put_unpin_sub(struct page *page, unsigned int flags,
+ unsigned int refs)
+{
+ folio_put_unpin_sub(page_folio(page), flags, refs);
+}
+EXPORT_SYMBOL_GPL(page_put_unpin_sub);
+
/**
* try_grab_page() - elevate a page's refcount by a flag-dependent amount
* @page: pointer to page to be grabbed
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 122bfb144d32..63ea1f8817e0 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -614,6 +614,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb, struct iov_iter *from,
size_t length)
{
+ unsigned int cleanup_mode = iov_iter_extract_mode(from, FOLL_SOURCE_BUF);
int frag;
if (msg && msg->msg_ubuf && msg->sg_from_iter)
@@ -622,7 +623,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
frag = skb_shinfo(skb)->nr_frags;
while (length && iov_iter_count(from)) {
- struct page *pages[MAX_SKB_FRAGS];
+ struct page *pages[MAX_SKB_FRAGS], **ppages = pages;
struct page *last_head = NULL;
size_t start;
ssize_t copied;
@@ -632,9 +633,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
if (frag == MAX_SKB_FRAGS)
return -EMSGSIZE;
- copied = iov_iter_get_pages(from, pages, length,
- MAX_SKB_FRAGS - frag, &start,
- FOLL_SOURCE_BUF);
+ copied = iov_iter_extract_pages(from, &ppages, length,
+ MAX_SKB_FRAGS - frag,
+ FOLL_SOURCE_BUF, &start);
if (copied < 0)
return -EFAULT;
@@ -662,12 +663,14 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];
if (head == skb_frag_page(last) &&
+ cleanup_mode == skb_frag_cleanup(last) &&
start == skb_frag_off(last) + skb_frag_size(last)) {
skb_frag_size_add(last, size);
/* We combined this page, we need to release
- * a reference. Since compound pages refcount
- * is shared among many pages, batch the refcount
- * adjustments to limit false sharing.
+ * a reference or a pin. Since compound pages
+ * refcount is shared among many pages, batch
+ * the refcount adjustments to limit false
+ * sharing.
*/
last_head = head;
refs++;
@@ -675,14 +678,14 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
}
}
if (refs) {
- page_ref_sub(last_head, refs);
+ page_put_unpin_sub(last_head, cleanup_mode, refs);
refs = 0;
}
skb_fill_page_desc_noacc(skb, frag++, head, start, size,
- FOLL_GET);
+ cleanup_mode);
}
if (refs)
- page_ref_sub(last_head, refs);
+ page_put_unpin_sub(last_head, cleanup_mode, refs);
}
return 0;
}
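For reference, the release side is expected to consume the cleanup mode
recorded above along the following lines.  Again, this is an illustrative
sketch and not part of the patch; it assumes skb_frag_cleanup() returns the
FOLL_* mode that skb_fill_page_desc_noacc() stored with the fragment:

#include <linux/mm.h>
#include <linux/skbuff.h>

static void zc_frag_release_example(skb_frag_t *frag)
{
	/* Drop the ref or pin taken at extraction time, or do nothing if
	 * the pages were left alone, according to the fragment's recorded
	 * cleanup mode.
	 */
	page_put_unpin(skb_frag_page(frag), skb_frag_cleanup(frag));
}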