[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <167305165398.1521586.12353215176136705725.stgit@warthog.procyon.org.uk>
Date: Sat, 07 Jan 2023 00:34:14 +0000
From: David Howells <dhowells@...hat.com>
To: Al Viro <viro@...iv.linux.org.uk>
Cc: Jeff Layton <jlayton@...nel.org>, Steve French <sfrench@...ba.org>,
Shyam Prasad N <nspmangalore@...il.com>,
Rohith Surabattula <rohiths.msft@...il.com>,
linux-cachefs@...hat.com, linux-cifs@...r.kernel.org,
linux-fsdevel@...r.kernel.org, dhowells@...hat.com,
Christoph Hellwig <hch@...radead.org>,
Matthew Wilcox <willy@...radead.org>,
Jens Axboe <axboe@...nel.dk>, Jeff Layton <jlayton@...nel.org>,
Logan Gunthorpe <logang@...tatee.com>,
linux-fsdevel@...r.kernel.org, linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v4 6/7] netfs: Add a function to extract an iterator into a
scatterlist
Provide a function for filling in a scatterlist from the list of pages
contained in an iterator.
If the iterator is UBUF- or IOBUF-type, the pages have a ref (ITER_SOURCE,
ie. WRITE) or a pin (ITER_DEST, ie. READ) taken on them.
If the iterator is BVEC-, KVEC- or XARRAY-type, no ref is taken on the
pages and it is left to the caller to manage their lifetime. It cannot be
assumed that a ref can be validly taken, particularly in the case of a KVEC
iterator.
Changes:
========
ver #3)
- Switch to using EXPORT_SYMBOL_GPL to prevent indirect 3rd-party access
to get/pin_user_pages_fast()[1].
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Jeff Layton <jlayton@...nel.org>
cc: Steve French <sfrench@...ba.org>
cc: Shyam Prasad N <nspmangalore@...il.com>
cc: Rohith Surabattula <rohiths.msft@...il.com>
cc: linux-cachefs@...hat.com
cc: linux-cifs@...r.kernel.org
cc: linux-fsdevel@...r.kernel.org
Link: https://lore.kernel.org/r/Y3zFzdWnWlEJ8X8/@infradead.org/ [1]
Link: https://lore.kernel.org/r/166697255985.61150.16489950598033809487.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166732027275.3186319.5186488812166611598.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166869691313.3723671.10714823767342163891.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166920905749.1461876.12079195122363691498.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/166997423514.9475.11145024341505464337.stgit@warthog.procyon.org.uk/ # v3
---
fs/netfs/iterator.c | 268 +++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/netfs.h | 4 +
mm/vmalloc.c | 1
3 files changed, 273 insertions(+)
diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c
index 7d802d21b9c5..82c46e3caf1d 100644
--- a/fs/netfs/iterator.c
+++ b/fs/netfs/iterator.c
@@ -7,7 +7,9 @@
#include <linux/export.h>
#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/uio.h>
+#include <linux/scatterlist.h>
#include <linux/netfs.h>
#include "internal.h"
@@ -97,3 +99,269 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
return npages;
}
EXPORT_SYMBOL_GPL(netfs_extract_user_iter);
+
+/*
+ * Extract as list of up to sg_max pages from UBUF- or IOVEC-class iterators,
+ * pin or get refs on them appropriate and add them to the scatterlist.
+ */
+static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter,
+ ssize_t maxsize,
+ struct sg_table *sgtable,
+ unsigned int sg_max,
+ unsigned int *cleanup_mode)
+{
+ struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+ struct page **pages;
+ unsigned int npages;
+ ssize_t ret = 0, res;
+ size_t len, off;
+
+ *cleanup_mode = 0;
+
+ /* We decant the page list into the tail of the scatterlist */
+ pages = (void *)sgtable->sgl + array_size(sg_max, sizeof(struct scatterlist));
+ pages -= sg_max;
+
+ do {
+ res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max, 0,
+ &off, cleanup_mode);
+ if (res < 0)
+ goto failed;
+
+ len = res;
+ maxsize -= len;
+ ret += len;
+ npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
+ sg_max -= npages;
+
+ for (; npages < 0; npages--) {
+ struct page *page = *pages;
+ size_t seg = min_t(size_t, PAGE_SIZE - off, len);
+
+ *pages++ = NULL;
+ sg_set_page(sg, page, len, off);
+ sgtable->nents++;
+ sg++;
+ len -= seg;
+ off = 0;
+ }
+ } while (maxsize > 0 && sg_max > 0);
+
+ return ret;
+
+failed:
+ while (sgtable->nents > sgtable->orig_nents)
+ put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
+ return res;
+}
+
+/*
+ * Extract up to sg_max pages from a BVEC-type iterator and add them to the
+ * scatterlist. The pages are not pinned.
+ */
+static ssize_t netfs_extract_bvec_to_sg(struct iov_iter *iter,
+ ssize_t maxsize,
+ struct sg_table *sgtable,
+ unsigned int sg_max,
+ unsigned int *cleanup_mode)
+{
+ const struct bio_vec *bv = iter->bvec;
+ struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+ unsigned long start = iter->iov_offset;
+ unsigned int i;
+ ssize_t ret = 0;
+
+ for (i = 0; i < iter->nr_segs; i++) {
+ size_t off, len;
+
+ len = bv[i].bv_len;
+ if (start >= len) {
+ start -= len;
+ continue;
+ }
+
+ len = min_t(size_t, maxsize, len - start);
+ off = bv[i].bv_offset + start;
+
+ sg_set_page(sg, bv[i].bv_page, len, off);
+ sgtable->nents++;
+ sg++;
+ sg_max--;
+
+ ret += len;
+ maxsize -= len;
+ if (maxsize <= 0 || sg_max == 0)
+ break;
+ start = 0;
+ }
+
+ if (ret > 0)
+ iov_iter_advance(iter, ret);
+ *cleanup_mode = 0;
+ return ret;
+}
+
+/*
+ * Extract up to sg_max pages from a KVEC-type iterator and add them to the
+ * scatterlist. This can deal with vmalloc'd buffers as well as kmalloc'd or
+ * static buffers. The pages are not pinned.
+ */
+static ssize_t netfs_extract_kvec_to_sg(struct iov_iter *iter,
+ ssize_t maxsize,
+ struct sg_table *sgtable,
+ unsigned int sg_max,
+ unsigned int *cleanup_mode)
+{
+ const struct kvec *kv = iter->kvec;
+ struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+ unsigned long start = iter->iov_offset;
+ unsigned int i;
+ ssize_t ret = 0;
+
+ for (i = 0; i < iter->nr_segs; i++) {
+ struct page *page;
+ unsigned long kaddr;
+ size_t off, len, seg;
+
+ len = kv[i].iov_len;
+ if (start >= len) {
+ start -= len;
+ continue;
+ }
+
+ kaddr = (unsigned long)kv[i].iov_base + start;
+ off = kaddr & ~PAGE_MASK;
+ len = min_t(size_t, maxsize, len - start);
+ kaddr &= PAGE_MASK;
+
+ maxsize -= len;
+ ret += len;
+ do {
+ seg = min_t(size_t, len, PAGE_SIZE - off);
+ if (is_vmalloc_or_module_addr((void *)kaddr))
+ page = vmalloc_to_page((void *)kaddr);
+ else
+ page = virt_to_page(kaddr);
+
+ sg_set_page(sg, page, len, off);
+ sgtable->nents++;
+ sg++;
+ sg_max--;
+
+ len -= seg;
+ kaddr += PAGE_SIZE;
+ off = 0;
+ } while (len > 0 && sg_max > 0);
+
+ if (maxsize <= 0 || sg_max == 0)
+ break;
+ start = 0;
+ }
+
+ if (ret > 0)
+ iov_iter_advance(iter, ret);
+ *cleanup_mode = 0;
+ return ret;
+}
+
+/*
+ * Extract up to sg_max folios from an XARRAY-type iterator and add them to
+ * the scatterlist. The pages are not pinned.
+ */
+static ssize_t netfs_extract_xarray_to_sg(struct iov_iter *iter,
+ ssize_t maxsize,
+ struct sg_table *sgtable,
+ unsigned int sg_max,
+ unsigned int *cleanup_mode)
+{
+ struct scatterlist *sg = sgtable->sgl + sgtable->nents;
+ struct xarray *xa = iter->xarray;
+ struct folio *folio;
+ loff_t start = iter->xarray_start + iter->iov_offset;
+ pgoff_t index = start / PAGE_SIZE;
+ ssize_t ret = 0;
+ size_t offset, len;
+ XA_STATE(xas, xa, index);
+
+ rcu_read_lock();
+
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ if (xas_retry(&xas, folio))
+ continue;
+ if (WARN_ON(xa_is_value(folio)))
+ break;
+ if (WARN_ON(folio_test_hugetlb(folio)))
+ break;
+
+ offset = offset_in_folio(folio, start);
+ len = min_t(size_t, maxsize, folio_size(folio) - offset);
+
+ sg_set_page(sg, folio_page(folio, 0), len, offset);
+ sgtable->nents++;
+ sg++;
+ sg_max--;
+
+ maxsize -= len;
+ ret += len;
+ if (maxsize <= 0 || sg_max == 0)
+ break;
+ }
+
+ rcu_read_unlock();
+ if (ret > 0)
+ iov_iter_advance(iter, ret);
+ *cleanup_mode = 0;
+ return ret;
+}
+
+/**
+ * netfs_extract_iter_to_sg - Extract pages from an iterator and add ot an sglist
+ * @iter: The iterator to extract from
+ * @maxsize: The amount of iterator to copy
+ * @sgtable: The scatterlist table to fill in
+ * @sg_max: Maximum number of elements in @sgtable that may be filled
+ * @cleanup_mode: Where to return the cleanup mode
+ *
+ * Extract the page fragments from the given amount of the source iterator and
+ * add them to a scatterlist that refers to all of those bits, to a maximum
+ * addition of @sg_max elements.
+ *
+ * The pages referred to by UBUF- and IOVEC-type iterators are extracted and
+ * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
+ * and DISCARD-type are not supported.
+ *
+ * No end mark is placed on the scatterlist; that's left to the caller.
+ *
+ * If successul, @sgtable->nents is updated to include the number of elements
+ * added and the number of bytes added is returned. @sgtable->orig_nents is
+ * left unaltered.
+ */
+ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
+ struct sg_table *sgtable, unsigned int sg_max,
+ unsigned int *cleanup_mode)
+{
+ if (maxsize == 0)
+ return 0;
+
+ switch (iov_iter_type(iter)) {
+ case ITER_UBUF:
+ case ITER_IOVEC:
+ return netfs_extract_user_to_sg(iter, maxsize, sgtable, sg_max,
+ cleanup_mode);
+ case ITER_BVEC:
+ return netfs_extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
+ cleanup_mode);
+ case ITER_KVEC:
+ return netfs_extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
+ cleanup_mode);
+ case ITER_XARRAY:
+ return netfs_extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
+ cleanup_mode);
+ default:
+ pr_err("netfs_extract_iter_to_sg(%u) unsupported\n",
+ iov_iter_type(iter));
+ WARN_ON_ONCE(1);
+ return -EIO;
+ }
+}
+EXPORT_SYMBOL_GPL(netfs_extract_iter_to_sg);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 26fe3e6bafa1..059e49233e29 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -299,6 +299,10 @@ void netfs_stats_show(struct seq_file *);
ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
struct iov_iter *new,
unsigned int *cleanup_mode);
+struct sg_table;
+ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t len,
+ struct sg_table *sgtable, unsigned int sg_max,
+ unsigned int *cleanup_mode);
/**
* netfs_inode - Get the netfs inode context from the inode
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ca71de7c9d77..61f5bec0f2b6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -656,6 +656,7 @@ int is_vmalloc_or_module_addr(const void *x)
#endif
return is_vmalloc_addr(x);
}
+EXPORT_SYMBOL_GPL(is_vmalloc_or_module_addr);
/*
* Walk a vmap address to the struct page it maps. Huge vmap mappings will
Powered by blists - more mailing lists