lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <153685394431.14766.3178466345696987059.stgit@warthog.procyon.org.uk>
Date:   Thu, 13 Sep 2018 16:52:24 +0100
From:   David Howells <dhowells@...hat.com>
To:     viro@...iv.linux.org.uk
Cc:     dhowells@...hat.com, linux-fsdevel@...r.kernel.org,
        linux-afs@...ts.infradead.org, linux-kernel@...r.kernel.org
Subject: [PATCH 06/10] afs: Set up the iov_iter before calling
 afs_extract_data()

afs_extract_data sets up a temporary iov_iter and passes it to AF_RXRPC
each time it is called to describe the remaining buffer to be filled.

Instead:

 (1) Put an iterator in the afs_call struct.

 (2) Set the iterator for each marshalling stage to load data into the
     appropriate places.  A number of convenience functions are provided to
     this end (eg. afs_extract_to_buf()).

     This iterator is then passed to afs_extract_data().

 (3) Use the new ITER_MAPPING iterator when reading data to load directly
     into the inode's pages without needing to create a list of them.  This
     comes with a page-done callback that can be used to unlock pages as
     they are filled.

 (4) Use the new ITER_DISCARD iterator to discard any excess data provided
     by FetchData.

This will allow O_DIRECT calls to be supported in future patches.

Signed-off-by: David Howells <dhowells@...hat.com>
---

 fs/afs/cmservice.c         |   40 +++----
 fs/afs/dir.c               |  191 +++++++++++++++++++++++----------
 fs/afs/file.c              |  199 +++++++++++++++++++++++------------
 fs/afs/fsclient.c          |  252 ++++++++++++--------------------------------
 fs/afs/internal.h          |   52 ++++++++-
 fs/afs/rxrpc.c             |   41 ++-----
 fs/afs/vlclient.c          |  104 ++++++++----------
 fs/afs/write.c             |    8 +
 include/linux/fscache.h    |   31 +++++
 include/trace/events/afs.h |   22 ++--
 10 files changed, 499 insertions(+), 441 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 58f79301a716..4db62ae8dc1a 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -176,13 +176,13 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* extract the FID array and its count in two steps */
 	case 1:
 		_debug("extract FID count");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -196,13 +196,12 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 				       GFP_KERNEL);
 		if (!call->buffer)
 			return -ENOMEM;
-		call->offset = 0;
+		afs_extract_to_buf(call, call->count * 3 * 4);
 		call->unmarshall++;
 
 	case 2:
 		_debug("extract FID array");
-		ret = afs_extract_data(call, call->buffer,
-				       call->count * 3 * 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -222,13 +221,13 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 			cb->cb.type	= AFSCM_CB_UNTYPED;
 		}
 
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* extract the callback array and its count in two steps */
 	case 3:
 		_debug("extract CB count");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -237,13 +236,12 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		if (call->count2 != call->count && call->count2 != 0)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_cb_count);
-		call->offset = 0;
+		afs_extract_to_buf(call, call->count2 * 3 * 4);
 		call->unmarshall++;
 
 	case 4:
 		_debug("extract CB array");
-		ret = afs_extract_data(call, call->buffer,
-				       call->count2 * 3 * 4, false);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
@@ -256,7 +254,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 			cb->cb.type	= ntohl(*bp++);
 		}
 
-		call->offset = 0;
 		call->unmarshall++;
 	case 5:
 		break;
@@ -303,7 +300,8 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 
 	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 
-	ret = afs_extract_data(call, NULL, 0, false);
+	afs_extract_discard(call, 0);
+	ret = afs_extract_data(call, false);
 	if (ret < 0)
 		return ret;
 
@@ -332,16 +330,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
 		call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
 		if (!call->buffer)
 			return -ENOMEM;
+		afs_extract_to_buf(call, 11 * sizeof(__be32));
 		call->unmarshall++;
 
 	case 1:
 		_debug("extract UUID");
-		ret = afs_extract_data(call, call->buffer,
-				       11 * sizeof(__be32), false);
+		ret = afs_extract_data(call, false);
 		switch (ret) {
 		case 0:		break;
 		case -EAGAIN:	return 0;
@@ -364,7 +361,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 		for (loop = 0; loop < 6; loop++)
 			r->node[loop] = ntohl(b[loop + 5]);
 
-		call->offset = 0;
 		call->unmarshall++;
 
 	case 2:
@@ -407,7 +403,8 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 
 	_enter("");
 
-	ret = afs_extract_data(call, NULL, 0, false);
+	afs_extract_discard(call, 0);
+	ret = afs_extract_data(call, false);
 	if (ret < 0)
 		return ret;
 
@@ -455,16 +452,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
 		call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
 		if (!call->buffer)
 			return -ENOMEM;
+		afs_extract_to_buf(call, 11 * sizeof(__be32));
 		call->unmarshall++;
 
 	case 1:
 		_debug("extract UUID");
-		ret = afs_extract_data(call, call->buffer,
-				       11 * sizeof(__be32), false);
+		ret = afs_extract_data(call, false);
 		switch (ret) {
 		case 0:		break;
 		case -EAGAIN:	return 0;
@@ -487,7 +483,6 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 		for (loop = 0; loop < 6; loop++)
 			r->node[loop] = ntohl(b[loop + 5]);
 
-		call->offset = 0;
 		call->unmarshall++;
 
 	case 2:
@@ -572,7 +567,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 
 	_enter("");
 
-	ret = afs_extract_data(call, NULL, 0, false);
+	afs_extract_discard(call, 0);
+	ret = afs_extract_data(call, false);
 	if (ret < 0)
 		return ret;
 
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 855bf2b79fed..c36b54b7450b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -105,6 +105,40 @@ struct afs_lookup_cookie {
 	struct afs_fid		fids[50];
 };
 
+/*
+ * Drop the refs that we're holding on the pages we were reading into.  We've
+ * got refs on the first nr_pages pages.
+ */
+static void afs_dir_read_cleanup(struct afs_read *req)
+{
+	struct radix_tree_iter iter;
+	struct address_space *mapping = req->iter.mapping;
+	struct page *page;
+	pgoff_t index = req->pos >> PAGE_SHIFT;
+	void __rcu **slot;
+
+	if (unlikely(!req->nr_pages))
+		return;
+
+	rcu_read_lock();
+	radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page))
+			continue;
+
+		BUG_ON(radix_tree_exception(page));
+		BUG_ON(PageCompound(page));
+		BUG_ON(page->mapping != req->iter.mapping);
+
+		put_page(page);
+		req->nr_pages--;
+		if (req->nr_pages == 0)
+			break;
+	}
+
+	rcu_read_unlock();
+}
+
 /*
  * check that a directory page is valid
  */
@@ -130,7 +164,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
 	qty /= sizeof(union afs_xdr_dir_block);
 
 	/* check them */
-	dbuf = kmap(page);
+	dbuf = kmap_atomic(page);
 	for (tmp = 0; tmp < qty; tmp++) {
 		if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
 			printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
@@ -148,7 +182,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
 		((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
 	}
 
-	kunmap(page);
+	kunmap_atomic(dbuf);
 
 checked:
 	afs_stat_v(dvnode, n_read_dir);
@@ -158,6 +192,45 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
 	return false;
 }
 
+/*
+ * Check all the pages in a directory.  All the pages are held pinned.
+ */
+static int afs_dir_check(struct afs_vnode *dvnode, unsigned int nr_pages,
+			 loff_t i_size)
+{
+	struct radix_tree_iter iter;
+	struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+	struct page *page;
+	void __rcu **slot;
+	int ret = 0;
+
+	if (unlikely(!nr_pages))
+		return 0;
+
+	rcu_read_lock();
+	radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, 0) {
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page)) {
+			pr_warn("kAFS: Missing page in dircheck\n");
+			ret = -EIO;
+			break;
+		}
+		if (page->index >= nr_pages)
+			break;
+
+		BUG_ON(radix_tree_exception(page));
+		BUG_ON(PageCompound(page));
+		BUG_ON(page->mapping != mapping);
+
+		ret = afs_dir_check_page(dvnode, page, i_size);
+		if (ret < 0)
+			break;
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
  * open an AFS directory file
  */
@@ -184,56 +257,49 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 {
 	struct afs_read *req;
 	loff_t i_size;
-	int nr_pages, nr_inline, i, n;
-	int ret = -ENOMEM;
+	int nr_pages, i, n;
+	int ret;
+
+	_enter("");
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&req->usage, 1);
+	req->cleanup = afs_dir_read_cleanup;
 
-retry:
+expand:
 	i_size = i_size_read(&dvnode->vfs_inode);
+	ret = -EIO;
 	if (i_size < 2048)
-		return ERR_PTR(-EIO);
+		goto error;
+	ret = -EFBIG;
 	if (i_size > 2048 * 1024)
-		return ERR_PTR(-EFBIG);
-
-	_enter("%llu", i_size);
+		goto error;
 
-	/* Get a request record to hold the page list.  We want to hold it
-	 * inline if we can, but we don't want to make an order 1 allocation.
-	 */
 	nr_pages = (i_size + PAGE_SIZE - 1) / PAGE_SIZE;
-	nr_inline = nr_pages;
-	if (nr_inline > (PAGE_SIZE - sizeof(*req)) / sizeof(struct page *))
-		nr_inline = 0;
 
-	req = kzalloc(sizeof(*req) + sizeof(struct page *) * nr_inline,
-		      GFP_KERNEL);
-	if (!req)
-		return ERR_PTR(-ENOMEM);
-
-	refcount_set(&req->usage, 1);
-	req->nr_pages = nr_pages;
 	req->actual_len = i_size; /* May change */
 	req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
 	req->data_version = dvnode->status.data_version; /* May change */
-	if (nr_inline > 0) {
-		req->pages = req->array;
-	} else {
-		req->pages = kcalloc(nr_pages, sizeof(struct page *),
-				     GFP_KERNEL);
-		if (!req->pages)
-			goto error;
-	}
+	iov_iter_mapping(&req->iter, READ, dvnode->vfs_inode.i_mapping,
+			 0, i_size);
 
-	/* Get a list of all the pages that hold or will hold the directory
-	 * content.  We need to fill in any gaps that we might find where the
-	 * memory reclaimer has been at work.  If there are any gaps, we will
+	/* Fill in any gaps that we might find where the memory reclaimer has
+	 * been at work and pin all the pages.  If there are any gaps, we will
 	 * need to reread the entire directory contents.
 	 */
-	i = 0;
-	do {
+	i = req->nr_pages;
+	while (i < nr_pages) {
+		struct page *pages[8], *page;
+
 		n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
-					  req->nr_pages - i,
-					  req->pages + i);
-		_debug("find %u at %u/%u", n, i, req->nr_pages);
+					  min_t(unsigned int, nr_pages - i,
+						ARRAY_SIZE(pages)),
+					  pages);
+		_debug("find %u at %u/%u", n, i, nr_pages);
+
 		if (n == 0) {
 			gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
 
@@ -241,23 +307,25 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 				afs_stat_v(dvnode, n_inval);
 
 			ret = -ENOMEM;
-			req->pages[i] = __page_cache_alloc(gfp);
-			if (!req->pages[i])
+			page = __page_cache_alloc(gfp);
+			if (!page)
 				goto error;
-			ret = add_to_page_cache_lru(req->pages[i],
+			ret = add_to_page_cache_lru(page,
 						    dvnode->vfs_inode.i_mapping,
 						    i, gfp);
 			if (ret < 0)
 				goto error;
 
-			set_page_private(req->pages[i], 1);
-			SetPagePrivate(req->pages[i]);
-			unlock_page(req->pages[i]);
+			set_page_private(page, 1);
+			SetPagePrivate(page);
+			unlock_page(page);
+			req->nr_pages++;
 			i++;
 		} else {
+			req->nr_pages += n;
 			i += n;
 		}
-	} while (i < req->nr_pages);
+	}
 
 	/* If we're going to reload, we need to lock all the pages to prevent
 	 * races.
@@ -280,15 +348,18 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 
 		task_io_account_read(PAGE_SIZE * req->nr_pages);
 
-		if (req->len < req->file_size)
-			goto content_has_grown;
+		if (req->len < req->file_size) {
+			/* The content has grown, so we need to expand the
+			 * buffer.
+			 */
+			up_write(&dvnode->validate_lock);
+			goto expand;
+		}
 
 		/* Validate the data we just read. */
-		ret = -EIO;
-		for (i = 0; i < req->nr_pages; i++)
-			if (!afs_dir_check_page(dvnode, req->pages[i],
-						req->actual_len))
-				goto error_unlock;
+		ret = afs_dir_check(dvnode, req->nr_pages, req->actual_len);
+		if (ret < 0)
+			goto error_unlock;
 
 		// TODO: Trim excess pages
 
@@ -305,11 +376,6 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 	afs_put_read(req);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
-
-content_has_grown:
-	up_write(&dvnode->validate_lock);
-	afs_put_read(req);
-	goto retry;
 }
 
 /*
@@ -415,6 +481,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 	struct afs_read *req;
 	struct page *page;
 	unsigned blkoff, limit;
+	void __rcu **slot;
 	int ret;
 
 	_enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -438,9 +505,15 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 		blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
 
 		/* Fetch the appropriate page from the directory and re-add it
-		 * to the LRU.
+		 * to the LRU.  We have all the pages pinned with an extra ref.
 		 */
-		page = req->pages[blkoff / PAGE_SIZE];
+		rcu_read_lock();
+		page = NULL;
+		slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
+					      blkoff / PAGE_SIZE);
+		if (slot)
+			page = radix_tree_deref_slot(slot);
+		rcu_read_unlock();
 		if (!page) {
 			ret = -EIO;
 			break;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7d4f26198573..e887a9b24f4f 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -148,7 +148,7 @@ int afs_open(struct inode *inode, struct file *file)
 
 	if (file->f_flags & O_TRUNC)
 		set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
-	
+
 	file->private_data = af;
 	_leave(" = 0");
 	return 0;
@@ -185,24 +185,79 @@ int afs_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+/*
+ * Make pages available as they're filled.  This function may not sleep.
+ */
+static void afs_readpages_page_done(const struct iov_iter *iter,
+				    const struct bio_vec *bv)
+{
+	struct page *page = bv->bv_page;
+	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+	struct afs_read *req = container_of(iter, struct afs_read, iter);
+
+	SetPageUptodate(page);
+
+	if (0 && afs_vnode_cache(vnode))
+		SetPageFsCache(page);
+	unlock_page(page);
+	put_page(page);
+	req->done_pages++;
+}
+
+/*
+ * Unlock the pages we were reading into.  We've got locks and refs on the
+ * first nr_pages pages.
+ */
+static void afs_file_read_cleanup(struct afs_read *req)
+{
+	struct radix_tree_iter iter;
+	struct address_space *mapping = req->iter.mapping;
+	struct page *page;
+	pgoff_t index = req->pos >> PAGE_SHIFT;
+	void **slot;
+
+	_enter("%lu,%u,%u,%zu",
+	       index, req->done_pages, req->nr_pages, iov_iter_count(&req->iter));
+
+	if (likely(req->done_pages >= req->nr_pages))
+		return;
+
+	rcu_read_lock();
+	radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page))
+			continue;
+
+		BUG_ON(radix_tree_exception(page));
+		BUG_ON(PageCompound(page));
+		BUG_ON(page != *slot);
+		BUG_ON(page->mapping != req->iter.mapping);
+
+		if (req->error)
+			SetPageError(page);
+		unlock_page(page);
+		put_page(page);
+		req->done_pages++;
+		if (req->done_pages >= req->nr_pages)
+			break;
+	}
+
+	rcu_read_unlock();
+}
+
 /*
  * Dispose of a ref to a read record.
  */
 void afs_put_read(struct afs_read *req)
 {
-	int i;
-
 	if (refcount_dec_and_test(&req->usage)) {
-		for (i = 0; i < req->nr_pages; i++)
-			if (req->pages[i])
-				put_page(req->pages[i]);
-		if (req->pages != req->array)
-			kfree(req->pages);
+		if (req->cleanup)
+			req->cleanup(req);
 		kfree(req);
 	}
 }
 
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
 /*
  * deal with notification that a page was read from the cache
  */
@@ -257,6 +312,22 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
 	return ret;
 }
 
+/*
+ * Clear the trailer after a short read.
+ */
+static void afs_clear_after_read(struct afs_vnode *vnode, struct afs_read *req,
+				 bool catch_page_done)
+{
+	if (req->actual_len >= req->len)
+		return;
+	iov_iter_mapping(&req->iter, READ, vnode->vfs_inode.i_mapping,
+			 req->pos + req->actual_len,
+			 req->len - req->actual_len);
+	if (catch_page_done)
+		req->iter.page_done = afs_readpages_page_done;
+	iov_iter_zero(req->len - req->actual_len, &req->iter);
+}
+
 /*
  * read page from file, directory or symlink, given a key to use
  */
@@ -277,7 +348,7 @@ int afs_page_filler(void *data, struct page *page)
 		goto error;
 
 	/* is it cached? */
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
 	ret = fscache_read_or_alloc_page(vnode->cache,
 					 page,
 					 afs_file_readpage_read_complete,
@@ -301,8 +372,7 @@ int afs_page_filler(void *data, struct page *page)
 		_debug("cache said ENOBUFS");
 	default:
 	go_on:
-		req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
-			      GFP_KERNEL);
+		req = kzalloc(sizeof(struct afs_read), GFP_KERNEL);
 		if (!req)
 			goto enomem;
 
@@ -314,10 +384,11 @@ int afs_page_filler(void *data, struct page *page)
 		req->pos = (loff_t)page->index << PAGE_SHIFT;
 		req->len = PAGE_SIZE;
 		req->nr_pages = 1;
-		req->pages = req->array;
-		req->pages[0] = page;
 		get_page(page);
 
+		iov_iter_mapping(&req->iter, READ, page->mapping,
+				 (loff_t)page->index << PAGE_SHIFT, PAGE_SIZE);
+
 		/* read the contents of the file from the server into the
 		 * page */
 		ret = afs_fetch_data(vnode, key, req);
@@ -331,11 +402,6 @@ int afs_page_filler(void *data, struct page *page)
 				ret = -ESTALE;
 			}
 
-#ifdef CONFIG_AFS_FSCACHE
-			fscache_uncache_page(vnode->cache, page);
-#endif
-			BUG_ON(PageFsCache(page));
-
 			if (ret == -EINTR ||
 			    ret == -ENOMEM ||
 			    ret == -ERESTARTSYS ||
@@ -344,10 +410,11 @@ int afs_page_filler(void *data, struct page *page)
 			goto io_error;
 		}
 
+		afs_clear_after_read(vnode, req, false);
 		SetPageUptodate(page);
 
 		/* send the page to the cache */
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
 		if (PageFsCache(page) &&
 		    fscache_write_page(vnode->cache, page, vnode->status.size,
 				       GFP_KERNEL) != 0) {
@@ -398,31 +465,39 @@ static int afs_readpage(struct file *file, struct page *page)
 	return ret;
 }
 
+#if 0
 /*
- * Make pages available as they're filled.
+ * Allow writing to a page to take place.  This function may not sleep.
  */
-static void afs_readpages_page_done(struct afs_call *call, struct afs_read *req)
+static void afs_clear_page_fscache_mark(const struct iov_iter *iter,
+					struct page *page)
 {
-#ifdef CONFIG_AFS_FSCACHE
-	struct afs_vnode *vnode = call->reply[0];
-#endif
-	struct page *page = req->pages[req->index];
+	ClearPageFsCache(page);
+}
 
-	req->pages[req->index] = NULL;
-	SetPageUptodate(page);
+static void afs_fscache_write_done(struct fscache_cookie *cookie,
+				   struct iov_iter *iter)
+{
+	struct afs_read *req = container_of(iter, struct afs_read, iter);
+
+	afs_put_read(req);
+}
+
+/*
+ * Write the read data to the cache.
+ */
+static void afs_readpages_write_to_cache(struct afs_read *req)
+{
+	struct afs_vnode *vnode = AFS_FS_I(req->iter.mapping->host);
 
-	/* send the page to the cache */
-#ifdef CONFIG_AFS_FSCACHE
-	if (PageFsCache(page) &&
-	    fscache_write_page(vnode->cache, page, vnode->status.size,
-			       GFP_KERNEL) != 0) {
-		fscache_uncache_page(vnode->cache, page);
-		BUG_ON(PageFsCache(page));
+	if (afs_vnode_cache(vnode)) {
+		req->iter.page_done = afs_clear_page_fscache_mark;
+		fscache_write(vnode->cache, &req->iter, req->pos,
+			      req->file_size, GFP_KERNEL,
+			      afs_fscache_write_done);
 	}
-#endif
-	unlock_page(page);
-	put_page(page);
 }
+#endif
 
 /*
  * Read a contiguous set of pages.
@@ -436,7 +511,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
 	struct page *first, *page;
 	struct key *key = afs_file_key(file);
 	pgoff_t index;
-	int ret, n, i;
+	int ret, n;
 
 	/* Count the number of contiguous pages at the front of the list.  Note
 	 * that the list goes prev-wards rather than next-wards.
@@ -452,20 +527,17 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
 		n++;
 	}
 
-	req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *) * n,
-		      GFP_NOFS);
+	req = kzalloc(sizeof(struct afs_read), GFP_NOFS);
 	if (!req)
 		return -ENOMEM;
 
 	refcount_set(&req->usage, 1);
-	req->page_done = afs_readpages_page_done;
+	req->cleanup = afs_file_read_cleanup;
 	req->pos = first->index;
 	req->pos <<= PAGE_SHIFT;
-	req->pages = req->array;
 
-	/* Transfer the pages to the request.  We add them in until one fails
-	 * to add to the LRU and then we stop (as that'll make a hole in the
-	 * contiguous run.
+	/* Add pages to the LRU until it fails.  We keep the pages ref'd and
+	 * locked until the read is complete.
 	 *
 	 * Note that it's possible for the file size to change whilst we're
 	 * doing this, but we rely on the server returning less than we asked
@@ -478,15 +550,11 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
 		index = page->index;
 		if (add_to_page_cache_lru(page, mapping, index,
 					  readahead_gfp_mask(mapping))) {
-#ifdef CONFIG_AFS_FSCACHE
-			fscache_uncache_page(vnode->cache, page);
-#endif
 			put_page(page);
 			break;
 		}
 
-		req->pages[req->nr_pages++] = page;
-		req->len += PAGE_SIZE;
+		req->nr_pages++;
 	} while (req->nr_pages < n);
 
 	if (req->nr_pages == 0) {
@@ -494,33 +562,26 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping,
 		return 0;
 	}
 
+	req->len = req->nr_pages * PAGE_SIZE;
+	iov_iter_mapping(&req->iter, READ, file->f_mapping, req->pos, req->len);
+	req->iter.page_done = afs_readpages_page_done;
+
 	ret = afs_fetch_data(vnode, key, req);
 	if (ret < 0)
 		goto error;
 
-	task_io_account_read(PAGE_SIZE * req->nr_pages);
-	afs_put_read(req);
+	afs_clear_after_read(vnode, req, true);
+	task_io_account_read(req->len);
 	return 0;
 
 error:
 	if (ret == -ENOENT) {
-		_debug("got NOENT from server"
-		       " - marking file deleted and stale");
+		_debug("got NOENT from server - marking file deleted and stale");
 		set_bit(AFS_VNODE_DELETED, &vnode->flags);
 		ret = -ESTALE;
 	}
 
-	for (i = 0; i < req->nr_pages; i++) {
-		page = req->pages[i];
-		if (page) {
-#ifdef CONFIG_AFS_FSCACHE
-			fscache_uncache_page(vnode->cache, page);
-#endif
-			SetPageError(page);
-			unlock_page(page);
-		}
-	}
-
+	req->error = true;
 	afs_put_read(req);
 	return ret;
 }
@@ -547,7 +608,7 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
 	}
 
 	/* attempt to read as many of the pages as possible */
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
 	ret = fscache_read_or_alloc_pages(vnode->cache,
 					  mapping,
 					  pages,
@@ -605,7 +666,7 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
 
 	/* we clean up only if the entire page is being invalidated */
 	if (offset == 0 && length == PAGE_SIZE) {
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
 		if (PageFsCache(page)) {
 			struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
 			fscache_wait_on_page_write(vnode->cache, page);
@@ -640,7 +701,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 
 	/* deny if page is being written to the cache and the caller hasn't
 	 * elected to wait */
-#ifdef CONFIG_AFS_FSCACHE
+#if 0 //def CONFIG_AFS_FSCACHE
 	if (!fscache_maybe_release_page(vnode->cache, page, gfp_flags)) {
 		_leave(" = F [cache busy]");
 		return 0;
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index d9a5815945dc..f0cef8e7b1af 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -20,12 +20,6 @@
 
 static const struct afs_fid afs_zero_fid;
 
-/*
- * We need somewhere to discard into in case the server helpfully returns more
- * than we asked for in FS.FetchData{,64}.
- */
-static u8 afs_discard_buffer[64];
-
 static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
 {
 	call->cbi = afs_get_cb_interest(cbi);
@@ -468,115 +462,82 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 	struct afs_vnode *vnode = call->reply[0];
 	struct afs_read *req = call->reply[2];
 	const __be32 *bp;
-	unsigned int size;
-	void *buffer;
 	int ret;
 
-	_enter("{%u,%zu/%u;%llu/%llu}",
-	       call->unmarshall, call->offset, call->count,
-	       req->remain, req->actual_len);
+	_enter("{%u,%zu/%llu}",
+	       call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
 
 	switch (call->unmarshall) {
 	case 0:
 		req->actual_len = 0;
-		call->offset = 0;
 		call->unmarshall++;
 		if (call->operation_ID != FSFETCHDATA64) {
 			call->unmarshall++;
 			goto no_msw;
 		}
+		afs_extract_to_tmp(call);
 
 		/* extract the upper part of the returned data length of an
-		 * FSFETCHDATA64 op (which should always be 0 using this
-		 * client) */
+		 * FSFETCHDATA64 op.
+		 */
 	case 1:
 		_debug("extract data length (MSW)");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
 		req->actual_len = ntohl(call->tmp);
 		req->actual_len <<= 32;
-		call->offset = 0;
 		call->unmarshall++;
-
 	no_msw:
+		afs_extract_to_tmp(call);
+
 		/* extract the returned data length */
 	case 2:
 		_debug("extract data length");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
 		req->actual_len |= ntohl(call->tmp);
 		_debug("DATA length: %llu", req->actual_len);
 
-		req->remain = req->actual_len;
-		call->offset = req->pos & (PAGE_SIZE - 1);
-		req->index = 0;
 		if (req->actual_len == 0)
 			goto no_more_data;
 		call->unmarshall++;
-
-	begin_page:
-		ASSERTCMP(req->index, <, req->nr_pages);
-		if (req->remain > PAGE_SIZE - call->offset)
-			size = PAGE_SIZE - call->offset;
-		else
-			size = req->remain;
-		call->count = call->offset + size;
-		ASSERTCMP(call->count, <=, PAGE_SIZE);
-		req->remain -= size;
+		call->_iter = &req->iter;
+		iov_iter_truncate(&req->iter, req->actual_len);
 
 		/* extract the returned data */
 	case 3:
-		_debug("extract data %llu/%llu %zu/%u",
-		       req->remain, req->actual_len, call->offset, call->count);
+		_debug("extract data %zu/%llu",
+		       iov_iter_count(&call->iter), req->actual_len);
 
-		buffer = kmap(req->pages[req->index]);
-		ret = afs_extract_data(call, buffer, call->count, true);
-		kunmap(req->pages[req->index]);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
-		if (call->offset == PAGE_SIZE) {
-			if (req->page_done)
-				req->page_done(call, req);
-			req->index++;
-			if (req->remain > 0) {
-				call->offset = 0;
-				if (req->index >= req->nr_pages) {
-					call->unmarshall = 4;
-					goto begin_discard;
-				}
-				goto begin_page;
-			}
-		}
-		goto no_more_data;
+
+		call->_iter = &call->iter;
+		if (req->actual_len <= req->len)
+			goto no_more_data;
 
 		/* Discard any excess data the server gave us */
-	begin_discard:
+		iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
 	case 4:
-		size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
-		call->count = size;
-		_debug("extract discard %llu/%llu %zu/%u",
-		       req->remain, req->actual_len, call->offset, call->count);
-
-		call->offset = 0;
-		ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
-		req->remain -= call->offset;
+		_debug("extract discard %zu/%llu",
+		       iov_iter_count(&call->iter), req->actual_len - req->len);
+
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
-		if (req->remain > 0)
-			goto begin_discard;
 
 	no_more_data:
-		call->offset = 0;
 		call->unmarshall = 5;
+		afs_extract_to_buf(call, (21 + 3 + 6) * 4);
 
 		/* extract the metadata */
 	case 5:
-		ret = afs_extract_data(call, call->buffer,
-				       (21 + 3 + 6) * 4, false);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
@@ -589,22 +550,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		if (call->reply[1])
 			xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
-		call->offset = 0;
 		call->unmarshall++;
 
 	case 6:
 		break;
 	}
 
-	for (; req->index < req->nr_pages; req->index++) {
-		if (call->count < PAGE_SIZE)
-			zero_user_segment(req->pages[req->index],
-					  call->count, PAGE_SIZE);
-		if (req->page_done)
-			req->page_done(call, req);
-		call->count = 0;
-	}
-
 	_leave(" = 0 [done]");
 	return 0;
 }
@@ -700,6 +651,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
 	call->expected_version = vnode->status.data_version;
+	req->call_debug_id = call->debug_id;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -1598,31 +1550,31 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 {
 	const __be32 *bp;
 	char *p;
+	u32 size;
 	int ret;
 
 	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
 		call->unmarshall++;
+		afs_extract_to_buf(call, 12 * 4);
 
 		/* extract the returned status record */
 	case 1:
 		_debug("extract status");
-		ret = afs_extract_data(call, call->buffer,
-				       12 * 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
 		bp = call->buffer;
 		xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
-		call->offset = 0;
 		call->unmarshall++;
+		afs_extract_to_tmp(call);
 
 		/* extract the volume name length */
 	case 2:
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -1631,46 +1583,26 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		if (call->count >= AFSNAMEMAX)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_volname_len);
-		call->offset = 0;
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
 		call->unmarshall++;
 
 		/* extract the volume name */
 	case 3:
 		_debug("extract volname");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply[2],
-					       call->count, true);
-			if (ret < 0)
-				return ret;
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("volname '%s'", p);
-
-		call->offset = 0;
-		call->unmarshall++;
-
-		/* extract the volume name padding */
-		if ((call->count & 3) == 0) {
-			call->unmarshall++;
-			goto no_volname_padding;
-		}
-		call->count = 4 - (call->count & 3);
-
-	case 4:
-		ret = afs_extract_data(call, call->buffer,
-				       call->count, true);
-		if (ret < 0)
-			return ret;
-
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
-	no_volname_padding:
 
 		/* extract the offline message length */
-	case 5:
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+	case 4:
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -1679,46 +1611,27 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		if (call->count >= AFSNAMEMAX)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_offline_msg_len);
-		call->offset = 0;
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
 		call->unmarshall++;
 
 		/* extract the offline message */
-	case 6:
+	case 5:
 		_debug("extract offline");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply[2],
-					       call->count, true);
-			if (ret < 0)
-				return ret;
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("offline '%s'", p);
 
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
-		/* extract the offline message padding */
-		if ((call->count & 3) == 0) {
-			call->unmarshall++;
-			goto no_offline_padding;
-		}
-		call->count = 4 - (call->count & 3);
-
-	case 7:
-		ret = afs_extract_data(call, call->buffer,
-				       call->count, true);
-		if (ret < 0)
-			return ret;
-
-		call->offset = 0;
-		call->unmarshall++;
-	no_offline_padding:
-
 		/* extract the message of the day length */
-	case 8:
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+	case 6:
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -1727,38 +1640,24 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		if (call->count >= AFSNAMEMAX)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_motd_len);
-		call->offset = 0;
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
 		call->unmarshall++;
 
 		/* extract the message of the day */
-	case 9:
+	case 7:
 		_debug("extract motd");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply[2],
-					       call->count, true);
-			if (ret < 0)
-				return ret;
-		}
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
 
 		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("motd '%s'", p);
 
-		call->offset = 0;
 		call->unmarshall++;
 
-		/* extract the message of the day padding */
-		call->count = (4 - (call->count & 3)) & 3;
-
-	case 10:
-		ret = afs_extract_data(call, call->buffer,
-				       call->count, false);
-		if (ret < 0)
-			return ret;
-
-		call->offset = 0;
-		call->unmarshall++;
-	case 11:
+	case 8:
 		break;
 	}
 
@@ -2024,19 +1923,16 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 	u32 count;
 	int ret;
 
-	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+	_enter("{%u,%zu}", call->unmarshall, iov_iter_count(&call->iter));
 
-again:
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* Extract the capabilities word count */
 	case 1:
-		ret = afs_extract_data(call, &call->tmp,
-				       1 * sizeof(__be32),
-				       true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2044,24 +1940,17 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 
 		call->count = count;
 		call->count2 = count;
-		call->offset = 0;
+		iov_iter_discard(&call->iter, READ, count * sizeof(__be32));
 		call->unmarshall++;
 
 		/* Extract capabilities words */
 	case 2:
-		count = min(call->count, 16U);
-		ret = afs_extract_data(call, call->buffer,
-				       count * sizeof(__be32),
-				       call->count > 16);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
 		/* TODO: Examine capabilities */
 
-		call->count -= count;
-		if (call->count > 0)
-			goto again;
-		call->offset = 0;
 		call->unmarshall++;
 		break;
 	}
@@ -2215,13 +2104,13 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* Extract the file status count and array in two steps */
 	case 1:
 		_debug("extract status count");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2234,11 +2123,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		call->count = 0;
 		call->unmarshall++;
 	more_counts:
-		call->offset = 0;
+		afs_extract_to_buf(call, 21 * sizeof(__be32));
 
 	case 2:
 		_debug("extract status array %u", call->count);
-		ret = afs_extract_data(call, call->buffer, 21 * 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2256,12 +2145,12 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
 		call->count = 0;
 		call->unmarshall++;
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 
 		/* Extract the callback count and array in two steps */
 	case 3:
 		_debug("extract CB count");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2273,11 +2162,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		call->count = 0;
 		call->unmarshall++;
 	more_cbs:
-		call->offset = 0;
+		afs_extract_to_buf(call, 3 * sizeof(__be32));
 
 	case 4:
 		_debug("extract CB array");
-		ret = afs_extract_data(call, call->buffer, 3 * 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2294,11 +2183,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		if (call->count < call->count2)
 			goto more_cbs;
 
-		call->offset = 0;
+		afs_extract_to_buf(call, 6 * sizeof(__be32));
 		call->unmarshall++;
 
 	case 5:
-		ret = afs_extract_data(call, call->buffer, 6 * 4, false);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
@@ -2306,7 +2195,6 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		if (call->reply[3])
 			xdr_decode_AFSVolSync(&bp, call->reply[3]);
 
-		call->offset = 0;
 		call->unmarshall++;
 
 	case 6:
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 457a8f76b6a2..997ab8350dfe 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -96,11 +96,16 @@ struct afs_call {
 	struct afs_cb_interest	*cbi;		/* Callback interest for server used */
 	void			*request;	/* request data (first part) */
 	struct address_space	*mapping;	/* Pages being written from */
+	struct iov_iter		iter;		/* Buffer iterator */
+	struct iov_iter		*_iter;		/* Iterator currently in use */
+	union {	/* Convenience for ->iter */
+		struct kvec	kvec[1];
+		struct bio_vec	bvec[1];
+	};
 	void			*buffer;	/* reply receive buffer */
 	void			*reply[4];	/* Where to put the reply */
 	pgoff_t			first;		/* first page in mapping to deal with */
 	pgoff_t			last;		/* last page in mapping to deal with */
-	size_t			offset;		/* offset into received data store */
 	atomic_t		usage;
 	enum afs_call_state	state;
 	spinlock_t		state_lock;
@@ -177,15 +182,15 @@ struct afs_read {
 	loff_t			pos;		/* Where to start reading */
 	loff_t			len;		/* How much we're asking for */
 	loff_t			actual_len;	/* How much we're actually getting */
-	loff_t			remain;		/* Amount remaining */
 	loff_t			file_size;	/* File size returned by server */
 	afs_dataversion_t	data_version;	/* Version number returned by server */
 	refcount_t		usage;
-	unsigned int		index;		/* Which page we're reading into */
 	unsigned int		nr_pages;
-	void (*page_done)(struct afs_call *, struct afs_read *);
-	struct page		**pages;
-	struct page		*array[];
+	unsigned int		done_pages;
+	bool			error;
+	unsigned int		call_debug_id;
+	void (*cleanup)(struct afs_read *);
+	struct iov_iter		iter;		/* Buffer */
 };
 
 /*
@@ -548,6 +553,15 @@ struct afs_vnode {
 	afs_callback_type_t	cb_type;	/* type of callback */
 };
 
+static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+	return vnode->cache;
+#else
+	return NULL;
+#endif
+}
+
 /*
  * cached security record for one user's attempt to access a vnode
  */
@@ -928,12 +942,34 @@ extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
+extern int afs_extract_data(struct afs_call *, bool);
 extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
 
+static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
+{
+	call->kvec[0].iov_base = buf;
+	call->kvec[0].iov_len = size;
+	iov_iter_kvec(&call->iter, READ, call->kvec, 1, size);
+}
+
+static inline void afs_extract_to_tmp(struct afs_call *call)
+{
+	afs_extract_begin(call, &call->tmp, sizeof(call->tmp));
+}
+
+static inline void afs_extract_discard(struct afs_call *call, size_t size)
+{
+	iov_iter_discard(&call->iter, READ, size);
+}
+
+static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
+{
+	afs_extract_begin(call, call->buffer, size);
+}
+
 static inline int afs_transfer_reply(struct afs_call *call)
 {
-	return afs_extract_data(call, call->buffer, call->reply_max, false);
+	return afs_extract_data(call, false);
 }
 
 static inline bool afs_check_call_state(struct afs_call *call,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 20199f2b2c31..966e30f30cbb 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -143,6 +143,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
 	INIT_WORK(&call->async_work, afs_process_async_call);
 	init_waitqueue_head(&call->waitq);
 	spin_lock_init(&call->state_lock);
+	call->_iter = &call->iter;
 
 	o = atomic_inc_return(&net->nr_outstanding_calls);
 	trace_afs_call(call, afs_call_trace_alloc, 1, o,
@@ -233,6 +234,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net,
 			goto nomem_free;
 	}
 
+	afs_extract_to_buf(call, call->reply_max);
 	call->operation_ID = type->op;
 	init_waitqueue_head(&call->waitq);
 	return call;
@@ -465,14 +467,12 @@ static void afs_deliver_to_call(struct afs_call *call)
 	       state == AFS_CALL_SV_AWAIT_ACK
 	       ) {
 		if (state == AFS_CALL_SV_AWAIT_ACK) {
-			struct iov_iter iter;
-
-			iov_iter_kvec(&iter, READ, NULL, 0, 0);
+			iov_iter_kvec(&call->iter, READ, NULL, 0, 0);
 			ret = rxrpc_kernel_recv_data(call->net->socket,
-						     call->rxcall, &iter, false,
-						     &remote_abort,
+						     call->rxcall, &call->iter,
+						     false, &remote_abort,
 						     &call->service_id);
-			trace_afs_recv_data(call, 0, 0, false, ret);
+			trace_afs_receive_data(call, &call->iter, false, ret);
 
 			if (ret == -EINPROGRESS || ret == -EAGAIN)
 				return;
@@ -516,7 +516,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 			if (state != AFS_CALL_CL_AWAIT_REPLY)
 				abort_code = RXGEN_SS_UNMARSHAL;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-						abort_code, -EBADMSG, "KUM");
+						abort_code, ret, "KUM");
 			goto local_abort;
 		}
 	}
@@ -729,6 +729,7 @@ void afs_charge_preallocation(struct work_struct *work)
 			call->async = true;
 			call->state = AFS_CALL_SV_AWAIT_OP_ID;
 			init_waitqueue_head(&call->waitq);
+			afs_extract_to_tmp(call);
 		}
 
 		if (rxrpc_kernel_charge_accept(net->socket,
@@ -774,18 +775,15 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
 {
 	int ret;
 
-	_enter("{%zu}", call->offset);
-
-	ASSERTCMP(call->offset, <, 4);
+	_enter("{%zu}", iov_iter_count(call->_iter));
 
 	/* the operation ID forms the first four bytes of the request data */
-	ret = afs_extract_data(call, &call->tmp, 4, true);
+	ret = afs_extract_data(call, true);
 	if (ret < 0)
 		return ret;
 
 	call->operation_ID = ntohl(call->tmp);
 	afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
-	call->offset = 0;
 
 	/* ask the cache manager to route the call (it'll change the call type
 	 * if successful) */
@@ -889,30 +887,19 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 /*
  * Extract a piece of data from the received data socket buffers.
  */
-int afs_extract_data(struct afs_call *call, void *buf, size_t count,
-		     bool want_more)
+int afs_extract_data(struct afs_call *call, bool want_more)
 {
 	struct afs_net *net = call->net;
-	struct iov_iter iter;
-	struct kvec iov;
+	struct iov_iter *iter = call->_iter;
 	enum afs_call_state state;
 	u32 remote_abort = 0;
 	int ret;
 
-	_enter("{%s,%zu},,%zu,%d",
-	       call->type->name, call->offset, count, want_more);
-
-	ASSERTCMP(call->offset, <=, count);
-
-	iov.iov_base = buf + call->offset;
-	iov.iov_len = count - call->offset;
-	iov_iter_kvec(&iter, READ, &iov, 1, count - call->offset);
+	_enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more);
 
-	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
+	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
 				     want_more, &remote_abort,
 				     &call->service_id);
-	call->offset += (count - call->offset) - iov_iter_count(&iter);
-	trace_afs_recv_data(call, count, call->offset, want_more, ret);
 	if (ret == 0 || ret == -EAGAIN)
 		return ret;
 
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index d0f95c4ab05e..e18c51742daa 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -187,19 +187,18 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
 	u32 uniquifier, nentries, count;
 	int i, ret;
 
-	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+	_enter("{%u,%zu/%u}",
+	       call->unmarshall, iov_iter_count(call->_iter), call->count);
 
-again:
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_buf(call,
+				   sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
 		call->unmarshall++;
 
 		/* Extract the returned uuid, uniquifier, nentries and blkaddrs size */
 	case 1:
-		ret = afs_extract_data(call, call->buffer,
-				       sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32),
-				       true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -216,28 +215,28 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
 		call->reply[0] = alist;
 		call->count = count;
 		call->count2 = nentries;
-		call->offset = 0;
 		call->unmarshall++;
 
+	more_entries:
+		count = min(call->count, 4U);
+		afs_extract_to_buf(call, count * sizeof(__be32));
+
 		/* Extract entries */
 	case 2:
-		count = min(call->count, 4U);
-		ret = afs_extract_data(call, call->buffer,
-				       count * sizeof(__be32),
-				       call->count > 4);
+		ret = afs_extract_data(call, call->count > 4);
 		if (ret < 0)
 			return ret;
 
 		alist = call->reply[0];
 		bp = call->buffer;
+		count = min(call->count, 4U);
 		for (i = 0; i < count; i++)
 			if (alist->nr_addrs < call->count2)
 				afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
 
 		call->count -= count;
 		if (call->count > 0)
-			goto again;
-		call->offset = 0;
+			goto more_entries;
 		call->unmarshall++;
 		break;
 	}
@@ -318,44 +317,35 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
 	u32 count;
 	int ret;
 
-	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+	_enter("{%u,%zu/%u}",
+	       call->unmarshall, iov_iter_count(call->_iter), call->count);
 
-again:
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* Extract the capabilities word count */
 	case 1:
-		ret = afs_extract_data(call, &call->tmp,
-				       1 * sizeof(__be32),
-				       true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
 		count = ntohl(call->tmp);
-
 		call->count = count;
 		call->count2 = count;
-		call->offset = 0;
+
 		call->unmarshall++;
+		afs_extract_discard(call, count * sizeof(__be32));
 
 		/* Extract capabilities words */
 	case 2:
-		count = min(call->count, 16U);
-		ret = afs_extract_data(call, call->buffer,
-				       count * sizeof(__be32),
-				       call->count > 16);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
 		/* TODO: Examine capabilities */
 
-		call->count -= count;
-		if (call->count > 0)
-			goto again;
-		call->offset = 0;
 		call->unmarshall++;
 		break;
 	}
@@ -426,22 +416,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 	u32 uniquifier, size;
 	int ret;
 
-	_enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
+	_enter("{%u,%zu,%u}",
+	       call->unmarshall, iov_iter_count(call->_iter), call->count2);
 
-again:
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32));
 		call->unmarshall = 1;
 
 		/* Extract the returned uuid, uniquifier, fsEndpoints count and
 		 * either the first fsEndpoint type or the volEndpoints
 		 * count if there are no fsEndpoints. */
 	case 1:
-		ret = afs_extract_data(call, call->buffer,
-				       sizeof(uuid_t) +
-				       3 * sizeof(__be32),
-				       true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -459,15 +446,11 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 			return -ENOMEM;
 		alist->version = uniquifier;
 		call->reply[0] = alist;
-		call->offset = 0;
 
 		if (call->count == 0)
 			goto extract_volendpoints;
 
-		call->unmarshall = 2;
-
-		/* Extract fsEndpoints[] entries */
-	case 2:
+	next_fsendpoint:
 		switch (call->count2) {
 		case YFS_ENDPOINT_IPV4:
 			size = sizeof(__be32) * (1 + 1 + 1);
@@ -481,7 +464,12 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		}
 
 		size += sizeof(__be32);
-		ret = afs_extract_data(call, call->buffer, size, true);
+		afs_extract_to_buf(call, size);
+		call->unmarshall = 2;
+
+		/* Extract fsEndpoints[] entries */
+	case 2:
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -512,10 +500,9 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		 */
 		call->count2 = ntohl(*bp++);
 
-		call->offset = 0;
 		call->count--;
 		if (call->count > 0)
-			goto again;
+			goto next_fsendpoint;
 
 	extract_volendpoints:
 		/* Extract the list of volEndpoints. */
@@ -526,6 +513,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_yvl_vlendpt_type);
 
+		afs_extract_to_buf(call, 1 * sizeof(__be32));
 		call->unmarshall = 3;
 
 		/* Extract the type of volEndpoints[0].  Normally we would
@@ -533,17 +521,14 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		 * data of the current one, but this is the first...
 		 */
 	case 3:
-		ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
 		bp = call->buffer;
-		call->count2 = ntohl(*bp++);
-		call->offset = 0;
-		call->unmarshall = 4;
 
-		/* Extract volEndpoints[] entries */
-	case 4:
+	next_volendpoint:
+		call->count2 = ntohl(*bp++);
 		switch (call->count2) {
 		case YFS_ENDPOINT_IPV4:
 			size = sizeof(__be32) * (1 + 1 + 1);
@@ -557,8 +542,13 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		}
 
 		if (call->count > 1)
-			size += sizeof(__be32);
-		ret = afs_extract_data(call, call->buffer, size, true);
+			size += sizeof(__be32); /* Get next type too */
+		afs_extract_to_buf(call, size);
+		call->unmarshall = 4;
+
+		/* Extract volEndpoints[] entries */
+	case 4:
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -584,19 +574,17 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		/* Got either the type of the next entry or the count of
 		 * volEndpoints if no more fsEndpoints.
 		 */
-		call->offset = 0;
 		call->count--;
-		if (call->count > 0) {
-			call->count2 = ntohl(*bp++);
-			goto again;
-		}
+		if (call->count > 0)
+			goto next_volendpoint;
 
 	end:
+		afs_extract_discard(call, 0);
 		call->unmarshall = 5;
 
 		/* Done */
 	case 5:
-		ret = afs_extract_data(call, call->buffer, 0, false);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 		call->unmarshall = 6;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 19c04caf3c01..d07e7f29f50a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -37,8 +37,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 
 	_enter(",,%llu", (unsigned long long)pos);
 
-	req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
-		      GFP_KERNEL);
+	req = kzalloc(sizeof(struct afs_read), GFP_KERNEL);
 	if (!req)
 		return -ENOMEM;
 
@@ -46,9 +45,8 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 	req->pos = pos;
 	req->len = len;
 	req->nr_pages = 1;
-	req->pages = req->array;
-	req->pages[0] = page;
-	get_page(page);
+	iov_iter_mapping(&req->iter, READ, vnode->vfs_inode.i_mapping,
+			 pos, len);
 
 	ret = afs_fetch_data(vnode, key, req);
 	afs_put_read(req);
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 84b90a79d75a..d99294c75e9a 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -218,6 +218,9 @@ extern int __fscache_read_or_alloc_pages(struct fscache_cookie *,
 					 gfp_t);
 extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t);
 extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t);
+extern int __fscache_write(struct fscache_cookie *, struct iov_iter *,
+			   loff_t, loff_t, gfp_t,
+			   void (*)(struct fscache_cookie *, struct iov_iter *));
 extern void __fscache_uncache_page(struct fscache_cookie *, struct page *);
 extern bool __fscache_check_page_write(struct fscache_cookie *, struct page *);
 extern void __fscache_wait_on_page_write(struct fscache_cookie *, struct page *);
@@ -655,6 +658,34 @@ void fscache_readpages_cancel(struct fscache_cookie *cookie,
 		__fscache_readpages_cancel(cookie, pages);
 }
 
+/**
+ * fscache_write - Request storage of data in the cache
+ * @cookie: The cookie representing the cache object
+ * @iter: The data to store
+ * @pos: The position in the cached data
+ * @object_size: Updated size of object
+ * @gfp: The conditions under which memory allocation should be made
+ * @done: Called upon operation completion
+ *
+ * Request the data described by the iterator be written into the cache.  This
+ * request may be ignored if insufficient space exists in the cache, in which
+ * case -ENOBUFS will be returned.
+ *
+ * See Documentation/filesystems/caching/netfs-api.txt for a complete
+ * description.
+ */
+static inline
+int fscache_write(struct fscache_cookie *cookie, struct iov_iter *iter,
+		  loff_t pos, loff_t object_size, gfp_t gfp,
+		  void (*done)(struct fscache_cookie *cookie,
+			       struct iov_iter *iter))
+{
+	if (fscache_cookie_valid(cookie))
+		return __fscache_write(cookie, iter, pos, object_size, gfp, done);
+	else
+		return -ENOBUFS;
+}
+
 /**
  * fscache_write_page - Request storage of a page in the cache
  * @cookie: The cookie representing the cache object
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 5c60ade2c7d8..5e0f8dcede26 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -207,17 +207,16 @@ afs_edit_dir_reasons;
 #define EM(a, b)	{ a, b },
 #define E_(a, b)	{ a, b }
 
-TRACE_EVENT(afs_recv_data,
-	    TP_PROTO(struct afs_call *call, unsigned count, unsigned offset,
+TRACE_EVENT(afs_receive_data,
+	    TP_PROTO(struct afs_call *call, struct iov_iter *iter,
 		     bool want_more, int ret),
 
-	    TP_ARGS(call, count, offset, want_more, ret),
+	    TP_ARGS(call, iter, want_more, ret),
 
 	    TP_STRUCT__entry(
+		    __field(loff_t,			remain		)
 		    __field(unsigned int,		call		)
 		    __field(enum afs_call_state,	state		)
-		    __field(unsigned int,		count		)
-		    __field(unsigned int,		offset		)
 		    __field(unsigned short,		unmarshall	)
 		    __field(bool,			want_more	)
 		    __field(int,			ret		)
@@ -227,17 +226,18 @@ TRACE_EVENT(afs_recv_data,
 		    __entry->call	= call->debug_id;
 		    __entry->state	= call->state;
 		    __entry->unmarshall	= call->unmarshall;
-		    __entry->count	= count;
-		    __entry->offset	= offset;
+		    __entry->remain	= iov_iter_count(iter);
 		    __entry->want_more	= want_more;
 		    __entry->ret	= ret;
 			   ),
 
-	    TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d",
+	    TP_printk("c=%08x r=%llu u=%u w=%u s=%u ret=%d",
 		      __entry->call,
-		      __entry->state, __entry->unmarshall,
-		      __entry->offset, __entry->count,
-		      __entry->want_more, __entry->ret)
+		      __entry->remain,
+		      __entry->unmarshall,
+		      __entry->want_more,
+		      __entry->state,
+		      __entry->ret)
 	    );
 
 TRACE_EVENT(afs_notify_call,

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ