[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Yy6I7QBvlRP3yDbe@ZenIV>
Date: Sat, 24 Sep 2022 05:34:53 +0100
From: Al Viro <viro@...iv.linux.org.uk>
To: David Howells <dhowells@...hat.com>
Cc: Steve French <smfrench@...il.com>,
Steve French <sfrench@...ba.org>,
Shyam Prasad N <nspmangalore@...il.com>,
Rohith Surabattula <rohiths.msft@...il.com>,
linux-cifs@...r.kernel.org, Jeff Layton <jlayton@...nel.org>,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 5/7] cifs: Change the I/O paths to use an iterator rather
than a page list
On Tue, Aug 23, 2022 at 03:12:41PM +0100, David Howells wrote:
> static void
> cifs_writev_requeue(struct cifs_writedata *wdata)
> {
> - int i, rc = 0;
> + int rc = 0;
> struct inode *inode = d_inode(wdata->cfile->dentry);
> struct TCP_Server_Info *server;
> - unsigned int rest_len;
> + unsigned int rest_len = wdata->bytes;
Umm... Can that by different from iov_iter_count(&wdata->iter)?
> + loff_t fpos = wdata->offset;
>
> server = tlink_tcon(wdata->cfile->tlink)->ses->server;
> - i = 0;
> - rest_len = wdata->bytes;
> do {
> struct cifs_writedata *wdata2;
> - unsigned int j, nr_pages, wsize, tailsz, cur_len;
> + unsigned int wsize, cur_len;
>
> wsize = server->ops->wp_retry_size(inode);
> if (wsize < rest_len) {
> - nr_pages = wsize / PAGE_SIZE;
> - if (!nr_pages) {
> - rc = -EOPNOTSUPP;
> + if (wsize < PAGE_SIZE) {
> + rc = -ENOTSUPP;
> break;
> }
> - cur_len = nr_pages * PAGE_SIZE;
> - tailsz = PAGE_SIZE;
> + cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
> } else {
> - nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
> cur_len = rest_len;
> - tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
> }
>
> - wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
> + wdata2 = cifs_writedata_alloc(cifs_writev_complete);
> if (!wdata2) {
> rc = -ENOMEM;
> break;
> }
>
> - for (j = 0; j < nr_pages; j++) {
> - wdata2->pages[j] = wdata->pages[i + j];
> - lock_page(wdata2->pages[j]);
> - clear_page_dirty_for_io(wdata2->pages[j]);
> - }
> -
> wdata2->sync_mode = wdata->sync_mode;
> - wdata2->nr_pages = nr_pages;
> - wdata2->offset = page_offset(wdata2->pages[0]);
> - wdata2->pagesz = PAGE_SIZE;
> - wdata2->tailsz = tailsz;
> - wdata2->bytes = cur_len;
> + wdata2->offset = fpos;
> + wdata2->bytes = cur_len;
> + wdata2->iter = wdata->iter;
> +
> + iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
Am I right assuming that wdata->iter won't be looked at after we return?
If so, why not advance wdata->iter instead? At the point where you
increment fpos, that is. And instead of rest_len just use
iov_iter_count(&wdata->iter)...
> - /* cleanup remaining pages from the original wdata */
> - for (; i < wdata->nr_pages; i++) {
> - SetPageError(wdata->pages[i]);
> - end_page_writeback(wdata->pages[i]);
> - put_page(wdata->pages[i]);
> - }
> + /* Clean up remaining pages from the original wdata */
> + if (iov_iter_is_xarray(&wdata->iter))
> + cifs_pages_write_failed(inode, fpos, rest_len);
>
> if (rc != 0 && !is_retryable_error(rc))
> mapping_set_error(inode->i_mapping, rc);
> @@ -2491,7 +2503,6 @@ cifs_writev_complete(struct work_struct *work)
> struct cifs_writedata *wdata = container_of(work,
> struct cifs_writedata, work);
> struct inode *inode = d_inode(wdata->cfile->dentry);
> - int i = 0;
>
> if (wdata->result == 0) {
> spin_lock(&inode->i_lock);
> +/*
> + * Select span of a bvec iterator we're going to use. Limit it by both maximum
> + * size and maximum number of segments.
> + */
> +static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t offset,
> + size_t max_size, size_t max_segs, unsigned int *_nsegs)
> +{
> + const struct bio_vec *bvecs = iter->bvec;
> + unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
> + size_t len, span = 0, n = iter->count;
> + size_t skip = iter->iov_offset + offset;
> +
> + if (WARN_ON(!iov_iter_is_bvec(iter)) || WARN_ON(offset > n) || n == 0)
> + return 0;
> +
> + while (n && ix < nbv && skip) {
> + len = bvecs[ix].bv_len;
> + if (skip < len)
> + break;
> + skip -= len;
> + n -= len;
> + ix++;
> + }
Umm... Are you guaranteed that iter->count points to an end of some bvec?
IOW, what's to stop your n from wrapping around?
> static int
> -cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
> +cifs_write_from_iter(loff_t fpos, size_t len, const struct iov_iter *from,
> struct cifsFileInfo *open_file,
> struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
> struct cifs_aio_ctx *ctx)
> {
> int rc = 0;
> - size_t cur_len;
> - unsigned long nr_pages, num_pages, i;
> + size_t cur_len, max_len;
> struct cifs_writedata *wdata;
> - struct iov_iter saved_from = *from;
> - loff_t saved_offset = offset;
> + size_t skip = 0;
> pid_t pid;
> struct TCP_Server_Info *server;
> - struct page **pagevec;
> - size_t start;
> - unsigned int xid;
> + unsigned int xid, max_segs = INT_MAX;
>
> if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
> pid = open_file->pid;
> @@ -3341,10 +3741,20 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
> server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
> xid = get_xid();
>
> +#ifdef CONFIG_CIFS_SMB_DIRECT
> + if (server->smbd_conn)
> + max_segs = server->smbd_conn->max_frmr_depth;
> +#endif
> +
> do {
> - unsigned int wsize;
> struct cifs_credits credits_on_stack;
> struct cifs_credits *credits = &credits_on_stack;
> + unsigned int wsize, nsegs = 0;
> +
> + if (signal_pending(current)) {
> + rc = -EINTR;
> + break;
> + }
>
> if (open_file->invalidHandle) {
> rc = cifs_reopen_file(open_file, false);
> @@ -3359,96 +3769,43 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
> if (rc)
> break;
>
> - cur_len = min_t(const size_t, len, wsize);
> -
> - if (ctx->direct_io) {
> - ssize_t result;
> -
> - result = iov_iter_get_pages_alloc2(
> - from, &pagevec, cur_len, &start);
> - if (result < 0) {
> - cifs_dbg(VFS,
> - "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
> - result, iov_iter_type(from),
> - from->iov_offset, from->count);
> - dump_stack();
> -
> - rc = result;
> - add_credits_and_wake_if(server, credits, 0);
> - break;
> - }
> - cur_len = (size_t)result;
> -
> - nr_pages =
> - (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
> -
> - wdata = cifs_writedata_direct_alloc(pagevec,
> - cifs_uncached_writev_complete);
> - if (!wdata) {
> - rc = -ENOMEM;
> - add_credits_and_wake_if(server, credits, 0);
> - break;
> - }
> -
> -
> - wdata->page_offset = start;
> - wdata->tailsz =
> - nr_pages > 1 ?
> - cur_len - (PAGE_SIZE - start) -
> - (nr_pages - 2) * PAGE_SIZE :
> - cur_len;
> - } else {
> - nr_pages = get_numpages(wsize, len, &cur_len);
> - wdata = cifs_writedata_alloc(nr_pages,
> - cifs_uncached_writev_complete);
> - if (!wdata) {
> - rc = -ENOMEM;
> - add_credits_and_wake_if(server, credits, 0);
> - break;
> - }
> -
> - rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
> - if (rc) {
> - kvfree(wdata->pages);
> - kfree(wdata);
> - add_credits_and_wake_if(server, credits, 0);
> - break;
> - }
> -
> - num_pages = nr_pages;
> - rc = wdata_fill_from_iovec(
> - wdata, from, &cur_len, &num_pages);
> - if (rc) {
> - for (i = 0; i < nr_pages; i++)
> - put_page(wdata->pages[i]);
> - kvfree(wdata->pages);
> - kfree(wdata);
> - add_credits_and_wake_if(server, credits, 0);
> - break;
> - }
> + max_len = min_t(const size_t, len, wsize);
> + if (!max_len) {
> + rc = -EAGAIN;
> + add_credits_and_wake_if(server, credits, 0);
> + break;
> + }
>
> - /*
> - * Bring nr_pages down to the number of pages we
> - * actually used, and free any pages that we didn't use.
> - */
> - for ( ; nr_pages > num_pages; nr_pages--)
> - put_page(wdata->pages[nr_pages - 1]);
> + cur_len = cifs_limit_bvec_subset(from, skip, max_len, max_segs, &nsegs);
> + cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
> + cur_len, max_len, nsegs, from->nr_segs, max_segs);
> + if (cur_len == 0) {
> + rc = -EIO;
> + add_credits_and_wake_if(server, credits, 0);
> + break;
> + }
>
> - wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
> + wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
> + if (!wdata) {
> + rc = -ENOMEM;
> + add_credits_and_wake_if(server, credits, 0);
> + break;
> }
>
> wdata->sync_mode = WB_SYNC_ALL;
> - wdata->nr_pages = nr_pages;
> - wdata->offset = (__u64)offset;
> - wdata->cfile = cifsFileInfo_get(open_file);
> - wdata->server = server;
> - wdata->pid = pid;
> - wdata->bytes = cur_len;
> - wdata->pagesz = PAGE_SIZE;
> - wdata->credits = credits_on_stack;
> - wdata->ctx = ctx;
> + wdata->offset = (__u64)fpos;
> + wdata->cfile = cifsFileInfo_get(open_file);
> + wdata->server = server;
> + wdata->pid = pid;
> + wdata->bytes = cur_len;
> + wdata->credits = credits_on_stack;
> + wdata->iter = *from;
> + wdata->ctx = ctx;
> kref_get(&ctx->refcount);
>
> + iov_iter_advance(&wdata->iter, skip);
> + iov_iter_truncate(&wdata->iter, cur_len);
> +
> rc = adjust_credits(server, &wdata->credits, wdata->bytes);
>
> if (!rc) {
> @@ -3463,16 +3820,14 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
> add_credits_and_wake_if(server, &wdata->credits, 0);
> kref_put(&wdata->refcount,
> cifs_uncached_writedata_release);
> - if (rc == -EAGAIN) {
> - *from = saved_from;
> - iov_iter_advance(from, offset - saved_offset);
> + if (rc == -EAGAIN)
> continue;
> - }
> break;
> }
>
> list_add_tail(&wdata->list, wdata_list);
> - offset += cur_len;
> + skip += cur_len;
> + fpos += cur_len;
IDGI. Why not simply iov_iter_advance(from, cur_len) here, and
to hell with both the iov_iter_advance() above *and* with skip argument
of cifs_limit_bvec_subset()? If you want to keep from const - copy into
local variable and be done with that...
Powered by blists - more mailing lists