[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9855f69b-e67e-f7d9-88b8-8941666ab02f@kernel.dk>
Date: Fri, 10 Sep 2021 07:57:49 -0600
From: Jens Axboe <axboe@...nel.dk>
To: Al Viro <viro@...iv.linux.org.uk>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>,
Pavel Begunkov <asml.silence@...il.com>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
linux-fsdevel <linux-fsdevel@...r.kernel.org>
Subject: Re: [git pull] iov_iter fixes
On 9/9/21 9:36 PM, Al Viro wrote:
> On Thu, Sep 09, 2021 at 09:30:03PM -0600, Jens Axboe wrote:
>
>>> Again, we should never, ever modify the iovec (or bvec, etc.) array in
>>> ->read_iter()/->write_iter()/->sendmsg()/etc. instances. If you see
>>> such behaviour anywhere, report it immediately. Any such is a blatant
>>> bug.
>>
>> Yes that was wrong, the iovec is obviously const. But that really
>> doesn't change the original point, which was that copying the iov_iter
>> itself unconditionally would be miserable.
>
> Might very well be true, but... won't your patch hit the reimport on
> every short read? And the cost of uaccess in there is *much* higher
> than copying of 48 bytes into local variable...
>
> Or am I misreading your patch? Note that short reads on reaching
> EOF are obviously normal - it's not a rare case at all.
It was just a quick hack, might very well be too eager to go through
those motions. But pondering this instead of sleeping, we don't need to
copy all of iov_iter in order to restore the state, and we can use the
same advance after restoring. So something like this may be more
palatable. Caveat - again untested, and I haven't tested the performance
impact of this at all.
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 855ea544807f..4d6d4315deda 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2608,8 +2608,6 @@ static bool io_resubmit_prep(struct io_kiocb *req)
if (!rw)
return !io_req_prep_async(req);
- /* may have left rw->iter inconsistent on -EIOCBQUEUED */
- iov_iter_revert(&rw->iter, req->result - iov_iter_count(&rw->iter));
return true;
}
@@ -3431,14 +3429,45 @@ static bool need_read_all(struct io_kiocb *req)
S_ISBLK(file_inode(req->file)->i_mode);
}
+/*
+ * Stash the items we need to restore an iov_iter after a partial or
+ * -EAGAIN'ed result.
+ */
+struct iov_store {
+ ssize_t io_size;
+ size_t iov_offset;
+ unsigned long nr_segs;
+ const void *ptr;
+};
+
+static void io_iter_reset(struct iov_iter *iter, struct iov_store *store,
+ ssize_t did_bytes)
+{
+ iter->count = store->io_size;
+ iter->iov_offset = store->iov_offset;
+ iter->nr_segs = store->nr_segs;
+ iter->iov = store->ptr;
+ if (did_bytes > 0)
+ iov_iter_advance(iter, did_bytes);
+}
+
+static void io_iov_store(struct iov_store *store, struct iov_iter *iter)
+{
+ store->io_size = iov_iter_count(iter);
+ store->iov_offset = iter->iov_offset;
+ store->nr_segs = iter->nr_segs;
+ store->ptr = iter->iov;
+}
+
static int io_read(struct io_kiocb *req, unsigned int issue_flags)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
- ssize_t io_size, ret, ret2;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct iov_store store;
+ ssize_t ret, ret2;
if (rw) {
iter = &rw->iter;
@@ -3448,8 +3477,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
}
- io_size = iov_iter_count(iter);
- req->result = io_size;
+ io_iov_store(&store, iter);
+ req->result = store.io_size;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
@@ -3463,7 +3492,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
return ret ?: -EAGAIN;
}
- ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), io_size);
+ ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), store.io_size);
if (unlikely(ret)) {
kfree(iovec);
return ret;
@@ -3479,18 +3508,17 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
/* no retry on NONBLOCK nor RWF_NOWAIT */
if (req->flags & REQ_F_NOWAIT)
goto done;
- /* some cases will consume bytes even on error returns */
- iov_iter_reexpand(iter, iter->count + iter->truncated);
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
ret = 0;
} else if (ret == -EIOCBQUEUED) {
goto out_free;
- } else if (ret <= 0 || ret == io_size || !force_nonblock ||
+ } else if (ret <= 0 || ret == store.io_size || !force_nonblock ||
(req->flags & REQ_F_NOWAIT) || !need_read_all(req)) {
/* read all, failed, already did sync or don't want to retry */
goto done;
}
+ io_iter_reset(iter, &store, ret);
+
ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true);
if (ret2)
return ret2;
@@ -3501,7 +3529,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
iter = &rw->iter;
do {
- io_size -= ret;
+ store.io_size -= ret;
rw->bytes_done += ret;
/* if we can retry, do so with the callbacks armed */
if (!io_rw_should_retry(req)) {
@@ -3520,7 +3548,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
return 0;
/* we got some bytes, but not all. retry. */
kiocb->ki_flags &= ~IOCB_WAITQ;
- } while (ret > 0 && ret < io_size);
+ } while (ret > 0 && ret < store.io_size);
done:
kiocb_done(kiocb, ret, issue_flags);
out_free:
@@ -3543,8 +3571,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
- ssize_t ret, ret2, io_size;
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+ struct iov_store store;
+ ssize_t ret, ret2;
if (rw) {
iter = &rw->iter;
@@ -3554,8 +3583,10 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
}
- io_size = iov_iter_count(iter);
- req->result = io_size;
+
+ io_iov_store(&store, iter);
+ req->result = store.io_size;
+ ret2 = 0;
/* Ensure we clear previously set non-block flag */
if (!force_nonblock)
@@ -3572,7 +3603,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
(req->flags & REQ_F_ISREG))
goto copy_iov;
- ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), io_size);
+ ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), store.io_size);
if (unlikely(ret))
goto out_free;
@@ -3619,9 +3650,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
kiocb_done(kiocb, ret2, issue_flags);
} else {
copy_iov:
- /* some cases will consume bytes even on error returns */
- iov_iter_reexpand(iter, iter->count + iter->truncated);
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
+ io_iter_reset(iter, &store, ret2);
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
return ret ?: -EAGAIN;
}
--
Jens Axboe
Powered by blists - more mailing lists