[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3f378e51-87e7-499e-a9fb-4810ca760d2b@kernel.dk>
Date: Tue, 12 Nov 2024 10:19:02 -0700
From: Jens Axboe <axboe@...nel.dk>
To: Brian Foster <bfoster@...hat.com>
Cc: Christoph Hellwig <hch@...radead.org>,
"Kirill A. Shutemov" <kirill@...temov.name>, linux-mm@...ck.org,
linux-fsdevel@...r.kernel.org, hannes@...xchg.org, clm@...a.com,
linux-kernel@...r.kernel.org, willy@...radead.org
Subject: Re: [PATCH 08/15] mm/filemap: add read support for RWF_UNCACHED
On 11/12/24 10:06 AM, Jens Axboe wrote:
> On 11/12/24 9:39 AM, Brian Foster wrote:
>> On Tue, Nov 12, 2024 at 08:14:28AM -0700, Jens Axboe wrote:
>>> On 11/11/24 10:13 PM, Christoph Hellwig wrote:
>>>> On Mon, Nov 11, 2024 at 04:42:25PM -0700, Jens Axboe wrote:
>>>>> Here's the slightly cleaned up version, this is the one I ran testing
>>>>> with.
>>>>
>>>> Looks reasonable to me, but you probably get better reviews on the
>>>> fstests lists.
>>>
>>> I'll send it out once this patchset is a bit closer to integration,
>>> there's the usual chicken and egg situation with it. For now, it's quite
>>> handy for my testing, found a few issues with this version. So thanks
>>> for the suggestion, sure beats writing more of your own test cases :-)
>>>
>>
>> fsx support is probably a good idea as well. It's similar in idea to
>> fsstress, but bashes the same file with mixed operations and includes
>> data integrity validation checks as well. It's pretty useful for
>> uncovering subtle corner case issues or bad interactions..
>
> Indeed, I did that too. Re-running xfstests right now with that too.
Here's what I'm running right now, fwiw. It adds RWF_UNCACHED support
for both the sync read/write and io_uring paths.
diff --git a/ltp/fsx.c b/ltp/fsx.c
index 41933354..104910ff 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -43,6 +43,10 @@
# define MAP_FILE 0
#endif
+#ifndef RWF_UNCACHED
+#define RWF_UNCACHED 0x80
+#endif
+
#define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */
/* Operation flags (bitmask) */
@@ -101,7 +105,9 @@ int logcount = 0; /* total ops */
enum {
/* common operations */
OP_READ = 0,
+ OP_READ_UNCACHED,
OP_WRITE,
+ OP_WRITE_UNCACHED,
OP_MAPREAD,
OP_MAPWRITE,
OP_MAX_LITE,
@@ -190,15 +196,16 @@ int o_direct; /* -Z */
int aio = 0;
int uring = 0;
int mark_nr = 0;
+int rwf_uncached = 1;
int page_size;
int page_mask;
int mmap_mask;
-int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags);
#define READ 0
#define WRITE 1
-#define fsxread(a,b,c,d) fsx_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d) fsx_rw(WRITE, a,b,c,d)
+#define fsxread(a,b,c,d,f) fsx_rw(READ, a,b,c,d,f)
+#define fsxwrite(a,b,c,d,f) fsx_rw(WRITE, a,b,c,d,f)
struct timespec deadline;
@@ -266,7 +273,9 @@ prterr(const char *prefix)
static const char *op_names[] = {
[OP_READ] = "read",
+ [OP_READ_UNCACHED] = "read_uncached",
[OP_WRITE] = "write",
+ [OP_WRITE_UNCACHED] = "write_uncached",
[OP_MAPREAD] = "mapread",
[OP_MAPWRITE] = "mapwrite",
[OP_TRUNCATE] = "truncate",
@@ -393,12 +402,14 @@ logdump(void)
prt("\t******WWWW");
break;
case OP_READ:
+ case OP_READ_UNCACHED:
prt("READ 0x%x thru 0x%x\t(0x%x bytes)",
lp->args[0], lp->args[0] + lp->args[1] - 1,
lp->args[1]);
if (overlap)
prt("\t***RRRR***");
break;
+ case OP_WRITE_UNCACHED:
case OP_WRITE:
prt("WRITE 0x%x thru 0x%x\t(0x%x bytes)",
lp->args[0], lp->args[0] + lp->args[1] - 1,
@@ -784,9 +795,8 @@ doflush(unsigned offset, unsigned size)
}
void
-doread(unsigned offset, unsigned size)
+__doread(unsigned offset, unsigned size, int flags)
{
- off_t ret;
unsigned iret;
offset -= offset % readbdy;
@@ -818,23 +828,39 @@ doread(unsigned offset, unsigned size)
(monitorend == -1 || offset <= monitorend))))))
prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
- ret = lseek(fd, (off_t)offset, SEEK_SET);
- if (ret == (off_t)-1) {
- prterr("doread: lseek");
- report_failure(140);
- }
- iret = fsxread(fd, temp_buf, size, offset);
+ iret = fsxread(fd, temp_buf, size, offset, flags);
if (iret != size) {
- if (iret == -1)
- prterr("doread: read");
- else
+ if (iret == -1) {
+ if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
+ /* RWF_UNCACHED rejected: clear the flag (as __dowrite does) so the caller can retry with flags 0 */
+ rwf_uncached = 0;
+ return;
+ }
+ prterr("doread: read"); /* prefix names the read path, matching the message this hunk replaces */
+ } else {
prt("short read: 0x%x bytes instead of 0x%x\n",
iret, size);
+ }
report_failure(141);
}
check_buffers(temp_buf, offset, size);
}
+/* Read test operation with no per-I/O flags: forwards to __doread() with flags 0. */
+void
+doread(unsigned offset, unsigned size)
+{
+ __doread(offset, size, 0);
+}
+/*
+ * Read test operation that tries RWF_UNCACHED while the global rwf_uncached
+ * is non-zero. The flag is re-checked after the attempt: if it is still set
+ * the attempt is taken as final, otherwise (or if the flag was already clear
+ * on entry) the same range is read through __doread() with flags 0.
+ */
+void
+doread_uncached(unsigned offset, unsigned size)
+{
+ if (rwf_uncached) {
+ __doread(offset, size, RWF_UNCACHED);
+ if (rwf_uncached)
+ return;
+ }
+ __doread(offset, size, 0);
+}
+
void
check_eofpage(char *s, unsigned offset, char *p, int size)
{
@@ -870,7 +896,6 @@ check_contents(void)
unsigned map_offset;
unsigned map_size;
char *p;
- off_t ret;
unsigned iret;
if (!check_buf) {
@@ -885,13 +910,7 @@ check_contents(void)
if (size == 0)
return;
- ret = lseek(fd, (off_t)offset, SEEK_SET);
- if (ret == (off_t)-1) {
- prterr("doread: lseek");
- report_failure(140);
- }
-
- iret = fsxread(fd, check_buf, size, offset);
+ iret = fsxread(fd, check_buf, size, offset, 0);
if (iret != size) {
if (iret == -1)
prterr("check_contents: read");
@@ -1064,9 +1083,8 @@ update_file_size(unsigned offset, unsigned size)
}
void
-dowrite(unsigned offset, unsigned size)
+__dowrite(unsigned offset, unsigned size, int flags)
{
- off_t ret;
unsigned iret;
offset -= offset % writebdy;
@@ -1101,18 +1119,18 @@ dowrite(unsigned offset, unsigned size)
(monitorend == -1 || offset <= monitorend))))))
prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
offset, offset + size - 1, size);
- ret = lseek(fd, (off_t)offset, SEEK_SET);
- if (ret == (off_t)-1) {
- prterr("dowrite: lseek");
- report_failure(150);
- }
- iret = fsxwrite(fd, good_buf + offset, size, offset);
+ iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
if (iret != size) {
- if (iret == -1)
+ if (iret == -1) {
+ if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
+ /* RWF_UNCACHED rejected: clear the global flag and return without reporting a failure */
+ rwf_uncached = 0;
+ return;
+ }
prterr("dowrite: write");
- else
+ } else {
prt("short write: 0x%x bytes instead of 0x%x\n",
iret, size);
+ }
report_failure(151);
}
if (do_fsync) {
@@ -1126,6 +1144,22 @@ dowrite(unsigned offset, unsigned size)
}
}
+/* Write test operation with no per-I/O flags: forwards to __dowrite() with flags 0. */
+void
+dowrite(unsigned offset, unsigned size)
+{
+ __dowrite(offset, size, 0);
+}
+
+/*
+ * Write test operation that tries RWF_UNCACHED while the global rwf_uncached
+ * is non-zero. __dowrite() clears rwf_uncached when the flagged write fails
+ * with EOPNOTSUPP; in that case, or if the flag was already clear on entry,
+ * the write is issued through __dowrite() with flags 0.
+ */
+void
+dowrite_uncached(unsigned offset, unsigned size)
+{
+ if (rwf_uncached) {
+ __dowrite(offset, size, RWF_UNCACHED);
+ if (rwf_uncached)
+ return;
+ }
+ __dowrite(offset, size, 0);
+}
void
domapwrite(unsigned offset, unsigned size)
@@ -2340,11 +2374,21 @@ have_op:
doread(offset, size);
break;
+ case OP_READ_UNCACHED:
+ TRIM_OFF_LEN(offset, size, file_size);
+ doread_uncached(offset, size);
+ break;
+
case OP_WRITE:
TRIM_OFF_LEN(offset, size, maxfilelen);
dowrite(offset, size);
break;
+ case OP_WRITE_UNCACHED:
+ TRIM_OFF_LEN(offset, size, maxfilelen);
+ dowrite_uncached(offset, size);
+ break;
+
case OP_MAPREAD:
TRIM_OFF_LEN(offset, size, file_size);
domapread(offset, size);
@@ -2702,7 +2746,7 @@ uring_setup()
}
int
-uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
{
struct io_uring_sqe *sqe;
struct io_uring_cqe *cqe;
@@ -2733,6 +2777,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
} else {
io_uring_prep_writev(sqe, fd, &iovec, 1, o);
}
+ sqe->rw_flags = flags;
ret = io_uring_submit_and_wait(&ring, 1);
if (ret != 1) {
@@ -2781,7 +2826,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
}
#else
int
-uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
{
fprintf(stderr, "io_rw: need IO_URING support!\n");
exit(111);
@@ -2789,19 +2834,21 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
#endif
+/*
+ * Issue one read (rw == READ) or write of len bytes from/to buf at offset.
+ * Dispatches to aio_rw() when aio is set, to uring_rw() when uring is set,
+ * and otherwise to preadv2()/pwritev2() on a single-element iovec.
+ * flags is forwarded to the uring and syscall paths; the aio path is called
+ * without it. Returns whatever the selected backend returns.
+ */
int
-fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
{
int ret;
if (aio) {
ret = aio_rw(rw, fd, buf, len, offset);
} else if (uring) {
- ret = uring_rw(rw, fd, buf, len, offset);
+ ret = uring_rw(rw, fd, buf, len, offset, flags);
} else {
+ struct iovec iov = { .iov_base = buf, .iov_len = len };
+
if (rw == READ)
- ret = read(fd, buf, len);
+ ret = preadv2(fd, &iov, 1, offset, flags);
else
- ret = write(fd, buf, len);
+ ret = pwritev2(fd, &iov, 1, offset, flags);
}
return ret;
}
--
Jens Axboe
Powered by blists - more mailing lists