lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3f378e51-87e7-499e-a9fb-4810ca760d2b@kernel.dk>
Date: Tue, 12 Nov 2024 10:19:02 -0700
From: Jens Axboe <axboe@...nel.dk>
To: Brian Foster <bfoster@...hat.com>
Cc: Christoph Hellwig <hch@...radead.org>,
 "Kirill A. Shutemov" <kirill@...temov.name>, linux-mm@...ck.org,
 linux-fsdevel@...r.kernel.org, hannes@...xchg.org, clm@...a.com,
 linux-kernel@...r.kernel.org, willy@...radead.org
Subject: Re: [PATCH 08/15] mm/filemap: add read support for RWF_UNCACHED

On 11/12/24 10:06 AM, Jens Axboe wrote:
> On 11/12/24 9:39 AM, Brian Foster wrote:
>> On Tue, Nov 12, 2024 at 08:14:28AM -0700, Jens Axboe wrote:
>>> On 11/11/24 10:13 PM, Christoph Hellwig wrote:
>>>> On Mon, Nov 11, 2024 at 04:42:25PM -0700, Jens Axboe wrote:
>>>>> Here's the slightly cleaned up version, this is the one I ran testing
>>>>> with.
>>>>
>>>> Looks reasonable to me, but you probably get better reviews on the
>>>> fstests lists.
>>>
>>> I'll send it out once this patchset is a bit closer to integration,
>>> there's the usual chicken and egg situation with it. For now, it's quite
>>> handy for my testing, found a few issues with this version. So thanks
>>> for the suggestion, sure beats writing more of your own test cases :-)
>>>
>>
>> fsx support is probably a good idea as well. It's similar in idea to
>> fsstress, but bashes the same file with mixed operations and includes
>> data integrity validation checks as well. It's pretty useful for
>> uncovering subtle corner case issues or bad interactions.
> 
> Indeed, I did that too. Re-running xfstests right now with that too.

Here's what I'm running right now, fwiw. It adds RWF_UNCACHED support
for both the sync read/write and io_uring paths.


diff --git a/ltp/fsx.c b/ltp/fsx.c
index 41933354..104910ff 100644
--- a/ltp/fsx.c
+++ b/ltp/fsx.c
@@ -43,6 +43,10 @@
 # define MAP_FILE 0
 #endif
 
+#ifndef RWF_UNCACHED
+#define RWF_UNCACHED	0x80
+#endif
+
 #define NUMPRINTCOLUMNS 32	/* # columns of data to print on each line */
 
 /* Operation flags (bitmask) */
@@ -101,7 +105,9 @@ int			logcount = 0;	/* total ops */
 enum {
 	/* common operations */
 	OP_READ = 0,
+	OP_READ_UNCACHED,
 	OP_WRITE,
+	OP_WRITE_UNCACHED,
 	OP_MAPREAD,
 	OP_MAPWRITE,
 	OP_MAX_LITE,
@@ -190,15 +196,16 @@ int	o_direct;			/* -Z */
 int	aio = 0;
 int	uring = 0;
 int	mark_nr = 0;
+int	rwf_uncached = 1;
 
 int page_size;
 int page_mask;
 int mmap_mask;
-int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset);
+int fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags);
 #define READ 0
 #define WRITE 1
-#define fsxread(a,b,c,d)	fsx_rw(READ, a,b,c,d)
-#define fsxwrite(a,b,c,d)	fsx_rw(WRITE, a,b,c,d)
+#define fsxread(a,b,c,d,f)	fsx_rw(READ, a,b,c,d,f)
+#define fsxwrite(a,b,c,d,f)	fsx_rw(WRITE, a,b,c,d,f)
 
 struct timespec deadline;
 
@@ -266,7 +273,9 @@ prterr(const char *prefix)
 
 static const char *op_names[] = {
 	[OP_READ] = "read",
+	[OP_READ_UNCACHED] = "read_uncached",
 	[OP_WRITE] = "write",
+	[OP_WRITE_UNCACHED] = "write_uncached",
 	[OP_MAPREAD] = "mapread",
 	[OP_MAPWRITE] = "mapwrite",
 	[OP_TRUNCATE] = "truncate",
@@ -393,12 +402,14 @@ logdump(void)
 				prt("\t******WWWW");
 			break;
 		case OP_READ:
+		case OP_READ_UNCACHED:
 			prt("READ     0x%x thru 0x%x\t(0x%x bytes)",
 			    lp->args[0], lp->args[0] + lp->args[1] - 1,
 			    lp->args[1]);
 			if (overlap)
 				prt("\t***RRRR***");
 			break;
+		case OP_WRITE_UNCACHED:
 		case OP_WRITE:
 			prt("WRITE    0x%x thru 0x%x\t(0x%x bytes)",
 			    lp->args[0], lp->args[0] + lp->args[1] - 1,
@@ -784,9 +795,8 @@ doflush(unsigned offset, unsigned size)
 }
 
 void
-doread(unsigned offset, unsigned size)
+__doread(unsigned offset, unsigned size, int flags)
 {
-	off_t ret;
 	unsigned iret;
 
 	offset -= offset % readbdy;
@@ -818,23 +828,39 @@ doread(unsigned offset, unsigned size)
 			(monitorend == -1 || offset <= monitorend))))))
 		prt("%lld read\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
 		    offset, offset + size - 1, size);
-	ret = lseek(fd, (off_t)offset, SEEK_SET);
-	if (ret == (off_t)-1) {
-		prterr("doread: lseek");
-		report_failure(140);
-	}
-	iret = fsxread(fd, temp_buf, size, offset);
+	iret = fsxread(fd, temp_buf, size, offset, flags);
 	if (iret != size) {
-		if (iret == -1)
-			prterr("doread: read");
-		else
+		if (iret == -1) {
+			if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
+				rwf_uncached = 0;
+				return;
+			}
+			prterr("doread: read");
+		} else {
 			prt("short read: 0x%x bytes instead of 0x%x\n",
 			    iret, size);
+		}
 		report_failure(141);
 	}
 	check_buffers(temp_buf, offset, size);
 }
+void
+doread(unsigned offset, unsigned size)
+{
+	__doread(offset, size, 0);
+}
 
+void
+doread_uncached(unsigned offset, unsigned size)
+{
+	if (rwf_uncached) {
+		__doread(offset, size, RWF_UNCACHED);
+		if (rwf_uncached)
+			return;
+	}
+	__doread(offset, size, 0);
+}
+	
 void
 check_eofpage(char *s, unsigned offset, char *p, int size)
 {
@@ -870,7 +896,6 @@ check_contents(void)
 	unsigned map_offset;
 	unsigned map_size;
 	char *p;
-	off_t ret;
 	unsigned iret;
 
 	if (!check_buf) {
@@ -885,13 +910,7 @@ check_contents(void)
 	if (size == 0)
 		return;
 
-	ret = lseek(fd, (off_t)offset, SEEK_SET);
-	if (ret == (off_t)-1) {
-		prterr("doread: lseek");
-		report_failure(140);
-	}
-
-	iret = fsxread(fd, check_buf, size, offset);
+	iret = fsxread(fd, check_buf, size, offset, 0);
 	if (iret != size) {
 		if (iret == -1)
 			prterr("check_contents: read");
@@ -1064,9 +1083,8 @@ update_file_size(unsigned offset, unsigned size)
 }
 
 void
-dowrite(unsigned offset, unsigned size)
+__dowrite(unsigned offset, unsigned size, int flags)
 {
-	off_t ret;
 	unsigned iret;
 
 	offset -= offset % writebdy;
@@ -1101,18 +1119,18 @@ dowrite(unsigned offset, unsigned size)
 			(monitorend == -1 || offset <= monitorend))))))
 		prt("%lld write\t0x%x thru\t0x%x\t(0x%x bytes)\n", testcalls,
 		    offset, offset + size - 1, size);
-	ret = lseek(fd, (off_t)offset, SEEK_SET);
-	if (ret == (off_t)-1) {
-		prterr("dowrite: lseek");
-		report_failure(150);
-	}
-	iret = fsxwrite(fd, good_buf + offset, size, offset);
+	iret = fsxwrite(fd, good_buf + offset, size, offset, flags);
 	if (iret != size) {
-		if (iret == -1)
+		if (iret == -1) {
+			if (errno == EOPNOTSUPP && flags & RWF_UNCACHED) {
+				rwf_uncached = 0;
+				return;
+			}
 			prterr("dowrite: write");
-		else
+		} else {
 			prt("short write: 0x%x bytes instead of 0x%x\n",
 			    iret, size);
+		}
 		report_failure(151);
 	}
 	if (do_fsync) {
@@ -1126,6 +1144,22 @@ dowrite(unsigned offset, unsigned size)
 	}
 }
 
+void
+dowrite(unsigned offset, unsigned size)
+{
+	__dowrite(offset, size, 0);
+}
+
+void
+dowrite_uncached(unsigned offset, unsigned size)
+{
+	if (rwf_uncached) {
+		__dowrite(offset, size, RWF_UNCACHED);
+		if (rwf_uncached)
+			return;
+	}
+	__dowrite(offset, size, 0);
+}
 
 void
 domapwrite(unsigned offset, unsigned size)
@@ -2340,11 +2374,21 @@ have_op:
 		doread(offset, size);
 		break;
 
+	case OP_READ_UNCACHED:
+		TRIM_OFF_LEN(offset, size, file_size);
+		doread_uncached(offset, size);
+		break;
+
 	case OP_WRITE:
 		TRIM_OFF_LEN(offset, size, maxfilelen);
 		dowrite(offset, size);
 		break;
 
+	case OP_WRITE_UNCACHED:
+		TRIM_OFF_LEN(offset, size, maxfilelen);
+		dowrite_uncached(offset, size);
+		break;
+
 	case OP_MAPREAD:
 		TRIM_OFF_LEN(offset, size, file_size);
 		domapread(offset, size);
@@ -2702,7 +2746,7 @@ uring_setup()
 }
 
 int
-uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
 {
 	struct io_uring_sqe     *sqe;
 	struct io_uring_cqe     *cqe;
@@ -2733,6 +2777,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
 		} else {
 			io_uring_prep_writev(sqe, fd, &iovec, 1, o);
 		}
+		sqe->rw_flags = flags;
 
 		ret = io_uring_submit_and_wait(&ring, 1);
 		if (ret != 1) {
@@ -2781,7 +2826,7 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
 }
 #else
 int
-uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
 {
 	fprintf(stderr, "io_rw: need IO_URING support!\n");
 	exit(111);
@@ -2789,19 +2834,21 @@ uring_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
 #endif
 
 int
-fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset)
+fsx_rw(int rw, int fd, char *buf, unsigned len, unsigned offset, int flags)
 {
 	int ret;
 
 	if (aio) {
 		ret = aio_rw(rw, fd, buf, len, offset);
 	} else if (uring) {
-		ret = uring_rw(rw, fd, buf, len, offset);
+		ret = uring_rw(rw, fd, buf, len, offset, flags);
 	} else {
+		struct iovec iov = { .iov_base = buf, .iov_len = len };
+
 		if (rw == READ)
-			ret = read(fd, buf, len);
+			ret = preadv2(fd, &iov, 1, offset, flags);
 		else
-			ret = write(fd, buf, len);
+			ret = pwritev2(fd, &iov, 1, offset, flags);
 	}
 	return ret;
 }


-- 
Jens Axboe

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ