lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190411210834.4105-13-jglisse@redhat.com>
Date:   Thu, 11 Apr 2019 17:08:31 -0400
From:   jglisse@...hat.com
To:     linux-kernel@...r.kernel.org
Cc:     Jérôme Glisse <jglisse@...hat.com>,
        linux-fsdevel@...r.kernel.org, linux-block@...r.kernel.org,
        linux-mm@...ck.org, John Hubbard <jhubbard@...dia.com>,
        Jan Kara <jack@...e.cz>,
        Dan Williams <dan.j.williams@...el.com>,
        Alexander Viro <viro@...iv.linux.org.uk>,
        Johannes Thumshirn <jthumshirn@...e.de>,
        Christoph Hellwig <hch@....de>, Jens Axboe <axboe@...nel.dk>,
        Ming Lei <ming.lei@...hat.com>,
        Dave Chinner <david@...morbit.com>,
        Jason Gunthorpe <jgg@...pe.ca>,
        Matthew Wilcox <willy@...radead.org>,
        Ernesto A . Fernández 
        <ernesto.mnd.fernandez@...il.com>, Jeff Moyer <jmoyer@...hat.com>
Subject: [PATCH v1 12/15] fs/direct-io: keep track of whether a page is coming from GUP or not

From: Jérôme Glisse <jglisse@...hat.com>

We want to keep track of how we got a reference on a page when doing DIO,
i.e. whether the page was referenced through GUP (get_user_page*) or not.
To that end, this patch reworks the way the page reference is taken and
handed over between the DIO code and the BIO. Instead of taking an extra
reference for each page that has been successfully added to a BIO, we just
steal the reference we already hold from looking up the page (either
through GUP or for ZERO_PAGE).

So this patch keeps track of whether the reference has been stolen by the
BIO or not. This avoids a bunch of get_page()/put_page() calls, which
limits the number of atomic operations.

Signed-off-by: Jérôme Glisse <jglisse@...hat.com>
Cc: linux-fsdevel@...r.kernel.org
Cc: linux-block@...r.kernel.org
Cc: linux-mm@...ck.org
Cc: John Hubbard <jhubbard@...dia.com>
Cc: Jan Kara <jack@...e.cz>
Cc: Dan Williams <dan.j.williams@...el.com>
Cc: Alexander Viro <viro@...iv.linux.org.uk>
Cc: Johannes Thumshirn <jthumshirn@...e.de>
Cc: Christoph Hellwig <hch@....de>
Cc: Jens Axboe <axboe@...nel.dk>
Cc: Ming Lei <ming.lei@...hat.com>
Cc: Dave Chinner <david@...morbit.com>
Cc: Jason Gunthorpe <jgg@...pe.ca>
Cc: Matthew Wilcox <willy@...radead.org>
Cc: Ernesto A. Fernández <ernesto.mnd.fernandez@...il.com>
Cc: Jeff Moyer <jmoyer@...hat.com>
---
 fs/direct-io.c | 82 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 60 insertions(+), 22 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index b8b5d8e31aeb..ef9fc7703a78 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -100,6 +100,7 @@ struct dio_submit {
 	unsigned cur_page_len;		/* Nr of bytes at cur_page_offset */
 	sector_t cur_page_block;	/* Where it starts */
 	loff_t cur_page_fs_offset;	/* Offset in file */
+	bool cur_page_from_gup;		/* Current page is coming from GUP */
 
 	struct iov_iter *iter;
 	/*
@@ -148,6 +149,8 @@ struct dio {
 		struct page *pages[DIO_PAGES];	/* page buffer */
 		struct work_struct complete_work;/* deferred AIO completion */
 	};
+
+	bool gup;			/* pages are coming from GUP */
 } ____cacheline_aligned_in_smp;
 
 static struct kmem_cache *dio_cache __read_mostly;
@@ -167,6 +170,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
 	ssize_t ret;
 
+	dio->gup = iov_iter_get_pages_use_gup(sdio->iter);
 	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
 				&sdio->from);
 
@@ -181,6 +185,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 			dio->page_errors = ret;
 		get_page(page);
 		dio->pages[0] = page;
+		dio->gup = false;
 		sdio->head = 0;
 		sdio->tail = 1;
 		sdio->from = 0;
@@ -490,8 +495,12 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-	while (sdio->head < sdio->tail)
-		put_page(dio->pages[sdio->head++]);
+	while (sdio->head < sdio->tail) {
+		if (dio->gup)
+			put_user_page(dio->pages[sdio->head++]);
+		else
+			put_page(dio->pages[sdio->head++]);
+	}
 }
 
 /*
@@ -760,15 +769,19 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
 {
 	int ret;
 
-	ret = bio_add_page(sdio->bio, sdio->cur_page,
-			sdio->cur_page_len, sdio->cur_page_offset, false);
+	/*
+	 * The bio is stealing the page reference and that is fine we can add a
+	 * page only once ie when dio_send_cur_page() is call and each call to
+	 * dio_send_cur_page() clear the cur_page (on success).
+	 */
+	ret = bio_add_page(sdio->bio, sdio->cur_page, sdio->cur_page_len,
+			 sdio->cur_page_offset, sdio->cur_page_from_gup);
 	if (ret == sdio->cur_page_len) {
 		/*
 		 * Decrement count only, if we are done with this page
 		 */
 		if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
 			sdio->pages_in_io--;
-		get_page(sdio->cur_page);
 		sdio->final_block_in_bio = sdio->cur_page_block +
 			(sdio->cur_page_len >> sdio->blkbits);
 		ret = 0;
@@ -828,9 +841,14 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
 		if (ret == 0) {
 			ret = dio_bio_add_page(sdio);
+			if (!ret)
+				/* Clear the current page. */
+				sdio->cur_page = NULL;
 			BUG_ON(ret != 0);
 		}
-	}
+	} else
+		/* Clear the current page. */
+		sdio->cur_page = NULL;
 out:
 	return ret;
 }
@@ -855,7 +873,7 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
 static inline int
 submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		    unsigned offset, unsigned len, sector_t blocknr,
-		    struct buffer_head *map_bh)
+		    struct buffer_head *map_bh, bool gup)
 {
 	int ret = 0;
 
@@ -882,14 +900,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 	 */
 	if (sdio->cur_page) {
 		ret = dio_send_cur_page(dio, sdio, map_bh);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 		if (ret)
 			return ret;
 	}
 
-	get_page(page);		/* It is in dio */
+	/* Steal page reference and GUP flag */
 	sdio->cur_page = page;
+	sdio->cur_page_from_gup = gup;
 	sdio->cur_page_offset = offset;
 	sdio->cur_page_len = len;
 	sdio->cur_page_block = blocknr;
@@ -903,8 +920,6 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
 		ret = dio_send_cur_page(dio, sdio, map_bh);
 		if (sdio->bio)
 			dio_bio_submit(dio, sdio);
-		put_page(sdio->cur_page);
-		sdio->cur_page = NULL;
 	}
 	return ret;
 }
@@ -946,13 +961,29 @@ static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
 	this_chunk_bytes = this_chunk_blocks << sdio->blkbits;
 
 	page = ZERO_PAGE(0);
+	get_page(page);
 	if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
-				sdio->next_block_for_io, map_bh))
+				sdio->next_block_for_io, map_bh, false)) {
+		put_page(page);
 		return;
+	}
 
 	sdio->next_block_for_io += this_chunk_blocks;
 }
 
+static inline void dio_put_page(const struct dio *dio, bool stolen,
+				struct page *page)
+{
+	/* If page reference was stolen then nothing to do. */
+	if (stolen)
+		return;
+
+	if (dio->gup)
+		put_user_page(page);
+	else
+		put_page(page);
+}
+
 /*
  * Walk the user pages, and the file, mapping blocks to disk and generating
  * a sequence of (page,offset,len,block) mappings.  These mappings are injected
@@ -977,6 +1008,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 	int ret = 0;
 
 	while (sdio->block_in_file < sdio->final_block_in_request) {
+		bool stolen = false;
 		struct page *page;
 		size_t from, to;
 
@@ -1003,7 +1035,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				ret = get_more_blocks(dio, sdio, map_bh);
 				if (ret) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				if (!buffer_mapped(map_bh))
@@ -1048,7 +1080,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 
 				/* AKPM: eargh, -ENOTBLK is a hack */
 				if (dio->op == REQ_OP_WRITE) {
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					return -ENOTBLK;
 				}
 
@@ -1061,7 +1093,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 				if (sdio->block_in_file >=
 						i_size_aligned >> blkbits) {
 					/* We hit eof */
-					put_page(page);
+					dio_put_page(dio, stolen, page);
 					goto out;
 				}
 				zero_user(page, from, 1 << blkbits);
@@ -1099,11 +1131,13 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 						  from,
 						  this_chunk_bytes,
 						  sdio->next_block_for_io,
-						  map_bh);
+						  map_bh, dio->gup);
 			if (ret) {
-				put_page(page);
+				dio_put_page(dio, stolen, page);
 				goto out;
-			}
+			} else
+				/* The page reference has been  stolen ... */
+				stolen = true;
 			sdio->next_block_for_io += this_chunk_blocks;
 
 			sdio->block_in_file += this_chunk_blocks;
@@ -1117,7 +1151,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 		}
 
 		/* Drop the ref which was taken in get_user_pages() */
-		put_page(page);
+		dio_put_page(dio, stolen, page);
 	}
 out:
 	return ret;
@@ -1356,8 +1390,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 		ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
 		if (retval == 0)
 			retval = ret2;
-		put_page(sdio.cur_page);
-		sdio.cur_page = NULL;
+		else {
+			if (sdio.cur_page_from_gup)
+				put_user_page(sdio.cur_page);
+			else
+				put_page(sdio.cur_page);
+		}
 	}
 	if (sdio.bio)
 		dio_bio_submit(dio, &sdio);
-- 
2.20.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ