linux-kernel - Re: linux-next: kernel BUG at mm/slub.c:1447!

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20151005191217.48008dc7.akpm@linux-foundation.org>
Date:	Mon, 5 Oct 2015 19:12:17 -0700
From:	Andrew Morton <akpm@...ux-foundation.org>
To:	Michal Hocko <mhocko@...nel.org>,
	Guenter Roeck <linux@...ck-us.net>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	Christoph Lameter <cl@...ux.com>,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
	linux-mm@...ck.org, Dave Chinner <david@...morbit.com>
Subject: Re: linux-next: kernel BUG at mm/slub.c:1447!

On Mon, 5 Oct 2015 12:29:36 -0700 Andrew Morton <akpm@...ux-foundation.org> wrote:

> Maybe it would be better to add the gfp_t argument to the
> address_space_operations.  At a minimum, writepage(), readpage(),
> writepages(), readpages().  What a pickle.

I'm being dumb.  All we need to do is to add a new

	address_space_operations.readpage_gfp(..., gfp_t gfp)

etc.  That should be trivial.  Each fs type only has 2-4 instances of
address_space_operations so the overhead is miniscule.

As a background janitorial thing we can migrate filesystems over to the
new interface then remove address_space_operations.readpage() etc.  But
this *would* add overhead: more stack and more text for no gain.  So
perhaps we just leave things as they are.

That's so simple that I expect we can short-term fix this bug with that
approach.  umm, something like

--- a/fs/mpage.c~a
+++ a/fs/mpage.c
@@ -139,7 +139,8 @@ map_buffer_to_page(struct page *page, st
 static struct bio *
 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 		sector_t *last_block_in_bio, struct buffer_head *map_bh,
-		unsigned long *first_logical_block, get_block_t get_block)
+		unsigned long *first_logical_block, get_block_t get_block,
+		gfp_t gfp)
 {
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
@@ -278,7 +279,7 @@ alloc_new:
 		}
 		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
 				min_t(int, nr_pages, BIO_MAX_PAGES),
-				GFP_KERNEL);
+				gfp);
 		if (bio == NULL)
 			goto confused;
 	}
@@ -310,7 +311,7 @@ confused:
 }
 
 /**
- * mpage_readpages - populate an address space with some pages & start reads against them
+ * mpage_readpages_gfp - populate an address space with some pages & start reads against them
  * @mapping: the address_space
  * @pages: The address of a list_head which contains the target pages.  These
  *   pages have their ->index populated and are otherwise uninitialised.
@@ -318,6 +319,7 @@ confused:
  *   issued in @pages->prev to @pages->next order.
  * @nr_pages: The number of pages at *@...es
  * @get_block: The filesystem's block mapper function.
+ * @gfp: memory allocation constraints
  *
  * This function walks the pages and the blocks within each page, building and
  * emitting large BIOs.
@@ -352,9 +354,8 @@ confused:
  *
  * This all causes the disk requests to be issued in the correct order.
  */
-int
-mpage_readpages(struct address_space *mapping, struct list_head *pages,
-				unsigned nr_pages, get_block_t get_block)
+int mpage_readpages_gfp(struct address_space *mapping, struct list_head *pages,
+			unsigned nr_pages, get_block_t get_block, gfp_t gfp)
 {
 	struct bio *bio = NULL;
 	unsigned page_idx;
@@ -370,12 +371,12 @@ mpage_readpages(struct address_space *ma
 		prefetchw(&page->flags);
 		list_del(&page->lru);
 		if (!add_to_page_cache_lru(page, mapping,
-					page->index, GFP_KERNEL)) {
+					page->index, gfp)) {
 			bio = do_mpage_readpage(bio, page,
 					nr_pages - page_idx,
 					&last_block_in_bio, &map_bh,
 					&first_logical_block,
-					get_block);
+					get_block, gfp);
 		}
 		page_cache_release(page);
 	}
@@ -384,6 +385,14 @@ mpage_readpages(struct address_space *ma
 		mpage_bio_submit(READ, bio);
 	return 0;
 }
+EXPORT_SYMBOL(mpage_readpages_gfp);
+
+int mpage_readpages(struct address_space *mapping, struct list_head *pages,
+			unsigned nr_pages, get_block_t get_block)
+{
+	return mpage_readpages_gfp(mapping, pages, nr_pages, get_block,
+				   GFP_KERNEL);
+}
 EXPORT_SYMBOL(mpage_readpages);
 
 /*
@@ -399,7 +408,7 @@ int mpage_readpage(struct page *page, ge
 	map_bh.b_state = 0;
 	map_bh.b_size = 0;
 	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
-			&map_bh, &first_logical_block, get_block);
+			&map_bh, &first_logical_block, get_block, GFP_KERNEL);
 	if (bio)
 		mpage_bio_submit(READ, bio);
 	return 0;
diff -puN include/linux/fs.h~a include/linux/fs.h
diff -puN include/linux/mpage.h~a include/linux/mpage.h
--- a/include/linux/mpage.h~a
+++ a/include/linux/mpage.h
@@ -15,6 +15,8 @@ struct writeback_control;
 
 int mpage_readpages(struct address_space *mapping, struct list_head *pages,
 				unsigned nr_pages, get_block_t get_block);
+int mpage_readpages_gfp(struct address_space *mapping, struct list_head *pages,
+			unsigned nr_pages, get_block_t get_block, gfp_t gfp);
 int mpage_readpage(struct page *page, get_block_t get_block);
 int mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block);
diff -puN fs/xfs/xfs_aops.c~a fs/xfs/xfs_aops.c
--- a/fs/xfs/xfs_aops.c~a
+++ a/fs/xfs/xfs_aops.c
@@ -1908,13 +1908,14 @@ xfs_vm_readpage(
 }
 
 STATIC int
-xfs_vm_readpages(
+xfs_vm_readpages_gfp(
 	struct file		*unused,
 	struct address_space	*mapping,
 	struct list_head	*pages,
 	unsigned		nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
+	return mpage_readpages_gfp(mapping, pages, nr_pages, xfs_get_blocks,
+				   GFP_NOFS);
 }
 
 /*
@@ -1987,7 +1988,7 @@ xfs_vm_set_page_dirty(
 
 const struct address_space_operations xfs_address_space_operations = {
 	.readpage		= xfs_vm_readpage,
-	.readpages		= xfs_vm_readpages,
+	.readpages		= xfs_vm_readpages_gfp,
 	.writepage		= xfs_vm_writepage,
 	.writepages		= xfs_vm_writepages,
 	.set_page_dirty		= xfs_vm_set_page_dirty,
_

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/