lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20150923225900.64293d4c2c534f00bfa60435@gmail.com>
Date:	Wed, 23 Sep 2015 22:59:00 +0200
From:	Vitaly Wool <vitalywool@...il.com>
To:	Dan Streetman <ddstreet@...e.org>
Cc:	Seth Jennings <sjennings@...iantweb.net>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Minchan Kim <minchan@...nel.org>,
	Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
	linux-kernel <linux-kernel@...r.kernel.org>,
	Linux-MM <linux-mm@...ck.org>
Subject: Re: [PATCH v2] zbud: allow up to PAGE_SIZE allocations

Okay, how about this? It's gotten smaller BTW :)

zbud: allow up to PAGE_SIZE allocations

Currently zbud is only capable of allocating not more than
PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE. This is okay as
long as only zswap is using it, but other users of zbud may
(and likely will) want to allocate up to PAGE_SIZE. This patch
addresses that by skipping the creation of zbud internal
structure in the beginning of an allocated page. As a zbud page
is no longer guaranteed to contain zbud header, the following
changes have to be applied throughout the code:
* page->lru to be used for zbud page lists
* page->private to hold 'under_reclaim' flag

page->private will also be used to indicate if this page contains
a zbud header in the beginning or not ('headless' flag).

Signed-off-by: Vitaly Wool <vitalywool@...il.com>
---
 mm/zbud.c | 167 ++++++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 113 insertions(+), 54 deletions(-)

diff --git a/mm/zbud.c b/mm/zbud.c
index fa48bcdf..3946fba 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -105,18 +105,20 @@ struct zbud_pool {
 
 /*
  * struct zbud_header - zbud page metadata occupying the first chunk of each
- *			zbud page.
+ *			zbud page, except for HEADLESS pages
  * @buddy:	links the zbud page into the unbuddied/buddied lists in the pool
- * @lru:	links the zbud page into the lru list in the pool
  * @first_chunks:	the size of the first buddy in chunks, 0 if free
  * @last_chunks:	the size of the last buddy in chunks, 0 if free
  */
 struct zbud_header {
 	struct list_head buddy;
-	struct list_head lru;
 	unsigned int first_chunks;
 	unsigned int last_chunks;
-	bool under_reclaim;
+};
+
+enum zbud_page_flags {
+	UNDER_RECLAIM = 0,
+	PAGE_HEADLESS,
 };
 
 /*****************
@@ -221,6 +223,7 @@ MODULE_ALIAS("zpool-zbud");
 *****************/
 /* Just to make the code easier to read */
 enum buddy {
+	HEADLESS,
 	FIRST,
 	LAST
 };
@@ -238,11 +241,14 @@ static int size_to_chunks(size_t size)
 static struct zbud_header *init_zbud_page(struct page *page)
 {
 	struct zbud_header *zhdr = page_address(page);
+
+	INIT_LIST_HEAD(&page->lru);
+	clear_bit(UNDER_RECLAIM, &page->private);
+	clear_bit(HEADLESS, &page->private);
+
 	zhdr->first_chunks = 0;
 	zhdr->last_chunks = 0;
 	INIT_LIST_HEAD(&zhdr->buddy);
-	INIT_LIST_HEAD(&zhdr->lru);
-	zhdr->under_reclaim = 0;
 	return zhdr;
 }
 
@@ -267,11 +273,22 @@ static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
 	 * over the zbud header in the first chunk.
 	 */
 	handle = (unsigned long)zhdr;
-	if (bud == FIRST)
+	switch (bud) {
+	case FIRST:
 		/* skip over zbud header */
 		handle += ZHDR_SIZE_ALIGNED;
-	else /* bud == LAST */
+		break;
+	case LAST:
 		handle += PAGE_SIZE - (zhdr->last_chunks  << CHUNK_SHIFT);
+		break;
+	case HEADLESS:
+		break;
+	default:
+		/* this should never happen */
+		pr_err("zbud: invalid buddy value %d\n", bud);
+		handle = 0;
+		break;
+	}
 	return handle;
 }
 
@@ -287,6 +304,7 @@ static int num_free_chunks(struct zbud_header *zhdr)
 	/*
 	 * Rather than branch for different situations, just use the fact that
 	 * free buddies have a length of zero to simplify everything.
+	 * NB: can't be used with HEADLESS pages.
 	 */
 	return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
 }
@@ -353,31 +371,39 @@ void zbud_destroy_pool(struct zbud_pool *pool)
 int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 			unsigned long *handle)
 {
-	int chunks, i, freechunks;
+	int chunks = 0, i, freechunks;
 	struct zbud_header *zhdr = NULL;
 	enum buddy bud;
 	struct page *page;
 
 	if (!size || (gfp & __GFP_HIGHMEM))
 		return -EINVAL;
-	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
+
+	if (size > PAGE_SIZE)
 		return -ENOSPC;
-	chunks = size_to_chunks(size);
-	spin_lock(&pool->lock);
 
-	/* First, try to find an unbuddied zbud page. */
-	zhdr = NULL;
-	for_each_unbuddied_list(i, chunks) {
-		if (!list_empty(&pool->unbuddied[i])) {
-			zhdr = list_first_entry(&pool->unbuddied[i],
-					struct zbud_header, buddy);
-			list_del(&zhdr->buddy);
-			if (zhdr->first_chunks == 0)
-				bud = FIRST;
-			else
-				bud = LAST;
-			goto found;
+	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
+		bud = HEADLESS;
+	else {
+		chunks = size_to_chunks(size);
+		spin_lock(&pool->lock);
+
+		/* First, try to find an unbuddied zbud page. */
+		zhdr = NULL;
+		for_each_unbuddied_list(i, chunks) {
+			if (!list_empty(&pool->unbuddied[i])) {
+				zhdr = list_first_entry(&pool->unbuddied[i],
+						struct zbud_header, buddy);
+				list_del(&zhdr->buddy);
+				page = virt_to_page(zhdr);
+				if (zhdr->first_chunks == 0)
+					bud = FIRST;
+				else
+					bud = LAST;
+				goto found;
+			}
 		}
+		bud = FIRST;
 	}
 
 	/* Couldn't find unbuddied zbud page, create new one */
@@ -388,7 +414,11 @@ int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
 	spin_lock(&pool->lock);
 	pool->pages_nr++;
 	zhdr = init_zbud_page(page);
-	bud = FIRST;
+
+	if (bud == HEADLESS) {	
+		set_bit(PAGE_HEADLESS, &page->private);
+		goto headless;
+	}
 
 found:
 	if (bud == FIRST)
@@ -405,10 +435,12 @@ found:
 		list_add(&zhdr->buddy, &pool->buddied);
 	}
 
+headless:
 	/* Add/move zbud page to beginning of LRU */
-	if (!list_empty(&zhdr->lru))
-		list_del(&zhdr->lru);
-	list_add(&zhdr->lru, &pool->lru);
+	if (!list_empty(&page->lru))
+		list_del(&page->lru);
+
+	list_add(&page->lru, &pool->lru);
 
 	*handle = encode_handle(zhdr, bud);
 	spin_unlock(&pool->lock);
@@ -430,28 +462,39 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
 {
 	struct zbud_header *zhdr;
 	int freechunks;
+	struct page *page;
+	enum buddy bud;
 
 	spin_lock(&pool->lock);
 	zhdr = handle_to_zbud_header(handle);
+	page = virt_to_page(zhdr);
 
-	/* If first buddy, handle will be page aligned */
-	if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
+	if (!(handle & ~PAGE_MASK)) /* HEADLESS page stored */
+		bud = HEADLESS;
+	else if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK) {
+		bud = LAST;
 		zhdr->last_chunks = 0;
-	else
+	} else {
+		/* If first buddy, handle will be page aligned */
+		bud = FIRST;
 		zhdr->first_chunks = 0;
+	}
 
-	if (zhdr->under_reclaim) {
+	if (test_bit(UNDER_RECLAIM, &page->private)) {
 		/* zbud page is under reclaim, reclaim will free */
 		spin_unlock(&pool->lock);
 		return;
 	}
 
-	/* Remove from existing buddy list */
-	list_del(&zhdr->buddy);
+	if (bud != HEADLESS) {
+		/* Remove from existing buddy list */
+		list_del(&zhdr->buddy);
+	}
 
-	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+	if (bud == HEADLESS ||
+	    (zhdr->first_chunks == 0 && zhdr->last_chunks == 0)) {
 		/* zbud page is empty, free */
-		list_del(&zhdr->lru);
+		list_del(&page->lru);
 		free_zbud_page(zhdr);
 		pool->pages_nr--;
 	} else {
@@ -503,8 +546,9 @@ void zbud_free(struct zbud_pool *pool, unsigned long handle)
  */
 int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 {
-	int i, ret, freechunks;
+	int i, ret = 0, freechunks;
 	struct zbud_header *zhdr;
+	struct page *page;
 	unsigned long first_handle = 0, last_handle = 0;
 
 	spin_lock(&pool->lock);
@@ -514,21 +558,30 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 		return -EINVAL;
 	}
 	for (i = 0; i < retries; i++) {
-		zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru);
-		list_del(&zhdr->lru);
-		list_del(&zhdr->buddy);
+		page = list_tail_entry(&pool->lru, struct page, lru);
+		list_del(&page->lru);
+
 		/* Protect zbud page against free */
-		zhdr->under_reclaim = true;
-		/*
-		 * We need encode the handles before unlocking, since we can
-		 * race with free that will set (first|last)_chunks to 0
-		 */
-		first_handle = 0;
-		last_handle = 0;
-		if (zhdr->first_chunks)
-			first_handle = encode_handle(zhdr, FIRST);
-		if (zhdr->last_chunks)
-			last_handle = encode_handle(zhdr, LAST);
+		set_bit(UNDER_RECLAIM, &page->private);
+		zhdr = page_address(page);
+		if (!test_bit(PAGE_HEADLESS, &page->private)) {
+			list_del(&zhdr->buddy);
+			/*
+			 * We need encode the handles before unlocking, since
+			 * we can race with free that will set
+			 * (first|last)_chunks to 0
+			 */
+			first_handle = 0;
+			last_handle = 0;
+			if (zhdr->first_chunks)
+				first_handle = encode_handle(zhdr, FIRST);
+			if (zhdr->last_chunks)
+				last_handle = encode_handle(zhdr, LAST);
+		} else {
+			first_handle = encode_handle(zhdr, HEADLESS);
+			last_handle = 0;
+		}
+
 		spin_unlock(&pool->lock);
 
 		/* Issue the eviction callback(s) */
@@ -544,8 +597,14 @@ int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 		}
 next:
 		spin_lock(&pool->lock);
-		zhdr->under_reclaim = false;
-		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+		clear_bit(UNDER_RECLAIM, &page->private);
+		if (test_bit(PAGE_HEADLESS, &page->private)) {
+			if (ret == 0) {
+				free_zbud_page(zhdr);
+				pool->pages_nr--;
+				spin_unlock(&pool->lock);
+			}
+		} else if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
 			/*
 			 * Both buddies are now free, free the zbud page and
 			 * return success.
@@ -565,7 +624,7 @@ next:
 		}
 
 		/* add to beginning of LRU */
-		list_add(&zhdr->lru, &pool->lru);
+		list_add(&page->lru, &pool->lru);
 	}
 	spin_unlock(&pool->lock);
 	return -EAGAIN;
-- 
2.4.2
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ