lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 22 Sep 2014 09:03:09 +0900
From:	Minchan Kim <minchan@...nel.org>
To:	Andrew Morton <akpm@...ux-foundation.org>
Cc:	linux-kernel@...r.kernel.org, linux-mm@...ck.org,
	Hugh Dickins <hughd@...gle.com>, Shaohua Li <shli@...nel.org>,
	Jerome Marchand <jmarchan@...hat.com>,
	Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
	Dan Streetman <ddstreet@...e.org>,
	Nitin Gupta <ngupta@...are.org>,
	Luigi Semenzato <semenzato@...gle.com>, juno.choi@....com,
	Minchan Kim <minchan@...nel.org>
Subject: [PATCH v1 3/5] mm: VM can be aware of zram fullness

The VM uses nr_swap_pages to throttle the amount of swap when it
reclaims anonymous pages, because nr_swap_pages represents the
freeable space of the swap disk.

However, this is a problem for zram because zram can limit its memory
usage via a knob (i.e., mem_limit), so swap-out can fail: the VM sees
lots of free space on the zram disk, but the limit leaves no more
free space in zram. If that happens, the VM should notice it
and stop reclaiming until zram can obtain more free space, but
we don't have a way to communicate between the VM and zram.

This patch adds a new hint, SWAP_FULL, so that zram can tell the VM
"I'm full" from now on. The VM then cannot reclaim anonymous pages
any more. If the VM notices swap is full, it can remove the
swap_info_struct from swap_avail_head and subtract the remaining
freeable space from nr_swap_pages, so that the VM considers swap full
until it frees a swap entry and increases nr_swap_pages again.

Signed-off-by: Minchan Kim <minchan@...nel.org>
---
 include/linux/blkdev.h |  1 +
 mm/swapfile.c          | 44 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c7220409456c..39f074e0acd7 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1611,6 +1611,7 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
 
 enum swap_blk_hint {
 	SWAP_FREE,
+	SWAP_FULL,
 };
 
 struct block_device_operations {
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 209112cf8b83..71e3df0431b6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -493,6 +493,29 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 	int latency_ration = LATENCY_LIMIT;
 
 	/*
+	 * If zram is full, we don't need to scan and want to stop swap.
+	 * To do so, we remove si from swap_avail_head and decrease
+	 * nr_swap_pages to prevent further anonymous reclaim; the VM
+	 * can restart swap-out once zram has free space again.
+	 * Look at swap_entry_free.
+	 */
+	if (si->flags & SWP_BLKDEV) {
+		struct gendisk *disk = si->bdev->bd_disk;
+
+		if (disk->fops->swap_hint && disk->fops->swap_hint(
+				si->bdev, SWAP_FULL, NULL)) {
+			spin_lock(&swap_avail_lock);
+			WARN_ON(plist_node_empty(&si->avail_list));
+			plist_del(&si->avail_list, &swap_avail_head);
+			spin_unlock(&swap_avail_lock);
+			atomic_long_sub(si->pages - si->inuse_pages,
+						&nr_swap_pages);
+			si->full = true;
+			return 0;
+		}
+	}
+
+	/*
 	 * We try to cluster swap pages by allocating them sequentially
 	 * in swap.  Once we've allocated SWAPFILE_CLUSTER pages this
 	 * way, however, we resort to first-free allocation, starting
@@ -798,6 +821,14 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 	/* free if no reference */
 	if (!usage) {
 		bool was_full;
+		struct gendisk *virt_swap = NULL;
+
+		/* Check virtual swap */
+		if (p->flags & SWP_BLKDEV) {
+			virt_swap = p->bdev->bd_disk;
+			if (!virt_swap->fops->swap_hint)
+				virt_swap = NULL;
+		}
 
 		dec_cluster_info_page(p, p->cluster_info, offset);
 		if (offset < p->lowest_bit)
@@ -814,17 +845,18 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 					  &swap_avail_head);
 			spin_unlock(&swap_avail_lock);
 			p->full = false;
+			if (virt_swap)
+				atomic_long_add(p->pages -
+						p->inuse_pages,
+						&nr_swap_pages);
 		}
 
 		atomic_long_inc(&nr_swap_pages);
 		p->inuse_pages--;
 		frontswap_invalidate_page(p->type, offset);
-		if (p->flags & SWP_BLKDEV) {
-			struct gendisk *disk = p->bdev->bd_disk;
-			if (disk->fops->swap_hint)
-				disk->fops->swap_hint(p->bdev,
-						SWAP_FREE, (void *)offset);
-		}
+		if (virt_swap)
+			virt_swap->fops->swap_hint(p->bdev,
+					SWAP_FREE, (void *)offset);
 	}
 
 	return usage;
-- 
2.0.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ