lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 16 Jul 2010 18:07:48 +0530
From:	Nitin Gupta <ngupta@...are.org>
To:	Pekka Enberg <penberg@...helsinki.fi>,
	Hugh Dickins <hugh.dickins@...cali.co.uk>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Greg KH <greg@...ah.com>,
	Dan Magenheimer <dan.magenheimer@...cle.com>,
	Rik van Riel <riel@...hat.com>, Avi Kivity <avi@...hat.com>,
	Christoph Hellwig <hch@...radead.org>,
	Minchan Kim <minchan.kim@...il.com>,
	Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
Cc:	linux-mm <linux-mm@...ck.org>,
	linux-kernel <linux-kernel@...r.kernel.org>
Subject: [PATCH 6/8] Compress pages using LZO

Pages are now compressed using LZO compression algorithm
and a new statistic is exported through sysfs:

/sys/kernel/mm/zcache/pool<id>/compr_data_size

This gives the compressed size of the stored pages. So, we can
get the compression ratio using this and the orig_data_size
statistic, which is already exported.

We only keep pages which compress to less than or equal to
PAGE_SIZE/2. However, we still allocate full pages to store
compressed chunks.

Another change is to enforce memlimit against the compressed
size instead of pages_stored (uncompressed size).

Signed-off-by: Nitin Gupta <ngupta@...are.org>
---
 drivers/staging/zram/zcache_drv.c |  254 +++++++++++++++++++++++++-----------
 drivers/staging/zram/zcache_drv.h |    7 +-
 2 files changed, 181 insertions(+), 80 deletions(-)

diff --git a/drivers/staging/zram/zcache_drv.c b/drivers/staging/zram/zcache_drv.c
index 3ea45a6..2a02606 100644
--- a/drivers/staging/zram/zcache_drv.c
+++ b/drivers/staging/zram/zcache_drv.c
@@ -40,13 +40,18 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/cleancache.h>
+#include <linux/cpu.h>
 #include <linux/highmem.h>
+#include <linux/lzo.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/u64_stats_sync.h>
 
 #include "zcache_drv.h"
 
+static DEFINE_PER_CPU(unsigned char *, compress_buffer);
+static DEFINE_PER_CPU(unsigned char *, compress_workmem);
+
 /*
  * For zero-filled pages, we directly insert 'index' value
  * in corresponding radix node. These defines make sure we
@@ -96,7 +101,6 @@ static void zcache_add_stat(struct zcache_pool *zpool,
 	stats->count[idx] += val;
 	u64_stats_update_end(&stats->syncp);
 	preempt_enable();
-
 }
 
 static void zcache_inc_stat(struct zcache_pool *zpool,
@@ -442,11 +446,20 @@ static void *zcache_index_to_ptr(unsigned long index)
 }
 
 /*
- * Returns index value encoded in the given radix node pointer.
+ * Radix node contains a "pointer" value which encodes the <page, offset>
+ * pair, locating the compressed object. Header of the object then
+ * contains the corresponding 'index' value.
  */
-static unsigned long zcache_ptr_to_index(void *ptr)
+static unsigned long zcache_ptr_to_index(struct page *page)
 {
-	return (unsigned long)(ptr) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
+	unsigned long index;
+
+	if (zcache_is_zero_page(page))
+		index = (unsigned long)(page) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
+	else
+		index = page->index;
+
+	return index;
 }
 
 void zcache_free_page(struct zcache_pool *zpool, struct page *page)
@@ -457,8 +470,12 @@ void zcache_free_page(struct zcache_pool *zpool, struct page *page)
 		return;
 
 	is_zero = zcache_is_zero_page(page);
-	if (!is_zero)
+	if (!is_zero) {
+		int clen = page->private;
+
+		zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, -clen);
 		__free_page(page);
+	}
 
 	zcache_dec_pages(zpool, is_zero);
 }
@@ -474,9 +491,12 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
 			pgoff_t index, struct page *page, int is_zero)
 {
 	int ret;
+	size_t clen;
 	unsigned long flags;
 	struct page *zpage;
-	void *src_data, *dest_data;
+	unsigned char *zbuffer, *zworkmem;
+	unsigned char *src_data, *dest_data;
+	struct zcache_pool *zpool = znode->pool;
 
 	if (is_zero) {
 		zpage = zcache_index_to_ptr(index);
@@ -488,13 +508,33 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
 		ret = -ENOMEM;
 		goto out;
 	}
-	zpage->index = index;
+
+	preempt_disable();
+	zbuffer = __get_cpu_var(compress_buffer);
+	zworkmem = __get_cpu_var(compress_workmem);
+	if (unlikely(!zbuffer || !zworkmem)) {
+		ret = -EFAULT;
+		preempt_enable();
+		goto out;
+	}
 
 	src_data = kmap_atomic(page, KM_USER0);
-	dest_data = kmap_atomic(zpage, KM_USER1);
-	memcpy(dest_data, src_data, PAGE_SIZE);
+	ret = lzo1x_1_compress(src_data, PAGE_SIZE, zbuffer, &clen, zworkmem);
 	kunmap_atomic(src_data, KM_USER0);
-	kunmap_atomic(dest_data, KM_USER1);
+
+	if (unlikely(ret != LZO_E_OK) || clen > zcache_max_page_size) {
+		ret = -EINVAL;
+		preempt_enable();
+		goto out;
+	}
+
+	dest_data = kmap_atomic(zpage, KM_USER0);
+	memcpy(dest_data, zbuffer, clen);
+	kunmap_atomic(dest_data, KM_USER0);
+	preempt_enable();
+
+	zpage->index = index;
+	zpage->private = clen;
 
 out_store:
 	spin_lock_irqsave(&znode->tree_lock, flags);
@@ -505,11 +545,19 @@ out_store:
 			__free_page(zpage);
 		goto out;
 	}
-	if (!is_zero)
+	if (is_zero) {
+		zcache_inc_stat(zpool, ZPOOL_STAT_PAGES_ZERO);
+	} else {
+		int delta = zcache_max_page_size - clen;
+		zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, -delta);
+		zcache_inc_stat(zpool, ZPOOL_STAT_PAGES_STORED);
 		radix_tree_tag_set(&znode->page_tree, index,
 				ZCACHE_TAG_NONZERO_PAGE);
+	}
 	spin_unlock_irqrestore(&znode->tree_lock, flags);
 
+	ret = 0; /* success */
+
 out:
 	return ret;
 }
@@ -525,42 +573,6 @@ out:
  */
 #define FREE_BATCH 16
 static void zcache_free_inode_pages(struct zcache_inode_rb *znode,
-				u32 pages_to_free)
-{
-	int count;
-	unsigned long index = 0;
-	struct zcache_pool *zpool = znode->pool;
-
-	do {
-		int i;
-		struct page *pages[FREE_BATCH];
-
-		count = radix_tree_gang_lookup(&znode->page_tree,
-					(void **)pages, index, FREE_BATCH);
-		if (count > pages_to_free)
-			count = pages_to_free;
-
-		for (i = 0; i < count; i++) {
-			if (zcache_is_zero_page(pages[i]))
-				index = zcache_ptr_to_index(pages[i]);
-			else
-				index = pages[i]->index;
-			radix_tree_delete(&znode->page_tree, index);
-			zcache_free_page(zpool, pages[i]);
-		}
-
-		index++;
-		pages_to_free -= count;
-	} while (pages_to_free && (count == FREE_BATCH));
-}
-
-/*
- * Same as the previous function except that we only look for
- * pages with the given tag set.
- *
- * Called under zcache_inode_rb->tree_lock
- */
-static void zcache_free_inode_pages_tag(struct zcache_inode_rb *znode,
 				u32 pages_to_free, enum zcache_tag tag)
 {
 	int count;
@@ -569,17 +581,26 @@ static void zcache_free_inode_pages_tag(struct zcache_inode_rb *znode,
 
 	do {
 		int i;
-		struct page *pages[FREE_BATCH];
+		void *objs[FREE_BATCH];
+
+		if (tag == ZCACHE_TAG_INVALID)
+			count = radix_tree_gang_lookup(&znode->page_tree,
+					objs, index, FREE_BATCH);
+		else
+			count = radix_tree_gang_lookup_tag(&znode->page_tree,
+					objs, index, FREE_BATCH, tag);
 
-		count = radix_tree_gang_lookup_tag(&znode->page_tree,
-				(void **)pages, index, FREE_BATCH, tag);
 		if (count > pages_to_free)
 			count = pages_to_free;
 
 		for (i = 0; i < count; i++) {
-			index = pages[i]->index;
-			zcache_free_page(zpool, pages[i]);
-			radix_tree_delete(&znode->page_tree, index);
+			void *obj;
+			unsigned long index;
+
+			index = zcache_ptr_to_index(objs[i]);
+			obj = radix_tree_delete(&znode->page_tree, index);
+			BUG_ON(obj != objs[i]);
+			zcache_free_page(zpool, obj);
 		}
 
 		index++;
@@ -663,7 +684,7 @@ static void zcache_shrink_pool(struct zcache_pool *zpool)
 
 		spin_lock(&znode->tree_lock);
 		/* Free 'pages_to_free' non-zero pages in the current node */
-		zcache_free_inode_pages_tag(znode, pages_to_free,
+		zcache_free_inode_pages(znode, pages_to_free,
 					ZCACHE_TAG_NONZERO_PAGE);
 		if (zcache_inode_is_empty(znode))
 			zcache_inode_isolate(znode);
@@ -721,6 +742,16 @@ static ssize_t orig_data_size_show(struct kobject *kobj,
 }
 ZCACHE_POOL_ATTR_RO(orig_data_size);
 
+static ssize_t compr_data_size_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
+
+	return sprintf(buf, "%llu\n", zcache_get_stat(
+			zpool, ZPOOL_STAT_COMPR_SIZE));
+}
+ZCACHE_POOL_ATTR_RO(compr_data_size);
+
 static void memlimit_sysfs_common(struct kobject *kobj, u64 *value, int store)
 {
 	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
@@ -763,6 +794,7 @@ ZCACHE_POOL_ATTR(memlimit);
 static struct attribute *zcache_pool_attrs[] = {
 	&zero_pages_attr.attr,
 	&orig_data_size_attr.attr,
+	&compr_data_size_attr.attr,
 	&memlimit_attr.attr,
 	NULL,
 };
@@ -867,21 +899,25 @@ static int zcache_init_shared_fs(char *uuid, size_t pagesize)
  * If found, copies it to the given output page 'page' and frees
  * zcache copy of the same.
  *
- * Returns 0 if requested page found, -1 otherwise.
+ * Returns 0 on success, negative error code on failure.
  */
 static int zcache_get_page(int pool_id, ino_t inode_no,
 			pgoff_t index, struct page *page)
 {
 	int ret = -1;
+	size_t clen;
 	unsigned long flags;
 	struct page *src_page;
-	void *src_data, *dest_data;
+	unsigned char *src_data, *dest_data;
+
 	struct zcache_inode_rb *znode;
 	struct zcache_pool *zpool = zcache->pools[pool_id];
 
 	znode = zcache_find_inode(zpool, inode_no);
-	if (!znode)
+	if (!znode) {
+		ret = -EFAULT;
 		goto out;
+	}
 
 	BUG_ON(znode->inode_no != inode_no);
 
@@ -893,20 +929,30 @@ static int zcache_get_page(int pool_id, ino_t inode_no,
 
 	kref_put(&znode->refcount, zcache_inode_release);
 
-	if (!src_page)
+	if (!src_page) {
+		ret = -EFAULT;
 		goto out;
+	}
 
 	if (zcache_is_zero_page(src_page)) {
 		zcache_handle_zero_page(page);
 		goto out_free;
 	}
 
+	clen = PAGE_SIZE;
 	src_data = kmap_atomic(src_page, KM_USER0);
 	dest_data = kmap_atomic(page, KM_USER1);
-	memcpy(dest_data, src_data, PAGE_SIZE);
+
+	ret = lzo1x_decompress_safe(src_data, src_page->private,
+				dest_data, &clen);
+
 	kunmap_atomic(src_data, KM_USER0);
 	kunmap_atomic(dest_data, KM_USER1);
 
+	/* Failure here means bug in LZO! */
+	if (unlikely(ret != LZO_E_OK))
+		goto out_free;
+
 	flush_dcache_page(page);
 
 out_free:
@@ -942,18 +988,16 @@ static void zcache_put_page(int pool_id, ino_t inode_no,
 	void *src_data = kmap_atomic(page, KM_USER0);
 	is_zero = zcache_page_zero_filled(src_data);
 	kunmap_atomic(src_data, KM_USER0);
-	if (is_zero) {
-		zcache_inc_stat(zpool, ZPOOL_STAT_PAGES_ZERO);
+	if (is_zero)
 		goto out_find_store;
-	}
 
 	/*
-	 * Incrementing local pages_stored before summing it from
+	 * Incrementing local compr_size before summing it from
 	 * all CPUs makes sure we do not end up storing pages in
 	 * excess of memlimit. In case of failure, we revert back
 	 * this local increment.
 	 */
-	zcache_inc_stat(zpool, ZPOOL_STAT_PAGES_STORED);
+	zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, zcache_max_page_size);
 
 	/*
 	 * memlimit can be changed any time by user using sysfs. If
@@ -961,9 +1005,10 @@ static void zcache_put_page(int pool_id, ino_t inode_no,
 	 * stored, then excess pages are freed synchronously when this
 	 * sysfs event occurs.
 	 */
-	if (zcache_get_stat(zpool, ZPOOL_STAT_PAGES_STORED) >
-			zcache_get_memlimit(zpool) >> PAGE_SHIFT) {
-		zcache_dec_stat(zpool, ZPOOL_STAT_PAGES_STORED);
+	if (zcache_get_stat(zpool, ZPOOL_STAT_COMPR_SIZE) >
+			zcache_get_memlimit(zpool)) {
+		zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE,
+				-zcache_max_page_size);
 		return;
 	}
 
@@ -972,7 +1017,8 @@ out_find_store:
 	if (!znode) {
 		znode = zcache_inode_create(pool_id, inode_no);
 		if (unlikely(!znode)) {
-			zcache_dec_pages(zpool, is_zero);
+			zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE,
+					-zcache_max_page_size);
 			return;
 		}
 	}
@@ -985,9 +1031,9 @@ out_find_store:
 		zcache_free_page(zpool, zpage);
 
 	ret = zcache_store_page(znode, index, page, is_zero);
-	if (ret) {	/* failure */
-		zcache_dec_pages(zpool, is_zero);
-
+	if (unlikely(ret)) {
+		zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE,
+				-zcache_max_page_size);
 		/*
 		 * Its possible that racing zcache get/flush could not
 		 * isolate this node since we held a reference to it.
@@ -1046,7 +1092,7 @@ static void zcache_flush_inode(int pool_id, ino_t inode_no)
 		return;
 
 	spin_lock_irqsave(&znode->tree_lock, flags);
-	zcache_free_inode_pages(znode, UINT_MAX);
+	zcache_free_inode_pages(znode, UINT_MAX, ZCACHE_TAG_INVALID);
 	if (zcache_inode_is_empty(znode))
 		zcache_inode_isolate(znode);
 	spin_unlock_irqrestore(&znode->tree_lock, flags);
@@ -1080,7 +1126,7 @@ static void zcache_flush_fs(int pool_id)
 	while (node) {
 		znode = rb_entry(node, struct zcache_inode_rb, rb_node);
 		node = rb_next(node);
-		zcache_free_inode_pages(znode, UINT_MAX);
+		zcache_free_inode_pages(znode, UINT_MAX, ZCACHE_TAG_INVALID);
 		rb_erase(&znode->rb_node, &zpool->inode_tree);
 		kfree(znode);
 	}
@@ -1088,8 +1134,47 @@ static void zcache_flush_fs(int pool_id)
 	zcache_destroy_pool(zpool);
 }
 
+/*
+ * CPU hotplug callback: allocates the per-cpu LZO output buffer and work
+ * memory on CPU_UP_PREPARE and releases both on CPU_DEAD/CPU_UP_CANCELED.
+ * Allocation results are not checked here; the store path tests for NULL.
+ */
+static int zcache_cpu_notify(struct notifier_block *nb, unsigned long action,
+			void *pcpu)
+{
+	int cpu = (long)pcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		per_cpu(compress_buffer, cpu) = (void *)__get_free_pages(
+					GFP_KERNEL | __GFP_ZERO, 1);
+		per_cpu(compress_workmem, cpu) = kzalloc(
+					LZO1X_MEM_COMPRESS, GFP_KERNEL);
+		break;
+	case CPU_DEAD:
+	case CPU_UP_CANCELED:
+		free_pages((unsigned long)(per_cpu(compress_buffer, cpu)), 1);
+		per_cpu(compress_buffer, cpu) = NULL;
+		/* workmem was kzalloc()ed, so release it with kfree() */
+		kfree(per_cpu(compress_workmem, cpu));
+		per_cpu(compress_workmem, cpu) = NULL;
+		break;
+	default:
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block zcache_cpu_nb = {
+	.notifier_call = zcache_cpu_notify
+};
+
 static int __init zcache_init(void)
 {
+	int ret = -ENOMEM;
+	unsigned int cpu;
+
 	struct cleancache_ops ops = {
 		.init_fs = zcache_init_fs,
 		.init_shared_fs = zcache_init_shared_fs,
@@ -1102,20 +1187,50 @@ static int __init zcache_init(void)
 
 	zcache = kzalloc(sizeof(*zcache), GFP_KERNEL);
 	if (!zcache)
-		return -ENOMEM;
+		goto out;
+
+	ret = register_cpu_notifier(&zcache_cpu_nb);
+	if (ret)
+		goto out;
+
+	for_each_online_cpu(cpu) {
+		void *pcpu = (void *)(long)cpu;
+		zcache_cpu_notify(&zcache_cpu_nb, CPU_UP_PREPARE, pcpu);
+	}
 
 #ifdef CONFIG_SYSFS
 	/* Create /sys/kernel/mm/zcache/ */
 	zcache->kobj = kobject_create_and_add("zcache", mm_kobj);
-	if (!zcache->kobj) {
-		kfree(zcache);
-		return -ENOMEM;
-	}
+	if (!zcache->kobj) {
+		/* ret is 0 after the notifier registration; set the error */
+		ret = -ENOMEM;
+		goto out;
+	}
 #endif
 
 	spin_lock_init(&zcache->pool_lock);
 	cleancache_ops = ops;
 
-	return 0;
+	ret = 0; /* success */
+
+out:
+	if (ret) {
+		/*
+		 * Undo the setup done above. The per-cpu pointers start
+		 * out NULL, so cancelling CPUs that never got buffers is
+		 * a no-op. NOTE(review): assumes unregistering a notifier
+		 * that was never registered is harmless -- confirm.
+		 */
+		unregister_cpu_notifier(&zcache_cpu_nb);
+		for_each_online_cpu(cpu) {
+			void *pcpu = (void *)(long)cpu;
+			zcache_cpu_notify(&zcache_cpu_nb,
+					CPU_UP_CANCELED, pcpu);
+		}
+		kfree(zcache);
+		zcache = NULL;
+	}
+
+	return ret;
 }
 
diff --git a/drivers/staging/zram/zcache_drv.h b/drivers/staging/zram/zcache_drv.h
index 1e8c931..9ce97da 100644
--- a/drivers/staging/zram/zcache_drv.h
+++ b/drivers/staging/zram/zcache_drv.h
@@ -24,20 +24,22 @@
 enum zcache_pool_stats_index {
 	ZPOOL_STAT_PAGES_ZERO,
 	ZPOOL_STAT_PAGES_STORED,
+	ZPOOL_STAT_COMPR_SIZE,
 	ZPOOL_STAT_NSTATS,
 };
 
 /* Radix-tree tags */
 enum zcache_tag {
 	ZCACHE_TAG_NONZERO_PAGE,
-	ZCACHE_TAG_UNUSED
+	ZCACHE_TAG_UNUSED,
+	ZCACHE_TAG_INVALID
 };
 
 /* Default zcache per-pool memlimit: 10% of total RAM */
 static const unsigned zcache_pool_default_memlimit_perc_ram = 10;
 
  /* We only keep pages that compress to less than this size */
-static const unsigned zcache_max_page_size = PAGE_SIZE / 2;
+static const int zcache_max_page_size = PAGE_SIZE / 2;
 
 /* Red-Black tree node. Maps inode to its page-tree */
 struct zcache_inode_rb {
@@ -61,6 +63,7 @@ struct zcache_pool {
 
 	seqlock_t memlimit_lock;	/* protects memlimit */
 	u64 memlimit;			/* bytes */
+
 	struct zcache_pool_stats_cpu *stats;	/* percpu stats */
 #ifdef CONFIG_SYSFS
 	unsigned char name[MAX_ZPOOL_NAME_LEN];
-- 
1.7.1.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ