lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <f6a85e61f15ed88d7d40be7c60e833a75f8fc178.1451558672.git.tst@schoebel-theuer.de>
Date:	Thu, 31 Dec 2015 12:35:58 +0100
From:	Thomas Schoebel-Theuer <tst@...oebel-theuer.de>
To:	linux-kernel@...r.kernel.org, tst@...oebel-theuer.de
Subject: [RFC 03/31] mars: add new module brick_mem

Signed-off-by: Thomas Schoebel-Theuer <tst@...oebel-theuer.de>
---
 drivers/staging/mars/brick_mem.c | 1081 ++++++++++++++++++++++++++++++++++++++
 include/linux/brick/brick_mem.h  |  218 ++++++++
 2 files changed, 1299 insertions(+)
 create mode 100644 drivers/staging/mars/brick_mem.c
 create mode 100644 include/linux/brick/brick_mem.h

diff --git a/drivers/staging/mars/brick_mem.c b/drivers/staging/mars/brick_mem.c
new file mode 100644
index 0000000..03a3d28
--- /dev/null
+++ b/drivers/staging/mars/brick_mem.c
@@ -0,0 +1,1081 @@
+/*
+ * MARS Long Distance Replication Software
+ *
+ * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
+ * Copyright (C) 2011-2014 1&1 Internet AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+
+#include <linux/atomic.h>
+
+#include <linux/brick/brick_mem.h>
+#include <linux/brick/brick_say.h>
+#include <linux/brick/lamport.h>
+
+/*
+ * Selects __get_free_pages() / __free_pages() for page blocks; the
+ * __vmalloc() / vfree() variant is only compiled when this is not defined.
+ */
+#define USE_KERNEL_PAGES		/*  currently mandatory (vmalloc does not work) */
+
+/*
+ * Guard values written around allocations when BRICK_DEBUG_MEM is defined:
+ *   MAGIC_BLOCK / MAGIC_BEND		page blocks
+ *   MAGIC_MEM1/2 / MAGIC_MEND1/2	small memory (head / tail)
+ *   MAGIC_STR / MAGIC_SEND		strings (in the strong debug mode the
+ *					tail is STRING_CANARY instead)
+ */
+#define MAGIC_BLOCK			((int)0x8B395D7B)
+#define MAGIC_BEND			((int)0x8B395D7C)
+#define MAGIC_MEM1			((int)0x8B395D7D)
+#define MAGIC_MEM2			((int)0x9B395D8D)
+#define MAGIC_MEND1			((int)0x8B395D7E)
+#define MAGIC_MEND2			((int)0x9B395D8E)
+#define MAGIC_STR			((int)0x8B395D7F)
+#define MAGIC_SEND			((int)0x9B395D8F)
+
+/* The int located "offset" bytes behind "ptr"; usable as an lvalue */
+#define INT_ACCESS(ptr, offset) (*(int *)(((char *)(ptr)) + (offset)))
+
+/*
+ * Expands to a format string plus its leading arguments: two timestamps,
+ * message class, task name, CPU, file, line and function.
+ * Relies on the locals _s_now and _l_now declared by _BRICK_MSG().
+ */
+#define _BRICK_FMT(_fmt, _class)					\
+	"%ld.%09ld %ld.%09ld MEM_%-5s %s[%d] %s:%d %s(): "		\
+		_fmt,							\
+		_s_now.tv_sec, _s_now.tv_nsec,				\
+		_l_now.tv_sec, _l_now.tv_nsec,				\
+		say_class[_class],					\
+		current->comm, (int)smp_processor_id(),			\
+		__BASE_FILE__,						\
+		__LINE__,						\
+		__func__
+
+/*
+ * Emit one message of class _class via say(), stamped with CURRENT_TIME
+ * and the lamport time; additionally dump the stack when _dump is true.
+ */
+#define _BRICK_MSG(_class, _dump, _fmt, _args...)			\
+	do {								\
+		struct timespec _s_now = CURRENT_TIME;			\
+		struct timespec _l_now;					\
+		get_lamport(&_l_now);					\
+		say(_class, _BRICK_FMT(_fmt, _class), ##_args);		\
+		if (_dump)						\
+			dump_stack();					\
+	} while (0)
+
+/* Only BRICK_ERR() requests a stack dump */
+#define BRICK_ERR(_fmt, _args...) _BRICK_MSG(SAY_ERROR, true,  _fmt, ##_args)
+#define BRICK_WRN(_fmt, _args...) _BRICK_MSG(SAY_WARN,	false, _fmt, ##_args)
+#define BRICK_INF(_fmt, _args...) _BRICK_MSG(SAY_INFO,	false, _fmt, ##_args)
+
+/***********************************************************************/
+
+/*  limit handling */
+
+#include <linux/swap.h>
+
+/* Total RAM in KiB; assigned by get_total_ram() */
+long long brick_global_memavail;
+/* Not assigned in brick_mem.c -- presumably set by users of this module (TODO confirm) */
+long long brick_global_memlimit;
+
+/* KiB currently held in page blocks; adjusted by __brick_block_alloc() / __brick_block_free() */
+atomic64_t brick_global_block_used = ATOMIC64_INIT(0);
+
+/*
+ * Query the kernel for the total amount of RAM, store it in
+ * brick_global_memavail (unit: KiB) and report it at info level.
+ * Swap is not taken into account (the si_swapinfo() call is disabled).
+ */
+void get_total_ram(void)
+{
+	struct sysinfo meminfo = {};
+
+	si_meminfo(&meminfo);
+	/* totalram is multiplied by the page size in KiB */
+	brick_global_memavail = (long long)meminfo.totalram * (PAGE_SIZE / 1024);
+	BRICK_INF("total RAM = %lld [KiB]\n", brick_global_memavail);
+}
+
+/***********************************************************************/
+
+/*  small memory allocation (use this only for len < PAGE_SIZE) */
+
+#ifdef BRICK_DEBUG_MEM
+/* outstanding kmalloc()-backed resp. page-block-backed small allocations */
+static atomic_t phys_mem_alloc = ATOMIC_INIT(0);
+static atomic_t mem_redirect_alloc = ATOMIC_INIT(0);
+/* per-line statistics, indexed by the (clamped) "line" argument of _brick_mem_alloc() */
+static atomic_t mem_count[BRICK_DEBUG_MEM];
+static atomic_t mem_free[BRICK_DEBUG_MEM];
+static int  mem_len[BRICK_DEBUG_MEM];
+
+/* debug layout: 4 ints in front of the payload, 2 ints behind it */
+#define PLUS_SIZE			(6 * sizeof(int))
+#else
+/* plain layout: 2 ints in front of the payload, the first one holds the length */
+#define PLUS_SIZE			(2 * sizeof(int))
+#endif
+
+/*
+ * Raw allocator behind _brick_mem_alloc().
+ *
+ * Requests of at least PAGE_SIZE bytes are redirected to
+ * _brick_block_alloc(); smaller ones use kmalloc(GFP_BRICK), retried
+ * once per second until it succeeds.
+ */
+static inline
+void *__brick_mem_alloc(int len)
+{
+	void *mem;
+
+	if (len >= PAGE_SIZE) {
+#ifdef BRICK_DEBUG_MEM
+		atomic_inc(&mem_redirect_alloc);
+#endif
+		return _brick_block_alloc(0, len, 0);
+	}
+
+	mem = kmalloc(len, GFP_BRICK);
+	while (unlikely(!mem)) {
+		msleep(1000);
+		mem = kmalloc(len, GFP_BRICK);
+	}
+#ifdef BRICK_DEBUG_MEM
+	atomic_inc(&phys_mem_alloc);
+#endif
+	return mem;
+}
+
+/*
+ * Counterpart of __brick_mem_alloc(): "len" must be the size that was
+ * passed at allocation time, because it selects between kfree() and
+ * _brick_block_free().
+ */
+static inline
+void __brick_mem_free(void *data, int len)
+{
+	if (len < PAGE_SIZE) {
+		kfree(data);
+#ifdef BRICK_DEBUG_MEM
+		atomic_dec(&phys_mem_alloc);
+#endif
+		return;
+	}
+
+	_brick_block_free(data, len, 0);
+#ifdef BRICK_DEBUG_MEM
+	atomic_dec(&mem_redirect_alloc);
+#endif
+}
+
+/*
+ * Allocate "len" payload bytes plus PLUS_SIZE bytes of bookkeeping.
+ *
+ * @len:  payload size in bytes
+ * @line: identifies the call site; only used for the debug statistics
+ *
+ * Returns a pointer to the payload, which lies behind the bookkeeping header.
+ *
+ * Debug layout:  MAGIC_MEM1, len, line, MAGIC_MEM2, payload, MAGIC_MEND1, MAGIC_MEND2
+ * Plain layout:  len, (one further int not written here), payload
+ */
+void *_brick_mem_alloc(int len, int line)
+{
+	void *res;
+
+#ifdef CONFIG_MARS_DEBUG
+	might_sleep();
+#endif
+
+	res = __brick_mem_alloc(len + PLUS_SIZE);
+
+#ifdef BRICK_DEBUG_MEM
+	/* clamp the line so that it is a valid index into the statistics arrays */
+	if (unlikely(line < 0))
+		line = 0;
+	else if (unlikely(line >= BRICK_DEBUG_MEM))
+		line = BRICK_DEBUG_MEM - 1;
+	INT_ACCESS(res, 0 * sizeof(int)) = MAGIC_MEM1;
+	INT_ACCESS(res, 1 * sizeof(int)) = len;
+	INT_ACCESS(res, 2 * sizeof(int)) = line;
+	INT_ACCESS(res, 3 * sizeof(int)) = MAGIC_MEM2;
+	/* from here on, res points to the payload */
+	res += 4 * sizeof(int);
+	INT_ACCESS(res, len + 0 * sizeof(int)) = MAGIC_MEND1;
+	INT_ACCESS(res, len + 1 * sizeof(int)) = MAGIC_MEND2;
+	atomic_inc(&mem_count[line]);
+	mem_len[line] = len;
+#else
+	/* remember the length; _brick_mem_free() needs it */
+	INT_ACCESS(res, 0 * sizeof(int)) = len;
+	res += PLUS_SIZE;
+#endif
+	return res;
+}
+
+/*
+ * Free memory obtained from _brick_mem_alloc().
+ *
+ * @data:  payload pointer as returned by _brick_mem_alloc()
+ * @cline: identifies the call site; only used in error messages
+ *
+ * The length is read back from the header in front of the payload.
+ * In the debug variant all guard values are verified first; when any
+ * check fails, an error is logged and the memory is NOT freed.
+ */
+void _brick_mem_free(void *data, int cline)
+{
+#ifdef BRICK_DEBUG_MEM
+	void *test = data - 4 * sizeof(int);
+	int magic1 = INT_ACCESS(test, 0 * sizeof(int));
+	int len = INT_ACCESS(test, 1 * sizeof(int));
+	int line = INT_ACCESS(test, 2 * sizeof(int));
+	int magic2 = INT_ACCESS(test, 3 * sizeof(int));
+
+	if (unlikely(magic1 != MAGIC_MEM1)) {
+		BRICK_ERR("line %d memory corruption: magix1 %08x != %08x, len = %d\n",
+			cline,
+			magic1,
+			MAGIC_MEM1,
+			len);
+		goto _out_return;
+	}
+	if (unlikely(magic2 != MAGIC_MEM2)) {
+		BRICK_ERR("line %d memory corruption: magix2 %08x != %08x, len = %d\n",
+			cline,
+			magic2,
+			MAGIC_MEM2,
+			len);
+		goto _out_return;
+	}
+	if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+		BRICK_ERR("line %d memory corruption: alloc line = %d, len = %d\n", cline, line, len);
+		goto _out_return;
+	}
+	/* invalidate the head marker, then check the two tail markers */
+	INT_ACCESS(test, 0) = 0xffffffff;
+	magic1 = INT_ACCESS(data, len + 0 * sizeof(int));
+	if (unlikely(magic1 != MAGIC_MEND1)) {
+		BRICK_ERR("line %d memory corruption: magix1 %08x != %08x, len = %d\n",
+			cline,
+			magic1,
+			MAGIC_MEND1,
+			len);
+		goto _out_return;
+	}
+	magic2 = INT_ACCESS(data, len + 1 * sizeof(int));
+	if (unlikely(magic2 != MAGIC_MEND2)) {
+		BRICK_ERR("line %d memory corruption: magix2 %08x != %08x, len = %d\n",
+			cline,
+			magic2,
+			MAGIC_MEND2,
+			len);
+		goto _out_return;
+	}
+	/* invalidate the first tail marker and update the per-line statistics */
+	INT_ACCESS(data, len) = 0xffffffff;
+	atomic_dec(&mem_count[line]);
+	atomic_inc(&mem_free[line]);
+#else
+	void *test = data - PLUS_SIZE;
+	int len = INT_ACCESS(test, 0 * sizeof(int));
+
+#endif
+	/* hand the start of the raw allocation, together with its full size, back */
+	data = test;
+	__brick_mem_free(data, len + PLUS_SIZE);
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/***********************************************************************/
+
+/*  string memory allocation */
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+/* Text appended behind every string; compared with strcmp() when the string is freed */
+# define STRING_CANARY							\
+	"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+	"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+	"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+	"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+	"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+	"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+	"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \
+	"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" \
+	"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" \
+	" FILE = "	__FILE__					\
+	" VERSION = "	__VERSION__					\
+	" xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx STRING_error xxx\n"
+/* 3 header ints plus the canary including its terminating NUL */
+# define STRING_PLUS (sizeof(int) * 3 + sizeof(STRING_CANARY))
+#elif defined(BRICK_DEBUG_MEM)
+/* 3 header ints plus 1 trailing int (MAGIC_SEND) */
+# define STRING_PLUS (sizeof(int) * 4)
+#else
+/* no bookkeeping at all */
+# define STRING_PLUS 0
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+/* number of outstanding strings, and per-line statistics indexed by the clamped "line" */
+static atomic_t phys_string_alloc = ATOMIC_INIT(0);
+static atomic_t string_count[BRICK_DEBUG_MEM];
+static atomic_t string_free[BRICK_DEBUG_MEM];
+
+#endif
+
+/*
+ * Allocate a zero-initialized string buffer.
+ *
+ * @len:  buffer size in bytes; values <= 0 select BRICK_STRING_LEN
+ * @line: identifies the call site; only used for warnings and debug statistics
+ *
+ * kzalloc(GFP_BRICK) is retried once per second until it succeeds.
+ * With BRICK_DEBUG_MEM the buffer is preceded by MAGIC_STR, len and line
+ * and followed by MAGIC_SEND (or by STRING_CANARY in the strong debug mode).
+ */
+char *_brick_string_alloc(int len, int line)
+{
+	char *res;
+
+#ifdef CONFIG_MARS_DEBUG
+	might_sleep();
+	if (unlikely(len > PAGE_SIZE))
+		BRICK_WRN("line = %d string too long: len = %d\n", line, len);
+#endif
+	if (len <= 0)
+		len = BRICK_STRING_LEN;
+
+	for (;;) {
+		res = kzalloc(len + STRING_PLUS, GFP_BRICK);
+		if (likely(res))
+			break;
+		msleep(1000);
+	}
+
+#ifdef BRICK_DEBUG_MEM
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	/*
+	 * NOTE(review): this fill is applied to the raw allocation, before the
+	 * 3-int header is written and skipped below. For len > 3 * sizeof(int)
+	 * the returned string therefore starts with '?' while its last
+	 * 3 * sizeof(int) bytes stay zero -- confirm that this is intended.
+	 */
+	memset(res + 1, '?', len - 1);
+#endif
+	atomic_inc(&phys_string_alloc);
+	/* clamp the line so that it is a valid index into the statistics arrays */
+	if (unlikely(line < 0))
+		line = 0;
+	else if (unlikely(line >= BRICK_DEBUG_MEM))
+		line = BRICK_DEBUG_MEM - 1;
+	INT_ACCESS(res, 0) = MAGIC_STR;
+	INT_ACCESS(res, sizeof(int)) = len;
+	INT_ACCESS(res, sizeof(int) * 2) = line;
+	/* from here on, res points to the string itself */
+	res += sizeof(int) * 3;
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	strcpy(res + len, STRING_CANARY);
+#else
+	INT_ACCESS(res, len) = MAGIC_SEND;
+#endif
+	atomic_inc(&string_count[line]);
+#endif
+	return res;
+}
+
+/*
+ * Free a string obtained from _brick_string_alloc().
+ *
+ * @data:  string pointer as returned by _brick_string_alloc()
+ * @cline: identifies the call site; only used in error messages
+ *
+ * With BRICK_DEBUG_MEM the header and the tail marker are verified first;
+ * when a check fails (except for the "too long" report, which only logs),
+ * the memory is NOT freed.
+ */
+void _brick_string_free(const char *data, int cline)
+{
+#ifdef BRICK_DEBUG_MEM
+	int magic;
+	int len;
+	int line;
+	char *orig = (void *)data;
+
+	/* step back to the start of the raw allocation */
+	data -= sizeof(int) * 3;
+	magic = INT_ACCESS(data, 0);
+	if (unlikely(magic != MAGIC_STR)) {
+		BRICK_ERR("cline %d stringmem corruption: magix %08x != %08x\n", cline, magic, MAGIC_STR);
+		goto _out_return;
+	}
+	len = INT_ACCESS(data, sizeof(int));
+	line = INT_ACCESS(data, sizeof(int) * 2);
+	if (unlikely(len <= 0)) {
+		BRICK_ERR("cline %d stringmem corruption: line = %d len = %d\n", cline, line, len);
+		goto _out_return;
+	}
+	if (unlikely(len > PAGE_SIZE))
+		BRICK_ERR("cline %d string too long: line = %d len = %d string='%s'\n", cline, line, len, orig);
+	if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+		BRICK_ERR("cline %d stringmem corruption: line = %d (len = %d)\n", cline, line, len);
+		goto _out_return;
+	}
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	if (unlikely(strcmp(orig + len, STRING_CANARY))) {
+		/* note: the message prints the expected canary, not the bytes found */
+		BRICK_ERR("cline %d stringmem corruption: bad canary '%s', line = %d len = %d\n",
+			  cline, STRING_CANARY, line, len);
+		goto _out_return;
+	}
+	/* spoil the canary and overwrite the string contents */
+	orig[len]--;
+	memset(orig, '!', len);
+#else
+	magic = INT_ACCESS(orig, len);
+	if (unlikely(magic != MAGIC_SEND)) {
+		BRICK_ERR("cline %d stringmem corruption: end_magix %08x != %08x, line = %d len = %d\n",
+			  cline, magic, MAGIC_SEND, line, len);
+		goto _out_return;
+	}
+	/* invalidate the tail marker */
+	INT_ACCESS(orig, len) = 0xffffffff;
+#endif
+	atomic_dec(&string_count[line]);
+	atomic_inc(&string_free[line]);
+	atomic_dec(&phys_string_alloc);
+#endif
+	kfree(data);
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/***********************************************************************/
+
+/*  block memory allocation */
+
+/*
+ * Map a byte count to the smallest page order whose size
+ * (PAGE_SIZE << order) is not less than "len".
+ *
+ * Non-positive lengths are reported and yield order 0; results above
+ * BRICK_MAX_ORDER are reported and clamped to BRICK_MAX_ORDER.
+ */
+static
+int len2order(int len)
+{
+	int order;
+
+	if (unlikely(len <= 0)) {
+		BRICK_ERR("trying to use %d bytes\n", len);
+		return 0;
+	}
+
+	for (order = 0; (PAGE_SIZE << order) < len; order++)
+		;	/* nothing else to do */
+
+	if (likely(order <= BRICK_MAX_ORDER))
+		return order;
+
+	BRICK_ERR("trying to use %d bytes (oder = %d, max = %d)\n", len, order, BRICK_MAX_ORDER);
+	return BRICK_MAX_ORDER;
+}
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+/*
+ * All indexed by page order.
+ * _alloc_count counts blocks currently handed out by _brick_block_alloc();
+ * brick_mem_alloc_count mirrors it, brick_mem_alloc_max is its high-water mark,
+ * brick_mem_freelist_max limits the freelist length (see _brick_block_free()).
+ */
+static atomic_t _alloc_count[BRICK_MAX_ORDER+1];
+int brick_mem_alloc_count[BRICK_MAX_ORDER+1] = {};
+int brick_mem_alloc_max[BRICK_MAX_ORDER+1] = {};
+int brick_mem_freelist_max[BRICK_MAX_ORDER+1] = {};
+
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+/* number of blocks currently obtained from the page allocator */
+static atomic_t phys_block_alloc = ATOMIC_INIT(0);
+
+/*  indexed by line */
+static atomic_t block_count[BRICK_DEBUG_MEM];
+static atomic_t block_free[BRICK_DEBUG_MEM];
+static int  block_len[BRICK_DEBUG_MEM];
+
+/*  indexed by order */
+static atomic_t op_count[BRICK_MAX_ORDER+1];
+static atomic_t raw_count[BRICK_MAX_ORDER+1];
+/* line and len of the most recent _brick_block_alloc() call per order */
+static int alloc_line[BRICK_MAX_ORDER+1];
+static int alloc_len[BRICK_MAX_ORDER+1];
+
+#endif
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+
+/* number of hash buckets for the block tracking lists */
+#define MAX_INFO_LISTS			1024
+
+/* bucket index derived from the block address */
+#define INFO_LIST_HASH(addr) ((unsigned long)(addr) / (PAGE_SIZE * 2) % MAX_INFO_LISTS)
+
+/* One tracking record per block obtained by __brick_block_alloc() */
+struct mem_block_info {
+	struct list_head inf_head;	/* link in inf_anchor[hash] */
+	void *inf_data;			/* start address of the raw block */
+	int inf_len;			/* size of the raw block in bytes */
+	int inf_line;			/* line of the last alloc / free operation */
+	bool inf_used;			/* cleared by _brick_block_free(), set again on reuse */
+};
+
+/* hash table of tracking records; each bucket has its own lock */
+static struct list_head inf_anchor[MAX_INFO_LISTS];
+static rwlock_t inf_lock[MAX_INFO_LISTS];
+
+/*
+ * Create a tracking record for a freshly allocated raw block and
+ * insert it into the hash bucket belonging to its address.
+ * The record is marked as used; kmalloc() is retried until it succeeds.
+ */
+static
+void _new_block_info(void *data, int len, int cline)
+{
+	struct mem_block_info *entry;
+	int bucket;
+
+	entry = kmalloc(sizeof(struct mem_block_info), GFP_BRICK);
+	while (unlikely(!entry)) {
+		msleep(1000);
+		entry = kmalloc(sizeof(struct mem_block_info), GFP_BRICK);
+	}
+
+	entry->inf_data = data;
+	entry->inf_len = len;
+	entry->inf_line = cline;
+	entry->inf_used = true;
+
+	bucket = INFO_LIST_HASH(data);
+
+	write_lock(&inf_lock[bucket]);
+	list_add(&entry->inf_head, &inf_anchor[bucket]);
+	write_unlock(&inf_lock[bucket]);
+}
+
+/*
+ * Look up the tracking record whose inf_data equals "data".
+ *
+ * @remove: when true, the record is also unlinked from its bucket
+ *          (the bucket is then write-locked instead of read-locked)
+ *
+ * Returns the record, or NULL when the address is unknown.
+ */
+static
+struct mem_block_info *_find_block_info(void *data, bool remove)
+{
+	struct mem_block_info *found = NULL;
+	struct list_head *pos;
+	int bucket = INFO_LIST_HASH(data);
+	struct list_head *anchor = &inf_anchor[bucket];
+
+	if (remove)
+		write_lock(&inf_lock[bucket]);
+	else
+		read_lock(&inf_lock[bucket]);
+
+	for (pos = anchor->next; pos != anchor; pos = pos->next) {
+		struct mem_block_info *inf = container_of(pos, struct mem_block_info, inf_head);
+
+		if (inf->inf_data == data) {
+			if (remove)
+				list_del_init(pos);
+			found = inf;
+			break;
+		}
+	}
+
+	if (remove)
+		write_unlock(&inf_lock[bucket]);
+	else
+		read_unlock(&inf_lock[bucket]);
+	return found;
+}
+
+#endif /*  CONFIG_MARS_DEBUG_MEM_STRONG */
+
+/*
+ * Obtain a raw block of (PAGE_SIZE << order) bytes from the kernel.
+ *
+ * @gfp:   allocation flags passed to the page allocator
+ * @order: page order of the block
+ * @cline: call site, recorded in the tracking record (strong debug mode only)
+ *
+ * The allocation is retried once per second until it succeeds, so the
+ * result is never NULL. brick_global_block_used is increased by the
+ * block size in KiB.
+ */
+static inline
+void *__brick_block_alloc(gfp_t gfp, int order, int cline)
+{
+	void *res;
+
+	for (;;) {
+#ifdef USE_KERNEL_PAGES
+		res = (void *)__get_free_pages(gfp, order);
+#else
+		res = __vmalloc(PAGE_SIZE << order, gfp, PAGE_KERNEL_IO);
+#endif
+		if (likely(res))
+			break;
+		msleep(1000);
+	}
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	_new_block_info(res, PAGE_SIZE << order, cline);
+#endif
+#ifdef BRICK_DEBUG_MEM
+	atomic_inc(&phys_block_alloc);
+	atomic_inc(&raw_count[order]);
+#endif
+	atomic64_add((PAGE_SIZE/1024) << order, &brick_global_block_used);
+
+	return res;
+}
+
+/*
+ * Give a raw block of (PAGE_SIZE << order) bytes back to the kernel.
+ *
+ * @data:  start address of the raw block
+ * @order: page order the block was allocated with
+ * @cline: call site; only used in error messages
+ *
+ * In the strong debug mode the tracking record is removed and checked
+ * first. When it is missing or its size does not match, the pages are
+ * NOT released, but the counters below are still decremented.
+ */
+static inline
+void __brick_block_free(void *data, int order, int cline)
+{
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	struct mem_block_info *inf = _find_block_info(data, true);
+
+	if (likely(inf)) {
+		/* copy what is needed for the message, the record is freed right away */
+		int inf_len = inf->inf_len;
+		int inf_line = inf->inf_line;
+
+		kfree(inf);
+		if (unlikely(inf_len != (PAGE_SIZE << order))) {
+			BRICK_ERR("line %d: address %p: bad freeing size %d (correct should be %d, previous line = %d)\n",
+				cline,
+				data,
+				(int)(PAGE_SIZE << order),
+				inf_len,
+				inf_line);
+			goto err;
+		}
+	} else {
+		BRICK_ERR("line %d: trying to free non-existent address %p (order = %d)\n", cline, data, order);
+		goto err;
+	}
+#endif
+#ifdef USE_KERNEL_PAGES
+	__free_pages(virt_to_page((unsigned long)data), order);
+#else
+	vfree(data);
+#endif
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+err:
+#endif
+#ifdef BRICK_DEBUG_MEM
+	atomic_dec(&phys_block_alloc);
+	atomic_dec(&raw_count[order]);
+#endif
+	atomic64_sub((PAGE_SIZE/1024) << order, &brick_global_block_used);
+}
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+/* when 0, _brick_block_free() never puts blocks onto the freelists */
+int brick_allow_freelist = 1;
+
+/* per order: amount added to brick_mem_freelist_max by each brick_mem_reserve() call */
+int brick_pre_reserve[BRICK_MAX_ORDER+1] = {};
+
+/* Note: we have no separate lists per CPU.
+ * This should not hurt because the freelists are only used
+ * for higher-order pages which should be rather low-frequency.
+ */
+/*
+ * Per order: singly linked list of free blocks. The link pointer is
+ * stored in the first word of each free block (see _put_free()).
+ */
+static spinlock_t freelist_lock[BRICK_MAX_ORDER+1];
+static void *brick_freelist[BRICK_MAX_ORDER+1];
+static atomic_t freelist_count[BRICK_MAX_ORDER+1];
+
+/*
+ * Take one block from the freelist of the given order.
+ *
+ * @order: page order of the wanted block
+ * @cline: call site; used in error messages and, in the strong debug
+ *         mode, recorded in the tracking record
+ *
+ * Returns the block, or NULL when the freelist is empty or (debug only)
+ * when its head turned out to be corrupted. In the latter case the whole
+ * list is dropped on purpose, leaving a memory leak.
+ */
+static
+void *_get_free(int order, int cline)
+{
+	void *data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&freelist_lock[order], flags);
+	data = brick_freelist[order];
+	if (likely(data)) {
+		void *next = *(void **)data;
+
+#ifdef BRICK_DEBUG_MEM /*  check for corruptions */
+		/*
+		 * Word 1 must still carry the 0xf0 fill pattern from _put_free(),
+		 * word 2 the redundant copy of the link pointer.
+		 * The pattern is handled as unsigned long and the constant is
+		 * truncated to that width, so that the comparison also works
+		 * where long has only 32 bits.
+		 */
+		unsigned long pattern = *(((unsigned long *)data)+1);
+		void *copy = *(((void **)data)+2);
+
+		if (unlikely(pattern != (unsigned long)0xf0f0f0f0f0f0f0f0ULL || next != copy)) { /*  found a corruption */
+			/*  prevent further trouble by leaving a memleak */
+			brick_freelist[order] = NULL;
+			spin_unlock_irqrestore(&freelist_lock[order], flags);
+			BRICK_ERR("line %d:freelist corruption at %p (pattern = %lx next %p != %p, murdered = %d), order = %d\n",
+				  cline, data, pattern, next, copy, atomic_read(&freelist_count[order]), order);
+			return NULL;
+		}
+#endif
+		brick_freelist[order] = next;
+		atomic_dec(&freelist_count[order]);
+	}
+	spin_unlock_irqrestore(&freelist_lock[order], flags);
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	if (data) {
+		/* the block is handed out again: verify and re-arm its tracking record */
+		struct mem_block_info *inf = _find_block_info(data, false);
+
+		if (likely(inf)) {
+			if (unlikely(inf->inf_len != (PAGE_SIZE << order))) {
+				BRICK_ERR("line %d: address %p: bad freelist size %d (correct should be %d, previous line = %d)\n",
+					  cline, data, (int)(PAGE_SIZE << order), inf->inf_len, inf->inf_line);
+			}
+			inf->inf_line = cline;
+			inf->inf_used = true;
+		} else {
+			BRICK_ERR("line %d: freelist address %p is invalid (order = %d)\n", cline, data, order);
+		}
+	}
+#endif
+	return data;
+}
+
+/*
+ * Push a block onto the freelist of the given order.
+ *
+ * The link to the previous list head is stored in the first word of
+ * the block itself. With BRICK_DEBUG_MEM the block is filled with 0xf0
+ * beforehand and a second copy of the link is stored in word 2, so that
+ * _get_free() can detect writes to freed blocks.
+ */
+static
+void _put_free(void *data, int order)
+{
+	void *next;
+	unsigned long flags;
+
+#ifdef BRICK_DEBUG_MEM /*  fill with pattern */
+	memset(data, 0xf0, PAGE_SIZE << order);
+#endif
+
+	spin_lock_irqsave(&freelist_lock[order], flags);
+	next = brick_freelist[order];
+	*(void **)data = next;
+#ifdef BRICK_DEBUG_MEM /*  insert redundant copy for checking */
+	*(((void **)data)+2) = next;
+#endif
+	brick_freelist[order] = data;
+	spin_unlock_irqrestore(&freelist_lock[order], flags);
+	/* note: the counter is updated after the lock has been released */
+	atomic_inc(&freelist_count[order]);
+}
+
+/*
+ * Drain the freelists of all orders, starting with the highest one,
+ * and give every block back via __brick_block_free().
+ */
+static
+void _free_all(void)
+{
+	int order;
+
+	for (order = BRICK_MAX_ORDER; order >= 0; order--) {
+		void *data = _get_free(order, __LINE__);
+
+		while (data) {
+			__brick_block_free(data, order, __LINE__);
+			data = _get_free(order, __LINE__);
+		}
+	}
+}
+
+/*
+ * Adjust the freelists to their configured fill level.
+ *
+ * For every order, brick_pre_reserve[order] is added to
+ * brick_mem_freelist_max[order]; afterwards blocks are allocated onto
+ * the freelist or released from it until the freelist length matches
+ * the new maximum.
+ *
+ * Returns 0, or -ENOMEM when an allocation yielded NULL.
+ */
+int brick_mem_reserve(void)
+{
+	int order;
+	int status = 0;
+
+	for (order = BRICK_MAX_ORDER; order >= 0; order--) {
+		int max = brick_pre_reserve[order];
+		int i;
+
+		brick_mem_freelist_max[order] += max;
+		BRICK_INF("preallocating %d at order %d (new maxlevel = %d)\n",
+			max,
+			order,
+			brick_mem_freelist_max[order]);
+
+		/* from here on, max is the number of blocks missing (>= 0) or surplus (< 0) */
+		max = brick_mem_freelist_max[order] - atomic_read(&freelist_count[order]);
+		if (max >= 0) {
+			for (i = 0; i < max; i++) {
+				void *data = __brick_block_alloc(GFP_KERNEL, order, __LINE__);
+
+				if (likely(data))
+					_put_free(data, order);
+				else
+					status = -ENOMEM;
+			}
+		} else {
+			for (i = 0; i < -max; i++) {
+				void *data = _get_free(order, __LINE__);
+
+				if (likely(data))
+					__brick_block_free(data, order, __LINE__);
+			}
+		}
+	}
+	return status;
+}
+#else
+/*
+ * Variant for builds without CONFIG_MARS_MEM_PREALLOC: nothing is
+ * reserved, the call only logs that fact and reports success.
+ * The parameter list matches the preallocating variant, so that both
+ * fit the same prototype.
+ */
+int brick_mem_reserve(void)
+{
+	BRICK_INF("preallocation is not compiled in\n");
+	return 0;
+}
+#endif
+
+/*
+ * Allocate a block of at least "len" bytes from whole pages.
+ *
+ * @pos:  not used by this function
+ * @len:  requested size in bytes
+ * @line: identifies the call site; used for statistics and diagnostics
+ *
+ * The block is taken from the freelist of the matching order when
+ * possible, otherwise from the page allocator.
+ *
+ * With BRICK_DEBUG_MEM, requests larger than PAGE_SIZE get two extra
+ * pages: the first page holds MAGIC_BLOCK, line and len, the returned
+ * pointer starts at the second page and MAGIC_BEND is stored directly
+ * behind the payload.
+ *
+ * Returns the block, or NULL when the request (including the debug
+ * overhead) does not fit into a block of order BRICK_MAX_ORDER.
+ */
+void *_brick_block_alloc(loff_t pos, int len, int line)
+{
+	void *data;
+	int count;
+
+#ifdef BRICK_DEBUG_MEM
+#ifdef BRICK_DEBUG_ORDER0
+	const int plus0 = PAGE_SIZE;
+
+#else
+	const int plus0 = 0;
+
+#endif
+	const int plus = len <= PAGE_SIZE ? plus0 : PAGE_SIZE * 2;
+
+#else
+	const int plus = 0;
+
+#endif
+	int order = len2order(len + plus);
+
+	/*
+	 * len2order() never returns a negative value; it silently clamps
+	 * oversized requests to BRICK_MAX_ORDER. Refuse those here instead
+	 * of handing out a block which is smaller than requested.
+	 */
+	if (unlikely((long long)len + plus > (long long)(PAGE_SIZE << order))) {
+		BRICK_ERR("trying to allocate %d bytes (max = %d)\n", len, (int)(PAGE_SIZE << order));
+		return NULL;
+	}
+
+#ifdef CONFIG_MARS_DEBUG
+	might_sleep();
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	/* track the number of blocks in use and its high-water mark */
+	count = atomic_add_return(1, &_alloc_count[order]);
+	brick_mem_alloc_count[order] = count;
+	if (count > brick_mem_alloc_max[order])
+		brick_mem_alloc_max[order] = count;
+#endif
+
+#ifdef BRICK_DEBUG_MEM
+	atomic_inc(&op_count[order]);
+	/*  statistics */
+	alloc_line[order] = line;
+	alloc_len[order] = len;
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	/* Dynamic increase of limits, in order to reduce
+	 * fragmentation on higher-order pages.
+	 * This comes on cost of higher memory usage.
+	 */
+	if (order > 0 && count > brick_mem_freelist_max[order])
+		brick_mem_freelist_max[order] = count;
+#endif
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	data = _get_free(order, line);
+	if (!data)
+#endif
+		data = __brick_block_alloc(GFP_BRICK, order, line);
+
+#ifdef BRICK_DEBUG_MEM
+	if (order > 0) {
+		/* clamp the line so that it is a valid index into the statistics arrays */
+		if (unlikely(line < 0))
+			line = 0;
+		else if (unlikely(line >= BRICK_DEBUG_MEM))
+			line = BRICK_DEBUG_MEM - 1;
+		atomic_inc(&block_count[line]);
+		block_len[line] = len;
+		if (order > 1) {
+			/* header in the first page, payload starts at the second page */
+			INT_ACCESS(data, 0 * sizeof(int)) = MAGIC_BLOCK;
+			INT_ACCESS(data, 1 * sizeof(int)) = line;
+			INT_ACCESS(data, 2 * sizeof(int)) = len;
+			data += PAGE_SIZE;
+			INT_ACCESS(data, -1 * sizeof(int)) = MAGIC_BLOCK;
+			INT_ACCESS(data, len) = MAGIC_BEND;
+		} else if (order == 1) {
+			/* payload in the first page, bookkeeping in the second page */
+			INT_ACCESS(data, PAGE_SIZE + 0 * sizeof(int)) = MAGIC_BLOCK;
+			INT_ACCESS(data, PAGE_SIZE + 1 * sizeof(int)) = line;
+			INT_ACCESS(data, PAGE_SIZE + 2 * sizeof(int)) = len;
+		}
+	}
+#endif
+	return data;
+}
+
+/*
+ * Free a block obtained from _brick_block_alloc().
+ *
+ * @data:  pointer as returned by _brick_block_alloc()
+ * @len:   the same length that was passed to _brick_block_alloc();
+ *         the page order is recomputed from it
+ * @cline: identifies the call site; used in diagnostics
+ *
+ * Blocks of order > 0 are put onto the freelist as long as it is
+ * allowed and not full; all others go back to the page allocator.
+ *
+ * In the debug variants the tracking record and the guard values are
+ * verified first. When any check fails, an error is logged and the
+ * block is neither freed nor accounted.
+ */
+void _brick_block_free(void *data, int len, int cline)
+{
+	int order;
+
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	struct mem_block_info *inf;
+	char *real_data;
+
+#endif
+#ifdef BRICK_DEBUG_MEM
+	int prev_line = 0;
+
+#ifdef BRICK_DEBUG_ORDER0
+	const int plus0 = PAGE_SIZE;
+
+#else
+	const int plus0 = 0;
+
+#endif
+	const int plus = len <= PAGE_SIZE ? plus0 : PAGE_SIZE * 2;
+
+#else
+	const int plus = 0;
+
+#endif
+
+	order = len2order(len + plus);
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	/* for order > 1 the raw block starts one page in front of the payload */
+	real_data = data;
+	if (order > 1)
+		real_data -= PAGE_SIZE;
+	inf = _find_block_info(real_data, false);
+	if (likely(inf)) {
+		prev_line = inf->inf_line;
+		if (unlikely(inf->inf_len != (PAGE_SIZE << order))) {
+			BRICK_ERR("line %d: address %p: bad freeing size %d (correct should be %d, previous line = %d)\n",
+				  cline, data, (int)(PAGE_SIZE << order), inf->inf_len, prev_line);
+			goto _out_return;
+		}
+		if (unlikely(!inf->inf_used)) {
+			BRICK_ERR("line %d: address %p: double freeing (previous line = %d)\n",
+				cline,
+				data,
+				prev_line);
+			goto _out_return;
+		}
+		inf->inf_line = cline;
+		inf->inf_used = false;
+	} else {
+		BRICK_ERR("line %d: trying to free non-existent address %p (order = %d)\n", cline, data, order);
+		goto _out_return;
+	}
+#endif
+#ifdef BRICK_DEBUG_MEM
+	if (order > 1) {
+		/* bookkeeping lives in the page in front of the payload */
+		void *test = data - PAGE_SIZE;
+		int magic = INT_ACCESS(test, 0);
+		int line = INT_ACCESS(test, sizeof(int));
+		int oldlen = INT_ACCESS(test, sizeof(int)*2);
+		int magic1 = INT_ACCESS(data, -1 * sizeof(int));
+		int magic2;
+
+		if (unlikely(magic1 != MAGIC_BLOCK)) {
+			BRICK_ERR("line %d memory corruption: %p magix1 %08x != %08x (previous line = %d)\n",
+				cline,
+				data,
+				magic1,
+				MAGIC_BLOCK,
+				prev_line);
+			goto _out_return;
+		}
+		if (unlikely(magic != MAGIC_BLOCK)) {
+			BRICK_ERR("line %d memory corruption: %p magix %08x != %08x (previous line = %d)\n",
+				cline,
+				data,
+				magic,
+				MAGIC_BLOCK,
+				prev_line);
+			goto _out_return;
+		}
+		if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+			BRICK_ERR("line %d memory corruption %p: alloc line = %d (previous line = %d)\n",
+				cline,
+				data,
+				line,
+				prev_line);
+			goto _out_return;
+		}
+		if (unlikely(oldlen != len)) {
+			BRICK_ERR("line %d memory corruption %p: len != oldlen (%d != %d, previous line = %d))\n",
+				cline,
+				data,
+				len,
+				oldlen,
+				prev_line);
+			goto _out_return;
+		}
+		magic2 = INT_ACCESS(data, len);
+		if (unlikely(magic2 != MAGIC_BEND)) {
+			/* report the end marker which was actually found */
+			BRICK_ERR("line %d memory corruption %p: magix %08x != %08x (previous line = %d)\n",
+				cline,
+				data,
+				magic2,
+				MAGIC_BEND,
+				prev_line);
+			goto _out_return;
+		}
+		/* invalidate both markers; continue with the start of the raw block */
+		INT_ACCESS(test, 0) = 0xffffffff;
+		INT_ACCESS(data, len) = 0xffffffff;
+		data = test;
+		atomic_dec(&block_count[line]);
+		atomic_inc(&block_free[line]);
+	} else if (order == 1) {
+		/* bookkeeping lives in the second page of the block */
+		void *test = data + PAGE_SIZE;
+		int magic = INT_ACCESS(test, 0 * sizeof(int));
+		int line = INT_ACCESS(test, 1 * sizeof(int));
+		int oldlen = INT_ACCESS(test, 2 * sizeof(int));
+
+		if (unlikely(magic != MAGIC_BLOCK)) {
+			BRICK_ERR("line %d memory corruption %p: magix %08x != %08x (previous line = %d)\n",
+				cline,
+				data,
+				magic,
+				MAGIC_BLOCK,
+				prev_line);
+			goto _out_return;
+		}
+		if (unlikely(line < 0 || line >= BRICK_DEBUG_MEM)) {
+			BRICK_ERR("line %d memory corruption %p: alloc line = %d (previous line = %d)\n",
+				cline,
+				data,
+				line,
+				prev_line);
+			goto _out_return;
+		}
+		if (unlikely(oldlen != len)) {
+			BRICK_ERR("line %d memory corruption %p: len != oldlen (%d != %d, previous line = %d))\n",
+				cline,
+				data,
+				len,
+				oldlen,
+				prev_line);
+			goto _out_return;
+		}
+		atomic_dec(&block_count[line]);
+		atomic_inc(&block_free[line]);
+	}
+#endif
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	if (order > 0 && brick_allow_freelist && atomic_read(&freelist_count[order]) <= brick_mem_freelist_max[order]) {
+		_put_free(data, order);
+	} else
+#endif
+		__brick_block_free(data, order, cline);
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	brick_mem_alloc_count[order] = atomic_dec_return(&_alloc_count[order]);
+#endif
+#ifdef BRICK_DEBUG_MEM
+_out_return:;
+#endif
+}
+
+/*
+ * Translate an address inside a brick allocation to its struct page.
+ *
+ * @data:   address to translate
+ * @offset: out: offset of "data" within its page
+ * @len:    in/out: shortened so that the range does not extend beyond
+ *          the end of that page
+ *
+ * Both vmalloc addresses and other kernel addresses are handled.
+ */
+struct page *brick_iomap(void *data, int *offset, int *len)
+{
+	int in_page = ((unsigned long)data) & (PAGE_SIZE-1);
+
+	*offset = in_page;
+	if (*len > PAGE_SIZE - in_page)
+		*len = PAGE_SIZE - in_page;
+
+	if (is_vmalloc_addr(data))
+		return vmalloc_to_page(data);
+	return virt_to_page(data);
+}
+
+/***********************************************************************/
+
+/*  module */
+
+/*
+ * Log the allocation statistics (does nothing without BRICK_DEBUG_MEM).
+ *
+ * @final: when true, a summary line with a nonzero number of remaining
+ *         allocations is logged as an error instead of as information
+ *
+ * Three sections are printed: page blocks, small memory and strings.
+ * Each lists the call-site slots which still have allocations, followed
+ * by a summary line.
+ */
+void brick_mem_statistics(bool final)
+{
+#ifdef BRICK_DEBUG_MEM
+	int i;
+	int count = 0;
+	int places = 0;
+
+	BRICK_INF("======== page allocation:\n");
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	/* one line per page order */
+	for (i = 0; i <= BRICK_MAX_ORDER; i++) {
+		BRICK_INF("pages order = %2d operations = %9d freelist_count = %4d / %3d raw_count = %5d alloc_count = %5d alloc_len = %5d line = %5d max_count = %5d\n",
+			  i,
+			  atomic_read(&op_count[i]),
+			  atomic_read(&freelist_count[i]),
+			  brick_mem_freelist_max[i],
+			  atomic_read(&raw_count[i]),
+			  brick_mem_alloc_count[i],
+			  alloc_len[i],
+			  alloc_line[i],
+			  brick_mem_alloc_max[i]);
+	}
+#endif
+	/* page blocks still allocated, per call-site slot */
+	for (i = 0; i < BRICK_DEBUG_MEM; i++) {
+		int val = atomic_read(&block_count[i]);
+
+		if (val) {
+			count += val;
+			places++;
+			BRICK_INF("line %4d: %6d allocated (last size = %4d, freed = %6d)\n",
+				  i,
+				  val,
+				  block_len[i],
+				  atomic_read(&block_free[i]));
+		}
+	}
+	if (!final || !count) {
+		BRICK_INF("======== %d block allocations in %d places (phys=%d)\n",
+			  count, places, atomic_read(&phys_block_alloc));
+	} else {
+		BRICK_ERR("======== %d block allocations in %d places (phys=%d)\n",
+			  count, places, atomic_read(&phys_block_alloc));
+	}
+	/* small memory still allocated, per call-site slot */
+	count = places = 0;
+	for (i = 0; i < BRICK_DEBUG_MEM; i++) {
+		int val = atomic_read(&mem_count[i]);
+
+		if (val) {
+			count += val;
+			places++;
+			BRICK_INF("line %4d: %6d allocated (last size = %4d, freed = %6d)\n",
+				  i,
+				  val,
+				  mem_len[i],
+				  atomic_read(&mem_free[i]));
+		}
+	}
+	if (!final || !count) {
+		BRICK_INF("======== %d memory allocations in %d places (phys=%d,redirect=%d)\n",
+			  count, places,
+			  atomic_read(&phys_mem_alloc), atomic_read(&mem_redirect_alloc));
+	} else {
+		BRICK_ERR("======== %d memory allocations in %d places (phys=%d,redirect=%d)\n",
+			  count, places,
+			  atomic_read(&phys_mem_alloc), atomic_read(&mem_redirect_alloc));
+	}
+	/* strings still allocated, per call-site slot */
+	count = places = 0;
+	for (i = 0; i < BRICK_DEBUG_MEM; i++) {
+		int val = atomic_read(&string_count[i]);
+
+		if (val) {
+			count += val;
+			places++;
+			BRICK_INF("line %4d: %6d allocated (freed = %6d)\n",
+				  i,
+				  val,
+				  atomic_read(&string_free[i]));
+		}
+	}
+	if (!final || !count) {
+		BRICK_INF("======== %d string allocations in %d places (phys=%d)\n",
+			  count, places, atomic_read(&phys_string_alloc));
+	} else {
+		BRICK_ERR("======== %d string allocations in %d places (phys=%d)\n",
+			  count, places, atomic_read(&phys_string_alloc));
+	}
+#endif
+}
+
+/*  module init stuff */
+
+/*
+ * Module initialization: set up the freelist locks and, in the strong
+ * debug mode, the block tracking hash table; then determine the amount
+ * of RAM. Always returns 0.
+ */
+int __init init_brick_mem(void)
+{
+	int i;
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	for (i = 0; i <= BRICK_MAX_ORDER; i++)
+		spin_lock_init(&freelist_lock[i]);
+#endif
+#ifdef CONFIG_MARS_DEBUG_MEM_STRONG
+	for (i = MAX_INFO_LISTS - 1; i >= 0; i--) {
+		rwlock_init(&inf_lock[i]);
+		INIT_LIST_HEAD(&inf_anchor[i]);
+	}
+#else
+	/* keeps "i" referenced when no loop above is compiled */
+	(void)i;
+#endif
+
+	get_total_ram();
+
+	return 0;
+}
+
+/*
+ * Module cleanup: release all blocks parked on the freelists, then log
+ * the final statistics (remaining allocations are reported as errors).
+ */
+void exit_brick_mem(void)
+{
+	BRICK_INF("deallocating memory...\n");
+#ifdef CONFIG_MARS_MEM_PREALLOC
+	_free_all();
+#endif
+
+	brick_mem_statistics(true);
+}
diff --git a/include/linux/brick/brick_mem.h b/include/linux/brick/brick_mem.h
new file mode 100644
index 0000000..1a2f236
--- /dev/null
+++ b/include/linux/brick/brick_mem.h
@@ -0,0 +1,218 @@
+/*
+ * MARS Long Distance Replication Software
+ *
+ * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
+ * Copyright (C) 2011-2014 1&1 Internet AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef BRICK_MEM_H
+#define BRICK_MEM_H
+
+#include <linux/mm_types.h>
+
+#define BRICK_DEBUG_MEM			4096	/* upper bound of the per-line statistics loops */
+
+#ifndef CONFIG_MARS_DEBUG_MEM
+#undef BRICK_DEBUG_MEM			/* only stays defined with CONFIG_MARS_DEBUG_MEM */
+#endif
+#ifdef CONFIG_MARS_DEBUG_ORDER0
+#define BRICK_DEBUG_ORDER0
+#endif
+
+#define CONFIG_MARS_MEM_PREALLOC	/* this is VITAL - disable only for experiments! */
+
+#define GFP_BRICK			GFP_NOIO
+
+extern long long brick_global_memavail;
+extern long long brick_global_memlimit;
+extern atomic64_t brick_global_block_used;
+
+/* All brick memory allocations are guaranteed to succeed.
+ * In case of low memory, they will just retry (forever).
+ *
+ * We always prefer threads for concurrency.
+ * Therefore, in_interrupt() code does not occur, and we can
+ * always sleep in case of memory pressure.
+ *
+ * Resource deadlocks are avoided by the above memory limits.
+ * When exceeded, new memory is simply not allocated any more
+ * (except for vital memory, such as IO memory for which a
+ * low_mem_reserve must always exist, anyway).
+ */
+
+/***********************************************************************/
+
+/*  compiler tweaking */
+
+/* Some functions are known to return non-null pointer values,
+ * at least under some Kconfig conditions.
+ *
+ * In code like...
+ *
+ * void *ptr = myfunction();
+ * if (unlikely(!ptr)) {
+ *	   printk("ERROR: this should not happen\n");
+ *	   goto fail;
+ * }
+ *
+ * ... the dead code elimination of gcc will not remove the if clause
+ * because the function might return a NULL value, even if a human
+ * would know that myfunction() does not return a NULL value.
+ *
+ * Unfortunately, the __attribute__((nonnull)) can only be applied
+ * to input parameters, but not to the return value.
+ *
+ * Even more unfortunately, a small inline wrapper does not help,
+ * because it seems that, together with the elimination of the wrapper,
+ * its nonnull attribute is eliminated altogether.
+ * I don't know whether this is a bug or a feature (or just a weakness).
+ *
+ * Following is a small hack which solves the problem at least for gcc 4.7.
+ *
+ * In order to be useful, -fdelete-null-pointer-checks must be set.
+ * Since BRICK is superuser-only anyway, enabling this for MARS should not
+ * be a security risk
+ * (cf. upstream kernel commit a3ca86aea507904148870946d599e07a340b39bf).
+ */
+static inline	/* not "extern inline": every user gets its own inlinable copy */
+void *brick_mark_nonnull(void *_ptr)
+{
+	char *ptr = _ptr;
+
+	/* empty asm "reads" *ptr, so gcc may assume that ptr is non-NULL */
+	asm("" : : "X" (*ptr));
+	return ptr;
+}
+
+/***********************************************************************/
+
+/*  small memory allocation (use this only for len < PAGE_SIZE) */
+
+/* Allocate _len_ bytes via _brick_mem_alloc(), passing the caller's
+ * __LINE__; the result is passed through brick_mark_nonnull().
+ */
+#define brick_mem_alloc(_len_)						\
+	brick_mark_nonnull(_brick_mem_alloc((_len_), __LINE__))
+
+#define brick_zmem_alloc(_len_)						\
+	({	/* like brick_mem_alloc(), but zero-filled */		\
+		int _sz_ = (_len_);	/* evaluate _len_ only once */	\
+		void *_res_ = brick_mark_nonnull(_brick_mem_alloc(_sz_, __LINE__)); \
+		memset(_res_, 0, _sz_);					\
+		_res_;							\
+	})
+
+#define brick_mem_free(_data_)						\
+	do {	/* NULL is silently ignored */				\
+		void *_ptr_ = (_data_);	/* evaluate _data_ only once */	\
+		if (_ptr_)						\
+			_brick_mem_free(_ptr_, __LINE__);		\
+	} while (0)
+
+/*  don't use the following directly */
+extern void *_brick_mem_alloc(int len, int line) __attribute__((malloc)) __attribute__((alloc_size(1)));
+extern void _brick_mem_free(void *data, int line);
+
+/***********************************************************************/
+
+/*  string memory allocation */
+
+#define BRICK_STRING_LEN		1024 /* default value when len == 0 */
+
+/* Allocate a string buffer via _brick_string_alloc(), passing the caller's
+ * __LINE__; the result is passed through brick_mark_nonnull().
+ */
+#define brick_string_alloc(_len_)					\
+	((char *)brick_mark_nonnull(_brick_string_alloc((_len_), __LINE__)))
+
+#define brick_strndup(_orig_, _len_)					\
+	({	/* copy at most _len_ chars of _orig_ into a new string */ \
+		int _max_ = (_len_);	/* evaluate _len_ only once */	\
+		char *_res_ = _brick_string_alloc(_max_ + 1, __LINE__);	\
+		_res_ = brick_mark_nonnull(_res_);			\
+		strncpy(_res_, (_orig_), _max_ + 1);			\
+		_res_[_max_] = '\0';	/* always NUL-terminate */	\
+		_res_;							\
+	})
+
+#define brick_strdup(_orig_)						\
+	({	/* duplicate the NUL-terminated string _orig_ */	\
+		const char *_src_ = (_orig_); /* evaluate only once */	\
+		int _len_ = strlen(_src_);				\
+		char *_res_ = _brick_string_alloc(_len_ + 1, __LINE__);	\
+		strncpy(_res_, _src_, _len_ + 1);			\
+		(char *)brick_mark_nonnull(_res_);			\
+	})
+
+#define brick_string_free(_data_)					\
+	do {	/* NULL is silently ignored */				\
+		const char *_str_ = (_data_); /* evaluate only once */	\
+		if (_str_)						\
+			_brick_string_free(_str_, __LINE__);		\
+	} while (0)
+
+/*  don't use the following directly */
+extern char *_brick_string_alloc(int len, int line) __attribute__((malloc));
+extern void _brick_string_free(const char *data, int line);
+
+/***********************************************************************/
+
+/*  block memory allocation (for aligned multiples of 512 or PAGE_SIZE, respectively) */
+
+/* Allocate a block via _brick_block_alloc(), passing the caller's
+ * __LINE__; the result is passed through brick_mark_nonnull().
+ */
+#define brick_block_alloc(_pos_, _len_)					\
+	brick_mark_nonnull(_brick_block_alloc((_pos_), (_len_), __LINE__))
+
+#define brick_block_free(_data_, _len_)					\
+	do {	/* NULL is silently ignored */				\
+		void *_ptr_ = (_data_);	/* evaluate _data_ only once */	\
+		if (_ptr_)						\
+			_brick_block_free(_ptr_, (_len_), __LINE__);	\
+	} while (0)
+
+extern struct page *brick_iomap(void *data, int *offset, int *len);
+
+/*  don't use the following directly */
+extern void *_brick_block_alloc(loff_t pos, int len, int line) __attribute__((malloc)) __attribute__((alloc_size(2)));
+extern void _brick_block_free(void *data, int len, int cline);
+
+/***********************************************************************/
+
+/*  reservations / preallocation */
+
+#define BRICK_MAX_ORDER			11	/* arrays below hold BRICK_MAX_ORDER+1 entries */
+
+#ifdef CONFIG_MARS_MEM_PREALLOC
+extern int brick_allow_freelist;
+
+extern int brick_pre_reserve[BRICK_MAX_ORDER+1];
+extern int brick_mem_freelist_max[BRICK_MAX_ORDER+1];
+extern int brick_mem_alloc_count[BRICK_MAX_ORDER+1];
+extern int brick_mem_alloc_max[BRICK_MAX_ORDER+1];
+
+extern int brick_mem_reserve(void);
+
+#endif
+
+extern void brick_mem_statistics(bool final);
+
+/***********************************************************************/
+
+/*  init */
+
+extern int init_brick_mem(void);
+extern void exit_brick_mem(void);
+
+#endif
-- 
2.6.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ