linux-kernel - [Patch 09/18] fs/logfs/gc.c

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20070603184604.GJ8952@lazybastard.org>
Date:	Sun, 3 Jun 2007 20:46:04 +0200
From:	Jörn Engel <joern@...ybastard.org>
To:	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
	linux-mtd@...ts.infradead.org
Cc:	akpm@...l.org, Sam Ravnborg <sam@...nborg.org>,
	John Stoffel <john@...ffel.org>,
	David Woodhouse <dwmw2@...radead.org>,
	Jamie Lokier <jamie@...reable.org>,
	Artem Bityutskiy <dedekind@...radead.org>,
	CaT <cat@....com.au>, Jan Engelhardt <jengelh@...ux01.gwdg.de>,
	Evgeniy Polyakov <johnpol@....mipt.ru>,
	David Weinehall <tao@....umu.se>,
	Arnd Bergmann <arnd@...db.de>, Willy Tarreau <w@....eu>,
	Kyle Moffett <mrmacman_g4@....com>,
	Dongjun Shin <djshin90@...il.com>, Pavel Machek <pavel@....cz>,
	Bill Davidsen <davidsen@....com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Albert Cahalan <acahalan@...il.com>,
	Pekka Enberg <penberg@...helsinki.fi>,
	Roland Dreier <rdreier@...co.com>,
	Ondrej Zajicek <santiago@...reenet.org>,
	Ulisses Furquim <ulissesf@...il.com>
Subject: [Patch 09/18] fs/logfs/gc.c

--- /dev/null	2007-03-13 19:15:28.862769062 +0100
+++ linux-2.6.21logfs/fs/logfs/gc.c	2007-06-03 19:18:57.000000000 +0200
@@ -0,0 +1,352 @@
+/*
+ * fs/logfs/gc.c	- garbage collection code
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2005-2007 Joern Engel
+ */
+#include "logfs.h"
+
+#if 0
+/*
+ * When deciding which segment to use next, calculate the resistance
+ * of each segment and pick the lowest.  Segments try to resist usage
+ * if
+ * o they are full,
+ * o they have a high erase count or
+ * o they have recently been written.
+ *
+ * Full segments should not get reused, as there is little space to
+ * gain from them.  Segments with high erase count should be left
+ * aside as they can wear out sooner than others.  Freshly-written
+ * segments contain many blocks that will get obsoleted fairly soon,
+ * so it helps to wait a little before reusing them.
+ *
+ * Total resistance is expressed in erase counts.  Formula is:
+ *
+ * R = EC + K1*F + K2*e^(-t/theta)
+ *
+ * R:	Resistance
+ * EC:	Erase count
+ * K1:	Constant, 10,000 might be a good value
+ * K2:	Constant,  1,000 might be a good value
+ * F:	Segment fill level
+ * t:	Time since segment was written to (in number of segments written)
+ * theta: Time constant.  Total number of segments might be a good value
+ *
+ * Since the kernel is not allowed to use floating point, the function
+ * decay() is used to approximate exponential decay in fixed point.
+ */
+static long decay(long t0, long t, long theta)
+{
+	long shift, fac;
+
+	if (t >= 32*theta)
+		return 0;
+
+	shift = t/theta;
+	fac = theta - (t%theta)/2;
+	return (t0 >> shift) * fac / theta;
+}
+#endif
+
+/* Only valid objects are accounted as valid bytes, including their headers */
+static u32 logfs_valid_bytes(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_object_header h;
+	u64 ofs, ino, pos;
+	u32 seg_ofs, valid, size;
+	void *reserved;
+	int i, err;
+
+	/* Some segments are reserved.  Just pretend they were all valid */
+	reserved = btree_lookup(&super->s_reserved_segments, segno);
+	if (reserved)
+		return super->s_segsize;
+
+	/* Currently open segments */
+	/* FIXME: just reserve open areas and remove this code */
+	for (i=0; i<LOGFS_NO_AREAS; i++) {
+		struct logfs_area *area = super->s_area[i];
+		if (area->a_is_open && (area->a_segno == segno)) {
+			return super->s_segsize;
+		}
+	}
+
+	err = device_read(sb, segno, 0, sizeof(h), &h);
+	BUG_ON(err);
+	if (!memchr_inv(&h, 0xff, sizeof(h)))
+		return 0;
+
+	valid = 0;
+	for (seg_ofs = sizeof(h); seg_ofs + sizeof(h) < super->s_segsize; ) {
+		err = device_read(sb, segno, seg_ofs, sizeof(h), &h);
+		BUG_ON(err);
+		if (!memchr_inv(&h, 0xff, sizeof(h)))
+			break;
+
+		ofs = dev_ofs(sb, segno, seg_ofs);
+		ino = be64_to_cpu(h.ino);
+		pos = be64_to_cpu(h.pos);
+		size = (u32)be16_to_cpu(h.len) + sizeof(h);
+		if (logfs_is_valid_block(sb, ofs, ino, pos))
+			valid += size;
+		seg_ofs += size;
+	}
+	pr_debug(KERN_INFO "LOGFS valid(%x) = %x\n", segno, valid);
+	return valid;
+}
+
+static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
+		u64 pos, int level)
+{
+	struct inode *inode;
+	int err, cookie;
+
+	inode = logfs_iget(sb, ino, &cookie);
+	BUG_ON(!inode);
+	err = logfs_rewrite_block(inode, pos, ofs, level);
+	BUG_ON(err);
+	logfs_iput(inode, cookie);
+}
+
+static void __logfs_gc_segment(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct logfs_object_header h;
+	struct logfs_segment_header *sh;
+	u64 ofs, ino, pos;
+	u32 seg_ofs;
+	int level, err;
+
+	err = device_read(sb, segno, 0, sizeof(h), &h);
+	BUG_ON(err);
+	sh = (void*)&h;
+	level = sh->level;
+
+	for (seg_ofs = sizeof(h); seg_ofs + sizeof(h) < super->s_segsize; ) {
+		ofs = dev_ofs(sb, segno, seg_ofs);
+		err = device_read(sb, segno, seg_ofs, sizeof(h), &h);
+		BUG_ON(err);
+		ino = be64_to_cpu(h.ino);
+		pos = be64_to_cpu(h.pos);
+		if (logfs_is_valid_block(sb, ofs, ino, pos))
+			logfs_cleanse_block(sb, ofs, ino, pos, level);
+		seg_ofs += sizeof(h);
+		seg_ofs += be16_to_cpu(h.len);
+	}
+}
+
+static void logfs_gc_segment(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i;
+	void *reserved;
+
+	/* Some segments are reserved.  Just pretend they were all valid */
+	reserved = btree_lookup(&super->s_reserved_segments, segno);
+	LOGFS_BUG_ON(reserved, sb);
+
+	/* Currently open segments */
+	for (i=0; i<LOGFS_NO_AREAS; i++) {
+		struct logfs_area *area = super->s_area[i];
+		BUG_ON(area->a_is_open && (area->a_segno == segno));
+	}
+	__logfs_gc_segment(sb, segno);
+}
+
+static void __add_segment(struct list_head *list, int *count, u32 segno,
+		int valid)
+{
+	struct gc_candidate *cand;
+
+	cand = kzalloc(sizeof(*cand), GFP_KERNEL);
+	if (!cand)
+		return;
+
+	cand->segno = segno;
+	cand->valid = valid;
+	list_add(&cand->list, list);
+	*count += 1;
+}
+
+static void add_segment(struct list_head *list, int *count, u32 segno,
+		int valid)
+{
+	struct gc_candidate *cand;
+
+	list_for_each_entry(cand, list, list)
+		if (cand->segno == segno)
+			return;
+	__add_segment(list, count, segno, valid);
+}
+
+static void del_segment(struct list_head *list, int *count, u32 segno)
+{
+	struct gc_candidate *cand;
+
+	list_for_each_entry(cand, list, list)
+		if (cand->segno == segno) {
+			list_del(&cand->list);
+			*count -= 1;
+			kfree(cand);
+			return;
+		}
+}
+
+static void add_free_segment(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	add_segment(&super->s_free_list, &super->s_free_count, segno, 0);
+}
+
+static void add_low_segment(struct super_block *sb, u32 segno, int valid)
+{
+	struct logfs_super *super = logfs_super(sb);
+	add_segment(&super->s_low_list, &super->s_low_count, segno, valid);
+}
+
+static void del_low_segment(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	del_segment(&super->s_low_list, &super->s_low_count, segno);
+}
+
+static void scan_segment(struct super_block *sb, u32 segno)
+{
+	struct logfs_super *super = logfs_super(sb);
+	u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;
+	int valid;
+
+	valid = logfs_valid_bytes(sb, segno);
+	if (valid == 0) {
+		del_low_segment(sb, segno);
+		add_free_segment(sb, segno);
+	} else if (valid < full)
+		add_low_segment(sb, segno, valid);
+}
+
+static void logfs_scan_pass(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i;
+
+	for (i = super->s_sweeper+1; i != super->s_sweeper; i++) {
+		if (i >= super->s_no_segs)
+			i = 0;
+
+		scan_segment(sb, i);
+
+		if (super->s_free_count >= super->s_total_levels) {
+			super->s_sweeper = i;
+			return;
+		}
+	}
+	scan_segment(sb, super->s_sweeper);
+}
+
+static void logfs_gc_once(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	struct gc_candidate *cand, *next;
+	unsigned min_valid = super->s_segsize;
+	u32 segno;
+
+	BUG_ON(list_empty(&super->s_low_list));
+	list_for_each_entry_safe(cand, next, &super->s_low_list, list) {
+		if (cand->valid >= min_valid)
+			continue;
+		min_valid = cand->valid;
+		list_del(&cand->list);
+		list_add(&cand->list, &super->s_low_list);
+	}
+
+	cand = list_entry(super->s_low_list.next, struct gc_candidate, list);
+	list_del(&cand->list);
+	super->s_low_count -= 1;
+
+	segno = cand->segno;
+	logfs_gc_segment(sb, segno);
+	kfree(cand);
+	add_free_segment(sb, segno);
+}
+
+/* GC all the low-count segments.  If necessary, rescan the medium.
+ * If we made enough room, return */
+static void logfs_gc_several(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int rounds;
+
+	rounds = super->s_low_count;
+
+	for (; rounds; rounds--) {
+		if (super->s_free_count >= super->s_total_levels)
+			return;
+		if (super->s_free_count < 3)
+			logfs_scan_pass(sb);
+		logfs_gc_once(sb);
+	}
+}
+
+/*
+ * In principle, this function should loop forever, looking for GC candidates
+ * and moving data.  LogFS is designed in such a way that this loop is
+ * guaranteed to terminate.
+ *
+ * Limiting the loop to four iterations serves purely to catch cases when
+ * these guarantees have failed.  An actual endless loop is an obvious bug
+ * and should be reported as such.
+ *
+ * But there is another nasty twist to this.  As I have described in my LCA
+ * presentation, Garbage collection would have to limit itself to higher
+ * levels if the number of available free segments goes down.  This code
+ * doesn't and should fail spectacularly.  Yet - hard as I tried I haven't
+ * been able to make it fail (short of a bug elsewhere).
+ *
+ * So in a way this code is intentionally wrong as a desperate cry for a
+ * better testcase.  And I do expect to get blamed for it one day. :(
+ */
+void logfs_gc_pass(struct super_block *sb)
+{
+	struct logfs_super *super = logfs_super(sb);
+	int i;
+
+	for (i=4; i; i--) {
+		if (super->s_free_count >= super->s_total_levels)
+			return;
+		logfs_scan_pass(sb);
+
+		if (super->s_free_count >= super->s_total_levels)
+			return;
+		logfs_gc_several(sb);
+		cond_resched();
+	}
+	logfs_fsck(sb);
+	LOGFS_BUG(sb);
+}
+
+int logfs_init_gc(struct logfs_super *super)
+{
+	INIT_LIST_HEAD(&super->s_free_list);
+	INIT_LIST_HEAD(&super->s_low_list);
+	super->s_free_count = 0;
+	super->s_low_count = 0;
+
+	return 0;
+}
+
+void logfs_cleanup_gc(struct logfs_super *super)
+{
+	struct gc_candidate *cand, *next;
+
+	list_for_each_entry_safe(cand, next, &super->s_free_list, list) {
+		list_del(&cand->list);
+		kfree(cand);
+	}
+	list_for_each_entry_safe(cand, next, &super->s_low_list, list) {
+		list_del(&cand->list);
+		kfree(cand);
+	}
+}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/