lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1520705944-6723-79-git-send-email-jix024@eng.ucsd.edu>
Date:   Sat, 10 Mar 2018 10:18:59 -0800
From:   Andiry Xu <jix024@....ucsd.edu>
To:     linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-nvdimm@...ts.01.org
Cc:     dan.j.williams@...el.com, andy.rudoff@...el.com,
        coughlan@...hat.com, swanson@...ucsd.edu, david@...morbit.com,
        jack@...e.com, swhiteho@...hat.com, miklos@...redi.hu,
        andiry.xu@...il.com, Andiry Xu <jix024@...ucsd.edu>
Subject: [RFC v2 78/83] GC: Thorough garbage collection.

From: Andiry Xu <jix024@...ucsd.edu>

After fast gc, if the valid log entries still account for less
than the half of the log size, NOVA starts thorough garbage collection,
allocates a new log, copies the live log entries to it, and switches
to the new log atomically. The radix tree needs to be updated to point
to the new log.

Example:
I = Invalid, V = Valid

VIIV -> IIII -> VVII

     ||
     ||  fast gc
     \/

VIIV -> VVII

     ||
     ||  thorough gc
     \/

    VVVV

Signed-off-by: Andiry Xu <jix024@...ucsd.edu>
---
 fs/nova/gc.c | 273 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 273 insertions(+)

diff --git a/fs/nova/gc.c b/fs/nova/gc.c
index 1634c04..d74286e 100644
--- a/fs/nova/gc.c
+++ b/fs/nova/gc.c
@@ -18,6 +18,62 @@
 #include "nova.h"
 #include "inode.h"
 
+static bool curr_log_entry_invalid(struct super_block *sb,
+	struct nova_inode *pi, struct nova_inode_info_header *sih,
+	u64 curr_p, size_t *length)
+{
+	struct nova_file_write_entry *entry;
+	struct nova_dentry *dentry;
+	struct nova_setattr_logentry *setattr_entry;
+	struct nova_link_change_entry *linkc_entry;
+	void *entryc;
+	u8 type;
+	bool ret = true;
+
+	entryc = (void *)nova_get_block(sb, curr_p);
+	type = nova_get_entry_type(entryc);
+
+	switch (type) {
+	case SET_ATTR:
+		setattr_entry = (struct nova_setattr_logentry *) entryc;
+		if (setattr_entry->invalid == 0)
+			ret = false;
+		*length = sizeof(struct nova_setattr_logentry);
+		break;
+	case LINK_CHANGE:
+		linkc_entry = (struct nova_link_change_entry *) entryc;
+		if (linkc_entry->invalid == 0)
+			ret = false;
+		*length = sizeof(struct nova_link_change_entry);
+		break;
+	case FILE_WRITE:
+		entry = (struct nova_file_write_entry *) entryc;
+		if (entry->num_pages != entry->invalid_pages)
+			ret = false;
+		*length = sizeof(struct nova_file_write_entry);
+		break;
+	case DIR_LOG:
+		dentry = (struct nova_dentry *) entryc;
+		if (dentry->invalid == 0)
+			ret = false;
+		if (sih->last_dentry == curr_p)
+			ret = false;
+		*length = le16_to_cpu(dentry->de_len);
+		break;
+	case NEXT_PAGE:
+		/* No more entries in this page */
+		*length = PAGE_SIZE - ENTRY_LOC(curr_p);
+		break;
+	default:
+		nova_dbg("%s: unknown type %d, 0x%llx\n",
+					__func__, type, curr_p);
+		NOVA_ASSERT(0);
+		*length = PAGE_SIZE - ENTRY_LOC(curr_p);
+		break;
+	}
+
+	return ret;
+}
 
 static bool curr_page_invalid(struct super_block *sb,
 	struct nova_inode *pi, struct nova_inode_info_header *sih,
@@ -68,6 +124,210 @@ static void free_curr_page(struct super_block *sb,
 			nova_get_blocknr(sb, curr_head, btype), 1);
 }
 
+static int nova_gc_assign_file_entry(struct super_block *sb,
+	struct nova_inode_info_header *sih,
+	struct nova_file_write_entry *old_entry,
+	struct nova_file_write_entry *new_entry)
+{
+	struct nova_file_write_entry *temp;
+	void **pentry;
+	unsigned long start_pgoff = old_entry->pgoff;
+	unsigned int num = old_entry->num_pages;
+	unsigned long curr_pgoff;
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < num; i++) {
+		curr_pgoff = start_pgoff + i;
+
+		pentry = radix_tree_lookup_slot(&sih->tree, curr_pgoff);
+		if (pentry) {
+			temp = radix_tree_deref_slot(pentry);
+			if (temp == old_entry)
+				radix_tree_replace_slot(&sih->tree, pentry,
+							new_entry);
+		}
+	}
+
+	return ret;
+}
+
+static int nova_gc_assign_dentry(struct super_block *sb,
+	struct nova_inode_info_header *sih, struct nova_dentry *old_dentry,
+	struct nova_dentry *new_dentry)
+{
+	struct nova_dentry *temp;
+	void **pentry;
+	unsigned long hash;
+	int ret = 0;
+
+	hash = BKDRHash(old_dentry->name, old_dentry->name_len);
+	nova_dbgv("%s: assign %s hash %lu\n", __func__,
+			old_dentry->name, hash);
+
+	/* FIXME: hash collision ignored here */
+	pentry = radix_tree_lookup_slot(&sih->tree, hash);
+	if (pentry) {
+		temp = radix_tree_deref_slot(pentry);
+		if (temp == old_dentry)
+			radix_tree_replace_slot(&sih->tree, pentry, new_dentry);
+	}
+
+	return ret;
+}
+
+static int nova_gc_assign_new_entry(struct super_block *sb,
+	struct nova_inode *pi, struct nova_inode_info_header *sih,
+	u64 curr_p, u64 new_curr)
+{
+	struct nova_file_write_entry *old_entry, *new_entry;
+	struct nova_dentry *old_dentry, *new_dentry;
+	void *addr, *new_addr;
+	u8 type;
+	int ret = 0;
+
+	addr = (void *)nova_get_block(sb, curr_p);
+	type = nova_get_entry_type(addr);
+	switch (type) {
+	case SET_ATTR:
+		sih->last_setattr = new_curr;
+		break;
+	case LINK_CHANGE:
+		sih->last_link_change = new_curr;
+		break;
+	case FILE_WRITE:
+		new_addr = (void *)nova_get_block(sb, new_curr);
+		old_entry = (struct nova_file_write_entry *)addr;
+		new_entry = (struct nova_file_write_entry *)new_addr;
+		ret = nova_gc_assign_file_entry(sb, sih, old_entry, new_entry);
+		break;
+	case DIR_LOG:
+		new_addr = (void *)nova_get_block(sb, new_curr);
+		old_dentry = (struct nova_dentry *)addr;
+		new_dentry = (struct nova_dentry *)new_addr;
+		if (sih->last_dentry == curr_p)
+			sih->last_dentry = new_curr;
+		ret = nova_gc_assign_dentry(sb, sih, old_dentry, new_dentry);
+		break;
+	default:
+		nova_dbg("%s: unknown type %d, 0x%llx\n",
+					__func__, type, curr_p);
+		NOVA_ASSERT(0);
+		break;
+	}
+
+	return ret;
+}
+
+/* Copy live log entries to the new log and atomically replace the old log */
+static unsigned long nova_inode_log_thorough_gc(struct super_block *sb,
+	struct nova_inode *pi, struct nova_inode_info_header *sih,
+	unsigned long blocks, unsigned long checked_pages)
+{
+	struct nova_inode_log_page *curr_page = NULL;
+	size_t length;
+	u64 curr_p, new_curr;
+	u64 old_curr_p;
+	u64 tail_block;
+	u64 old_head;
+	u64 new_head = 0;
+	u64 next;
+	int allocated;
+	int extended = 0;
+	int ret;
+	timing_t gc_time;
+
+	NOVA_START_TIMING(thorough_gc_t, gc_time);
+
+	curr_p = sih->log_head;
+	old_curr_p = curr_p;
+	old_head = sih->log_head;
+	nova_dbg_verbose("Log head 0x%llx, tail 0x%llx\n",
+				curr_p, sih->log_tail);
+	if (curr_p == 0 && sih->log_tail == 0)
+		goto out;
+
+	if (curr_p >> PAGE_SHIFT == sih->log_tail >> PAGE_SHIFT)
+		goto out;
+
+	allocated = nova_allocate_inode_log_pages(sb, sih, blocks,
+					&new_head, ANY_CPU, 0);
+	if (allocated != blocks) {
+		nova_err(sb, "%s: ERROR: no inode log page available\n",
+					__func__);
+		goto out;
+	}
+
+	new_curr = new_head;
+	while (curr_p != sih->log_tail) {
+		old_curr_p = curr_p;
+		if (goto_next_page(sb, curr_p))
+			curr_p = next_log_page(sb, curr_p);
+
+		if (curr_p >> PAGE_SHIFT == sih->log_tail >> PAGE_SHIFT) {
+			/* Don't recycle tail page */
+			break;
+		}
+
+		length = 0;
+		ret = curr_log_entry_invalid(sb, pi, sih, curr_p, &length);
+		if (!ret) {
+			extended = 0;
+			new_curr = nova_get_append_head(sb, pi, sih,
+						new_curr, length, MAIN_LOG,
+						1, &extended);
+			if (extended)
+				blocks++;
+			/* Copy entry to the new log */
+			memcpy_to_pmem_nocache(nova_get_block(sb, new_curr),
+				nova_get_block(sb, curr_p), length);
+			nova_inc_page_num_entries(sb, new_curr);
+			nova_gc_assign_new_entry(sb, pi, sih, curr_p, new_curr);
+			new_curr += length;
+		}
+
+		curr_p += length;
+	}
+
+	/* Step 1: Link new log to the tail block */
+	tail_block = BLOCK_OFF(sih->log_tail);
+	curr_page = (struct nova_inode_log_page *)nova_get_block(sb,
+							BLOCK_OFF(new_curr));
+	next = next_log_page(sb, new_curr);
+	if (next > 0)
+		nova_free_contiguous_log_blocks(sb, sih, next);
+
+	nova_set_next_page_flag(sb, new_curr);
+	nova_set_next_page_address(sb, curr_page, tail_block, 0);
+
+	/* Step 2: Atomically switch to the new log */
+	pi->log_head = new_head;
+	nova_persist_inode(pi);
+	nova_flush_buffer(pi, sizeof(struct nova_inode), 1);
+	sih->log_head = new_head;
+
+	/* Step 3: Unlink the old log */
+	curr_page = (struct nova_inode_log_page *)nova_get_block(sb,
+							BLOCK_OFF(old_curr_p));
+	next = next_log_page(sb, old_curr_p);
+	if (next != tail_block)
+		nova_err(sb, "Old log error: old curr_p 0x%lx, next 0x%lx ",
+			"curr_p 0x%lx, tail block 0x%lx\n", old_curr_p,
+			next, curr_p, tail_block);
+
+	nova_set_next_page_address(sb, curr_page, 0, 1);
+
+	/* Step 4: Free the old log */
+	nova_free_contiguous_log_blocks(sb, sih, old_head);
+
+	sih->log_pages = sih->log_pages + blocks - checked_pages;
+	NOVA_STATS_ADD(thorough_gc_pages, checked_pages - blocks);
+	NOVA_STATS_ADD(thorough_checked_pages, checked_pages);
+out:
+	NOVA_END_TIMING(thorough_gc_t, gc_time);
+	return blocks;
+}
+
 
 /*
  * Scan pages in the log and remove those with no valid log entries.
@@ -178,9 +438,22 @@ int nova_inode_log_fast_gc(struct super_block *sb,
 	if (sih->num_entries == 0)
 		return 0;
 
+	/* Estimate how many pages worth of valid entries the log contains.
+	 *
+	 * If it is less than half the number pages that remain in the log,
+	 * compress them with thorough gc.
+	 */
 	blocks = (sih->valid_entries * checked_pages) / sih->num_entries;
 	if ((sih->valid_entries * checked_pages) % sih->num_entries)
 		blocks++;
 
+	if (force_thorough || (blocks && blocks * 2 < checked_pages)) {
+		nova_dbgv("Thorough GC for inode %lu: checked pages %lu, valid pages %lu\n",
+				sih->ino,
+				checked_pages, blocks);
+		blocks = nova_inode_log_thorough_gc(sb, pi, sih,
+							blocks, checked_pages);
+	}
+
 	return 0;
 }
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ