lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 25 Jul 2016 23:47:24 -0400 (EDT)
From:	George Amvrosiadis <gamvrosi@...il.com>
To:	Andrew Morton <akpm@...ux-foundation.org>,
	Al Viro <viro@...iv.linux.org.uk>,
	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
	linux-mm@...ck.org
Cc:	George Amvrosiadis <gamvrosi@...il.com>
Subject: [PATCH 3/3] mm/duet: framework code

The Duet framework code:

- bittree.c: red-black bitmap tree that keeps track of items of interest
- debug.c: functions used to print information used to debug Duet
- hash.c: implementation of the global hash table where page events are stored
  for all tasks
- hook.c: the function invoked by the page cache hooks when Duet is online
- init.c: routines used to bring Duet online or offline
- path.c: routines performing resolution of UUIDs to paths using d_path
- task.c: implementation of Duet task fd operations

Signed-off-by: George Amvrosiadis <gamvrosi@...il.com>
---
 init/Kconfig      |   2 +
 mm/Makefile       |   1 +
 mm/duet/Kconfig   |  31 +++
 mm/duet/Makefile  |   7 +
 mm/duet/bittree.c | 537 +++++++++++++++++++++++++++++++++++++++++++++++++
 mm/duet/common.h  | 211 ++++++++++++++++++++
 mm/duet/debug.c   |  98 +++++++++
 mm/duet/hash.c    | 315 +++++++++++++++++++++++++++++
 mm/duet/hook.c    |  81 ++++++++
 mm/duet/init.c    | 172 ++++++++++++++++
 mm/duet/path.c    | 184 +++++++++++++++++
 mm/duet/syscall.h |  61 ++++++
 mm/duet/task.c    | 584 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 13 files changed, 2284 insertions(+)
 create mode 100644 mm/duet/Kconfig
 create mode 100644 mm/duet/Makefile
 create mode 100644 mm/duet/bittree.c
 create mode 100644 mm/duet/common.h
 create mode 100644 mm/duet/debug.c
 create mode 100644 mm/duet/hash.c
 create mode 100644 mm/duet/hook.c
 create mode 100644 mm/duet/init.c
 create mode 100644 mm/duet/path.c
 create mode 100644 mm/duet/syscall.h
 create mode 100644 mm/duet/task.c

diff --git a/init/Kconfig b/init/Kconfig
index c02d897..6f94b5a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -294,6 +294,8 @@ config USELIB
 	  earlier, you may need to enable this syscall.  Current systems
 	  running glibc can safely disable this.
 
+source mm/duet/Kconfig
+
 config AUDIT
 	bool "Auditing support"
 	depends on NET
diff --git a/mm/Makefile b/mm/Makefile
index 78c6f7d..074c15f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -99,3 +99,4 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
 obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
 obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
+obj-$(CONFIG_DUET) += duet/
diff --git a/mm/duet/Kconfig b/mm/duet/Kconfig
new file mode 100644
index 0000000..2f3a0c5
--- /dev/null
+++ b/mm/duet/Kconfig
@@ -0,0 +1,31 @@
+config DUET
+	bool "Duet framework support"
+
+	help
+	  Duet is a framework aiming to reduce the IO footprint of analytics
+	  and maintenance work. By exposing page cache events to these tasks,
+	  it allows them to adapt their data processing order, in order to
+	  benefit from data available in the page cache. Duet's operation is
+	  based on hooks into the page cache.
+
+	  To compile support for Duet, say Y.
+
+config DUET_STATS
+	bool "Duet statistics collection"
+	depends on DUET
+	help
+	  This option enables support for the collection of statistics on the
+	  operation of Duet. It will print information about the data structures
+	  used internally, and profiling information about the framework.
+
+	  If unsure, say N.
+
+config DUET_DEBUG
+	bool "Duet debugging support"
+	depends on DUET
+	help
+	  Enable runtime debugging support for the Duet framework. This may
+	  enable additional and expensive checks with negative impact on
+	  performance.
+
+	  To compile debugging support for Duet, say Y. If unsure, say N.
diff --git a/mm/duet/Makefile b/mm/duet/Makefile
new file mode 100644
index 0000000..c0c9e11
--- /dev/null
+++ b/mm/duet/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the linux Duet framework.
+#
+
+obj-$(CONFIG_DUET) += duet.o
+
+duet-y := init.o hash.o hook.o task.o bittree.o path.o debug.o
diff --git a/mm/duet/bittree.c b/mm/duet/bittree.c
new file mode 100644
index 0000000..3b20c35
--- /dev/null
+++ b/mm/duet/bittree.c
@@ -0,0 +1,537 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "common.h"
+
+#define BMAP_READ	0x01	/* Read bmaps (overrides other flags) */
+#define BMAP_CHECK	0x02	/* Check given bmap value expression */
+				/* Sets bmaps to match expression if not set */
+
+/* Bmap expressions can be formed using the following flags: */
+#define BMAP_DONE_SET	0x04	/* Set done bmap values */
+#define BMAP_DONE_RST	0x08	/* Reset done bmap values */
+#define BMAP_RELV_SET	0x10	/* Set relevant bmap values */
+#define BMAP_RELV_RST	0x20	/* Reset relevant bmap values */
+#define BMAP_SEEN_SET	0x40	/* Set seen bmap values */
+#define BMAP_SEEN_RST	0x80	/* Reset seen bmap values */
+
+/* Some macros to make our life easier */
+#define BMAP_ALL_SET	(BMAP_SEEN_SET | BMAP_RELV_SET | BMAP_DONE_SET)
+#define BMAP_ALL_RST	(BMAP_SEEN_RST | BMAP_RELV_RST | BMAP_DONE_RST)
+
+#define BITTREE_RANGE	PAGE_SIZE	/* Bytes per bitmap bit */
+#define BITS_PER_NODE	(32768 * 8)	/* 32KB bitmaps */
+
+#define UUID_IDX(uuid)	(((unsigned long long) uuid.gen << 32) | \
+			  (unsigned long long) uuid.ino)
+/*
+ * The following functions are wrappers for the basic bitmap functions.
+ * A bitmap is characterized by a starting offset (bstart). The wrappers
+ * translate an arbitrary idx to the appropriate bit.
+ */
+
+/* Sets (or resets) a single bit */
+static int bmap_set(unsigned long *bmap, __u64 start, __u64 idx, __u8 do_set)
+{
+	__u64 bofft = idx - start;
+
+	if (bofft + 1 >= start + (BITS_PER_NODE * BITTREE_RANGE))
+		return -1;
+
+	/* Convert range to bitmap granularity */
+	do_div(bofft, BITTREE_RANGE);
+
+	if (do_set)
+		bitmap_set(bmap, (unsigned int)bofft, 1);
+	else
+		bitmap_clear(bmap, (unsigned int)bofft, 1);
+
+	return 0;
+}
+
+/* Returns value of bit at idx */
+static int bmap_read(unsigned long *bmap, __u64 start, __u64 idx)
+{
+	__u64 bofft64 = idx - start;
+	unsigned long *p, mask;
+	unsigned int bofft;
+
+	if (bofft64 + 1 >= start + (BITS_PER_NODE * BITTREE_RANGE))
+		return -1;
+
+	/* Convert offset to bitmap granularity */
+	do_div(bofft64, BITTREE_RANGE);
+	bofft = (unsigned int)bofft64;
+
+	/* Check the bits */
+	p = bmap + BIT_WORD(bofft);
+	mask = BITMAP_FIRST_WORD_MASK(bofft) & BITMAP_LAST_WORD_MASK(bofft + 1);
+
+	if ((*p) & mask)
+		return 1;
+
+	return 0;
+}
+
+/* Checks whether a bit is set */
+static int bmap_chk(unsigned long *bmap, __u64 start, __u64 idx, __u8 do_set)
+{
+	__u64 bofft64 = idx - start;
+	unsigned long *p, mask;
+	unsigned int bofft;
+
+	if (bofft64 + 1 >= start + (BITS_PER_NODE * BITTREE_RANGE))
+		return -1;
+
+	/* Convert range to bitmap granularity */
+	do_div(bofft64, BITTREE_RANGE);
+
+	/* Now it is safe to cast these variables */
+	bofft = (unsigned int)bofft64;
+
+	/* Check the bit */
+	p = bmap + BIT_WORD(bofft);
+	mask = BITMAP_FIRST_WORD_MASK(bofft) & BITMAP_LAST_WORD_MASK(bofft + 1);
+
+	if (do_set && !((*p) & mask))
+		return 0;
+	else if (!do_set && !(~(*p) & mask))
+		return 0;
+
+	return 1;
+}
+
+/* Initializes a bitmap tree node */
+static struct bmap_rbnode *bnode_init(struct duet_bittree *bt, __u64 idx)
+{
+	struct bmap_rbnode *bnode = NULL;
+
+#ifdef CONFIG_DUET_STATS
+	if (bt) {
+		(bt->statcur)++;
+		if (bt->statcur > bt->statmax) {
+			bt->statmax = bt->statcur;
+			pr_info("duet: %llu BitTree nodes (%llub)\n",
+				bt->statmax, bt->statmax * BITS_PER_NODE / 8);
+		}
+	}
+#endif /* CONFIG_DUET_STATS */
+
+	bnode = kmalloc(sizeof(*bnode), GFP_NOWAIT);
+	if (!bnode)
+		return NULL;
+
+	bnode->done = kzalloc(sizeof(unsigned long) *
+			BITS_TO_LONGS(BITS_PER_NODE), GFP_NOWAIT);
+	if (!bnode->done) {
+		kfree(bnode);
+		return NULL;
+	}
+
+	/* Allocate relevant bitmap, if needed */
+	bnode->relv = kzalloc(sizeof(unsigned long) *
+		BITS_TO_LONGS(BITS_PER_NODE), GFP_NOWAIT);
+	if (!bnode->relv) {
+		kfree(bnode->done);
+		kfree(bnode);
+		return NULL;
+	}
+
+	bnode->seen = kzalloc(sizeof(unsigned long) *
+		BITS_TO_LONGS(BITS_PER_NODE), GFP_NOWAIT);
+	if (!bnode->seen) {
+		kfree(bnode->relv);
+		kfree(bnode->done);
+		kfree(bnode);
+		return NULL;
+	}
+
+	RB_CLEAR_NODE(&bnode->node);
+	bnode->idx = idx;
+	return bnode;
+}
+
+static void bnode_dispose(struct bmap_rbnode *bnode, struct rb_node *rbnode,
+	struct duet_bittree *bt)
+{
+#ifdef CONFIG_DUET_STATS
+	if (bt)
+		(bt->statcur)--;
+#endif /* CONFIG_DUET_STATS */
+	rb_erase(rbnode, &bt->root);
+	kfree(bnode->relv);
+	kfree(bnode->seen);
+	kfree(bnode->done);
+	kfree(bnode);
+}
+
+/*
+ * Traverses bitmap nodes to read/set/unset/check a specific bit across bitmaps.
+ * May insert/remove bitmap nodes as needed.
+ *
+ * If DUET_BMAP_READ is set:
+ * - the bitmap value for idx are read for one or all bitmaps
+ * Otherwise, if DUET_BMAP_CHECK flag is set:
+ * - return value 1 means the idx matches the given flags
+ * - return value 0 means the idx doesn't match the given flags
+ * Otherwise, if neither flag is set:
+ * - return value 0 means the idx was updated to match given flags
+ *
+ * In all cases, a return value -1 denotes an error.
+ */
+static int __update_tree(struct duet_bittree *bt, __u64 idx, __u8 flags)
+{
+	int found, ret, res;
+	__u64 node_offt, div_rem;
+	struct rb_node **link, *parent;
+	struct bmap_rbnode *bnode = NULL;
+	unsigned long iflags;
+
+	local_irq_save(iflags);
+	spin_lock(&bt->lock);
+
+	div64_u64_rem(idx, BITTREE_RANGE * BITS_PER_NODE, &div_rem);
+	node_offt = idx - div_rem;
+
+	/* Look up BitTree node */
+	found = 0;
+	link = &(bt->root).rb_node;
+	parent = NULL;
+
+	while (*link) {
+		parent = *link;
+		bnode = rb_entry(parent, struct bmap_rbnode, node);
+
+		if (bnode->idx > node_offt) {
+			link = &(*link)->rb_left;
+		} else if (bnode->idx < node_offt) {
+			link = &(*link)->rb_right;
+		} else {
+			found = 1;
+			break;
+		}
+	}
+
+	/* If we're just reading bitmap values, return them now */
+	if (flags & BMAP_READ) {
+		ret = 0;
+
+		if (!found)
+			goto done;
+
+		/* First read seen bit */
+		res = bmap_read(bnode->seen, bnode->idx, idx);
+		if (res == -1) {
+			ret = -1;
+			goto done;
+		}
+		ret |= res << 2;
+
+		/* Then read relevant bit */
+		res = bmap_read(bnode->relv, bnode->idx, idx);
+		if (res == -1) {
+			ret = -1;
+			goto done;
+		}
+		ret |= res << 1;
+
+		/* Read done bit */
+		res = bmap_read(bnode->done, bnode->idx, idx);
+		if (res == -1) {
+			ret = -1;
+			goto done;
+		}
+
+		ret |= res;
+		goto done;
+	}
+
+	/*
+	 * Take appropriate action based on whether we found the node
+	 * and whether we plan to update (SET/RST), or only CHECK it.
+	 *
+	 *   NULL  |       Found            !Found      |
+	 *  -------+------------------------------------+
+	 *    SET  |     Set Bits     |  Init new node  |
+	 *         |------------------+-----------------|
+	 *    RST  | Clear (dispose?) |     Nothing     |
+	 *  -------+------------------------------------+
+	 *
+	 *  CHECK  |       Found            !Found      |
+	 *  -------+------------------------------------+
+	 *    SET  |    Check Bits    |  Return false   |
+	 *         |------------------+-----------------|
+	 *    RST  |    Check Bits    |    Continue     |
+	 *  -------+------------------------------------+
+	 */
+
+	/* First handle setting (or checking set) bits */
+	if (flags & BMAP_ALL_SET) {
+		if (!found && !(flags & BMAP_CHECK)) {
+			/* Insert the new node */
+			bnode = bnode_init(bt, node_offt);
+			if (!bnode) {
+				ret = -1;
+				goto done;
+			}
+
+			rb_link_node(&bnode->node, parent, link);
+			rb_insert_color(&bnode->node, &bt->root);
+
+		} else if (!found && (flags & BMAP_CHECK)) {
+			/* Looking for set bits, node didn't exist */
+			ret = 0;
+			goto done;
+		}
+
+		/* Set the bits. Return -1 if something goes wrong. */
+		if (!(flags & BMAP_CHECK)) {
+			if ((flags & BMAP_SEEN_SET) &&
+			    bmap_set(bnode->seen, bnode->idx, idx, 1)) {
+				ret = -1;
+				goto done;
+			}
+
+			if ((flags & BMAP_RELV_SET) &&
+			    bmap_set(bnode->relv, bnode->idx, idx, 1)) {
+				ret = -1;
+				goto done;
+			}
+
+			if ((flags & BMAP_DONE_SET) &&
+			    bmap_set(bnode->done, bnode->idx, idx, 1)) {
+				ret = -1;
+				goto done;
+			}
+
+		/* Check the bits. Return if any bits are off */
+		} else {
+			if (flags & BMAP_SEEN_SET) {
+				ret = bmap_chk(bnode->seen, bnode->idx, idx, 1);
+				if (ret != 1)
+					goto done;
+			}
+
+			if (flags & BMAP_RELV_SET) {
+				ret = bmap_chk(bnode->relv, bnode->idx, idx, 1);
+				if (ret != 1)
+					goto done;
+			}
+
+			ret = bmap_chk(bnode->done, bnode->idx, idx, 1);
+			if (ret != 1)
+				goto done;
+		}
+	}
+
+	/* Now handle unsetting any bits */
+	if (found && (flags & BMAP_ALL_RST)) {
+		/* Clear the bits. Return -1 if something goes wrong. */
+		if (!(flags & BMAP_CHECK)) {
+			if ((flags & BMAP_SEEN_RST) &&
+			    bmap_set(bnode->seen, bnode->idx, idx, 0)) {
+				ret = -1;
+				goto done;
+			}
+
+			if ((flags & BMAP_RELV_RST) &&
+			    bmap_set(bnode->relv, bnode->idx, idx, 0)) {
+				ret = -1;
+				goto done;
+			}
+
+			if ((flags & BMAP_DONE_RST) &&
+			    bmap_set(bnode->done, bnode->idx, idx, 0)) {
+				ret = -1;
+				goto done;
+			}
+
+		/* Check the bits. Return if any bits are off */
+		} else {
+			if (flags & BMAP_SEEN_RST) {
+				ret = bmap_chk(bnode->seen, bnode->idx, idx, 0);
+				if (ret != 1)
+					goto done;
+			}
+
+			if (flags & BMAP_RELV_RST) {
+				ret = bmap_chk(bnode->relv, bnode->idx, idx, 0);
+				if (ret != 1)
+					goto done;
+			}
+
+			ret = bmap_chk(bnode->done, bnode->idx, idx, 0);
+			if (ret != 1)
+				goto done;
+		}
+
+		/* Dispose of the node if empty */
+		if (!(flags & BMAP_CHECK) &&
+		    bitmap_empty(bnode->done, BITS_PER_NODE) &&
+		    bitmap_empty(bnode->seen, BITS_PER_NODE) &&
+		    bitmap_empty(bnode->relv, BITS_PER_NODE))
+			bnode_dispose(bnode, parent, bt);
+	}
+
+	if (!(flags & BMAP_CHECK))
+		ret = 0;
+	else
+		ret = 1;
+
+done:
+	if (ret == -1)
+		pr_err("duet: blocks were not %s\n",
+			(flags & BMAP_READ) ? "read" :
+			((flags & BMAP_CHECK) ? "checked" : "modified"));
+	spin_unlock(&bt->lock);
+	local_irq_restore(iflags);
+	return ret;
+}
+
+/*
+ * Check if we have seen this inode before. If not, check if it is relevant.
+ * Then, check whether it's done.
+ */
+static int do_bittree_check(struct duet_bittree *bt, struct duet_uuid uuid,
+			    struct duet_task *task, struct inode *inode)
+{
+	int ret, bits;
+	unsigned long long idx = UUID_IDX(uuid);
+
+	bits = __update_tree(bt, idx, BMAP_READ);
+
+	if (!(bits & 0x4)) {
+		/* We have not seen this inode before */
+		if (inode) {
+			ret = do_find_path(task, inode, 0, NULL, 0);
+		} else if (task) {
+			ret = duet_find_path(task, uuid, 0, NULL, 0);
+		} else {
+			pr_err("duet: check failed, no task/inode\n");
+			return -1;
+		}
+
+		if (!ret) {
+			/* Mark as relevant and return not done */
+			ret = __update_tree(bt, idx,
+					    BMAP_SEEN_SET | BMAP_RELV_SET);
+			if (ret != -1)
+				ret = 0;
+
+		} else if (ret == -ENOENT) {
+			/* Mark as irrelevant and return done */
+			ret = __update_tree(bt, idx, BMAP_SEEN_SET);
+			if (ret != -1)
+				ret = 1;
+
+		} else {
+			pr_err("duet: inode relevance undetermined\n");
+			return -1;
+		}
+
+	} else {
+		/* We know this inode, return 1 if done, or irrelevant */
+		ret = ((bits & 0x1) || !(bits & 0x2)) ? 1 : 0;
+	}
+
+	return ret;
+}
+
+/* Checks if a given inode is done. Skips inode lookup. */
+int bittree_check_inode(struct duet_bittree *bt, struct duet_task *task,
+	struct inode *inode)
+{
+	struct duet_uuid uuid;
+
+	uuid.ino = inode->i_ino;
+	uuid.gen = inode->i_generation;
+
+	return do_bittree_check(bt, uuid, task, inode);
+}
+
+/* Checks if the given entries are done */
+int bittree_check(struct duet_bittree *bt, struct duet_uuid uuid,
+		  struct duet_task *task)
+{
+	return do_bittree_check(bt, uuid, task, NULL);
+}
+
+/* Mark done bit for given entries */
+int bittree_set(struct duet_bittree *bt, struct duet_uuid uuid)
+{
+	return __update_tree(bt, UUID_IDX(uuid), BMAP_DONE_SET);
+}
+
+/* Unmark done bit for given entries */
+int bittree_reset(struct duet_bittree *bt, struct duet_uuid uuid)
+{
+	return __update_tree(bt, UUID_IDX(uuid), BMAP_DONE_RST);
+}
+
+int bittree_print(struct duet_task *task)
+{
+	struct bmap_rbnode *bnode = NULL;
+	struct rb_node *node;
+	unsigned long iflags;
+
+	local_irq_save(iflags);
+	spin_lock(&task->bittree.lock);
+	pr_info("duet: Printing task bittree\n");
+	node = rb_first(&task->bittree.root);
+	while (node) {
+		bnode = rb_entry(node, struct bmap_rbnode, node);
+
+		/* Print node information */
+		pr_info("duet: Node key = %llu\n", bnode->idx);
+		pr_info("duet:   Done bits set: %d out of %d\n",
+			bitmap_weight(bnode->done, BITS_PER_NODE),
+			BITS_PER_NODE);
+		pr_info("duet:   Relv bits set: %d out of %d\n",
+			bitmap_weight(bnode->relv, BITS_PER_NODE),
+			BITS_PER_NODE);
+		pr_info("duet:   Seen bits set: %d out of %d\n",
+			bitmap_weight(bnode->seen, BITS_PER_NODE),
+			BITS_PER_NODE);
+
+		node = rb_next(node);
+	}
+	spin_unlock(&task->bittree.lock);
+	local_irq_restore(iflags);
+
+	pr_info("duet: Task #%d bitmap has %d out of %lu bits set\n",
+		task->id, bitmap_weight(task->bucket_bmap,
+		duet_env.itm_hash_size), duet_env.itm_hash_size);
+
+	return 0;
+}
+
+void bittree_init(struct duet_bittree *bittree)
+{
+	spin_lock_init(&bittree->lock);
+	bittree->root = RB_ROOT;
+#ifdef CONFIG_DUET_STATS
+	bittree->statcur = bittree->statmax = 0;
+#endif /* CONFIG_DUET_STATS */
+}
+
+void bittree_destroy(struct duet_bittree *bittree)
+{
+	struct rb_node *rbnode;
+	struct bmap_rbnode *bnode;
+
+	while (!RB_EMPTY_ROOT(&bittree->root)) {
+		rbnode = rb_first(&bittree->root);
+		bnode = rb_entry(rbnode, struct bmap_rbnode, node);
+		bnode_dispose(bnode, rbnode, bittree);
+	}
+}
diff --git a/mm/duet/common.h b/mm/duet/common.h
new file mode 100644
index 0000000..1dac66b
--- /dev/null
+++ b/mm/duet/common.h
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef _COMMON_H
+#define _COMMON_H
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
+#include <linux/rculist.h>
+#include <linux/syscalls.h>
+#include <linux/duet.h>
+
+#ifdef DUET_DEBUG
+#define duet_dbg(...)	pr_info(__VA_ARGS__)
+#else
+#define duet_dbg(...)
+#endif
+
+/*
+ * Duet can be state-based, and/or event-based.
+ *
+ * Event-based Duet monitors events that occurred on a page, during its
+ * time in the page cache: ADDED, REMOVED, DIRTY, and FLUSHED.
+ *
+ * State-based Duet monitors changes in the page cache since the last time
+ * a notification was sent to the interested application. Registering for
+ * EXIST informs the application of page additions or removals from the cache
+ * (i.e. ADDED and REMOVED events cancel each other out if the application
+ * hasn't been told in the meantime). Registering for MODIFIED events is a
+ * similar model, where unreported DIRTY and FLUSHED events cancel each other.
+ */
+#define DUET_PAGE_ADDED		0x0001
+#define DUET_PAGE_REMOVED	0x0002
+#define DUET_PAGE_DIRTY		0x0004
+#define DUET_PAGE_FLUSHED	0x0008
+#define DUET_PAGE_MODIFIED	0x0010
+#define DUET_PAGE_EXISTS	0x0020
+#define DUET_FD_NONBLOCK	0x0040
+
+/* Used only for page state */
+#define DUET_MASK_VALID		0x8000
+
+/* Some useful flags for clearing bitmaps */
+#define BMAP_SEEN	0x1
+#define BMAP_RELV	0x2
+#define BMAP_DONE	0x4
+
+#define DUET_DEF_NUMTASKS	8
+#define DUET_INODE_FREEING	(I_WILL_FREE | I_FREEING | I_CLEAR)
+
+enum {
+	DUET_STATUS_OFF = 0,
+	DUET_STATUS_ON,
+	DUET_STATUS_INIT,
+	DUET_STATUS_CLEAN,
+};
+
+/*
+ * Item struct returned for processing.
+ * The UUID currently consists of the inode number, the inode generation
+ * (to help us identify cases of inode reuse), and the task id.
+ * For state-based duet, we mark a page if it EXISTS or is MODIFIED.
+ * For event-based duet, we mark a page added, removed, dirtied, and/or flushed.
+ * Acceptable event combinations will differ based on the task's subscription.
+ */
+struct duet_uuid {
+	unsigned long	ino;
+	__u32		gen;
+	__u8		tid;
+};
+
+struct duet_item {
+	struct duet_uuid	uuid;
+	unsigned long		idx;
+	__u16			state;
+};
+
+/*
+ * Red-black bitmap tree node.
+ * Represents the range starting from idx. For block tasks, only the done
+ * bitmap is used. For file tasks, the seen and relv (relevant) bitmaps are
+ * also used. The semantics of different states are listed below, where an
+ * item can be in the unknown state due to a bitmap reset, or because it hasn't
+ * been encountered yet.
+ * - !SEEN && !RELV && !DONE: Item in unknown state
+ * - !SEEN && !RELV &&  DONE: Item processed, but in unknown state
+ * -  SEEN && !RELV && !DONE: Item not relevant to the task
+ * -  SEEN &&  RELV && !DONE: Item is relevant, but not processed
+ * -  SEEN &&  RELV &&  DONE: Item is relevant, and has already been processed
+ */
+struct bmap_rbnode {
+	__u64		idx;
+	struct rb_node	node;
+	unsigned long	*seen;
+	unsigned long	*relv;
+	unsigned long	*done;
+};
+
+struct item_hnode {
+	struct hlist_bl_node	node;
+	struct duet_item	item;
+	__u8			refcount;
+	__u16			*state;		/* One entry per task */
+};
+
+struct duet_bittree {
+	spinlock_t		lock;
+	struct rb_root		root;
+#ifdef CONFIG_DUET_STATS
+	__u64			statcur;	/* Cur # of BitTree nodes */
+	__u64			statmax;	/* Max # of BitTree nodes */
+#endif /* CONFIG_DUET_STATS */
+};
+
+struct duet_task {
+	__u8			id;		/* Internal task ID */
+
+	int			fd;
+	struct filename		*name;
+	__u32			evtmask;	/* Mask of subscribed events */
+	struct path		*regpath;	/* Registered path */
+	char			*regpathname;	/* Registered path name */
+	__u16			regpathlen;	/* Length of path name */
+
+	/* Data structures linking task to framework */
+	struct list_head	task_list;
+	wait_queue_head_t	cleaner_queue;
+	atomic_t		refcount;
+	char			*pathbuf;	/* Buffer for getpath */
+	struct duet_bittree	bittree;	/* Progress bitmap */
+	wait_queue_head_t	event_queue;	/* for read() and poll() */
+
+	/* Hash table bucket bitmap */
+	spinlock_t		bbmap_lock;
+	unsigned long		*bucket_bmap;
+	unsigned long		bmap_cursor;
+};
+
+struct duet_info {
+	atomic_t		status;
+	__u8			numtasks;	/* Number of concurrent tasks */
+
+	/*
+	 * Access to the task list is synchronized via a mutex. However, any
+	 * operations that are on-going for a task (e.g. fetch) will increase
+	 * its refcount. This refcount is consulted when disposing of the task.
+	 */
+	struct mutex		task_list_mutex;
+	struct list_head	tasks;
+
+	/* ItemTable -- Global page state hash table */
+	struct hlist_bl_head	*itm_hash_table;
+	unsigned long		itm_hash_size;
+	unsigned long		itm_hash_shift;
+	unsigned long		itm_hash_mask;
+};
+
+extern struct duet_info duet_env;
+
+/* hook.c */
+void duet_hook(__u16 evtcode, void *data);
+
+/* hash.c */
+int hash_init(void);
+int hash_add(struct duet_task *task, struct duet_uuid uuid,
+	     unsigned long idx, __u16 evtmask, short in_scan);
+int hash_fetch(struct duet_task *task, struct duet_item *itm);
+void hash_print(struct duet_task *task);
+
+/* task.c -- not in linux/duet.h */
+struct duet_task *duet_find_task(__u8 id);
+void duet_task_dispose(struct duet_task *task);
+
+/* path.c */
+int do_find_path(struct duet_task *task, struct inode *inode,
+		 int getpath, char *buf, int bufsize);
+int duet_find_path(struct duet_task *task, struct duet_uuid uuid,
+		   int getpath, char *buf, int bufsize);
+
+/* bittree.c */
+int bittree_check_inode(struct duet_bittree *bt, struct duet_task *task,
+			struct inode *inode);
+int bittree_check(struct duet_bittree *bt, struct duet_uuid uuid,
+		  struct duet_task *task);
+int bittree_set(struct duet_bittree *bt, struct duet_uuid uuid);
+int bittree_reset(struct duet_bittree *bt, struct duet_uuid uuid);
+int bittree_print(struct duet_task *task);
+void bittree_init(struct duet_bittree *bittree);
+void bittree_destroy(struct duet_bittree *bittree);
+
+/* init.c */
+int duet_online(void);
+
+/* debug.c */
+int duet_print_bmap(__u8 id);
+int duet_print_item(__u8 id);
+int duet_print_list(struct duet_status_args __user *arg);
+
+#endif /* _COMMON_H */
diff --git a/mm/duet/debug.c b/mm/duet/debug.c
new file mode 100644
index 0000000..77a47b6
--- /dev/null
+++ b/mm/duet/debug.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "common.h"
+#include "syscall.h"
+
+/* Do a preorder print of the BitTree */
+int duet_print_bmap(__u8 id)
+{
+	struct duet_task *task;
+
+	task = duet_find_task(id);
+	if (!task)
+		return -ENOENT;
+
+	if (bittree_print(task)) {
+		pr_err("duet: failed to print BitTree for task #%d\n",
+			task->id);
+		return -1;
+	}
+
+	/* decref and wake up cleaner if needed */
+	if (atomic_dec_and_test(&task->refcount))
+		wake_up(&task->cleaner_queue);
+
+	return 0;
+}
+
+/* Do a preorder print of the global hash table */
+int duet_print_item(__u8 id)
+{
+	struct duet_task *task;
+
+	task = duet_find_task(id);
+	if (!task)
+		return -ENOENT;
+
+	hash_print(task);
+
+	/* decref and wake up cleaner if needed */
+	if (atomic_dec_and_test(&task->refcount))
+		wake_up(&task->cleaner_queue);
+
+	return 0;
+}
+
+int duet_print_list(struct duet_status_args __user *arg)
+{
+	int i = 0;
+	struct duet_task *cur;
+	struct duet_status_args argh, *argp;
+
+	/* Copy in task list header (again) */
+	if (copy_from_user(&argh, arg, sizeof(argh)))
+		return -EFAULT;
+
+	/* Copy in entire task list */
+	argp = memdup_user(arg, sizeof(*argp) + (argh.numtasks *
+				sizeof(struct duet_task_attrs)));
+	if (IS_ERR(argp))
+		return PTR_ERR(argp);
+
+	/* Copy the info for the first numtasks */
+	mutex_lock(&duet_env.task_list_mutex);
+	list_for_each_entry(cur, &duet_env.tasks, task_list) {
+		argp->tasks[i].id = cur->id;
+		argp->tasks[i].fd = cur->fd;
+		memcpy(argp->tasks[i].name, cur->name->name, NAME_MAX);
+		argp->tasks[i].regmask = cur->evtmask;
+		memcpy(argp->tasks[i].path, cur->regpathname, cur->regpathlen);
+		i++;
+		if (i == argp->numtasks)
+			break;
+	}
+	mutex_unlock(&duet_env.task_list_mutex);
+
+	/* Copy out entire task list */
+	if (copy_to_user(arg, argp, sizeof(*argp) + (argp->numtasks *
+			 sizeof(struct duet_task_attrs)))) {
+		pr_err("duet_print_list: failed to copy out list\n");
+		kfree(argp);
+		return -EINVAL;
+	}
+
+	duet_dbg("duet_print_list: success sending task list\n");
+	kfree(argp);
+	return 0;
+}
diff --git a/mm/duet/hash.c b/mm/duet/hash.c
new file mode 100644
index 0000000..c2644d6
--- /dev/null
+++ b/mm/duet/hash.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/hash.h>
+#include "common.h"
+
+#define DUET_NEGATE_EXISTS	(DUET_PAGE_ADDED | DUET_PAGE_REMOVED)
+#define DUET_NEGATE_MODIFIED	(DUET_PAGE_DIRTY | DUET_PAGE_FLUSHED)
+
+/*
+ * Page state is retained in a global hash table shared by all tasks.
+ * Indexing is based on the page's inode number and offset.
+ */
+
+static unsigned long hash(unsigned long ino, unsigned long idx)
+{
+	unsigned long h;
+
+	h = (idx * ino ^ (GOLDEN_RATIO_PRIME + idx)) / L1_CACHE_BYTES;
+	h = h ^ ((h ^ GOLDEN_RATIO_PRIME) >> duet_env.itm_hash_shift);
+	return h & duet_env.itm_hash_mask;
+}
+
+int hash_init(void)
+{
+	/* Allocate power-of-2 number of buckets */
+	duet_env.itm_hash_shift = ilog2(totalram_pages);
+	duet_env.itm_hash_size = 1 << duet_env.itm_hash_shift;
+	duet_env.itm_hash_mask = duet_env.itm_hash_size - 1;
+
+	pr_debug("duet: allocated global hash table (%lu buckets)\n",
+		duet_env.itm_hash_size);
+	duet_env.itm_hash_table = vmalloc(sizeof(struct hlist_bl_head) *
+						duet_env.itm_hash_size);
+	if (!duet_env.itm_hash_table)
+		return 1;
+
+	memset(duet_env.itm_hash_table, 0, sizeof(struct hlist_bl_head) *
+						duet_env.itm_hash_size);
+	return 0;
+}
+
+/* Deallocate a hash table node */
+static void hnode_destroy(struct item_hnode *itnode)
+{
+	kfree(itnode->state);
+	kfree(itnode);
+}
+
+/* Allocate and initialize a new hash table node */
+static struct item_hnode *hnode_init(struct duet_uuid uuid, unsigned long idx)
+{
+	struct item_hnode *itnode = NULL;
+
+	itnode = kzalloc(sizeof(struct item_hnode), GFP_NOWAIT);
+	if (!itnode)
+		return NULL;
+
+	itnode->state = kcalloc(duet_env.numtasks, sizeof(*(itnode->state)),
+				GFP_NOWAIT);
+	if (!(itnode->state)) {
+		pr_err("duet: failed to allocate hash node state\n");
+		kfree(itnode);
+		return NULL;
+	}
+
+	(itnode->item).uuid = uuid;
+	(itnode->item).idx = idx;
+	itnode->refcount++;
+
+	return itnode;
+}
+
+/* Add one event into the hash table */
+int hash_add(struct duet_task *task, struct duet_uuid uuid, unsigned long idx,
+	__u16 evtmask, short in_scan)
+{
+	__u16 curmask = 0;
+	short found = 0;
+	unsigned long bnum, flags;
+	struct hlist_bl_head *b;
+	struct hlist_bl_node *n;
+	struct item_hnode *itnode;
+
+	evtmask &= task->evtmask;
+
+	/* Get the bucket */
+	bnum = hash(uuid.ino, idx);
+	b = duet_env.itm_hash_table + bnum;
+	local_irq_save(flags);
+	hlist_bl_lock(b);
+
+	/* Lookup the item in the bucket */
+	hlist_bl_for_each_entry(itnode, n, b, node) {
+		if ((itnode->item).uuid.ino == uuid.ino &&
+		    (itnode->item).uuid.gen == uuid.gen &&
+		    (itnode->item).idx == idx) {
+			found = 1;
+			break;
+		}
+	}
+
+	duet_dbg("duet: %s hash node (tid %d, ino%lu, gen%lu, idx%lu)\n",
+		found ? (in_scan ? "replacing" : "updating") : "inserting",
+		uuid.tid, uuid.ino, uuid.gen, idx);
+
+	if (found) {
+		curmask = itnode->state[task->id];
+
+		/* Only up the refcount if we are adding a new mask */
+		if (!(curmask & DUET_MASK_VALID) || in_scan) {
+			if (!in_scan)
+				itnode->refcount++;
+			curmask = evtmask | DUET_MASK_VALID;
+			goto check_dispose;
+		}
+
+		curmask |= evtmask | DUET_MASK_VALID;
+
+		/* Negate previous events and remove if needed */
+		if ((task->evtmask & DUET_PAGE_EXISTS) &&
+		   ((curmask & DUET_NEGATE_EXISTS) == DUET_NEGATE_EXISTS))
+			curmask &= ~DUET_NEGATE_EXISTS;
+
+		if ((task->evtmask & DUET_PAGE_MODIFIED) &&
+		   ((curmask & DUET_NEGATE_MODIFIED) == DUET_NEGATE_MODIFIED))
+			curmask &= ~DUET_NEGATE_MODIFIED;
+
+check_dispose:
+		if ((curmask == DUET_MASK_VALID) && (itnode->refcount == 1)) {
+			if (itnode->refcount != 1) {
+				itnode->state[task->id] = 0;
+			} else {
+				hlist_bl_del(&itnode->node);
+				hnode_destroy(itnode);
+			}
+
+			/* Are we still interested in this bucket? */
+			hlist_bl_for_each_entry(itnode, n, b, node) {
+				if (itnode->state[task->id] & DUET_MASK_VALID) {
+					found = 1;
+					break;
+				}
+			}
+
+			if (!found)
+				clear_bit(bnum, task->bucket_bmap);
+		} else {
+			itnode->state[task->id] = curmask;
+
+			/* Update bitmap */
+			set_bit(bnum, task->bucket_bmap);
+		}
+	} else if (!found) {
+		if (!evtmask)
+			goto done;
+
+		itnode = hnode_init(uuid, idx);
+		if (!itnode)
+			return 1;
+
+		itnode->state[task->id] = evtmask | DUET_MASK_VALID;
+		hlist_bl_add_head(&itnode->node, b);
+
+		/* Update bitmap */
+		set_bit(bnum, task->bucket_bmap);
+	}
+
+done:
+	hlist_bl_unlock(b);
+	local_irq_restore(flags);
+	return 0;
+}
+
+/* Fetch one item for a given task. Return found (1), empty (0), error (-1) */
+int hash_fetch(struct duet_task *task, struct duet_item *itm)
+{
+	int found;
+	unsigned long bnum, flags;
+	struct hlist_bl_head *b;
+	struct hlist_bl_node *n;
+	struct item_hnode *itnode;
+
+	local_irq_save(flags);
+again:
+	spin_lock(&task->bbmap_lock);
+	bnum = find_next_bit(task->bucket_bmap, duet_env.itm_hash_size,
+			     task->bmap_cursor);
+
+	if (bnum == duet_env.itm_hash_size) {
+		/* Reached end of bitmap */
+		found = 0;
+
+		if (task->bmap_cursor != 0) {
+			/* Started part way, try again */
+			bnum = find_next_bit(task->bucket_bmap,
+					     task->bmap_cursor, 0);
+
+			if (bnum != task->bmap_cursor)
+				found = 1;
+		}
+
+		if (!found) {
+			spin_unlock(&task->bbmap_lock);
+			local_irq_restore(flags);
+			return 1;
+		}
+	}
+
+	task->bmap_cursor = bnum;
+	clear_bit(bnum, task->bucket_bmap);
+	spin_unlock(&task->bbmap_lock);
+	b = duet_env.itm_hash_table + bnum;
+
+	/* Grab first item from bucket */
+	hlist_bl_lock(b);
+	if (!b->first) {
+		pr_err("duet: empty hash bucket marked in bitmap\n");
+		hlist_bl_unlock(b);
+		goto again;
+	}
+
+	found = 0;
+	hlist_bl_for_each_entry(itnode, n, b, node) {
+		if (itnode->state[task->id] & DUET_MASK_VALID) {
+			*itm = itnode->item;
+			itm->uuid.tid = task->id;
+			itm->state = itnode->state[task->id] &
+				     (~DUET_MASK_VALID);
+
+			itnode->refcount--;
+			/* Free or update node */
+			if (!itnode->refcount) {
+				hlist_bl_del(n);
+				hnode_destroy(itnode);
+			} else {
+				itnode->state[task->id] = 0;
+			}
+
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		hlist_bl_unlock(b);
+		goto again;
+	}
+
+	/* Are we still interested in this bucket? */
+	found = 0;
+	hlist_bl_for_each_entry(itnode, n, b, node) {
+		if (itnode->state[task->id] & DUET_MASK_VALID) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (found)
+		set_bit(bnum, task->bucket_bmap);
+
+	hlist_bl_unlock(b);
+	local_irq_restore(flags);
+	return 0;
+}
+
+/* Warning: expensive printing function. Use with care. */
+void hash_print(struct duet_task *task)
+{
+	unsigned long loop, count, start, end, buckets, flags;
+	unsigned long long nodes, tnodes;
+	struct hlist_bl_head *b;
+	struct hlist_bl_node *n;
+	struct item_hnode *itnode;
+
+	count = duet_env.itm_hash_size / 100;
+	tnodes = nodes = buckets = start = end = 0;
+	pr_info("duet: Printing hash table\n");
+	for (loop = 0; loop < duet_env.itm_hash_size; loop++) {
+		if (loop - start >= count) {
+			pr_info("duet:   Buckets %lu - %lu: %llu nodes (task: %llu)\n",
+				start, end, nodes, tnodes);
+			start = end = loop;
+			nodes = tnodes = 0;
+		}
+
+		/* Count bucket nodes */
+		b = duet_env.itm_hash_table + loop;
+		local_irq_save(flags);
+		hlist_bl_lock(b);
+		hlist_bl_for_each_entry(itnode, n, b, node) {
+			nodes++;
+			if (itnode->state[task->id] & DUET_MASK_VALID)
+				tnodes++;
+		}
+		hlist_bl_unlock(b);
+		local_irq_restore(flags);
+
+		end = loop;
+	}
+
+	if (start != loop - 1)
+		pr_info("duet:   Buckets %lu - %lu: %llu nodes (task: %llu)\n",
+			start, end, nodes, tnodes);
+}
diff --git a/mm/duet/hook.c b/mm/duet/hook.c
new file mode 100644
index 0000000..3ac89f5
--- /dev/null
+++ b/mm/duet/hook.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "common.h"
+
+/* Handle an event. We're in RCU context so whatever happens, stay awake! */
+void duet_hook(__u16 evtcode, void *data)
+{
+	struct page *page = NULL;
+	struct inode *inode = NULL;
+	struct duet_task *cur;
+	unsigned long page_idx = 0;
+	struct duet_uuid uuid;
+
+	/* Duet must be online */
+	if (!duet_online())
+		return;
+
+	/* Handle page event */
+	page = (struct page *)data;
+
+	/* Duet must be online, and the page must belong to a valid mapping */
+	if (!page || !page_mapping(page)) {
+		duet_dbg("duet: dropped event %x due to NULL mapping\n",
+			evtcode);
+		return;
+	}
+
+	inode = page_mapping(page)->host;
+	page_idx = page->index;
+
+	/* Check that we're referring to an actual inode and get its UUID */
+	if (!inode)
+		return;
+
+	uuid.ino = inode->i_ino;
+	uuid.gen = inode->i_generation;
+
+	/* Verify that the inode does not belong to a special file */
+	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+		return;
+
+	if (!inode->i_ino) {
+		pr_err("duet: inode not initialized\n");
+		return;
+	}
+
+	/* Look for tasks interested in this event type and invoke callbacks */
+	rcu_read_lock();
+	list_for_each_entry_rcu(cur, &duet_env.tasks, task_list) {
+		struct super_block *sb = cur->regpath->mnt->mnt_sb;
+
+		/* Verify that the event refers to the fs we're interested in */
+		if (sb && sb != inode->i_sb)
+			continue;
+
+		duet_dbg("duet: rcvd event %x on (ino %lu, gen %lu, idx %lu)\n",
+			evtcode, uuid.ino, uuid.gen, page_idx);
+
+		/* Use the inode bitmap to filter out event if applicable */
+		if (bittree_check_inode(&cur->bittree, cur, inode) == 1)
+			continue;
+
+		/* Update the hash table */
+		if (hash_add(cur, uuid, page_idx, evtcode, 0))
+			pr_err("duet: hash table add failed\n");
+
+		wake_up(&cur->event_queue);
+	}
+	rcu_read_unlock();
+}
diff --git a/mm/duet/init.c b/mm/duet/init.c
new file mode 100644
index 0000000..a9e5ea1
--- /dev/null
+++ b/mm/duet/init.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "common.h"
+#include "syscall.h"
+
+struct duet_info duet_env;
+duet_hook_t *duet_hook_fp;
+EXPORT_SYMBOL(duet_hook_fp);
+
+int duet_online(void)
+{
+	return (atomic_read(&duet_env.status) == DUET_STATUS_ON);
+}
+
+int duet_bootstrap(__u16 numtasks)
+{
+	if (atomic_cmpxchg(&duet_env.status, DUET_STATUS_OFF, DUET_STATUS_INIT)
+	    != DUET_STATUS_OFF) {
+		pr_err("duet: framework on, bootstrap aborted\n");
+		return 1;
+	}
+
+	duet_env.numtasks = (numtasks ? numtasks : DUET_DEF_NUMTASKS);
+
+	/* Initialize global hash table */
+	if (hash_init()) {
+		pr_err("duet: failed to initialize hash table\n");
+		return 1;
+	}
+
+	/* Initialize task list */
+	INIT_LIST_HEAD(&duet_env.tasks);
+	mutex_init(&duet_env.task_list_mutex);
+	atomic_set(&duet_env.status, DUET_STATUS_ON);
+
+	rcu_assign_pointer(duet_hook_fp, duet_hook);
+	synchronize_rcu();
+	return 0;
+}
+
+int duet_shutdown(void)
+{
+	struct duet_task *task;
+
+	if (atomic_cmpxchg(&duet_env.status, DUET_STATUS_ON, DUET_STATUS_CLEAN)
+	    != DUET_STATUS_ON) {
+		pr_err("duet: framework off, shutdown aborted\n");
+		return 1;
+	}
+
+	rcu_assign_pointer(duet_hook_fp, NULL);
+	synchronize_rcu();
+
+	/* Remove all tasks */
+	mutex_lock(&duet_env.task_list_mutex);
+	while (!list_empty(&duet_env.tasks)) {
+		task = list_entry_rcu(duet_env.tasks.next, struct duet_task,
+				task_list);
+		list_del_rcu(&task->task_list);
+		mutex_unlock(&duet_env.task_list_mutex);
+
+		/* Make sure everyone's let go before we free it */
+		synchronize_rcu();
+		wait_event(task->cleaner_queue,
+			atomic_read(&task->refcount) == 0);
+		duet_task_dispose(task);
+
+		mutex_lock(&duet_env.task_list_mutex);
+	}
+	mutex_unlock(&duet_env.task_list_mutex);
+
+	/* Destroy global hash table */
+	vfree((void *)duet_env.itm_hash_table);
+
+	INIT_LIST_HEAD(&duet_env.tasks);
+	mutex_destroy(&duet_env.task_list_mutex);
+	atomic_set(&duet_env.status, DUET_STATUS_OFF);
+	return 0;
+}
+
+SYSCALL_DEFINE2(duet_status, u16, flags, struct duet_status_args __user *, arg)
+{
+	int ret = 0;
+	struct duet_status_args *sa;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	sa = memdup_user(arg, sizeof(*sa));
+	if (IS_ERR(sa))
+		return PTR_ERR(sa);
+
+	/* For now, we only support one struct size */
+	if (sa->size != sizeof(*sa)) {
+		pr_err("duet_status: invalid args struct size (%u)\n",
+			sa->size);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* If we're cleaning up, only allow ops that affect Duet status */
+	if (atomic_read(&duet_env.status) != DUET_STATUS_ON && !(flags &
+	    (DUET_STATUS_START | DUET_STATUS_STOP | DUET_STATUS_REPORT))) {
+		pr_err("duet_status: ops rejected during shutdown\n");
+		ret = -EINVAL;
+		goto done;
+	}
+
+	switch (flags) {
+	case DUET_STATUS_START:
+		ret = duet_bootstrap(sa->maxtasks);
+
+		if (ret)
+			pr_err("duet: failed to enable framework\n");
+		else
+			pr_info("duet: framework enabled\n");
+
+		break;
+
+	case DUET_STATUS_STOP:
+		ret = duet_shutdown();
+
+		if (ret)
+			pr_err("duet: failed to disable framework\n");
+		else
+			pr_info("duet: framework disabled\n");
+
+		break;
+
+	case DUET_STATUS_REPORT:
+		ret = duet_online();
+		break;
+
+	case DUET_STATUS_PRINT_BMAP:
+		ret = duet_print_bmap(sa->id);
+		break;
+
+	case DUET_STATUS_PRINT_ITEM:
+		ret = duet_print_item(sa->id);
+		break;
+
+	case DUET_STATUS_PRINT_LIST:
+		ret = duet_print_list(arg);
+		goto done;
+
+	default:
+		pr_info("duet_status: invalid flags\n");
+		ret = -EINVAL;
+		goto done;
+	}
+
+	if (copy_to_user(arg, sa, sizeof(*sa))) {
+		pr_err("duet_status: copy_to_user failed\n");
+		ret = -EINVAL;
+		goto done;
+	}
+
+done:
+	kfree(sa);
+	return ret;
+}
diff --git a/mm/duet/path.c b/mm/duet/path.c
new file mode 100644
index 0000000..103ab42
--- /dev/null
+++ b/mm/duet/path.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "common.h"
+#include "syscall.h"
+
+/* Scan through the page cache for a given inode */
+static int find_get_inode(struct super_block *sb, struct duet_uuid c_uuid,
+	struct inode **c_inode)
+{
+	struct inode *inode = NULL;
+
+	*c_inode = NULL;
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		spin_lock(&inode->i_lock);
+		if (!*c_inode && inode->i_ino == c_uuid.ino &&
+		    inode->i_generation == c_uuid.gen &&
+		    !(inode->i_state & DUET_INODE_FREEING)) {
+			atomic_inc(&inode->i_count);
+			*c_inode = inode;
+			spin_unlock(&inode->i_lock);
+			spin_unlock(&sb->s_inode_list_lock);
+			return 0;
+		}
+		spin_unlock(&inode->i_lock);
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+
+	/* We shouldn't get here unless we failed */
+	return 1;
+}
+
+int do_find_path(struct duet_task *task, struct inode *inode, int getpath,
+	char *buf, int bufsize)
+{
+	int len;
+	char *p;
+	struct path path;
+	struct dentry *alias, *i_dentry = NULL;
+
+	if (!task) {
+		pr_err("do_find_path: invalid task\n");
+		return -EINVAL;
+	}
+
+	if (getpath)
+		buf[0] = '\0';
+
+	/* Get the path for at least one alias of the inode */
+	if (hlist_empty(&inode->i_dentry))
+		return -ENOENT;
+
+	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+		if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED))
+			continue;
+
+		i_dentry = alias;
+
+		/* Now get the path */
+		len = PATH_MAX;
+		memset(task->pathbuf, 0, len);
+		path.mnt = task->regpath->mnt;
+		path.dentry = i_dentry;
+
+		p = d_path(&path, task->pathbuf, len);
+		if (IS_ERR(p)) {
+			pr_err("do_find_path: d_path failed\n");
+			continue;
+		} else if (!p) {
+			duet_dbg("do_find_path: dentry not found\n");
+			continue;
+		}
+
+		/* Is this path of interest? */
+		if (memcmp(task->regpathname, p, task->regpathlen - 1)) {
+			duet_dbg("do_find_path: no common ancestor\n");
+			continue;
+		}
+
+		/* Got one. If it fits, return it */
+		if (getpath && (bufsize < len - (p - task->pathbuf)))
+			return -ENOMEM;
+
+		duet_dbg("do_find_path: got %s\n", p);
+		if (getpath)
+			memcpy(buf, p, len - (p - task->pathbuf));
+
+		return 0;
+	}
+
+	/* We only get here if we got nothing */
+	return -ENOENT;
+}
+
+int duet_find_path(struct duet_task *task, struct duet_uuid uuid, int getpath,
+	char *buf, int bufsize)
+{
+	int ret = 0;
+	struct inode *ino;
+
+	if (!task) {
+		pr_err("duet_find_path: invalid task\n");
+		return -EINVAL;
+	}
+
+	/* First, we need to find struct inode for child and parent */
+	if (find_get_inode(task->regpath->mnt->mnt_sb, uuid, &ino)) {
+		duet_dbg("duet_find_path: child inode not found\n");
+		return -ENOENT;
+	}
+
+	ret = do_find_path(task, ino, getpath, buf, bufsize);
+
+	iput(ino);
+	return ret;
+}
+
+SYSCALL_DEFINE3(duet_get_path, struct duet_uuid_arg __user *, uuid,
+		char __user *, pathbuf, int, pathbufsize)
+{
+	int ret = 0;
+	struct duet_uuid_arg *ua;
+	struct duet_task *task;
+	char *buf;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!duet_online())
+		return -ESRCH;
+
+	/* Do some basic sanity checking */
+	if (!uuid || pathbufsize <= 0)
+		return -EINVAL;
+
+	buf = kcalloc(pathbufsize, sizeof(char), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ua = memdup_user(uuid, sizeof(*uuid));
+	if (IS_ERR(ua)) {
+		kfree(buf);
+		return PTR_ERR(ua);
+	}
+
+	/* For now, we only support one struct size */
+	if (ua->size != sizeof(*ua)) {
+		pr_err("duet_get_path: invalid args struct size (%u)\n",
+			ua->size);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	task = duet_find_task(ua->uuid.tid);
+	if (!task) {
+		ret = -ENOENT;
+		goto done;
+	}
+
+	ret = duet_find_path(task, ua->uuid, 1, buf, pathbufsize);
+
+	if (!ret && copy_to_user(pathbuf, buf, pathbufsize))
+		ret = -EFAULT;
+
+	/* decref and wake up cleaner if needed */
+	if (atomic_dec_and_test(&task->refcount))
+		wake_up(&task->cleaner_queue);
+
+done:
+	kfree(ua);
+	kfree(buf);
+	return ret;
+}
diff --git a/mm/duet/syscall.h b/mm/duet/syscall.h
new file mode 100644
index 0000000..1bc6830
--- /dev/null
+++ b/mm/duet/syscall.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include "common.h"
+
+/* Status syscall flags */
+#define DUET_STATUS_START	0x0001
+#define DUET_STATUS_STOP	0x0002
+#define DUET_STATUS_REPORT	0x0004
+#define DUET_STATUS_PRINT_BMAP	0x0008
+#define DUET_STATUS_PRINT_ITEM	0x0010
+#define DUET_STATUS_PRINT_LIST	0x0020
+
+struct duet_task_attrs {
+	__u8	id;
+	int	fd;
+	char	name[NAME_MAX];
+	__u32	regmask;
+	char	path[PATH_MAX];
+};
+
+struct duet_status_args {
+	__u32 size;						/* in */
+	union {
+		/* DUET_START args */
+		struct {
+			__u16 maxtasks;				/* in */
+		};
+
+		/* DUET_PRINT_BIT, DUET_PRINT_ITEM args */
+		struct {
+			__u8 id;				/* in */
+		};
+
+		/* DUET_PRINT_LIST args */
+		struct {
+			__u16 numtasks;				/* out */
+			struct duet_task_attrs tasks[0];	/* out */
+		};
+	};
+};
+
+/* Bmap syscall flags */
+#define DUET_BMAP_SET		0x0001
+#define DUET_BMAP_RESET		0x0002
+#define DUET_BMAP_CHECK		0x0004
+
+struct duet_uuid_arg {
+	__u32			size;
+	struct duet_uuid	uuid;
+};
diff --git a/mm/duet/task.c b/mm/duet/task.c
new file mode 100644
index 0000000..c91fad3
--- /dev/null
+++ b/mm/duet/task.c
@@ -0,0 +1,584 @@
+/*
+ * Copyright (C) 2016 George Amvrosiadis.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/namei.h>
+#include <linux/anon_inodes.h>
+#include "common.h"
+#include "syscall.h"
+
+/*
+ * To synchronize access to the task list and structures without compromising
+ * scalability, a two-level approach is used. At the task list level, which is
+ * rarely updated, RCU is used. For the task structures, we use traditional
+ * reference counting. The two techniques are interweaved to achieve overall
+ * consistency.
+ */
+
+static unsigned int duet_poll(struct file *file, poll_table *wait)
+{
+	__u8 *tid = file->private_data;
+	struct duet_task *task;
+	int ret = 0;
+
+	task = duet_find_task(*tid);
+	if (!task) {
+		pr_err("duet_poll: task not found\n");
+		return ret;
+	}
+
+	poll_wait(file, &task->event_queue, wait);
+	if (bitmap_weight(task->bucket_bmap, duet_env.itm_hash_size))
+		ret = POLLIN | POLLRDNORM;
+
+	return ret;
+}
+
+/*
+ * Copy an item to user space, returning how much we copied.
+ *
+ * We already checked that the event size is smaller than the
+ * buffer we had in duet_read() below.
+ */
+static ssize_t copy_item_to_user(struct duet_task *task,
+				 struct duet_item *item,
+				 char __user *buf)
+{
+	size_t item_size = sizeof(struct duet_item);
+
+	/* Send the item */
+	if (copy_to_user(buf, item, item_size))
+		return -EFAULT;
+
+	buf += item_size;
+
+	duet_dbg("duet_read: sending (ino%lu, gen%u, idx%lu, %x)\n",
+		 item->uuid.ino, item->uuid.gen, item->idx, item->state);
+
+	return item_size;
+}
+
+/*
+ * Sends out duet items. The number of bytes returned corresponds to the number
+ * of sizeof(struct duet_item) items fetched. Items are checked against the
+ * bitmap, and discarded if they have been marked; this can happen because an
+ * insertion can occur between the last read and the last bitmap set operation.
+ */
+static ssize_t duet_read(struct file *file, char __user *buf,
+			 size_t count, loff_t *pos)
+{
+	struct duet_task *task;
+	struct duet_item item;
+	char __user *start;
+	int ret, err;
+	__u8 *tid;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+	start = buf;
+	tid = file->private_data;
+
+	task = duet_find_task(*tid);
+	if (!task)
+		return -ENOENT;
+
+	add_wait_queue(&task->event_queue, &wait);
+	while (1) {
+		/* Fetch an item only if there's space to store it */
+		if (sizeof(struct duet_item) > count)
+			err = -EINVAL;
+		else
+			err = hash_fetch(task, &item);
+
+		if (!err) {
+			ret = copy_item_to_user(task, &item, buf);
+			if (ret < 0)
+				break;
+			buf += ret;
+			count -= ret;
+			continue;
+		}
+
+		ret = -EAGAIN;
+		if (file->f_flags & O_NONBLOCK)
+			break;
+		ret = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+
+		if (start != buf)
+			break;
+
+		wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+	}
+	remove_wait_queue(&task->event_queue, &wait);
+
+	if (start != buf && ret != -EFAULT)
+		ret = buf - start;
+
+	/* Decref and wake up cleaner if needed */
+	if (atomic_dec_and_test(&task->refcount))
+		wake_up(&task->cleaner_queue);
+
+	return ret;
+}
+
+/*
+ * Properly dismantle and dispose of a task struct.
+ * At this point we've guaranteed that noone else is accessing the
+ * task struct, so we don't need any locks
+ */
+void duet_task_dispose(struct duet_task *task)
+{
+	int ret = 0;
+	struct duet_item itm;
+
+	/* Dispose of the bitmap tree */
+	bittree_destroy(&task->bittree);
+
+	/* Dispose of hash table entries, bucket bitmap */
+	while (!ret)
+		ret = hash_fetch(task, &itm);
+	kfree(task->bucket_bmap);
+
+	putname(task->name);
+	path_put(task->regpath);
+	kfree(task->regpath);
+	kfree(task->regpathname);
+	kfree(task->pathbuf);
+	kfree(task);
+}
+
+static int duet_release(struct inode *ignored, struct file *file)
+{
+	__u8 *tid = file->private_data;
+	struct duet_task *cur;
+
+	/* Find the task in the list, then dispose of it */
+	mutex_lock(&duet_env.task_list_mutex);
+	list_for_each_entry_rcu(cur, &duet_env.tasks, task_list) {
+		if (cur->id == *tid) {
+#ifdef CONFIG_DUET_STATS
+			hash_print(cur);
+			bittree_print(cur);
+#endif /* CONFIG_DUET_STATS */
+			list_del_rcu(&cur->task_list);
+			mutex_unlock(&duet_env.task_list_mutex);
+
+			/* Wait until everyone's done with it */
+			synchronize_rcu();
+			wait_event(cur->cleaner_queue,
+				atomic_read(&cur->refcount) == 0);
+
+			pr_info("duet: deregistered task %d\n",	cur->id);
+
+			duet_task_dispose(cur);
+			kfree(tid);
+			return 0;
+		}
+	}
+	mutex_unlock(&duet_env.task_list_mutex);
+
+	return -ENOENT;
+}
+
+static const struct file_operations duet_fops = {
+	.show_fdinfo	= NULL,
+	.poll		= duet_poll,
+	.read		= duet_read,
+	.fasync		= NULL,
+	.release	= duet_release,
+	.unlocked_ioctl	= NULL,
+	.compat_ioctl	= NULL,
+	.llseek		= noop_llseek,
+};
+
+static int process_inode(struct duet_task *task, struct inode *inode)
+{
+	struct radix_tree_iter iter;
+	struct duet_uuid uuid;
+	void **slot;
+	__u16 state;
+
+	/* Use the inode bitmap to decide whether to skip inode */
+	if (bittree_check_inode(&task->bittree, task, inode) == 1)
+		return 0;
+
+	/* Go through all pages of this inode */
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &inode->i_mapping->page_tree, &iter, 0) {
+		struct page *page;
+
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page))
+			continue;
+
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				slot = radix_tree_iter_retry(&iter);
+				continue;
+			}
+			/*
+			 * Shadow entry of recently evicted page, or swap entry
+			 * from shmem/tmpfs. Skip over it.
+			 */
+			continue;
+		}
+
+		state = DUET_PAGE_ADDED;
+		if (PageDirty(page))
+			state |= DUET_PAGE_DIRTY;
+		uuid.ino = inode->i_ino;
+		uuid.gen = inode->i_generation;
+		hash_add(task, uuid, page->index, state, 1);
+	}
+	rcu_read_unlock();
+
+	return 0;
+}
+
+/* Scan through the page cache for events of interest to the task */
+static int scan_page_cache(struct duet_task *task)
+{
+	struct inode *inode, *prev = NULL;
+	struct super_block *sb = task->regpath->mnt->mnt_sb;
+
+	pr_info("duet: page cache scan started\n");
+
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		struct address_space *mapping = inode->i_mapping;
+
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & DUET_INODE_FREEING ||
+		    mapping->nrpages == 0) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
+		atomic_inc(&inode->i_count);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&sb->s_inode_list_lock);
+
+		/*
+		 * We are holding a reference to inode so it won't be removed
+		 * from s_inodes list while we don't hold the s_inode_list_lock.
+		 * We cannot iput the inode now, though, as we may be holding
+		 * the last reference. We will iput it after the iteration is
+		 * done.
+		 */
+
+		iput(prev);
+		prev = inode;
+
+		process_inode(task, inode);
+
+		spin_lock(&sb->s_inode_list_lock);
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+	iput(prev);
+
+	pr_info("duet: page cache scan finished\n");
+
+	return 0;
+}
+
+/* Find task and increment its refcount */
+struct duet_task *duet_find_task(__u8 id)
+{
+	struct duet_task *cur, *task = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cur, &duet_env.tasks, task_list) {
+		if (cur->id == id) {
+			task = cur;
+			atomic_inc(&task->refcount);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return task;
+}
+
+/* Allocate and initialize a task struct */
+static int duet_task_init(struct duet_task **task, struct filename *name,
+	__u32 regmask, struct path *path)
+{
+	int len;
+	char *p;
+	u32 evtmask = regmask;
+
+	/* Do some sanity checking on event mask. */
+	if (evtmask & DUET_PAGE_EXISTS) {
+		if (evtmask & (DUET_PAGE_ADDED | DUET_PAGE_REMOVED)) {
+			pr_debug("duet_task_init: invalid regmask\n");
+			return -EINVAL;
+		}
+		evtmask |= (DUET_PAGE_ADDED | DUET_PAGE_REMOVED);
+	}
+
+	if (evtmask & DUET_PAGE_MODIFIED) {
+		if (evtmask & (DUET_PAGE_DIRTY | DUET_PAGE_FLUSHED)) {
+			pr_debug("duet_task_init: invalid regmask\n");
+			goto err;
+		}
+		evtmask |= (DUET_PAGE_DIRTY | DUET_PAGE_FLUSHED);
+	}
+
+	/* Allocate task info struct */
+	*task = kzalloc(sizeof(**task), GFP_KERNEL);
+	if (!(*task))
+		return -ENOMEM;
+
+	/* Allocate temporary space for getpath file paths */
+	(*task)->pathbuf = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!(*task)->pathbuf) {
+		pr_err("duet_task_init: buffer allocation failed\n");
+		kfree(*task);
+		return -ENOMEM;
+	}
+
+	/* Find and store registered dir path */
+	(*task)->regpathname = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!(*task)->regpathname) {
+		pr_err("duet_task_init: path allocation failed\n");
+		kfree((*task)->pathbuf);
+		kfree(*task);
+		return -ENOMEM;
+	}
+
+	/* Populate registered dir path buffer */
+	len = PATH_MAX;
+	p = d_path(path, (*task)->pathbuf, len);
+	if (IS_ERR(p)) {
+		pr_err("duet_task_init: path registration failed\n");
+		goto err;
+	} else if (!p) {
+		pr_err("duet_task_init: (null) registered path\n");
+		goto err;
+	}
+
+	(*task)->regpathlen = len - (p - (*task)->pathbuf);
+	memcpy((*task)->regpathname, p, (*task)->regpathlen);
+
+	(*task)->id = 1;
+	(*task)->name = name;
+	(*task)->regpath = path;
+	(*task)->evtmask = (regmask & 0xffff);
+	atomic_set(&(*task)->refcount, 0);
+	INIT_LIST_HEAD(&(*task)->task_list);
+	init_waitqueue_head(&(*task)->cleaner_queue);
+	init_waitqueue_head(&(*task)->event_queue);
+	bittree_init(&(*task)->bittree);
+
+	/* Initialize hash table bitmap */
+	(*task)->bmap_cursor = 0;
+	spin_lock_init(&(*task)->bbmap_lock);
+	(*task)->bucket_bmap = kzalloc(sizeof(unsigned long) *
+		BITS_TO_LONGS(duet_env.itm_hash_size), GFP_KERNEL);
+	if (!(*task)->bucket_bmap) {
+		pr_err("duet_task_init: hash bitmap alloc failed\n");
+		kfree((*task)->regpathname);
+		kfree((*task)->pathbuf);
+		kfree(*task);
+		return -ENOMEM;
+	}
+
+	return 0;
+err:
+	pr_err("duet_task_init: error registering task\n");
+	kfree((*task)->regpathname);
+	kfree((*task)->pathbuf);
+	kfree(*task);
+	return -EINVAL;
+}
+
+/* Register the task with Duet */
+int duet_register_task(struct filename *name, __u32 regmask, struct path *path)
+{
+	int ret;
+	__u8 *tid;
+	struct duet_task *cur, *task = NULL;
+	struct list_head *last;
+
+	/* Do some sanity checking on the parameters passed */
+	if (!path || !regmask)
+		return -EINVAL;
+
+	if (!path->dentry || !path->dentry->d_inode) {
+		pr_err("duet_register_task: invalid path\n");
+		return -EINVAL;
+	}
+
+	if (!S_ISDIR(path->dentry->d_inode->i_mode)) {
+		pr_err("duet_register_task: path is not a dir\n");
+		return -EINVAL;
+	}
+
+	ret = duet_task_init(&task, name, regmask, path);
+	if (ret) {
+		pr_err("duet_register_task: initialization failed\n");
+		return ret;
+	}
+
+	/* Now get an anonymous inode to use for communication with Duet */
+	tid = kzalloc(sizeof(__u8), GFP_KERNEL);
+	if (!tid) {
+		duet_task_dispose(task);
+		return -ENOMEM;
+	}
+
+	ret = anon_inode_getfd("duet", &duet_fops, tid,
+		O_RDONLY | ((regmask & DUET_FD_NONBLOCK) ? O_NONBLOCK : 0));
+	if (ret < 0) {
+		duet_task_dispose(task);
+		kfree(tid);
+		return ret;
+	}
+
+	task->fd = ret;
+
+	/* Find a free task id for the new task. Tasks are sorted by id. */
+	mutex_lock(&duet_env.task_list_mutex);
+	last = &duet_env.tasks;
+	list_for_each_entry_rcu(cur, &duet_env.tasks, task_list) {
+		if (cur->id == task->id)
+			(task->id)++;
+		else if (cur->id > task->id)
+			break;
+
+		last = &cur->task_list;
+	}
+	list_add_rcu(&task->task_list, last);
+	mutex_unlock(&duet_env.task_list_mutex);
+
+	*tid = task->id;
+
+	/* Before we return, scan the page cache for pages of interest */
+	scan_page_cache(task);
+
+	pr_info("duet: task %d (fd %d) registered %s(%d) with mask %x\n",
+		task->id, task->fd, task->regpathname, task->regpathlen,
+		task->evtmask);
+
+	return ret;
+}
+
+SYSCALL_DEFINE3(duet_init, const char __user *, taskname, u32, regmask,
+		const char __user *, pathname)
+{
+	int ret;
+	unsigned int lookup_flags = LOOKUP_DIRECTORY;
+	struct filename *name = NULL;
+	struct path *path = NULL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!duet_online())
+		return -ESRCH;
+
+	/* Do some basic sanity checking */
+	if (!pathname || !regmask)
+		return -EINVAL;
+
+	if (taskname) {
+		name = getname(taskname);
+		if (IS_ERR(name))
+			return PTR_ERR(name);
+	}
+
+	path = kzalloc(sizeof(struct path), GFP_KERNEL);
+	if (!path) {
+		putname(name);
+		return -ENOMEM;
+	}
+
+	ret = user_path_at(AT_FDCWD, pathname, lookup_flags, path);
+	if (ret) {
+		pr_err("duet_init: user_path_at failed\n");
+		goto err;
+	}
+
+	/* Register the task with the framework */
+	ret = duet_register_task(name, regmask, path);
+	if (ret < 0) {
+		pr_err("duet_init: task registration failed\n");
+		goto err;
+	}
+
+	return ret;
+
+err:
+	putname(name);
+	path_put(path);
+	kfree(path);
+	return ret;
+}
+
+SYSCALL_DEFINE2(duet_bmap, u16, flags, struct duet_uuid_arg __user *, arg)
+{
+	int ret = 0;
+	struct duet_uuid_arg *ua;
+	struct duet_task *task;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!duet_online())
+		return -ESRCH;
+
+	/* Do some basic sanity checking */
+	if (!arg)
+		return -EINVAL;
+
+	ua = memdup_user(arg, sizeof(*arg));
+	if (IS_ERR(ua))
+		return PTR_ERR(ua);
+
+	/* For now, we only support one struct size */
+	if (ua->size != sizeof(*ua)) {
+		pr_err("duet_bmap: invalid args struct size (%u)\n", ua->size);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	task = duet_find_task(ua->uuid.tid);
+	if (!task)
+		return -ENOENT;
+
+	switch (flags) {
+	case DUET_BMAP_SET:
+		ret = bittree_set(&task->bittree, ua->uuid);
+		break;
+
+	case DUET_BMAP_RESET:
+		ret = bittree_reset(&task->bittree, ua->uuid);
+		break;
+
+	case DUET_BMAP_CHECK:
+		ret = bittree_check(&task->bittree, ua->uuid, task);
+		break;
+
+	default:
+		pr_err("duet_bmap: invalid flags\n");
+		ret = -EINVAL;
+		break;
+	}
+
+	/* decreg and wake up cleaner if needed */
+	if (atomic_dec_and_test(&task->refcount))
+		wake_up(&task->cleaner_queue);
+
+done:
+	kfree(ua);
+	return ret;
+}
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ