[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <150174653826.104003.17977611280376142324.stgit@hn>
Date: Thu, 03 Aug 2017 00:48:58 -0700
From: Steven Swanson <swanson@....ucsd.edu>
To: linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-nvdimm@...ts.01.org
Cc: Steven Swanson <steven.swanson@...il.com>, dan.j.williams@...el.com
Subject: [RFC 07/16] NOVA: File and directory operations
To access file data via read(), Nova maintains a radix tree in DRAM for each
inode (nova_inode_info_header.tree) that maps file offsets to write log
entries. For directories, the same tree maps a hash of filenames to their
corresponding dentry.
In both cases, Nova populates the tree when the file or directory is opened,
by scanning its log.
Signed-off-by: Steven Swanson <swanson@...ucsd.edu>
---
fs/nova/dir.c | 760 +++++++++++++++++++++++++++++++++++++++++++
fs/nova/file.c | 943 +++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nova/namei.c | 919 ++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nova/symlink.c | 153 +++++++++
4 files changed, 2775 insertions(+)
create mode 100644 fs/nova/dir.c
create mode 100644 fs/nova/file.c
create mode 100644 fs/nova/namei.c
create mode 100644 fs/nova/symlink.c
diff --git a/fs/nova/dir.c b/fs/nova/dir.c
new file mode 100644
index 000000000000..47e89088a69b
--- /dev/null
+++ b/fs/nova/dir.c
@@ -0,0 +1,760 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * File operations for directories.
+ *
+ * Copyright 2015-2016 Regents of the University of California,
+ * UCSD Non-Volatile Systems Lab, Andiry Xu <jix024@...ucsd.edu>
+ * Copyright 2012-2013 Intel Corporation
+ * Copyright 2009-2011 Marco Stornelli <marco.stornelli@...il.com>
+ * Copyright 2003 Sony Corporation
+ * Copyright 2003 Matsushita Electric Industrial Co., Ltd.
+ * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "nova.h"
+#include "inode.h"
+
+/*
+ * Convert between a directory-entry type code and the file-type bits of a
+ * mode word: the type code is the S_IFMT field shifted down by 12 bits.
+ * IF2DT() is what the readdir paths pass to dir_emit() as the entry type.
+ */
+#define DT2IF(dt) (((dt) << 12) & S_IFMT)
+#define IF2DT(sif) (((sif) & S_IFMT) >> 12)
+
+/*
+ * Look up a name in a directory's in-DRAM radix tree.
+ *
+ * The tree is keyed by BKDRHash() of the name, so the result is whatever
+ * dentry is indexed under that hash; the name itself is not compared here.
+ * @sb and @pi are accepted but not used.
+ *
+ * Returns the indexed dentry, or NULL when nothing is stored under the hash.
+ */
+struct nova_dentry *nova_find_dentry(struct super_block *sb,
+	struct nova_inode *pi, struct inode *inode, const char *name,
+	unsigned long name_len)
+{
+	struct nova_inode_info_header *header = &NOVA_I(inode)->header;
+	unsigned long key = BKDRHash(name, name_len);
+
+	return radix_tree_lookup(&header->tree, key);
+}
+
+/*
+ * Index @direntry in the directory's radix tree under the hash of @name.
+ *
+ * Returns 0 on success or the error code reported by radix_tree_insert(),
+ * which is also logged.
+ */
+int nova_insert_dir_radix_tree(struct super_block *sb,
+	struct nova_inode_info_header *sih, const char *name,
+	int namelen, struct nova_dentry *direntry)
+{
+	unsigned long key = BKDRHash(name, namelen);
+	int err;
+
+	nova_dbgv("%s: insert %s hash %lu\n", __func__, name, key);
+
+	/* FIXME: hash collision ignored here */
+	err = radix_tree_insert(&sih->tree, key, direntry);
+	if (err != 0)
+		nova_dbg("%s ERROR %d: %s\n", __func__, err, name);
+
+	return err;
+}
+
+/*
+ * Compare a log dentry against a name.
+ *
+ * Returns 0 when the lengths are equal and the first @namelen bytes match,
+ * -EINVAL when the lengths differ, and otherwise the non-zero result of
+ * strncmp().
+ */
+static int nova_check_dentry_match(struct super_block *sb,
+	struct nova_dentry *dentry, const char *name, int namelen)
+{
+	if (dentry->name_len == namelen)
+		return strncmp(dentry->name, name, namelen);
+
+	return -EINVAL;
+}
+
+/*
+ * Remove the radix-tree index entry for @name from a directory.
+ *
+ * @replay:        non-zero during replay: whatever is stored under the hash
+ *                 is dropped without validation and 0 is returned.
+ * @create_dentry: if non-NULL, receives the dentry that was indexed
+ *                 (normal mode only).
+ *
+ * In normal mode the indexed dentry is validated (checksum when
+ * metadata_csum is on, non-zero ino, not invalid, name match) BEFORE it is
+ * taken out of the tree, so a failed validation -- e.g. a hash collision
+ * with a different name -- leaves the index untouched instead of silently
+ * dropping another name's entry.
+ *
+ * Returns 0 on success, -EINVAL if nothing is indexed or validation fails.
+ */
+int nova_remove_dir_radix_tree(struct super_block *sb,
+	struct nova_inode_info_header *sih, const char *name, int namelen,
+	int replay, struct nova_dentry **create_dentry)
+{
+	struct nova_dentry *entry;
+	struct nova_dentry *entryc, entry_copy;
+	unsigned long hash;
+
+	hash = BKDRHash(name, namelen);
+
+	if (replay) {
+		radix_tree_delete(&sih->tree, hash);
+		return 0;
+	}
+
+	entry = radix_tree_lookup(&sih->tree, hash);
+	if (!entry) {
+		nova_dbg("%s ERROR: %s, length %d, hash %lu\n",
+				__func__, name, namelen, hash);
+		return -EINVAL;
+	}
+
+	if (metadata_csum == 0) {
+		entryc = entry;
+	} else {
+		entryc = &entry_copy;
+		if (!nova_verify_entry_csum(sb, entry, entryc))
+			return -EINVAL;
+	}
+
+	if (entryc->ino == 0 || entryc->invalid ||
+	    nova_check_dentry_match(sb, entryc, name, namelen)) {
+		nova_dbg("%s dentry not match: %s, length %d, hash %lu\n",
+				__func__, name, namelen, hash);
+		/* for debug information, still allow access to nvmm */
+		nova_dbg("dentry: type %d, inode %llu, name %s, namelen %u, rec len %u\n",
+				entry->entry_type, le64_to_cpu(entry->ino),
+				entry->name, entry->name_len,
+				le16_to_cpu(entry->de_len));
+		return -EINVAL;
+	}
+
+	/* Validation passed: now it is safe to drop the index entry. */
+	radix_tree_delete(&sih->tree, hash);
+
+	if (create_dentry)
+		*create_dentry = entry;
+
+	return 0;
+}
+
+/*
+ * Empty a directory's in-DRAM radix tree.
+ *
+ * Entries are fetched in batches of FREE_BATCH with
+ * radix_tree_gang_lookup() and deleted one by one under the hash of their
+ * name.  When metadata_csum is on, each entry is first verified into a
+ * private copy; a verification failure aborts the walk.  A delete that
+ * returns NULL or a different pointer than the one looked up is logged.
+ */
+void nova_delete_dir_tree(struct super_block *sb,
+	struct nova_inode_info_header *sih)
+{
+	struct nova_dentry *direntry;
+	struct nova_dentry *direntryc, entry_copy;
+	unsigned long pos = 0;
+	struct nova_dentry *entries[FREE_BATCH];
+	timing_t delete_time;
+	int nr_entries;
+	int i;
+	void *ret;
+
+	NOVA_START_TIMING(delete_dir_tree_t, delete_time);
+
+	/*
+	 * Checksummed entries are validated into entry_copy.  Without
+	 * checksums direntryc is re-pointed at each entry inside the loop,
+	 * so there is no need to read the (still unset) direntry here.
+	 */
+	direntryc = &entry_copy;
+	do {
+		nr_entries = radix_tree_gang_lookup(&sih->tree,
+					(void **)entries, pos, FREE_BATCH);
+		for (i = 0; i < nr_entries; i++) {
+			direntry = entries[i];
+			BUG_ON(!direntry);
+
+			if (metadata_csum == 0)
+				direntryc = direntry;
+			else if (!nova_verify_entry_csum(sb, direntry,
+							direntryc))
+				goto out;
+
+			pos = BKDRHash(direntryc->name, direntryc->name_len);
+			ret = radix_tree_delete(&sih->tree, pos);
+			if (!ret || ret != direntry) {
+				nova_err(sb, "dentry: type %d, inode %llu, name %s, namelen %u, rec len %u\n",
+					direntry->entry_type,
+					le64_to_cpu(direntry->ino),
+					direntry->name, direntry->name_len,
+					le16_to_cpu(direntry->de_len));
+				if (!ret)
+					nova_dbg("ret is NULL\n");
+			}
+		}
+		/* Continue the next batch just past the last hash handled. */
+		pos++;
+	} while (nr_entries == FREE_BATCH);
+
+out:
+	NOVA_END_TIMING(delete_dir_tree_t, delete_time);
+}
+
+/* ========================= Entry operations ============================= */
+
+/*
+ * Write the two initial dentries of a new directory ("." and "..") back to
+ * back, starting at @de_entry.
+ *
+ * @de_entry:   where the "." entry goes; it is also treated as the start of
+ *              the log page (see curr_page below).
+ * @self_ino:   inode number stored in ".".
+ * @parent_ino: inode number stored in "..".
+ * @epoch_id:   epoch stamped into both entries.
+ *
+ * Each entry is zeroed, filled in, and has its checksum updated.  The page's
+ * entry count is then set and the written range is flushed.
+ *
+ * Returns the combined length in bytes of the two entries.
+ */
+static unsigned int nova_init_dentry(struct super_block *sb,
+ struct nova_dentry *de_entry, u64 self_ino, u64 parent_ino,
+ u64 epoch_id)
+{
+ void *start = de_entry;
+ struct nova_inode_log_page *curr_page = start;
+ unsigned int length;
+ unsigned short de_len;
+
+ /* First entry: "." (name length 1), pointing at the directory itself */
+ de_len = NOVA_DIR_LOG_REC_LEN(1);
+ memset(de_entry, 0, de_len);
+ de_entry->entry_type = DIR_LOG;
+ de_entry->epoch_id = epoch_id;
+ de_entry->trans_id = 0;
+ de_entry->ino = cpu_to_le64(self_ino);
+ de_entry->name_len = 1;
+ de_entry->de_len = cpu_to_le16(de_len);
+ de_entry->mtime = timespec_trunc(current_kernel_time(),
+ sb->s_time_gran).tv_sec;
+
+ de_entry->links_count = 1;
+ strncpy(de_entry->name, ".\0", 2);
+ /* Checksum is computed last, after every field is in place */
+ nova_update_entry_csum(de_entry);
+
+ length = de_len;
+
+ /* Second entry: ".." (name length 2), placed right after "." */
+ de_entry = (struct nova_dentry *)((char *)de_entry + length);
+ de_len = NOVA_DIR_LOG_REC_LEN(2);
+ memset(de_entry, 0, de_len);
+ de_entry->entry_type = DIR_LOG;
+ de_entry->epoch_id = epoch_id;
+ de_entry->trans_id = 0;
+ de_entry->ino = cpu_to_le64(parent_ino);
+ de_entry->name_len = 2;
+ de_entry->de_len = cpu_to_le16(de_len);
+ de_entry->mtime = timespec_trunc(current_kernel_time(),
+ sb->s_time_gran).tv_sec;
+
+ de_entry->links_count = 2;
+ strncpy(de_entry->name, "..\0", 3);
+ nova_update_entry_csum(de_entry);
+ length += de_len;
+
+ /* Record that the page now holds two entries */
+ nova_set_page_num_entries(sb, curr_page, 2, 1);
+
+ /* Flush both entries in one go */
+ nova_flush_buffer(start, length, 0);
+ return length;
+}
+
+/* Append . and .. entries
+ *
+ * Allocates one log page for the new directory inode @pi, makes it both the
+ * log head and tail, writes the "." and ".." dentries into it and advances
+ * the tail past them.
+ *
+ * When metadata_csum is enabled, the same is done for the alternate log
+ * (alter_log_head/alter_log_tail), the inode checksum is updated and the
+ * inode is copied to its alternate location.
+ *
+ * Returns 0 on success, -ENOMEM if a log page cannot be allocated, the
+ * error from nova_get_alter_inode_address(), or -EINVAL if the alternate
+ * inode cannot be mapped.
+ *
+ * TODO: why is epoch_id a parameter when we pass in the sb?
+ */
+int nova_append_dir_init_entries(struct super_block *sb,
+ struct nova_inode *pi, u64 self_ino, u64 parent_ino, u64 epoch_id)
+{
+ /* NOTE(review): sih lives on the stack and only ino and i_blk_type are
+  * set before it is handed to the allocator -- confirm the allocator
+  * reads nothing else from it.
+  */
+ struct nova_inode_info_header sih;
+ struct nova_inode *alter_pi;
+ u64 alter_pi_addr = 0;
+ int allocated;
+ int ret;
+ u64 new_block;
+ unsigned int length;
+ struct nova_dentry *de_entry;
+
+ sih.ino = self_ino;
+ sih.i_blk_type = NOVA_DEFAULT_BLOCK_TYPE;
+
+ /* Primary log page */
+ allocated = nova_allocate_inode_log_pages(sb, &sih, 1, &new_block,
+ ANY_CPU, 0);
+ if (allocated != 1) {
+ nova_err(sb, "ERROR: no inode log page available\n");
+ return -ENOMEM;
+ }
+
+ /* The inode is only written between memunlock and memlock */
+ nova_memunlock_inode(sb, pi);
+
+ pi->log_tail = pi->log_head = new_block;
+
+ de_entry = (struct nova_dentry *)nova_get_block(sb, new_block);
+
+ length = nova_init_dentry(sb, de_entry, self_ino, parent_ino, epoch_id);
+
+ nova_update_tail(pi, new_block + length);
+
+ nova_memlock_inode(sb, pi);
+
+ /* Without metadata checksums there is no alternate log to set up */
+ if (metadata_csum == 0)
+ return 0;
+
+ /* Alternate log page (last argument 1 instead of 0) */
+ /* NOTE(review): on failure here the primary page allocated above stays
+  * attached to pi -- confirm callers clean up.
+  */
+ allocated = nova_allocate_inode_log_pages(sb, &sih, 1, &new_block,
+ ANY_CPU, 1);
+ if (allocated != 1) {
+ nova_err(sb, "ERROR: no inode log page available\n");
+ return -ENOMEM;
+ }
+ nova_memunlock_inode(sb, pi);
+ pi->alter_log_tail = pi->alter_log_head = new_block;
+
+ de_entry = (struct nova_dentry *)nova_get_block(sb, new_block);
+
+ length = nova_init_dentry(sb, de_entry, self_ino, parent_ino, epoch_id);
+
+ nova_update_alter_tail(pi, new_block + length);
+ nova_update_alter_pages(sb, pi, pi->log_head,
+ pi->alter_log_head);
+ nova_update_inode_checksum(pi);
+ nova_flush_buffer(pi, sizeof(struct nova_inode), 0);
+ nova_memlock_inode(sb, pi);
+
+ /* Get alternate inode address */
+ ret = nova_get_alter_inode_address(sb, self_ino, &alter_pi_addr);
+ if (ret)
+ return ret;
+
+ alter_pi = (struct nova_inode *)nova_get_block(sb, alter_pi_addr);
+ if (!alter_pi)
+ return -EINVAL;
+
+ /* Mirror the finished inode into its alternate copy */
+ nova_memunlock_inode(sb, alter_pi);
+ memcpy_to_pmem_nocache(alter_pi, pi, sizeof(struct nova_inode));
+ nova_memlock_inode(sb, alter_pi);
+
+ return 0;
+}
+
+/* adds a directory entry pointing to the inode. assumes the inode has
+ * already been logged for consistency
+ *
+ * @dentry:   VFS dentry being added; its parent is the directory updated.
+ * @ino:      inode number the new entry refers to.
+ * @inc_link: link-count change passed through to nova_append_dentry().
+ * @update:   filled in by nova_append_dentry(); update->curr_entry is used
+ *            to locate the appended log entry.
+ * @epoch_id: epoch passed through to nova_append_dentry().
+ *
+ * Appends a dentry to the directory log and indexes it in the directory's
+ * radix tree.  Returns 0 on success, -EINVAL for an empty name, or the
+ * error from the append / radix-tree insert.  Every exit goes through the
+ * timing end, matching nova_remove_dentry().
+ */
+int nova_add_dentry(struct dentry *dentry, u64 ino, int inc_link,
+	struct nova_inode_update *update, u64 epoch_id)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct super_block *sb = dir->i_sb;
+	struct nova_inode_info *si = NOVA_I(dir);
+	struct nova_inode_info_header *sih = &si->header;
+	struct nova_inode *pidir;
+	const char *name = dentry->d_name.name;
+	int namelen = dentry->d_name.len;
+	struct nova_dentry *direntry;
+	unsigned short loglen;
+	int ret;
+	u64 curr_entry;
+	timing_t add_dentry_time;
+
+	nova_dbg_verbose("%s: dir %lu new inode %llu\n",
+				__func__, dir->i_ino, ino);
+	nova_dbg_verbose("%s: %s %d\n", __func__, name, namelen);
+	NOVA_START_TIMING(add_dentry_t, add_dentry_time);
+	if (namelen == 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	pidir = nova_get_inode(sb, dir);
+
+	/*
+	 * XXX shouldn't update any times until successful
+	 * completion of syscall, but too many callers depend
+	 * on this.
+	 */
+	dir->i_mtime = dir->i_ctime = current_time(dir);
+
+	loglen = NOVA_DIR_LOG_REC_LEN(namelen);
+	ret = nova_append_dentry(sb, pidir, dir, dentry,
+				ino, loglen, update,
+				inc_link, epoch_id);
+
+	if (ret) {
+		nova_dbg("%s: append dir entry failure\n", __func__);
+		goto out;
+	}
+
+	/* Index the freshly appended log entry by name hash. */
+	curr_entry = update->curr_entry;
+	direntry = (struct nova_dentry *)nova_get_block(sb, curr_entry);
+	sih->last_dentry = curr_entry;
+	ret = nova_insert_dir_radix_tree(sb, sih, name, namelen, direntry);
+
+	sih->trans_id++;
+out:
+	NOVA_END_TIMING(add_dentry_t, add_dentry_time);
+	return ret;
+}
+
+/*
+ * Decide whether an existing dentry may be updated in place.
+ *
+ * Returns 1 when @dentry is non-NULL, passes checksum verification (only
+ * performed when metadata_csum is on) and was written in @epoch_id;
+ * returns 0 otherwise.  The NULL check is done first so that a NULL
+ * pointer is never handed to the checksum verifier.
+ */
+static int nova_can_inplace_update_dentry(struct super_block *sb,
+	struct nova_dentry *dentry, u64 epoch_id)
+{
+	struct nova_dentry *dentryc, entry_copy;
+
+	if (!dentry)
+		return 0;
+
+	if (metadata_csum == 0) {
+		dentryc = dentry;
+	} else {
+		dentryc = &entry_copy;
+		if (!nova_verify_entry_csum(sb, dentry, dentryc))
+			return 0;
+	}
+
+	return dentryc->epoch_id == epoch_id;
+}
+
+/*
+ * Update an existing directory log entry in place.
+ *
+ * Packs the link change, epoch and the directory's current transaction id
+ * into a nova_log_entry_info and forwards it to
+ * nova_inplace_update_log_entry(), whose result is returned.  The
+ * designated initializer zeroes every field not named here, so the callee
+ * never sees indeterminate stack contents.
+ */
+static int nova_inplace_update_dentry(struct super_block *sb,
+	struct inode *dir, struct nova_dentry *dentry, int link_change,
+	u64 epoch_id)
+{
+	struct nova_inode_info *si = NOVA_I(dir);
+	struct nova_inode_info_header *sih = &si->header;
+	struct nova_log_entry_info entry_info = {
+		.type		= DIR_LOG,
+		.link_change	= link_change,
+		.epoch_id	= epoch_id,
+		.trans_id	= sih->trans_id,
+		.inplace	= 1,
+	};
+
+	return nova_inplace_update_log_entry(sb, dir, dentry,
+					&entry_info);
+}
+
+/* removes a directory entry pointing to the inode. assumes the inode has
+ * already been logged for consistency
+ *
+ * The name is first dropped from the directory's radix tree.  If the old
+ * log entry belongs to the current epoch it is updated in place; otherwise
+ * a new dentry with inode number 0 is appended to the log and both the old
+ * (create) and new (delete) entries are reported through @update.
+ *
+ * Returns 0 on success, -EINVAL for an empty name, or the error from the
+ * radix-tree removal / log append.
+ */
+int nova_remove_dentry(struct dentry *dentry, int dec_link,
+ struct nova_inode_update *update, u64 epoch_id)
+{
+ struct inode *dir = dentry->d_parent->d_inode;
+ struct super_block *sb = dir->i_sb;
+ struct nova_sb_info *sbi = NOVA_SB(sb);
+ struct nova_inode_info *si = NOVA_I(dir);
+ struct nova_inode_info_header *sih = &si->header;
+ struct nova_inode *pidir;
+ struct qstr *entry = &dentry->d_name;
+ struct nova_dentry *old_dentry = NULL;
+ unsigned short loglen;
+ int ret;
+ u64 curr_entry;
+ timing_t remove_dentry_time;
+
+ NOVA_START_TIMING(remove_dentry_t, remove_dentry_time);
+
+ update->create_dentry = NULL;
+ update->delete_dentry = NULL;
+
+ if (!dentry->d_name.len) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Drop the name from the DRAM index; old_dentry is the log entry
+  * that was indexed under it.
+  */
+ ret = nova_remove_dir_radix_tree(sb, sih, entry->name, entry->len, 0,
+ &old_dentry);
+
+ if (ret)
+ goto out;
+
+ pidir = nova_get_inode(sb, dir);
+
+ dir->i_mtime = dir->i_ctime = current_time(dir);
+
+ if (nova_can_inplace_update_dentry(sb, old_dentry, epoch_id)) {
+ /* NOTE(review): the return value of the in-place update is
+  * dropped, so this path always reports success -- confirm
+  * that is intended.
+  */
+ nova_inplace_update_dentry(sb, dir, old_dentry,
+ dec_link, epoch_id);
+ curr_entry = nova_get_addr_off(sbi, old_dentry);
+
+ sih->last_dentry = curr_entry;
+ /* Leave create/delete_dentry to NULL
+ * Do not change tail/alter_tail if used as input
+ */
+ if (update->tail == 0) {
+ update->tail = sih->log_tail;
+ update->alter_tail = sih->alter_log_tail;
+ }
+ sih->trans_id++;
+ goto out;
+ }
+
+ /* Append a dentry with inode number 0 for this name */
+ /* NOTE(review): if the append fails the name has already been removed
+  * from the radix tree above and is not re-inserted.
+  */
+ loglen = NOVA_DIR_LOG_REC_LEN(entry->len);
+ ret = nova_append_dentry(sb, pidir, dir, dentry,
+ 0, loglen, update,
+ dec_link, epoch_id);
+
+ if (ret) {
+ nova_dbg("%s: append dir entry failure\n", __func__);
+ goto out;
+ }
+
+ /* Report both entries to the caller via @update */
+ update->create_dentry = old_dentry;
+ curr_entry = update->curr_entry;
+ update->delete_dentry = (struct nova_dentry *)nova_get_block(sb,
+ curr_entry);
+ sih->last_dentry = curr_entry;
+ sih->trans_id++;
+out:
+ NOVA_END_TIMING(remove_dentry_t, remove_dentry_time);
+ return ret;
+}
+
+/* Create dentry and delete dentry must be invalidated together
+ *
+ * Takes the create/delete dentry pair recorded in @update.  Nothing is
+ * done when there is no create dentry.  The create dentry is always marked
+ * reassigned; both entries are invalidated only when the create dentry
+ * passes checksum verification (if metadata_csum is on) and
+ * old_entry_freeable() accepts its epoch.
+ *
+ * Returns 0 when nothing was invalidated, otherwise the result of
+ * invalidating the delete dentry.
+ */
+int nova_invalidate_dentries(struct super_block *sb,
+	struct nova_inode_update *update)
+{
+	struct nova_dentry *create_dentry = update->create_dentry;
+	struct nova_dentry *delete_dentry = update->delete_dentry;
+	struct nova_dentry *create_dentryc, entry_copy;
+
+	if (!create_dentry)
+		return 0;
+
+	nova_reassign_logentry(sb, create_dentry, DIR_LOG);
+
+	if (metadata_csum == 0) {
+		create_dentryc = create_dentry;
+	} else {
+		create_dentryc = &entry_copy;
+		if (!nova_verify_entry_csum(sb, create_dentry, create_dentryc))
+			return 0;
+	}
+
+	if (!old_entry_freeable(sb, create_dentryc->epoch_id))
+		return 0;
+
+	/* NOTE(review): the result of invalidating the create dentry is
+	 * not checked; only the delete dentry's result is reported.
+	 */
+	nova_invalidate_logentry(sb, create_dentry, DIR_LOG, 0);
+
+	return nova_invalidate_logentry(sb, delete_dentry, DIR_LOG, 0);
+}
+
+/*
+ * Readdir driven by the directory's radix tree.
+ *
+ * Starting at hash ctx->pos, entries are fetched in batches of FREE_BATCH
+ * and emitted with dir_emit().  After each emitted entry ctx->pos is set to
+ * that entry's hash + 1, so a later call resumes behind it.  Entries with
+ * inode number 0 are skipped.
+ *
+ * Returns 0 normally (including when the caller's buffer fills up), -EIO
+ * if an entry fails checksum verification, or the error from
+ * nova_get_inode_address() (ctx->pos is then set to READDIR_END).
+ */
+static int nova_readdir_slow(struct file *file, struct dir_context *ctx)
+{
+	struct inode *inode = file_inode(file);
+	struct super_block *sb = inode->i_sb;
+	struct nova_inode *pidir;
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+	struct nova_inode *child_pi;
+	struct nova_dentry *entry;
+	struct nova_dentry *entryc, entry_copy;
+	struct nova_dentry *entries[FREE_BATCH];
+	int nr_entries;
+	u64 pi_addr;
+	unsigned long pos = 0;
+	ino_t ino;
+	int i;
+	int ret = 0;
+	timing_t readdir_time;
+
+	NOVA_START_TIMING(readdir_t, readdir_time);
+	pidir = nova_get_inode(sb, inode);
+	nova_dbgv("%s: ino %llu, size %llu, pos %llu\n",
+			__func__, (u64)inode->i_ino,
+			pidir->i_size, ctx->pos);
+
+	/* sih is the address of a member of si and so can never be NULL;
+	 * no check is needed here.
+	 */
+
+	pos = ctx->pos;
+	if (pos == READDIR_END)
+		goto out;
+
+	/*
+	 * Checksummed entries are validated into entry_copy; without
+	 * checksums entryc is re-pointed at each entry inside the loop.
+	 */
+	entryc = &entry_copy;
+
+	do {
+		nr_entries = radix_tree_gang_lookup(&sih->tree,
+					(void **)entries, pos, FREE_BATCH);
+		for (i = 0; i < nr_entries; i++) {
+			entry = entries[i];
+
+			if (metadata_csum == 0) {
+				entryc = entry;
+			} else if (!nova_verify_entry_csum(sb, entry,
+							entryc)) {
+				ret = -EIO;
+				goto out;
+			}
+
+			pos = BKDRHash(entryc->name, entryc->name_len);
+			ino = __le64_to_cpu(entryc->ino);
+			if (ino == 0)
+				continue;
+
+			ret = nova_get_inode_address(sb, ino, 0, &pi_addr,
+						     0, 0);
+
+			if (ret) {
+				nova_dbg("%s: get child inode %lu address failed %d\n",
+					 __func__, ino, ret);
+				ctx->pos = READDIR_END;
+				goto out;
+			}
+
+			child_pi = nova_get_block(sb, pi_addr);
+			nova_dbgv("ctx: ino %llu, name %s, name_len %u, de_len %u, csum 0x%x\n",
+				(u64)ino, entry->name, entry->name_len,
+				entry->de_len, entry->csum);
+			if (!dir_emit(ctx, entryc->name, entryc->name_len,
+				ino, IF2DT(le16_to_cpu(child_pi->i_mode)))) {
+				/* Caller's buffer is full: not an error. */
+				nova_dbgv("Here: pos %llu\n", ctx->pos);
+				goto out;
+			}
+			ctx->pos = pos + 1;
+		}
+		pos++;
+	} while (nr_entries == FREE_BATCH);
+
+out:
+	NOVA_END_TIMING(readdir_t, readdir_time);
+	return ret;
+}
+
+/*
+ * Find where in the log to resume a directory walk.
+ *
+ * Looks up the first radix-tree entry whose key (name hash) is >= @pos and
+ * returns the offset nova_get_addr_off() computes for it, or 0 when the
+ * tree holds no such entry.  A directory's tree stores struct nova_dentry
+ * pointers, so that is the type used here.
+ */
+static u64 nova_find_next_dentry_addr(struct super_block *sb,
+	struct nova_inode_info_header *sih, u64 pos)
+{
+	struct nova_sb_info *sbi = NOVA_SB(sb);
+	struct nova_dentry *entries[1];
+	int nr_entries;
+
+	nr_entries = radix_tree_gang_lookup(&sih->tree,
+					(void **)entries, pos, 1);
+	if (nr_entries != 1)
+		return 0;
+
+	return nova_get_addr_off(sbi, entries[0]);
+}
+
+/*
+ * Readdir by walking the directory's log from a start point to the tail.
+ *
+ * Start point: the log head when ctx->pos is 0, otherwise the log address
+ * of the first indexed dentry whose hash is >= ctx->pos.
+ *
+ * Emission lags one entry behind parsing: when a valid dentry is parsed,
+ * the PREVIOUS valid dentry is emitted, and ctx->pos is then set to the
+ * hash of the entry just parsed.  Each delayed entry is therefore emitted
+ * with its own inode number (prev_ino) and its own inode's mode, never the
+ * inode number of the entry that was parsed after it.
+ *
+ * Returns 0 normally (including when the caller's buffer fills up), -EIO
+ * on a checksum failure, or the error from nova_get_inode_address()
+ * (ctx->pos is then set to READDIR_END).
+ */
+static int nova_readdir_fast(struct file *file, struct dir_context *ctx)
+{
+	struct inode *inode = file_inode(file);
+	struct super_block *sb = inode->i_sb;
+	struct nova_inode *pidir;
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+	struct nova_inode *child_pi;
+	struct nova_inode *prev_child_pi = NULL;
+	struct nova_dentry *entry = NULL;
+	struct nova_dentry *entryc, entry_copy;
+	struct nova_dentry *prev_entry = NULL;
+	struct nova_dentry *prev_entryc, prev_entry_copy;
+	unsigned short de_len;
+	u64 pi_addr;
+	unsigned long pos = 0;
+	ino_t ino;
+	ino_t prev_ino = 0;
+	void *addr;
+	u64 curr_p;
+	u8 type;
+	int ret = 0;
+	timing_t readdir_time;
+
+	NOVA_START_TIMING(readdir_t, readdir_time);
+	pidir = nova_get_inode(sb, inode);
+	nova_dbgv("%s: ino %llu, size %llu, pos 0x%llx\n",
+			__func__, (u64)inode->i_ino,
+			pidir->i_size, ctx->pos);
+
+	if (sih->log_head == 0) {
+		nova_err(sb, "Dir %lu log is NULL!\n", inode->i_ino);
+		BUG();
+		return -EINVAL;
+	}
+
+	pos = ctx->pos;
+
+	if (pos == 0)
+		curr_p = sih->log_head;
+	else if (pos == READDIR_END)
+		goto out;
+	else {
+		curr_p = nova_find_next_dentry_addr(sb, sih, pos);
+		if (curr_p == 0)
+			goto out;
+	}
+
+	/* Both are re-pointed in the loop when checksums are off. */
+	entryc = &entry_copy;
+	prev_entryc = &prev_entry_copy;
+
+	while (curr_p != sih->log_tail) {
+		if (goto_next_page(sb, curr_p))
+			curr_p = next_log_page(sb, curr_p);
+
+		if (curr_p == 0) {
+			nova_err(sb, "Dir %lu log is NULL!\n", inode->i_ino);
+			BUG();
+			return -EINVAL;
+		}
+
+		/* Skip non-dentry records found in the directory log. */
+		addr = (void *)nova_get_block(sb, curr_p);
+		type = nova_get_entry_type(addr);
+		switch (type) {
+		case SET_ATTR:
+			curr_p += sizeof(struct nova_setattr_logentry);
+			continue;
+		case LINK_CHANGE:
+			curr_p += sizeof(struct nova_link_change_entry);
+			continue;
+		case DIR_LOG:
+			break;
+		default:
+			nova_dbg("%s: unknown type %d, 0x%llx\n",
+					__func__, type, curr_p);
+			BUG();
+			return -EINVAL;
+		}
+
+		entry = (struct nova_dentry *)nova_get_block(sb, curr_p);
+		nova_dbgv("curr_p: 0x%llx, type %d, ino %llu, name %s, namelen %u, rec len %u\n",
+			  curr_p, entry->entry_type, le64_to_cpu(entry->ino),
+			  entry->name, entry->name_len,
+			  le16_to_cpu(entry->de_len));
+
+		if (metadata_csum == 0) {
+			entryc = entry;
+		} else if (!nova_verify_entry_csum(sb, entry, entryc)) {
+			ret = -EIO;
+			goto out;
+		}
+
+		de_len = le16_to_cpu(entryc->de_len);
+		if (entryc->ino > 0 && entryc->invalid == 0
+					&& entryc->reassigned == 0) {
+			ino = __le64_to_cpu(entryc->ino);
+			pos = BKDRHash(entryc->name, entryc->name_len);
+
+			ret = nova_get_inode_address(sb, ino, 0,
+						     &pi_addr, 0, 0);
+			if (ret) {
+				nova_dbg("%s: get child inode %lu address failed %d\n",
+					 __func__, ino, ret);
+				ctx->pos = READDIR_END;
+				goto out;
+			}
+
+			child_pi = nova_get_block(sb, pi_addr);
+			nova_dbgv("ctx: ino %llu, name %s, name_len %u, de_len %u\n",
+				(u64)ino, entry->name, entry->name_len,
+				entry->de_len);
+
+			/* Emit the previous entry with ITS inode number. */
+			if (prev_entry && !dir_emit(ctx, prev_entryc->name,
+				prev_entryc->name_len, prev_ino,
+				IF2DT(le16_to_cpu(prev_child_pi->i_mode)))) {
+				/* Caller's buffer is full: not an error. */
+				nova_dbgv("Here: pos %llu\n", ctx->pos);
+				goto out;
+			}
+
+			/* The current entry becomes the pending one. */
+			prev_entry = entry;
+			prev_ino = ino;
+
+			if (metadata_csum == 0)
+				prev_entryc = prev_entry;
+			else
+				memcpy(prev_entryc, entryc,
+					sizeof(struct nova_dentry));
+
+			prev_child_pi = child_pi;
+		}
+		ctx->pos = pos;
+		curr_p += de_len;
+	}
+
+	/* Flush the last pending entry, if any. */
+	if (prev_entry && !dir_emit(ctx, prev_entryc->name,
+			prev_entryc->name_len, prev_ino,
+			IF2DT(le16_to_cpu(prev_child_pi->i_mode))))
+		goto out;
+
+	ctx->pos = READDIR_END;
+out:
+	NOVA_END_TIMING(readdir_t, readdir_time);
+	nova_dbgv("%s return\n", __func__);
+	return ret;
+}
+
+/*
+ * Directory iteration entry point: uses the radix-tree walk
+ * (nova_readdir_slow) when the superblock's mount_snapshot field is
+ * non-zero and the log walk (nova_readdir_fast) otherwise.
+ */
+static int nova_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct nova_sb_info *sbi = NOVA_SB(file_inode(file)->i_sb);
+
+	if (sbi->mount_snapshot != 0)
+		return nova_readdir_slow(file, ctx);
+
+	return nova_readdir_fast(file, ctx);
+}
+
+/*
+ * File operations installed on NOVA directories.  Iteration goes through
+ * nova_readdir(); seeking, read() and fsync use the generic/no-op helpers
+ * named below.
+ */
+const struct file_operations nova_dir_operations = {
+ .llseek = generic_file_llseek,
+ .read = generic_read_dir,
+ .iterate = nova_readdir,
+ .fsync = noop_fsync,
+ .unlocked_ioctl = nova_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = nova_compat_ioctl,
+#endif
+};
diff --git a/fs/nova/file.c b/fs/nova/file.c
new file mode 100644
index 000000000000..51b2114796df
--- /dev/null
+++ b/fs/nova/file.c
@@ -0,0 +1,943 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * File operations for files.
+ *
+ * Copyright 2015-2016 Regents of the University of California,
+ * UCSD Non-Volatile Systems Lab, Andiry Xu <jix024@...ucsd.edu>
+ * Copyright 2012-2013 Intel Corporation
+ * Copyright 2009-2011 Marco Stornelli <marco.stornelli@...il.com>
+ * Copyright 2003 Sony Corporation
+ * Copyright 2003 Matsushita Electric Industrial Co., Ltd.
+ * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/uaccess.h>
+#include <linux/falloc.h>
+#include <asm/mman.h>
+#include "nova.h"
+#include "inode.h"
+
+
+/*
+ * Tell whether the block-size hint of an inode may still be changed.
+ *
+ * Data blocks are not deallocated until the file is deleted, so the hint
+ * is frozen once the in-DRAM size is greater than zero.  @pi and @new_size
+ * are not consulted.
+ *
+ * Returns 1 if the hint may be set, 0 otherwise.
+ */
+static inline int nova_can_set_blocksize_hint(struct inode *inode,
+	struct nova_inode *pi, loff_t new_size)
+{
+	struct nova_inode_info_header *header = &NOVA_I(inode)->header;
+
+	return !(header->i_size > 0);
+}
+
+/*
+ * Choose the block type of an inode from the size it is about to get.
+ *
+ * Does nothing when nova_can_set_blocksize_hint() refuses.  Otherwise
+ * picks the 1G type for sizes of at least 0x40000000, the 2M type for sizes
+ * of at least 0x200000, and the 4K type for everything smaller, and stores
+ * the choice in pi->i_blk_type.
+ *
+ * Always returns 0.
+ */
+int nova_set_blocksize_hint(struct super_block *sb, struct inode *inode,
+	struct nova_inode *pi, loff_t new_size)
+{
+	unsigned short block_type;
+
+	if (!nova_can_set_blocksize_hint(inode, pi, new_size))
+		return 0;
+
+	if (new_size >= 0x40000000)		/* 1G */
+		block_type = NOVA_BLOCK_TYPE_1G;
+	else if (new_size >= 0x200000)		/* 2M */
+		block_type = NOVA_BLOCK_TYPE_2M;
+	else					/* defaulting to 4K */
+		block_type = NOVA_BLOCK_TYPE_4K;
+
+	nova_dbg_verbose(
+		"Hint: new_size 0x%llx, i_size 0x%llx\n",
+		new_size, pi->i_size);
+	nova_dbg_verbose("Setting the hint to 0x%x\n", block_type);
+
+	/* The inode is only written between memunlock and memlock. */
+	nova_memunlock_inode(sb, pi);
+	pi->i_blk_type = block_type;
+	nova_memlock_inode(sb, pi);
+
+	return 0;
+}
+
+/*
+ * llseek for regular files.
+ *
+ * Everything except SEEK_DATA and SEEK_HOLE is delegated to
+ * generic_file_llseek().  For those two, nova_find_region() adjusts
+ * @offset under the inode lock (it is passed 0 for SEEK_DATA and 1 for
+ * SEEK_HOLE); the file position is then updated if it changed.
+ *
+ * Returns the new offset, the error from nova_find_region(), or -ENXIO
+ * when the resulting offset is out of range.
+ */
+static loff_t nova_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	loff_t result;
+	int err;
+
+	if (origin != SEEK_DATA && origin != SEEK_HOLE)
+		return generic_file_llseek(file, offset, origin);
+
+	inode_lock(inode);
+
+	err = nova_find_region(inode, &offset, origin == SEEK_HOLE ? 1 : 0);
+	if (err) {
+		result = err;
+		goto unlock;
+	}
+
+	if ((offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) ||
+	    offset > inode->i_sb->s_maxbytes) {
+		result = -ENXIO;
+		goto unlock;
+	}
+
+	if (offset != file->f_pos) {
+		file->f_pos = offset;
+		file->f_version = 0;
+	}
+	result = offset;
+
+unlock:
+	inode_unlock(inode);
+	return result;
+}
+
+/* fsync handler, reached from both msync() and fsync().
+ *
+ * If the file is not mmapped there is nothing to write back and only the
+ * persistence barrier is issued.  Otherwise the checksum/parity of the
+ * affected page range is reset and generic_file_fsync() is called first.
+ *
+ * Returns 0, or the result of generic_file_fsync().
+ *
+ * TODO: Check if we can avoid calling nova_flush_buffer() for fsync. We use
+ * movnti to write data to files, so we may want to avoid doing unnecessary
+ * nova_flush_buffer() on fsync()
+ */
+static int nova_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
+	unsigned long first_pg, last_pg;
+	int err = 0;
+	timing_t fsync_time;
+
+	NOVA_START_TIMING(fsync_t, fsync_time);
+
+	if (datasync)
+		NOVA_STATS_ADD(fdatasync, 1);
+
+	/* No need to flush if the file is not mmaped */
+	if (mapping_mapped(mapping)) {
+		first_pg = start >> PAGE_SHIFT;
+		last_pg = (end + 1) >> PAGE_SHIFT;
+		nova_dbgv("%s: msync pgoff range %lu to %lu\n",
+				__func__, first_pg, last_pg);
+
+		/*
+		 * Set csum and parity.
+		 * We do not protect data integrity during mmap, but we have
+		 * to update csum here since msync clears dirty bit.
+		 */
+		nova_reset_mapping_csum_parity(sb, inode, mapping,
+						first_pg, last_pg);
+
+		err = generic_file_fsync(file, start, end, datasync);
+	}
+
+	PERSISTENT_BARRIER();
+	NOVA_END_TIMING(fsync_t, fsync_time);
+
+	return err;
+}
+
+/* This callback is called when a file is closed.
+ * It only issues a persistence barrier; @file and @id are unused.
+ * Always returns 0.
+ */
+static int nova_flush(struct file *file, fl_owner_t id)
+{
+ PERSISTENT_BARRIER();
+ return 0;
+}
+
+/* Open handler: defers entirely to generic_file_open(). */
+static int nova_open(struct inode *inode, struct file *filp)
+{
+ return generic_file_open(inode, filp);
+}
+
+/*
+ * fallocate() for regular files.
+ *
+ * Only mode 0 and FALLOC_FL_KEEP_SIZE are supported.  The requested range
+ * is walked block by block: ranges already covered by a write entry that
+ * may be updated in place only get their recorded size raised; everything
+ * else is backed by newly allocated zeroed blocks, each recorded by a file
+ * write entry appended to the inode log.  Once all entries are appended
+ * the log tail is committed and the file tree is updated.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for unsupported modes, -ENODEV for
+ * directories, -EACCES if the inode cannot be obtained, -ENOSPC if no
+ * block could be allocated or the log append fails, or another negative
+ * error from the helpers.  On failure the blocks and log entries of the
+ * incomplete operation are handed to nova_cleanup_incomplete_write().
+ */
+static long nova_fallocate(struct file *file, int mode, loff_t offset,
+	loff_t len)
+{
+	struct inode *inode = file->f_path.dentry->d_inode;
+	struct super_block *sb = inode->i_sb;
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+	struct nova_inode *pi;
+	struct nova_file_write_entry *entry;
+	struct nova_file_write_entry *entryc, entry_copy;
+	struct nova_file_write_entry entry_data;
+	struct nova_inode_update update;
+	unsigned long start_blk, num_blocks, ent_blks = 0;
+	unsigned long total_blocks = 0;
+	unsigned long blocknr = 0;
+	unsigned long blockoff;
+	unsigned int data_bits;
+	loff_t new_size;
+	long ret = 0;
+	int inplace = 0;
+	int blocksize_mask;
+	int allocated = 0;
+	bool update_log = false;
+	timing_t fallocate_time;
+	u64 begin_tail = 0;
+	u64 epoch_id;
+	u32 time;
+
+	/*
+	 * Fallocate does not make much sense for CoW,
+	 * but we still support it for DAX-mmap purpose.
+	 */
+
+	/* We only support the FALLOC_FL_KEEP_SIZE mode */
+	if (mode & ~FALLOC_FL_KEEP_SIZE)
+		return -EOPNOTSUPP;
+
+	if (S_ISDIR(inode->i_mode))
+		return -ENODEV;
+
+	new_size = len + offset;
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && new_size > inode->i_size) {
+		ret = inode_newsize_ok(inode, new_size);
+		if (ret)
+			return ret;
+	} else {
+		new_size = inode->i_size;
+	}
+
+	nova_dbgv("%s: inode %lu, offset %lld, count %lld, mode 0x%x\n",
+			__func__, inode->i_ino,	offset, len, mode);
+
+	NOVA_START_TIMING(fallocate_t, fallocate_time);
+	inode_lock(inode);
+
+	/*
+	 * Snapshot the log tails before anything can jump to "out": the
+	 * cleanup call there reads update.tail, which must never be
+	 * indeterminate.
+	 */
+	update.tail = sih->log_tail;
+	update.alter_tail = sih->alter_log_tail;
+
+	pi = nova_get_inode(sb, inode);
+	if (!pi) {
+		ret = -EACCES;
+		goto out;
+	}
+
+	inode->i_mtime = inode->i_ctime = current_time(inode);
+	time = current_time(inode).tv_sec;
+
+	/* Round the byte range out to whole blocks. */
+	blocksize_mask = sb->s_blocksize - 1;
+	start_blk = offset >> sb->s_blocksize_bits;
+	blockoff = offset & blocksize_mask;
+	num_blocks = (blockoff + len + blocksize_mask) >> sb->s_blocksize_bits;
+
+	epoch_id = nova_get_epoch_id(sb);
+	while (num_blocks > 0) {
+		ent_blks = nova_check_existing_entry(sb, inode, num_blocks,
+						start_blk, &entry, &entry_copy,
+						1, epoch_id, &inplace, 1);
+
+		entryc = (metadata_csum == 0) ? entry : &entry_copy;
+
+		if (entry && inplace) {
+			if (entryc->size < new_size) {
+				/* Update existing entry */
+				nova_memunlock_range(sb, entry, CACHELINE_SIZE);
+				entry->size = new_size;
+				nova_update_entry_csum(entry);
+				nova_update_alter_entry(sb, entry);
+				nova_memlock_range(sb, entry, CACHELINE_SIZE);
+			}
+			allocated = ent_blks;
+			goto next;
+		}
+
+		/* Allocate zeroed blocks to fill hole */
+		allocated = nova_new_data_blocks(sb, sih, &blocknr, start_blk,
+				 ent_blks, ALLOC_INIT_ZERO, ANY_CPU,
+				 ALLOC_FROM_HEAD);
+		nova_dbgv("%s: alloc %d blocks @ %lu\n", __func__,
+						allocated, blocknr);
+
+		if (allocated <= 0) {
+			nova_dbg("%s alloc %lu blocks failed!, %d\n",
+						__func__, ent_blks, allocated);
+			/*
+			 * A zero-block allocation is a failure too; do not
+			 * let it fall out of the function as success.
+			 */
+			ret = allocated < 0 ? allocated : -ENOSPC;
+			goto out;
+		}
+
+		/* Handle hole fill write */
+		nova_init_file_write_entry(sb, sih, &entry_data, epoch_id,
+					start_blk, allocated, blocknr,
+					time, new_size);
+
+		ret = nova_append_file_write_entry(sb, pi, inode,
+					&entry_data, &update);
+		if (ret) {
+			nova_dbg("%s: append inode entry failed\n", __func__);
+			ret = -ENOSPC;
+			goto out;
+		}
+
+		entry = nova_get_block(sb, update.curr_entry);
+		nova_reset_csum_parity_range(sb, sih, entry, start_blk,
+					start_blk + allocated, 1, 0);
+
+		update_log = true;
+		/* Remember the first appended entry of this operation. */
+		if (begin_tail == 0)
+			begin_tail = update.curr_entry;
+
+		total_blocks += allocated;
+next:
+		num_blocks -= allocated;
+		start_blk += allocated;
+	}
+
+	data_bits = blk_type_to_shift[sih->i_blk_type];
+	sih->i_blocks += (total_blocks << (data_bits - sb->s_blocksize_bits));
+
+	inode->i_blocks = sih->i_blocks;
+
+	if (update_log) {
+		/* Commit the new tail, in DRAM first, then in the inode. */
+		sih->log_tail = update.tail;
+		sih->alter_log_tail = update.alter_tail;
+
+		nova_memunlock_inode(sb, pi);
+		nova_update_tail(pi, update.tail);
+		if (metadata_csum)
+			nova_update_alter_tail(pi, update.alter_tail);
+		nova_memlock_inode(sb, pi);
+
+		/* Update file tree */
+		ret = nova_reassign_file_tree(sb, sih, begin_tail);
+		if (ret)
+			goto out;
+
+	}
+
+	nova_dbgv("blocks: %lu, %lu\n", inode->i_blocks, sih->i_blocks);
+
+	if (ret || (mode & FALLOC_FL_KEEP_SIZE)) {
+		nova_memunlock_inode(sb, pi);
+		pi->i_flags |= cpu_to_le32(NOVA_EOFBLOCKS_FL);
+		nova_memlock_inode(sb, pi);
+	}
+
+	if (!(mode & FALLOC_FL_KEEP_SIZE) && new_size > inode->i_size) {
+		inode->i_size = new_size;
+		sih->i_size = new_size;
+	}
+
+	nova_memunlock_inode(sb, pi);
+	nova_update_inode_checksum(pi);
+	nova_update_alter_inode(sb, inode, pi);
+	nova_memlock_inode(sb, pi);
+
+	sih->trans_id++;
+out:
+	if (ret < 0)
+		nova_cleanup_incomplete_write(sb, sih, blocknr, allocated,
+						begin_tail, update.tail);
+
+	inode_unlock(inode);
+	NOVA_END_TIMING(fallocate_t, fallocate_time);
+	return ret;
+}
+
+/* iomap_begin callback that forwards to nova_iomap_begin() with its last
+ * argument set to false.
+ * NOTE(review): per this wrapper's name the flag presumably disables
+ * locking inside nova_iomap_begin() -- confirm against its definition.
+ */
+static int nova_iomap_begin_nolock(struct inode *inode, loff_t offset,
+ loff_t length, unsigned int flags, struct iomap *iomap)
+{
+ return nova_iomap_begin(inode, offset, length, flags, iomap, false);
+}
+
+/* iomap operations handed to dax_iomap_rw() by the read/write iterators
+ * in this file, which take the inode lock themselves around the call.
+ */
+static struct iomap_ops nova_iomap_ops_nolock = {
+ .iomap_begin = nova_iomap_begin_nolock,
+ .iomap_end = nova_iomap_end,
+};
+
+/*
+ * read_iter through DAX.
+ *
+ * An empty request returns 0 immediately.  Otherwise the read is performed
+ * by dax_iomap_rw() under the shared inode lock, after which the file is
+ * marked accessed.
+ *
+ * Returns whatever dax_iomap_rw() returned.
+ */
+static ssize_t nova_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *host = iocb->ki_filp->f_mapping->host;
+	ssize_t nread;
+	timing_t read_iter_time;
+
+	if (iov_iter_count(to) == 0)
+		return 0;
+
+	NOVA_START_TIMING(read_iter_t, read_iter_time);
+
+	inode_lock_shared(host);
+	nread = dax_iomap_rw(iocb, to, &nova_iomap_ops_nolock);
+	inode_unlock_shared(host);
+
+	file_accessed(iocb->ki_filp);
+
+	NOVA_END_TIMING(read_iter_t, read_iter_time);
+	return nread;
+}
+
+/*
+ * Reset data checksum/parity for the blocks touched by a write of @count
+ * bytes at @offset.
+ *
+ * Nothing is done when both data_csum and data_parity are off.  The byte
+ * range is converted to block numbers with the superblock's block shift;
+ * the end block is bumped by one when the end offset is not a multiple of
+ * the inode's block size.
+ *
+ * Always returns 0.
+ */
+static int nova_update_iter_csum_parity(struct super_block *sb,
+	struct inode *inode, loff_t offset, size_t count)
+{
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+	unsigned long first_blk, last_blk;
+	loff_t end_off;
+
+	if (data_csum != 0 || data_parity != 0) {
+		end_off = offset + count;
+
+		first_blk = offset >> sb->s_blocksize_bits;
+		last_blk = end_off >> sb->s_blocksize_bits;
+		if (end_off & (nova_inode_blk_size(sih) - 1))
+			last_blk++;
+
+		nova_reset_csum_parity_range(sb, sih, NULL, first_blk,
+					last_blk, 0, 0);
+	}
+
+	return 0;
+}
+
+/*
+ * write_iter handler: performs the generic write checks, strips privileges,
+ * updates the timestamps and then transfers the data through dax_iomap_rw(),
+ * all under the exclusive inode lock.  The in-core file size is extended when
+ * the write ends past it, and checksum/parity state is reset for the written
+ * range.
+ *
+ * Returns the number of bytes written (after generic_write_sync()), 0, or a
+ * negative errno.
+ */
+static ssize_t nova_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct super_block *sb = inode->i_sb;
+	timing_t write_iter_time;
+	loff_t start_pos;
+	size_t nbytes;
+	ssize_t ret;
+
+	NOVA_START_TIMING(write_iter_t, write_iter_time);
+	inode_lock(inode);
+
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out_unlock;
+
+	ret = file_remove_privs(file);
+	if (!ret)
+		ret = file_update_time(file);
+	if (ret)
+		goto out_unlock;
+
+	/*
+	 * Snapshot the requested range before the transfer; ki_pos is
+	 * re-read afterwards for the size update.
+	 */
+	start_pos = iocb->ki_pos;
+	nbytes = iov_iter_count(from);
+
+	ret = dax_iomap_rw(iocb, from, &nova_iomap_ops_nolock);
+	if (ret > 0) {
+		if (iocb->ki_pos > i_size_read(inode)) {
+			i_size_write(inode, iocb->ki_pos);
+			mark_inode_dirty(inode);
+		}
+	}
+
+	nova_update_iter_csum_parity(sb, inode, start_pos, nbytes);
+
+out_unlock:
+	inode_unlock(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	NOVA_END_TIMING(write_iter_t, write_iter_time);
+	return ret;
+}
+
+/*
+ * Copy up to @len bytes of file data starting at *@ppos into the user buffer
+ * @buf, walking the file one write-log entry at a time.
+ *
+ * For each page index the matching write entry is looked up with
+ * nova_get_write_entry(); pages with no entry are returned as zeroes via
+ * __clear_user().  When metadata_csum is set the entry is verified into a
+ * private copy first, and when data_csum is set the data is verified unless
+ * nova_find_pgoff_in_vma() returns non-zero for that page.
+ *
+ * Locking is the caller's job: nova_dax_file_read() wraps this call in
+ * inode_lock_shared().
+ *
+ * Returns the number of bytes copied, or a negative errno (or 0) if nothing
+ * was copied.  Every exit goes through the "out" label so that *@ppos, the
+ * access time and the read_bytes statistic always account for data that
+ * already reached user space.
+ */
+static ssize_t
+do_dax_mapping_read(struct file *filp, char __user *buf,
+	size_t len, loff_t *ppos)
+{
+	struct inode *inode = filp->f_mapping->host;
+	struct super_block *sb = inode->i_sb;
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+	struct nova_file_write_entry *entry;
+	struct nova_file_write_entry *entryc, entry_copy;
+	pgoff_t index, end_index;
+	unsigned long offset;
+	loff_t isize, pos;
+	size_t copied = 0;
+	ssize_t error = 0;
+	timing_t memcpy_time;
+
+	pos = *ppos;
+	index = pos >> PAGE_SHIFT;
+	offset = pos & ~PAGE_MASK;
+
+	if (!access_ok(VERIFY_WRITE, buf, len)) {
+		error = -EFAULT;
+		goto out;
+	}
+
+	isize = i_size_read(inode);
+	if (!isize)
+		goto out;
+
+	nova_dbgv("%s: inode %lu, offset %lld, count %lu, size %lld\n",
+		__func__, inode->i_ino, pos, len, isize);
+
+	if (len > isize - pos)
+		len = isize - pos;
+
+	if (len <= 0)
+		goto out;
+
+	end_index = (isize - 1) >> PAGE_SHIFT;
+	do {
+		unsigned long nr, left;
+		unsigned long nvmm = 0;
+		void *dax_mem = NULL;
+		int zero = 0;
+
+		/* nr is the maximum number of bytes to copy from this page */
+		if (index >= end_index) {
+			if (index > end_index)
+				goto out;
+			nr = ((isize - 1) & ~PAGE_MASK) + 1;
+			if (nr <= offset)
+				goto out;
+		}
+
+		entry = nova_get_write_entry(sb, sih, index);
+		if (unlikely(entry == NULL)) {
+			nova_dbgv("Required extent not found: pgoff %lu, inode size %lld\n",
+				index, isize);
+			nr = PAGE_SIZE;
+			zero = 1;
+			goto memcpy;
+		}
+
+		/*
+		 * Pick the view of the entry to read from: the entry itself,
+		 * or a verified private copy when metadata checksums are on.
+		 */
+		if (metadata_csum == 0) {
+			entryc = entry;
+		} else {
+			entryc = &entry_copy;
+			if (!nova_verify_entry_csum(sb, entry, entryc)) {
+				error = -EIO;
+				goto out;
+			}
+		}
+
+		/* Find contiguous blocks */
+		if (index < entryc->pgoff ||
+			index - entryc->pgoff >= entryc->num_pages) {
+			nova_err(sb, "%s ERROR: %lu, entry pgoff %llu, num %u, blocknr %llu\n",
+				__func__, index, entry->pgoff,
+				entry->num_pages, entry->block >> PAGE_SHIFT);
+			error = -EINVAL;
+			goto out;
+		}
+		if (entryc->reassigned == 0) {
+			nr = (entryc->num_pages - (index - entryc->pgoff))
+				* PAGE_SIZE;
+		} else {
+			nr = PAGE_SIZE;
+		}
+
+		nvmm = get_nvmm(sb, sih, entryc, index);
+		dax_mem = nova_get_block(sb, (nvmm << PAGE_SHIFT));
+
+memcpy:
+		/* clamp to what is left of this extent and of the request */
+		nr = nr - offset;
+		if (nr > len - copied)
+			nr = len - copied;
+
+		if ((!zero) && (data_csum > 0)) {
+			if (nova_find_pgoff_in_vma(inode, index))
+				goto skip_verify;
+
+			if (!nova_verify_data_csum(sb, sih, nvmm, offset, nr)) {
+				nova_err(sb, "%s: nova data checksum and recovery fail! inode %lu, offset %lu, entry pgoff %lu, %u pages, pgoff %lu\n",
+					__func__, inode->i_ino, offset,
+					entry->pgoff, entry->num_pages, index);
+				error = -EIO;
+				goto out;
+			}
+		}
+skip_verify:
+		NOVA_START_TIMING(memcpy_r_nvmm_t, memcpy_time);
+
+		if (!zero)
+			left = __copy_to_user(buf + copied,
+						dax_mem + offset, nr);
+		else
+			left = __clear_user(buf + copied, nr);
+
+		NOVA_END_TIMING(memcpy_r_nvmm_t, memcpy_time);
+
+		if (left) {
+			nova_dbg("%s ERROR!: bytes %lu, left %lu\n",
+				__func__, nr, left);
+			error = -EFAULT;
+			goto out;
+		}
+
+		/* advance, carrying whole pages out of offset into index */
+		copied += (nr - left);
+		offset += (nr - left);
+		index += offset >> PAGE_SHIFT;
+		offset &= ~PAGE_MASK;
+	} while (copied < len);
+
+out:
+	*ppos = pos + copied;
+	if (filp)
+		file_accessed(filp);
+
+	NOVA_STATS_ADD(read_bytes, copied);
+
+	nova_dbgv("%s returned %zu\n", __func__, copied);
+	return copied ? (ssize_t)copied : error;
+}
+
+/*
+ * read() entry point.  Runs do_dax_mapping_read() with the inode lock held
+ * in shared mode and accounts the elapsed time under dax_read_t.
+ */
+static ssize_t nova_dax_file_read(struct file *filp, char __user *buf,
+	size_t len, loff_t *ppos)
+{
+	struct inode *host = filp->f_mapping->host;
+	timing_t dax_read_time;
+	ssize_t nread;
+
+	NOVA_START_TIMING(dax_read_t, dax_read_time);
+
+	inode_lock_shared(host);
+	nread = do_dax_mapping_read(filp, buf, len, ppos);
+	inode_unlock_shared(host);
+
+	NOVA_END_TIMING(dax_read_t, dax_read_time);
+	return nread;
+}
+
+/*
+ * Copy-on-write write() path.
+ *
+ * Under sb_start_write() and the exclusive inode lock this:
+ *  - resolves the write position (end of file for O_APPEND),
+ *  - checks the persistent inode with nova_check_inode_integrity(),
+ *  - falls back to nova_inplace_file_write() (after dropping the locks)
+ *    when the range overlaps a vma per nova_check_overlap_vmas(),
+ *  - then, in a loop, allocates fresh data blocks, fills partial head/tail
+ *    blocks with nova_handle_head_tail_blocks(), copies the user data in,
+ *    optionally protects it with checksum/parity, and appends a write
+ *    entry to the inode log,
+ *  - finally commits the new log tail with nova_update_inode() and calls
+ *    nova_reassign_file_tree() for the new entries.
+ *
+ * On any negative result nova_cleanup_incomplete_write() is called for the
+ * blocks and log range touched so far.
+ *
+ * Returns the number of bytes written, 0 for a zero-length request, or a
+ * negative errno.  *@ppos is advanced on success.
+ */
+static ssize_t nova_cow_file_write(struct file *filp,
+	const char __user *buf, size_t len, loff_t *ppos)
+{
+	struct address_space *mapping = filp->f_mapping;
+	struct inode *inode = mapping->host;
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+	struct super_block *sb = inode->i_sb;
+	struct nova_inode *pi, inode_copy;
+	struct nova_file_write_entry entry_data;
+	struct nova_inode_update update;
+	ssize_t written = 0;
+	loff_t pos;
+	size_t count, offset, copied;
+	unsigned long start_blk, num_blocks;
+	unsigned long total_blocks;
+	unsigned long blocknr = 0;
+	unsigned int data_bits;
+	int allocated = 0;
+	void *kmem;
+	u64 file_size;
+	size_t bytes;
+	long status = 0;
+	timing_t cow_write_time, memcpy_time;
+	unsigned long step = 0;
+	ssize_t ret;
+	u64 begin_tail = 0;
+	int try_inplace = 0;
+	u64 epoch_id;
+	u32 time;
+
+	if (len == 0)
+		return 0;
+
+	/*
+	 * The error path hands update.tail to
+	 * nova_cleanup_incomplete_write(); give it a defined value for the
+	 * failures that happen before the real log tail is loaded below.
+	 */
+	update.tail = 0;
+	update.alter_tail = 0;
+
+	NOVA_START_TIMING(cow_write_t, cow_write_time);
+
+	sb_start_write(inode->i_sb);
+	inode_lock(inode);
+
+	if (!access_ok(VERIFY_READ, buf, len)) {
+		ret = -EFAULT;
+		goto out;
+	}
+	pos = *ppos;
+
+	if (filp->f_flags & O_APPEND)
+		pos = i_size_read(inode);
+
+	count = len;
+
+	pi = nova_get_block(sb, sih->pi_addr);
+
+	/* nova_inode tail pointer will be updated and we make sure all other
+	 * inode fields are good before checksumming the whole structure
+	 */
+	if (nova_check_inode_integrity(sb, sih->ino, sih->pi_addr,
+			sih->alter_pi_addr, &inode_copy, 0) < 0) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/* offset in the actual block size block */
+	offset = pos & (sb->s_blocksize - 1);
+	num_blocks = ((count + offset - 1) >> sb->s_blocksize_bits) + 1;
+	total_blocks = num_blocks;
+	start_blk = pos >> sb->s_blocksize_bits;
+
+	if (nova_check_overlap_vmas(sb, sih, start_blk, num_blocks)) {
+		nova_dbgv("COW write overlaps with vma: inode %lu, pgoff %lu, %lu blocks\n",
+				inode->i_ino, start_blk, num_blocks);
+		NOVA_STATS_ADD(cow_overlap_mmap, 1);
+		/* retried in place once the locks are dropped at "out" */
+		try_inplace = 1;
+		ret = -EACCES;
+		goto out;
+	}
+
+	ret = file_remove_privs(filp);
+	if (ret)
+		goto out;
+
+	inode->i_ctime = inode->i_mtime = current_time(inode);
+	time = current_time(inode).tv_sec;
+
+	nova_dbgv("%s: inode %lu, offset %lld, count %lu\n",
+			__func__, inode->i_ino, pos, count);
+
+	epoch_id = nova_get_epoch_id(sb);
+	update.tail = sih->log_tail;
+	update.alter_tail = sih->alter_log_tail;
+	while (num_blocks > 0) {
+		offset = pos & (nova_inode_blk_size(sih) - 1);
+		start_blk = pos >> sb->s_blocksize_bits;
+
+		/* don't zero-out the allocated blocks */
+		allocated = nova_new_data_blocks(sb, sih, &blocknr, start_blk,
+				 num_blocks, ALLOC_NO_INIT, ANY_CPU,
+				 ALLOC_FROM_HEAD);
+
+		nova_dbg_verbose("%s: alloc %d blocks @ %lu\n", __func__,
+						allocated, blocknr);
+
+		if (allocated <= 0) {
+			nova_dbg("%s alloc blocks failed %d\n", __func__,
+								allocated);
+			ret = allocated;
+			goto out;
+		}
+
+		step++;
+		bytes = sb->s_blocksize * allocated - offset;
+		if (bytes > count)
+			bytes = count;
+
+		kmem = nova_get_block(inode->i_sb,
+			nova_get_block_off(sb, blocknr, sih->i_blk_type));
+
+		/* a write that starts or ends mid-block needs the old data */
+		if (offset || ((offset + bytes) & (PAGE_SIZE - 1)) != 0) {
+			ret = nova_handle_head_tail_blocks(sb, inode, pos,
+							   bytes, kmem);
+			if (ret)
+				goto out;
+		}
+		/* Now copy from user buf */
+		NOVA_START_TIMING(memcpy_w_nvmm_t, memcpy_time);
+		nova_memunlock_range(sb, kmem + offset, bytes);
+		copied = bytes - memcpy_to_pmem_nocache(kmem + offset,
+						buf, bytes);
+		nova_memlock_range(sb, kmem + offset, bytes);
+		NOVA_END_TIMING(memcpy_w_nvmm_t, memcpy_time);
+
+		if (data_csum > 0 || data_parity > 0) {
+			ret = nova_protect_file_data(sb, inode, pos, bytes,
+							buf, blocknr, false);
+			if (ret)
+				goto out;
+		}
+
+		if (pos + copied > inode->i_size)
+			file_size = cpu_to_le64(pos + copied);
+		else
+			file_size = cpu_to_le64(inode->i_size);
+
+		nova_init_file_write_entry(sb, sih, &entry_data, epoch_id,
+					start_blk, allocated, blocknr, time,
+					file_size);
+
+		ret = nova_append_file_write_entry(sb, pi, inode,
+					&entry_data, &update);
+		if (ret) {
+			nova_dbg("%s: append inode entry failed\n", __func__);
+			ret = -ENOSPC;
+			goto out;
+		}
+
+		nova_dbgv("Write: %p, %lu\n", kmem, copied);
+		if (copied > 0) {
+			status = copied;
+			written += copied;
+			pos += copied;
+			buf += copied;
+			count -= copied;
+			num_blocks -= allocated;
+		}
+		if (unlikely(copied != bytes)) {
+			nova_dbg("%s ERROR!: %p, bytes %lu, copied %lu\n",
+				__func__, kmem, bytes, copied);
+			if (status >= 0)
+				status = -EFAULT;
+		}
+		if (status < 0)
+			break;
+
+		/* remember the first entry of this write */
+		if (begin_tail == 0)
+			begin_tail = update.curr_entry;
+	}
+
+	data_bits = blk_type_to_shift[sih->i_blk_type];
+	sih->i_blocks += (total_blocks << (data_bits - sb->s_blocksize_bits));
+
+	nova_memunlock_inode(sb, pi);
+	nova_update_inode(sb, inode, pi, &update, 1);
+	nova_memlock_inode(sb, pi);
+
+	/* Free the overlap blocks after the write is committed */
+	ret = nova_reassign_file_tree(sb, sih, begin_tail);
+	if (ret)
+		goto out;
+
+	inode->i_blocks = sih->i_blocks;
+
+	ret = written;
+	NOVA_STATS_ADD(cow_write_breaks, step);
+	nova_dbgv("blocks: %lu, %lu\n", inode->i_blocks, sih->i_blocks);
+
+	*ppos = pos;
+	if (pos > inode->i_size) {
+		i_size_write(inode, pos);
+		sih->i_size = pos;
+	}
+
+	sih->trans_id++;
+out:
+	if (ret < 0)
+		nova_cleanup_incomplete_write(sb, sih, blocknr, allocated,
+						begin_tail, update.tail);
+
+	inode_unlock(inode);
+	sb_end_write(inode->i_sb);
+	NOVA_END_TIMING(cow_write_t, cow_write_time);
+	NOVA_STATS_ADD(cow_write_bytes, written);
+
+	if (try_inplace)
+		return nova_inplace_file_write(filp, buf, len, ppos);
+
+	return ret;
+}
+
+/*
+ * write() entry point: dispatches to the in-place or the copy-on-write
+ * implementation depending on the inplace_data_updates setting.
+ */
+static ssize_t nova_dax_file_write(struct file *filp, const char __user *buf,
+	size_t len, loff_t *ppos)
+{
+	if (!inplace_data_updates)
+		return nova_cow_file_write(filp, buf, len, ppos);
+
+	return nova_inplace_file_write(filp, buf, len, ppos);
+}
+
+/*
+ * mmap handler: updates the access time, installs nova_dax_vm_ops, marks
+ * the vma VM_MIXEDMAP | VM_HUGEPAGE and registers it through
+ * nova_insert_write_vma().  Always returns 0.
+ */
+static int nova_dax_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+
+	file_accessed(file);
+
+	vma->vm_ops = &nova_dax_vm_ops;
+	vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+
+	nova_insert_write_vma(vma);
+
+	nova_dbg_mmap4k("[%s:%d] inode %lu, MMAP 4KPAGE vm_start(0x%lx), vm_end(0x%lx), vm pgoff %lu, %lu blocks, vm_flags(0x%lx), vm_page_prot(0x%lx)\n",
+			__func__, __LINE__,
+			inode->i_ino, vma->vm_start, vma->vm_end,
+			vma->vm_pgoff,
+			(vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+			vma->vm_flags,
+			pgprot_val(vma->vm_page_prot));
+
+	return 0;
+}
+
+/*
+ * File operations table whose read_iter/write_iter entries go through
+ * dax_iomap_rw() (nova_dax_read_iter() / nova_dax_write_iter()), while the
+ * plain read/write entries use the NOVA log-based handlers.
+ */
+const struct file_operations nova_dax_file_operations = {
+ .llseek = nova_llseek,
+ .read = nova_dax_file_read,
+ .write = nova_dax_file_write,
+ .read_iter = nova_dax_read_iter,
+ .write_iter = nova_dax_write_iter,
+ .mmap = nova_dax_file_mmap,
+ .open = nova_open,
+ .fsync = nova_fsync,
+ .flush = nova_flush,
+ .unlocked_ioctl = nova_ioctl,
+ .fallocate = nova_fallocate,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = nova_compat_ioctl,
+#endif
+};
+
+
+/*
+ * read_iter/write_iter adapter that feeds each iovec segment of @iter to
+ * nova_dax_file_read() or nova_dax_file_write() in turn, advancing
+ * iocb->ki_pos through those calls.
+ *
+ * After each segment iter->count is reduced by the segment length (clamped
+ * at zero) and iter->nr_segs is decremented.
+ *
+ * Returns the sum of the per-segment results, or the first negative result.
+ */
+static ssize_t nova_wrap_rw_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *filp = iocb->ki_filp;
+	const struct iovec *iv = iter->iov;
+	unsigned long segs_left = iter->nr_segs;
+	const int rw = iov_iter_rw(iter);
+	ssize_t total = 0;
+	ssize_t ret = -EIO;
+
+	nova_dbgv("%s %s: %lu segs\n", __func__,
+			rw == READ ? "read" : "write",
+			segs_left);
+
+	for (; segs_left > 0; segs_left--, iv++) {
+		if (rw == READ)
+			ret = nova_dax_file_read(filp, iv->iov_base,
+					iv->iov_len, &iocb->ki_pos);
+		else if (rw == WRITE)
+			ret = nova_dax_file_write(filp, iv->iov_base,
+					iv->iov_len, &iocb->ki_pos);
+
+		if (ret < 0)
+			return ret;
+
+		/* account for the whole segment, never going below zero */
+		if (iter->count > iv->iov_len)
+			iter->count -= iv->iov_len;
+		else
+			iter->count = 0;
+
+		total += ret;
+		iter->nr_segs--;
+	}
+
+	return total;
+}
+
+
+/*
+ * Wrap read/write_iter for DP, CoW and WP.
+ *
+ * Same table as nova_dax_file_operations except that both read_iter and
+ * write_iter are served by nova_wrap_rw_iter(), which walks the iovec
+ * segments and calls the plain read/write handlers for each one.
+ */
+const struct file_operations nova_wrap_file_operations = {
+ .llseek = nova_llseek,
+ .read = nova_dax_file_read,
+ .write = nova_dax_file_write,
+ .read_iter = nova_wrap_rw_iter,
+ .write_iter = nova_wrap_rw_iter,
+ .mmap = nova_dax_file_mmap,
+ .open = nova_open,
+ .fsync = nova_fsync,
+ .flush = nova_flush,
+ .unlocked_ioctl = nova_ioctl,
+ .fallocate = nova_fallocate,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = nova_compat_ioctl,
+#endif
+};
+
+/*
+ * Inode operations table for NOVA file inodes: attribute changes go through
+ * nova_notify_change(), attribute queries through nova_getattr(), and no
+ * get_acl handler is provided.
+ */
+const struct inode_operations nova_file_inode_operations = {
+ .setattr = nova_notify_change,
+ .getattr = nova_getattr,
+ .get_acl = NULL,
+};
diff --git a/fs/nova/namei.c b/fs/nova/namei.c
new file mode 100644
index 000000000000..59776338008d
--- /dev/null
+++ b/fs/nova/namei.c
@@ -0,0 +1,919 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * Inode operations for directories.
+ *
+ * Copyright 2015-2016 Regents of the University of California,
+ * UCSD Non-Volatile Systems Lab, Andiry Xu <jix024@...ucsd.edu>
+ * Copyright 2012-2013 Intel Corporation
+ * Copyright 2009-2011 Marco Stornelli <marco.stornelli@...il.com>
+ * Copyright 2003 Sony Corporation
+ * Copyright 2003 Matsushita Electric Industrial Co., Ltd.
+ * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include "nova.h"
+#include "journal.h"
+#include "inode.h"
+
+/*
+ * Look up @entry in directory @dir.
+ *
+ * On success stores the dentry found by nova_find_dentry() in *@res_entry
+ * and returns its inode number.  Returns 0, leaving *@res_entry untouched,
+ * when the name is not found or (with metadata_csum enabled) the entry
+ * fails checksum verification.
+ */
+static ino_t nova_inode_by_name(struct inode *dir, struct qstr *entry,
+	struct nova_dentry **res_entry)
+{
+	struct super_block *sb = dir->i_sb;
+	struct nova_dentry verified;
+	struct nova_dentry *found;
+	struct nova_dentry *view;
+
+	found = nova_find_dentry(sb, NULL, dir, entry->name, entry->len);
+	if (!found)
+		return 0;
+
+	/* read the inode number from a verified copy when checksums are on */
+	view = found;
+	if (metadata_csum != 0) {
+		view = &verified;
+		if (!nova_verify_entry_csum(sb, found, view))
+			return 0;
+	}
+
+	*res_entry = found;
+	return view->ino;
+}
+
+/*
+ * lookup handler: resolves @dentry's name in @dir and splices the resulting
+ * inode (or NULL when the name does not exist) into the dcache.
+ *
+ * Returns ERR_PTR(-ENAMETOOLONG) for names longer than NOVA_NAME_LEN,
+ * ERR_PTR(-EIO) when nova_iget() fails with -ESTALE, -ENOMEM or -EACCES,
+ * and otherwise the result of d_splice_alias().
+ */
+static struct dentry *nova_lookup(struct inode *dir, struct dentry *dentry,
+	unsigned int flags)
+{
+	struct qstr *name = &dentry->d_name;
+	struct nova_dentry *de;
+	struct inode *inode = NULL;
+	timing_t lookup_time;
+	ino_t ino;
+
+	NOVA_START_TIMING(lookup_t, lookup_time);
+	if (name->len > NOVA_NAME_LEN) {
+		nova_dbg("%s: namelen %u exceeds limit\n",
+			__func__, name->len);
+		return ERR_PTR(-ENAMETOOLONG);
+	}
+
+	nova_dbg_verbose("%s: %s\n", __func__, name->name);
+	ino = nova_inode_by_name(dir, name, &de);
+	nova_dbg_verbose("%s: ino %lu\n", __func__, ino);
+	if (ino) {
+		inode = nova_iget(dir->i_sb, ino);
+		if (IS_ERR(inode)) {
+			switch (PTR_ERR(inode)) {
+			case -ESTALE:
+			case -ENOMEM:
+			case -EACCES:
+				nova_err(dir->i_sb,
+					  "%s: get inode failed: %lu\n",
+					  __func__, (unsigned long)ino);
+				return ERR_PTR(-EIO);
+			default:
+				/* left for d_splice_alias() to handle */
+				break;
+			}
+		}
+	}
+
+	NOVA_END_TIMING(lookup_t, lookup_time);
+	return d_splice_alias(inode, dentry);
+}
+
+/*
+ * Commit the creation of @inode in directory @dir as one lite journal
+ * transaction.
+ *
+ * Under the journal spinlock selected by the current CPU, and with the
+ * journal unlocked via nova_memunlock_journal(), this opens a transaction
+ * with nova_create_inode_transaction(), applies @update to the directory
+ * inode @pidir, marks @pi valid and re-checksums it, issues a
+ * PERSISTENT_BARRIER() and then calls nova_commit_lite_transaction().
+ * When metadata_csum is set, nova_update_alter_inode() is called afterwards
+ * for both inodes.
+ */
+static void nova_lite_transaction_for_new_inode(struct super_block *sb,
+ struct nova_inode *pi, struct nova_inode *pidir, struct inode *inode,
+ struct inode *dir, struct nova_inode_update *update)
+{
+ struct nova_sb_info *sbi = NOVA_SB(sb);
+ int cpu;
+ u64 journal_tail;
+ timing_t trans_time;
+
+ NOVA_START_TIMING(create_trans_t, trans_time);
+
+ cpu = smp_processor_id();
+ spin_lock(&sbi->journal_locks[cpu]);
+ nova_memunlock_journal(sb);
+
+ // If you change what's required to create a new inode, you need to
+ // update this function so the changes will be rolled back on failure.
+ journal_tail = nova_create_inode_transaction(sb, inode, dir, cpu, 1, 0);
+
+ nova_update_inode(sb, dir, pidir, update, 0);
+
+ pi->valid = 1;
+ nova_update_inode_checksum(pi);
+ PERSISTENT_BARRIER();
+
+ nova_commit_lite_transaction(sb, journal_tail, cpu);
+ nova_memlock_journal(sb);
+ spin_unlock(&sbi->journal_locks[cpu]);
+
+ /* performed after the journal lock has been released */
+ if (metadata_csum) {
+ nova_memunlock_inode(sb, pi);
+ nova_update_alter_inode(sb, inode, pi);
+ nova_update_alter_inode(sb, dir, pidir);
+ nova_memlock_inode(sb, pi);
+ }
+ NOVA_END_TIMING(create_trans_t, trans_time);
+}
+
+/*
+ * Create a new inode of type TYPE_CREATE for @dentry in directory @dir.
+ *
+ * By the time this is called, we already have created
+ * the directory cache entry for the new file, but it
+ * is so far negative - it has no inode.
+ *
+ * If the create succeeds, we fill in the inode information
+ * with d_instantiate().
+ *
+ * Returns 0 on success or a negative errno: -EINVAL when the directory's
+ * persistent inode cannot be found, -ENOSPC when no inode number could be
+ * allocated, or the error from nova_add_dentry() / nova_new_vfs_inode().
+ */
+static int nova_create(struct inode *dir, struct dentry *dentry, umode_t mode,
+	bool excl)
+{
+	struct inode *inode;
+	int err;
+	struct super_block *sb = dir->i_sb;
+	struct nova_inode *pidir, *pi;
+	struct nova_inode_update update;
+	u64 pi_addr = 0;
+	u64 ino, epoch_id;
+	timing_t create_time;
+
+	NOVA_START_TIMING(create_t, create_time);
+
+	pidir = nova_get_inode(sb, dir);
+	if (!pidir) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	epoch_id = nova_get_epoch_id(sb);
+	ino = nova_new_nova_inode(sb, &pi_addr);
+	if (ino == 0) {
+		err = -ENOSPC;
+		goto out_err;
+	}
+
+	update.tail = 0;
+	update.alter_tail = 0;
+	err = nova_add_dentry(dentry, ino, 0, &update, epoch_id);
+	if (err)
+		goto out_err;
+
+	nova_dbgv("%s: %s\n", __func__, dentry->d_name.name);
+	nova_dbgv("%s: inode %llu, dir %lu\n", __func__, ino, dir->i_ino);
+	inode = nova_new_vfs_inode(TYPE_CREATE, dir, pi_addr, ino, mode,
+					0, 0, &dentry->d_name, epoch_id);
+	if (IS_ERR(inode)) {
+		/* propagate the real failure instead of returning 0 */
+		err = PTR_ERR(inode);
+		goto out_err;
+	}
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	pi = nova_get_block(sb, pi_addr);
+	nova_lite_transaction_for_new_inode(sb, pi, pidir, inode, dir,
+						&update);
+	NOVA_END_TIMING(create_t, create_time);
+	return err;
+out_err:
+	nova_err(sb, "%s return %d\n", __func__, err);
+	NOVA_END_TIMING(create_t, create_time);
+	return err;
+}
+
+/*
+ * Create a new inode of type TYPE_MKNOD with device number @rdev for
+ * @dentry in directory @dir.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL when the directory's
+ * persistent inode cannot be found, -ENOSPC when no inode number could be
+ * allocated, or the error from nova_add_dentry() / nova_new_vfs_inode().
+ */
+static int nova_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+	dev_t rdev)
+{
+	struct inode *inode;
+	int err;
+	struct super_block *sb = dir->i_sb;
+	u64 pi_addr = 0;
+	struct nova_inode *pidir, *pi;
+	struct nova_inode_update update;
+	u64 ino;
+	u64 epoch_id;
+	timing_t mknod_time;
+
+	NOVA_START_TIMING(mknod_t, mknod_time);
+
+	pidir = nova_get_inode(sb, dir);
+	if (!pidir) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	epoch_id = nova_get_epoch_id(sb);
+	ino = nova_new_nova_inode(sb, &pi_addr);
+	if (ino == 0) {
+		err = -ENOSPC;
+		goto out_err;
+	}
+
+	nova_dbgv("%s: %s\n", __func__, dentry->d_name.name);
+	nova_dbgv("%s: inode %llu, dir %lu\n", __func__, ino, dir->i_ino);
+
+	update.tail = 0;
+	update.alter_tail = 0;
+	err = nova_add_dentry(dentry, ino, 0, &update, epoch_id);
+	if (err)
+		goto out_err;
+
+	inode = nova_new_vfs_inode(TYPE_MKNOD, dir, pi_addr, ino, mode,
+					0, rdev, &dentry->d_name, epoch_id);
+	if (IS_ERR(inode)) {
+		/* propagate the real failure instead of returning 0 */
+		err = PTR_ERR(inode);
+		goto out_err;
+	}
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	pi = nova_get_block(sb, pi_addr);
+	nova_lite_transaction_for_new_inode(sb, pi, pidir, inode, dir,
+						&update);
+	NOVA_END_TIMING(mknod_t, mknod_time);
+	return err;
+out_err:
+	nova_err(sb, "%s return %d\n", __func__, err);
+	NOVA_END_TIMING(mknod_t, mknod_time);
+	return err;
+}
+
+/*
+ * Create a symbolic link @dentry in @dir whose target is @symname.
+ *
+ * The target (including its terminating NUL) must fit in one filesystem
+ * block.  The target is stored with nova_block_symlink() before the new
+ * inode is instantiated and committed.
+ *
+ * Returns 0 on success or a negative errno: -ENAMETOOLONG for an oversized
+ * target, -EINVAL when the directory's persistent inode cannot be found,
+ * -ENOSPC when no inode number could be allocated, or the error from
+ * nova_add_dentry() / nova_new_vfs_inode() / nova_block_symlink().
+ */
+static int nova_symlink(struct inode *dir, struct dentry *dentry,
+	const char *symname)
+{
+	struct super_block *sb = dir->i_sb;
+	int err = -ENAMETOOLONG;
+	unsigned int len = strlen(symname);
+	struct inode *inode;
+	u64 pi_addr = 0;
+	struct nova_inode *pidir, *pi;
+	struct nova_inode_update update;
+	u64 ino;
+	u64 epoch_id;
+	timing_t symlink_time;
+
+	NOVA_START_TIMING(symlink_t, symlink_time);
+	if (len + 1 > sb->s_blocksize)
+		goto out;
+
+	pidir = nova_get_inode(sb, dir);
+	if (!pidir) {
+		err = -EINVAL;
+		goto out_fail;
+	}
+
+	epoch_id = nova_get_epoch_id(sb);
+	ino = nova_new_nova_inode(sb, &pi_addr);
+	if (ino == 0) {
+		err = -ENOSPC;
+		goto out_fail;
+	}
+
+	nova_dbgv("%s: name %s, symname %s\n", __func__,
+				dentry->d_name.name, symname);
+	nova_dbgv("%s: inode %llu, dir %lu\n", __func__, ino, dir->i_ino);
+
+	update.tail = 0;
+	update.alter_tail = 0;
+	err = nova_add_dentry(dentry, ino, 0, &update, epoch_id);
+	if (err)
+		goto out_fail;
+
+	inode = nova_new_vfs_inode(TYPE_SYMLINK, dir, pi_addr, ino,
+					S_IFLNK|0777, len, 0,
+					&dentry->d_name, epoch_id);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out_fail;
+	}
+
+	pi = nova_get_inode(sb, inode);
+
+	/*
+	 * NOTE(review): on failure here the freshly created inode is neither
+	 * unlocked nor released -- confirm whether that needs unwinding.
+	 */
+	err = nova_block_symlink(sb, pi, inode, symname, len, epoch_id);
+	if (err)
+		goto out_fail;
+
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	nova_lite_transaction_for_new_inode(sb, pi, pidir, inode, dir,
+					&update);
+out:
+	NOVA_END_TIMING(symlink_t, symlink_time);
+	return err;
+
+out_fail:
+	nova_err(sb, "%s return %d\n", __func__, err);
+	goto out;
+}
+
+/*
+ * Commit a link-count/time change of @inode together with the matching
+ * change to its directory @dir as one lite journal transaction.
+ *
+ * Under the journal spinlock selected by the current CPU, and with the
+ * journal unlocked via nova_memunlock_journal(), this opens a transaction
+ * with nova_create_inode_transaction(), optionally invalidates @pi (clearing
+ * valid and recording @epoch_id as delete_epoch_id) when @invalidate is set,
+ * applies @update to @pi and @update_dir to @pidir, issues a
+ * PERSISTENT_BARRIER() and then calls nova_commit_lite_transaction().
+ * When metadata_csum is set, nova_update_alter_inode() is called afterwards
+ * for both inodes.
+ */
+static void nova_lite_transaction_for_time_and_link(struct super_block *sb,
+ struct nova_inode *pi, struct nova_inode *pidir, struct inode *inode,
+ struct inode *dir, struct nova_inode_update *update,
+ struct nova_inode_update *update_dir, int invalidate, u64 epoch_id)
+{
+ struct nova_sb_info *sbi = NOVA_SB(sb);
+ u64 journal_tail;
+ int cpu;
+ timing_t trans_time;
+
+ NOVA_START_TIMING(link_trans_t, trans_time);
+
+ cpu = smp_processor_id();
+ spin_lock(&sbi->journal_locks[cpu]);
+ nova_memunlock_journal(sb);
+
+ // If you change which inode fields are modified below, you need to
+ // update this function so the changes will be rolled back on failure.
+ journal_tail = nova_create_inode_transaction(sb, inode, dir, cpu,
+ 0, invalidate);
+
+ if (invalidate) {
+ pi->valid = 0;
+ pi->delete_epoch_id = epoch_id;
+ }
+ nova_update_inode(sb, inode, pi, update, 0);
+
+ nova_update_inode(sb, dir, pidir, update_dir, 0);
+
+ PERSISTENT_BARRIER();
+
+ nova_commit_lite_transaction(sb, journal_tail, cpu);
+ nova_memlock_journal(sb);
+ spin_unlock(&sbi->journal_locks[cpu]);
+
+ /* performed after the journal lock has been released */
+ if (metadata_csum) {
+ nova_memunlock_inode(sb, pi);
+ nova_update_alter_inode(sb, inode, pi);
+ nova_update_alter_inode(sb, dir, pidir);
+ nova_memlock_inode(sb, pi);
+ }
+
+ NOVA_END_TIMING(link_trans_t, trans_time);
+}
+
+/*
+ * Create a hard link @dentry in @dir to the inode behind @dest_dentry.
+ *
+ * Takes an extra inode reference, adds the new directory entry, bumps the
+ * link count and ctime, logs the link change and commits both updates in
+ * one lite transaction.  The extra reference is dropped again when adding
+ * the dentry or logging the link change fails.
+ *
+ * Returns 0 on success, -EMLINK when the inode already has NOVA_LINK_MAX
+ * links, -EINVAL when the directory's persistent inode cannot be found, or
+ * the error from nova_add_dentry() / nova_append_link_change_entry().
+ */
+static int nova_link(struct dentry *dest_dentry, struct inode *dir,
+	struct dentry *dentry)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode = dest_dentry->d_inode;
+	struct nova_inode *pi = nova_get_inode(sb, inode);
+	struct nova_inode *pidir;
+	struct nova_inode_update dir_upd;
+	struct nova_inode_update ino_upd;
+	u64 prev_linkc = 0;
+	u64 epoch_id;
+	int err = -ENOMEM;
+	timing_t link_time;
+
+	NOVA_START_TIMING(link_t, link_time);
+	if (inode->i_nlink >= NOVA_LINK_MAX) {
+		err = -EMLINK;
+		goto out;
+	}
+
+	pidir = nova_get_inode(sb, dir);
+	if (!pidir) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	ihold(inode);
+	epoch_id = nova_get_epoch_id(sb);
+
+	nova_dbgv("%s: name %s, dest %s\n", __func__,
+			dentry->d_name.name, dest_dentry->d_name.name);
+	nova_dbgv("%s: inode %lu, dir %lu\n", __func__,
+			inode->i_ino, dir->i_ino);
+
+	dir_upd.tail = 0;
+	dir_upd.alter_tail = 0;
+	err = nova_add_dentry(dentry, inode->i_ino, 0, &dir_upd, epoch_id);
+	if (err)
+		goto out_iput;
+
+	inode->i_ctime = current_time(inode);
+	inc_nlink(inode);
+
+	ino_upd.tail = 0;
+	ino_upd.alter_tail = 0;
+	err = nova_append_link_change_entry(sb, pi, inode, &ino_upd,
+						&prev_linkc, epoch_id);
+	if (err)
+		goto out_iput;
+
+	d_instantiate(dentry, inode);
+	nova_lite_transaction_for_time_and_link(sb, pi, pidir, inode, dir,
+					&ino_upd, &dir_upd, 0, epoch_id);
+
+	nova_invalidate_link_change_entry(sb, prev_linkc);
+
+out:
+	NOVA_END_TIMING(link_t, link_time);
+	return err;
+
+out_iput:
+	/* give back the reference taken by ihold() above */
+	iput(inode);
+	goto out;
+}
+
+/*
+ * Remove the directory entry @dentry from @dir and drop one link from its
+ * inode.  When the last link goes away the persistent inode is invalidated
+ * as part of the same lite transaction.
+ *
+ * Returns 0 on success, -ENOMEM when the directory's persistent inode cannot
+ * be found, or the error from nova_remove_dentry() /
+ * nova_append_link_change_entry().
+ */
+static int nova_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct super_block *sb = dir->i_sb;
+	int err = -ENOMEM;
+	struct nova_inode *pi = nova_get_inode(sb, inode);
+	struct nova_inode *pidir;
+	struct nova_inode_update dir_upd;
+	struct nova_inode_update ino_upd;
+	u64 prev_linkc = 0;
+	u64 epoch_id;
+	int invalidate;
+	timing_t unlink_time;
+
+	NOVA_START_TIMING(unlink_t, unlink_time);
+
+	pidir = nova_get_inode(sb, dir);
+	if (!pidir)
+		goto fail;
+
+	epoch_id = nova_get_epoch_id(sb);
+	nova_dbgv("%s: %s\n", __func__, dentry->d_name.name);
+	nova_dbgv("%s: inode %lu, dir %lu\n", __func__,
+				inode->i_ino, dir->i_ino);
+
+	dir_upd.tail = 0;
+	dir_upd.alter_tail = 0;
+	err = nova_remove_dentry(dentry, 0, &dir_upd, epoch_id);
+	if (err)
+		goto fail;
+
+	inode->i_ctime = dir->i_ctime;
+
+	/* dropping the last link invalidates the persistent inode */
+	invalidate = (inode->i_nlink == 1) ? 1 : 0;
+	if (inode->i_nlink)
+		drop_nlink(inode);
+
+	ino_upd.tail = 0;
+	ino_upd.alter_tail = 0;
+	err = nova_append_link_change_entry(sb, pi, inode, &ino_upd,
+						&prev_linkc, epoch_id);
+	if (err)
+		goto fail;
+
+	nova_lite_transaction_for_time_and_link(sb, pi, pidir, inode, dir,
+				&ino_upd, &dir_upd, invalidate, epoch_id);
+
+	nova_invalidate_link_change_entry(sb, prev_linkc);
+	nova_invalidate_dentries(sb, &dir_upd);
+
+	NOVA_END_TIMING(unlink_t, unlink_time);
+	return 0;
+
+fail:
+	nova_err(sb, "%s return %d\n", __func__, err);
+	NOVA_END_TIMING(unlink_t, unlink_time);
+	return err;
+}
+
+/*
+ * Create directory @dentry inside @dir.
+ *
+ * Adds the directory entry to the parent, creates the new inode, appends
+ * its initial directory entries, builds its in-DRAM directory tree, bumps
+ * the parent's link count and commits everything in one lite transaction.
+ *
+ * Returns 0 on success, -EMLINK when the parent already has NOVA_LINK_MAX
+ * links, -ENOSPC when no inode number could be allocated, or the error from
+ * nova_add_dentry() / nova_new_vfs_inode().
+ */
+static int nova_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *inode;
+	struct nova_inode *pidir, *pi;
+	struct nova_inode_info *si, *sidir;
+	struct nova_inode_update update;
+	u64 pi_addr = 0;
+	u64 ino;
+	u64 epoch_id;
+	int err = -EMLINK;
+	timing_t mkdir_time;
+
+	NOVA_START_TIMING(mkdir_t, mkdir_time);
+	if (dir->i_nlink >= NOVA_LINK_MAX)
+		goto out;
+
+	ino = nova_new_nova_inode(sb, &pi_addr);
+	if (ino == 0) {
+		/* not a link-count problem: no inode could be allocated */
+		err = -ENOSPC;
+		goto out_err;
+	}
+
+	epoch_id = nova_get_epoch_id(sb);
+	nova_dbgv("%s: name %s\n", __func__, dentry->d_name.name);
+	nova_dbgv("%s: inode %llu, dir %lu, link %d\n", __func__,
+				ino, dir->i_ino, dir->i_nlink);
+
+	update.tail = 0;
+	update.alter_tail = 0;
+	err = nova_add_dentry(dentry, ino, 1, &update, epoch_id);
+	if (err) {
+		nova_dbg("failed to add dir entry\n");
+		goto out_err;
+	}
+
+	inode = nova_new_vfs_inode(TYPE_MKDIR, dir, pi_addr, ino,
+					S_IFDIR | mode, sb->s_blocksize,
+					0, &dentry->d_name, epoch_id);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		goto out_err;
+	}
+
+	pi = nova_get_inode(sb, inode);
+	nova_append_dir_init_entries(sb, pi, inode->i_ino, dir->i_ino,
+					epoch_id);
+
+	/* Build the dir tree */
+	si = NOVA_I(inode);
+	nova_rebuild_dir_inode_tree(sb, pi, pi_addr, &si->header);
+
+	/* refresh the parent's block count from the parent's own header */
+	pidir = nova_get_inode(sb, dir);
+	sidir = NOVA_I(dir);
+	dir->i_blocks = sidir->header.i_blocks;
+	inc_nlink(dir);
+	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
+
+	nova_lite_transaction_for_new_inode(sb, pi, pidir, inode, dir,
+					&update);
+out:
+	NOVA_END_TIMING(mkdir_t, mkdir_time);
+	return err;
+
+out_err:
+	nova_err(sb, "%s return %d\n", __func__, err);
+	goto out;
+}
+
+/*
+ * routine to check that the specified directory is empty (for rmdir)
+ *
+ * Looks up at most four entries of the directory's radix tree.  The
+ * directory counts as empty when there are no more than two of them and
+ * each one satisfies is_dir_init_entry().  With metadata_csum enabled an
+ * entry that fails checksum verification makes the directory count as
+ * non-empty.
+ *
+ * Returns 1 when empty, 0 otherwise.
+ */
+static int nova_empty_dir(struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	struct nova_inode_info *si = NOVA_I(inode);
+	struct nova_inode_info_header *sih = &si->header;
+	struct nova_dentry *entry;
+	struct nova_dentry *entryc, entry_copy;
+	struct nova_dentry *entries[4];
+	int nr_entries;
+	int i;
+
+	nr_entries = radix_tree_gang_lookup(&sih->tree,
+					(void **)entries, 0, 4);
+	if (nr_entries > 2)
+		return 0;
+
+	for (i = 0; i < nr_entries; i++) {
+		entry = entries[i];
+
+		/* inspect a verified copy when metadata checksums are on */
+		if (metadata_csum == 0) {
+			entryc = entry;
+		} else {
+			entryc = &entry_copy;
+			if (!nova_verify_entry_csum(sb, entry, entryc))
+				return 0;
+		}
+
+		if (!is_dir_init_entry(sb, entryc))
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Remove the empty directory @dentry from @dir.
+ *
+ * Removes the directory entry from the parent, clears the victim's link
+ * count, drops one link from the parent, deletes the victim's in-DRAM
+ * directory tree, logs the link change and commits with the victim's
+ * persistent inode invalidated.
+ *
+ * Returns 0 on success, -ENOENT when the dentry has no inode or the name
+ * cannot be found in @dir, -EINVAL when the parent's persistent inode
+ * cannot be found, -ENOTEMPTY when the directory still has entries, or the
+ * error from nova_remove_dentry() / nova_append_link_change_entry().
+ */
+static int nova_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = dentry->d_inode;
+	struct nova_dentry *de;
+	struct super_block *sb;
+	struct nova_inode *pi, *pidir;
+	struct nova_inode_update update_dir;
+	struct nova_inode_update update;
+	u64 old_linkc = 0;
+	struct nova_inode_info *si;
+	struct nova_inode_info_header *sih;
+	int err = -ENOTEMPTY;
+	u64 epoch_id;
+	timing_t rmdir_time;
+
+	NOVA_START_TIMING(rmdir_t, rmdir_time);
+	if (!inode)
+		return -ENOENT;
+
+	/* only touch the inode once it is known to be non-NULL */
+	sb = inode->i_sb;
+	pi = nova_get_inode(sb, inode);
+	si = NOVA_I(inode);
+	sih = &si->header;
+
+	nova_dbgv("%s: name %s\n", __func__, dentry->d_name.name);
+	pidir = nova_get_inode(sb, dir);
+	if (!pidir)
+		return -EINVAL;
+
+	if (nova_inode_by_name(dir, &dentry->d_name, &de) == 0)
+		return -ENOENT;
+
+	if (!nova_empty_dir(inode))
+		return err;
+
+	nova_dbgv("%s: inode %lu, dir %lu, link %d\n", __func__,
+				inode->i_ino, dir->i_ino, dir->i_nlink);
+
+	if (inode->i_nlink != 2)
+		nova_dbg("empty directory %lu has nlink!=2 (%d), dir %lu",
+				inode->i_ino, inode->i_nlink, dir->i_ino);
+
+	epoch_id = nova_get_epoch_id(sb);
+
+	update_dir.tail = 0;
+	update_dir.alter_tail = 0;
+	err = nova_remove_dentry(dentry, -1, &update_dir, epoch_id);
+	if (err)
+		goto end_rmdir;
+
+	/*inode->i_version++; */
+	clear_nlink(inode);
+	inode->i_ctime = dir->i_ctime;
+
+	if (dir->i_nlink)
+		drop_nlink(dir);
+
+	nova_delete_dir_tree(sb, sih);
+
+	update.tail = 0;
+	update.alter_tail = 0;
+	err = nova_append_link_change_entry(sb, pi, inode, &update,
+						&old_linkc, epoch_id);
+	if (err)
+		goto end_rmdir;
+
+	nova_lite_transaction_for_time_and_link(sb, pi, pidir, inode, dir,
+					&update, &update_dir, 1, epoch_id);
+
+	nova_invalidate_link_change_entry(sb, old_linkc);
+	nova_invalidate_dentries(sb, &update_dir);
+
+	NOVA_END_TIMING(rmdir_t, rmdir_time);
+	return err;
+
+end_rmdir:
+	nova_err(sb, "%s return %d\n", __func__, err);
+	NOVA_END_TIMING(rmdir_t, rmdir_time);
+	return err;
+}
+
+ /*
+  * Rename old_dentry (an entry of old_dir) to new_dentry (an entry of new_dir).
+  *
+  * Only RENAME_NOREPLACE is accepted in @flags; any other bit yields -EINVAL.
+  *
+  * Pre-checks when the renamed inode is a directory:
+  *   - an existing target must be empty (nova_empty_dir()), else -ENOTEMPTY;
+  *   - with no existing target, new_dir must be below NOVA_LINK_MAX links,
+  *     else -EMLINK.
+  *
+  * The function then appends log entries (link change for the renamed inode,
+  * dentry removal/addition in the directories, link change for a replaced
+  * target) and finally applies the new log tails inside a journalled "lite
+  * transaction" taken under the journal lock of the current CPU.  When a
+  * directory moves to a different parent, the second dentry of its log (the
+  * ".." entry, per the comment below) is rewritten in place inside the same
+  * transaction.
+  *
+  * Returns 0 on success or a negative errno.
+  *
+  * NOTE(review): the failure paths jump straight to "out" and do not undo
+  * earlier in-memory changes (e.g. inc_nlink(new_dir)) or already appended
+  * log entries -- confirm that callers/recovery tolerate this.
+  */
+ static int nova_rename(struct inode *old_dir,
+ 			struct dentry *old_dentry,
+ 			struct inode *new_dir, struct dentry *new_dentry,
+ 			unsigned int flags)
+ {
+ 	struct inode *old_inode = old_dentry->d_inode;
+ 	struct inode *new_inode = new_dentry->d_inode;
+ 	struct super_block *sb = old_inode->i_sb;
+ 	struct nova_sb_info *sbi = NOVA_SB(sb);
+ 	struct nova_inode *old_pi = NULL, *new_pi = NULL;
+ 	struct nova_inode *new_pidir = NULL, *old_pidir = NULL;
+ 	struct nova_dentry *father_entry = NULL;
+ 	struct nova_dentry *father_entryc, entry_copy;
+ 	char *head_addr = NULL;
+ 	int invalidate_new_inode = 0;
+ 	struct nova_inode_update update_dir_new;
+ 	struct nova_inode_update update_dir_old;
+ 	struct nova_inode_update update_new;
+ 	struct nova_inode_update update_old;
+ 	u64 old_linkc1 = 0, old_linkc2 = 0;
+ 	int err = -ENOENT;
+ 	int inc_link = 0, dec_link = 0;
+ 	int cpu;
+ 	int change_parent = 0;
+ 	u64 journal_tail;
+ 	u64 epoch_id;
+ 	timing_t rename_time;
+
+ 	nova_dbgv("%s: rename %s to %s,\n", __func__,
+ 			old_dentry->d_name.name, new_dentry->d_name.name);
+ 	nova_dbgv("%s: %s inode %lu, old dir %lu, new dir %lu, new inode %lu\n",
+ 			__func__, S_ISDIR(old_inode->i_mode) ? "dir" : "normal",
+ 			old_inode->i_ino, old_dir->i_ino, new_dir->i_ino,
+ 			new_inode ? new_inode->i_ino : 0);
+
+ 	if (flags & ~RENAME_NOREPLACE)
+ 		return -EINVAL;
+
+ 	NOVA_START_TIMING(rename_t, rename_time);
+
+ 	if (new_inode) {
+ 		/* A directory may only replace an empty directory. */
+ 		err = -ENOTEMPTY;
+ 		if (S_ISDIR(old_inode->i_mode) && !nova_empty_dir(new_inode))
+ 			goto out;
+ 	} else {
+ 		if (S_ISDIR(old_inode->i_mode)) {
+ 			/* A new subdirectory adds a link to new_dir. */
+ 			err = -EMLINK;
+ 			if (new_dir->i_nlink >= NOVA_LINK_MAX)
+ 				goto out;
+ 		}
+ 	}
+
+ 	if (S_ISDIR(old_inode->i_mode)) {
+ 		dec_link = -1;
+ 		if (!new_inode)
+ 			inc_link = 1;
+ 		/*
+ 		 * Tricky for in-place update:
+ 		 * New dentry is always after renamed dentry, so we have to
+ 		 * make sure new dentry has the correct links count
+ 		 * to workaround the rebuild nlink issue.
+ 		 */
+ 		if (old_dir == new_dir) {
+ 			inc_link--;
+ 			if (inc_link == 0)
+ 				dec_link = 0;
+ 		}
+ 	}
+
+ 	epoch_id = nova_get_epoch_id(sb);
+ 	new_pidir = nova_get_inode(sb, new_dir);
+ 	old_pidir = nova_get_inode(sb, old_dir);
+
+ 	/* Log the ctime change of the renamed inode. */
+ 	old_pi = nova_get_inode(sb, old_inode);
+ 	old_inode->i_ctime = current_time(old_inode);
+ 	update_old.tail = 0;
+ 	update_old.alter_tail = 0;
+ 	err = nova_append_link_change_entry(sb, old_pi, old_inode,
+ 					&update_old, &old_linkc1, epoch_id);
+ 	if (err)
+ 		goto out;
+
+ 	if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
+ 		/* My father is changed. Update .. entry */
+ 		/* For simplicity, we use in-place update and journal it */
+ 		change_parent = 1;
+ 		head_addr = (char *)nova_get_block(sb, old_pi->log_head);
+ 		father_entry = (struct nova_dentry *)(head_addr +
+ 					NOVA_DIR_LOG_REC_LEN(1));
+
+ 		/*
+ 		 * With metadata checksums enabled, work on a verified copy
+ 		 * of the entry instead of the entry in the log itself.
+ 		 */
+ 		if (metadata_csum == 0)
+ 			father_entryc = father_entry;
+ 		else {
+ 			father_entryc = &entry_copy;
+ 			if (!nova_verify_entry_csum(sb, father_entry,
+ 							father_entryc)) {
+ 				err = -EIO;
+ 				goto out;
+ 			}
+ 		}
+
+ 		/*
+ 		 * Report the value that was actually compared (the verified
+ 		 * copy); it is a 64-bit quantity, hence %llu.
+ 		 */
+ 		if (le64_to_cpu(father_entryc->ino) != old_dir->i_ino)
+ 			nova_err(sb, "%s: dir %lu parent should be %lu, but actually %llu\n",
+ 				__func__,
+ 				old_inode->i_ino, old_dir->i_ino,
+ 				le64_to_cpu(father_entryc->ino));
+ 	}
+
+ 	update_dir_new.tail = 0;
+ 	update_dir_new.alter_tail = 0;
+ 	if (new_inode) {
+ 		/* First remove the old entry in the new directory */
+ 		err = nova_remove_dentry(new_dentry, 0, &update_dir_new,
+ 					epoch_id);
+ 		if (err)
+ 			goto out;
+ 	}
+
+ 	/* link into the new directory. */
+ 	err = nova_add_dentry(new_dentry, old_inode->i_ino,
+ 				inc_link, &update_dir_new, epoch_id);
+ 	if (err)
+ 		goto out;
+
+ 	if (inc_link > 0)
+ 		inc_nlink(new_dir);
+
+ 	update_dir_old.tail = 0;
+ 	update_dir_old.alter_tail = 0;
+ 	if (old_dir == new_dir) {
+ 		/*
+ 		 * Same directory: start from the tails produced by the
+ 		 * dentry operations above.
+ 		 */
+ 		update_dir_old.tail = update_dir_new.tail;
+ 		update_dir_old.alter_tail = update_dir_new.alter_tail;
+ 	}
+
+ 	err = nova_remove_dentry(old_dentry, dec_link, &update_dir_old,
+ 					epoch_id);
+ 	if (err)
+ 		goto out;
+
+ 	if (dec_link < 0)
+ 		drop_nlink(old_dir);
+
+ 	if (new_inode) {
+ 		new_pi = nova_get_inode(sb, new_inode);
+ 		new_inode->i_ctime = current_time(new_inode);
+
+ 		/* A replaced directory loses two links, a file one. */
+ 		if (S_ISDIR(old_inode->i_mode)) {
+ 			if (new_inode->i_nlink)
+ 				drop_nlink(new_inode);
+ 		}
+ 		if (new_inode->i_nlink)
+ 			drop_nlink(new_inode);
+
+ 		update_new.tail = 0;
+ 		update_new.alter_tail = 0;
+ 		err = nova_append_link_change_entry(sb, new_pi, new_inode,
+ 						&update_new, &old_linkc2,
+ 						epoch_id);
+ 		if (err)
+ 			goto out;
+ 	}
+
+ 	/* The journal lock is selected by the CPU we are running on. */
+ 	cpu = smp_processor_id();
+ 	spin_lock(&sbi->journal_locks[cpu]);
+ 	nova_memunlock_journal(sb);
+ 	/* The replaced inode has no links left: mark it invalid below. */
+ 	if (new_inode && new_inode->i_nlink == 0)
+ 		invalidate_new_inode = 1;
+ 	journal_tail = nova_create_rename_transaction(sb, old_inode, old_dir,
+ 				new_inode,
+ 				old_dir != new_dir ? new_dir : NULL,
+ 				father_entry,
+ 				invalidate_new_inode,
+ 				cpu);
+
+ 	nova_update_inode(sb, old_inode, old_pi, &update_old, 0);
+ 	nova_update_inode(sb, old_dir, old_pidir, &update_dir_old, 0);
+
+ 	if (old_pidir != new_pidir)
+ 		nova_update_inode(sb, new_dir, new_pidir, &update_dir_new, 0);
+
+ 	if (change_parent && father_entry) {
+ 		/* Point the moved directory's parent entry at new_dir. */
+ 		father_entry->ino = cpu_to_le64(new_dir->i_ino);
+ 		nova_update_entry_csum(father_entry);
+ 		nova_update_alter_entry(sb, father_entry);
+ 	}
+
+ 	if (new_inode) {
+ 		if (invalidate_new_inode) {
+ 			new_pi->valid = 0;
+ 			new_pi->delete_epoch_id = epoch_id;
+ 		}
+ 		nova_update_inode(sb, new_inode, new_pi, &update_new, 0);
+ 	}
+
+ 	PERSISTENT_BARRIER();
+
+ 	nova_commit_lite_transaction(sb, journal_tail, cpu);
+ 	nova_memlock_journal(sb);
+ 	spin_unlock(&sbi->journal_locks[cpu]);
+
+ 	/* Bring the alternate inode copies in line with the primaries. */
+ 	nova_memunlock_inode(sb, old_pi);
+ 	nova_update_alter_inode(sb, old_inode, old_pi);
+ 	nova_update_alter_inode(sb, old_dir, old_pidir);
+ 	if (old_dir != new_dir)
+ 		nova_update_alter_inode(sb, new_dir, new_pidir);
+ 	if (new_inode)
+ 		nova_update_alter_inode(sb, new_inode, new_pi);
+ 	nova_memlock_inode(sb, old_pi);
+
+ 	/* The superseded log entries are no longer needed. */
+ 	nova_invalidate_link_change_entry(sb, old_linkc1);
+ 	nova_invalidate_link_change_entry(sb, old_linkc2);
+ 	if (new_inode)
+ 		nova_invalidate_dentries(sb, &update_dir_new);
+ 	nova_invalidate_dentries(sb, &update_dir_old);
+
+ 	NOVA_END_TIMING(rename_t, rename_time);
+ 	return 0;
+ out:
+ 	nova_err(sb, "%s return %d\n", __func__, err);
+ 	NOVA_END_TIMING(rename_t, rename_time);
+ 	return err;
+ }
+
+ /*
+  * Look up the ".." entry of @child and return a dentry for the inode it
+  * names.  Returns ERR_PTR(-ENOENT) when no ".." entry is found or when the
+  * entry carries inode number 0; otherwise returns whatever
+  * d_obtain_alias() yields for the inode obtained from nova_iget().
+  */
+ struct dentry *nova_get_parent(struct dentry *child)
+ {
+ 	struct qstr parent_name = QSTR_INIT("..", 2);
+ 	struct nova_dentry *parent_de = NULL;
+ 	ino_t parent_ino;
+
+ 	nova_inode_by_name(child->d_inode, &parent_name, &parent_de);
+ 	if (parent_de == NULL)
+ 		return ERR_PTR(-ENOENT);
+
+ 	/* FIXME: can de->ino be avoided by using the return value of
+ 	 * nova_inode_by_name()?
+ 	 */
+ 	parent_ino = le64_to_cpu(parent_de->ino);
+ 	if (parent_ino == 0)
+ 		return ERR_PTR(-ENOENT);
+
+ 	return d_obtain_alias(nova_iget(child->d_inode->i_sb, parent_ino));
+ }
+
+ /* Inode operations installed on NOVA directories. */
+ const struct inode_operations nova_dir_inode_operations = {
+ 	/* name resolution */
+ 	.lookup		= nova_lookup,
+ 	/* entry creation */
+ 	.create		= nova_create,
+ 	.mknod		= nova_mknod,
+ 	.mkdir		= nova_mkdir,
+ 	.symlink	= nova_symlink,
+ 	.link		= nova_link,
+ 	/* entry removal and renaming */
+ 	.unlink		= nova_unlink,
+ 	.rmdir		= nova_rmdir,
+ 	.rename		= nova_rename,
+ 	/* attributes */
+ 	.setattr	= nova_notify_change,
+ 	.get_acl	= NULL,
+ };
+
+ /* Inode operations table exported as nova_special_inode_operations. */
+ const struct inode_operations nova_special_inode_operations = {
+ 	.get_acl	= NULL,
+ 	.setattr	= nova_notify_change,
+ };
diff --git a/fs/nova/symlink.c b/fs/nova/symlink.c
new file mode 100644
index 000000000000..b0e5e898a41b
--- /dev/null
+++ b/fs/nova/symlink.c
@@ -0,0 +1,153 @@
+/*
+ * BRIEF DESCRIPTION
+ *
+ * Symlink operations
+ *
+ * Copyright 2015-2016 Regents of the University of California,
+ * UCSD Non-Volatile Systems Lab, Andiry Xu <jix024@...ucsd.edu>
+ * Copyright 2012-2013 Intel Corporation
+ * Copyright 2009-2011 Marco Stornelli <marco.stornelli@...il.com>
+ * Copyright 2003 Sony Corporation
+ * Copyright 2003 Matsushita Electric Industrial Co., Ltd.
+ * 2003-2004 (c) MontaVista Software, Inc. , Steve Longerbeam
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/version.h>
+#include "nova.h"
+#include "inode.h"
+
+ /*
+  * Store the target of a symlink in a freshly allocated data block and
+  * record that block in the inode's log.
+  *
+  * @sb:       super block of the filesystem
+  * @pi:       persistent inode of the symlink
+  * @inode:    VFS inode of the symlink
+  * @symname:  link target; @len bytes are copied
+  * @len:      length of @symname, without terminating NUL
+  * @epoch_id: epoch stamped into the write entry
+  *
+  * Steps: allocate one zero-initialised block, copy the name into it and
+  * NUL-terminate it, append a file write entry of size len + 1 describing
+  * the block, then publish the new log tail in the inode.
+  *
+  * Returns 0 on success or a negative errno.  If appending the write entry
+  * fails, the data block is released again.
+  *
+  * NOTE(review): assumes the caller guarantees that len + 1 fits into the
+  * allocated block -- there is no bound check here; confirm at call sites.
+  */
+ int nova_block_symlink(struct super_block *sb, struct nova_inode *pi,
+ 	struct inode *inode, const char *symname, int len, u64 epoch_id)
+ {
+ 	struct nova_file_write_entry entry_data;
+ 	struct nova_inode_info *si = NOVA_I(inode);
+ 	struct nova_inode_info_header *sih = &si->header;
+ 	struct nova_inode_update update;
+ 	unsigned long name_blocknr = 0;
+ 	int allocated;
+ 	u64 block;
+ 	char *blockp;
+ 	u32 time;
+ 	int ret;
+
+ 	update.tail = sih->log_tail;
+ 	update.alter_tail = sih->alter_log_tail;
+
+ 	allocated = nova_new_data_blocks(sb, sih, &name_blocknr, 0, 1,
+ 				 ALLOC_INIT_ZERO, ANY_CPU, ALLOC_FROM_TAIL);
+ 	if (allocated != 1 || name_blocknr == 0) {
+ 		/*
+ 		 * Pass a negative errno from the allocator through.  A
+ 		 * non-negative count here still means "no usable block", so
+ 		 * it must not leak out as 0 (success) or as a positive
+ 		 * value.
+ 		 */
+ 		return allocated < 0 ? allocated : -ENOSPC;
+ 	}
+
+ 	/* First copy name to name block */
+ 	block = nova_get_block_off(sb, name_blocknr, NOVA_BLOCK_TYPE_4K);
+ 	blockp = (char *)nova_get_block(sb, block);
+
+ 	nova_memunlock_block(sb, blockp);
+ 	memcpy_to_pmem_nocache(blockp, symname, len);
+ 	blockp[len] = '\0';
+ 	nova_memlock_block(sb, blockp);
+
+ 	/* Apply a write entry to the log page */
+ 	time = current_time(inode).tv_sec;
+ 	nova_init_file_write_entry(sb, sih, &entry_data, epoch_id, 0, 1,
+ 					name_blocknr, time, len + 1);
+
+ 	ret = nova_append_file_write_entry(sb, pi, inode, &entry_data, &update);
+ 	if (ret) {
+ 		nova_dbg("%s: append file write entry failed %d\n",
+ 					__func__, ret);
+ 		/* The block is not referenced by the log: give it back. */
+ 		nova_free_data_blocks(sb, sih, name_blocknr, 1);
+ 		return ret;
+ 	}
+
+ 	nova_memunlock_inode(sb, pi);
+ 	nova_update_inode(sb, inode, pi, &update, 1);
+ 	nova_memlock_inode(sb, pi);
+ 	sih->trans_id++;
+
+ 	return 0;
+ }
+
+ /* FIXME: Temporary workaround */
+ /*
+  * Copy the string @link into the user buffer @buffer, writing at most
+  * @buflen bytes (no terminating NUL is copied).
+  *
+  * Returns the number of bytes copied, the error encoded in @link when it
+  * is an error pointer, or -EFAULT when copy_to_user() reports a failure.
+  */
+ static int nova_readlink_copy(char __user *buffer, int buflen, const char *link)
+ {
+ 	int copied;
+
+ 	if (IS_ERR(link))
+ 		return PTR_ERR(link);
+
+ 	copied = strlen(link);
+ 	if (copied > (unsigned int) buflen)
+ 		copied = buflen;
+
+ 	return copy_to_user(buffer, link, copied) ? -EFAULT : copied;
+ }
+
+ /*
+  * readlink() for NOVA symlinks: take the write entry at the head of the
+  * inode's log, locate the data block it references and copy the string
+  * stored there to user space.
+  *
+  * Returns the result of nova_readlink_copy(), or -EIO when metadata
+  * checksums are enabled and the log entry fails verification.
+  */
+ static int nova_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+ {
+ 	struct inode *inode = dentry->d_inode;
+ 	struct super_block *sb = inode->i_sb;
+ 	struct nova_inode_info_header *sih = &NOVA_I(inode)->header;
+ 	struct nova_file_write_entry verified;
+ 	struct nova_file_write_entry *log_entry;
+ 	struct nova_file_write_entry *use = &verified;
+
+ 	log_entry = (struct nova_file_write_entry *)nova_get_block(sb,
+ 							sih->log_head);
+
+ 	/* Without checksums read the log entry directly, else a checked copy. */
+ 	if (metadata_csum == 0)
+ 		use = log_entry;
+ 	else if (!nova_verify_entry_csum(sb, log_entry, use))
+ 		return -EIO;
+
+ 	return nova_readlink_copy(buffer, buflen,
+ 			(char *)nova_get_block(sb, BLOCK_OFF(use->block)));
+ }
+
+ /*
+  * ->get_link() for NOVA symlinks: return a pointer to the link target
+  * stored in the data block referenced by the write entry at the head of
+  * the inode's log.
+  *
+  * @dentry: not used by this function
+  * @inode:  symlink inode
+  * @done:   not used; no delayed cleanup is registered
+  *
+  * Returns the in-place target string, or ERR_PTR(-EIO) when metadata
+  * checksums are enabled and the log entry fails verification.  An error
+  * pointer is used rather than NULL so that the failure reaches the caller
+  * as an error, matching the -EIO returned by nova_readlink() for the same
+  * condition.
+  */
+ static const char *nova_get_link(struct dentry *dentry, struct inode *inode,
+ 	struct delayed_call *done)
+ {
+ 	struct nova_file_write_entry *entry;
+ 	struct nova_file_write_entry *entryc, entry_copy;
+ 	struct super_block *sb = inode->i_sb;
+ 	struct nova_inode_info *si = NOVA_I(inode);
+ 	struct nova_inode_info_header *sih = &si->header;
+ 	char *blockp;
+
+ 	entry = (struct nova_file_write_entry *)nova_get_block(sb,
+ 							sih->log_head);
+ 	if (metadata_csum == 0)
+ 		entryc = entry;
+ 	else {
+ 		entryc = &entry_copy;
+ 		if (!nova_verify_entry_csum(sb, entry, entryc))
+ 			return ERR_PTR(-EIO);
+ 	}
+
+ 	blockp = (char *)nova_get_block(sb, BLOCK_OFF(entryc->block));
+
+ 	return blockp;
+ }
+
+ /* Inode operations installed on NOVA symbolic links. */
+ const struct inode_operations nova_symlink_inode_operations = {
+ 	.get_link	= nova_get_link,
+ 	.readlink	= nova_readlink,
+ 	.setattr	= nova_notify_change,
+ };
Powered by blists - more mailing lists