[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1272723382-19470-82-git-send-email-orenl@cs.columbia.edu>
Date: Sat, 1 May 2010 10:16:03 -0400
From: Oren Laadan <orenl@...columbia.edu>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: containers@...ts.linux-foundation.org,
linux-kernel@...r.kernel.org, Serge Hallyn <serue@...ibm.com>,
Matt Helsley <matthltc@...ibm.com>,
Pavel Emelyanov <xemul@...nvz.org>,
Oren Laadan <orenl@...columbia.edu>,
Al Viro <viro@...iv.linux.org.uk>,
linux-fsdevel@...r.kernel.org
Subject: [PATCH v21 081/100] c/r: restore task fs_root and pwd (v3)
Checkpoint and restore task->fs. Tasks sharing a task->fs will
share it again after restart.
Original patch by Serge Hallyn <serue@...ibm.com>
Changelog:
Jan 25: [orenl] Addressed comments by .. myself:
- add leak detection
- change order of save/restore of chroot and cwd
- save/restore fs only after file-table and mm
- rename functions to match existing conventions
Dec 28: [serge] Addressed comments by Oren (and Dave)
- define and use {get,put}_fs_struct helpers
- fix locking comment
- define ckpt_read_fname() and use in checkpoint/files.c
Cc: Al Viro <viro@...iv.linux.org.uk>
Cc: linux-fsdevel@...r.kernel.org
Signed-off-by: Oren Laadan <orenl@...columbia.edu>
Signed-off-by: Serge Hallyn <serue@...ibm.com>
---
fs/checkpoint.c | 232 +++++++++++++++++++++++++++++++++++++++-
fs/fs_struct.c | 21 ++++
fs/open.c | 58 ++++++----
include/linux/checkpoint.h | 6 +-
include/linux/checkpoint_hdr.h | 12 ++
include/linux/fs.h | 5 +
include/linux/fs_struct.h | 2 +
kernel/checkpoint/process.c | 17 +++
8 files changed, 325 insertions(+), 28 deletions(-)
diff --git a/fs/checkpoint.c b/fs/checkpoint.c
index e0f8a15..61b68da 100644
--- a/fs/checkpoint.c
+++ b/fs/checkpoint.c
@@ -15,6 +15,9 @@
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/file.h>
+#include <linux/namei.h>
+#include <linux/fs_struct.h>
+#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/fsnotify.h>
#include <linux/pipe_fs_i.h>
@@ -369,6 +372,58 @@ int checkpoint_obj_file_table(struct ckpt_ctx *ctx, struct task_struct *t)
return objref;
}
+/*
+ * checkpoint_obj_fs - checkpoint the fs_struct of task @t via the objhash
+ * @ctx: checkpoint context
+ * @t: task whose ->fs is to be checkpointed
+ *
+ * Returns whatever checkpoint_obj() returns; callers treat a negative
+ * value as an error and any other value as the objref of the fs.
+ */
+int checkpoint_obj_fs(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	struct fs_struct *fs;
+	int fs_objref;
+
+	/* t->fs is sampled under t's own task lock, not current's */
+	task_lock(t);
+	fs = t->fs;
+	get_fs_struct(fs);
+	task_unlock(t);
+
+	fs_objref = checkpoint_obj(ctx, fs, CKPT_OBJ_FS);
+	put_fs_struct(fs);
+
+	return fs_objref;
+}
+
+/*
+ * checkpoint_fs - write one fs_struct to the checkpoint image
+ * @ctx: checkpoint context
+ * @ptr: the fs_struct to save
+ *
+ * Called with fs refcount bumped so it won't disappear.  Emits a
+ * CKPT_HDR_FS header, then the path of the cwd, then the path of the
+ * root; restore_fs() reads them back in that same order.
+ *
+ * Returns 0 on success, an error code otherwise.
+ */
+static int checkpoint_fs(struct ckpt_ctx *ctx, void *ptr)
+{
+	struct fs_struct *fs = ptr;
+	struct ckpt_hdr_fs *h;
+	struct fs_struct *fscopy;
+	int ret;
+
+	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FS);
+	if (!h)
+		return -ENOMEM;
+	ret = ckpt_write_obj(ctx, &h->h);
+	ckpt_hdr_put(ctx, h);
+	if (ret)
+		return ret;
+
+	/* the paths are written from a copy of @fs, released at "out" */
+	fscopy = copy_fs_struct(fs);
+	if (!fscopy)	/* check the copy; @fs itself is never NULL here */
+		return -ENOMEM;
+
+	ret = checkpoint_fname(ctx, &fscopy->pwd, &ctx->root_fs_path);
+	if (ret < 0) {
+		ckpt_err(ctx, ret, "%(T)writing path of cwd");
+		goto out;
+	}
+	ret = checkpoint_fname(ctx, &fscopy->root, &ctx->root_fs_path);
+	if (ret < 0) {
+		ckpt_err(ctx, ret, "%(T)writing path of fs root");
+		goto out;
+	}
+	ret = 0;
+ out:
+	free_fs_struct(fscopy);
+	return ret;
+}
+
/***********************************************************************
* Collect
*/
@@ -455,10 +510,41 @@ int ckpt_collect_file_table(struct ckpt_ctx *ctx, struct task_struct *t)
return ret;
}
+/*
+ * ckpt_collect_fs - register the fs_struct of task @t with the objhash
+ * @ctx: checkpoint context
+ * @t: task whose ->fs is collected
+ *
+ * A temporary reference pins the fs_struct while ckpt_obj_collect()
+ * runs.  Returns the result of ckpt_obj_collect().
+ */
+int ckpt_collect_fs(struct ckpt_ctx *ctx, struct task_struct *t)
+{
+	struct fs_struct *task_fs;
+	int err;
+
+	/* sample t->fs and pin it while t is locked */
+	task_lock(t);
+	task_fs = t->fs;
+	get_fs_struct(task_fs);
+	task_unlock(t);
+
+	err = ckpt_obj_collect(ctx, task_fs, CKPT_OBJ_FS);
+	put_fs_struct(task_fs);
+
+	return err;
+}
+
/**************************************************************************
* Restart
*/
+/*
+ * ckpt_read_fname - read a file name payload from the checkpoint image
+ * @ctx: checkpoint context
+ * @fname: set by ckpt_read_payload() to the buffer holding the name
+ *
+ * The last byte of the payload is forced to NUL so the result is always
+ * a terminated string.  Returns the payload length (> 0) on success, in
+ * which case the caller must kfree() *@fname; returns a negative error
+ * otherwise.
+ */
+static int ckpt_read_fname(struct ckpt_ctx *ctx, char **fname)
+{
+	int len;
+
+	len = ckpt_read_payload(ctx, (void **) fname,
+				PATH_MAX, CKPT_HDR_FILE_NAME);
+	if (len < 0)
+		return len;
+	/*
+	 * An empty payload has no byte to hold the terminator, and
+	 * (*fname)[len - 1] would write before the buffer.
+	 * NOTE(review): assumes a zero-length payload leaves nothing
+	 * to free - confirm against ckpt_read_payload().
+	 */
+	if (len == 0)
+		return -EINVAL;
+
+	(*fname)[len - 1] = '\0';	/* always play it safe */
+	ckpt_debug("read filename '%s'\n", *fname);
+
+	return len;
+}
+
/**
* restore_open_fname - read a file name and open a file
* @ctx: checkpoint context
@@ -474,11 +560,9 @@ struct file *restore_open_fname(struct ckpt_ctx *ctx, int flags)
if (flags & (O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC))
return ERR_PTR(-EINVAL);
- len = ckpt_read_payload(ctx, (void **) &fname,
- PATH_MAX, CKPT_HDR_FILE_NAME);
+ len = ckpt_read_fname(ctx, &fname);
if (len < 0)
return ERR_PTR(len);
- fname[len - 1] = '\0'; /* always play if safe */
ckpt_debug("fname '%s' flags %#x\n", fname, flags);
file = filp_open(fname, flags, 0);
@@ -805,8 +889,136 @@ int restore_obj_file_table(struct ckpt_ctx *ctx, int files_objref)
}
/*
+ * restore_obj_fs - make the restarted task use a restored fs_struct
+ * @ctx: checkpoint context
+ * @fs_objref: objref of the fs_struct entry in the hash
+ *
+ * Called by the task restore code.  Looks up the fs_struct, takes a
+ * reference on it for the current task, installs it as current->fs and
+ * drops the reference held on the previous one.
+ *
+ * Returns 0 on success, or the lookup error.
+ */
+int restore_obj_fs(struct ckpt_ctx *ctx, int fs_objref)
+{
+	struct fs_struct *restored, *previous;
+
+	restored = ckpt_obj_fetch(ctx, fs_objref, CKPT_OBJ_FS);
+	if (IS_ERR(restored))
+		return PTR_ERR(restored);
+
+	/* swap the pointers with current locked */
+	task_lock(current);
+	previous = current->fs;
+	get_fs_struct(restored);
+	current->fs = restored;
+	task_unlock(current);
+
+	/* the old fs is released only after the task lock is dropped */
+	put_fs_struct(previous);
+
+	return 0;
+}
+
+/*
+ * restore_chroot - set the root of @fs to the directory named @name
+ *
+ * Resolves @name to a directory and hands it to do_chroot().  The path
+ * reference taken by the lookup is always dropped before returning.
+ * Returns 0 on success, or the error from the lookup or do_chroot().
+ */
+static int restore_chroot(struct ckpt_ctx *ctx, struct fs_struct *fs, char *name)
+{
+	struct nameidata nd;
+	int err;
+
+	ckpt_debug("attempting chroot to %s\n", name);
+
+	err = path_lookup(name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
+	if (err) {
+		ckpt_err(ctx, err, "%(T)Opening chroot dir %s", name);
+		return err;
+	}
+
+	err = do_chroot(fs, &nd.path);
+	path_put(&nd.path);
+	if (err)
+		ckpt_err(ctx, err, "%(T)Setting chroot %s", name);
+
+	return err;
+}
+
+/*
+ * restore_cwd - set the working directory of @fs to the directory @name
+ *
+ * Resolves @name to a directory and hands it to do_chdir().  The path
+ * reference taken by the lookup is always dropped before returning.
+ * Returns 0 on success, or the error from the lookup or do_chdir().
+ */
+static int restore_cwd(struct ckpt_ctx *ctx, struct fs_struct *fs, char *name)
+{
+	struct nameidata nd;
+	int err;
+
+	ckpt_debug("attempting chdir to %s\n", name);
+
+	err = path_lookup(name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
+	if (err) {
+		ckpt_err(ctx, err, "%(T)Opening cwd %s", name);
+		return err;
+	}
+
+	err = do_chdir(fs, &nd.path);
+	path_put(&nd.path);
+	if (err)
+		ckpt_err(ctx, err, "%(T)Setting cwd %s", name);
+
+	return err;
+}
+
+/*
+ * restore_fs - build an fs_struct from a CKPT_OBJ_FS entry in the image
+ * @ctx: checkpoint context
+ *
+ * Called by objhash when it runs into a CKPT_OBJ_FS entry.  Starts from
+ * a copy of current->fs, then reads two path names from the image and
+ * applies them: first the cwd, then the root.  The resulting fs_struct
+ * is returned for placement in the hash.
+ *
+ * Returns the new fs_struct, or an ERR_PTR() on failure.
+ */
+static void *restore_fs(struct ckpt_ctx *ctx)
+{
+	struct ckpt_hdr_fs *h;
+	struct fs_struct *newfs;
+	char *name;
+	int err;
+
+	/* the header carries no fields of its own; just consume it */
+	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_FS);
+	if (IS_ERR(h))
+		return ERR_PTR(PTR_ERR(h));
+	ckpt_hdr_put(ctx, h);
+
+	newfs = copy_fs_struct(current->fs);
+	if (!newfs)
+		return ERR_PTR(-ENOMEM);
+
+	/* working directory */
+	err = ckpt_read_fname(ctx, &name);
+	if (err < 0)
+		goto fail;
+	err = restore_cwd(ctx, newfs, name);
+	kfree(name);
+	if (err)
+		goto fail;
+
+	/* root directory */
+	err = ckpt_read_fname(ctx, &name);
+	if (err < 0)
+		goto fail;
+	err = restore_chroot(ctx, newfs, name);
+	kfree(name);
+	if (err)
+		goto fail;
+
+	return newfs;
+
+fail:
+	free_fs_struct(newfs);
+	return ERR_PTR(err);
+}
+
+/*
* fs-related checkpoint objects
*/
+
+/* objhash ref_grab callback: take a reference on an fs_struct; always 0 */
+static int obj_fs_grab(void *ptr)
+{
+	struct fs_struct *fs = ptr;
+
+	get_fs_struct(fs);
+	return 0;
+}
+
+/* objhash ref_drop callback: release a reference; @lastref is not used */
+static void obj_fs_drop(void *ptr, int lastref)
+{
+	struct fs_struct *fs = ptr;
+
+	put_fs_struct(fs);
+}
+
+/* objhash ref_users callback: report the user count of an fs_struct */
+static int obj_fs_users(void *ptr)
+{
+	struct fs_struct *fs = ptr;
+
+	/*
+	 * Reading without fs->lock is safe because the fs is referenced.
+	 * An unlocked sample is also good enough for leak detection:
+	 * without a leak the count cannot change; with a leak it is
+	 * already too big (even if it is about to grow), and if it is
+	 * about to shrink this is the same as sampling slightly earlier.
+	 */
+	return fs->users;
+}
+
static int obj_file_table_grab(void *ptr)
{
atomic_inc(&((struct files_struct *) ptr)->count);
@@ -839,6 +1051,17 @@ static int obj_file_users(void *ptr)
return atomic_long_read(&((struct file *) ptr)->f_count);
}
+/* fs object: objhash operations for CKPT_OBJ_FS (fs_struct) entries */
+static const struct ckpt_obj_ops ckpt_obj_fs_ops = {
+ .obj_name = "FS",
+ .obj_type = CKPT_OBJ_FS,
+ .ref_drop = obj_fs_drop,
+ .ref_grab = obj_fs_grab,
+ .ref_users = obj_fs_users,
+ .checkpoint = checkpoint_fs,
+ .restore = restore_fs,
+};
+
/* files_struct object */
static const struct ckpt_obj_ops ckpt_obj_files_struct_ops = {
.obj_name = "FILE_TABLE",
@@ -865,6 +1088,9 @@ static __init int checkpoint_register_fs(void)
{
int ret;
+ ret = register_checkpoint_obj(&ckpt_obj_fs_ops);
+ if (ret < 0)
+ return ret;
ret = register_checkpoint_obj(&ckpt_obj_files_struct_ops);
if (ret < 0)
return ret;
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index eee0590..2a4c6f5 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -6,6 +6,27 @@
#include <linux/fs_struct.h>
/*
+ * Take an extra reference on @fs by incrementing fs->users while
+ * holding fs->lock for writing.  Pair with put_fs_struct().
+ *
+ * call with owning task locked
+ */
+void get_fs_struct(struct fs_struct *fs)
+{
+ write_lock(&fs->lock);
+ fs->users++;
+ write_unlock(&fs->lock);
+}
+
+/*
+ * Drop one reference on @fs.  The count is decremented under fs->lock;
+ * when it reaches zero the fs_struct is freed after the lock is released.
+ */
+void put_fs_struct(struct fs_struct *fs)
+{
+	int last_user;
+
+	write_lock(&fs->lock);
+	fs->users--;
+	last_user = (fs->users == 0);
+	write_unlock(&fs->lock);
+
+	if (last_user)
+		free_fs_struct(fs);
+}
+
+/*
* Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
* It can block.
*/
diff --git a/fs/open.c b/fs/open.c
index 74e5cd9..e9d5626 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -524,6 +524,18 @@ SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
return sys_faccessat(AT_FDCWD, filename, mode);
}
+/*
+ * do_chdir - make @path the working directory of @fs
+ * @fs: fs_struct to update
+ * @path: already resolved directory
+ *
+ * Checks MAY_EXEC | MAY_ACCESS on the directory's inode and, if allowed,
+ * installs @path with set_fs_pwd().  The callers in this patch
+ * path_put() @path afterwards; nothing is released here.
+ *
+ * Returns 0 on success, or the inode_permission() error.
+ */
+int do_chdir(struct fs_struct *fs, struct path *path)
+{
+	int err;
+
+	err = inode_permission(path->dentry->d_inode, MAY_EXEC | MAY_ACCESS);
+	if (!err)
+		set_fs_pwd(fs, path);
+
+	return err;
+}
+
SYSCALL_DEFINE1(chdir, const char __user *, filename)
{
struct path path;
@@ -531,17 +543,10 @@ SYSCALL_DEFINE1(chdir, const char __user *, filename)
error = user_path_dir(filename, &path);
if (error)
- goto out;
-
- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
- if (error)
- goto dput_and_out;
-
- set_fs_pwd(current->fs, &path);
+ return error;
-dput_and_out:
+ error = do_chdir(current->fs, &path);
path_put(&path);
-out:
return error;
}
@@ -571,31 +576,36 @@ out:
return error;
}
-SYSCALL_DEFINE1(chroot, const char __user *, filename)
+int do_chroot(struct fs_struct *fs, struct path *path)
{
- struct path path;
int error;
- error = user_path_dir(filename, &path);
+ error = inode_permission(path->dentry->d_inode, MAY_EXEC | MAY_ACCESS);
if (error)
- goto out;
+ return error;
+
+ if (!capable(CAP_SYS_CHROOT))
+ return -EPERM;
- error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
+ error = security_path_chroot(path);
if (error)
- goto dput_and_out;
+ return error;
- error = -EPERM;
- if (!capable(CAP_SYS_CHROOT))
- goto dput_and_out;
- error = security_path_chroot(&path);
+ set_fs_root(fs, path);
+ return 0;
+}
+
+SYSCALL_DEFINE1(chroot, const char __user *, filename)
+{
+ struct path path;
+ int error;
+
+ error = user_path_dir(filename, &path);
if (error)
- goto dput_and_out;
+ return error;
- set_fs_root(current->fs, &path);
- error = 0;
-dput_and_out:
+ error = do_chroot(current->fs, &path);
path_put(&path);
-out:
return error;
}
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 09fbb59..c1079b7 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -10,7 +10,7 @@
* distribution for more details.
*/
-#define CHECKPOINT_VERSION 3
+#define CHECKPOINT_VERSION 4
/* checkpoint user flags */
#define CHECKPOINT_SUBTREE 0x1
@@ -224,6 +224,10 @@ extern int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
extern int restore_file_common(struct ckpt_ctx *ctx, struct file *file,
struct ckpt_hdr_file *h);
+extern int ckpt_collect_fs(struct ckpt_ctx *ctx, struct task_struct *t);
+extern int checkpoint_obj_fs(struct ckpt_ctx *ctx, struct task_struct *t);
+extern int restore_obj_fs(struct ckpt_ctx *ctx, int fs_objref);
+
/* memory */
extern void ckpt_pgarr_free(struct ckpt_ctx *ctx);
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index e89fbf9..8dbd6e9 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -139,6 +139,9 @@ enum {
CKPT_HDR_MM_CONTEXT,
#define CKPT_HDR_MM_CONTEXT CKPT_HDR_MM_CONTEXT
+ CKPT_HDR_FS = 451, /* must be after file-table, mm */
+#define CKPT_HDR_FS CKPT_HDR_FS
+
CKPT_HDR_IPC = 501,
#define CKPT_HDR_IPC CKPT_HDR_IPC
CKPT_HDR_IPC_SHM,
@@ -209,6 +212,8 @@ enum obj_type {
#define CKPT_OBJ_FILE CKPT_OBJ_FILE
CKPT_OBJ_MM,
#define CKPT_OBJ_MM CKPT_OBJ_MM
+ CKPT_OBJ_FS,
+#define CKPT_OBJ_FS CKPT_OBJ_FS
CKPT_OBJ_SIGHAND,
#define CKPT_OBJ_SIGHAND CKPT_OBJ_SIGHAND
CKPT_OBJ_SIGNAL,
@@ -424,6 +429,7 @@ struct ckpt_hdr_task_objs {
__s32 files_objref;
__s32 mm_objref;
+ __s32 fs_objref;
__s32 sighand_objref;
__s32 signal_objref;
} __attribute__((aligned(8)));
@@ -461,6 +467,12 @@ enum restart_block_type {
};
/* file system */
+struct ckpt_hdr_fs {
+ struct ckpt_hdr h;
+ /* char *fs_pwd -- path of cwd, written first by checkpoint_fs() */
+ /* char *fs_root -- path of root, written second */
+} __attribute__((aligned(8)));
+
struct ckpt_hdr_file_table {
struct ckpt_hdr h;
__s32 fdt_nfds;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c0a59ea..ee725ff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1826,6 +1826,11 @@ extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
struct vfsmount *);
extern int vfs_statfs(struct dentry *, struct kstatfs *);
+struct fs_struct;
+extern int do_chdir(struct fs_struct *fs, struct path *path);
+extern int do_chroot(struct fs_struct *fs, struct path *path);
+
+
extern int current_umask(void);
/* /sys/fs */
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 78a05bf..a73cbcb 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -20,5 +20,7 @@ extern struct fs_struct *copy_fs_struct(struct fs_struct *);
extern void free_fs_struct(struct fs_struct *);
extern void daemonize_fs_struct(void);
extern int unshare_fs_struct(void);
+extern void get_fs_struct(struct fs_struct *);
+extern void put_fs_struct(struct fs_struct *);
#endif /* _LINUX_FS_STRUCT_H */
diff --git a/kernel/checkpoint/process.c b/kernel/checkpoint/process.c
index fa08616..922287b 100644
--- a/kernel/checkpoint/process.c
+++ b/kernel/checkpoint/process.c
@@ -232,6 +232,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
struct ckpt_hdr_task_objs *h;
int files_objref;
int mm_objref;
+ int fs_objref;
int sighand_objref;
int signal_objref;
int first, ret;
@@ -272,6 +273,13 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
return mm_objref;
}
+ /* note: this must come *after* file-table and mm */
+ fs_objref = checkpoint_obj_fs(ctx, t);
+ if (fs_objref < 0) {
+ ckpt_err(ctx, fs_objref, "%(T)process fs\n");
+ return fs_objref;
+ }
+
sighand_objref = checkpoint_obj_sighand(ctx, t);
ckpt_debug("sighand: objref %d\n", sighand_objref);
if (sighand_objref < 0) {
@@ -299,6 +307,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
return -ENOMEM;
h->files_objref = files_objref;
h->mm_objref = mm_objref;
+ h->fs_objref = fs_objref;
h->sighand_objref = sighand_objref;
h->signal_objref = signal_objref;
ret = ckpt_write_obj(ctx, &h->h);
@@ -477,6 +486,9 @@ int ckpt_collect_task(struct ckpt_ctx *ctx, struct task_struct *t)
ret = ckpt_collect_mm(ctx, t);
if (ret < 0)
return ret;
+ ret = ckpt_collect_fs(ctx, t);
+ if (ret < 0)
+ return ret;
ret = ckpt_collect_sighand(ctx, t);
return ret;
@@ -645,6 +657,11 @@ static int restore_task_objs(struct ckpt_ctx *ctx)
if (ret < 0)
goto out;
+ ret = restore_obj_fs(ctx, h->fs_objref);
+ ckpt_debug("fs: ret %d (%p)\n", ret, current->fs);
+ if (ret < 0)
+ return ret;
+
ret = restore_obj_sighand(ctx, h->sighand_objref);
ckpt_debug("sighand: ret %d (%p)\n", ret, current->sighand);
if (ret < 0)
--
1.6.3.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists