lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sat, 15 Sep 2018 18:04:23 +0200
From:   Oleg Nesterov <oleg@...hat.com>
To:     Jeff Layton <jlayton@...nel.org>
Cc:     viro@...iv.linux.org.uk, ebiederm@...ssion.com,
        berrange@...hat.com, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org,
        Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [PATCH v3 1/3] exec: separate thread_count for files_struct

On 09/14, Jeff Layton wrote:
>
> Currently, we have a single refcount variable inside the files_struct.
> When we go to unshare the files_struct, we check this counter and if
> it's elevated, then we allocate a new files_struct instead of just
> repurposing the old one, under the assumption that that indicates that
> it's shared between tasks.
>
> This is not necessarily the case, however. Each task associated with the
> files_struct does get a long-held reference, but the refcount can be
> elevated for other reasons as well, by callers of get_files_struct.
>
> This means that we can end up allocating a new files_struct if we just
> happen to race with a call to get_files_struct. Fix this by adding a new
> counter to track the number associated threads, and use that to
> determine whether to allocate a new files_struct when unsharing.

But who actually needs get_files_struct() ? All users except binder.c need
struct file, not struct files_struct.

See the (completely untested) patch below. It adds the new fget_task() helper
and converts all other users to use it.

As for binder.c, in this case we probably actually want to unshare ->files
on exec so we can ignore it?

Oleg.
---

 fs/file.c            |  29 ++++++++++++-
 fs/proc/fd.c         | 117 +++++++++++++++++++--------------------------------
 include/linux/file.h |   3 ++
 kernel/bpf/syscall.c |  23 +++-------
 kernel/kcmp.c        |  20 ++-------
 5 files changed, 83 insertions(+), 109 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index 7ffd6e9..a685cc0 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -676,9 +676,9 @@ void do_close_on_exec(struct files_struct *files)
 	spin_unlock(&files->file_lock);
 }
 
-static struct file *__fget(unsigned int fd, fmode_t mask)
+static struct file *__fget_files(struct files_struct *files,
+				unsigned int fd, fmode_t mask)
 {
-	struct files_struct *files = current->files;
 	struct file *file;
 
 	rcu_read_lock();
@@ -699,12 +699,37 @@ static struct file *__fget(unsigned int fd, fmode_t mask)
 	return file;
 }
 
+static inline struct file *__fget(unsigned int fd, fmode_t mask)
+{
+	return __fget_files(current->files, fd, mask);
+}
+
 struct file *fget(unsigned int fd)
 {
 	return __fget(fd, FMODE_PATH);
 }
 EXPORT_SYMBOL(fget);
 
+struct file *fget_task(struct task_struct *task, unsigned int fd)
+{
+	struct files_struct *files;
+	struct file *file = ERR_PTR(-EBADF);
+
+	task_lock(task);
+	/*
+	 * __fget_files() checks max_fds itself but we want -EBADF
+	 * if fd is too big; and files_fdtable() needs rcu lock.
+	 */
+	rcu_read_lock();
+	files = task->files;
+	if (files && fd < files_fdtable(files)->max_fds)
+		file = __fget_files(files, fd, FMODE_PATH);
+	rcu_read_unlock();
+	task_unlock(task);
+
+	return file;
+}
+
 struct file *fget_raw(unsigned int fd)
 {
 	return __fget(fd, 0);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 81882a1..bb61890 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -19,54 +19,45 @@
 
 static int seq_show(struct seq_file *m, void *v)
 {
-	struct files_struct *files = NULL;
+	struct task_struct *task = get_proc_task(m->private);
+	unsigned int fd = proc_fd(m->private);
 	int f_flags = 0, ret = -ENOENT;
 	struct file *file = NULL;
-	struct task_struct *task;
 
-	task = get_proc_task(m->private);
 	if (!task)
-		return -ENOENT;
-
-	files = get_files_struct(task);
-	put_task_struct(task);
-
-	if (files) {
-		unsigned int fd = proc_fd(m->private);
-
-		spin_lock(&files->file_lock);
-		file = fcheck_files(files, fd);
-		if (file) {
-			struct fdtable *fdt = files_fdtable(files);
+		return ret;
 
-			f_flags = file->f_flags;
-			if (close_on_exec(fd, fdt))
-				f_flags |= O_CLOEXEC;
+	file = fget_task(task, fd);
+	if (IS_ERR_OR_NULL(file))
+		goto out;
 
-			get_file(file);
-			ret = 0;
-		}
-		spin_unlock(&files->file_lock);
-		put_files_struct(files);
+	/* TODO: add another helper ? */
+	task_lock(task);
+	rcu_read_lock();
+	if (task->files) {
+		struct fdtable *fdt = files_fdtable(task->files);
+		if (fd < fdt->max_fds && close_on_exec(fd, fdt))
+			f_flags |= O_CLOEXEC;
 	}
-
-	if (ret)
-		return ret;
+	rcu_read_unlock();
+	task_unlock(task);
 
 	seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n",
 		   (long long)file->f_pos, f_flags,
 		   real_mount(file->f_path.mnt)->mnt_id);
 
-	show_fd_locks(m, file, files);
+	/* show_fd_locks() never dereferences file, NULL is fine too */
+	show_fd_locks(m, file, task->files);
 	if (seq_has_overflowed(m))
 		goto out;
 
 	if (file->f_op->show_fdinfo)
 		file->f_op->show_fdinfo(m, file);
 
-out:
 	fput(file);
-	return 0;
+out:
+	put_task_struct(task);
+	return ret;
 }
 
 static int seq_fdinfo_open(struct inode *inode, struct file *file)
@@ -83,19 +74,14 @@ static const struct file_operations proc_fdinfo_file_operations = {
 
 static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
 {
-	struct files_struct *files = get_files_struct(task);
-	struct file *file;
+	struct file *file = fget_task(task, fd);
 
-	if (!files)
+	if (IS_ERR_OR_NULL(file))
 		return false;
 
-	rcu_read_lock();
-	file = fcheck_files(files, fd);
-	if (file)
-		*mode = file->f_mode;
-	rcu_read_unlock();
-	put_files_struct(files);
-	return !!file;
+	*mode = file->f_mode;
+	fput(file);
+	return true;
 }
 
 static void tid_fd_update_inode(struct task_struct *task, struct inode *inode,
@@ -146,31 +132,24 @@ static const struct dentry_operations tid_fd_dentry_operations = {
 
 static int proc_fd_link(struct dentry *dentry, struct path *path)
 {
-	struct files_struct *files = NULL;
-	struct task_struct *task;
+	struct task_struct *task = get_proc_task(d_inode(dentry));
+	unsigned int fd = proc_fd(d_inode(dentry));
+	struct file *fd_file;
 	int ret = -ENOENT;
 
 	task = get_proc_task(d_inode(dentry));
-	if (task) {
-		files = get_files_struct(task);
-		put_task_struct(task);
-	}
-
-	if (files) {
-		unsigned int fd = proc_fd(d_inode(dentry));
-		struct file *fd_file;
+	if (!task)
+		return ret;
 
-		spin_lock(&files->file_lock);
-		fd_file = fcheck_files(files, fd);
-		if (fd_file) {
-			*path = fd_file->f_path;
-			path_get(&fd_file->f_path);
-			ret = 0;
-		}
-		spin_unlock(&files->file_lock);
-		put_files_struct(files);
+	fd_file = fget_task(task, fd);
+	if (!IS_ERR_OR_NULL(fd_file)) {
+		*path = fd_file->f_path;
+		path_get(&fd_file->f_path);
+		fput(fd_file);
+		ret = 0;
 	}
 
+	put_task_struct(task);
 	return ret;
 }
 
@@ -229,7 +208,6 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
 			      instantiate_t instantiate)
 {
 	struct task_struct *p = get_proc_task(file_inode(file));
-	struct files_struct *files;
 	unsigned int fd;
 
 	if (!p)
@@ -237,37 +215,30 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
 
 	if (!dir_emit_dots(file, ctx))
 		goto out;
-	files = get_files_struct(p);
-	if (!files)
-		goto out;
 
-	rcu_read_lock();
-	for (fd = ctx->pos - 2;
-	     fd < files_fdtable(files)->max_fds;
-	     fd++, ctx->pos++) {
+	for (fd = ctx->pos - 2;; fd++, ctx->pos++) {
 		struct file *f;
 		struct fd_data data;
 		char name[10 + 1];
 		unsigned int len;
 
-		f = fcheck_files(files, fd);
+		f = fget_task(p, fd);
 		if (!f)
 			continue;
+		if (IS_ERR(f))
+			break;
+
 		data.mode = f->f_mode;
-		rcu_read_unlock();
 		data.fd = fd;
+		fput(f);
 
 		len = snprintf(name, sizeof(name), "%u", fd);
 		if (!proc_fill_cache(file, ctx,
 				     name, len, instantiate, p,
 				     &data))
-			goto out_fd_loop;
+			goto out;
 		cond_resched();
-		rcu_read_lock();
 	}
-	rcu_read_unlock();
-out_fd_loop:
-	put_files_struct(files);
 out:
 	put_task_struct(p);
 	return 0;
diff --git a/include/linux/file.h b/include/linux/file.h
index 6b2fb03..8b7abdb 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -43,6 +43,9 @@ static inline void fdput(struct fd fd)
 		fput(fd.file);
 }
 
+struct task_struct;
+extern struct file *fget_task(struct task_struct *task, unsigned int fd);
+
 extern struct file *fget(unsigned int fd);
 extern struct file *fget_raw(unsigned int fd);
 extern unsigned long __fdget(unsigned int fd);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8339d81..2cbf26d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2260,7 +2260,6 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	pid_t pid = attr->task_fd_query.pid;
 	u32 fd = attr->task_fd_query.fd;
 	const struct perf_event *event;
-	struct files_struct *files;
 	struct task_struct *task;
 	struct file *file;
 	int err;
@@ -2278,24 +2277,13 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	if (!task)
 		return -ENOENT;
 
-	files = get_files_struct(task);
-	put_task_struct(task);
-	if (!files)
-		return -ENOENT;
-
-	err = 0;
-	spin_lock(&files->file_lock);
-	file = fcheck_files(files, fd);
-	if (!file)
-		err = -EBADF;
-	else
-		get_file(file);
-	spin_unlock(&files->file_lock);
-	put_files_struct(files);
-
-	if (err)
+	err = -EBADF;
+	file = fget_task(task, fd);
+	if (IS_ERR_OR_NULL(file))
 		goto out;
 
+	err = -ENOTSUPP;
+
 	if (file->f_op == &bpf_raw_tp_fops) {
 		struct bpf_raw_tracepoint *raw_tp = file->private_data;
 		struct bpf_raw_event_map *btp = raw_tp->btp;
@@ -2324,7 +2312,6 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 		goto put_file;
 	}
 
-	err = -ENOTSUPP;
 put_file:
 	fput(file);
 out:
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index a0e3d7a..067639e 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -107,7 +107,6 @@ static int kcmp_epoll_target(struct task_struct *task1,
 {
 	struct file *filp, *filp_epoll, *filp_tgt;
 	struct kcmp_epoll_slot slot;
-	struct files_struct *files;
 
 	if (copy_from_user(&slot, uslot, sizeof(slot)))
 		return -EFAULT;
@@ -116,23 +115,12 @@ static int kcmp_epoll_target(struct task_struct *task1,
 	if (!filp)
 		return -EBADF;
 
-	files = get_files_struct(task2);
-	if (!files)
+	filp_epoll = fget_task(task2, slot.efd);
+	if (IS_ERR_OR_NULL(filp_epoll))
 		return -EBADF;
 
-	spin_lock(&files->file_lock);
-	filp_epoll = fcheck_files(files, slot.efd);
-	if (filp_epoll)
-		get_file(filp_epoll);
-	else
-		filp_tgt = ERR_PTR(-EBADF);
-	spin_unlock(&files->file_lock);
-	put_files_struct(files);
-
-	if (filp_epoll) {
-		filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
-		fput(filp_epoll);
-	}
+	filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+	fput(filp_epoll);
 
 	if (IS_ERR(filp_tgt))
 		return PTR_ERR(filp_tgt);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ