Message-ID: <131abf909496124958b0ce0a654fb8a25a633944.1493315290.git.bankarsandhya512@gmail.com>
Date:   Fri, 28 Apr 2017 00:49:23 +0530
From:   Sandhya Bankar <bankarsandhya512@...il.com>
To:     linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        viro@...iv.linux.org.uk, mawilcox@...rosoft.com,
        keescook@...omium.org, adobriyan@...il.com, re.emese@...il.com,
        riel@...riel.com
Subject: [PATCH 13/13] vfs: Delete struct fdtable

Complete the conversion of the file descriptor allocation code to use
the IDR.
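
(Background for readers joining the series here: once the IDR holds the
fd -> struct file mapping, it also records which descriptors are in use,
which is what makes the separate open_fds bitmap redundant.  Below is a
toy userspace model of the allocate-lowest-free-id pattern; the
fixed-size array and the toy_* names are illustrative stand-ins for the
kernel's idr_alloc()/idr_remove(), not kernel code.)

#include <stdio.h>

#define TOY_MAX	64

/* Illustrative userspace model only -- not the kernel IDR. */
static void *toy_table[TOY_MAX];

/* Bind ptr to the lowest free id >= start, as idr_alloc() would. */
static int toy_alloc(void *ptr, int start)
{
	int id;

	for (id = start; id < TOY_MAX; id++) {
		if (!toy_table[id]) {
			toy_table[id] = ptr;
			return id;
		}
	}
	return -1;	/* table full; idr_alloc() reports -ENOSPC */
}

/* Unbind an id and return what it pointed to, as idr_remove() would. */
static void *toy_remove(int id)
{
	void *old = toy_table[id];

	toy_table[id] = NULL;
	return old;
}

int main(void)
{
	int fd0 = toy_alloc("first", 0);	/* gets id 0 */
	int fd1 = toy_alloc("second", 0);	/* gets id 1 */

	toy_remove(fd0);
	/* The freed id is handed out again -- no bitmap needed. */
	printf("%d %d\n", fd1, toy_alloc("third", 0));	/* prints 1 0 */
	return 0;
}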

This patch includes the following changes:

 - Move max_fds from struct fdtable to files_struct.
 - Add a fill_max_fds() routine to calculate the new value of max_fds;
   the calculation matches the old behaviour of alloc_fdtable(), which
   is user-visible through /proc (see the sketch below the list).
 - Remove struct fdtable.
 - Remove resize_in_progress and resize_wait from files_struct.
 - Delete the open_fds bitmap handling and count_open_files().
 - Use READ_ONCE() instead of rcu_read_lock()/rcu_read_unlock().  RCU
   was only needed to dereference files->fdt; files->max_fds is not
   RCU-protected, so READ_ONCE() is sufficient to read its value.
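
As an illustration of the fill_max_fds() calculation, here is a minimal
userspace model of the rounding arithmetic (illustrative only, not part
of the patch: sizeof(void *) stands in for sizeof(struct file *),
model_max_fds() is a made-up name, and the nr_open value passed in
main() is an arbitrary example):

#include <stdio.h>

#define BITS_PER_LONG	(8 * (unsigned int)sizeof(long))

static unsigned int roundup_pow_of_two(unsigned int x)
{
	unsigned int p = 1;

	while (p < x)
		p <<= 1;
	return p;
}

static unsigned int model_max_fds(unsigned int nr, unsigned int nr_open)
{
	/* Size the table in 1024-byte chunks of file pointers, rounded
	 * up to the next power of two, mirroring fill_max_fds(). */
	nr /= (1024 / sizeof(void *));
	nr = roundup_pow_of_two(nr + 1);
	nr *= (1024 / sizeof(void *));

	/* Clamp to nr_open while keeping a multiple of BITS_PER_LONG. */
	if (nr > nr_open)
		nr = ((nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
	return nr;
}

int main(void)
{
	/* e.g. crossing fd 255 on a 64-bit box: 256 rounds up to 512 */
	printf("%u\n", model_max_fds(256, 1024 * 1024));
	return 0;
}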

Signed-off-by: Sandhya Bankar <bankarsandhya512@...il.com>
Signed-off-by: Matthew Wilcox <mawilcox@...rosoft.com>
---
 fs/compat.c             |   6 +-
 fs/file.c               | 354 +++++++-----------------------------------------
 fs/proc/array.c         |   2 +-
 fs/proc/fd.c            |   2 +-
 fs/select.c             |   6 +-
 include/linux/fdtable.h |  31 +----
 6 files changed, 54 insertions(+), 347 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index c61b506..7483c9c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1153,17 +1153,13 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
 	fd_set_bits fds;
 	void *bits;
 	int size, max_fds, ret = -EINVAL;
-	struct fdtable *fdt;
 	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
 
 	if (n < 0)
 		goto out_nofds;
 
 	/* max_fds can increase, so grab it once to avoid race */
-	rcu_read_lock();
-	fdt = files_fdtable(current->files);
-	max_fds = fdt->max_fds;
-	rcu_read_unlock();
+	max_fds = READ_ONCE(current->files->max_fds);
 	if (n > max_fds)
 		n = max_fds;
 
diff --git a/fs/file.c b/fs/file.c
index 23f198b..3e6cf10 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -31,191 +31,36 @@
 unsigned int sysctl_nr_open_max =
 	__const_min(INT_MAX, ~(size_t)0/sizeof(void *)) & -BITS_PER_LONG;
 
-static void *alloc_fdmem(size_t size)
+static int fill_max_fds(struct files_struct *files, unsigned int nr)
 {
-	/*
-	 * Very large allocations can stress page reclaim, so fall back to
-	 * vmalloc() if the allocation size will be considered "large" by the VM.
-	 */
-	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-		void *data = kmalloc(size, GFP_KERNEL_ACCOUNT |
-				     __GFP_NOWARN | __GFP_NORETRY);
-		if (data != NULL)
-			return data;
-	}
-	return __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM, PAGE_KERNEL);
-}
+	unsigned int nr_open;
 
-static void __free_fdtable(struct fdtable *fdt)
-{
-	kvfree(fdt->open_fds);
-	kfree(fdt);
-}
-
-static void free_fdtable_rcu(struct rcu_head *rcu)
-{
-	__free_fdtable(container_of(rcu, struct fdtable, rcu));
-}
-
-/*
- * Copy 'count' fd bits from the old table to the new table and clear the extra
- * space if any.  This does not copy the file pointers.  Called with the files
- * spinlock held for write.
- */
-static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
-			    unsigned int count)
-{
-	unsigned int cpy, set;
-
-	cpy = count / BITS_PER_BYTE;
-	set = (nfdt->max_fds - count) / BITS_PER_BYTE;
-	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
-	memset((char *)nfdt->open_fds + cpy, 0, set);
-}
+	if (likely(nr < files->max_fds))
+		return 0;
 
-/*
- * Copy all file descriptors from the old table to the new, expanded table and
- * clear the extra space.  Called with the files spinlock held for write.
- */
-static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
-{
-	BUG_ON(nfdt->max_fds < ofdt->max_fds);
-	copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
-}
+	nr_open = READ_ONCE(sysctl_nr_open);
 
-static struct fdtable * alloc_fdtable(unsigned int nr)
-{
-	struct fdtable *fdt;
-	void *data;
+	if (nr >= nr_open)
+		return -EMFILE;
 
 	/*
-	 * Figure out how many fds we actually want to support in this fdtable.
-	 * Allocation steps are keyed to the size of the fdarray, since it
-	 * grows far faster than any of the other dynamic data. We try to fit
-	 * the fdarray into comfortable page-tuned chunks: starting at 1024B
-	 * and growing in powers of two from there on.
+	 * This calculation of the new value of max_fds matches the old
+	 * behaviour of this code, which is user-visible through /proc.
+	 * nr may exceed sysctl_nr_open by a small amount.  That used to
+	 * be necessary to keep the bitmaps aligned, and we keep the same
+	 * behaviour even though the IDR handles the bitmaps now.
 	 */
+
 	nr /= (1024 / sizeof(struct file *));
 	nr = roundup_pow_of_two(nr + 1);
 	nr *= (1024 / sizeof(struct file *));
-	/*
-	 * Note that this can drive nr *below* what we had passed if sysctl_nr_open
-	 * had been set lower between the check in expand_files() and here.  Deal
-	 * with that in caller, it's cheaper that way.
-	 *
-	 * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
-	 * bitmaps handling below becomes unpleasant, to put it mildly...
-	 */
-	if (unlikely(nr > sysctl_nr_open))
-		nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
 
-	fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
-	if (!fdt)
-		goto out;
-	fdt->max_fds = nr;
+	if (unlikely(nr > nr_open))
+		nr = ((nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
 
-	data = alloc_fdmem(max_t(size_t, nr / BITS_PER_BYTE, L1_CACHE_BYTES));
-	if (!data)
-		goto out_fdt;
-	fdt->open_fds = data;
+	files->max_fds = nr;
 
-	return fdt;
-
-out_fdt:
-	kfree(fdt);
-out:
-	return NULL;
-}
-
-/*
- * Expand the file descriptor table.
- * This function will allocate a new fdtable and both fd array and fdset, of
- * the given size.
- * Return <0 error code on error; 1 on successful completion.
- * The files->file_lock should be held on entry, and will be held on exit.
- */
-static int expand_fdtable(struct files_struct *files, unsigned int nr)
-	__releases(files->file_lock)
-	__acquires(files->file_lock)
-{
-	struct fdtable *new_fdt, *cur_fdt;
-
-	spin_unlock(&files->file_lock);
-	idr_preload_end();
-	new_fdt = alloc_fdtable(nr);
-
-	/* make sure all __fd_install() have seen resize_in_progress
-	 * or have finished their rcu_read_lock_sched() section.
-	 */
-	if (atomic_read(&files->count) > 1)
-		synchronize_sched();
-
-	idr_preload(GFP_KERNEL);
-	spin_lock(&files->file_lock);
-	if (!new_fdt)
-		return -ENOMEM;
-	/*
-	 * extremely unlikely race - sysctl_nr_open decreased between the check in
-	 * caller and alloc_fdtable().  Cheaper to catch it here...
-	 */
-	if (unlikely(new_fdt->max_fds <= nr)) {
-		__free_fdtable(new_fdt);
-		return -EMFILE;
-	}
-	cur_fdt = files_fdtable(files);
-	BUG_ON(nr < cur_fdt->max_fds);
-	copy_fdtable(new_fdt, cur_fdt);
-	rcu_assign_pointer(files->fdt, new_fdt);
-	if (cur_fdt != &files->fdtab)
-		call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
-	/* coupled with smp_rmb() in __fd_install() */
-	smp_wmb();
-	return 1;
-}
-
-/*
- * Expand files.
- * This function will expand the file structures, if the requested size exceeds
- * the current capacity and there is room for expansion.
- * Return <0 error code on error; 0 when nothing done; 1 when files were
- * expanded and execution may have blocked.
- * The files->file_lock should be held on entry, and will be held on exit.
- */
-static int expand_files(struct files_struct *files, unsigned int nr)
-	__releases(files->file_lock)
-	__acquires(files->file_lock)
-{
-	struct fdtable *fdt;
-	int expanded = 0;
-
-repeat:
-	fdt = files_fdtable(files);
-
-	/* Do we need to expand? */
-	if (nr < fdt->max_fds)
-		return expanded;
-
-	/* Can we expand? */
-	if (nr >= sysctl_nr_open)
-		return -EMFILE;
-
-	if (unlikely(files->resize_in_progress)) {
-		spin_unlock(&files->file_lock);
-		idr_preload_end();
-		expanded = 1;
-		wait_event(files->resize_wait, !files->resize_in_progress);
-		idr_preload(GFP_KERNEL);
-		spin_lock(&files->file_lock);
-		goto repeat;
-	}
-
-	/* All good, so we try */
-	files->resize_in_progress = true;
-	expanded = expand_fdtable(files, nr);
-	files->resize_in_progress = false;
-
-	wake_up_all(&files->resize_wait);
-	return expanded;
+	return 0;
 }
 
 static inline bool fd_is_open(unsigned int fd, struct files_struct *files)
@@ -235,28 +80,21 @@ static inline void __clear_close_on_exec(unsigned int fd,
 	idr_tag_clear(&files->fd_idr, fd, FD_TAG_CLOEXEC);
 }
 
-static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
-{
-	__set_bit(fd, fdt->open_fds);
-}
-
-static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
-{
-	__clear_bit(fd, fdt->open_fds);
-}
-
-static unsigned int count_open_files(struct fdtable *fdt)
+static void close_files(struct files_struct * files)
 {
-	unsigned int size = fdt->max_fds;
-	unsigned int i;
+	/*
+	 * No need for RCU or ->file_lock protection because
+	 * this is the last reference to the files structure.
+	 */
+	struct file *file;
+	int fd;
 
-	/* Find the last open fd */
-	for (i = size / BITS_PER_LONG; i > 0; ) {
-		if (fdt->open_fds[--i])
-			break;
+	idr_for_each_entry(&files->fd_idr, file, fd) {
+		filp_close(file, files);
+		cond_resched_rcu_qs();
 	}
-	i = (i + 1) * BITS_PER_LONG;
-	return i;
+
+	idr_destroy(&files->fd_idr);
 }
 
 /*
@@ -267,9 +105,8 @@ static unsigned int count_open_files(struct fdtable *fdt)
 struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 {
 	struct files_struct *newf;
-	unsigned int open_files, i;
+	unsigned int i;
 	struct file *f;
-	struct fdtable *old_fdt, *new_fdt;
 
 	*errorp = -ENOMEM;
 	newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
@@ -280,51 +117,11 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 
 	spin_lock_init(&newf->file_lock);
 	idr_init(&newf->fd_idr);
-	newf->resize_in_progress = false;
-	init_waitqueue_head(&newf->resize_wait);
-	new_fdt = &newf->fdtab;
-	new_fdt->max_fds = NR_OPEN_DEFAULT;
-	new_fdt->open_fds = newf->open_fds_init;
+	newf->max_fds = NR_OPEN_DEFAULT;
 
 restart:
 	idr_copy_preload(&oldf->fd_idr, GFP_KERNEL);
 	spin_lock(&oldf->file_lock);
-	old_fdt = files_fdtable(oldf);
-	open_files = count_open_files(old_fdt);
-
-	/*
-	 * Check whether we need to allocate a larger fd array and fd set.
-	 */
-	while (unlikely(open_files > new_fdt->max_fds)) {
-		spin_unlock(&oldf->file_lock);
-		idr_preload_end();
-
-		if (new_fdt != &newf->fdtab)
-			__free_fdtable(new_fdt);
-
-		new_fdt = alloc_fdtable(open_files - 1);
-		if (!new_fdt) {
-			*errorp = -ENOMEM;
-			goto out_release;
-		}
-
-		/* beyond sysctl_nr_open; nothing to do */
-		if (unlikely(new_fdt->max_fds < open_files)) {
-			__free_fdtable(new_fdt);
-			*errorp = -EMFILE;
-			goto out_release;
-		}
-
-		/*
-		 * Reacquire the oldf lock and a pointer to its fd table
-		 * who knows it may have a new bigger fd table. We need
-		 * the latest pointer.
-		 */
-		idr_copy_preload(&oldf->fd_idr, GFP_KERNEL);
-		spin_lock(&oldf->file_lock);
-		old_fdt = files_fdtable(oldf);
-		open_files = count_open_files(old_fdt);
-	}
 
 	if (!idr_check_preload(&oldf->fd_idr)) {
 		spin_unlock(&oldf->file_lock);
@@ -332,8 +129,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 		goto restart;
 	}
 
-	copy_fd_bitmaps(new_fdt, old_fdt, open_files);
-
 	idr_for_each_entry(&oldf->fd_idr, f, i) {
 		int err;
 
@@ -341,6 +136,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 		err = idr_alloc(&newf->fd_idr, f, i, i + 1, GFP_NOWAIT);
 		if (WARN(err != i, "Could not allocate %d: %d", i, err)) {
 			spin_unlock(&oldf->file_lock);
+			idr_preload_end();
 			goto out;
 		}
 
@@ -351,42 +147,18 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
 	spin_unlock(&oldf->file_lock);
 	idr_preload_end();
 
-	/*
-	 * The fd may be claimed in the fd bitmap but not yet
-	 * instantiated in the files array if a sibling thread
-	 * is partway through open().
-	 */
-	for_each_set_bit(i, new_fdt->open_fds, new_fdt->max_fds) {
-		if (!idr_find(&newf->fd_idr, i))
-			__clear_bit(i, new_fdt->open_fds);
+	if (unlikely(fill_max_fds(newf, i) < 0)) {
+		*errorp = -EMFILE;
+		goto out;
 	}
 
-	rcu_assign_pointer(newf->fdt, new_fdt);
-
 	return newf;
 
-out_release:
-	idr_destroy(&newf->fd_idr);
-	kmem_cache_free(files_cachep, newf);
 out:
-	return NULL;
-}
-
-static void close_files(struct files_struct * files)
-{
-	/*
-	 * No need for RCU or ->file_lock protection because
-	 * this is the last reference to the files structure.
-	 */
-	struct file *file;
-	int fd;
-
-	idr_for_each_entry(&files->fd_idr, file, fd) {
-		filp_close(file, files);
-		cond_resched_rcu_qs();
-	}
+	close_files(newf);
+	kmem_cache_free(files_cachep, newf);
 
-	idr_destroy(&files->fd_idr);
+	return NULL;
 }
 
 struct files_struct *get_files_struct(struct task_struct *task)
@@ -405,12 +177,7 @@ struct files_struct *get_files_struct(struct task_struct *task)
 void put_files_struct(struct files_struct *files)
 {
 	if (atomic_dec_and_test(&files->count)) {
-		struct fdtable *fdt = rcu_dereference_raw(files->fdt);
 		close_files(files);
-
-		/* free the arrays if they are not embedded */
-		if (fdt != &files->fdtab)
-			__free_fdtable(fdt);
 		kmem_cache_free(files_cachep, files);
 	}
 }
@@ -441,11 +208,7 @@ void exit_files(struct task_struct *tsk)
 
 struct files_struct init_files = {
 	.count		= ATOMIC_INIT(1),
-	.fdt		= &init_files.fdtab,
-	.fdtab		= {
-		.max_fds	= NR_OPEN_DEFAULT,
-		.open_fds	= init_files.open_fds_init,
-	},
+	.max_fds	= NR_OPEN_DEFAULT,
 	.file_lock	= __SPIN_LOCK_UNLOCKED(init_files.file_lock),
 	.fd_idr		= IDR_INIT,
 };
@@ -458,7 +221,6 @@ int __alloc_fd(struct files_struct *files,
 {
 	unsigned int fd;
 	int error;
-	struct fdtable *fdt;
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&files->file_lock);
@@ -471,14 +233,12 @@ int __alloc_fd(struct files_struct *files,
 	BUG_ON(error < 0);
 	fd = error;
 
-	error = expand_files(files, fd);
+	error = fill_max_fds(files, fd);
 	if (error < 0) {
 		idr_remove(&files->fd_idr, fd);
 		goto out;
 	}
 
-	fdt = files_fdtable(files);
-	__set_open_fd(fd, fdt);
 	if (flags & O_CLOEXEC)
 		__set_close_on_exec(fd, files);
 	else
@@ -502,18 +262,11 @@ int get_unused_fd_flags(unsigned flags)
 }
 EXPORT_SYMBOL(get_unused_fd_flags);
 
-static void __put_unused_fd(struct files_struct *files, unsigned int fd)
-{
-	struct fdtable *fdt = files_fdtable(files);
-	__clear_open_fd(fd, fdt);
-}
-
 void put_unused_fd(unsigned int fd)
 {
 	struct files_struct *files = current->files;
 	spin_lock(&files->file_lock);
 	BUG_ON(idr_remove(&files->fd_idr, fd));
-	__put_unused_fd(files, fd);
 	spin_unlock(&files->file_lock);
 }
 
@@ -560,17 +313,12 @@ void fd_install(unsigned int fd, struct file *file)
 int __close_fd(struct files_struct *files, unsigned fd)
 {
 	struct file *file;
-	struct fdtable *fdt;
 
 	spin_lock(&files->file_lock);
-	fdt = files_fdtable(files);
-	if (fd >= fdt->max_fds)
-		goto out_unlock;
 	file = idr_remove(&files->fd_idr, fd);
 	if (!file)
 		goto out_unlock;
 	__clear_close_on_exec(fd, files);
-	__put_unused_fd(files, fd);
 	spin_unlock(&files->file_lock);
 	return filp_close(file, files);
 
@@ -589,7 +337,6 @@ void do_close_on_exec(struct files_struct *files)
 
 	idr_for_each_entry_tagged(&files->fd_idr, file, fd, FD_TAG_CLOEXEC) {
 		idr_remove(&files->fd_idr, fd);
-		__put_unused_fd(files, fd);
 		spin_unlock(&files->file_lock);
 		filp_close(file, files);
 		cond_resched();
@@ -718,7 +465,6 @@ static int do_dup2(struct files_struct *files,
 __releases(&files->file_lock)
 {
 	struct file *tofree;
-	struct fdtable *fdt;
 
 	/*
 	 * We need to detect attempts to do dup2() over allocated but still
@@ -734,7 +480,6 @@ static int do_dup2(struct files_struct *files,
 	 * scope of POSIX or SUS, since neither considers shared descriptor
 	 * tables and this condition does not arise without those.
 	 */
-	fdt = files_fdtable(files);
 	tofree = idr_find(&files->fd_idr, fd);
 	if (!tofree && fd_is_open(fd, files))
 		goto Ebusy;
@@ -749,7 +494,6 @@ static int do_dup2(struct files_struct *files,
 			goto Ebusy;
 		}
 	}
-	__set_open_fd(fd, fdt);
 	if (flags & O_CLOEXEC)
 		__set_close_on_exec(fd, files);
 	else
@@ -781,9 +525,10 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&files->file_lock);
-	err = expand_files(files, fd);
+	err = fill_max_fds(files, fd);
 	if (unlikely(err < 0))
 		goto out_unlock;
+
 	return do_dup2(files, file, fd, flags);
 
 out_unlock:
@@ -809,20 +554,18 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&files->file_lock);
-	err = expand_files(files, newfd);
+	err = fill_max_fds(files, newfd);
+	if (unlikely(err < 0))
+		goto Ebadf;
+
 	file = fcheck(oldfd);
 	if (unlikely(!file))
 		goto Ebadf;
-	if (unlikely(err < 0)) {
-		if (err == -EMFILE)
-			goto Ebadf;
-		goto out_unlock;
-	}
+
 	return do_dup2(files, file, newfd, flags);
 
 Ebadf:
 	err = -EBADF;
-out_unlock:
 	spin_unlock(&files->file_lock);
 	idr_preload_end();
 	return err;
@@ -875,12 +618,11 @@ int iterate_fd(struct files_struct *files, unsigned n,
 		int (*f)(const void *, struct file *, unsigned),
 		const void *p)
 {
-	struct fdtable *fdt;
 	int res = 0;
 	if (!files)
 		return 0;
 	spin_lock(&files->file_lock);
-	for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
+	for (; n < files->max_fds; n++) {
 		struct file *file;
 		file = __fcheck_files(files, n);
 		if (!file)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 88c3555..ec6fdaf 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -186,7 +186,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 
 	task_lock(p);
 	if (p->files)
-		max_fds = files_fdtable(p->files)->max_fds;
+		max_fds = READ_ONCE(p->files->max_fds);
 	task_unlock(p);
 	rcu_read_unlock();
 
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 2735ccc..7e5aeca 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -231,7 +231,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
 
 	rcu_read_lock();
 	for (fd = ctx->pos - 2;
-	     fd < files_fdtable(files)->max_fds;
+	     fd < files->max_fds;
 	     fd++, ctx->pos++) {
 		char name[PROC_NUMBUF];
 		int len;
diff --git a/fs/select.c b/fs/select.c
index 5d20a14..14da393 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -557,7 +557,6 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 	void *bits;
 	int ret, max_fds;
 	size_t size, alloc_size;
-	struct fdtable *fdt;
 	/* Allocate small arguments on the stack to save memory and be faster */
 	long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
 
@@ -566,10 +565,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 		goto out_nofds;
 
 	/* max_fds can increase, so grab it once to avoid race */
-	rcu_read_lock();
-	fdt = files_fdtable(current->files);
-	max_fds = fdt->max_fds;
-	rcu_read_unlock();
+	max_fds = READ_ONCE(current->files->max_fds);
 	if (n > max_fds)
 		n = max_fds;
 
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 7f1ab82..ff94541 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -24,43 +24,16 @@
 
 #define	FD_TAG_CLOEXEC	1
 
-struct fdtable {
-	unsigned int max_fds;
-	unsigned long *open_fds;
-	struct rcu_head rcu;
-};
-
 /*
  * Open file table structure
  */
 struct files_struct {
-  /*
-   * read mostly part
-   */
+	spinlock_t file_lock;
 	atomic_t count;
-	bool resize_in_progress;
-	wait_queue_head_t resize_wait;
-
 	struct idr fd_idr;
-	struct fdtable __rcu *fdt;
-	struct fdtable fdtab;
-  /*
-   * written part on a separate cache line in SMP
-   */
-	spinlock_t file_lock ____cacheline_aligned_in_smp;
-	unsigned long open_fds_init[1];
+	unsigned int max_fds;
 };
 
-struct file_operations;
-struct vfsmount;
-struct dentry;
-
-#define rcu_dereference_check_fdtable(files, fdtfd) \
-	rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))
-
-#define files_fdtable(files) \
-	rcu_dereference_check_fdtable((files), (files)->fdt)
-
 /*
  * The caller must ensure that fd table isn't shared or hold rcu or file lock
  */
-- 
1.8.3.1
