fs/file.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++--- include/linux/fdtable.h | 2 ++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/fs/file.c b/fs/file.c index 6c672ad329e9..0b89a97cabb5 100644 --- a/fs/file.c +++ b/fs/file.c @@ -48,6 +48,7 @@ static void __free_fdtable(struct fdtable *fdt) { kvfree(fdt->fd); kvfree(fdt->open_fds); + kfree(fdt->full_fds_bits); kfree(fdt); } @@ -56,6 +57,9 @@ static void free_fdtable_rcu(struct rcu_head *rcu) __free_fdtable(container_of(rcu, struct fdtable, rcu)); } +#define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) +#define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) + /* * Expand the fdset in the files_struct. Called with the files spinlock * held for write. @@ -77,6 +81,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) memset((char *)(nfdt->open_fds) + cpy, 0, set); memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); memset((char *)(nfdt->close_on_exec) + cpy, 0, set); + + cpy = BITBIT_SIZE(ofdt->max_fds); + set = BITBIT_SIZE(nfdt->max_fds) - cpy; + memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); + memset(cpy+(char *)nfdt->full_fds_bits, 0, set); } static struct fdtable * alloc_fdtable(unsigned int nr) @@ -122,8 +131,21 @@ static struct fdtable * alloc_fdtable(unsigned int nr) data += nr / BITS_PER_BYTE; fdt->close_on_exec = data; + /* + * The "bitmap of bitmaps" has a bit set for each word in + * the open_fds array that is full. We initialize it to + * zero both at allocation and at copying, because it is + * important that it never have a bit set for a non-full + * word, but it doesn't have to be exact otherwise. + */ + fdt->full_fds_bits = kzalloc(BITBIT_SIZE(nr), GFP_KERNEL); + if (!fdt->full_fds_bits) + goto out_nofull; + return fdt; +out_nofull: + kvfree(fdt->open_fds); out_arr: kvfree(fdt->fd); out_fdt: @@ -229,14 +251,18 @@ static inline void __clear_close_on_exec(int fd, struct fdtable *fdt) __clear_bit(fd, fdt->close_on_exec); } -static inline void __set_open_fd(int fd, struct fdtable *fdt) +static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt) { __set_bit(fd, fdt->open_fds); + fd /= BITS_PER_LONG; + if (!~fdt->open_fds[fd]) + __set_bit(fd, fdt->full_fds_bits); } -static inline void __clear_open_fd(int fd, struct fdtable *fdt) +static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt) { __clear_bit(fd, fdt->open_fds); + __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits); } static int count_open_files(struct fdtable *fdt) @@ -280,6 +306,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) new_fdt->max_fds = NR_OPEN_DEFAULT; new_fdt->close_on_exec = newf->close_on_exec_init; new_fdt->open_fds = newf->open_fds_init; + new_fdt->full_fds_bits = newf->full_fds_bits_init; new_fdt->fd = &newf->fd_array[0]; spin_lock(&oldf->file_lock); @@ -323,6 +350,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8); memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8); + memcpy(new_fdt->full_fds_bits, old_fdt->full_fds_bits, BITBIT_SIZE(open_files)); for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; @@ -454,10 +482,25 @@ struct files_struct init_files = { .fd = &init_files.fd_array[0], .close_on_exec = init_files.close_on_exec_init, .open_fds = init_files.open_fds_init, + .full_fds_bits = init_files.full_fds_bits_init, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock), }; +static unsigned long find_next_fd(struct fdtable *fdt, unsigned long start) +{ + unsigned long maxfd = fdt->max_fds; + unsigned long maxbit = maxfd / BITS_PER_LONG; + unsigned long bitbit = start / BITS_PER_LONG; + + bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG; + if (bitbit > maxfd) + return maxfd; + if (bitbit > start) + start = bitbit; + return find_next_zero_bit(fdt->open_fds, maxfd, start); +} + /* * allocate a file descriptor, mark it busy. */ @@ -476,7 +519,7 @@ repeat: fd = files->next_fd; if (fd < fdt->max_fds) - fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd); + fd = find_next_fd(fdt, fd); /* * N.B. For clone tasks sharing a files structure, this test diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 674e3e226465..5295535b60c6 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -26,6 +26,7 @@ struct fdtable { struct file __rcu **fd; /* current fd array */ unsigned long *close_on_exec; unsigned long *open_fds; + unsigned long *full_fds_bits; struct rcu_head rcu; }; @@ -59,6 +60,7 @@ struct files_struct { int next_fd; unsigned long close_on_exec_init[1]; unsigned long open_fds_init[1]; + unsigned long full_fds_bits_init[1]; struct file __rcu * fd_array[NR_OPEN_DEFAULT]; };