diff --git a/include/linux/file.h b/include/linux/file.h index a59001e..ec6b120 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -36,6 +36,16 @@ struct fdtable { }; /* + * To avoid big latencies in get_unused_fd(), + * we maintain counters of "one" bits in bitmap pages + * we define a 'page' here to contain 32768 bits, + * so that each counter is an unsigned short + * with MAX_NR_OPEN = 2^20, we get 32 counters : 64 bytes + */ +#define FDSBITS 32768 +#define MAX_NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ + +/* * Open file table structure */ struct files_struct { @@ -50,6 +60,7 @@ struct files_struct { */ spinlock_t file_lock ____cacheline_aligned_in_smp; int next_fd; + unsigned short fds_counter[(MAX_NR_OPEN + (FDSBITS - 1)) / FDSBITS]; struct embedded_fd_set close_on_exec_init; struct embedded_fd_set open_fds_init; struct file * fd_array[NR_OPEN_DEFAULT]; diff --git a/fs/fcntl.c b/fs/fcntl.c index 8e382a5..5257ba6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -61,6 +61,7 @@ static int locate_fd(struct files_struct unsigned int start; int error; struct fdtable *fdt; + unsigned int page_nr; error = -EINVAL; if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) @@ -77,11 +78,19 @@ repeat: start = files->next_fd; newfd = start; - if (start < fdt->max_fds) + + error = -EMFILE; + if (start < fdt->max_fds) { + page_nr = start / FDSBITS; + while (files->fds_counter[page_nr] == FDSBITS) { + page_nr++; + start = page_nr * FDSBITS; + if (start >= fdt->max_fds) + goto out; + } newfd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, start); - - error = -EMFILE; + } if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; @@ -122,6 +131,7 @@ static int dupfd(struct file *file, unsi /* locate_fd() may have expanded fdtable, load the ptr */ fdt = files_fdtable(files); FD_SET(fd, fdt->open_fds); + files->fds_counter[fd / FDSBITS]++; FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); fd_install(fd, file); @@ 
-171,7 +181,9 @@ asmlinkage long sys_dup2(unsigned int ol rcu_assign_pointer(fdt->fd[newfd], file); FD_SET(newfd, fdt->open_fds); + files->fds_counter[newfd / FDSBITS]++; FD_CLR(newfd, fdt->close_on_exec); + spin_unlock(&files->file_lock); if (tofree) diff --git a/fs/file.c b/fs/file.c index c5575de..7dbd9c5 100644 --- a/fs/file.c +++ b/fs/file.c @@ -147,8 +147,8 @@ static struct fdtable * alloc_fdtable(un nr /= (1024 / sizeof(struct file *)); nr = roundup_pow_of_two(nr + 1); nr *= (1024 / sizeof(struct file *)); - if (nr > NR_OPEN) - nr = NR_OPEN; + if (nr > MAX_NR_OPEN) + nr = MAX_NR_OPEN; fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); if (!fdt) @@ -233,7 +233,7 @@ int expand_files(struct files_struct *fi if (nr < fdt->max_fds) return 0; /* Can we expand? */ - if (nr >= NR_OPEN) + if (nr >= MAX_NR_OPEN) return -EMFILE; /* All good, so we try */ diff --git a/fs/open.c b/fs/open.c index 0d515d1..340e69b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -860,12 +860,23 @@ int get_unused_fd(void) struct files_struct * files = current->files; int fd, error; struct fdtable *fdt; + unsigned int page_nr; error = -EMFILE; spin_lock(&files->file_lock); repeat: fdt = files_fdtable(files); + page_nr = files->next_fd / FDSBITS; + /* + * We can avoid testing big chunks of memory if all bits are set + */ + while (files->fds_counter[page_nr] == FDSBITS) { + page_nr++; + files->next_fd = page_nr * FDSBITS; + if (files->next_fd >= fdt->max_fds) + break; + } fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, files->next_fd); @@ -891,6 +902,7 @@ repeat: } FD_SET(fd, fdt->open_fds); + files->fds_counter[fd / FDSBITS]++; FD_CLR(fd, fdt->close_on_exec); files->next_fd = fd + 1; #if 1 @@ -913,6 +925,7 @@ static void __put_unused_fd(struct files { struct fdtable *fdt = files_fdtable(files); __FD_CLR(fd, fdt->open_fds); + files->fds_counter[fd / FDSBITS]--; if (fd < files->next_fd) files->next_fd = fd; } diff --git a/include/linux/fs.h b/include/linux/fs.h index b3ae77c..9db2799 
100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -20,8 +20,6 @@ #include */ /* Fixed constants first: */ -#undef NR_OPEN -#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ #define INR_OPEN 1024 /* Initial setting for nfile rlimits */ #define BLOCK_SIZE_BITS 10 diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 276ccaa..bd24190 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -26,6 +26,7 @@ #define INIT_FILES \ .fdtab = INIT_FDTABLE, \ .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), \ .next_fd = 0, \ + .fds_counter = {0}, \ .close_on_exec_init = { { 0, } }, \ .open_fds_init = { { 0, } }, \ .fd_array = { NULL, } \ diff --git a/kernel/fork.c b/kernel/fork.c index 73ad5cd..f4341c5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -641,6 +641,7 @@ static struct files_struct *alloc_files( spin_lock_init(&newf->file_lock); newf->next_fd = 0; + memset(newf->fds_counter, 0, sizeof(newf->fds_counter)); fdt = &newf->fdtab; fdt->max_fds = NR_OPEN_DEFAULT; fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;