[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-id: <64A9A867-32E6-430C-A1A5-C515102D069D@sun.com>
Date: Mon, 11 Jan 2010 08:40:52 -0400
From: Andreas Dilger <adilger@....com>
To: Alexander Shishkin <virtuoso@...nd.org>
Cc: Valdis.Kletnieks@...edu, linux-fsdevel@...r.kernel.org,
akpm@...ux-foundation.org, linux-kernel@...r.kernel.org,
viro@...iv.linux.org.uk
Subject: Re: [RFC][PATCHv3] List per-process file descriptor consumption when
hitting file-max
On 2010-01-11, at 05:38, Alexander Shishkin wrote:
> When a file descriptor limit is hit, display the top consumers of
> descriptors so that it is possible to identify and fix those which
> leak them.
>
> Two new sysctl tunables are introduced:
> * file-max-consumers -- number of processes to display (defaults
> to 10);
> * file-max-rate-limit -- time interval between subsequent dumps
> (defaults to 10 seconds).
This should default to max_consumers=0 to avoid spamming the logs, IMHO.
> Signed-off-by: Alexander Shishkin <virtuoso@...nd.org>
> CC: viro@...iv.linux.org.uk
> CC: linux-fsdevel@...r.kernel.org
> ---
> Changes:
> v3 -- fix a couple of silly checkpatch errors
> v2 -- add rate-limiting and reduce number of processes to be output
> v1 -- initial implementation.
>
> fs/file_table.c | 89 +++++++++++++++++++++++++++++++++++++++++++
> ++++++++-
> include/linux/fs.h | 5 +++
> kernel/sysctl.c | 14 ++++++++
> 3 files changed, 107 insertions(+), 1 deletions(-)
>
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 69652c5..26666fd 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -9,6 +9,7 @@
> #include <linux/slab.h>
> #include <linux/file.h>
> #include <linux/fdtable.h>
> +#include <linux/sort.h>
> #include <linux/init.h>
> #include <linux/module.h>
> #include <linux/fs.h>
> @@ -29,7 +30,8 @@
>
> /* sysctl tunables... */
> struct files_stat_struct files_stat = {
> - .max_files = NR_FILE
> + .max_files = NR_FILE,
> + .max_consumers = NR_CONSUMERS,
> };
>
> /* public. Not pretty! */
> @@ -90,6 +92,80 @@ int proc_nr_files(ctl_table *table, int write,
> }
> #endif
>
> +/*
> + * Number of open file descriptors per task_struct
> + */
> +struct fd_consumer {
> + struct task_struct *task;
> + int fd_count;
> +};
> +
> +static int cmp_fd_consumers(const void *a, const void *b)
> +{
> + const struct fd_consumer *x = a, *y = b;
> +
> + return y->fd_count - x->fd_count;
> +}
> +
> +static void dump_fd_consumers(void)
> +{
> + struct task_struct *p;
> + struct files_struct *files;
> + struct fdtable *fdt;
> + int proc_limit = files_stat.max_consumers;
> + int i, nproc;
> + struct fd_consumer *procs, *tmp;
> +
> + if (!files_stat.max_consumers)
> + return;
> +
> + read_lock(&tasklist_lock);
> +
> + /* build an array of per-task file descriptor usage */
> + nproc = nr_processes();
> + procs = kzalloc(nproc * sizeof(struct fd_consumer), GFP_KERNEL);
> + if (!procs)
> + goto out;
> +
> + tmp = procs;
> +
> + for_each_process(p) {
> + tmp->task = p;
> +
> + files = get_files_struct(p);
> + if (!files)
> + continue;
> +
> + spin_lock(&files->file_lock);
> + fdt = files_fdtable(files);
> +
> + /* we have to actually *count* the fds */
> + for (tmp->fd_count = i = 0; i < fdt->max_fds; i++)
> + tmp->fd_count += !!fcheck_files(files, i);
> +
> + spin_unlock(&files->file_lock);
> + put_files_struct(files);
> +
> + tmp++;
> + }
> +
> + /* sort by number of used descriptor in descending order */
> + sort(procs, nproc, sizeof(struct fd_consumer), cmp_fd_consumers,
> NULL);
> +
> + if (proc_limit > nproc)
> + proc_limit = nproc;
> +
> + /* output the 'proc_limit' first entries */
> + for (i = 0, tmp = procs; i < proc_limit; i++, tmp++)
> + printk(KERN_INFO "=> %s [%d]: open=%d\n", tmp->task->comm,
> + tmp->task->pid, tmp->fd_count);
> +
> + kfree(procs);
> +
> +out:
> + read_unlock(&tasklist_lock);
> +}
> +
> /* Find an unused file structure and return a pointer to it.
> * Returns NULL, if there are no more free file structures or
> * we run out of memory.
> @@ -105,6 +181,7 @@ struct file *get_empty_filp(void)
> const struct cred *cred = current_cred();
> static int old_max;
> struct file * f;
> + static unsigned long next_dump;
>
> /*
> * Privileged users can go above max_files
> @@ -140,6 +217,14 @@ over:
> if (get_nr_files() > old_max) {
> printk(KERN_INFO "VFS: file-max limit %d reached\n",
> get_max_files());
> +
> + /* dump the biggest file descriptor users */
> + if (!next_dump || time_after(jiffies, next_dump)) {
> + next_dump = jiffies + files_stat.rate_limit;
> +
> + dump_fd_consumers();
> + }
> +
> old_max = get_nr_files();
> }
> goto fail;
> @@ -425,6 +510,8 @@ void __init files_init(unsigned long mempages)
> files_stat.max_files = n;
> if (files_stat.max_files < NR_FILE)
> files_stat.max_files = NR_FILE;
> +
> + files_stat.rate_limit = DUMP_RATE_LIMIT;
> files_defer_init();
> percpu_counter_init(&nr_files, 0);
> }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 9147ca8..291beb3 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -36,6 +36,8 @@ struct files_stat_struct {
> int nr_files; /* read only */
> int nr_free_files; /* read only */
> int max_files; /* tunable */
> + int max_consumers; /* tunable */
> + unsigned long rate_limit; /* tunable */
> };
>
> struct inodes_stat_t {
> @@ -46,6 +48,9 @@ struct inodes_stat_t {
>
>
> #define NR_FILE 8192 /* this can well be larger on a larger system */
> +#define NR_CONSUMERS 10 /* dump this many tasks when file-max is
> hit */
> +#define DUMP_RATE_LIMIT msecs_to_jiffies(10000) /* wait this long
> between
> + dumps */
>
> #define MAY_EXEC 1
> #define MAY_WRITE 2
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 8a68b24..dfb08fc 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -1325,6 +1325,20 @@ static struct ctl_table fs_table[] = {
> .proc_handler = proc_dointvec,
> },
> {
> + .procname = "file-max-consumers",
> + .data = &files_stat.max_consumers,
> + .maxlen = sizeof(int),
> + .mode = 0644,
> + .proc_handler = proc_dointvec,
> + },
> + {
> + .procname = "file-max-rate-limit",
> + .data = &files_stat.rate_limit,
> + .maxlen = sizeof(unsigned long),
> + .mode = 0644,
> + .proc_handler = proc_doulongvec_ms_jiffies_minmax,
> + },
> + {
> .procname = "nr_open",
> .data = &sysctl_nr_open,
> .maxlen = sizeof(int),
> --
> 1.6.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-
> fsdevel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
Cheers, Andreas
--
Andreas Dilger
Sr. Staff Engineer, Lustre Group
Sun Microsystems of Canada, Inc.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists