[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <56b3900d-652b-64d8-b2a4-8cd862c17b8f@fb.com>
Date: Mon, 23 Nov 2020 09:06:03 -0800
From: Yonghong Song <yhs@...com>
To: "Eric W. Biederman" <ebiederm@...ssion.com>,
<linux-kernel@...r.kernel.org>
CC: <linux-fsdevel@...r.kernel.org>, <criu@...nvz.org>,
<bpf@...r.kernel.org>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <christian.brauner@...ntu.com>,
Oleg Nesterov <oleg@...hat.com>,
Cyrill Gorcunov <gorcunov@...il.com>,
Jann Horn <jann@...jh.net>, Kees Cook <keescook@...omium.org>,
Daniel P . Berrangé <berrange@...hat.com>,
Jeff Layton <jlayton@...hat.com>,
Miklos Szeredi <miklos@...redi.hu>,
Matthew Wilcox <willy@...radead.org>,
"J. Bruce Fields" <bfields@...ldses.org>,
Trond Myklebust <trond.myklebust@...merspace.com>,
Chris Wright <chrisw@...hat.com>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>,
Andrii Nakryiko <andriin@...com>,
John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...omium.org>
Subject: Re: [PATCH v2 16/24] bpf/task_iter: In task_file_seq_get_next use
task_lookup_next_fd_rcu
On 11/20/20 3:14 PM, Eric W. Biederman wrote:
> When discussing[1] exec and posix file locks it was realized that none
> of the callers of get_files_struct fundamentally needed to call
> get_files_struct, and that by switching them to helper functions
> instead it will both simplify their code and remove unnecessary
> increments of files_struct.count. Those unnecessary increments can
> result in exec unnecessarily unsharing files_struct which breaking
> posix locks, and it can result in fget_light having to fallback to
> fget reducing system performance.
>
> Using task_lookup_next_fd_rcu simplifies task_file_seq_get_next, by
> moving the checking for the maximum file descritor into the generic
> code, and by remvoing the need for capturing and releasing a reference
> on files_struct. As the reference count of files_struct no longer
> needs to be maintained bpf_iter_seq_task_file_info can have it's files
> member removed and task_file_seq_get_next no longer needs it's fstruct
> argument.
>
> The curr_fd local variable does need to become unsigned to be used
> with fnext_task. As curr_fd is assigned from and assigned a u32
> making curr_fd an unsigned int won't cause problems and might prevent
> them.
>
> [1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com
> Suggested-by: Oleg Nesterov <oleg@...hat.com>
> v1: https://lkml.kernel.org/r/20200817220425.9389-11-ebiederm@xmission.com
> Signed-off-by: "Eric W. Biederman" <ebiederm@...ssion.com>
> ---
> kernel/bpf/task_iter.c | 44 ++++++++++--------------------------------
> 1 file changed, 10 insertions(+), 34 deletions(-)
Just a heads-up. The following patch
bpf-next: 91b2db27d3ff ("bpf: Simplify task_file_seq_get_next()")
https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=91b2db27d3ff9ad29e8b3108dfbf1e2f49fe9bd3
has been merged in bpf-next tree.
It will have merge conflicts with this patch. The above patch
is a refactoring for simplification with no functionality change.
>
> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
> index 5ab2ccfb96cb..4ec63170c741 100644
> --- a/kernel/bpf/task_iter.c
> +++ b/kernel/bpf/task_iter.c
> @@ -130,45 +130,33 @@ struct bpf_iter_seq_task_file_info {
> */
> struct bpf_iter_seq_task_common common;
> struct task_struct *task;
> - struct files_struct *files;
> u32 tid;
> u32 fd;
> };
>
> static struct file *
> task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
> - struct task_struct **task, struct files_struct **fstruct)
> + struct task_struct **task)
> {
> struct pid_namespace *ns = info->common.ns;
> - u32 curr_tid = info->tid, max_fds;
> - struct files_struct *curr_files;
> + u32 curr_tid = info->tid;
> struct task_struct *curr_task;
> - int curr_fd = info->fd;
> + unsigned int curr_fd = info->fd;
>
> /* If this function returns a non-NULL file object,
> - * it held a reference to the task/files_struct/file.
> + * it held a reference to the task/file.
> * Otherwise, it does not hold any reference.
> */
> again:
> if (*task) {
> curr_task = *task;
> - curr_files = *fstruct;
> curr_fd = info->fd;
> } else {
> curr_task = task_seq_get_next(ns, &curr_tid, true);
> if (!curr_task)
> return NULL;
>
> - curr_files = get_files_struct(curr_task);
> - if (!curr_files) {
> - put_task_struct(curr_task);
> - curr_tid = ++(info->tid);
> - info->fd = 0;
> - goto again;
> - }
> -
> - /* set *fstruct, *task and info->tid */
> - *fstruct = curr_files;
> + /* set *task and info->tid */
> *task = curr_task;
> if (curr_tid == info->tid) {
> curr_fd = info->fd;
> @@ -179,13 +167,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
> }
>
> rcu_read_lock();
> - max_fds = files_fdtable(curr_files)->max_fds;
> - for (; curr_fd < max_fds; curr_fd++) {
> + for (;; curr_fd++) {
> struct file *f;
> -
> - f = files_lookup_fd_rcu(curr_files, curr_fd);
> + f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
> if (!f)
> - continue;
> + break;
> if (!get_file_rcu(f))
> continue;
>
> @@ -197,10 +183,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
>
> /* the current task is done, go to the next task */
> rcu_read_unlock();
> - put_files_struct(curr_files);
> put_task_struct(curr_task);
> *task = NULL;
> - *fstruct = NULL;
> info->fd = 0;
> curr_tid = ++(info->tid);
> goto again;
> @@ -209,13 +193,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
> static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
> {
> struct bpf_iter_seq_task_file_info *info = seq->private;
> - struct files_struct *files = NULL;
> struct task_struct *task = NULL;
> struct file *file;
>
> - file = task_file_seq_get_next(info, &task, &files);
> + file = task_file_seq_get_next(info, &task);
> if (!file) {
> - info->files = NULL;
> info->task = NULL;
> return NULL;
> }
> @@ -223,7 +205,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
> if (*pos == 0)
> ++*pos;
> info->task = task;
> - info->files = files;
>
> return file;
> }
> @@ -231,22 +212,19 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
> static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> {
> struct bpf_iter_seq_task_file_info *info = seq->private;
> - struct files_struct *files = info->files;
> struct task_struct *task = info->task;
> struct file *file;
>
> ++*pos;
> ++info->fd;
> fput((struct file *)v);
> - file = task_file_seq_get_next(info, &task, &files);
> + file = task_file_seq_get_next(info, &task);
> if (!file) {
> - info->files = NULL;
> info->task = NULL;
> return NULL;
> }
>
> info->task = task;
> - info->files = files;
>
> return file;
> }
> @@ -295,9 +273,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v)
> (void)__task_file_seq_show(seq, v, true);
> } else {
> fput((struct file *)v);
> - put_files_struct(info->files);
> put_task_struct(info->task);
> - info->files = NULL;
> info->task = NULL;
> }
> }
>
Powered by blists - more mailing lists