lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160126045947.GA40151@ast-mbp.thefacebook.com>
Date:	Mon, 25 Jan 2016 20:59:49 -0800
From:	Alexei Starovoitov <alexei.starovoitov@...il.com>
To:	Peter Zijlstra <peterz@...radead.org>
Cc:	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	Ingo Molnar <mingo@...hat.com>, linux-kernel@...r.kernel.org,
	vince@...ter.net, eranian@...gle.com,
	Arnaldo Carvalho de Melo <acme@...radead.org>,
	Jiri Olsa <jolsa@...nel.org>,
	Daniel Borkmann <daniel@...earbox.net>,
	Wang Nan <wangnan0@...wei.com>
Subject: Re: [PATCH v2] perf: Synchronously cleanup child events

On Mon, Jan 25, 2016 at 10:04:10PM +0100, Peter Zijlstra wrote:
> On Mon, Jan 25, 2016 at 03:54:14PM +0100, Peter Zijlstra wrote:
> > Alexander, Alexei,
> > 
> > How about the below? That uses event->state == PERF_EVENT_STATE_EXIT to
> > indicate the event has been given up by its 'owner' and decouples us
> > from the actual event->owner logic.
> > 
> > This retains the event->owner and event->owner_list thing purely for the
> > prclt(.option = PR_TASK_PERF_EVENTS_{EN,DIS}ABLE) calls, but does give
> > us strict 'owner' semantics in that:
> > 
> >   struct perf_event *my_event = perf_event_create_kernel_counter();
> > 
> >   /* ... */
> > 
> >   perf_event_release_kernel(my_event);
> > 
> > Or
> > 
> >   int fd = sys_perf_event_open(...);
> > 
> >   close(fd); /* last, calls fops::release */
> > 
> > Will destroy the event dead. event::refcount will 'retain' the object
> > but it will become non functional and is strictly meant as a temporal
> > existence guarantee (for when RCU isn't good enough).
> > 
> > So this should restore the scm_rights case, which preserves the fd but
> > could result in not having event->owner (and therefore being removed
> > from its owner_list), which is fine.
> > 
> > BPF still needs to get fixed to use filedesc references instead.
> 
> Still no BPF, but this one actually 'works', as in it doesn't have the
> blatant exit races and has survived a few hours of runtime.
> 
> ---
>  include/linux/perf_event.h |    3 
>  kernel/events/core.c       |  304 ++++++++++++++++++++++-----------------------
>  2 files changed, 150 insertions(+), 157 deletions(-)

I think I understand what you're trying to do and
the patch looks good to me.
As far as BPF side I did the following...
does it match the model you outlined above?
I did basic testing and it looks fine.

Subject: [PATCH ] perf,bpf: convert perf_event_array to use struct file

Signed-off-by: Alexei Starovoitov <ast@...nel.org>
---
 include/linux/perf_event.h |  4 ++--
 kernel/bpf/arraymap.c      | 21 +++++++++++----------
 kernel/events/core.c       | 20 ++++++++------------
 kernel/trace/bpf_trace.c   | 14 ++++++++++----
 4 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f9828a48f16a..df275020fde9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -729,7 +729,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1070,7 +1070,7 @@ static inline int perf_event_init_task(struct task_struct *child)	{ return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)	{ }
 static inline void perf_event_free_task(struct task_struct *task)	{ }
 static inline void perf_event_delayed_put(struct task_struct *task)	{ }
-static inline struct perf_event *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)	{ return ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
 	return ERR_PTR(-EINVAL);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b0799bced518..89ebbc4d1164 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -291,10 +291,13 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 {
 	struct perf_event *event;
 	const struct perf_event_attr *attr;
+	struct file *file;
 
-	event = perf_event_get(fd);
-	if (IS_ERR(event))
-		return event;
+	file = perf_event_get(fd);
+	if (IS_ERR(file))
+		return file;
+
+	event = file->private_data;
 
 	attr = perf_event_attrs(event);
 	if (IS_ERR(attr))
@@ -304,24 +307,22 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
 		goto err;
 
 	if (attr->type == PERF_TYPE_RAW)
-		return event;
+		return file;
 
 	if (attr->type == PERF_TYPE_HARDWARE)
-		return event;
+		return file;
 
 	if (attr->type == PERF_TYPE_SOFTWARE &&
 	    attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-		return event;
+		return file;
 err:
-	perf_event_release_kernel(event);
+	fput(file);
 	return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-	struct perf_event *event = ptr;
-
-	perf_event_release_kernel(event);
+	fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 06ae52e99ac2..2a95e0d2370f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8896,21 +8896,17 @@ void perf_event_delayed_put(struct task_struct *task)
 		WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
-struct perf_event *perf_event_get(unsigned int fd)
+struct file *perf_event_get(unsigned int fd)
 {
-	int err;
-	struct fd f;
-	struct perf_event *event;
-
-	err = perf_fget_light(fd, &f);
-	if (err)
-		return ERR_PTR(err);
+	struct file *file;
 
-	event = f.file->private_data;
-	atomic_long_inc(&event->refcount);
-	fdput(f);
+	file = fget_raw(fd);
+	if (file->f_op != &perf_fops) {
+		fput(file);
+		return ERR_PTR(-EBADF);
+	}
 
-	return event;
+	return file;
 }
 
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 45dd798bcd37..326a75e884db 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
 	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct perf_event *event;
+	struct file *file;
 
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	event = (struct perf_event *)array->ptrs[index];
-	if (!event)
+	file = (struct file *)array->ptrs[index];
+	if (unlikely(!file))
 		return -ENOENT;
 
+	event = file->private_data;
+
 	/* make sure event is local and doesn't have pmu::count */
 	if (event->oncpu != smp_processor_id() ||
 	    event->pmu->count)
@@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
 	void *data = (void *) (long) r4;
 	struct perf_sample_data sample_data;
 	struct perf_event *event;
+	struct file *file;
 	struct perf_raw_record raw = {
 		.size = size,
 		.data = data,
@@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
 	if (unlikely(index >= array->map.max_entries))
 		return -E2BIG;
 
-	event = (struct perf_event *)array->ptrs[index];
-	if (unlikely(!event))
+	file = (struct file *)array->ptrs[index];
+	if (unlikely(!file))
 		return -ENOENT;
 
+	event = file->private_data;
+
 	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
 		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
 		return -EINVAL;
-- 
2.4.6

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ