[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YsMGixSL4CDPTTZs@worktop.programming.kicks-ass.net>
Date: Mon, 4 Jul 2022 17:26:03 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Yang Jihong <yangjihong1@...wei.com>
Cc: mingo@...hat.com, acme@...nel.org, mark.rutland@....com,
alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
namhyung@...nel.org, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] perf/core: Fix data race between
perf_event_set_output and perf_mmap_close
On Mon, Jul 04, 2022 at 08:00:06PM +0800, Yang Jihong wrote:
> Data race exists between perf_event_set_output and perf_mmap_close.
> The scenario is as follows:
>
> CPU1 CPU2
> perf_mmap_close(event2)
> if (atomic_dec_and_test(&event2->rb->mmap_count) // mmap_count 1 -> 0
> detach_rest = true;
> ioctl(event1, PERF_EVENT_IOC_SET_OUTPUT, event2)
> perf_event_set_output(event1, event2)
> if (!detach_rest)
> goto out_put;
> list_for_each_entry_rcu(event, &event2->rb->event_list, rb_entry)
> ring_buffer_attach(event, NULL)
> // because event1 has not been added to event2->rb->event_list,
> // event1->rb is not set to NULL in these loops
>
> ring_buffer_attach(event1, event2->rb)
> list_add_rcu(&event1->rb_entry, &event2->rb->event_list)
>
> The above data race causes a problem, that is, event1->rb is not NULL, but event1->rb->mmap_count is 0.
> If the perf_mmap interface is invoked for the fd of event1, the kernel keeps in the perf_mmap infinite loop:
>
> again:
> mutex_lock(&event->mmap_mutex);
> if (event->rb) {
> <SNIP>
> if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
> /*
> * Raced against perf_mmap_close() through
> * perf_event_set_output(). Try again, hope for better
> * luck.
> */
> mutex_unlock(&event->mmap_mutex);
> goto again;
> }
> <SNIP>
Too tired, must look again tomorrow, little feeback below.
> kernel/events/core.c | 24 +++++++++++++++++++++++-
> 1 file changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 80782cddb1da..c67c070f7b39 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -5900,6 +5900,7 @@ static void ring_buffer_attach(struct perf_event *event,
> struct perf_buffer *rb)
> {
> struct perf_buffer *old_rb = NULL;
> + struct perf_buffer *new_rb = rb;
> unsigned long flags;
>
> WARN_ON_ONCE(event->parent);
> @@ -5928,6 +5929,20 @@ static void ring_buffer_attach(struct perf_event *event,
>
> spin_lock_irqsave(&rb->event_lock, flags);
> list_add_rcu(&event->rb_entry, &rb->event_list);
> +
> + /*
> + * When perf_mmap_close traverses rb->event_list during
> + * detach all other events, new event may not be added to
> + * rb->event_list, let's check again, if rb->mmap_count is 0,
> + * it indicates that perf_mmap_close is executed.
> + * Manually delete event from rb->event_list and
> + * set event->rb to null.
> + */
> + if (!atomic_read(&rb->mmap_count)) {
> + list_del_rcu(&event->rb_entry);
> + new_rb = NULL;
> + }
> +
> spin_unlock_irqrestore(&rb->event_lock, flags);
> }
>
> @@ -5944,7 +5959,7 @@ static void ring_buffer_attach(struct perf_event *event,
> if (has_aux(event))
> perf_event_stop(event, 0);
>
> - rcu_assign_pointer(event->rb, rb);
> + rcu_assign_pointer(event->rb, new_rb);
>
> if (old_rb) {
> ring_buffer_put(old_rb);
I'm confused by the above hunks; the below will avoid calling
ring_buffer_attach() when !rb->mmap_count, so how can the above ever
execute?
> @@ -11883,6 +11898,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
> goto unlock;
> }
>
> + /*
> + * If rb->mmap_count is 0, perf_mmap_close is being executed,
> + * the ring buffer is about to be unmapped and cannot be attached.
> + */
> + if (rb && !atomic_read(&rb->mmap_count))
> + goto unlock;
> +
> ring_buffer_attach(event, rb);
>
> ret = 0;
This is wrong I think, it'll leak ring_buffer_get().
Powered by blists - more mailing lists