Message-ID: <b606c5fc-2463-492d-b978-e8cf4dab01c7@lucifer.local>
Date: Mon, 11 Aug 2025 14:21:08 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: LKML <linux-kernel@...r.kernel.org>,
Linus Torvalds <torvalds@...uxfoundation.org>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Arnaldo Carvalho de Melo <acme@...hat.com>,
Kees Cook <kees@...nel.org>
Subject: Re: [patch V2 RESEND 4/6] perf/core: Split out AUX buffer allocation
On Mon, Aug 11, 2025 at 02:36:35PM +0200, Thomas Gleixner wrote:
> The code logic in perf_mmap() is incomprehensible and has been a source of
> subtle bugs in the past. It makes it impossible to convert the atomic_t
> reference counts to refcount_t.
>
> There is not really much shared between the ringbuffer and AUX buffer
> allocation code, since the mlock limit calculation and the accounting have
> been split out into helper functions.
>
> Move the AUX buffer allocation code out and integrate the call with a
> temporary workaround to allow skipping the remaining ringbuffer-related
> code completely. That workaround will be removed once the ringbuffer
> allocation is moved to its own function as well.
>
> No functional change.
>
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
git range-diff made this much easier :)
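On the refcount_t point in the changelog, for anyone following along: as I
understand it, the conversion is blocked because refcount_t deliberately
forbids the zero-to-one transitions the current tangled logic can produce,
while atomic_t silently allows them. A minimal illustration of the API
difference (not from this patch, just the generic refcount semantics):

	atomic_t a = ATOMIC_INIT(0);
	atomic_inc(&a);			/* fine: silently goes 0 -> 1 */

	refcount_t r = REFCOUNT_INIT(0);
	refcount_inc(&r);		/* WARNs: inc-from-zero is treated as a
					 * use-after-free style bug */
	refcount_inc_not_zero(&r);	/* returns false rather than
					 * resurrecting the object */

So until the per-buffer mapping logic is disentangled enough that the counts
provably never come back from zero, the conversion can't happen.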
I see you have also swapped the ordering of the event->state <=
PERF_EVENT_STATE_REVOKED check and the data_page_nr() check. Comparing
against the original, that seems correct; at worst, having them in the
'wrong' order would just have produced a different error code than
anticipated.
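To spell that out (a rough paraphrase from memory, not the exact hunks, and
the errnos are my recollection rather than quoted from the tree), the two
checks are effectively:

	if (event->state <= PERF_EVENT_STATE_REVOKED)
		return -ENODEV;	/* revoked events cannot be mapped */

	if (data_page_nr(event->rb) != nr_pages)
		return -EINVAL;	/* size must match the existing mapping */

Since both only bail out, swapping them can only change which error a caller
racing against revocation sees, not the resulting state.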
Overall this LGTM, thanks for getting this respun quickly!!
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> ---
> V2: Fixup inverted condition and add the dropped flags setup back - Lorenzo
> Fixup subject line to match the content
> ---
> kernel/events/core.c | 137 +++++++++++++++++++++++++++++----------------------
> 1 file changed, 78 insertions(+), 59 deletions(-)
>
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -6970,12 +6970,79 @@ static void perf_mmap_account(struct vm_
> atomic64_add(extra, &vma->vm_mm->pinned_vm);
> }
>
> +static int perf_mmap_aux(struct vm_area_struct *vma, struct perf_event *event,
> + unsigned long nr_pages)
> +{
> + long user_extra = nr_pages, extra = 0;
> + struct perf_buffer *rb = event->rb;
> + u64 aux_offset, aux_size;
> + int ret, rb_flags = 0;
> +
> + /*
> + * AUX area mapping: if rb->aux_nr_pages != 0, it's already
> + * mapped, all subsequent mappings should have the same size
> + * and offset. Must be above the normal perf buffer.
> + */
> + aux_offset = READ_ONCE(rb->user_page->aux_offset);
> + aux_size = READ_ONCE(rb->user_page->aux_size);
> +
> + if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
> + return -EINVAL;
> +
> + if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
> + return -EINVAL;
> +
> + /* Already mapped with a different offset */
> + if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
> + return -EINVAL;
> +
> + if (aux_size != nr_pages * PAGE_SIZE)
> + return -EINVAL;
> +
> + /* Already mapped with a different size */
> + if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
> + return -EINVAL;
> +
> + if (!is_power_of_2(nr_pages))
> + return -EINVAL;
> +
> + /* If this succeeds, subsequent failures have to undo it */
> + if (!atomic_inc_not_zero(&rb->mmap_count))
> + return -EINVAL;
> +
> + /* If mapped, attach to it */
> + if (rb_has_aux(rb)) {
> + atomic_inc(&rb->aux_mmap_count);
> + return 0;
> + }
> +
> + if (!perf_mmap_calc_limits(vma, &user_extra, &extra)) {
> + atomic_dec(&rb->mmap_count);
> + return -EPERM;
> + }
> +
> + if (vma->vm_flags & VM_WRITE)
> + rb_flags |= RING_BUFFER_WRITABLE;
> +
> + ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
> + event->attr.aux_watermark, rb_flags);
> + if (ret) {
> + atomic_dec(&rb->mmap_count);
> + return ret;
> + }
> +
> + atomic_set(&rb->aux_mmap_count, 1);
> + rb->aux_mmap_locked = extra;
> + perf_mmap_account(vma, user_extra, extra);
> + atomic_inc(&event->mmap_count);
> + return 0;
> +}
> +
> static int perf_mmap(struct file *file, struct vm_area_struct *vma)
> {
> struct perf_event *event = file->private_data;
> unsigned long vma_size, nr_pages;
> long user_extra = 0, extra = 0;
> - struct mutex *aux_mutex = NULL;
> struct perf_buffer *rb = NULL;
> int ret, flags = 0;
> mapped_f mapped;
> @@ -7055,51 +7122,15 @@ static int perf_mmap(struct file *file,
> }
>
> } else {
> - /*
> - * AUX area mapping: if rb->aux_nr_pages != 0, it's already
> - * mapped, all subsequent mappings should have the same size
> - * and offset. Must be above the normal perf buffer.
> - */
> - u64 aux_offset, aux_size;
> -
> - rb = event->rb;
> - if (!rb)
> - goto aux_unlock;
> -
> - aux_mutex = &rb->aux_mutex;
> - mutex_lock(aux_mutex);
> -
> - aux_offset = READ_ONCE(rb->user_page->aux_offset);
> - aux_size = READ_ONCE(rb->user_page->aux_size);
> -
> - if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
> - goto aux_unlock;
> -
> - if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
> - goto aux_unlock;
> -
> - /* already mapped with a different offset */
> - if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
> - goto aux_unlock;
> -
> - if (aux_size != nr_pages * PAGE_SIZE)
> - goto aux_unlock;
> -
> - /* already mapped with a different size */
> - if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
> - goto aux_unlock;
> -
> - if (!is_power_of_2(nr_pages))
> - goto aux_unlock;
> -
> - if (!atomic_inc_not_zero(&rb->mmap_count))
> - goto aux_unlock;
> -
> - if (rb_has_aux(rb)) {
> - atomic_inc(&rb->aux_mmap_count);
> - ret = 0;
> - goto unlock;
> + if (!event->rb) {
> + ret = -EINVAL;
> + } else {
> + scoped_guard(mutex, &event->rb->aux_mutex)
> + ret = perf_mmap_aux(vma, event, nr_pages);
> }
> + // Temporary workaround to split out AUX handling first
> + mutex_unlock(&event->mmap_mutex);
> + goto out;
> }
>
> if (!perf_mmap_calc_limits(vma, &user_extra, &extra)) {
> @@ -7132,28 +7163,16 @@ static int perf_mmap(struct file *file,
> perf_event_init_userpage(event);
> perf_event_update_userpage(event);
> ret = 0;
> - } else {
> - ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
> - event->attr.aux_watermark, flags);
> - if (!ret) {
> - atomic_set(&rb->aux_mmap_count, 1);
> - rb->aux_mmap_locked = extra;
> - }
> }
> -
> unlock:
> if (!ret) {
> perf_mmap_account(vma, user_extra, extra);
> atomic_inc(&event->mmap_count);
> - } else if (rb) {
> - /* AUX allocation failed */
> - atomic_dec(&rb->mmap_count);
> }
> -aux_unlock:
> - if (aux_mutex)
> - mutex_unlock(aux_mutex);
> mutex_unlock(&event->mmap_mutex);
>
> +// Temporary until RB allocation is split out.
> +out:
> if (ret)
> return ret;
>
>
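One small aside on the new call site: the manual aux_mutex lock/unlock pair
becoming a scoped_guard() is a nice cleanup in itself. For readers not yet
used to <linux/cleanup.h>: the guard takes the lock on entry and drops it
automatically when the statement or block it governs ends, which is what
keeps the early returns inside perf_mmap_aux() safe:

	scoped_guard(mutex, &event->rb->aux_mutex)
		ret = perf_mmap_aux(vma, event, nr_pages);
	/* aux_mutex is released here, whatever perf_mmap_aux() returned */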