lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <385df771-961c-4fc1-971c-81314c231f5d@lucifer.local>
Date: Thu, 18 Sep 2025 14:42:16 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Kalesh Singh <kaleshsingh@...gle.com>
Cc: akpm@...ux-foundation.org, minchan@...nel.org, david@...hat.com,
        Liam.Howlett@...cle.com, rppt@...nel.org, pfalcato@...e.de,
        kernel-team@...roid.com, android-mm@...gle.com,
        Alexander Viro <viro@...iv.linux.org.uk>,
        Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>,
        Kees Cook <kees@...nel.org>, Vlastimil Babka <vbabka@...e.cz>,
        Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
        Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
        Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
        Valentin Schneider <vschneid@...hat.com>, Jann Horn <jannh@...gle.com>,
        Shuah Khan <shuah@...nel.org>, linux-kernel@...r.kernel.org,
        linux-fsdevel@...r.kernel.org, linux-mm@...ck.org,
        linux-trace-kernel@...r.kernel.org, linux-kselftest@...r.kernel.org
Subject: Re: [PATCH v2 7/7] mm/tracing: introduce max_vma_count_exceeded
 trace event

On Mon, Sep 15, 2025 at 09:36:38AM -0700, Kalesh Singh wrote:
> Needed observability on in field devices can be collected with minimal
> overhead and can be toggled on and off. Event driven telemetry can be
> done with tracepoint BPF programs.
>
> The process comm is provided for aggregation across devices and tgid is
> to enable per-process aggregation per device.
>
> This allows for observing the distribution of such problems in the
> field, to deduce if there are legitimate bugs or if a bump to the limit is
> warranted.

It's not really a bug though is it? It's somebody running out of resources.

I'm not sure how useful this is really. But I'm open to being convinced!

I also wonder if this is better as a statistic? You'd figure out it was a
problem that way too right?

>
> Cc: Andrew Morton <akpm@...ux-foundation.org>
> Cc: David Hildenbrand <david@...hat.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@...cle.com>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> Cc: Mike Rapoport <rppt@...nel.org>
> Cc: Minchan Kim <minchan@...nel.org>
> Cc: Pedro Falcato <pfalcato@...e.de>
> Signed-off-by: Kalesh Singh <kaleshsingh@...gle.com>

This breaks the VMA tests, please make sure to always check them:

cc -I../shared -I. -I../../include -I../../arch/x86/include -I../../../lib -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined    -c -o vma.o vma.c
In file included from vma.c:33:
../../../mm/vma.c:10:10: fatal error: trace/events/vma.h: No such file or directory
   10 | #include <trace/events/vma.h>
      |          ^~~~~~~~~~~~~~~~~~~~
compilation terminated.
make: *** [<builtin>: vma.o] Error 1

See below though, you've included this in the wrong place (I don't blame you,
perhaps we've not made this _very_ clear :)

> ---
>
> Chnages in v2:
>   - Add needed observability for operations failing due to the vma count limit,
>       per Minchan
>     (Since there isn't a common point for debug logging due checks being
>     external to the capacity based vma_count_remaining() helper. I used a
>     trace event for low overhead and to facilitate event driven telemetry
>     for in field devices)
>
>  include/trace/events/vma.h | 32 ++++++++++++++++++++++++++++++++

Do we really need a new file? We already have VMA-related tracepoints no?

Also if you add a new file you _have_ to update MAINTAINERS.

>  mm/mmap.c                  |  5 ++++-
>  mm/mremap.c                | 10 ++++++++--
>  mm/vma.c                   | 11 +++++++++--
>  4 files changed, 53 insertions(+), 5 deletions(-)
>  create mode 100644 include/trace/events/vma.h
>
> diff --git a/include/trace/events/vma.h b/include/trace/events/vma.h
> new file mode 100644
> index 000000000000..2fed63b0d0a6
> --- /dev/null
> +++ b/include/trace/events/vma.h
> @@ -0,0 +1,32 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM vma
> +
> +#if !defined(_TRACE_VMA_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_VMA_H
> +
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(max_vma_count_exceeded,
> +
> +	TP_PROTO(struct task_struct *task),
> +
> +	TP_ARGS(task),
> +
> +	TP_STRUCT__entry(
> +		__string(comm,	task->comm)
> +		__field(pid_t,	tgid)
> +	),
> +
> +	TP_fast_assign(
> +		__assign_str(comm);
> +		__entry->tgid = task->tgid;
> +	),
> +
> +	TP_printk("comm=%s tgid=%d", __get_str(comm), __entry->tgid)
> +);
> +
> +#endif /*  _TRACE_VMA_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 30ddd550197e..0bb311bf48f3 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -56,6 +56,7 @@
>
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/mmap.h>
> +#include <trace/events/vma.h>
>
>  #include "internal.h"
>
> @@ -374,8 +375,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
>  		return -EOVERFLOW;
>
>  	/* Too many mappings? */
> -	if (!vma_count_remaining(mm))
> +	if (!vma_count_remaining(mm)) {
> +		trace_max_vma_count_exceeded(current);
>  		return -ENOMEM;
> +	}
>
>  	/*
>  	 * addr is returned from get_unmapped_area,
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 14d35d87e89b..f42ac05f0069 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -30,6 +30,8 @@
>  #include <asm/tlb.h>
>  #include <asm/pgalloc.h>
>
> +#include <trace/events/vma.h>
> +
>  #include "internal.h"
>
>  /* Classify the kind of remap operation being performed. */
> @@ -1040,8 +1042,10 @@ static unsigned long prep_move_vma(struct vma_remap_struct *vrm)
>  	 * We'd prefer to avoid failure later on in do_munmap:
>  	 * which may split one vma into three before unmapping.
>  	 */
> -	if (vma_count_remaining(current->mm) < 4)
> +	if (vma_count_remaining(current->mm) < 4) {
> +		trace_max_vma_count_exceeded(current);

But we didn't exceed it, we're guessing we might?

>  		return -ENOMEM;
> +	}
>
>  	if (vma->vm_ops && vma->vm_ops->may_split) {
>  		if (vma->vm_start != old_addr)
> @@ -1817,8 +1821,10 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm)
>  	 * the threshold. In other words, is the current map count + 6 at or
>  	 * below the threshold? Otherwise return -ENOMEM here to be more safe.
>  	 */
> -	if (vma_count_remaining(current->mm) < 6)
> +	if (vma_count_remaining(current->mm) < 6) {
> +		trace_max_vma_count_exceeded(current);

Similar point here.

>  		return -ENOMEM;
> +	}
>
>  	return 0;
>  }
> diff --git a/mm/vma.c b/mm/vma.c
> index 0e4fcaebe209..692c33c3e84d 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -7,6 +7,8 @@
>  #include "vma_internal.h"
>  #include "vma.h"
>
> +#include <trace/events/vma.h>

Nope you don't do this :)

vma.c is designed to be userland testable and _intentionally_ puts all its
includes in vma_internal.h.

So you'd need to add this include over there, and then update the vma tests
vma_internal.h to stub out the trace function.

> +
>  struct mmap_state {
>  	struct mm_struct *mm;
>  	struct vma_iterator *vmi;
> @@ -621,8 +623,10 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  		     unsigned long addr, int new_below)
>  {
> -	if (!vma_count_remaining(vma->vm_mm))
> +	if (!vma_count_remaining(vma->vm_mm)) {
> +		trace_max_vma_count_exceeded(current);
>  		return -ENOMEM;
> +	}
>
>  	return __split_vma(vmi, vma, addr, new_below);
>  }
> @@ -1375,6 +1379,7 @@ static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
>  		 */
>  		if (vms->end < vms->vma->vm_end &&
>  		    !vma_count_remaining(vms->vma->vm_mm)) {
> +			trace_max_vma_count_exceeded(current);
>  			error = -ENOMEM;
>  			goto vma_count_exceeded;
>  		}
> @@ -2801,8 +2806,10 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
>  	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT))
>  		return -ENOMEM;
>
> -	if (!vma_count_remaining(mm))
> +	if (!vma_count_remaining(mm)) {
> +		trace_max_vma_count_exceeded(current);
>  		return -ENOMEM;
> +	}
>
>  	if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
>  		return -ENOMEM;
> --
> 2.51.0.384.g4c02a37b29-goog
>

Cheers, Lorenzo

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ