[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b24f45dc-96c7-4dcc-8dab-8de11e3921af@lucifer.local>
Date: Mon, 23 Jun 2025 18:13:02 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Vlastimil Babka <vbabka@...e.cz>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
"Liam R. Howlett" <Liam.Howlett@...cle.com>,
David Hildenbrand <david@...hat.com>, Jann Horn <jannh@...gle.com>,
Mike Rapoport <rppt@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>,
Colin Cross <ccross@...gle.com>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH RFC 2/2] mm, madvise: move prctl_set_vma() to mm/madvise.c
On Mon, Jun 23, 2025 at 04:59:51PM +0200, Vlastimil Babka wrote:
> Setting anon_name is done via madvise_set_anon_name() and behaves a lot
> like other madvise operations. However, apparently because madvise()
> has lacked the 4th argument and prctl() not, the userspace entry point
> has been implemented via prctl(PR_SET_VMA, ...) and handled first by
> prctl_set_vma().
>
> Currently prctl_set_vma() lives in kernel/sys.c but it's mm code so move
> it under mm. mm/madvise.c seems to be the most straightforward place as
> that's where madvise_set_anon_name() lives, so we can stop declaring the
> latter in the header and instead declare prctl_set_vma(). It's not ideal
> as prctl is not madvise, but that's the reality we live in, as described
> above.
>
> Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
To be clear, I very much love what you're doing here too, but again I feel we
can tweak this :P See below...
> ---
> include/linux/mm.h | 13 +++++------
> kernel/sys.c | 64 ------------------------------------------------------
> mm/madvise.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++--
> 3 files changed, 63 insertions(+), 73 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0e0549f3d681f6c7a78e8dfa341a810e5a8f96c1..1f8c2561c8cf77e9bb695094325401c09c15f3e6 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -4059,14 +4059,13 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
> #endif
>
> #ifdef CONFIG_ANON_VMA_NAME
> -int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> - unsigned long len_in,
> - struct anon_vma_name *anon_name);
> +int prctl_set_vma(unsigned long opt, unsigned long start,
> + unsigned long size, unsigned long arg);
> #else
> -static inline int
> -madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> - unsigned long len_in, struct anon_vma_name *anon_name) {
> - return 0;
> +static inline int prctl_set_vma(unsigned long opt, unsigned long start,
> + unsigned long size, unsigned long arg)
> +{
> + return -EINVAL;
> }
> #endif
>
> diff --git a/kernel/sys.c b/kernel/sys.c
> index adc0de0aa364aebb23999f621717a5d32599921c..247d8925daa6fc86134504042832c2164b5d8277 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -2343,70 +2343,6 @@ int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long st
>
> #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE)
>
> -#ifdef CONFIG_ANON_VMA_NAME
> -
> -#define ANON_VMA_NAME_MAX_LEN 80
> -#define ANON_VMA_NAME_INVALID_CHARS "\\`$[]"
> -
> -static inline bool is_valid_name_char(char ch)
> -{
> - /* printable ascii characters, excluding ANON_VMA_NAME_INVALID_CHARS */
> - return ch > 0x1f && ch < 0x7f &&
> - !strchr(ANON_VMA_NAME_INVALID_CHARS, ch);
> -}
> -
> -static int prctl_set_vma(unsigned long opt, unsigned long addr,
> - unsigned long size, unsigned long arg)
> -{
> - struct mm_struct *mm = current->mm;
> - const char __user *uname;
> - struct anon_vma_name *anon_name = NULL;
> - int error;
> -
> - switch (opt) {
> - case PR_SET_VMA_ANON_NAME:
> - uname = (const char __user *)arg;
> - if (uname) {
> - char *name, *pch;
> -
> - name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
> - if (IS_ERR(name))
> - return PTR_ERR(name);
> -
> - for (pch = name; *pch != '\0'; pch++) {
> - if (!is_valid_name_char(*pch)) {
> - kfree(name);
> - return -EINVAL;
> - }
> - }
> - /* anon_vma has its own copy */
> - anon_name = anon_vma_name_alloc(name);
> - kfree(name);
> - if (!anon_name)
> - return -ENOMEM;
> -
> - }
> -
> - mmap_write_lock(mm);
> - error = madvise_set_anon_name(mm, addr, size, anon_name);
> - mmap_write_unlock(mm);
> - anon_vma_name_put(anon_name);
> - break;
> - default:
> - error = -EINVAL;
> - }
> -
> - return error;
> -}
> -
> -#else /* CONFIG_ANON_VMA_NAME */
> -static int prctl_set_vma(unsigned long opt, unsigned long start,
> - unsigned long size, unsigned long arg)
> -{
> - return -EINVAL;
> -}
> -#endif /* CONFIG_ANON_VMA_NAME */
> -
> static inline unsigned long get_current_mdwe(void)
> {
> unsigned long ret = 0;
> diff --git a/mm/madvise.c b/mm/madvise.c
> index ae29395b4fc7f65a449c5772b1901a90f4195885..4a8e61e2c5025726bc2ce1f323768c5b25cef2c9 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -31,6 +31,7 @@
> #include <linux/swapops.h>
> #include <linux/shmem_fs.h>
> #include <linux/mmu_notifier.h>
> +#include <linux/prctl.h>
>
> #include <asm/tlb.h>
>
> @@ -134,8 +135,8 @@ static int replace_anon_vma_name(struct vm_area_struct *vma,
> return 0;
> }
>
> -int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> - unsigned long len_in, struct anon_vma_name *anon_name)
> +static int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> + unsigned long len_in, struct anon_vma_name *anon_name)
> {
> unsigned long end;
> unsigned long len;
> @@ -165,6 +166,60 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
> madv_behavior.range.end = end;
> return madvise_walk_vmas(&madv_behavior);
> }
> +
> +#define ANON_VMA_NAME_MAX_LEN 80
> +#define ANON_VMA_NAME_INVALID_CHARS "\\`$[]"
> +
> +static inline bool is_valid_name_char(char ch)
> +{
> + /* printable ascii characters, excluding ANON_VMA_NAME_INVALID_CHARS */
> + return ch > 0x1f && ch < 0x7f &&
> + !strchr(ANON_VMA_NAME_INVALID_CHARS, ch);
> +}
> +
> +int prctl_set_vma(unsigned long opt, unsigned long addr,
> + unsigned long size, unsigned long arg)
So I'd really really like to quarantine the absolutely disgusting prctl() stuff
in kernel/sys.c. I hate to see this opt, addr, size, arg yuckity yuck yuck here.
> +{
> + struct mm_struct *mm = current->mm;
> + const char __user *uname;
> + struct anon_vma_name *anon_name = NULL;
> + int error;
> +
> + switch (opt) {
> + case PR_SET_VMA_ANON_NAME:
So I'd like to copy just the below over to madvise - we can decide to move stuff
around _later_, since it's really weird to have all the anon_vma_name stuff live
in madvise (apart from the stuff in include/linux/mm_inline.h, obviously) - but I
think that can be a follow-up patch.
I'd like to then split out bits and pieces to make this less yucky too.
Maybe add an anon_vma_name_from_user() helper that copies the name from userspace
(the strndup_user() etc.) and have it call a new static anon_vma_name_validate()
function which does the is_valid_name_char() check against all chars.
> + uname = (const char __user *)arg;
> + if (uname) {
> + char *name, *pch;
> +
> + name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
> + if (IS_ERR(name))
> + return PTR_ERR(name);
> +
> + for (pch = name; *pch != '\0'; pch++) {
> + if (!is_valid_name_char(*pch)) {
> + kfree(name);
> + return -EINVAL;
> + }
> + }
> + /* anon_vma has its own copy */
> + anon_name = anon_vma_name_alloc(name);
Right now I find the fact that we do this in prctl() super gross. Same with
mmap_write_lock(), anon_vma_name_put() etc. etc. below. It's just mm logic in a
random place.
Obviously you're fixing this either way :) but just to make the point :P
> + kfree(name);
> + if (!anon_name)
> + return -ENOMEM;
> +
> + }
> +
> + mmap_write_lock(mm);
> + error = madvise_set_anon_name(mm, addr, size, anon_name);
> + mmap_write_unlock(mm);
> + anon_vma_name_put(anon_name);
> + break;
> + default:
> + error = -EINVAL;
> + }
> +
> + return error;
> +}
> #else /* CONFIG_ANON_VMA_NAME */
> static int replace_anon_vma_name(struct vm_area_struct *vma,
> struct anon_vma_name *anon_name)
>
> --
> 2.50.0
>
Powered by blists - more mailing lists