Message-ID: <hjd226olq54jbxgxp65j4nexqflrmkqrrgzwfbdpjkwnqg4e4j@v4fev4d7yfhn>
Date: Fri, 3 Jan 2025 11:22:58 -0500
From: "Liam R. Howlett" <Liam.Howlett@...cle.com>
To: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Thomas Bogendoerfer <tsbogend@...ha.franken.de>,
Vlastimil Babka <vbabka@...e.cz>, Jann Horn <jannh@...gle.com>,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
linux-mips@...r.kernel.org
Subject: Re: [PATCH 2/2] mm: make mmap_region() internal
* Lorenzo Stoakes <lorenzo.stoakes@...cle.com> [250102 07:11]:
> Now that we have removed the one user of mmap_region() outside of mm, make
> it internal and add it to vma.c so it can be userland tested.
>
> This ensures that all external memory mappings are performed using the
> appropriate interfaces and allows us to modify memory mapping logic as we
> see fit.
>
> Additionally, expand the test stubs to allow the mmap_region() code to
> compile and be userland testable.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@...cle.com>
> ---
> include/linux/mm.h | 3 --
> mm/mmap.c | 59 -----------------------------
> mm/vma.c | 61 +++++++++++++++++++++++++++++-
> mm/vma.h | 2 +-
> tools/testing/vma/vma_internal.h | 65 ++++++++++++++++++++++++++++++++
> 5 files changed, 126 insertions(+), 64 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 6939c2a8d90f..1a11f9df5c2d 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3446,9 +3446,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
> return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
> }
>
> -extern unsigned long mmap_region(struct file *file, unsigned long addr,
> - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> - struct list_head *uf);
> extern unsigned long do_mmap(struct file *file, unsigned long addr,
> unsigned long len, unsigned long prot, unsigned long flags,
> vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 7fdc4207fe98..7aa36216ecc0 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1072,65 +1072,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
> return do_vmi_munmap(&vmi, mm, start, len, uf, false);
> }
>
> -/**
> - * mmap_region() - Actually perform the userland mapping of a VMA into
> - * current->mm with known, aligned and overflow-checked @addr and @len, and
> - * correctly determined VMA flags @vm_flags and page offset @pgoff.
> - *
> - * This is an internal memory management function, and should not be used
> - * directly.
> - *
> - * The caller must write-lock current->mm->mmap_lock.
> - *
> - * @file: If a file-backed mapping, a pointer to the struct file describing the
> - * file to be mapped, otherwise NULL.
> - * @addr: The page-aligned address at which to perform the mapping.
> - * @len: The page-aligned, non-zero, length of the mapping.
> - * @vm_flags: The VMA flags which should be applied to the mapping.
> - * @pgoff: If @file is specified, the page offset into the file, if not then
> - * the virtual page offset in memory of the anonymous mapping.
> - * @uf: Optionally, a pointer to a list head used for tracking userfaultfd unmap
> - * events.
> - *
> - * Returns: Either an error, or the address at which the requested mapping has
> - * been performed.
> - */
> -unsigned long mmap_region(struct file *file, unsigned long addr,
> - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> - struct list_head *uf)
> -{
> - unsigned long ret;
> - bool writable_file_mapping = false;
> -
> - mmap_assert_write_locked(current->mm);
> -
> - /* Check to see if MDWE is applicable. */
> - if (map_deny_write_exec(vm_flags, vm_flags))
> - return -EACCES;
> -
> - /* Allow architectures to sanity-check the vm_flags. */
> - if (!arch_validate_flags(vm_flags))
> - return -EINVAL;
> -
> - /* Map writable and ensure this isn't a sealed memfd. */
> - if (file && is_shared_maywrite(vm_flags)) {
> - int error = mapping_map_writable(file->f_mapping);
> -
> - if (error)
> - return error;
> - writable_file_mapping = true;
> - }
> -
> - ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
> -
> - /* Clear our write mapping regardless of error. */
> - if (writable_file_mapping)
> - mapping_unmap_writable(file->f_mapping);
> -
> - validate_mm(current->mm);
> - return ret;
> -}
> -
> int vm_munmap(unsigned long start, size_t len)
> {
> return __vm_munmap(start, len, false);
> diff --git a/mm/vma.c b/mm/vma.c
> index e37eb384d118..4cf2acc378ba 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -2427,7 +2427,7 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
> vma_set_page_prot(vma);
> }
>
> -unsigned long __mmap_region(struct file *file, unsigned long addr,
> +static unsigned long __mmap_region(struct file *file, unsigned long addr,
> unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> struct list_head *uf)
> {
> @@ -2479,6 +2479,65 @@ unsigned long __mmap_region(struct file *file, unsigned long addr,
> return error;
> }
>
> +/**
> + * mmap_region() - Actually perform the userland mapping of a VMA into
> + * current->mm with known, aligned and overflow-checked @addr and @len, and
> + * correctly determined VMA flags @vm_flags and page offset @pgoff.
> + *
> + * This is an internal memory management function, and should not be used
> + * directly.
> + *
> + * The caller must write-lock current->mm->mmap_lock.
> + *
> + * @file: If a file-backed mapping, a pointer to the struct file describing the
> + * file to be mapped, otherwise NULL.
> + * @addr: The page-aligned address at which to perform the mapping.
> + * @len: The page-aligned, non-zero, length of the mapping.
> + * @vm_flags: The VMA flags which should be applied to the mapping.
> + * @pgoff: If @file is specified, the page offset into the file, if not then
> + * the virtual page offset in memory of the anonymous mapping.
> + * @uf: Optionally, a pointer to a list head used for tracking userfaultfd unmap
> + * events.
> + *
> + * Returns: Either an error, or the address at which the requested mapping has
> + * been performed.
> + */
> +unsigned long mmap_region(struct file *file, unsigned long addr,
> + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> + struct list_head *uf)
> +{
> + unsigned long ret;
> + bool writable_file_mapping = false;
> +
> + mmap_assert_write_locked(current->mm);
> +
> + /* Check to see if MDWE is applicable. */
> + if (map_deny_write_exec(vm_flags, vm_flags))
> + return -EACCES;
> +
> + /* Allow architectures to sanity-check the vm_flags. */
> + if (!arch_validate_flags(vm_flags))
> + return -EINVAL;
> +
> + /* Map writable and ensure this isn't a sealed memfd. */
> + if (file && is_shared_maywrite(vm_flags)) {
> + int error = mapping_map_writable(file->f_mapping);
> +
> + if (error)
> + return error;
> + writable_file_mapping = true;
> + }
> +
> + ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
> +
> + /* Clear our write mapping regardless of error. */
> + if (writable_file_mapping)
> + mapping_unmap_writable(file->f_mapping);
> +
> + validate_mm(current->mm);
> + return ret;
> +}
> +
> /*
> * do_brk_flags() - Increase the brk vma if the flags match.
> * @vmi: The vma iterator
> diff --git a/mm/vma.h b/mm/vma.h
> index d6803626151d..41bb52594ffd 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -245,7 +245,7 @@ bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
> int mm_take_all_locks(struct mm_struct *mm);
> void mm_drop_all_locks(struct mm_struct *mm);
>
> -unsigned long __mmap_region(struct file *file, unsigned long addr,
> +unsigned long mmap_region(struct file *file, unsigned long addr,
> unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> struct list_head *uf);
>
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index c7c580ec9a2d..49a85ce0d45a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -41,6 +41,8 @@ extern unsigned long dac_mmap_min_addr;
> #define VM_BUG_ON(_expr) (BUG_ON(_expr))
> #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
>
> +#define MMF_HAS_MDWE 28
> +
> #define VM_NONE 0x00000000
> #define VM_READ 0x00000001
> #define VM_WRITE 0x00000002
> @@ -222,6 +224,8 @@ struct mm_struct {
> unsigned long stack_vm; /* VM_STACK */
>
> unsigned long def_flags;
> +
> + unsigned long flags; /* Must use atomic bitops to access */
> };
>
> struct file {
> @@ -1176,4 +1180,65 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
> {
> }
>
> +/*
> + * Denies creating a writable executable mapping or gaining executable permissions.
> + *
> + * This denies the following:
> + *
> + * a) mmap(PROT_WRITE | PROT_EXEC)
> + *
> + * b) mmap(PROT_WRITE)
> + * mprotect(PROT_EXEC)
> + *
> + * c) mmap(PROT_WRITE)
> + * mprotect(PROT_READ)
> + * mprotect(PROT_EXEC)
> + *
> + * But allows the following:
> + *
> + * d) mmap(PROT_READ | PROT_EXEC)
> + * mmap(PROT_READ | PROT_EXEC | PROT_BTI)
> + *
> + * This is only applicable if the user has set the Memory-Deny-Write-Execute
> + * (MDWE) protection mask for the current process.
> + *
> + * @old specifies the VMA flags the VMA originally possessed, and @new the ones
> + * we propose to set.
> + *
> + * Return: false if proposed change is OK, true if not ok and should be denied.
> + */
> +static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
> +{
> + /* If MDWE is disabled, we have nothing to deny. */
> + if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
> + return false;
> +
> + /* If the new VMA is not executable, we have nothing to deny. */
> + if (!(new & VM_EXEC))
> + return false;
> +
> + /* Under MDWE we do not accept newly writably executable VMAs... */
> + if (new & VM_WRITE)
> + return true;
> +
> + /* ...nor previously non-executable VMAs becoming executable. */
> + if (!(old & VM_EXEC))
> + return true;
> +
> + return false;
> +}
> +
> +static inline int mapping_map_writable(struct address_space *mapping)
> +{
> + int c = atomic_read(&mapping->i_mmap_writable);
> +
> + /* Derived from the raw_atomic_inc_unless_negative() implementation. */
> + do {
> + if (c < 0)
> + return -EPERM;
> + } while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1));
> +
> + return 0;
> +}
> +
> #endif /* __MM_VMA_INTERNAL_H */
> --
> 2.47.1
>
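
For anyone mapping memory from outside mm/ after this change: the
remaining interfaces in include/linux/mm.h are do_mmap() (caller must
already hold the mmap write lock) and vm_mmap()/vm_mmap_pgoff() (which
take the lock themselves). A rough, hypothetical sketch of such an
external caller -- the function name, @filp and the length below are
illustrative only, not taken from the patch:

	/*
	 * Hypothetical example: map the first PAGE_SIZE bytes of @filp
	 * read-only at a kernel-chosen address. vm_mmap() acquires
	 * current->mm->mmap_lock itself, so the caller holds no locks.
	 */
	static unsigned long example_map_file(struct file *filp)
	{
		unsigned long addr;

		addr = vm_mmap(filp, 0, PAGE_SIZE, PROT_READ, MAP_SHARED, 0);
		if (IS_ERR_VALUE(addr))
			pr_err("example mapping failed: %ld\n", (long)addr);

		return addr;
	}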