Message-ID: <p7nijnmkjljnevxdizul2iczzk33pk7o6rjahzm6wceldfpaom@jdj7o4zszgex>
Date: Mon, 28 Apr 2025 15:12:19 -0400
From: "Liam R. Howlett" <Liam.Howlett@...cle.com>
To: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
        Vlastimil Babka <vbabka@...e.cz>, Jann Horn <jannh@...gle.com>,
        Pedro Falcato <pfalcato@...e.de>, David Hildenbrand <david@...hat.com>,
        Kees Cook <kees@...nel.org>, Alexander Viro <viro@...iv.linux.org.uk>,
        Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>,
        Suren Baghdasaryan <surenb@...gle.com>, linux-mm@...ck.org,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 1/4] mm: establish mm/vma_exec.c for shared exec/mm
 VMA functionality

* Lorenzo Stoakes <lorenzo.stoakes@...cle.com> [250428 11:28]:
> There is functionality that overlaps the exec and memory mapping
> subsystems. While it properly belongs in mm, it is important that the
> exec maintainers retain oversight of it.
> 
> We can achieve both goals by adding a new mm/vma_exec.c file which
> contains these 'glue' functions, and by having fs/exec.c import them.
> 
> As part of this change, to ensure that proper oversight is achieved,
> add the file to both the MEMORY MAPPING and the EXEC & BINFMT API, ELF
> sections of MAINTAINERS.
> 
> scripts/get_maintainer.pl correctly handles files listed under multiple
> entries, and this neatly covers the cross-over.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@...cle.com>
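
A few non-blocking observations inline below.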

> ---
>  MAINTAINERS                      |  2 +
>  fs/exec.c                        |  3 ++
>  include/linux/mm.h               |  1 -
>  mm/Makefile                      |  2 +-
>  mm/mmap.c                        | 83 ----------------------------
>  mm/vma.h                         |  5 ++
>  mm/vma_exec.c                    | 92 ++++++++++++++++++++++++++++++++
>  tools/testing/vma/Makefile       |  2 +-
>  tools/testing/vma/vma.c          |  1 +
>  tools/testing/vma/vma_internal.h | 40 ++++++++++++++
>  10 files changed, 145 insertions(+), 86 deletions(-)
>  create mode 100644 mm/vma_exec.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f5ee0390cdee..1ee1c22e6e36 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -8830,6 +8830,7 @@ F:	include/linux/elf.h
>  F:	include/uapi/linux/auxvec.h
>  F:	include/uapi/linux/binfmts.h
>  F:	include/uapi/linux/elf.h
> +F:	mm/vma_exec.c
>  F:	tools/testing/selftests/exec/
>  N:	asm/elf.h
>  N:	binfmt
> @@ -15654,6 +15655,7 @@ F:	mm/mremap.c
>  F:	mm/mseal.c
>  F:	mm/vma.c
>  F:	mm/vma.h
> +F:	mm/vma_exec.c
>  F:	mm/vma_internal.h
>  F:	tools/testing/selftests/mm/merge.c
>  F:	tools/testing/vma/
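
With the file listed under both sections, running, e.g.,
./scripts/get_maintainer.pl -f mm/vma_exec.c should now report the
maintainers and lists of both MEMORY MAPPING and EXEC & BINFMT API, ELF,
which is exactly the cross-over oversight the commit message describes.
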
> diff --git a/fs/exec.c b/fs/exec.c
> index 8e4ea5f1e64c..477bc3f2e966 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -78,6 +78,9 @@
>  
>  #include <trace/events/sched.h>
>  
> +/* For vma exec functions. */
> +#include "../mm/internal.h"
> +
>  static int bprm_creds_from_file(struct linux_binprm *bprm);
>  
>  int suid_dumpable = 0;
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 21dd110b6655..4fc361df9ad7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3223,7 +3223,6 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node);
>  extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
>  extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
>  extern void exit_mmap(struct mm_struct *);
> -int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift);
>  bool mmap_read_lock_maybe_expand(struct mm_struct *mm, struct vm_area_struct *vma,
>  				 unsigned long addr, bool write);
>  
> diff --git a/mm/Makefile b/mm/Makefile
> index 9d7e5b5bb694..15a901bb431a 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -37,7 +37,7 @@ mmu-y			:= nommu.o
>  mmu-$(CONFIG_MMU)	:= highmem.o memory.o mincore.o \
>  			   mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
>  			   msync.o page_vma_mapped.o pagewalk.o \
> -			   pgtable-generic.o rmap.o vmalloc.o vma.o
> +			   pgtable-generic.o rmap.o vmalloc.o vma.o vma_exec.o
>  
>  
>  ifdef CONFIG_CROSS_MEMORY_ATTACH
> diff --git a/mm/mmap.c b/mm/mmap.c
> index bd210aaf7ebd..1794bf6f4dc0 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1717,89 +1717,6 @@ static int __meminit init_reserve_notifier(void)
>  }
>  subsys_initcall(init_reserve_notifier);
>  
> -/*
> - * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
> - * this VMA and its relocated range, which will now reside at [vma->vm_start -
> - * shift, vma->vm_end - shift).
> - *
> - * This function is almost certainly NOT what you want for anything other than
> - * early executable temporary stack relocation.
> - */
> -int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
> -{
> -	/*
> -	 * The process proceeds as follows:
> -	 *
> -	 * 1) Use shift to calculate the new vma endpoints.
> -	 * 2) Extend vma to cover both the old and new ranges.  This ensures the
> -	 *    arguments passed to subsequent functions are consistent.
> -	 * 3) Move vma's page tables to the new range.
> -	 * 4) Free up any cleared pgd range.
> -	 * 5) Shrink the vma to cover only the new range.
> -	 */
> -
> -	struct mm_struct *mm = vma->vm_mm;
> -	unsigned long old_start = vma->vm_start;
> -	unsigned long old_end = vma->vm_end;
> -	unsigned long length = old_end - old_start;
> -	unsigned long new_start = old_start - shift;
> -	unsigned long new_end = old_end - shift;
> -	VMA_ITERATOR(vmi, mm, new_start);
> -	VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff);
> -	struct vm_area_struct *next;
> -	struct mmu_gather tlb;
> -	PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);
> -
> -	BUG_ON(new_start > new_end);
> -
> -	/*
> -	 * ensure there are no vmas between where we want to go
> -	 * and where we are
> -	 */
> -	if (vma != vma_next(&vmi))
> -		return -EFAULT;
> -
> -	vma_iter_prev_range(&vmi);
> -	/*
> -	 * cover the whole range: [new_start, old_end)
> -	 */
> -	vmg.middle = vma;
> -	if (vma_expand(&vmg))
> -		return -ENOMEM;
> -
> -	/*
> -	 * move the page tables downwards, on failure we rely on
> -	 * process cleanup to remove whatever mess we made.
> -	 */
> -	pmc.for_stack = true;
> -	if (length != move_page_tables(&pmc))
> -		return -ENOMEM;
> -
> -	tlb_gather_mmu(&tlb, mm);
> -	next = vma_next(&vmi);
> -	if (new_end > old_start) {
> -		/*
> -		 * when the old and new regions overlap clear from new_end.
> -		 */
> -		free_pgd_range(&tlb, new_end, old_end, new_end,
> -			next ? next->vm_start : USER_PGTABLES_CEILING);
> -	} else {
> -		/*
> -		 * otherwise, clean from old_start; this is done to not touch
> -		 * the address space in [new_end, old_start) some architectures
> -		 * have constraints on va-space that make this illegal (IA64) -
> -		 * for the others its just a little faster.
> -		 */
> -		free_pgd_range(&tlb, old_start, old_end, new_end,
> -			next ? next->vm_start : USER_PGTABLES_CEILING);
> -	}
> -	tlb_finish_mmu(&tlb);
> -
> -	vma_prev(&vmi);
> -	/* Shrink the vma to just the new range */
> -	return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
> -}
> -
>  #ifdef CONFIG_MMU
>  /*
>   * Obtain a read lock on mm->mmap_lock, if the specified address is below the
> diff --git a/mm/vma.h b/mm/vma.h
> index 149926e8a6d1..1ce3e18f01b7 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -548,4 +548,9 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address);
>  
>  int __vm_munmap(unsigned long start, size_t len, bool unlock);
>  
> +/* vma_exec.c */
> +#ifdef CONFIG_MMU
> +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift);
> +#endif
> +
>  #endif	/* __MM_VMA_H */
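
The CONFIG_MMU guard here lines up with the mm/Makefile change above
(vma_exec.o is only added to mmu-$(CONFIG_MMU)), so nommu builds see
neither the object nor the declaration.
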
> diff --git a/mm/vma_exec.c b/mm/vma_exec.c
> new file mode 100644
> index 000000000000..6736ae37f748
> --- /dev/null
> +++ b/mm/vma_exec.c
> @@ -0,0 +1,92 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Functions implemented for the exec path which are nonetheless purely
> + * VMA-only logic.
> + */
> +
> +#include "vma_internal.h"
> +#include "vma.h"
> +
> +/*
> + * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
> + * this VMA and its relocated range, which will now reside at [vma->vm_start -
> + * shift, vma->vm_end - shift).
> + *
> + * This function is almost certainly NOT what you want for anything other than
> + * early executable temporary stack relocation.
> + */
> +int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
> +{
> +	/*
> +	 * The process proceeds as follows:
> +	 *
> +	 * 1) Use shift to calculate the new vma endpoints.
> +	 * 2) Extend vma to cover both the old and new ranges.  This ensures the
> +	 *    arguments passed to subsequent functions are consistent.
> +	 * 3) Move vma's page tables to the new range.
> +	 * 4) Free up any cleared pgd range.
> +	 * 5) Shrink the vma to cover only the new range.
> +	 */
> +
> +	struct mm_struct *mm = vma->vm_mm;
> +	unsigned long old_start = vma->vm_start;
> +	unsigned long old_end = vma->vm_end;
> +	unsigned long length = old_end - old_start;
> +	unsigned long new_start = old_start - shift;
> +	unsigned long new_end = old_end - shift;
> +	VMA_ITERATOR(vmi, mm, new_start);
> +	VMG_STATE(vmg, mm, &vmi, new_start, old_end, 0, vma->vm_pgoff);
> +	struct vm_area_struct *next;
> +	struct mmu_gather tlb;
> +	PAGETABLE_MOVE(pmc, vma, vma, old_start, new_start, length);
> +
> +	BUG_ON(new_start > new_end);
> +
> +	/*
> +	 * ensure there are no vmas between where we want to go
> +	 * and where we are
> +	 */
> +	if (vma != vma_next(&vmi))
> +		return -EFAULT;
> +
> +	vma_iter_prev_range(&vmi);
> +	/*
> +	 * cover the whole range: [new_start, old_end)
> +	 */
> +	vmg.middle = vma;
> +	if (vma_expand(&vmg))
> +		return -ENOMEM;
> +
> +	/*
> +	 * move the page tables downwards, on failure we rely on
> +	 * process cleanup to remove whatever mess we made.
> +	 */
> +	pmc.for_stack = true;
> +	if (length != move_page_tables(&pmc))
> +		return -ENOMEM;
> +
> +	tlb_gather_mmu(&tlb, mm);
> +	next = vma_next(&vmi);
> +	if (new_end > old_start) {
> +		/*
> +		 * when the old and new regions overlap, clear from new_end.
> +		 */
> +		free_pgd_range(&tlb, new_end, old_end, new_end,
> +			next ? next->vm_start : USER_PGTABLES_CEILING);
> +	} else {
> +		/*
> +		 * otherwise, clean from old_start; this is done to not touch
> +		 * the address space in [new_end, old_start), as some
> +		 * architectures have constraints on va-space that make this
> +		 * illegal (IA64); for the others it's just a little faster.
> +		 */
> +		free_pgd_range(&tlb, old_start, old_end, new_end,
> +			next ? next->vm_start : USER_PGTABLES_CEILING);
> +	}
> +	tlb_finish_mmu(&tlb);
> +
> +	vma_prev(&vmi);
> +	/* Shrink the vma to just the new range */
> +	return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
> +}
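
For anyone reading this without the exec context: the only intended
caller is the early stack setup in fs/exec.c. A minimal sketch of the
call site (hypothetical and simplified, not part of this patch; the
real logic lives in setup_arg_pages()):

	/*
	 * Hedged sketch: move the temporary stack VMA down to its final
	 * location. Caller holds the mmap write lock.
	 */
	static int shift_temp_stack(struct vm_area_struct *vma,
				    unsigned long stack_shift)
	{
		if (!stack_shift)
			return 0;	/* already in place */

		return relocate_vma_down(vma, stack_shift);
	}

The overlap test above is also worth spelling out with numbers: for a
VMA at [0x8000, 0x9000) and shift 0x800, the new range [0x7800, 0x8800)
overlaps the old one, so page tables are freed from new_end (0x8800)
upward; with shift 0x1000 the ranges merely abut (new_end == old_start)
and the whole old range is freed instead.
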
> diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile
> index 860fd2311dcc..624040fcf193 100644
> --- a/tools/testing/vma/Makefile
> +++ b/tools/testing/vma/Makefile
> @@ -9,7 +9,7 @@ include ../shared/shared.mk
>  OFILES = $(SHARED_OFILES) vma.o maple-shim.o
>  TARGETS = vma
>  
> -vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma.h
> +vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma_exec.c ../../../mm/vma.h
>  
>  vma:	$(OFILES)
>  	$(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS)
> diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> index 7cfd6e31db10..5832ae5d797d 100644
> --- a/tools/testing/vma/vma.c
> +++ b/tools/testing/vma/vma.c
> @@ -28,6 +28,7 @@ unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
>   * Directly import the VMA implementation here. Our vma_internal.h wrapper
>   * provides userland-equivalent functionality for everything vma.c uses.
>   */
> +#include "../../../mm/vma_exec.c"
>  #include "../../../mm/vma.c"
>  
>  const struct vm_operations_struct vma_dummy_vm_ops;
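
Including both .c files here means vma_exec.c and vma.c are compiled as
a single userland translation unit against the vma_internal.h stubs, so
the new file gets compile coverage for free.
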
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index 572ab2cea763..0df19ca0000a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -421,6 +421,28 @@ struct vm_unmapped_area_info {
>  	unsigned long start_gap;
>  };
>  
> +struct pagetable_move_control {
> +	struct vm_area_struct *old; /* Source VMA. */
> +	struct vm_area_struct *new; /* Destination VMA. */
> +	unsigned long old_addr; /* Address from which the move begins. */
> +	unsigned long old_end; /* Exclusive address at which old range ends. */
> +	unsigned long new_addr; /* Address to move page tables to. */
> +	unsigned long len_in; /* Bytes to remap specified by user. */
> +
> +	bool need_rmap_locks; /* Do rmap locks need to be taken? */
> +	bool for_stack; /* Is this an early temp stack being moved? */
> +};
> +
> +#define PAGETABLE_MOVE(name, old_, new_, old_addr_, new_addr_, len_)	\
> +	struct pagetable_move_control name = {				\
> +		.old = old_,						\
> +		.new = new_,						\
> +		.old_addr = old_addr_,					\
> +		.old_end = (old_addr_) + (len_),			\
> +		.new_addr = new_addr_,					\
> +		.len_in = len_,						\
> +	}
> +
>  static inline void vma_iter_invalidate(struct vma_iterator *vmi)
>  {
>  	mas_pause(&vmi->mas);
> @@ -1240,4 +1262,22 @@ static inline int mapping_map_writable(struct address_space *mapping)
>  	return 0;
>  }
>  
> +static inline unsigned long move_page_tables(struct pagetable_move_control *pmc)
> +{
> +	(void)pmc;
> +
> +	return 0;
> +}
> +
> +static inline void free_pgd_range(struct mmu_gather *tlb,
> +			unsigned long addr, unsigned long end,
> +			unsigned long floor, unsigned long ceiling)
> +{
> +	(void)tlb;
> +	(void)addr;
> +	(void)end;
> +	(void)floor;
> +	(void)ceiling;
> +}
> +
>  #endif	/* __MM_VMA_INTERNAL_H */
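
One thing to flag on the userland stubs: with move_page_tables()
returning 0, relocate_vma_down() will take its -ENOMEM exit for any
non-empty range under the test harness. That is fine while these stubs
exist only to satisfy compilation, but if a test ever wants to exercise
the success path, something along these lines (hypothetical, not in
this patch) would be needed instead:

	static inline unsigned long
	move_page_tables(struct pagetable_move_control *pmc)
	{
		/* Pretend the whole requested range was moved. */
		return pmc->len_in;
	}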
> -- 
> 2.49.0
> 
