lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ac4301b5-6f82-49f2-9c71-7c4c015d48f7@linux.dev>
Date: Tue, 20 May 2025 11:55:20 +0800
From: Chengming Zhou <chengming.zhou@...ux.dev>
To: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
 Andrew Morton <akpm@...ux-foundation.org>
Cc: Alexander Viro <viro@...iv.linux.org.uk>,
 Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>,
 "Liam R . Howlett" <Liam.Howlett@...cle.com>,
 Vlastimil Babka <vbabka@...e.cz>, Jann Horn <jannh@...gle.com>,
 Pedro Falcato <pfalcato@...e.de>, David Hildenbrand <david@...hat.com>,
 Xu Xin <xu.xin16@....com.cn>, linux-mm@...ck.org,
 linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH 3/4] mm: prevent KSM from completely breaking VMA merging

On 2025/5/19 16:51, Lorenzo Stoakes wrote:
> If a user wishes to enable KSM mergeability for an entire process and all
> fork/exec'd processes that come after it, they use the prctl()
> PR_SET_MEMORY_MERGE operation.
> 
> This defaults all newly mapped VMAs to have the VM_MERGEABLE VMA flag set
> (in order to indicate they are KSM mergeable), as well as setting this flag
> for all existing VMAs.
> 
> However it also entirely and completely breaks VMA merging for the process
> and all forked (and fork/exec'd) processes.
> 
> This is because when a new mapping is proposed, the flags specified will
> never have VM_MERGEABLE set. However all adjacent VMAs will already have
> VM_MERGEABLE set, rendering VMAs unmergeable by default.
> 
> To work around this, we try to set the VM_MERGEABLE flag prior to
> attempting a merge. In the case of brk() this can always be done.
> 
> However on mmap() things are more complicated - while KSM is not supported
> for file-backed mappings, it is supported for MAP_PRIVATE file-backed
> mappings.
> 
> And these mappings may have deprecated .mmap() callbacks specified which
> could, in theory, adjust flags and thus KSM merge eligiblity.
> 
> So we check to determine whether this at all possible. If not, we set
> VM_MERGEABLE prior to the merge attempt on mmap(), otherwise we retain the
> previous behaviour.
> 
> When .mmap_prepare() is more widely used, we can remove this precaution.
> 
> While this doesn't quite cover all cases, it covers a great many (all
> anonymous memory, for instance), meaning we should already see a
> significant improvement in VMA mergeability.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>

Looks good to me with the build fix. And it seems that ksm_add_vma()
is not used anymore..

Reviewed-by: Chengming Zhou <chengming.zhou@...ux.dev>

Thanks!

> ---
>   include/linux/ksm.h |  4 ++--
>   mm/ksm.c            | 20 ++++++++++++------
>   mm/vma.c            | 49 +++++++++++++++++++++++++++++++++++++++++++--
>   3 files changed, 63 insertions(+), 10 deletions(-)
> 
> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
> index d73095b5cd96..ba5664daca6e 100644
> --- a/include/linux/ksm.h
> +++ b/include/linux/ksm.h
> @@ -17,8 +17,8 @@
>   #ifdef CONFIG_KSM
>   int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
>   		unsigned long end, int advice, unsigned long *vm_flags);
> -
> -void ksm_add_vma(struct vm_area_struct *vma);
> +vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
> +			 vm_flags_t vm_flags);
>   int ksm_enable_merge_any(struct mm_struct *mm);
>   int ksm_disable_merge_any(struct mm_struct *mm);
>   int ksm_disable(struct mm_struct *mm);
> diff --git a/mm/ksm.c b/mm/ksm.c
> index d0c763abd499..022af14a95ea 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -2731,16 +2731,24 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
>   	return 0;
>   }
>   /**
> - * ksm_add_vma - Mark vma as mergeable if compatible
> + * ksm_vma_flags - Update VMA flags to mark as mergeable if compatible
>    *
> - * @vma:  Pointer to vma
> + * @mm:       Proposed VMA's mm_struct
> + * @file:     Proposed VMA's file-backed mapping, if any.
> + * @vm_flags: Proposed VMA"s flags.
> + *
> + * Returns: @vm_flags possibly updated to mark mergeable.
>    */
> -void ksm_add_vma(struct vm_area_struct *vma)
> +vm_flags_t ksm_vma_flags(const struct mm_struct *mm, const struct file *file,
> +			 vm_flags_t vm_flags)
>   {
> -	struct mm_struct *mm = vma->vm_mm;
> +	vm_flags_t ret = vm_flags;
>   
> -	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
> -		__ksm_add_vma(vma);
> +	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags) &&
> +	    __ksm_should_add_vma(file, vm_flags))
> +		ret |= VM_MERGEABLE;
> +
> +	return ret;
>   }
>   
>   static void ksm_add_vmas(struct mm_struct *mm)
> diff --git a/mm/vma.c b/mm/vma.c
> index 3ff6cfbe3338..5bebe55ea737 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -2482,7 +2482,6 @@ static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
>   	 */
>   	if (!vma_is_anonymous(vma))
>   		khugepaged_enter_vma(vma, map->flags);
> -	ksm_add_vma(vma);
>   	*vmap = vma;
>   	return 0;
>   
> @@ -2585,6 +2584,45 @@ static void set_vma_user_defined_fields(struct vm_area_struct *vma,
>   	vma->vm_private_data = map->vm_private_data;
>   }
>   
> +static void update_ksm_flags(struct mmap_state *map)
> +{
> +	map->flags = ksm_vma_flags(map->mm, map->file, map->flags);
> +}
> +
> +/*
> + * Are we guaranteed no driver can change state such as to preclude KSM merging?
> + * If so, let's set the KSM mergeable flag early so we don't break VMA merging.
> + *
> + * This is applicable when PR_SET_MEMORY_MERGE has been set on the mm_struct via
> + * prctl() causing newly mapped VMAs to have the KSM mergeable VMA flag set.
> + *
> + * If this is not the case, then we set the flag after considering mergeability,
> + * which will prevent mergeability as, when PR_SET_MEMORY_MERGE is set, a new
> + * VMA will not have the KSM mergeability VMA flag set, but all other VMAs will,
> + * preventing any merge.
> + */
> +static bool can_set_ksm_flags_early(struct mmap_state *map)
> +{
> +	struct file *file = map->file;
> +
> +	/* Anonymous mappings have no driver which can change them. */
> +	if (!file)
> +		return true;
> +
> +	/* shmem is safe. */
> +	if (shmem_file(file))
> +		return true;
> +
> +	/*
> +	 * If .mmap_prepare() is specified, then the driver will have already
> +	 * manipulated state prior to updating KSM flags.
> +	 */
> +	if (file->f_op->mmap_prepare)
> +		return true;
> +
> +	return false;
> +}
> +
>   static unsigned long __mmap_region(struct file *file, unsigned long addr,
>   		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
>   		struct list_head *uf)
> @@ -2595,6 +2633,7 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
>   	bool have_mmap_prepare = file && file->f_op->mmap_prepare;
>   	VMA_ITERATOR(vmi, mm, addr);
>   	MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file);
> +	bool check_ksm_early = can_set_ksm_flags_early(&map);
>   
>   	error = __mmap_prepare(&map, uf);
>   	if (!error && have_mmap_prepare)
> @@ -2602,6 +2641,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
>   	if (error)
>   		goto abort_munmap;
>   
> +	if (check_ksm_early)
> +		update_ksm_flags(&map);
> +
>   	/* Attempt to merge with adjacent VMAs... */
>   	if (map.prev || map.next) {
>   		VMG_MMAP_STATE(vmg, &map, /* vma = */ NULL);
> @@ -2611,6 +2653,9 @@ static unsigned long __mmap_region(struct file *file, unsigned long addr,
>   
>   	/* ...but if we can't, allocate a new VMA. */
>   	if (!vma) {
> +		if (!check_ksm_early)
> +			update_ksm_flags(&map);
> +
>   		error = __mmap_new_vma(&map, &vma);
>   		if (error)
>   			goto unacct_error;
> @@ -2713,6 +2758,7 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
>   	 * Note: This happens *after* clearing old mappings in some code paths.
>   	 */
>   	flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
> +	flags = ksm_vma_flags(mm, NULL, flags);
>   	if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
>   		return -ENOMEM;
>   
> @@ -2756,7 +2802,6 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
>   
>   	mm->map_count++;
>   	validate_mm(mm);
> -	ksm_add_vma(vma);
>   out:
>   	perf_event_mmap(vma);
>   	mm->total_vm += len >> PAGE_SHIFT;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ