lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20220218195729.oa5olrcsq6yox7hp@revolver>
Date:   Fri, 18 Feb 2022 19:57:39 +0000
From:   Liam Howlett <liam.howlett@...cle.com>
To:     Jakub Matěna <matenajakub@...il.com>
CC:     "linux-mm@...ck.org" <linux-mm@...ck.org>,
        "patches@...ts.linux.dev" <patches@...ts.linux.dev>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        "vbabka@...e.cz" <vbabka@...e.cz>,
        "mhocko@...nel.org" <mhocko@...nel.org>,
        "mgorman@...hsingularity.net" <mgorman@...hsingularity.net>,
        "willy@...radead.org" <willy@...radead.org>,
        "hughd@...gle.com" <hughd@...gle.com>,
        "kirill@...temov.name" <kirill@...temov.name>,
        "riel@...riel.com" <riel@...riel.com>,
        "rostedt@...dmis.org" <rostedt@...dmis.org>,
        "peterz@...radead.org" <peterz@...radead.org>
Subject: Re: [RFC PATCH 4/4] [PATCH 4/4] mm: add tracing for VMA merges

* Jakub Matěna <matenajakub@...il.com> [220218 07:21]:
> Adds trace support for vma_merge to measure successful and unsuccessful
> merges of two VMAs with distinct anon_vmas and also trace support for
> merges made possible by update of page offset made possible by a previous
> patch in this series.
> 
> Signed-off-by: Jakub Matěna <matenajakub@...il.com>
> ---
>  include/trace/events/mmap.h | 55 ++++++++++++++++++++++++++++++++
>  mm/internal.h               | 11 +++++++
>  mm/mmap.c                   | 63 ++++++++++++++++++++-----------------
>  3 files changed, 100 insertions(+), 29 deletions(-)
> 
> diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h
> index 4661f7ba07c0..9f6439e2ed2d 100644
> --- a/include/trace/events/mmap.h
> +++ b/include/trace/events/mmap.h
> @@ -6,6 +6,7 @@
>  #define _TRACE_MMAP_H
>  
>  #include <linux/tracepoint.h>
> +#include <../mm/internal.h>
>  
>  TRACE_EVENT(vm_unmapped_area,
>  
> @@ -42,6 +43,60 @@ TRACE_EVENT(vm_unmapped_area,
>  		__entry->low_limit, __entry->high_limit, __entry->align_mask,
>  		__entry->align_offset)
>  );
> +
> +TRACE_EVENT(vm_av_merge,
> +
> +	TP_PROTO(int merged, enum vma_merge_res merge_prev, enum vma_merge_res merge_next, enum vma_merge_res merge_both),
> +
> +	TP_ARGS(merged, merge_prev, merge_next, merge_both),
> +
> +	TP_STRUCT__entry(
> +		__field(int,			merged)
> +		__field(enum vma_merge_res,	predecessor_different_av)
> +		__field(enum vma_merge_res,	successor_different_av)
> +		__field(enum vma_merge_res,	predecessor_with_successor_different_av)
> +		__field(int,			diff_count)
> +		__field(int,			failed_count)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->merged = merged == 0;
> +		__entry->predecessor_different_av = merge_prev;
> +		__entry->successor_different_av = merge_next;
> +		__entry->predecessor_with_successor_different_av = merge_both;
> +		__entry->diff_count = (merge_prev == AV_MERGE_DIFFERENT)
> +		+ (merge_next == AV_MERGE_DIFFERENT) + (merge_both == AV_MERGE_DIFFERENT);
> +		__entry->failed_count = (merge_prev == AV_MERGE_FAILED)
> +		+ (merge_next == AV_MERGE_FAILED) + (merge_both == AV_MERGE_FAILED);
> +	),
> +
> +	TP_printk("merged=%d predecessor=%d successor=%d predecessor_with_successor=%d diff_count=%d failed_count=%d\n",
> +		__entry->merged,
> +		__entry->predecessor_different_av, __entry->successor_different_av,
> +		__entry->predecessor_with_successor_different_av,
> +		__entry->diff_count, __entry->failed_count)
> +
> +);
> +
> +TRACE_EVENT(vm_pgoff_merge,
> +
> +	TP_PROTO(struct vm_area_struct *vma, bool anon_pgoff_updated),
> +
> +	TP_ARGS(vma, anon_pgoff_updated),
> +
> +	TP_STRUCT__entry(
> +		__field(bool,	faulted)
> +		__field(bool,	updated)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->faulted = vma->anon_vma;
> +		__entry->updated = anon_pgoff_updated;
> +	),
> +
> +	TP_printk("faulted=%d updated=%d\n",
> +		__entry->faulted, __entry->updated)
> +);
>  #endif
>  
>  /* This part must be outside protection */
> diff --git a/mm/internal.h b/mm/internal.h
> index d80300392a19..b3e482175518 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -34,6 +34,17 @@ struct folio_batch;
>  /* Do not use these with a slab allocator */
>  #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
>  
> +/*
> + * Following values indicate reason for merge success or failure.
> + */
> +enum vma_merge_res {
> +	MERGE_FAILED,
> +	AV_MERGE_FAILED,
> +	AV_MERGE_SAME,
> +	MERGE_OK = AV_MERGE_SAME,
> +	AV_MERGE_DIFFERENT,
> +};
> +
>  void page_writeback_init(void);
>  
>  static inline void *folio_raw_mapping(struct folio *folio)
> diff --git a/mm/mmap.c b/mm/mmap.c
> index ed91d0cd2111..10c76c6a3288 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1064,21 +1064,21 @@ static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
>  	 */
>  	if ((!anon_vma1 || !anon_vma2) && (!vma ||
>  		list_is_singular(&vma->anon_vma_chain)))
> -		return 1;
> +		return AV_MERGE_SAME;
>  	if (anon_vma1 == anon_vma2)
> -		return 1;
> +		return AV_MERGE_SAME;
>  	/*
>  	 * Different anon_vma but not shared by several processes
>  	 */
>  	else if ((anon_vma1 && anon_vma2) &&
>  			(anon_vma1 == anon_vma1->root)
>  			&& (rbt_no_children(anon_vma1)))
> -		return 1;
> +		return AV_MERGE_DIFFERENT;
>  	/*
>  	 * Different anon_vma and shared -> unmergeable
>  	 */
>  	else
> -		return 0;
> +		return AV_MERGE_FAILED;
>  }
>  
>  /*
> @@ -1099,12 +1099,10 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
>  		     struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
>  		     const char *anon_name)
>  {
> -	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
> -	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
> +	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name))
>  		if (vma->vm_pgoff == vm_pgoff)
> -			return 1;
> -	}
> -	return 0;
> +			return is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma);
> +	return MERGE_FAILED;
>  }
>  
>  /*
> @@ -1121,14 +1119,13 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
>  		    struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
>  		    const char *anon_name)
>  {
> -	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
> -	    is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
> +	if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name)) {
>  		pgoff_t vm_pglen;
>  		vm_pglen = vma_pages(vma);
>  		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
> -			return 1;
> +			return is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma);
>  	}
> -	return 0;
> +	return MERGE_FAILED;
>  }
>  
>  /*
> @@ -1185,9 +1182,14 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
>  	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
>  	struct vm_area_struct *area, *next;
>  	int err;
> -	int merge_prev = 0;
> -	int merge_both = 0;
> -	int merge_next = 0;
> +	/*
> +	 * Following three variables are used to store values
> +	 * indicating wheather this VMA and its anon_vma can
> +	 * be merged and also the type of failure or success.
> +	 */
> +	enum vma_merge_res merge_prev = MERGE_FAILED;
> +	enum vma_merge_res merge_both = MERGE_FAILED;
> +	enum vma_merge_res merge_next = MERGE_FAILED;
>  
>  	/*
>  	 * We later require that vma->vm_flags == vm_flags,
> @@ -1210,38 +1212,39 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
>  	 * Can we merge predecessor?
>  	 */
>  	if (prev && prev->vm_end == addr &&
> -			mpol_equal(vma_policy(prev), policy) &&
> -			can_vma_merge_after(prev, vm_flags,
> +			mpol_equal(vma_policy(prev), policy)) {
> +		merge_prev = can_vma_merge_after(prev, vm_flags,
>  					    anon_vma, file, pgoff,
> -					    vm_userfaultfd_ctx, anon_name)) {
> -		merge_prev = true;
> +					    vm_userfaultfd_ctx, anon_name);
>  	}
> +
>  	/*
>  	 * Can we merge successor?
>  	 */
>  	if (next && end == next->vm_start &&
> -			mpol_equal(policy, vma_policy(next)) &&
> -			can_vma_merge_before(next, vm_flags,
> +			mpol_equal(policy, vma_policy(next))) {
> +		merge_next = can_vma_merge_before(next, vm_flags,
>  					anon_vma, file, pgoff+pglen,
> -					vm_userfaultfd_ctx, anon_name)) {
> -		merge_next = true;
> +					vm_userfaultfd_ctx, anon_name);
>  	}
> +
>  	/*
>  	 * Can we merge both predecessor and successor?
>  	 */
> -	if (merge_prev && merge_next)
> +	if (merge_prev >= MERGE_OK && merge_next >= MERGE_OK) {

What happened to making vma_merge easier to read?  What does > MERGE_OK
mean?  I guess this answers why booleans were not used, but I don't like
it.   Couldn't you just log the err/success and the value of
merge_prev/merge_next?  It's not like the code tries more than one way
of merging on failure..

>  		merge_both = is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL);
> +	}
>  
> -	if (merge_both) {	 /* cases 1, 6 */
> +	if (merge_both >= MERGE_OK) {	 /* cases 1, 6 */
>  		err = __vma_adjust(prev, prev->vm_start,
>  					next->vm_end, prev->vm_pgoff, NULL,
>  					prev);
>  		area = prev;
> -	} else if (merge_prev) {			/* cases 2, 5, 7 */
> +	} else if (merge_prev >= MERGE_OK) {			/* cases 2, 5, 7 */
>  		err = __vma_adjust(prev, prev->vm_start,
>  					end, prev->vm_pgoff, NULL, prev);
>  		area = prev;
> -	} else if (merge_next) {
> +	} else if (merge_next >= MERGE_OK) {
>  		if (prev && addr < prev->vm_end)	/* case 4 */
>  			err = __vma_adjust(prev, prev->vm_start,
>  					addr, prev->vm_pgoff, NULL, next);
> @@ -1252,7 +1255,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
>  	} else {
>  		err = -1;
>  	}
> -
> +	trace_vm_av_merge(err, merge_prev, merge_next, merge_both);
>  	/*
>  	 * Cannot merge with predecessor or successor or error in __vma_adjust?
>  	 */
> @@ -3359,6 +3362,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>  		/*
>  		 * Source vma may have been merged into new_vma
>  		 */
> +		trace_vm_pgoff_merge(vma, anon_pgoff_updated);
> +
>  		if (unlikely(vma_start >= new_vma->vm_start &&
>  			     vma_start < new_vma->vm_end)) {
>  			/*
> -- 
> 2.34.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ