[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20191113153816.14b95acd@gandalf.local.home>
Date: Wed, 13 Nov 2019 15:38:16 -0500
From: Steven Rostedt <rostedt@...dmis.org>
To: "Joel Fernandes (Google)" <joel@...lfernandes.org>
Cc: linux-kernel@...r.kernel.org, Ioannis Ilkos <ilkos@...gle.com>,
minchan@...gle.com, primiano@...gle.com, fmayer@...gle.com,
hjd@...gle.com, joaodias@...gle.com, joelaf@...gle.com,
lalitm@...gle.com, rslawik@...gle.com, sspatil@...gle.com,
timmurray@...gle.com, Andrew Morton <akpm@...ux-foundation.org>,
Andy Shevchenko <andriy.shevchenko@...ux.intel.com>,
Changbin Du <changbin.du@...el.com>,
Ingo Molnar <mingo@...hat.com>, Joe Perches <joe@...ches.com>,
Kees Cook <keescook@...omium.org>, linux-mm@...ck.org,
Michal Hocko <mhocko@...e.com>, Petr Mladek <pmladek@...e.com>,
"Rafael J. Wysocki" <rafael.j.wysocki@...el.com>,
Sakari Ailus <sakari.ailus@...ux.intel.com>,
Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
Stephen Rothwell <sfr@...b.auug.org.au>
Subject: Re: [PATCH] rss_stat: Add support to detect RSS updates of external
mm
/me coming late to the party.
On Tue, 5 Nov 2019 21:44:51 -0500
"Joel Fernandes (Google)" <joel@...lfernandes.org> wrote:
> When a process updates the RSS of a different process, the rss_stat
> tracepoint appears in the context of the process doing the update. This
> can confuse userspace that the RSS of process doing the update is
> updated, while in reality a different process's RSS was updated.
>
> This issue happens in reclaim paths such as with direct reclaim or
> background reclaim.
>
> This patch adds more information to the tracepoint about whether the mm
> being updated belongs to the current process's context (curr field). We
> also include a hash of the mm pointer so that the process who the mm
> belongs to can be uniquely identified (mm_id field).
>
> Also vsprintf.c is refactored a bit to allow reuse of hashing code.
>
> Reported-by: Ioannis Ilkos <ilkos@...gle.com>
> Signed-off-by: Joel Fernandes (Google) <joel@...lfernandes.org>
>
> ---
> Based on top of the commit in linux-next:
> 8342d836dc7c ("mm: emit tracepoint when RSS changes")
>
> Google Bug: 140711541
>
> Cc: minchan@...gle.com
> Cc: primiano@...gle.com
> Cc: ilkos@...gle.com
> Cc: fmayer@...gle.com
> Cc: hjd@...gle.com
> Cc: joaodias@...gle.com
> Cc: joelaf@...gle.com
> Cc: lalitm@...gle.com
> Cc: rslawik@...gle.com
> Cc: sspatil@...gle.com
> Cc: timmurray@...gle.com
>
> include/linux/mm.h | 8 ++++----
> include/linux/string.h | 2 ++
> include/trace/events/kmem.h | 32 +++++++++++++++++++++++++++++---
> lib/vsprintf.c | 36 +++++++++++++++++++++++++-----------
> mm/memory.c | 4 ++--
> 5 files changed, 62 insertions(+), 20 deletions(-)
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 31d8cfb3d988..bfbe65ccffa3 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1643,27 +1643,27 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
> return (unsigned long)val;
> }
>
> -void mm_trace_rss_stat(int member, long count);
> +void mm_trace_rss_stat(struct mm_struct *mm, int member, long count);
>
> static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
> {
> long count = atomic_long_add_return(value, &mm->rss_stat.count[member]);
>
> - mm_trace_rss_stat(member, count);
> + mm_trace_rss_stat(mm, member, count);
> }
>
> static inline void inc_mm_counter(struct mm_struct *mm, int member)
> {
> long count = atomic_long_inc_return(&mm->rss_stat.count[member]);
>
> - mm_trace_rss_stat(member, count);
> + mm_trace_rss_stat(mm, member, count);
> }
>
> static inline void dec_mm_counter(struct mm_struct *mm, int member)
> {
> long count = atomic_long_dec_return(&mm->rss_stat.count[member]);
>
> - mm_trace_rss_stat(member, count);
> + mm_trace_rss_stat(mm, member, count);
> }
>
> /* Optimized variant when page is already known not to be PageAnon */
> diff --git a/include/linux/string.h b/include/linux/string.h
> index f516cec5277c..6b0e950701d0 100644
> --- a/include/linux/string.h
> +++ b/include/linux/string.h
> @@ -261,6 +261,8 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
> extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
> const void *from, size_t available);
>
> +int ptr_to_hashval(const void *ptr, unsigned long *hashval_out);
> +
> /**
> * strstarts - does @str start with @prefix?
> * @str: string to examine
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 5a0666bfcf85..ad7e642bd497 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -316,24 +316,50 @@ TRACE_EVENT(mm_page_alloc_extfrag,
> __entry->change_ownership)
> );
>
> +/*
> + * Required for uniquely and securely identifying mm in rss_stat tracepoint.
> + */
> +#ifndef __PTR_TO_HASHVAL
> +static unsigned int __maybe_unused mm_ptr_to_hash(const void *ptr)
> +{
> + int ret;
> + unsigned long hashval;
> +
> + ret = ptr_to_hashval(ptr, &hashval);
> + if (ret)
> + return 0;
> +
> + /* The hashed value is only 32-bit */
> + return (unsigned int)hashval;
> +}
> +#define __PTR_TO_HASHVAL
> +#endif
> +
> TRACE_EVENT(rss_stat,
>
> - TP_PROTO(int member,
> + TP_PROTO(struct mm_struct *mm,
> + int member,
> long count),
>
> - TP_ARGS(member, count),
> + TP_ARGS(mm, member, count),
>
> TP_STRUCT__entry(
> + __field(unsigned int, mm_id)
> + __field(unsigned int, curr)
> __field(int, member)
> __field(long, size)
> ),
>
> TP_fast_assign(
> + __entry->mm_id = mm_ptr_to_hash(mm);
> + __entry->curr = !!(current->mm == mm);
> __entry->member = member;
> __entry->size = (count << PAGE_SHIFT);
> ),
>
> - TP_printk("member=%d size=%ldB",
> + TP_printk("mm_id=%u curr=%d member=%d size=%ldB",
> + __entry->mm_id,
> + __entry->curr,
> __entry->member,
> __entry->size)
> );
> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> index dee8fc467fcf..401baaac1813 100644
> --- a/lib/vsprintf.c
> +++ b/lib/vsprintf.c
> @@ -761,11 +761,34 @@ static int __init initialize_ptr_random(void)
> early_initcall(initialize_ptr_random);
>
> /* Maps a pointer to a 32 bit unique identifier. */
> +int ptr_to_hashval(const void *ptr, unsigned long *hashval_out)
> +{
> + const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
> + unsigned long hashval;
> +
> + if (static_branch_unlikely(¬_filled_random_ptr_key))
> + return -EAGAIN;
> +
> +#ifdef CONFIG_64BIT
> + hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);
> + /*
> + * Mask off the first 32 bits, this makes explicit that we have
> + * modified the address (and 32 bits is plenty for a unique ID).
> + */
> + hashval = hashval & 0xffffffff;
> +#else
> + hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key);
> +#endif
> + *hashval_out = hashval;
> + return 0;
> +}
> +
> static char *ptr_to_id(char *buf, char *end, const void *ptr,
> struct printf_spec spec)
> {
> const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
> unsigned long hashval;
> + int ret;
>
> /* When debugging early boot use non-cryptographically secure hash. */
> if (unlikely(debug_boot_weak_hash)) {
> @@ -773,22 +796,13 @@ static char *ptr_to_id(char *buf, char *end, const void *ptr,
> return pointer_string(buf, end, (const void *)hashval, spec);
> }
>
> - if (static_branch_unlikely(¬_filled_random_ptr_key)) {
> + ret = ptr_to_hashval(ptr, &hashval);
> + if (ret) {
This is a hot path (used in trace_printk()), and you just turned a
static_branch into a function call.
Can we make a __ptr_to_hashval() static inline, and have
ptr_to_hashval() call that, but use the static inline here, where the
static_branch gets called directly here?
-- Steve
> spec.field_width = 2 * sizeof(ptr);
> /* string length must be less than default_width */
> return error_string(buf, end, str, spec);
> }
>
> -#ifdef CONFIG_64BIT
> - hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);
> - /*
> - * Mask off the first 32 bits, this makes explicit that we have
> - * modified the address (and 32 bits is plenty for a unique ID).
> - */
> - hashval = hashval & 0xffffffff;
> -#else
> - hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key);
> -#endif
> return pointer_string(buf, end, (const void *)hashval, spec);
> }
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 7596d625ebd1..d3c9784e6dc1 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -154,9 +154,9 @@ static int __init init_zero_pfn(void)
> }
> core_initcall(init_zero_pfn);
>
> -void mm_trace_rss_stat(int member, long count)
> +void mm_trace_rss_stat(struct mm_struct *mm, int member, long count)
> {
> - trace_rss_stat(member, count);
> + trace_rss_stat(mm, member, count);
> }
>
> #if defined(SPLIT_RSS_COUNTING)
Powered by blists - more mailing lists