[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20190903200503.147973-1-joel@joelfernandes.org>
Date: Tue, 3 Sep 2019 16:05:03 -0400
From: "Joel Fernandes (Google)" <joel@...lfernandes.org>
To: linux-kernel@...r.kernel.org
Cc: "Joel Fernandes (Google)" <joel@...lfernandes.org>,
Tim Murray <timmurray@...gle.com>, carmenjackson@...gle.com,
mayankgupta@...gle.com, dancol@...gle.com, rostedt@...dmis.org,
minchan@...nel.org, akpm@...ux-foundation.org,
kernel-team@...roid.com,
"Aneesh Kumar K.V" <aneesh.kumar@...ux.ibm.com>,
Dan Williams <dan.j.williams@...el.com>,
"Jerome Glisse" <jglisse@...hat.com>, linux-mm@...ck.org,
Matthew Wilcox <willy@...radead.org>,
Michal Hocko <mhocko@...e.cz>,
Ralph Campbell <rcampbell@...dia.com>,
Vlastimil Babka <vbabka@...e.cz>
Subject: [PATCH] mm: emit tracepoint when RSS changes by threshold
This is useful for tracking how RSS changes per TGID. Several Android teams
have been using this patch in various kernel trees for half a year now. Many
have reported to me that it is really useful.
Initial patch developed by Tim Murray. Changes I made to the original patch:
o Avoid consuming any additional space in mm_struct.
o Keep overhead low by checking whether tracing is enabled.
o Reduce noise and lower overhead by emitting the event only when the
counter crosses a threshold.
Co-developed-by: Tim Murray <timmurray@...gle.com>
Signed-off-by: Tim Murray <timmurray@...gle.com>
Signed-off-by: Joel Fernandes (Google) <joel@...lfernandes.org>
---
Cc: carmenjackson@...gle.com
Cc: mayankgupta@...gle.com
Cc: dancol@...gle.com
Cc: rostedt@...dmis.org
Cc: minchan@...nel.org
Cc: akpm@...ux-foundation.org
Cc: kernel-team@...roid.com
include/linux/mm.h | 14 +++++++++++---
include/trace/events/kmem.h | 21 +++++++++++++++++++++
mm/memory.c | 20 ++++++++++++++++++++
3 files changed, 52 insertions(+), 3 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0334ca97c584..823aaf759bdb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1671,19 +1671,27 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
return (unsigned long)val;
}
+void mm_trace_rss_stat(int member, long count, long value);
+
static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
{
- atomic_long_add(value, &mm->rss_stat.count[member]);
+ long count = atomic_long_add_return(value, &mm->rss_stat.count[member]);
+
+ mm_trace_rss_stat(member, count, value);
}
static inline void inc_mm_counter(struct mm_struct *mm, int member)
{
- atomic_long_inc(&mm->rss_stat.count[member]);
+ long count = atomic_long_inc_return(&mm->rss_stat.count[member]);
+
+ mm_trace_rss_stat(member, count, 1);
}
static inline void dec_mm_counter(struct mm_struct *mm, int member)
{
- atomic_long_dec(&mm->rss_stat.count[member]);
+ long count = atomic_long_dec_return(&mm->rss_stat.count[member]);
+
+ mm_trace_rss_stat(member, count, -1);
}
/* Optimized variant when page is already known not to be PageAnon */
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eb57e3037deb..8b88e04fafbf 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -315,6 +315,27 @@ TRACE_EVENT(mm_page_alloc_extfrag,
__entry->change_ownership)
);
+TRACE_EVENT(rss_stat,
+
+ TP_PROTO(int member,
+ long count),
+
+ TP_ARGS(member, count),
+
+ TP_STRUCT__entry(
+ __field(int, member) /* which RSS counter was updated */
+ __field(long, size) /* counter value scaled by the page size */
+ ),
+
+ TP_fast_assign(
+ __entry->member = member;
+ __entry->size = (count << PAGE_SHIFT); /* page count -> bytes */
+ ),
+
+ TP_printk("member=%d size=%ldB",
+ __entry->member,
+ __entry->size)
+ );
#endif /* _TRACE_KMEM_H */
/* This part must be outside protection */
diff --git a/mm/memory.c b/mm/memory.c
index e2bb51b6242e..9d81322c24a3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -72,6 +72,8 @@
#include <linux/oom.h>
#include <linux/numa.h>
+#include <trace/events/kmem.h>
+
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
@@ -140,6 +142,24 @@ static int __init init_zero_pfn(void)
}
core_initcall(init_zero_pfn);
+/*
+ * The counter is traced only when an update moves it across a multiple of
+ * this threshold, which reduces unwanted trace overhead. Must be a power of 2
+ * because the crossing check masks off the low bits. Units: number of pages.
+ */
+#define TRACE_MM_COUNTER_THRESHOLD 128
+
+void mm_trace_rss_stat(int member, long count, long value)
+{
+ long thresh_mask = ~(TRACE_MM_COUNTER_THRESHOLD - 1); /* drops bits below the threshold */
+
+ if (!trace_rss_stat_enabled()) /* tracepoint off: skip the check below */
+ return;
+
+ /* Emit only if adding value moved count into a different threshold bucket */
+ if ((count & thresh_mask) != ((count - value) & thresh_mask))
+ trace_rss_stat(member, count);
+}
#if defined(SPLIT_RSS_COUNTING)
--
2.23.0.187.g17f5b7556c-goog
Powered by blists - more mailing lists