lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1249409546-6343-2-git-send-email-mel@csn.ul.ie>
Date:	Tue,  4 Aug 2009 19:12:23 +0100
From:	Mel Gorman <mel@....ul.ie>
To:	Larry Woodman <lwoodman@...hat.com>,
	Andrew Morton <akpm@...ux-foundation.org>
Cc:	riel@...hat.com, Ingo Molnar <mingo@...e.hu>,
	Peter Zijlstra <peterz@...radead.org>,
	LKML <linux-kernel@...r.kernel.org>, linux-mm@...ck.org,
	Mel Gorman <mel@....ul.ie>
Subject: [PATCH 1/4] tracing, page-allocator: Add trace events for page allocation and page freeing

This patch adds trace events for the allocation and freeing of pages,
including the freeing of pagevecs.  Using the events, it will be known what
struct page and pfns are being allocated and freed and what the call site
was in many cases.

The page alloc tracepoints can be used as an indicator as to whether the workload
was heavily dependent on the page allocator or not. You can make a guess based
on vmstat but you can't get a per-process breakdown. Depending on the call
path, the call_site for page allocation may be __get_free_pages() instead
of a useful callsite. Instead of passing down a return address similar to
slab debugging, the user should enable the stacktrace and sym-addr options
to get a proper stack trace.

The pagevec free tracepoint has a different use case. It can be used to get
an idea of how many pages are being dumped off the LRU and whether it is
kswapd doing the work or a process doing direct reclaim.

Signed-off-by: Mel Gorman <mel@....ul.ie>
Acked-by: Rik van Riel <riel@...hat.com>
---
 include/trace/events/kmem.h |   86 +++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c             |    6 ++-
 2 files changed, 91 insertions(+), 1 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 1493c54..57bf13c 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -225,6 +225,92 @@ TRACE_EVENT(kmem_cache_free,
 
 	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
 );
+
+TRACE_EVENT(mm_page_free_direct,
+
+	TP_PROTO(unsigned long call_site, const void *page, unsigned int order),
+
+	TP_ARGS(call_site, page, order),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	page		)
+		__field(	unsigned int,	order		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->page		= page;
+		__entry->order		= order;
+	),
+
+	TP_printk("call_site=%lx page=%p pfn=%lu order=%d",
+			__entry->call_site,
+			__entry->page,
+			page_to_pfn((struct page *)__entry->page),
+			__entry->order)
+);
+
+TRACE_EVENT(mm_pagevec_free,
+
+	TP_PROTO(unsigned long call_site, const void *page, int order, int cold),
+
+	TP_ARGS(call_site, page, order, cold),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	page		)
+		__field(	int,		order		)
+		__field(	int,		cold		)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->page		= page;
+		__entry->order		= order;
+		__entry->cold		= cold;
+	),
+
+	TP_printk("call_site=%lx page=%p pfn=%lu order=%d cold=%d",
+			__entry->call_site,
+			__entry->page,
+			page_to_pfn((struct page *)__entry->page),
+			__entry->order,
+			__entry->cold)
+);
+
+TRACE_EVENT(mm_page_alloc,
+
+	TP_PROTO(unsigned long call_site, const void *page, unsigned int order,
+			gfp_t gfp_flags, int migratetype),
+
+	TP_ARGS(call_site, page, order, gfp_flags, migratetype),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	page		)
+		__field(	unsigned int,	order		)
+		__field(	gfp_t,		gfp_flags	)
+		__field(	int,		migratetype	)
+	),
+
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->page		= page;
+		__entry->order		= order;
+		__entry->gfp_flags	= gfp_flags;
+		__entry->migratetype	= migratetype;
+	),
+
+	TP_printk("call_site=%lx page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s",
+		__entry->call_site,
+		__entry->page,
+		page_to_pfn((struct page *)__entry->page),
+		__entry->order,
+		__entry->migratetype,
+		show_gfp_flags(__entry->gfp_flags))
+);
+
 #endif /* _TRACE_KMEM_H */
 
 /* This part must be outside protection */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d052abb..843bdec 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1905,6 +1905,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
 
+	trace_mm_page_alloc(_RET_IP_, page, order, gfp_mask, migratetype);
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -1945,13 +1946,16 @@ void __pagevec_free(struct pagevec *pvec)
 {
 	int i = pagevec_count(pvec);
 
-	while (--i >= 0)
+	while (--i >= 0) {
+		trace_mm_pagevec_free(_RET_IP_, pvec->pages[i], 0, pvec->cold);
 		free_hot_cold_page(pvec->pages[i], pvec->cold);
+	}
 }
 
 void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
+		trace_mm_page_free_direct(_RET_IP_, page, order);
 		if (order == 0)
 			free_hot_page(page);
 		else
-- 
1.6.3.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ