[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20251110161954.47d88433@gandalf.local.home>
Date: Mon, 10 Nov 2025 16:19:54 -0500
From: Steven Rostedt <rostedt@...dmis.org>
To: Balbir Singh <balbirs@...dia.com>
Cc: linux-mm@...ck.org, linux-trace-kernel@...r.kernel.org,
linux-kernel@...r.kernel.org, Masami Hiramatsu <mhiramat@...nel.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>, Andrew Morton
<akpm@...ux-foundation.org>, David Hildenbrand <david@...hat.com>, Zi Yan
<ziy@...dia.com>, Joshua Hahn <joshua.hahnjy@...il.com>, Rakie Kim
<rakie.kim@...com>, Byungchul Park <byungchul@...com>, Gregory Price
<gourry@...rry.net>, Ying Huang <ying.huang@...ux.alibaba.com>, Alistair
Popple <apopple@...dia.com>
Subject: Re: [PATCH] mm/migrate_device: Add tracepoints for debugging
On Thu, 16 Oct 2025 16:46:19 +1100
Balbir Singh <balbirs@...dia.com> wrote:
> Add tracepoints for debugging device migration flow in migrate_device.c.
> This is helpful in debugging how long migration took (time can be
> tracked backwards from migrate_device_finalize to migrate_vma_setup).
>
> A combination of these events along with existing thp:*, exceptions:*
> and migrate:* is very useful for debugging issues related to
> migration.
>
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Masami Hiramatsu <mhiramat@...nel.org>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
> Cc: Andrew Morton <akpm@...ux-foundation.org>
> Cc: David Hildenbrand <david@...hat.com>
> Cc: Zi Yan <ziy@...dia.com>
> Cc: Joshua Hahn <joshua.hahnjy@...il.com>
> Cc: Rakie Kim <rakie.kim@...com>
> Cc: Byungchul Park <byungchul@...com>
> Cc: Gregory Price <gourry@...rry.net>
> Cc: Ying Huang <ying.huang@...ux.alibaba.com>
> Cc: Alistair Popple <apopple@...dia.com>
>
> Signed-off-by: Balbir Singh <balbirs@...dia.com>
> ---
>
> Sample output from hmm-tests
>
> hmm-tests-855 [002] 50.042792: migrate_vma_setup: start=0x7f2908a00000 end=0x7f2908c00000 nr_pages=512
> hmm-tests-855 [002] 50.042800: set_migration_pmd: addr=7f2908a00000, pmd=dfffffffd39ffe00
> hmm-tests-855 [002] 50.042801: migrate_vma_collect_skip: start=0x7f2908a01000 end=0x7f2908c00000
> hmm-tests-855 [002] 50.042802: migrate_vma_collect: start=0x7f2908a00000 end=0x7f2908c00000 npages=512
> hmm-tests-855 [002] 50.061929: migrate_device_pages: npages=512 migrated=512
> hmm-tests-855 [002] 50.062345: remove_migration_pmd: addr=7f2908a00000, pmd=efffffe00403fe00
> hmm-tests-855 [002] 50.062371: migrate_vma_finalize: npages=512
Looks like some of these tracepoints can be combined via classes:
> hmm-tests-855 [002] 50.042792: migrate_vma_setup: start=0x7f2908a00000 end=0x7f2908c00000 nr_pages=512
> hmm-tests-855 [002] 50.042802: migrate_vma_collect: start=0x7f2908a00000 end=0x7f2908c00000 npages=512
Is there a difference between "nr_pages" and "npages"?
> hmm-tests-855 [002] 50.042800: set_migration_pmd: addr=7f2908a00000, pmd=dfffffffd39ffe00
> hmm-tests-855 [002] 50.062345: remove_migration_pmd: addr=7f2908a00000, pmd=efffffe00403fe00
Each TRACE_EVENT() is equivalent to:
DECLARE_EVENT_CLASS(event, ...)
DEFINE_EVENT(event, event, ...)
Where a class is around 4-5K in size, and the DEFINE_EVENT is between
500 and 1k in size.
By using a single DECLARE_EVENT_CLASS() for multiple events, you can save
several thousands of bytes of memory.
>
>
> include/trace/events/migrate_device.h | 196 ++++++++++++++++++++++++++
> mm/migrate_device.c | 11 ++
> 2 files changed, 207 insertions(+)
> create mode 100644 include/trace/events/migrate_device.h
>
> diff --git a/include/trace/events/migrate_device.h b/include/trace/events/migrate_device.h
> new file mode 100644
> index 000000000000..9b2782800ea9
> --- /dev/null
> +++ b/include/trace/events/migrate_device.h
> @@ -0,0 +1,196 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES
> + */
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM migrate_device
> +
> +#if !defined(_TRACE_MIGRATE_DEVICE_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_MIGRATE_DEVICE_H
> +
> +#include <linux/tracepoint.h>
> +#include <linux/migrate.h>
> +
> +TRACE_EVENT(migrate_vma_setup,
> +
> + TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages),
> +
> + TP_ARGS(start, end, nr_pages),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, start)
> + __field(unsigned long, end)
> + __field(unsigned long, nr_pages)
> + ),
> +
> + TP_fast_assign(
> + __entry->start = start;
> + __entry->end = end;
> + __entry->nr_pages = nr_pages;
> + ),
> +
> + TP_printk("start=0x%lx end=0x%lx nr_pages=%lu",
> + __entry->start, __entry->end, __entry->nr_pages)
> +);
Make the above into:
DECLARE_EVENT_CLASS(migrate_vma_pages_template,
TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages),
TP_ARGS(start, end, nr_pages),
TP_STRUCT__entry(
__field(unsigned long, start)
__field(unsigned long, end)
__field(unsigned long, nr_pages)
),
TP_fast_assign(
__entry->start = start;
__entry->end = end;
__entry->nr_pages = nr_pages;
),
TP_printk("start=0x%lx end=0x%lx nr_pages=%lu",
__entry->start, __entry->end, __entry->nr_pages)
);
DEFINE_EVENT(migrate_vma_pages_template, migrate_vma_setup,
TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages),
TP_ARGS(start, end, nr_pages));
DEFINE_EVENT(migrate_vma_pages_template, migrate_vma_collect,
TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages),
TP_ARGS(start, end, nr_pages));
DEFINE_EVENT(migrate_vma_pages_template, migrate_vma_collect_hole,
TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages),
TP_ARGS(start, end, nr_pages));
> +
> +TRACE_EVENT(migrate_vma_collect_skip,
> +
> + TP_PROTO(unsigned long start, unsigned long end),
> +
> + TP_ARGS(start, end),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, start)
> + __field(unsigned long, end)
> + ),
> +
> + TP_fast_assign(
> + __entry->start = start;
> + __entry->end = end;
> + ),
> +
> + TP_printk("start=0x%lx end=0x%lx", __entry->start, __entry->end)
> +);
> +
> +TRACE_EVENT(migrate_vma_unmap,
> +
> + TP_PROTO(unsigned long npages, unsigned long cpages),
> +
> + TP_ARGS(npages, cpages),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, npages)
> + __field(unsigned long, cpages)
> + ),
> +
> + TP_fast_assign(
> + __entry->npages = npages;
> + __entry->cpages = cpages;
> + ),
> +
> + TP_printk("npages=%lu cpages=%lu",
> + __entry->npages, __entry->cpages)
> +);
> +
> +TRACE_EVENT(migrate_device_pages,
> +
> + TP_PROTO(unsigned long npages, unsigned long migrated),
> +
> + TP_ARGS(npages, migrated),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, npages)
> + __field(unsigned long, migrated)
> + ),
> +
> + TP_fast_assign(
> + __entry->npages = npages;
> + __entry->migrated = migrated;
> + ),
> +
> + TP_printk("npages=%lu migrated=%lu",
> + __entry->npages, __entry->migrated)
> +);
The above two could be converted to:
DECLARE_EVENT_CLASS(migrate_vma_device_template,
TP_PROTO(unsigned long npages, unsigned long cpage_migrate),
TP_ARGS(npages, cpage_migrate),
TP_STRUCT__entry(
__field(unsigned long, npages)
__field(unsigned long, cpage_migrated)
),
TP_fast_assign(
__entry->npages = npages;
__entry->cpage_migrated = cpage_migrate;
),
TP_printk("npages=%lu migrated=%lu",
__entry->npages, __entry->cpage_migrated)
);
DEFINE_EVENT(migrate_vma_device_template, migrate_device_pages,
TP_PROTO(unsigned long npages, unsigned long cpage_migrate),
TP_ARGS(npages, cpage_migrate));
DEFINE_EVENT_PRINT(migrate_vma_device_template, migrate_vma_unmap,
TP_PROTO(unsigned long npages, unsigned long cpage_migrate),
TP_ARGS(npages, cpage_migrate),
TP_printk("npages=%lu cpages=%lu",
__entry->npages, __entry->cpage_migrated));
Where the second one will show a different print format.
> +
> +TRACE_EVENT(migrate_vma_pages,
> +
> + TP_PROTO(unsigned long npages, unsigned long start, unsigned long end),
> +
> + TP_ARGS(npages, start, end),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, npages)
> + __field(unsigned long, start)
> + __field(unsigned long, end)
> + ),
> +
> + TP_fast_assign(
> + __entry->npages = npages;
> + __entry->start = start;
> + __entry->end = end;
> + ),
> +
> + TP_printk("npages=%lu start=0x%lx end=0x%lx",
> + __entry->npages, __entry->start, __entry->end)
Is there a reason npages is not at the end? Otherwise you can save even more memory with:
DEFINE_EVENT(migrate_vma_pages_template, migrate_vma_pages,
TP_PROTO(unsigned long start, unsigned long end, unsigned long nr_pages),
TP_ARGS(start, end, nr_pages));
> +);
> +
> +TRACE_EVENT(migrate_device_finalize,
> +
> + TP_PROTO(unsigned long npages),
> +
> + TP_ARGS(npages),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, npages)
> + ),
> +
> + TP_fast_assign(
> + __entry->npages = npages;
> + ),
> +
> + TP_printk("npages=%lu", __entry->npages)
> +);
> +
> +TRACE_EVENT(migrate_vma_finalize,
> +
> + TP_PROTO(unsigned long npages),
> +
> + TP_ARGS(npages),
> +
> + TP_STRUCT__entry(
> + __field(unsigned long, npages)
> + ),
> +
> + TP_fast_assign(
> + __entry->npages = npages;
> + ),
> +
> + TP_printk("npages=%lu", __entry->npages)
> +);
The above two can be converted to:
DECLARE_EVENT_CLASS(migrate_finalize_template,
TP_PROTO(unsigned long npages),
TP_ARGS(npages),
TP_STRUCT__entry(
__field(unsigned long, npages)
),
TP_fast_assign(
__entry->npages = npages;
),
TP_printk("npages=%lu", __entry->npages)
);
DEFINE_EVENT(migrate_finalize_template, migrate_device_finalize,
TP_PROTO(unsigned long npages),
TP_ARGS(npages));
DEFINE_EVENT(migrate_finalize_template, migrate_vma_finalize,
TP_PROTO(unsigned long npages),
TP_ARGS(npages));
-- Steve
> +#endif /* _TRACE_MIGRATE_DEVICE_H */
> +
> +#include <trace/define_trace.h>
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index fa42d2ebd024..c869b272e85a 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -18,6 +18,9 @@
> #include <asm/tlbflush.h>
> #include "internal.h"
>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/migrate_device.h>
> +
> static int migrate_vma_collect_skip(unsigned long start,
> unsigned long end,
> struct mm_walk *walk)
> @@ -25,6 +28,8 @@ static int migrate_vma_collect_skip(unsigned long start,
> struct migrate_vma *migrate = walk->private;
> unsigned long addr;
>
> + trace_migrate_vma_collect_skip(start, end);
> +
> for (addr = start; addr < end; addr += PAGE_SIZE) {
> migrate->dst[migrate->npages] = 0;
> migrate->src[migrate->npages++] = 0;
> @@ -69,6 +74,7 @@ static int migrate_vma_collect_hole(unsigned long start,
> migrate->cpages++;
> }
>
> + trace_migrate_vma_collect_hole(start, end, migrate->npages);
> return 0;
> }
>
> @@ -517,6 +523,7 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
>
> mmu_notifier_invalidate_range_end(&range);
> migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
> + trace_migrate_vma_collect(migrate->start, migrate->end, migrate->npages);
> }
>
> /*
> @@ -748,6 +755,8 @@ int migrate_vma_setup(struct migrate_vma *args)
> if (args->fault_page && !PageLocked(args->fault_page))
> return -EINVAL;
>
> + trace_migrate_vma_setup(args->start, args->end, nr_pages);
> +
> memset(args->src, 0, sizeof(*args->src) * nr_pages);
> args->cpages = 0;
> args->npages = 0;
> @@ -1259,6 +1268,7 @@ EXPORT_SYMBOL(migrate_device_pages);
> void migrate_vma_pages(struct migrate_vma *migrate)
> {
> __migrate_device_pages(migrate->src, migrate->dst, migrate->npages, migrate);
> + trace_migrate_device_pages(migrate->npages, migrate->npages);
> }
> EXPORT_SYMBOL(migrate_vma_pages);
>
> @@ -1312,6 +1322,7 @@ static void __migrate_device_finalize(unsigned long *src_pfns,
> folio_put(dst);
> }
> }
> + trace_migrate_vma_finalize(npages);
> }
>
> /*
Powered by blists - more mailing lists