Message-Id: <2bf90acf7d29641ba6643934ff8dbba897dbd2d9.1718873074.git.jialong.yang@shingroup.cn>
Date: Thu, 20 Jun 2024 16:51:17 +0800
From: Jialong Yang <jialong.yang@...ngroup.cn>
To: Michael Ellerman <mpe@...erman.id.au>,
Nicholas Piggin <npiggin@...il.com>,
Christophe Leroy <christophe.leroy@...roup.eu>,
"Naveen N. Rao" <naveen.n.rao@...ux.ibm.com>
Cc: luming.yu@...ngroup.cn,
shenghui.qu@...ngroup.cn,
Jialong Yang <jialong.yang@...ngroup.cn>,
linuxppc-dev@...ts.ozlabs.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v1 1/2] powerpc/mmiotrace: Add MMIO Tracing tool for PowerPC
mmiotrace is a useful tool for tracing MMIO accesses. It is currently
only supported on the x86 and x86_64 platforms; this patch adds support
for powerpc.

The user API is unchanged, so the existing manual at
Documentation/trace/mmiotrace.rst still applies and people can keep
using the tool the way they already do.

Almost all files are copied from arch/x86/mm; the only differences come
from hardware and architecture-specific software.
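
As a quick reference, a typical session looks roughly like this (only a
sketch of the documented interface; on newer kernels the tracing
directory may be /sys/kernel/tracing instead of
/sys/kernel/debug/tracing):

  mount -t debugfs debugfs /sys/kernel/debug
  echo mmiotrace > /sys/kernel/debug/tracing/current_tracer
  cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt &
  # load or exercise the driver under test here
  echo nop > /sys/kernel/debug/tracing/current_tracer
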
LINK: https://lore.kernel.org/lkml/20080127195536.50809974@daedalus.pq.iki.fi/
Signed-off-by: Jialong Yang <jialong.yang@...ngroup.cn>
---
arch/powerpc/Kconfig.debug | 3 +
arch/powerpc/mm/Makefile | 1 +
arch/powerpc/mm/kmmio.c | 649 +++++++++++++++++++++++++++++++
arch/powerpc/mm/mmio-mod.c | 414 ++++++++++++++++++++
arch/powerpc/mm/mmiotrace_arch.c | 149 +++++++
arch/powerpc/mm/mmiotrace_arch.h | 25 ++
arch/powerpc/mm/pf_in.c | 185 +++++++++
arch/powerpc/mm/pf_in.h | 33 ++
8 files changed, 1459 insertions(+)
create mode 100644 arch/powerpc/mm/kmmio.c
create mode 100644 arch/powerpc/mm/mmio-mod.c
create mode 100644 arch/powerpc/mm/mmiotrace_arch.c
create mode 100644 arch/powerpc/mm/mmiotrace_arch.h
create mode 100644 arch/powerpc/mm/pf_in.c
create mode 100644 arch/powerpc/mm/pf_in.h
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 8c80b154e814..8a69188aa75a 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
+config HAVE_MMIOTRACE_SUPPORT
+ def_bool y
+
config PPC_DISABLE_WERROR
bool "Don't build arch/powerpc code with -Werror"
help
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 0fe2f085c05a..cb92049f1239 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump/
obj-$(CONFIG_KASAN) += kasan/
+obj-$(CONFIG_MMIOTRACE) += kmmio.o mmio-mod.o pf_in.o mmiotrace_arch.o
diff --git a/arch/powerpc/mm/kmmio.c b/arch/powerpc/mm/kmmio.c
new file mode 100644
index 000000000000..f4374e721b37
--- /dev/null
+++ b/arch/powerpc/mm/kmmio.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Support for MMIO probes.
+ * Derived from arch/x86/mm/kmmio.c:
+ * Copyright (C) 2024 Jialong Yang (jialong.yang@...ngroup.cn)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/hash.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/mutex.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/paca.h>
+#include <linux/errno.h>
+#include <linux/mmiotrace.h>
+
+#include "mmiotrace_arch.h"
+
+typedef unsigned long pteval_t;
+typedef unsigned long pmdval_t;
+
+#define KMMIO_PAGE_HASH_BITS 4
+#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
+
+struct kmmio_fault_page {
+ struct list_head list;
+ struct kmmio_fault_page *release_next;
+ unsigned long addr; /* the requested address */
+ pteval_t old_presence; /* page presence prior to arming */
+ bool armed;
+
+ /*
+ * Number of times this page has been registered as a part
+ * of a probe. If zero, page is disarmed and this may be freed.
+ * Used only by writers (RCU) and post_kmmio_handler().
+ * Protected by kmmio_lock, when linked into kmmio_page_table.
+ */
+ int count;
+
+ bool scheduled_for_release;
+};
+
+struct kmmio_delayed_release {
+ struct rcu_head rcu;
+ struct kmmio_fault_page *release_list;
+};
+
+struct kmmio_context {
+ struct kmmio_fault_page *fpage;
+ struct kmmio_probe *probe;
+ unsigned long saved_flags;
+ unsigned long saved_softe;
+ unsigned long addr;
+ int active;
+};
+
+/*
+ * The kmmio_lock is taken from the single-step (debug) exception handler,
+ * which is treated as NMI context, so lockdep would complain about it
+ * being used in both NMI and normal context. Hide it from lockdep, as it
+ * should have no other locks taken under it and is only for mmio debugging.
+ */
+static arch_spinlock_t kmmio_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+/* Protected by kmmio_lock */
+unsigned int kmmio_count;
+
+/* Read-protected by RCU, write-protected by kmmio_lock. */
+static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
+static LIST_HEAD(kmmio_probes);
+
+static struct list_head *kmmio_page_list(unsigned long addr)
+{
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+
+ if (!pte)
+ return NULL;
+ addr &= page_level_mask(l);
+
+ return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
+}
+
+/* Accessed per-cpu */
+static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
+
+/*
+ * this is basically a dynamic stabbing problem:
+ * Could use the existing prio tree code or
+ * Possible better implementations:
+ * The Interval Skip List: A Data Structure for Finding All Intervals That
+ * Overlap a Point (might be simple)
+ * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
+ */
+/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
+static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
+{
+ struct kmmio_probe *p;
+
+ list_for_each_entry_rcu(p, &kmmio_probes, list) {
+ if (addr >= p->addr && addr < (p->addr + p->len))
+ return p;
+ }
+ return NULL;
+}
+
+/* You must be holding RCU read lock. */
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
+{
+ struct list_head *head;
+ struct kmmio_fault_page *f;
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+
+ if (!pte)
+ return NULL;
+ addr &= page_level_mask(l);
+ head = kmmio_page_list(addr);
+ list_for_each_entry_rcu(f, head, list) {
+ if (f->addr == addr)
+ return f;
+ }
+ return NULL;
+}
+
+static inline pmd_t pmd_mkinvalid(pmd_t pmd)
+{
+ return __pmd_raw(pmd_raw(pmd) & ~cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
+}
+
+static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
+{
+ pmd_t new_pmd;
+ pmdval_t v = pmd_val(*pmd);
+
+ if (clear) {
+ *old = v;
+ new_pmd = pmd_mkinvalid(*pmd);
+ } else {
+ /* Presume this has been called with clear==true previously */
+ new_pmd = __pmd(*old);
+ }
+ *pmd = new_pmd;
+}
+
+static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old, unsigned long addr)
+{
+ pteval_t v = pte_val(*pte);
+
+ if (clear) {
+ *old = v;
+ /* Nothing should care about address */
+ pte_clear(&init_mm, addr, pte);
+ } else {
+ /* Presume this has been called with clear==true previously */
+ set_pte_at(&init_mm, addr, pte, __pte(*old));
+ }
+}
+
+static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
+{
+ unsigned int level;
+ pte_t *pte = lookup_address(f->addr, &level);
+
+ if (!pte) {
+ pr_err("no pte for addr 0x%08lx\n", f->addr);
+ return -1;
+ }
+
+ if (level == PMD_SHIFT)
+ clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
+ else if (level == PAGE_SHIFT)
+ clear_pte_presence(pte, clear, &f->old_presence, f->addr);
+ else {
+ pr_err("unexpected page level 0x%x.\n", level);
+ return -1;
+ }
+
+ mmap_read_lock(&init_mm);
+ struct vm_area_struct *vma = find_vma(&init_mm, f->addr);
+
+ mmap_read_unlock(&init_mm);
+
+ flush_tlb_page(vma, f->addr);
+
+ return 0;
+}
+
+/*
+ * Mark the given page as not present. Access to it will trigger a fault.
+ *
+ * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
+ * protection is ignored here. RCU read lock is assumed held, so the struct
+ * will not disappear unexpectedly. Furthermore, the caller must guarantee,
+ * that double arming the same virtual address (page) cannot occur.
+ *
+ * Double disarming on the other hand is allowed, and may occur when a fault
+ * and mmiotrace shutdown happen simultaneously.
+ */
+static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
+{
+ int ret;
+
+ WARN_ONCE(f->armed, pr_fmt("kmmio page already armed.\n"));
+ if (f->armed) {
+ pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
+ f->addr, f->count, !!f->old_presence);
+ }
+ ret = clear_page_presence(f, true);
+ WARN_ONCE(ret < 0, pr_fmt("arming at 0x%08lx failed.\n"),
+ f->addr);
+ f->armed = true;
+ return ret;
+}
+
+/** Restore the given page to saved presence state. */
+static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
+{
+ int ret = clear_page_presence(f, false);
+
+ WARN_ONCE(ret < 0,
+ KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
+ f->armed = false;
+}
+
+/*
+ * This is being called from do_page_fault().
+ *
+ * We may be in an interrupt or a critical section. Also prefetching may
+ * trigger a page fault. We may be in the middle of process switch.
+ * We cannot take any locks, because we could be executing especially
+ * within a kmmio critical section.
+ *
+ * Local interrupts are disabled, so preemption cannot happen.
+ * Do not enable interrupts, do not sleep, and watch out for other CPUs.
+ */
+/*
+ * Interrupts are disabled on entry, as this is reached from the page fault
+ * handler, and they remain disabled throughout this function.
+ */
+int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+{
+ struct kmmio_context *ctx;
+ struct kmmio_fault_page *faultpage;
+ int ret = 0; /* default to fault not handled */
+ unsigned long page_base = addr;
+ unsigned int l;
+ pte_t *pte = lookup_address(addr, &l);
+
+ if (!pte)
+ return -EINVAL;
+ page_base &= page_level_mask(l);
+
+ /*
+ * Hold the RCU read lock over single stepping to avoid looking
+ * up the probe and kmmio_fault_page again. The rcu_read_lock_sched()
+ * also disables preemption and prevents process switch during
+ * the single stepping. We can only handle one active kmmio trace
+ * per cpu, so ensure that we finish it before something else
+ * gets to run.
+ */
+ rcu_read_lock_sched_notrace();
+
+ faultpage = get_kmmio_fault_page(page_base);
+ if (!faultpage) {
+ /*
+ * Either this page fault is not caused by kmmio, or
+ * another CPU just pulled the kmmio probe from under
+ * our feet. The latter case should not be possible.
+ */
+ goto no_kmmio;
+ }
+
+ ctx = this_cpu_ptr(&kmmio_ctx);
+ if (ctx->active) {
+ if (page_base == ctx->addr) {
+ /*
+ * A second fault on the same page means some other
+ * condition needs handling by do_page_fault(), the
+ * page really not being present is the most common.
+ */
+ pr_debug("secondary hit for 0x%08lx CPU %d.\n",
+ addr, smp_processor_id());
+
+ if (!faultpage->old_presence)
+ pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
+ addr, smp_processor_id());
+ } else {
+ /*
+ * Prevent overwriting already in-flight context.
+ * This should not happen, let's hope disarming at
+ * least prevents a panic.
+ */
+ pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
+ smp_processor_id(), addr);
+ pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
+ disarm_kmmio_fault_page(faultpage);
+ }
+ goto no_kmmio;
+ }
+ ctx->active++;
+
+ ctx->fpage = faultpage;
+ ctx->probe = get_kmmio_probe(page_base);
+ ctx->saved_flags = (regs->msr & (MSR_SE | MSR_EE));
+ ctx->saved_softe = regs->softe;
+ ctx->addr = page_base;
+
+ if (ctx->probe && ctx->probe->pre_handler)
+ ctx->probe->pre_handler(ctx->probe, regs, addr);
+
+ /*
+ * Enable single-stepping and disable interrupts for the faulting
+ * context. Local interrupts must not get enabled during stepping.
+ */
+ regs->msr |= MSR_SE; // single step
+ regs->msr &= ~MSR_EE; // hard interrupt
+ regs->softe = IRQS_DISABLED; // soft interrupt
+
+ local_paca->srr_valid = 0;
+
+ /* Now we set present bit in PTE and single step. */
+ disarm_kmmio_fault_page(ctx->fpage);
+
+ /*
+ * If another cpu accesses the same page while we are stepping,
+ * the access will not be caught. It will simply succeed and the
+ * only downside is we lose the event. If this becomes a problem,
+ * the user should drop to single cpu before tracing.
+ */
+
+ return 1; /* fault handled */
+
+no_kmmio:
+ rcu_read_unlock_sched_notrace();
+ return ret;
+}
+
+/*
+ * Interrupts are disabled on entry, as this is reached from the single-step
+ * exception, and they remain disabled throughout this function.
+ * This must always get called as the pair to kmmio_handler().
+ */
+static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
+{
+ int ret = 0;
+ struct kmmio_context *ctx = this_cpu_ptr(&kmmio_ctx);
+
+ if (!ctx->active) {
+ /*
+ * debug traps without an active context are due to either
+		 * something external causing them (e.g. using a debugger while
+		 * mmio tracing is enabled), or erroneous behaviour.
+ */
+ pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
+ goto out;
+ }
+
+ if (ctx->probe && ctx->probe->post_handler)
+ ctx->probe->post_handler(ctx->probe, condition, regs);
+
+ /* Prevent racing against release_kmmio_fault_page(). */
+ arch_spin_lock(&kmmio_lock);
+ if (ctx->fpage->count)
+ arm_kmmio_fault_page(ctx->fpage);
+ arch_spin_unlock(&kmmio_lock);
+
+	// Single stepping was already disabled on entry to single_step_exception(),
+	// so there is no need to clear MSR_SE here (regs->msr &= ~MSR_SE).
+ regs->msr |= ctx->saved_flags;
+ regs->softe = ctx->saved_softe;
+
+ /* These were acquired in kmmio_handler(). */
+ ctx->active--;
+ BUG_ON(ctx->active);
+ rcu_read_unlock_sched_notrace();
+
+ /*
+	 * If somebody else is single-stepping across a probe point, MSR_SE
+	 * will still be set, in which case continue the remaining processing
+	 * of the debug exception, as if this is not a probe hit.
+ */
+ if (!(regs->msr & MSR_SE))
+ ret = 1;
+out:
+ return ret;
+}
+
+/* You must be holding kmmio_lock. */
+static int add_kmmio_fault_page(unsigned long addr)
+{
+ struct kmmio_fault_page *f;
+
+ f = get_kmmio_fault_page(addr);
+ if (f) {
+ if (!f->count)
+ arm_kmmio_fault_page(f);
+ f->count++;
+ return 0;
+ }
+
+ f = kzalloc(sizeof(*f), GFP_ATOMIC);
+ if (!f)
+ return -1;
+
+ f->count = 1;
+ f->addr = addr;
+
+ if (arm_kmmio_fault_page(f)) {
+ kfree(f);
+ return -1;
+ }
+
+ list_add_rcu(&f->list, kmmio_page_list(f->addr));
+
+ return 0;
+}
+
+/* You must be holding kmmio_lock. */
+static void release_kmmio_fault_page(unsigned long addr,
+ struct kmmio_fault_page **release_list)
+{
+ struct kmmio_fault_page *f;
+
+ f = get_kmmio_fault_page(addr);
+ if (!f)
+ return;
+
+ f->count--;
+ BUG_ON(f->count < 0);
+ if (!f->count) {
+ disarm_kmmio_fault_page(f);
+ if (!f->scheduled_for_release) {
+ f->release_next = *release_list;
+ *release_list = f;
+ f->scheduled_for_release = true;
+ }
+ }
+}
+
+/*
+ * With page-unaligned ioremaps, one or two armed pages may contain
+ * addresses from outside the intended mapping. Events for these addresses
+ * are currently silently dropped. The events may result only from programming
+ * mistakes by accessing addresses before the beginning or past the end of a
+ * mapping.
+ */
+int register_kmmio_probe(struct kmmio_probe *p)
+{
+ unsigned long flags;
+ int ret = 0;
+ unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
+ const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+ unsigned int l;
+ pte_t *pte;
+
+ local_irq_save(flags);
+ arch_spin_lock(&kmmio_lock);
+ if (get_kmmio_probe(addr)) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ pte = lookup_address(addr, &l);
+ if (!pte) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ kmmio_count++;
+ list_add_rcu(&p->list, &kmmio_probes);
+ while (size < size_lim) {
+ if (add_kmmio_fault_page(addr + size))
+ pr_err("Unable to set page fault.\n");
+ size += page_level_size(l);
+ }
+out:
+ arch_spin_unlock(&kmmio_lock);
+ local_irq_restore(flags);
+
+ /*
+ * XXX: What should I do here?
+ * Here was a call to global_flush_tlb(), but it does not exist
+ * anymore. It seems it's not needed after all.
+ */
+ return ret;
+}
+EXPORT_SYMBOL(register_kmmio_probe);
+
+static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
+{
+ struct kmmio_delayed_release *dr = container_of(
+ head,
+ struct kmmio_delayed_release,
+ rcu);
+ struct kmmio_fault_page *f = dr->release_list;
+
+ while (f) {
+ struct kmmio_fault_page *next = f->release_next;
+
+ BUG_ON(f->count);
+ kfree(f);
+ f = next;
+ }
+ kfree(dr);
+}
+
+static void remove_kmmio_fault_pages(struct rcu_head *head)
+{
+ struct kmmio_delayed_release *dr =
+ container_of(head, struct kmmio_delayed_release, rcu);
+ struct kmmio_fault_page *f = dr->release_list;
+ struct kmmio_fault_page **prevp = &dr->release_list;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ arch_spin_lock(&kmmio_lock);
+ while (f) {
+ if (!f->count) {
+ list_del_rcu(&f->list);
+ prevp = &f->release_next;
+ } else {
+ *prevp = f->release_next;
+ f->release_next = NULL;
+ f->scheduled_for_release = false;
+ }
+ f = *prevp;
+ }
+ arch_spin_unlock(&kmmio_lock);
+ local_irq_restore(flags);
+
+ /* This is the real RCU destroy call. */
+ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
+}
+
+/*
+ * Remove a kmmio probe. You have to synchronize_rcu() before you can be
+ * sure that the callbacks will not be called anymore. Only after that
+ * you may actually release your struct kmmio_probe.
+ *
+ * Unregistering a kmmio fault page has three steps:
+ * 1. release_kmmio_fault_page()
+ * Disarm the page, wait a grace period to let all faults finish.
+ * 2. remove_kmmio_fault_pages()
+ * Remove the pages from kmmio_page_table.
+ * 3. rcu_free_kmmio_fault_pages()
+ * Actually free the kmmio_fault_page structs as with RCU.
+ */
+void unregister_kmmio_probe(struct kmmio_probe *p)
+{
+ unsigned long flags;
+ unsigned long size = 0;
+ unsigned long addr = p->addr & PAGE_MASK;
+ const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+ struct kmmio_fault_page *release_list = NULL;
+ struct kmmio_delayed_release *drelease;
+ unsigned int l;
+ pte_t *pte;
+
+ pte = lookup_address(addr, &l);
+ if (!pte)
+ return;
+
+ local_irq_save(flags);
+ arch_spin_lock(&kmmio_lock);
+ while (size < size_lim) {
+ release_kmmio_fault_page(addr + size, &release_list);
+ size += page_level_size(l);
+ }
+ list_del_rcu(&p->list);
+ kmmio_count--;
+ arch_spin_unlock(&kmmio_lock);
+ local_irq_restore(flags);
+
+ if (!release_list)
+ return;
+
+ drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
+ if (!drelease)
+ return;
+
+ drelease->release_list = release_list;
+
+ /*
+ * This is not really RCU here. We have just disarmed a set of
+ * pages so that they cannot trigger page faults anymore. However,
+ * we cannot remove the pages from kmmio_page_table,
+ * because a probe hit might be in flight on another CPU. The
+ * pages are collected into a list, and they will be removed from
+ * kmmio_page_table when it is certain that no probe hit related to
+ * these pages can be in flight. RCU grace period sounds like a
+ * good choice.
+ *
+ * If we removed the pages too early, kmmio page fault handler might
+ * not find the respective kmmio_fault_page and determine it's not
+ * a kmmio fault, when it actually is. This would lead to madness.
+ */
+ call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
+}
+EXPORT_SYMBOL(unregister_kmmio_probe);
+
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
+{
+ struct die_args *arg = args;
+
+ if (val == DIE_SSTEP && post_kmmio_handler(0, arg->regs) == 1)
+ return NOTIFY_STOP;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nb_die = {
+ .notifier_call = kmmio_die_notifier
+};
+
+int kmmio_init(void)
+{
+ int i;
+
+ for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
+ INIT_LIST_HEAD(&kmmio_page_table[i]);
+
+ return register_die_notifier(&nb_die);
+}
+
+void kmmio_cleanup(void)
+{
+ int i;
+
+ unregister_die_notifier(&nb_die);
+ for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+ WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+ pr_fmt("kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n"));
+ }
+}
diff --git a/arch/powerpc/mm/mmio-mod.c b/arch/powerpc/mm/mmio-mod.c
new file mode 100644
index 000000000000..68ba9f028678
--- /dev/null
+++ b/arch/powerpc/mm/mmio-mod.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/x86/mm/mmio-mod.c:
+ * Copyright (C) 2024 Jialong Yang (jialong.yang@...ngroup.cn)
+ */
+
+#define pr_fmt(fmt) "mmiotrace: " fmt
+
+#include <linux/moduleparam.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+#include <linux/mmiotrace.h>
+#include <linux/pgtable.h>
+#include <linux/atomic.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+
+#include "pf_in.h"
+#include "mmiotrace_arch.h"
+
+struct remap_trace {
+ struct list_head list;
+ struct kmmio_probe probe;
+ resource_size_t phys;
+ unsigned long id;
+};
+
+/* Accessed per-cpu. */
+static DEFINE_PER_CPU(struct trap_reason, pf_reason);
+static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace);
+
+static DEFINE_MUTEX(mmiotrace_mutex);
+static DEFINE_SPINLOCK(trace_lock);
+static atomic_t mmiotrace_enabled;
+static LIST_HEAD(trace_list); /* struct remap_trace */
+
+/*
+ * Locking in this file:
+ * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections.
+ * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex
+ * and trace_lock.
+ * - Routines depending on is_enabled() must take trace_lock.
+ * - trace_list users must hold trace_lock.
+ * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed.
+ * - pre/post callbacks assume the effect of is_enabled() being true.
+ */
+
+/* module parameters */
+static unsigned long filter_offset;
+static bool nommiotrace;
+
+module_param(filter_offset, ulong, 0);
+module_param(nommiotrace, bool, 0);
+
+MODULE_PARM_DESC(filter_offset, "Start address of traced mappings.");
+MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing.");
+
+static bool is_enabled(void)
+{
+ return atomic_read(&mmiotrace_enabled);
+}
+
+static void print_pte(unsigned long address)
+{
+ unsigned int level;
+ pte_t *pte = lookup_address(address, &level);
+
+ if (!pte) {
+ pr_err("Error in %s: no pte for page 0x%08lx\n",
+ __func__, address);
+ return;
+ }
+
+ if (level == PMD_SHIFT) {
+		pr_emerg("PMD-level huge pages are not currently supported: 0x%08lx\n",
+ address);
+ BUG();
+ }
+ pr_info("pte for 0x%lx: 0x%llx 0x%llx\n",
+ address,
+ (unsigned long long)pte_val(*pte),
+ (unsigned long long)pte_val(*pte) & _PAGE_PRESENT);
+}
+
+/*
+ * For some reason the pre/post pairs have been called in an
+ * unmatched order. Report and die.
+ */
+static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
+{
+ const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+
+ pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n",
+ addr, my_reason->addr);
+ print_pte(addr);
+ pr_emerg("faulting IP is at %pS\n", (void *)regs->nip);
+ pr_emerg("last faulting IP was at %pS\n", (void *)my_reason->ip);
+ put_cpu_var(pf_reason);
+ BUG();
+}
+
+static void pre(struct kmmio_probe *p, struct pt_regs *regs,
+ unsigned long addr)
+{
+ struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+ struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+ const unsigned long instptr = instruction_pointer(regs);
+ struct opcode_t *opcode = get_opcode((unsigned int *)instptr);
+ enum mm_io_opcode type = get_ins_type(opcode);
+ struct remap_trace *trace = p->private;
+
+ /* it doesn't make sense to have more than one active trace per cpu */
+ if (my_reason->active_traces)
+ die_kmmio_nesting_error(regs, addr);
+ else
+ my_reason->active_traces++;
+
+ if (!opcode) {
+		pr_warn("Unrecognized instruction; please report the following to the developers:\n");
+		pr_warn("ins_addr: 0x%lx ins: 0x%x\n", instptr, *(unsigned int *)instptr);
+ }
+
+ my_reason->opcode = opcode;
+
+ my_reason->addr = addr;
+ my_reason->ip = instptr;
+
+ my_trace->phys = addr - trace->probe.addr + trace->phys;
+ my_trace->map_id = trace->id;
+
+ my_trace->pc = instptr;
+
+ my_trace->opcode = type;
+ my_trace->width = get_ins_width(opcode);
+
+ if (type == MMIO_WRITE)
+ my_trace->value = get_ins_val(my_reason, regs);
+
+ put_cpu_var(cpu_trace);
+ put_cpu_var(pf_reason);
+}
+
+static void post(struct kmmio_probe *p, unsigned long condition,
+ struct pt_regs *regs)
+{
+ struct trap_reason *my_reason = &get_cpu_var(pf_reason);
+ struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace);
+ struct opcode_t *opcode = my_reason->opcode;
+ enum mm_io_opcode type = get_ins_type(opcode);
+
+ /* this should always return the active_trace count to 0 */
+ my_reason->active_traces--;
+ if (my_reason->active_traces) {
+ pr_emerg("unexpected post handler");
+ BUG();
+ }
+
+ if (type == MMIO_READ)
+ my_trace->value = get_ins_val(my_reason, regs);
+
+ mmio_trace_rw(my_trace);
+ put_cpu_var(cpu_trace);
+ put_cpu_var(pf_reason);
+}
+
+static void ioremap_trace_core(resource_size_t offset, unsigned long size,
+ void __iomem *addr)
+{
+ static atomic_t next_id;
+ struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL);
+ /* These are page-unaligned. */
+ struct mmiotrace_map map = {
+ .phys = offset,
+ .virt = (unsigned long)addr,
+ .len = size,
+ .opcode = MMIO_PROBE
+ };
+
+ if (!trace) {
+ pr_err("kmalloc failed in ioremap\n");
+ return;
+ }
+
+ *trace = (struct remap_trace) {
+ .probe = {
+ .addr = (unsigned long)addr,
+ .len = size,
+ .pre_handler = pre,
+ .post_handler = post,
+ .private = trace
+ },
+ .phys = offset,
+ .id = atomic_inc_return(&next_id)
+ };
+ map.map_id = trace->id;
+
+ spin_lock_irq(&trace_lock);
+ if (!is_enabled()) {
+ kfree(trace);
+ goto not_enabled;
+ }
+
+ mmio_trace_mapping(&map);
+ list_add_tail(&trace->list, &trace_list);
+ if (!nommiotrace)
+ register_kmmio_probe(&trace->probe);
+
+not_enabled:
+ spin_unlock_irq(&trace_lock);
+}
+
+void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
+ void __iomem *addr)
+{
+ pr_err("ioremap_*(0x%llx, 0x%lx) = %p\n",
+ (unsigned long long)offset, size, addr);
+ if (!is_enabled()) /* recheck and proper locking in *_core() */
+ return;
+
+ pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n",
+ (unsigned long long)offset, size, addr);
+ if ((filter_offset) && (offset != filter_offset))
+ return;
+ ioremap_trace_core(offset, size, addr);
+}
+
+static void iounmap_trace_core(volatile void __iomem *addr)
+{
+ struct mmiotrace_map map = {
+ .phys = 0,
+ .virt = (unsigned long)addr,
+ .len = 0,
+ .opcode = MMIO_UNPROBE
+ };
+ struct remap_trace *trace;
+ struct remap_trace *tmp;
+ struct remap_trace *found_trace = NULL;
+
+ pr_debug("Unmapping %p.\n", addr);
+
+ spin_lock_irq(&trace_lock);
+ if (!is_enabled())
+ goto not_enabled;
+
+ list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+ if ((unsigned long)addr == trace->probe.addr) {
+ if (!nommiotrace)
+ unregister_kmmio_probe(&trace->probe);
+ list_del(&trace->list);
+ found_trace = trace;
+ break;
+ }
+ }
+ map.map_id = (found_trace) ? found_trace->id : -1;
+ mmio_trace_mapping(&map);
+
+not_enabled:
+ spin_unlock_irq(&trace_lock);
+ if (found_trace) {
+ synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+ kfree(found_trace);
+ }
+}
+
+void mmiotrace_iounmap(volatile void __iomem *addr)
+{
+ might_sleep();
+ if (is_enabled()) /* recheck and proper locking in *_core() */
+ iounmap_trace_core(addr);
+}
+
+int mmiotrace_printk(const char *fmt, ...)
+{
+ int ret = 0;
+ va_list args;
+ unsigned long flags;
+
+ va_start(args, fmt);
+
+ spin_lock_irqsave(&trace_lock, flags);
+ if (is_enabled())
+ ret = mmio_trace_printk(fmt, args);
+ spin_unlock_irqrestore(&trace_lock, flags);
+
+ va_end(args);
+ return ret;
+}
+EXPORT_SYMBOL(mmiotrace_printk);
+
+static void clear_trace_list(void)
+{
+ struct remap_trace *trace;
+ struct remap_trace *tmp;
+
+ /*
+ * No locking required, because the caller ensures we are in a
+ * critical section via mutex, and is_enabled() is false,
+ * i.e. nothing can traverse or modify this list.
+ * Caller also ensures is_enabled() cannot change.
+ */
+ list_for_each_entry(trace, &trace_list, list) {
+ pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n",
+ trace->probe.addr, trace->probe.len);
+ if (!nommiotrace)
+ unregister_kmmio_probe(&trace->probe);
+ }
+ synchronize_rcu(); /* unregister_kmmio_probe() requirement */
+
+ list_for_each_entry_safe(trace, tmp, &trace_list, list) {
+ list_del(&trace->list);
+ kfree(trace);
+ }
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static cpumask_var_t downed_cpus;
+
+static void enter_uniprocessor(void)
+{
+ int cpu;
+ int err;
+
+ if (!cpumask_available(downed_cpus) &&
+ !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) {
+ pr_notice("Failed to allocate mask\n");
+ goto out;
+ }
+
+ cpus_read_lock();
+ cpumask_copy(downed_cpus, cpu_online_mask);
+ cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus);
+ if (num_online_cpus() > 1)
+ pr_notice("Disabling non-boot CPUs...\n");
+ cpus_read_unlock();
+
+ for_each_cpu(cpu, downed_cpus) {
+ err = remove_cpu(cpu);
+ if (!err)
+ pr_info("CPU%d is down.\n", cpu);
+ else
+ pr_err("Error taking CPU%d down: %d\n", cpu, err);
+ }
+out:
+ if (num_online_cpus() > 1)
+ pr_warn("multiple CPUs still online, may miss events.\n");
+}
+
+static void leave_uniprocessor(void)
+{
+ int cpu;
+ int err;
+
+ if (!cpumask_available(downed_cpus) || cpumask_empty(downed_cpus))
+ return;
+ pr_notice("Re-enabling CPUs...\n");
+ for_each_cpu(cpu, downed_cpus) {
+ err = add_cpu(cpu);
+ if (!err)
+ pr_info("enabled CPU%d.\n", cpu);
+ else
+ pr_err("cannot re-enable CPU%d: %d\n", cpu, err);
+ }
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+static void enter_uniprocessor(void)
+{
+ if (num_online_cpus() > 1)
+ pr_warn("multiple CPUs are online, may miss events. Suggest booting with maxcpus=1 kernel argument.\n");
+}
+
+static void leave_uniprocessor(void)
+{
+}
+#endif
+
+void enable_mmiotrace(void)
+{
+ mutex_lock(&mmiotrace_mutex);
+ if (is_enabled())
+ goto out;
+
+ if (nommiotrace)
+ pr_info("MMIO tracing disabled.\n");
+ kmmio_init();
+ enter_uniprocessor();
+ spin_lock_irq(&trace_lock);
+ atomic_inc(&mmiotrace_enabled);
+ spin_unlock_irq(&trace_lock);
+ pr_info("enabled.\n");
+out:
+ mutex_unlock(&mmiotrace_mutex);
+}
+
+void disable_mmiotrace(void)
+{
+ mutex_lock(&mmiotrace_mutex);
+ if (!is_enabled())
+ goto out;
+
+ spin_lock_irq(&trace_lock);
+ atomic_dec(&mmiotrace_enabled);
+ BUG_ON(is_enabled());
+ spin_unlock_irq(&trace_lock);
+
+ clear_trace_list(); /* guarantees: no more kmmio callbacks */
+ leave_uniprocessor();
+ kmmio_cleanup();
+ pr_info("disabled.\n");
+out:
+ mutex_unlock(&mmiotrace_mutex);
+}
diff --git a/arch/powerpc/mm/mmiotrace_arch.c b/arch/powerpc/mm/mmiotrace_arch.c
new file mode 100644
index 000000000000..ccc8032384ef
--- /dev/null
+++ b/arch/powerpc/mm/mmiotrace_arch.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/powerpc/mm/pgtable.c:
+ * Copyright (C) 2024 Jialong Yang (jialong.yang@...ngroup.cn)
+ */
+
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/hugetlb.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/hugetlb.h>
+
+#include "mmiotrace_arch.h"
+
+static pte_t *mmiotrace_find_linux_pte(pgd_t *pgdp, unsigned long ea,
+ bool *is_thp, unsigned int *hpage_shift)
+{
+ p4d_t p4d, *p4dp;
+ pud_t pud, *pudp;
+ pmd_t pmd, *pmdp;
+ pte_t *ret_pte;
+ hugepd_t *hpdp = NULL;
+ unsigned int pdshift;
+
+ if (hpage_shift)
+ *hpage_shift = 0;
+
+ if (is_thp)
+ *is_thp = false;
+
+ /*
+	 * Always operate on the local stack value. This makes sure the
+	 * value doesn't get updated by a parallel THP split/collapse,
+	 * page fault or a page unmap. The returned pte_t * is still not
+	 * stable, so the caller must check for those conditions.
+ * Top level is an exception because it is folded into p4d.
+ */
+ p4dp = p4d_offset(pgdp, ea);
+ p4d = READ_ONCE(*p4dp);
+ pdshift = P4D_SHIFT;
+
+ if (p4d_none(p4d))
+ return NULL;
+
+ if (p4d_leaf(p4d)) {
+ ret_pte = (pte_t *)p4dp;
+ goto out;
+ }
+
+ if (is_hugepd(__hugepd(p4d_val(p4d)))) {
+ hpdp = (hugepd_t *)&p4d;
+ goto out_huge;
+ }
+
+ /*
+ * Even if we end up with an unmap, the pgtable will not
+ * be freed, because we do an rcu free and here we are
+ * irq disabled
+ */
+ pdshift = PUD_SHIFT;
+ pudp = pud_offset(&p4d, ea);
+ pud = READ_ONCE(*pudp);
+
+ if (pud_none(pud))
+ return NULL;
+
+ if (pud_leaf(pud)) {
+ ret_pte = (pte_t *)pudp;
+ goto out;
+ }
+
+ if (is_hugepd(__hugepd(pud_val(pud)))) {
+ hpdp = (hugepd_t *)&pud;
+ goto out_huge;
+ }
+
+ pdshift = PMD_SHIFT;
+ pmdp = pmd_offset(&pud, ea);
+ pmd = READ_ONCE(*pmdp);
+
+ /*
+ * A hugepage collapse is captured by this condition, see
+ * pmdp_collapse_flush.
+ */
+ if (pmd_none(pmd))
+ return NULL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * A hugepage split is captured by this condition, see
+ * pmdp_invalidate.
+ *
+ * Huge page modification can be caught here too.
+ */
+ if (pmd_is_serializing(pmd))
+ return NULL;
+#endif
+
+ if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
+ if (is_thp)
+ *is_thp = true;
+ ret_pte = (pte_t *)pmdp;
+ goto out;
+ }
+
+ if (pmd_leaf(pmd)) {
+ ret_pte = (pte_t *)pmdp;
+ goto out;
+ }
+
+ if (is_hugepd(__hugepd(pmd_val(pmd)))) {
+ hpdp = (hugepd_t *)&pmd;
+ goto out_huge;
+ }
+
+ pdshift = PAGE_SHIFT;
+
+ if (hpage_shift)
+ *hpage_shift = pdshift;
+
+ return pte_offset_kernel(&pmd, ea);
+
+out_huge:
+ if (!hpdp)
+ return NULL;
+
+ ret_pte = hugepte_offset(*hpdp, ea, pdshift);
+ pdshift = hugepd_shift(*hpdp);
+out:
+ if (hpage_shift)
+ *hpage_shift = pdshift;
+ return ret_pte;
+}
+
+pte_t *lookup_address(unsigned long address, unsigned int *shift)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ pte_t *pte = mmiotrace_find_linux_pte(pgd_offset_k(address), address, NULL, shift);
+
+ local_irq_restore(flags);
+
+ return pte;
+}
diff --git a/arch/powerpc/mm/mmiotrace_arch.h b/arch/powerpc/mm/mmiotrace_arch.h
new file mode 100644
index 000000000000..f4a5bff24a07
--- /dev/null
+++ b/arch/powerpc/mm/mmiotrace_arch.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Derived from arch/powerpc/mm/pgtable.c:
+ * Copyright (C) 2024 Jialong Yang (jialong.yang@...ngroup.cn)
+ */
+
+#ifndef __MMIOTRACE_ARCH_
+#define __MMIOTRACE_ARCH_
+#include <asm/pgtable.h>
+
+static inline int page_level_shift(unsigned int level)
+{
+ return level;
+}
+static inline unsigned long page_level_size(unsigned int level)
+{
+ return 1UL << page_level_shift(level);
+}
+static inline unsigned long page_level_mask(unsigned int level)
+{
+ return ~(page_level_size(level) - 1);
+}
+
+pte_t *lookup_address(unsigned long address, unsigned int *level);
+#endif // __MMIOTRACE_ARCH_
diff --git a/arch/powerpc/mm/pf_in.c b/arch/powerpc/mm/pf_in.c
new file mode 100644
index 000000000000..e6c90b383e7f
--- /dev/null
+++ b/arch/powerpc/mm/pf_in.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/x86/mm/pf_in.c:
+ * Copyright (C) 2024 Jialong Yang (jialong.yang@...ngroup.cn)
+ */
+
+#include <linux/ptrace.h> /* struct pt_regs */
+#include "pf_in.h"
+#include <linux/printk.h>
+#include <linux/mmiotrace.h>
+
+/* D  32 0x80000000 B   lwz    Load Word and Zero */
+/* D  33 0x84000000 B   lwzu   Load Word and Zero with Update */
+/* D  34 0x88000000 B   lbz    Load Byte and Zero */
+/* D  35 0x8C000000 B   lbzu   Load Byte and Zero with Update */
+/* D  36 0x90000000 B   stw    Store Word */
+/* D  37 0x94000000 B   stwu   Store Word with Update */
+/* D  38 0x98000000 B   stb    Store Byte */
+/* D  39 0x9C000000 B   stbu   Store Byte with Update */
+/* D  40 0xA0000000 B   lhz    Load Halfword and Zero */
+/* D  41 0xA4000000 B   lhzu   Load Halfword and Zero with Update */
+/* D  42 0xA8000000 B   lha    Load Halfword Algebraic */
+/* D  43 0xAC000000 B   lhau   Load Halfword Algebraic with Update */
+/* D  44 0xB0000000 B   sth    Store Halfword */
+/* D  45 0xB4000000 B   sthu   Store Halfword with Update */
+/* D  46 0xB8000000 B   lmw    Load Multiple Word */
+/* D  47 0xBC000000 B   stmw   Store Multiple Word */
+/* D  48 0xC0000000 FP  lfs    Load Floating-Point Single */
+/* D  49 0xC4000000 FP  lfsu   Load Floating-Point Single with Update */
+/* D  50 0xC8000000 FP  lfd    Load Floating-Point Double */
+/* D  51 0xCC000000 FP  lfdu   Load Floating-Point Double with Update */
+/* D  52 0xD0000000 FP  stfs   Store Floating-Point Single */
+/* D  53 0xD4000000 FP  stfsu  Store Floating-Point Single with Update */
+/* D  54 0xD8000000 FP  stfd   Store Floating-Point Double */
+/* D  55 0xDC000000 FP  stfdu  Store Floating-Point Double with Update */
+/* DQ 56 0xE0000000 LSQ lq     Load Quadword */
+/* DS 57 0xE4000000 FP  lfdp   Load Floating-Point Double Pair */
+/* DS 58 0xE8000000 64  ld     Load Doubleword */
+/* DS 58 0xE8000001 64  ldu    Load Doubleword with Update */
+/* DS 58 0xE8000002 64  lwa    Load Word Algebraic */
+/* DS 62 0xF8000000 64  std    Store Doubleword */
+/* DS 62 0xF8000001 64  stdu   Store Doubleword with Update */
+/* DS 62 0xF8000002 LSQ stq    Store Quadword */
+
+// D-form:
+// 0-5 6-10 11-15 16-31
+// opcode RT RA Offset
+
+// DQ-form:
+// 0-5 6-10 11-15 16-27
+// opcode RT RA Offset
+
+// DS-form:
+// 0-5 6-10 11-15 16-29 30-31
+// opcode RT RA Offset opcode
+
+#define D_OPCODE_MASK GENMASK(31, 26)
+#define DQ_OPCODE_MASK D_OPCODE_MASK
+#define DS_OPCODE_MASK (D_OPCODE_MASK | GENMASK(1, 0))
+#define RS_RT_OFFSET 21UL
+#define RS_RT_MASK GENMASK(25, 21)
+#define RA_MASK GENMASK(20, 16)
+#define D_OFFSET GENMASK(15, 0)
+#define DQ_OFFSET GENMASK(15, 4)
+#define DS_OFFSET GENMASK(15, 2)
+
+static struct opcode_t opcodes[] = {
+ {0x80000000, D_FORMAT, "lwz", },
+ {0x84000000, D_FORMAT, "lwzu", },
+ {0x88000000, D_FORMAT, "lbz", },
+ {0x8C000000, D_FORMAT, "lbzu", },
+ {0x90000000, D_FORMAT, "stw", },
+ {0x94000000, D_FORMAT, "stwu", },
+ {0x98000000, D_FORMAT, "stb", },
+ {0x9C000000, D_FORMAT, "stbu", },
+ {0xA0000000, D_FORMAT, "lhz", },
+ {0xA4000000, D_FORMAT, "lhzu", },
+ {0xA8000000, D_FORMAT, "lha", },
+ {0xAC000000, D_FORMAT, "lhau", },
+ {0xB0000000, D_FORMAT, "sth", },
+ {0xB4000000, D_FORMAT, "sthu", },
+ {0xB8000000, D_FORMAT, "lmw", },
+ {0xBC000000, D_FORMAT, "stmw", },
+ {0xC0000000, D_FORMAT, "lfs", },
+ {0xC4000000, D_FORMAT, "lfsu", },
+ {0xC8000000, D_FORMAT, "lfd", },
+ {0xCC000000, D_FORMAT, "lfdu", },
+ {0xD0000000, D_FORMAT, "stfs", },
+ {0xD4000000, D_FORMAT, "stfsu", },
+ {0xD8000000, D_FORMAT, "stfd", },
+ {0xDC000000, D_FORMAT, "stfdu", },
+ {0xE0000000, DQ_FORMAT, "lq", },
+ {0xE4000000, DS_FORMAT, "lfdp", },
+	{0xE8000000, DS_FORMAT, "ld", },
+	{0xE8000001, DS_FORMAT, "ldu", },
+ {0xE8000002, DS_FORMAT, "lwa", },
+ {0xF8000000, DS_FORMAT, "std", },
+ {0xF8000001, DS_FORMAT, "stdu", },
+ {0xF8000002, DS_FORMAT, "stq", }
+};
+
+struct opcode_t *get_opcode(unsigned int *addr)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(opcodes); i++) {
+ switch (opcodes[i].form) {
+ case D_FORMAT:
+ if (opcodes[i].opcode == (*addr & D_OPCODE_MASK))
+ return &opcodes[i];
+ break;
+ case DQ_FORMAT:
+ if (opcodes[i].opcode == (*addr & DQ_OPCODE_MASK))
+ return &opcodes[i];
+ break;
+ case DS_FORMAT:
+			if (opcodes[i].opcode == (*addr & DS_OPCODE_MASK))
+ return &opcodes[i];
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+enum mm_io_opcode get_ins_type(struct opcode_t *opcode)
+{
+ if (!opcode)
+ return MMIO_UNKNOWN_OP;
+
+ if (opcode->name[0] == 'l')
+ return MMIO_READ;
+
+ if (opcode->name[0] == 's')
+ return MMIO_WRITE;
+
+ return MMIO_UNKNOWN_OP;
+}
+
+unsigned int get_ins_width(struct opcode_t *opcode)
+{
+	char width_ch = 0;
+
+ if (!opcode)
+ return 0;
+
+ if (opcode->name[0] == 'l')
+ width_ch = opcode->name[1];
+
+ if (opcode->name[0] == 's')
+ width_ch = opcode->name[2];
+
+	switch (width_ch) {
+	case 'b': /* byte */
+		return 1;
+	case 'h': /* halfword */
+		return 2;
+	case 'w': /* word */
+		return 4;
+	case 'm': /* multiple words: real count would be (32 - RT) words */
+	case 'f': /* floating point: value lives in an FPR, width approximated */
+	case 'd': /* doubleword */
+		/* sizeof(long) == 8 on ppc64 */
+	case 'q': /* quadword: 16 bytes, but only one register value is captured */
+		/* fall through */
+	default:
+		return sizeof(long);
+	}
+}
+
+unsigned long get_ins_val(struct trap_reason *reason, struct pt_regs *regs)
+{
+ struct opcode_t *opcode = reason->opcode;
+ unsigned int ins = *(unsigned int *)(reason->ip);
+ unsigned int reg_no;
+ unsigned long mask = ~0UL;
+
+ if (!opcode)
+ return 0;
+
+ mask >>= 8 * (sizeof(long) - get_ins_width(opcode));
+ reg_no = (ins & RS_RT_MASK) >> RS_RT_OFFSET;
+
+ return regs->gpr[reg_no] & mask;
+}
diff --git a/arch/powerpc/mm/pf_in.h b/arch/powerpc/mm/pf_in.h
new file mode 100644
index 000000000000..905ba4937137
--- /dev/null
+++ b/arch/powerpc/mm/pf_in.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Derived from arch/x86/mm/pf_in.h:
+ * Copyright (C) 2024 Jialong Yang (jialong.yang@...ngroup.cn)
+ */
+
+#ifndef __PF_H_
+#define __PF_H_
+
+enum OPCODE_FORMAT {
+ D_FORMAT,
+ DQ_FORMAT,
+ DS_FORMAT,
+};
+
+struct opcode_t {
+ unsigned int opcode;
+ enum OPCODE_FORMAT form;
+ const char *name;
+};
+
+struct trap_reason {
+ unsigned long addr;
+ unsigned long ip;
+ struct opcode_t *opcode;
+ int active_traces;
+};
+
+struct opcode_t *get_opcode(unsigned int *addr);
+enum mm_io_opcode get_ins_type(struct opcode_t *opcode);
+unsigned int get_ins_width(struct opcode_t *opcode);
+unsigned long get_ins_val(struct trap_reason *reason, struct pt_regs *regs);
+#endif /* __PF_H_ */
--
2.34.1