[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230329082950.726-1-cuiyunhui@bytedance.com>
Date: Wed, 29 Mar 2023 16:29:50 +0800
From: Yunhui Cui <cuiyunhui@...edance.com>
To: conor.dooley@...rochip.com, paul.walmsley@...ive.com,
palmer@...belt.com, aou@...s.berkeley.edu, peterz@...radead.org,
mpe@...erman.id.au, jpoimboe@...nel.org, mark.rutland@....com,
svens@...ux.ibm.com, cuiyunhui@...edance.com, guoren@...nel.org,
jszhang@...nel.org, ebiederm@...ssion.com, bjorn@...osinc.com,
heiko@...ech.de, xianting.tian@...ux.alibaba.com,
mnissler@...osinc.com, linux-riscv@...ts.infradead.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v2] riscv: Dump user opcode bytes on fatal faults
We ran into a problem where, as soon as the system starts to execute
init, init exits unexpectedly with the error message: "unhandled signal 4
code 0x1 ...".
We wanted to know which instruction caused the exception. After dumping
it with show_opcodes(), we found that the exception was caused by a
floating-point instruction.
This led us to the root cause: during system bring-up we had not enabled
floating-point support (CONFIG_FPU is set, but COMPAT_HWCAP_ISA_F/D was
not enabled in the DTS or ACPI).
Like commit ba54d856a9d8 ("x86/fault: Dump user opcode bytes on fatal
faults"), when an exception occurs, it is necessary to dump the
instruction that caused the exception.
Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
---
arch/riscv/include/asm/bug.h | 1 +
arch/riscv/kernel/process.c | 30 ++++++++++++++++++++++++++++++
arch/riscv/kernel/traps.c | 1 +
3 files changed, 32 insertions(+)
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index 1aaea81fb141..56dab998d05d 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -86,6 +86,7 @@ struct pt_regs;
struct task_struct;
void __show_regs(struct pt_regs *regs);
+void show_opcodes(struct pt_regs *regs);
void die(struct pt_regs *regs, const char *str);
void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 774ffde386ab..9ba9f8719605 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -83,6 +83,36 @@ void show_regs(struct pt_regs *regs)
dump_backtrace(regs, NULL, KERN_DEFAULT);
}
+/*
+ * Copy nbytes of code at src into buf. Kernel-mode regs use the kernel nofault
+ * copy; user-mode regs use the user one, or get -EPERM if not current's regs.
+ */
+static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
+		     unsigned int nbytes)
+{
+	if (user_mode(regs))
+		return regs != task_pt_regs(current) ? -EPERM :
+		       copy_from_user_nofault(buf, (void __user *)src, nbytes);
+	return copy_from_kernel_nofault(buf, (u8 *)src, nbytes);
+}
+
+/*
+ * Log the 4 bytes at regs->epc, or the reason they could not be read. Each
+ * message is newline-terminated so a later pr_cont() cannot be glued onto it.
+ */
+void show_opcodes(struct pt_regs *regs)
+{
+	u8 opcodes[4];
+	int err = copy_code(regs, opcodes, regs->epc, sizeof(opcodes));
+
+	if (!err)
+		pr_info("Opcode: %4ph\n", opcodes);
+	else if (err == -EPERM)
+		pr_err("Unable to access userspace of other tasks\n");
+	else
+		pr_err("Failed to access opcode\n");
+}
+
#ifdef CONFIG_COMPAT
static bool compat_mode_supported __read_mostly;
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f6fda94e8e59..892826234ee9 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -100,6 +100,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
__show_regs(regs);
+ show_opcodes(regs);
}
force_sig_fault(signo, code, (void __user *)addr);
--
2.20.1
Powered by blists - more mailing lists