[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250525121939.43112-1-weiguixiong@bytedance.com>
Date: Sun, 25 May 2025 20:19:39 +0800
From: Guixiong Wei <weiguixiong@...edance.com>
To: wim@...ux-watchdog.org,
linux@...ck-us.net
Cc: linux-watchdog@...r.kernel.org,
linux-kernel@...r.kernel.org,
weiguixiong@...edance.com
Subject: [RFC] watchdog: Introduce panic notifier for watchdog device on kdump
The watchdog device is not stopped after a kernel crash, but the kexec
kernel may not enable the watchdog device. As a result, the watchdog may
expire and interrupt a long-running kdump process (e.g., on TDX VMs with
large memory).
So introduce a panic notifier for the watchdog device. When the kernel
crashes, the handler invokes the watchdog stop callback, so that the kdump
process is not interrupted by the watchdog device.
Signed-off-by: Guixiong Wei <weiguixiong@...edance.com>
---
drivers/watchdog/watchdog_core.c | 22 ++++++++++++++++++++++
include/linux/watchdog.h | 9 +++++++++
2 files changed, 31 insertions(+)
diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 6152dba4b52c..7a1ad9935bf7 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -36,6 +36,7 @@
#include <linux/of.h> /* For of_alias_get_id */
#include <linux/property.h> /* For device_property_read_u32 */
#include <linux/suspend.h>
+#include <linux/panic_notifier.h>
#include "watchdog_core.h" /* For watchdog_dev_register/... */
@@ -155,6 +156,22 @@ int watchdog_init_timeout(struct watchdog_device *wdd,
}
EXPORT_SYMBOL_GPL(watchdog_init_timeout);
+/*
+ * watchdog_panic_notifier - stop a running hardware watchdog on kernel panic
+ * @nb: the panic_nb notifier block embedded in the watchdog_device
+ * @code: notifier event code (unused)
+ * @data: notifier payload (unused)
+ *
+ * Always returns NOTIFY_DONE. NOTIFY_BAD would abort the walk of the panic
+ * notifier chain and keep later panic handlers from running, so a failed
+ * stop is deliberately not propagated.
+ */
+static int watchdog_panic_notifier(struct notifier_block *nb, unsigned long code, void *data)
+{
+	struct watchdog_device *wdd;
+
+	wdd = container_of(nb, struct watchdog_device, panic_nb);
+
+	/* The stop op is optional; never call through a NULL pointer here. */
+	if (wdd->ops->stop && watchdog_hw_running(wdd))
+		wdd->ops->stop(wdd);	/* best effort: nothing useful to do on failure */
+
+	return NOTIFY_DONE;
+}
+
static int watchdog_reboot_notifier(struct notifier_block *nb,
unsigned long code, void *data)
{
@@ -334,6 +351,11 @@ static int ___watchdog_register_device(struct watchdog_device *wdd)
wdd->id, ret);
}
+ if (test_bit(WDOG_STOP_ON_PANIC, &wdd->status)) {
+ wdd->panic_nb.notifier_call = watchdog_panic_notifier;
+ atomic_notifier_chain_register(&panic_notifier_list, &wdd->panic_nb);
+ }
+
return 0;
}
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 99660197a36c..2a74373aed28 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -79,6 +79,7 @@ struct watchdog_ops {
* Replaces max_timeout if specified.
* @reboot_nb: The notifier block to stop watchdog on reboot.
* @restart_nb: The notifier block to register a restart function.
+ * @panic_nb: The notifier block to stop watchdog on panic.
* @driver_data:Pointer to the drivers private data.
* @wd_data: Pointer to watchdog core internal data.
* @status: Field that contains the devices internal status bits.
@@ -107,6 +108,7 @@ struct watchdog_device {
unsigned int max_hw_heartbeat_ms;
struct notifier_block reboot_nb;
struct notifier_block restart_nb;
+ struct notifier_block panic_nb;
struct notifier_block pm_nb;
void *driver_data;
struct watchdog_core_data *wd_data;
@@ -118,6 +120,7 @@ struct watchdog_device {
#define WDOG_HW_RUNNING 3 /* True if HW watchdog running */
#define WDOG_STOP_ON_UNREGISTER 4 /* Should be stopped on unregister */
#define WDOG_NO_PING_ON_SUSPEND 5 /* Ping worker should be stopped on suspend */
+#define WDOG_STOP_ON_PANIC 6 /* Should be stopped on panic */
struct list_head deferred;
};
@@ -146,6 +149,12 @@ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway
set_bit(WDOG_NO_WAY_OUT, &wdd->status);
}
+/*
+ * Use the following function to stop the watchdog on panic.
+ * It sets the WDOG_STOP_ON_PANIC bit in wdd->status. The watchdog core
+ * tests this bit while registering the device and only then installs the
+ * panic notifier, so call this before the device is registered.
+ */
+static inline void watchdog_stop_on_panic(struct watchdog_device *wdd)
+{
+ set_bit(WDOG_STOP_ON_PANIC, &wdd->status);
+}
+
/* Use the following function to stop the watchdog on reboot */
static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd)
{
--
2.39.5 (Apple Git-154)
Powered by blists - more mailing lists