[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <24d73707-f0b5-23aa-e7c1-f9f8a72fe5cd@roeck-us.net>
Date: Sun, 3 Apr 2022 04:47:30 -0700
From: Guenter Roeck <linux@...ck-us.net>
To: Eliav Farber <farbere@...zon.com>, wim@...ux-watchdog.org,
linux-watchdog@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: ronenk@...zon.com, talel@...zon.com, hhhawa@...zon.com,
jonnyc@...zon.com, hanochu@...zon.com, dwmw@...zon.co.uk
Subject: Re: [PATCH] watchdog: sp805: add support for irq
On 4/2/22 22:24, Eliav Farber wrote:
> This change adds registration to IRQ for the sp805 watchdog.
> Before this change there was no notification for watchdog
> barking and the only thing that the HW watchdog did was to
> directly restart the CPU.
>
> With this change, upon IRQ the driver will call panic() which
> in turn might initiate kdump (in case configured). In case the
> dying process (like kdump collection) will hang, the hardware
> watchdog will still directly restart the CPU on a second timeout.
>
> The driver used to cut the specified timeout in half when setting
> the watchdog timeout, since it was ignoring the IRQ that occurred
> the first time the timeout expired. We now use the timeout as is
> since the watchdog IRQ will go off after the configured interval.
>
The functionality is quite similar to pretimeout. dw_wdt implements
pretty much the same functionality this way. Why not use that mechanism ?
Guenter
> Signed-off-by: Talel Shenhar <talel@...zon.com>
> Signed-off-by: Eliav Farber <farbere@...zon.com>
> ---
> drivers/watchdog/sp805_wdt.c | 102 +++++++++++++++++++++++++++++++----
> 1 file changed, 93 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
> index d8876fba686d..09223aed87e3 100644
> --- a/drivers/watchdog/sp805_wdt.c
> +++ b/drivers/watchdog/sp805_wdt.c
> @@ -17,6 +17,7 @@
> #include <linux/amba/bus.h>
> #include <linux/bitops.h>
> #include <linux/clk.h>
> +#include <linux/interrupt.h>
> #include <linux/io.h>
> #include <linux/ioport.h>
> #include <linux/kernel.h>
> @@ -78,6 +79,11 @@ module_param(nowayout, bool, 0);
> MODULE_PARM_DESC(nowayout,
> "Set to 1 to keep watchdog running after device release");
>
> +static bool skip_panic_skip_reset_on_timeout;
> +module_param(skip_panic_skip_reset_on_timeout, bool, 0600);
> +MODULE_PARM_DESC(skip_panic_skip_reset_on_timeout,
> + "For skipping panic and skipping reset on timeout, set this to Y/1");
> +
> /* returns true if wdt is running; otherwise returns false */
> static bool wdt_is_running(struct watchdog_device *wdd)
> {
> @@ -87,7 +93,14 @@ static bool wdt_is_running(struct watchdog_device *wdd)
> return (wdtcontrol & ENABLE_MASK) == ENABLE_MASK;
> }
>
> -/* This routine finds load value that will reset system in required timout */
> +/*
> + * This routine finds load value that will reset or panic the system in the
> + * required timeout.
> + * It also calculates the actual timeout, which can differ from the input
> + * timeout if load value is not in the range of LOAD_MIN and LOAD_MAX.
> + * When panic is enabled it calculates the timeout till the panic, and when it
> + * is disabled it calculates the timeout till the reset.
> + */
> static int wdt_setload(struct watchdog_device *wdd, unsigned int timeout)
> {
> struct sp805_wdt *wdt = watchdog_get_drvdata(wdd);
> @@ -98,10 +111,21 @@ static int wdt_setload(struct watchdog_device *wdd, unsigned int timeout)
> /*
> * sp805 runs counter with given value twice, after the end of first
> * counter it gives an interrupt and then starts counter again. If
> - * interrupt already occurred then it resets the system. This is why
> - * load is half of what should be required.
> + * interrupt already occurred then it resets the system.
> */
> - load = div_u64(rate, 2) * timeout - 1;
> + if (wdt->adev->irq[0]) {
> + /*
> + * Set load value based on a single timeout until we handle
> + * the interrupt.
> + */
> + load = rate * timeout - 1;
> + } else {
> + /*
> + * Set load value to half the timeout, since the interrupt is
> + * ignored and counter must expire twice before CPU is reset.
> + */
> + load = div_u64(rate, 2) * timeout - 1;
> + }
>
> load = (load > LOAD_MAX) ? LOAD_MAX : load;
> load = (load < LOAD_MIN) ? LOAD_MIN : load;
> @@ -109,13 +133,19 @@ static int wdt_setload(struct watchdog_device *wdd, unsigned int timeout)
> spin_lock(&wdt->lock);
> wdt->load_val = load;
> /* roundup timeout to closest positive integer value */
> - wdd->timeout = div_u64((load + 1) * 2 + (rate / 2), rate);
> + if (wdt->adev->irq[0])
> + wdd->timeout = div_u64((load + 1) + (rate / 2), rate);
> + else
> + wdd->timeout = div_u64((load + 1) * 2 + (rate / 2), rate);
> spin_unlock(&wdt->lock);
>
> return 0;
> }
>
> -/* returns number of seconds left for reset to occur */
> +/*
> + * returns number of seconds left for reset to occur, or returns number of
> + * seconds left for panic to occur when enabled.
> + */
> static unsigned int wdt_timeleft(struct watchdog_device *wdd)
> {
> struct sp805_wdt *wdt = watchdog_get_drvdata(wdd);
> @@ -124,9 +154,18 @@ static unsigned int wdt_timeleft(struct watchdog_device *wdd)
> spin_lock(&wdt->lock);
> load = readl_relaxed(wdt->base + WDTVALUE);
>
> - /*If the interrupt is inactive then time left is WDTValue + WDTLoad. */
> - if (!(readl_relaxed(wdt->base + WDTRIS) & INT_MASK))
> - load += wdt->load_val + 1;
> + /*
> + * When panic is enabled, it occurs after the first time that sp805
> + * runs the counter with the given load value.
> + * Otherwise, reset happens after sp805 runs the counter with given
> + * value twice (once before the interrupt and second after the
> + * interrupt), so if the interrupt is inactive (first count) then time
> + * left is WDTValue + WDTLoad.
> + */
> + if (!wdt->adev->irq[0])
> + if (!(readl_relaxed(wdt->base + WDTRIS) & INT_MASK))
> + load += wdt->load_val + 1;
> +
> spin_unlock(&wdt->lock);
>
> return div_u64(load, wdt->rate);
> @@ -227,6 +266,29 @@ static const struct watchdog_ops wdt_ops = {
> .restart = wdt_restart,
> };
>
> +static irqreturn_t wdt_irq_handler(int irq, void *dev_id)
> +{
> + struct device *dev = dev_id;
> + struct sp805_wdt *wdt = dev_get_drvdata(dev);
> +
> + /*
> + * Reset the watchdog timeout to maximize time available to the panic
> + * handler before the watchdog induces a CPU reset. Without resetting
> + * the watchdog here, it would have a single timeout remaining between
> + * interrupt and hardware reset. By resetting the timeout, the panic
> + * handler can run with interrupts disabled for two watchdog timeouts,
> + * ignoring the interrupt that would occur after the first timeout.
> + */
> + wdt_config(&wdt->wdd, true);
> +
> + if (skip_panic_skip_reset_on_timeout)
> + dev_warn(dev, "watchdog timeout expired\n");
> + else
> + panic("watchdog timeout expired");
> +
> + return IRQ_HANDLED;
> +}
> +
> static int
> sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id)
> {
> @@ -299,9 +361,28 @@ sp805_wdt_probe(struct amba_device *adev, const struct amba_id *id)
> }
> amba_set_drvdata(adev, wdt);
>
> + if (adev->irq[0]) {
> + ret = devm_request_irq(&adev->dev,
> + adev->irq[0],
> + wdt_irq_handler,
> + 0,
> + MODULE_NAME,
> + adev);
> + if (ret) {
> + dev_err(&adev->dev,
> + "watchdog irq request failed: %d\n",
> + ret);
> + goto err_request_irq;
> + }
> + } else {
> + dev_warn(&adev->dev, "no irq exists for watchdog\n");
> + }
> +
> dev_info(&adev->dev, "registration successful\n");
> return 0;
>
> +err_request_irq:
> + watchdog_unregister_device(&wdt->wdd);
> err:
> dev_err(&adev->dev, "Probe Failed!!!\n");
> return ret;
> @@ -311,6 +392,9 @@ static int sp805_wdt_remove(struct amba_device *adev)
> {
> struct sp805_wdt *wdt = amba_get_drvdata(adev);
>
> + if (adev->irq[0])
> + devm_free_irq(&adev->dev, adev->irq[0], adev);
> +
> watchdog_unregister_device(&wdt->wdd);
> watchdog_set_drvdata(&wdt->wdd, NULL);
>
Powered by blists - more mailing lists