diff mbox series

[v4,2/2] drivers: watchdog: Add support for panic notifier callback

Message ID 20250305101025.2279951-3-george.cherian@marvell.com (mailing list archive)
State New
Headers show
Series Add stop_on_panic support for watchdog | expand

Commit Message

George Cherian March 5, 2025, 10:10 a.m. UTC
The watchdog is not turned off when the kernel panics.
On certain systems this might prevent the kdump kernel from
loading successfully: the kdump kernel might hit a watchdog reset
while it is booting.

To avoid such scenarios, add a panic notifier callback function
which can stop the watchdog. This provision can be enabled by
passing watchdog.stop_on_panic=1 on the kernel command line.

Signed-off-by: George Cherian <george.cherian@marvell.com>
---
 drivers/watchdog/watchdog_core.c | 35 ++++++++++++++++++++++++++++++++
 include/linux/watchdog.h         |  2 ++
 2 files changed, 37 insertions(+)

Comments

Andy Shevchenko March 5, 2025, 10:33 a.m. UTC | #1
On Wed, Mar 05, 2025 at 10:10:25AM +0000, George Cherian wrote:
> Watchdog is not turned off in kernel panic situation.
> In certain systems this might prevent the successful loading
> of kdump kernel. The kdump kernel might hit a watchdog reset
> while it is booting.
> 
> To avoid such scenarios add a panic notifier call back function
> which can stop the watchdog. This provision can be enabled by
> passing watchdog.stop_on_panic=1 via kernel command-line parameter.

...

First of all, do we really need a new module parameter for that? Why can't it
be done automatically if kdump is expected?

> +static bool stop_on_panic;
> +module_param(stop_on_panic, bool, 0444);
> +MODULE_PARM_DESC(stop_on_panic, "Stop watchdogs on panic (0=keep watching, 1=stop)");

+ blank line.

Also I do not see the documentation update. Where is it lost?

>  /*
Guenter Roeck March 6, 2025, midnight UTC | #2
On 3/5/25 02:33, Andy Shevchenko wrote:
> On Wed, Mar 05, 2025 at 10:10:25AM +0000, George Cherian wrote:
>> Watchdog is not turned off in kernel panic situation.
>> In certain systems this might prevent the successful loading
>> of kdump kernel. The kdump kernel might hit a watchdog reset
>> while it is booting.
>>
>> To avoid such scenarios add a panic notifier call back function
>> which can stop the watchdog. This provision can be enabled by
>> passing watchdog.stop_on_panic=1 via kernel command-line parameter.
> 
> ...
> 
> First of all, do we really need a new module parameter for that? Why can't it
> be done automatically if kdump is expected?
> 

Sounds like a good idea to me.

Guenter
diff mbox series

Patch

diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index d46d8c8c01f2..f0006d90da92 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -34,6 +34,7 @@ 
 #include <linux/idr.h>		/* For ida_* macros */
 #include <linux/err.h>		/* For IS_ERR macros */
 #include <linux/of.h>		/* For of_get_timeout_sec */
+#include <linux/panic_notifier.h> /* For panic handler */
 #include <linux/suspend.h>
 
 #include "watchdog_core.h"	/* For watchdog_dev_register/... */
@@ -47,6 +48,9 @@  static int stop_on_reboot = -1;
 module_param(stop_on_reboot, int, 0444);
 MODULE_PARM_DESC(stop_on_reboot, "Stop watchdogs on reboot (0=keep watching, 1=stop)");
 
+static bool stop_on_panic; /* off unless watchdog.stop_on_panic=1 is passed; read at device registration */
+module_param(stop_on_panic, bool, 0444);
+MODULE_PARM_DESC(stop_on_panic, "Stop watchdogs on panic (0=keep watching, 1=stop)");
 /*
  * Deferred Registration infrastructure.
  *
@@ -155,6 +159,23 @@  int watchdog_init_timeout(struct watchdog_device *wdd,
 }
 EXPORT_SYMBOL_GPL(watchdog_init_timeout);
 
+static int watchdog_panic_notify(struct notifier_block *nb,
+				 unsigned long action, void *data) /* action and data are unused */
+{ /* Panic notifier callback: stop the watchdog owning @nb if it is active */
+	struct watchdog_device *wdd;
+
+	wdd = container_of(nb, struct watchdog_device, panic_nb); /* device embedding this notifier block */
+	if (watchdog_active(wdd)) { /* nothing to do for a watchdog that is not active */
+		int ret;
+
+		ret = wdd->ops->stop(wdd); /* registration requires ops->stop and no WDIOF_STOP_MAYSLEEP */
+		if (ret)
+			return NOTIFY_BAD; /* NOTE(review): may halt later panic notifiers - confirm intended */
+	}
+
+	return NOTIFY_DONE; /* watchdog was inactive or stopped successfully */
+}
+
 static int watchdog_reboot_notifier(struct notifier_block *nb,
 				    unsigned long code, void *data)
 {
@@ -334,6 +355,17 @@  static int ___watchdog_register_device(struct watchdog_device *wdd)
 				wdd->id, ret);
 	}
 
+	if (stop_on_panic) {
+		if (wdd->ops->stop && !(wdd->info->options & WDIOF_STOP_MAYSLEEP)) {
+			wdd->panic_nb.notifier_call = watchdog_panic_notify;
+			atomic_notifier_chain_register(&panic_notifier_list,
+						       &wdd->panic_nb);
+			set_bit(WDOG_STOP_ON_PANIC, &wdd->status);
+		} else {
+			pr_warn("watchdog%d: stop_on_panic not supported\n", wdd->id);
+		}
+	}
+
 	return 0;
 }
 
@@ -390,6 +422,9 @@  static void __watchdog_unregister_device(struct watchdog_device *wdd)
 	if (test_bit(WDOG_STOP_ON_REBOOT, &wdd->status))
 		unregister_reboot_notifier(&wdd->reboot_nb);
 
+	if (test_bit(WDOG_STOP_ON_PANIC, &wdd->status))
+		atomic_notifier_chain_unregister(&panic_notifier_list,
+						 &wdd->panic_nb);
 	watchdog_dev_unregister(wdd);
 	ida_free(&watchdog_ida, wdd->id);
 }
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 99660197a36c..ef6f1136a4c5 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -108,6 +108,7 @@  struct watchdog_device {
 	struct notifier_block reboot_nb;
 	struct notifier_block restart_nb;
 	struct notifier_block pm_nb;
+	struct notifier_block panic_nb;
 	void *driver_data;
 	struct watchdog_core_data *wd_data;
 	unsigned long status;
@@ -118,6 +119,7 @@  struct watchdog_device {
 #define WDOG_HW_RUNNING		3	/* True if HW watchdog running */
 #define WDOG_STOP_ON_UNREGISTER	4	/* Should be stopped on unregister */
 #define WDOG_NO_PING_ON_SUSPEND	5	/* Ping worker should be stopped on suspend */
+#define WDOG_STOP_ON_PANIC	6	/* Should be stopped on panic for loading kdump kernels */
 	struct list_head deferred;
 };