diff mbox series

[1/3] memory-failure: Introduce memory failure notifier

Message ID 20220520070648.1794132-2-pizhenwei@bytedance.com (mailing list archive)
State New, archived
Headers show
Series recover hardware corrupted page by virtio balloon | expand

Commit Message

zhenwei pi May 20, 2022, 7:06 a.m. UTC
Introduce a memory failure notifier: once a hardware memory failure
occurs and the kernel has handled the corrupted page successfully,
anyone registered on this chain gets notified of the corrupted PFN.

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 include/linux/mm.h  |  2 ++
 mm/memory-failure.c | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

Comments

HORIGUCHI NAOYA(堀口 直也) May 30, 2022, 5:09 a.m. UTC | #1
On Fri, May 20, 2022 at 03:06:46PM +0800, zhenwei pi wrote:
> Introduce memory failure notifier, once hardware memory failure
> occurs, after the kernel handles the corrupted page successfully,
> someone who registered this chain gets noticed of the corrupted PFN.
> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>

...
> @@ -1136,6 +1165,10 @@ static void action_result(unsigned long pfn, enum mf_action_page_type type,
>  	num_poisoned_pages_inc();
>  	pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
>  		pfn, action_page_types[type], action_name[result]);
> +
> +	/* notify the chain if we handle successfully only */

This comment looks somewhat obvious from the code; maybe it's better
to comment on the "why" or the intention.

Thanks,
Naoya Horiguchi

> +	if (result == MF_RECOVERED)
> +		blocking_notifier_call_chain(&mf_notifier_list, pfn, NULL);
>  }
>  
>  static int page_action(struct page_state *ps, struct page *p,
> -- 
> 2.20.1
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9f44254af8ce..665873c2788c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3197,6 +3197,8 @@  extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
+extern int register_memory_failure_notifier(struct notifier_block *nb);
+extern int unregister_memory_failure_notifier(struct notifier_block *nb);
 #ifdef CONFIG_MEMORY_FAILURE
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
 #else
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2d590cba412c..95c218bb0a37 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -68,6 +68,35 @@  int sysctl_memory_failure_recovery __read_mostly = 1;
 
 atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
+static BLOCKING_NOTIFIER_HEAD(mf_notifier_list);
+
+/**
+ * register_memory_failure_notifier - Register function to be called if a
+ *                                    corrupted page gets handled successfully
+ * @nb: Info about notifier function to be called
+ *
+ * Currently always returns zero, as blocking_notifier_chain_register()
+ * always returns zero.
+ */
+int register_memory_failure_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&mf_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_memory_failure_notifier);
+
+/**
+ * unregister_memory_failure_notifier - Unregister previously registered
+ *                                      memory failure notifier
+ * @nb: Hook to be unregistered
+ *
+ * Returns zero on success, or %-ENOENT on failure.
+ */
+int unregister_memory_failure_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&mf_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_memory_failure_notifier);
+
 static bool __page_handle_poison(struct page *page)
 {
 	int ret;
@@ -1136,6 +1165,10 @@  static void action_result(unsigned long pfn, enum mf_action_page_type type,
 	num_poisoned_pages_inc();
 	pr_err("Memory failure: %#lx: recovery action for %s: %s\n",
 		pfn, action_page_types[type], action_name[result]);
+
+	/* notify the chain if we handle successfully only */
+	if (result == MF_RECOVERED)
+		blocking_notifier_call_chain(&mf_notifier_list, pfn, NULL);
 }
 
 static int page_action(struct page_state *ps, struct page *p,