@@ -612,7 +612,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
return NOTIFY_DONE;
pfn = mce->addr >> PAGE_SHIFT;
- if (!memory_failure(pfn, 0)) {
+ if (!memory_failure(pfn, MF_MCE_HANDLE)) {
set_mce_nospec(pfn, whole_page(mce));
mce->kflags |= MCE_HANDLED_UC;
}
@@ -1286,7 +1286,7 @@ static void kill_me_now(struct callback_head *ch)
static void kill_me_maybe(struct callback_head *cb)
{
struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
- int flags = MF_ACTION_REQUIRED;
+ int flags = MF_ACTION_REQUIRED | MF_MCE_HANDLE;
int ret;
p->mce_count = 0;
@@ -1303,9 +1303,12 @@ static void kill_me_maybe(struct callback_head *cb)
}
/*
- * -EHWPOISON from memory_failure() means that it already sent SIGBUS
- * to the current process with the proper error info, so no need to
- * send SIGBUS here again.
+ * -EHWPOISON from memory_failure() means that memory_failure() did
+ * not handle the error event for one of the following reasons:
+ * - SIGBUS has already been sent to the current process with the
+ * proper error info, or
+ * - hwpoison_filter() filtered the event,
+ * so there is nothing more to do here.
*/
if (ret == -EHWPOISON)
return;
@@ -1320,7 +1323,7 @@ static void kill_me_never(struct callback_head *cb)
p->mce_count = 0;
pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
- if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, MF_MCE_HANDLE))
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
}
@@ -3188,6 +3188,7 @@ enum mf_flags {
MF_MUST_KILL = 1 << 2,
MF_SOFT_OFFLINE = 1 << 3,
MF_UNPOISON = 1 << 4,
+ MF_MCE_HANDLE = 1 << 5,
};
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
@@ -1526,7 +1526,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (TestClearPageHWPoison(head))
num_poisoned_pages_dec();
unlock_page(head);
- return 0;
+ if (flags & MF_MCE_HANDLE)
+ return -EHWPOISON;
+ else
+ return 0;
}
unlock_page(head);
res = MF_FAILED;
@@ -1613,7 +1616,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
goto out;
if (hwpoison_filter(page)) {
- rc = 0;
+ if (flags & MF_MCE_HANDLE)
+ rc = -EHWPOISON;
+ else
+ rc = 0;
goto unlock;
}
@@ -1837,6 +1843,10 @@ int memory_failure(unsigned long pfn, int flags)
num_poisoned_pages_dec();
unlock_page(p);
put_page(p);
+ if (flags & MF_MCE_HANDLE)
+ res = -EHWPOISON;
+ else
+ res = 0;
goto unlock_mutex;
}
When a hwpoison page matches the filter conditions, the MCE handler should not treat this as successful memory_failure() processing; memory_failure() should instead return an error value (-EHWPOISON). Otherwise the MCE handler assumes that the error page has been identified and isolated, which may lead to calling set_mce_nospec() to change the page attributes, etc.

Introduce a new MF_MCE_HANDLE flag that identifies calls from the MCE handler and makes memory_failure() return -EHWPOISON when hwpoison_filter() filters the page; without the flag, 0 is still returned for compatibility with the hwpoison injector.

Signed-off-by: luofei <luofei@unicloud.com>
---
 arch/x86/kernel/cpu/mce/core.c | 15 +++++++++------
 include/linux/mm.h | 1 +
 mm/memory-failure.c | 14 ++++++++++++--
 3 files changed, 22 insertions(+), 8 deletions(-)