Message ID | 20210521030156.2612074-2-nao.horiguchi@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm,hwpoison: fix sending SIGBUS for Action Required MCE | expand |
On Fri, 21 May 2021 12:01:54 +0900 Naoya Horiguchi <nao.horiguchi@gmail.com> wrote: > There can be races when multiple CPUs consume poison from the same > page. The first into memory_failure() atomically sets the HWPoison > page flag and begins hunting for tasks that map this page. Eventually > it invalidates those mappings and may send a SIGBUS to the affected > tasks. > > But while all that work is going on, other CPUs see a "success" > return code from memory_failure() and so they believe the error > has been handled and continue executing. > > Fix by wrapping most of the internal parts of memory_failure() in > a mutex. We can reduce the scope of that mutex, which helps readability at least. --- a/mm/memory-failure.c~mm-memory-failure-use-a-mutex-to-avoid-memory_failure-races-fix +++ a/mm/memory-failure.c @@ -1397,8 +1397,6 @@ out: return rc; } -static DEFINE_MUTEX(mf_mutex); - /** * memory_failure - Handle memory failure of a page. * @pfn: Page Number of the corrupted page @@ -1425,6 +1423,7 @@ int memory_failure(unsigned long pfn, in int res = 0; unsigned long page_flags; bool retry = true; + static DEFINE_MUTEX(mf_mutex); if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn);
On Sat, May 22, 2021 at 03:09:00PM -0700, Andrew Morton wrote: > On Fri, 21 May 2021 12:01:54 +0900 Naoya Horiguchi <nao.horiguchi@gmail.com> wrote: > > > There can be races when multiple CPUs consume poison from the same > > page. The first into memory_failure() atomically sets the HWPoison > > page flag and begins hunting for tasks that map this page. Eventually > > it invalidates those mappings and may send a SIGBUS to the affected > > tasks. > > > > But while all that work is going on, other CPUs see a "success" > > return code from memory_failure() and so they believe the error > > has been handled and continue executing. > > > > Fix by wrapping most of the internal parts of memory_failure() in > > a mutex. > > We can reduce the scope of that mutex, which helps readability at least. Thanks, this change is totally fine to me. > > --- a/mm/memory-failure.c~mm-memory-failure-use-a-mutex-to-avoid-memory_failure-races-fix > +++ a/mm/memory-failure.c > @@ -1397,8 +1397,6 @@ out: > return rc; > } > > -static DEFINE_MUTEX(mf_mutex); > - > /** > * memory_failure - Handle memory failure of a page. > * @pfn: Page Number of the corrupted page > @@ -1425,6 +1423,7 @@ int memory_failure(unsigned long pfn, in > int res = 0; > unsigned long page_flags; > bool retry = true; > + static DEFINE_MUTEX(mf_mutex); > > if (!sysctl_memory_failure_recovery) > panic("Memory failure on page %lx", pfn); > _ >
On Fri, May 21, 2021 at 12:01:54PM +0900, Naoya Horiguchi wrote: > From: Tony Luck <tony.luck@intel.com> > > There can be races when multiple CPUs consume poison from the same > page. The first into memory_failure() atomically sets the HWPoison > page flag and begins hunting for tasks that map this page. Eventually > it invalidates those mappings and may send a SIGBUS to the affected > tasks. > > But while all that work is going on, other CPUs see a "success" > return code from memory_failure() and so they believe the error > has been handled and continue executing. > > Fix by wrapping most of the internal parts of memory_failure() in > a mutex. > > Signed-off-by: Tony Luck <tony.luck@intel.com> > Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com> > Reviewed-by: Borislav Petkov <bp@suse.de> Reviewed-by: Oscar Salvador <osalvador@suse.de>
diff --git v5.13-rc2/mm/memory-failure.c v5.13-rc2_patched/mm/memory-failure.c index 9a7c12ace9e2..0f0b932ccbca 100644 --- v5.13-rc2/mm/memory-failure.c +++ v5.13-rc2_patched/mm/memory-failure.c @@ -1400,6 +1400,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, return rc; } +static DEFINE_MUTEX(mf_mutex); + /** * memory_failure - Handle memory failure of a page. * @pfn: Page Number of the corrupted page @@ -1423,7 +1425,7 @@ int memory_failure(unsigned long pfn, int flags) struct page *hpage; struct page *orig_head; struct dev_pagemap *pgmap; - int res; + int res = 0; unsigned long page_flags; bool retry = true; @@ -1443,13 +1445,18 @@ int memory_failure(unsigned long pfn, int flags) return -ENXIO; } + mutex_lock(&mf_mutex); + try_again: - if (PageHuge(p)) - return memory_failure_hugetlb(pfn, flags); + if (PageHuge(p)) { + res = memory_failure_hugetlb(pfn, flags); + goto unlock_mutex; + } + if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); - return 0; + goto unlock_mutex; } orig_head = hpage = compound_head(p); @@ -1482,17 +1489,19 @@ int memory_failure(unsigned long pfn, int flags) res = MF_FAILED; } action_result(pfn, MF_MSG_BUDDY, res); - return res == MF_RECOVERED ? 0 : -EBUSY; + res = res == MF_RECOVERED ? 0 : -EBUSY; } else { action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); - return -EBUSY; + res = -EBUSY; } + goto unlock_mutex; } if (PageTransHuge(hpage)) { if (try_to_split_thp_page(p, "Memory Failure") < 0) { action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); - return -EBUSY; + res = -EBUSY; + goto unlock_mutex; } VM_BUG_ON_PAGE(!page_count(p), p); } @@ -1516,7 +1525,7 @@ int memory_failure(unsigned long pfn, int flags) if (PageCompound(p) && compound_head(p) != orig_head) { action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED); res = -EBUSY; - goto out; + goto unlock_page; } /* @@ -1536,14 +1545,14 @@ int memory_failure(unsigned long pfn, int flags) num_poisoned_pages_dec(); unlock_page(p); put_page(p); - return 0; + goto unlock_mutex; } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) num_poisoned_pages_dec(); unlock_page(p); put_page(p); - return 0; + goto unlock_mutex; } if (!PageTransTail(p) && !PageLRU(p)) @@ -1562,7 +1571,7 @@ int memory_failure(unsigned long pfn, int flags) if (!hwpoison_user_mappings(p, pfn, flags, &p)) { action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); res = -EBUSY; - goto out; + goto unlock_page; } /* @@ -1571,13 +1580,15 @@ int memory_failure(unsigned long pfn, int flags) if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED); res = -EBUSY; - goto out; + goto unlock_page; } identify_page_state: res = identify_page_state(pfn, p, page_flags); -out: +unlock_page: unlock_page(p); +unlock_mutex: + mutex_unlock(&mf_mutex); return res; } EXPORT_SYMBOL_GPL(memory_failure);