@@ -42,6 +42,9 @@ enum {
SUBPAGE_INDEX_CGROUP, /* reuse page->private */
SUBPAGE_INDEX_CGROUP_RSVD, /* reuse page->private */
__MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+ SUBPAGE_INDEX_HWPOISON,
#endif
__NR_USED_SUBPAGE,
};
@@ -550,7 +553,7 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
* Synchronization: Initially set after new page allocation with no
* locking. When examined and modified during migration processing
* (isolate, migrate, putback) the hugetlb_lock is held.
- * HPG_temporary - - Set on a page that is temporarily allocated from the buddy
+ * HPG_temporary -- Set on a page that is temporarily allocated from the buddy
* allocator. Typically used for migration target pages when no pages
* are available in the pool. The hugetlb free page path will
* immediately free pages with this flag set to the buddy allocator.
@@ -560,6 +563,8 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
* HPG_freed - Set when page is on the free lists.
* Synchronization: hugetlb_lock held for examination and modification.
* HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
+ * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page
+ * that is not tracked by raw_hwp_page list.
*/
enum hugetlb_page_flags {
HPG_restore_reserve = 0,
@@ -567,6 +572,7 @@ enum hugetlb_page_flags {
HPG_temporary,
HPG_freed,
HPG_vmemmap_optimized,
+ HPG_raw_hwp_unreliable,
__NR_HPAGEFLAGS,
};
@@ -613,6 +619,7 @@ HPAGEFLAG(Migratable, migratable)
HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
+HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
#ifdef CONFIG_HUGETLB_PAGE
@@ -798,6 +805,15 @@ extern int dissolve_free_huge_page(struct page *page);
extern int dissolve_free_huge_pages(unsigned long start_pfn,
unsigned long end_pfn);
+#ifdef CONFIG_MEMORY_FAILURE
+extern int hugetlb_clear_page_hwpoison(struct page *hpage);
+#else
+static inline int hugetlb_clear_page_hwpoison(struct page *hpage)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#ifndef arch_hugetlb_migration_supported
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
@@ -1541,17 +1541,15 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
- if (hugetlb_vmemmap_alloc(h, page)) {
- spin_lock_irq(&hugetlb_lock);
- /*
- * If we cannot allocate vmemmap pages, just refuse to free the
- * page and put the page back on the hugetlb free list and treat
- * as a surplus page.
- */
- add_hugetlb_page(h, page, true);
- spin_unlock_irq(&hugetlb_lock);
- return;
- }
+ if (hugetlb_vmemmap_alloc(h, page))
+ goto fail;
+
+ /*
+ * Move PageHWPoison flag from head page to the raw error pages,
+ * which makes any healthy subpages reusable.
+ */
+ if (unlikely(PageHWPoison(page) && hugetlb_clear_page_hwpoison(page)))
+ goto fail;
for (i = 0; i < pages_per_huge_page(h);
i++, subpage = mem_map_next(subpage, page, i)) {
@@ -1572,6 +1570,16 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
} else {
__free_pages(page, huge_page_order(h));
}
+ return;
+fail:
+ spin_lock_irq(&hugetlb_lock);
+ /*
+ * If we cannot allocate vmemmap pages or cannot identify raw hwpoison
+ * subpages reliably, just refuse to free the page and put the page
+ * back on the hugetlb free list and treat as a surplus page.
+ */
+ add_hugetlb_page(h, page, true);
+ spin_unlock_irq(&hugetlb_lock);
}
/*
@@ -2115,15 +2123,6 @@ int dissolve_free_huge_page(struct page *page)
*/
rc = hugetlb_vmemmap_alloc(h, head);
if (!rc) {
- /*
- * Move PageHWPoison flag from head page to the raw
- * error page, which makes any subpages rather than
- * the error page reusable.
- */
- if (PageHWPoison(head) && page != head) {
- SetPageHWPoison(page);
- ClearPageHWPoison(head);
- }
update_and_free_page(h, head, false);
} else {
spin_lock_irq(&hugetlb_lock);
@@ -1499,6 +1499,82 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
}
#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Struct raw_hwp_page represents information about "raw error page",
+ * constructing singly linked list originated from ->private field of
+ * SUBPAGE_INDEX_HWPOISON-th tail page.
+ */
+struct raw_hwp_page {
+ struct llist_node node;
+ struct page *page;
+};
+
+static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
+{
+ return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON);
+}
+
+static inline int hugetlb_set_page_hwpoison(struct page *hpage,
+ struct page *page)
+{
+ struct llist_head *head;
+ struct raw_hwp_page *raw_hwp;
+ struct llist_node *t, *tnode;
+ int ret;
+
+ /*
+ * Once the hwpoison hugepage has lost reliable raw error info,
+ * there is little meaning to keep additional error info precisely,
+ * so skip to add additional raw error info.
+ */
+ if (HPageRawHwpUnreliable(hpage))
+ return -EHWPOISON;
+ head = raw_hwp_list_head(hpage);
+ llist_for_each_safe(tnode, t, head->first) {
+ struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
+
+ if (p->page == page)
+ return -EHWPOISON;
+ }
+
+ ret = TestSetPageHWPoison(hpage) ? -EHWPOISON : 0;
+ /* the first error event will be counted in action_result(). */
+ if (ret)
+ num_poisoned_pages_inc();
+
+ raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC);
+ if (raw_hwp) {
+ raw_hwp->page = page;
+ llist_add(&raw_hwp->node, head);
+ } else {
+ /*
+ * Failed to save raw error info. We no longer trace all
+ * hwpoisoned subpages, and we need refuse to free/dissolve
+ * this hwpoisoned hugepage.
+ */
+ SetHPageRawHwpUnreliable(hpage);
+ }
+ return ret;
+}
+
+inline int hugetlb_clear_page_hwpoison(struct page *hpage)
+{
+ struct llist_head *head;
+ struct llist_node *t, *tnode;
+
+ if (!HPageRawHwpUnreliable(hpage))
+ ClearPageHWPoison(hpage);
+ head = raw_hwp_list_head(hpage);
+ llist_for_each_safe(tnode, t, head->first) {
+ struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
+
+ SetPageHWPoison(p->page);
+ kfree(p);
+ }
+ llist_del_all(head);
+ return 0;
+}
+
/*
* Called from hugetlb code with hugetlb_lock held.
*
@@ -1533,7 +1609,7 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
goto out;
}
- if (TestSetPageHWPoison(head)) {
+ if (hugetlb_set_page_hwpoison(head, page)) {
ret = -EHWPOISON;
goto out;
}
@@ -1585,7 +1661,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
lock_page(head);
if (hwpoison_filter(p)) {
- ClearPageHWPoison(head);
+ hugetlb_clear_page_hwpoison(head);
res = -EOPNOTSUPP;
goto out;
}