diff mbox series

[v3,2/2] mm,hwpoison: make get_hwpoison_page call get_any_page()

Message ID 20210511151016.2310627-3-nao.horiguchi@gmail.com (mailing list archive)
State New, archived
Headers show
Series hwpoison: fix race with compound page allocation | expand

Commit Message

Naoya Horiguchi May 11, 2021, 3:10 p.m. UTC
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

__get_hwpoison_page() can now return -EBUSY when it hits a race condition,
so it is helpful to handle that case by retrying.  We already have retry
logic in get_any_page(), so make get_hwpoison_page() call get_any_page()
when called from memory_failure() too.

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
---
 mm/memory-failure.c | 114 ++++++++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 52 deletions(-)

Comments

Oscar Salvador May 12, 2021, 8:55 a.m. UTC | #1
On Wed, May 12, 2021 at 12:10:16AM +0900, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> Now __get_hwpoison_page() could return -EBUSY in a race condition,
> so it's helpful to handle it by retrying.  We already have retry
> logic, so make get_hwpoison_page() call get_any_page() when called
> from memory_failure().

As I stated in your previous patch, I think you should start returning -EBUSY
from this patch onwards.

>  static int get_any_page(struct page *p, unsigned long flags)
>  {
>  	int ret = 0, pass = 0;
> @@ -1152,50 +1136,76 @@ static int get_any_page(struct page *p, unsigned long flags)
>  		count_increased = true;
>  
>  try_again:
> -	if (!count_increased && !__get_hwpoison_page(p)) {
> -		if (page_count(p)) {
> -			/* We raced with an allocation, retry. */
> -			if (pass++ < 3)
> -				goto try_again;
> -			ret = -EBUSY;
> -		} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
> -			/* We raced with put_page, retry. */
> -			if (pass++ < 3)
> -				goto try_again;
> -			ret = -EIO;
> +	if (!count_increased) {
> +		ret = __get_hwpoison_page(p);
> +		if (!ret) {
> +			if (page_count(p)) {
> +				/* We raced with an allocation, retry. */
> +				if (pass++ < 3)
> +					goto try_again;
> +				ret = -EBUSY;
> +			} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
> +				/* We raced with put_page, retry. */
> +				if (pass++ < 3)
> +					goto try_again;
> +				ret = -EIO;
> +			}
> +			goto out;
>  		}
> +	}

I think this can be improved.

We cannot have -EBUSY unless we come from __get_hwpoison_page() (!count_increased),
so I think a much more natural approach would be to stuff the hunk below in there,
and then place the other stuff in an else, so something like:

        if (!count_increased) {
                ret = __get_hwpoison_page(p);
                if (!ret) {
                        if (page_count(p)) {
                                /* We raced with an allocation, retry. */
                                if (pass++ < 3)
                                        goto try_again;
                                ret = -EBUSY;
                        } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
                                /* We raced with put_page, retry. */
                                if (pass++ < 3)
                                        goto try_again;
                                ret = -EIO;
                        }
                        goto out;
                } else if (ret == -EBUSY) {
			/* We raced with freeing huge page to buddy, retry. */
			if (pass++ < 3)
				goto try_again;
		}
        } else {
		/* We do already have a refcount for this page, see if we can
		 * handle it.
		 */
		if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
			ret = 1;
		} else {
			/* A page we cannot handle. Check...
		}
	}

Other than that, looks good to me.

> +
> +	if (ret == -EBUSY) {
> +		/* We raced with freeing huge page to buddy, retry. */
> +		if (pass++ < 3)
> +			goto try_again;

HORIGUCHI NAOYA(堀口 直也) May 13, 2021, 12:03 a.m. UTC | #2
On Wed, May 12, 2021 at 10:55:22AM +0200, Oscar Salvador wrote:
> On Wed, May 12, 2021 at 12:10:16AM +0900, Naoya Horiguchi wrote:
> > From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> > 
> > Now __get_hwpoison_page() could return -EBUSY in a race condition,
> > so it's helpful to handle it by retrying.  We already have retry
> > logic, so make get_hwpoison_page() call get_any_page() when called
> > from memory_failure().
> 
> As I stated in your previous patch, I think you should start returning -EBUSY
> from this patch onwards.
> 
> >  static int get_any_page(struct page *p, unsigned long flags)
> >  {
> >  	int ret = 0, pass = 0;
> > @@ -1152,50 +1136,76 @@ static int get_any_page(struct page *p, unsigned long flags)
> >  		count_increased = true;
> >  
> >  try_again:
> > -	if (!count_increased && !__get_hwpoison_page(p)) {
> > -		if (page_count(p)) {
> > -			/* We raced with an allocation, retry. */
> > -			if (pass++ < 3)
> > -				goto try_again;
> > -			ret = -EBUSY;
> > -		} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
> > -			/* We raced with put_page, retry. */
> > -			if (pass++ < 3)
> > -				goto try_again;
> > -			ret = -EIO;
> > +	if (!count_increased) {
> > +		ret = __get_hwpoison_page(p);
> > +		if (!ret) {
> > +			if (page_count(p)) {
> > +				/* We raced with an allocation, retry. */
> > +				if (pass++ < 3)
> > +					goto try_again;
> > +				ret = -EBUSY;
> > +			} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
> > +				/* We raced with put_page, retry. */
> > +				if (pass++ < 3)
> > +					goto try_again;
> > +				ret = -EIO;
> > +			}
> > +			goto out;
> >  		}
> > +	}
> 
> I think this can be improved.
> 
> We cannot have -EBUSY unless we come from __get_hwpoison_page() (!count_increased),
> so I think a much more natural approach would be to stuff the hunk below in there,
> and then place the other stuff in an else, so something like:
> 
>         if (!count_increased) {
>                 ret = __get_hwpoison_page(p);
>                 if (!ret) {
>                         if (page_count(p)) {
>                                 /* We raced with an allocation, retry. */
>                                 if (pass++ < 3)
>                                         goto try_again;
>                                 ret = -EBUSY;
>                         } else if (!PageHuge(p) && !is_free_buddy_page(p)) {
>                                 /* We raced with put_page, retry. */
>                                 if (pass++ < 3)
>                                         goto try_again;
>                                 ret = -EIO;
>                         }
>                         goto out;
>                 } else if (ret == -EBUSY) {
> 			/* We raced with freeing huge page to buddy, retry. */
> 			if (pass++ < 3)
> 				goto try_again;
> 		}

Moving "if (ret == -EBUSY)" block to here makes sense to me. Thank you.

>         } else {

In the original logic, if __get_hwpoison_page() returns 1, we fall through to
the "if (PageHuge(p) || PageLRU(p) || __PageMovable(p))" check.  So I guess
this "else" is not necessary?

> 		/* We do already have a refcount for this page, see if we can
> 		 * handle it.
> 		 */
> 		if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
> 			ret = 1;
> 		} else {
> 			/* A page we cannot handle. Check...
> 		}
> 	}

Thanks,
Naoya Horiguchi

diff mbox series

Patch

diff --git v5.12/mm/memory-failure.c v5.12_patched/mm/memory-failure.c
index 02668b24e512..d6f470e9ee05 100644
--- v5.12/mm/memory-failure.c
+++ v5.12_patched/mm/memory-failure.c
@@ -1084,13 +1084,6 @@  static int page_action(struct page_state *ps, struct page *p,
 	return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
 }
 
-/**
- * __get_hwpoison_page() - Get refcount for memory error handling:
- * @page:	raw error page (hit by memory error)
- *
- * Return: return 0 if failed to grab the refcount, otherwise true (some
- * non-zero value.)
- */
 static int __get_hwpoison_page(struct page *page)
 {
 	struct page *head = compound_head(page);
@@ -1134,15 +1127,6 @@  static int __get_hwpoison_page(struct page *page)
 	return get_page_unless_zero(page);
 }
 
-/*
- * Safely get reference count of an arbitrary page.
- *
- * Returns 0 for a free page, 1 for an in-use page,
- * -EIO for a page-type we cannot handle and -EBUSY if we raced with an
- * allocation.
- * We only incremented refcount in case the page was already in-use and it
- * is a known type we can handle.
- */
 static int get_any_page(struct page *p, unsigned long flags)
 {
 	int ret = 0, pass = 0;
@@ -1152,50 +1136,76 @@  static int get_any_page(struct page *p, unsigned long flags)
 		count_increased = true;
 
 try_again:
-	if (!count_increased && !__get_hwpoison_page(p)) {
-		if (page_count(p)) {
-			/* We raced with an allocation, retry. */
-			if (pass++ < 3)
-				goto try_again;
-			ret = -EBUSY;
-		} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
-			/* We raced with put_page, retry. */
-			if (pass++ < 3)
-				goto try_again;
-			ret = -EIO;
+	if (!count_increased) {
+		ret = __get_hwpoison_page(p);
+		if (!ret) {
+			if (page_count(p)) {
+				/* We raced with an allocation, retry. */
+				if (pass++ < 3)
+					goto try_again;
+				ret = -EBUSY;
+			} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
+				/* We raced with put_page, retry. */
+				if (pass++ < 3)
+					goto try_again;
+				ret = -EIO;
+			}
+			goto out;
 		}
+	}
+
+	if (ret == -EBUSY) {
+		/* We raced with freeing huge page to buddy, retry. */
+		if (pass++ < 3)
+			goto try_again;
+	} else if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
+		ret = 1;
 	} else {
-		if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) {
-			ret = 1;
-		} else {
-			/*
-			 * A page we cannot handle. Check whether we can turn
-			 * it into something we can handle.
-			 */
-			if (pass++ < 3) {
-				put_page(p);
-				shake_page(p, 1);
-				count_increased = false;
-				goto try_again;
-			}
+		/*
+		 * A page we cannot handle. Check whether we can turn
+		 * it into something we can handle.
+		 */
+		if (pass++ < 3) {
 			put_page(p);
-			ret = -EIO;
+			shake_page(p, 1);
+			count_increased = false;
+			goto try_again;
 		}
+		put_page(p);
+		ret = -EIO;
 	}
-
+out:
 	return ret;
 }
 
-static int get_hwpoison_page(struct page *p, unsigned long flags,
-			     enum mf_flags ctxt)
+/**
+ * get_hwpoison_page() - Get refcount for memory error handling
+ * @p:		Raw error page (hit by memory error)
+ * @flags:	Flags controlling behavior of error handling
+ *
+ * get_hwpoison_page() takes a page refcount of an error page to handle memory
+ * error on it, after checking that the error page is in a well-defined state
+ * (defined as a page-type on which we can successfully handle memory errors,
+ * such as LRU page and hugetlb page).
+ *
+ * Memory error handling could be triggered at any time on any type of page,
+ * so it's prone to race with typical memory management lifecycle (like
+ * allocation and free).  So to avoid such races, get_hwpoison_page() takes
+ * extra care for the error page's state (as done in __get_hwpoison_page()),
+ * and has some retry logic in get_any_page().
+ *
+ * Return: 0 for buddy free pages,
+ *         1 on success for in-use pages in a well-defined state,
+ *         -EIO for pages on which we can not handle memory errors,
+ *         -EBUSY when get_hwpoison_page() has raced with page lifecycle
+ *         operations like allocation and free.
+ */
+static int get_hwpoison_page(struct page *p, unsigned long flags)
 {
 	int ret;
 
 	zone_pcp_disable(page_zone(p));
-	if (ctxt == MF_SOFT_OFFLINE)
-		ret = get_any_page(p, flags);
-	else
-		ret = __get_hwpoison_page(p);
+	ret = get_any_page(p, flags);
 	zone_pcp_enable(page_zone(p));
 
 	return ret;
@@ -1384,7 +1394,7 @@  static int memory_failure_hugetlb(unsigned long pfn, int flags)
 
 	num_poisoned_pages_inc();
 
-	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
+	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags)) {
 		/*
 		 * Check "filter hit" and "race with other subpage."
 		 */
@@ -1608,7 +1618,7 @@  int memory_failure(unsigned long pfn, int flags)
 	 * In fact it's dangerous to directly bump up page count from 0,
 	 * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
 	 */
-	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) {
+	if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags)) {
 		if (is_free_buddy_page(p)) {
 			if (take_page_off_buddy(p)) {
 				page_ref_inc(p);
@@ -1900,7 +1910,7 @@  int unpoison_memory(unsigned long pfn)
 		return 0;
 	}
 
-	if (!get_hwpoison_page(p, flags, 0)) {
+	if (!get_hwpoison_page(p, flags)) {
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
@@ -2116,7 +2126,7 @@  int soft_offline_page(unsigned long pfn, int flags)
 
 retry:
 	get_online_mems();
-	ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE);
+	ret = get_hwpoison_page(page, flags);
 	put_online_mems();
 
 	if (ret > 0) {