diff mbox series

[v1,1/6] mm/hwpoison: mf_mutex for soft offline and unpoison

Message ID 20210614021212.223326-2-nao.horiguchi@gmail.com (mailing list archive)
State New, archived
Headers show
Series mm/hwpoison: fix unpoison_memory() | expand

Commit Message

Naoya Horiguchi June 14, 2021, 2:12 a.m. UTC
From: Naoya Horiguchi <naoya.horiguchi@nec.com>

Originally mf_mutex is introduced to serialize multiple MCE events, but
it's also helpful to exclude races among  soft_offline_page() and
unpoison_memory().  So apply mf_mutex to them.

Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
---
 mm/memory-failure.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

Comments

Ding Hui June 15, 2021, 11:41 a.m. UTC | #1
On 2021/6/14 10:12, Naoya Horiguchi wrote:

>   
> @@ -2171,6 +2177,8 @@ int soft_offline_page(unsigned long pfn, int flags)
>   		return -EIO;
>   	}
>   
> +	mutex_lock(&mf_mutex);
> +
>   	if (PageHWPoison(page)) {
>   		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
>   		put_ref_page(ref_page);

Did you miss mutex_unlock() here when page already poisoned ?

> @@ -2194,5 +2202,7 @@ int soft_offline_page(unsigned long pfn, int flags)
>   			 __func__, pfn, page->flags, &page->flags);
>   	}
>   
> +	mutex_unlock(&mf_mutex);
> +
>   	return ret;
>   }
> 

--
Thanks,
- Ding Hui
HORIGUCHI NAOYA(堀口 直也) June 15, 2021, 11:55 a.m. UTC | #2
On Tue, Jun 15, 2021 at 07:41:34PM +0800, Ding Hui wrote:
> On 2021/6/14 10:12, Naoya Horiguchi wrote:
> 
> > @@ -2171,6 +2177,8 @@ int soft_offline_page(unsigned long pfn, int flags)
> >   		return -EIO;
> >   	}
> > +	mutex_lock(&mf_mutex);
> > +
> >   	if (PageHWPoison(page)) {
> >   		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
> >   		put_ref_page(ref_page);
> 
> Did you miss mutex_unlock() here when page already poisoned ?

Thanks, you're right. I'll fix it.

- Naoya

> 
> > @@ -2194,5 +2202,7 @@ int soft_offline_page(unsigned long pfn, int flags)
> >   			 __func__, pfn, page->flags, &page->flags);
> >   	}
> > +	mutex_unlock(&mf_mutex);
> > +
> >   	return ret;
> >   }
> > 
> 
> --
> Thanks,
> - Ding Hui
>
Miaohe Lin June 15, 2021, 12:42 p.m. UTC | #3
Hi:
On 2021/6/14 10:12, Naoya Horiguchi wrote:
> From: Naoya Horiguchi <naoya.horiguchi@nec.com>
> 
> Originally mf_mutex is introduced to serialize multiple MCE events, but
> it's also helpful to exclude races among  soft_offline_page() and
> unpoison_memory().  So apply mf_mutex to them.
> 

When I was investigating the memory-failure code, I realized these possible races too.
It's very kind of you to fix these races! Many thanks!

> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
> ---
>  mm/memory-failure.c | 26 ++++++++++++++++++--------
>  1 file changed, 18 insertions(+), 8 deletions(-)
> 
> diff --git v5.13-rc5/mm/memory-failure.c v5.13-rc5_patched/mm/memory-failure.c
> index ae30fd6d575a..280eb6d6dd15 100644
> --- v5.13-rc5/mm/memory-failure.c
> +++ v5.13-rc5_patched/mm/memory-failure.c
> @@ -1583,6 +1583,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
>  	return rc;
>  }
>  
> +static DEFINE_MUTEX(mf_mutex);
> +
>  /**
>   * memory_failure - Handle memory failure of a page.
>   * @pfn: Page Number of the corrupted page
> @@ -1609,7 +1611,6 @@ int memory_failure(unsigned long pfn, int flags)
>  	int res = 0;
>  	unsigned long page_flags;
>  	bool retry = true;
> -	static DEFINE_MUTEX(mf_mutex);
>  
>  	if (!sysctl_memory_failure_recovery)
>  		panic("Memory failure on page %lx", pfn);
> @@ -1918,6 +1919,7 @@ int unpoison_memory(unsigned long pfn)
>  	struct page *page;
>  	struct page *p;
>  	int freeit = 0;
> +	int ret = 0;
>  	unsigned long flags = 0;
>  	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
>  					DEFAULT_RATELIMIT_BURST);
> @@ -1928,28 +1930,30 @@ int unpoison_memory(unsigned long pfn)
>  	p = pfn_to_page(pfn);
>  	page = compound_head(p);
>  
> +	mutex_lock(&mf_mutex);
> +
>  	if (!PageHWPoison(p)) {
>  		unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
>  				 pfn, &unpoison_rs);
> -		return 0;
> +		goto unlock_mutex;
>  	}
>  
>  	if (page_count(page) > 1) {
>  		unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
>  				 pfn, &unpoison_rs);
> -		return 0;
> +		goto unlock_mutex;
>  	}
>  
>  	if (page_mapped(page)) {
>  		unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
>  				 pfn, &unpoison_rs);
> -		return 0;
> +		goto unlock_mutex;
>  	}
>  
>  	if (page_mapping(page)) {
>  		unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
>  				 pfn, &unpoison_rs);
> -		return 0;
> +		goto unlock_mutex;
>  	}
>  
>  	/*
> @@ -1960,7 +1964,7 @@ int unpoison_memory(unsigned long pfn)
>  	if (!PageHuge(page) && PageTransHuge(page)) {
>  		unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
>  				 pfn, &unpoison_rs);
> -		return 0;
> +		goto unlock_mutex;
>  	}
>  

Maybe it's more appropriate to start mutex_lock(&mf_mutex) here? I think these races start here.

>  	if (!get_hwpoison_page(p, flags)) {
> @@ -1968,7 +1972,7 @@ int unpoison_memory(unsigned long pfn)
>  			num_poisoned_pages_dec();
>  		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
>  				 pfn, &unpoison_rs);
> -		return 0;
> +		goto unlock_mutex;
>  	}
>  
>  	lock_page(page);
> @@ -1990,7 +1994,9 @@ int unpoison_memory(unsigned long pfn)
>  	if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
>  		put_page(page);
>  
> -	return 0;
> +unlock_mutex:
> +	mutex_unlock(&mf_mutex);
> +	return ret;
>  }
>  EXPORT_SYMBOL(unpoison_memory);
>  
> @@ -2171,6 +2177,8 @@ int soft_offline_page(unsigned long pfn, int flags)
>  		return -EIO;
>  	}
>  
> +	mutex_lock(&mf_mutex);
> +
>  	if (PageHWPoison(page)) {
>  		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
>  		put_ref_page(ref_page);
> @@ -2194,5 +2202,7 @@ int soft_offline_page(unsigned long pfn, int flags)
>  			 __func__, pfn, page->flags, &page->flags);
>  	}
>  
> +	mutex_unlock(&mf_mutex);
> +
>  	return ret;
>  }
>
HORIGUCHI NAOYA(堀口 直也) June 16, 2021, 12:41 a.m. UTC | #4
On Tue, Jun 15, 2021 at 08:42:23PM +0800, Miaohe Lin wrote:
...
> > @@ -1960,7 +1964,7 @@ int unpoison_memory(unsigned long pfn)
> >  	if (!PageHuge(page) && PageTransHuge(page)) {
> >  		unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
> >  				 pfn, &unpoison_rs);
> > -		return 0;
> > +		goto unlock_mutex;
> >  	}
> >  
> 
> Maybe it's more appropriate to start mutex_lock(&mf_mutex) here? I think these races start here.

Hi Miaohe,

Thank your for the review.

Consider that we put mutex_lock() here, and let's think about two concurrent
calls of unpoison_memory(), then these events could be processed like below:

    CPU 0                             CPU 1
    unpoison_memory
    check PageHWPoison // true
                                      unpoison_memory
                                      check PageHWPoison  // true
    mutex_lock
    get_hwpoison_page
    TestClearPageHWPoison
    put_page
    put_page // freed
    mutex_unlock

    // the unpoisoned page can be used for allocation

                                      mutex_lock
                                      get_hwpoison_page // succeeds
                                      ... // unpoison the !PageHWPoison page !?


So I thought that we had better do the prechecks in mf_mutex.  Maybe the 2nd
unpoison_memory() just get and put the page refcount by 1 even in this race,
so the impact is not so big, but I feel like avoiding "unpoison the
!PageHWPoison page" situation.

Does it make sense for you?

Thanks,
Naoya Horiguchi
Miaohe Lin June 16, 2021, 3:14 a.m. UTC | #5
On 2021/6/16 8:41, HORIGUCHI NAOYA(堀口 直也) wrote:
> On Tue, Jun 15, 2021 at 08:42:23PM +0800, Miaohe Lin wrote:
> ...
>>> @@ -1960,7 +1964,7 @@ int unpoison_memory(unsigned long pfn)
>>>  	if (!PageHuge(page) && PageTransHuge(page)) {
>>>  		unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
>>>  				 pfn, &unpoison_rs);
>>> -		return 0;
>>> +		goto unlock_mutex;
>>>  	}
>>>  
>>
>> Maybe it's more appropriate to start mutex_lock(&mf_mutex) here? I think these races start here.
> 
> Hi Miaohe,
> 
> Thank your for the review.
> > Consider that we put mutex_lock() here, and let's think about two concurrent
> calls of unpoison_memory(), then these events could be processed like below:
> 
>     CPU 0                             CPU 1
>     unpoison_memory
>     check PageHWPoison // true
>                                       unpoison_memory
>                                       check PageHWPoison  // true
>     mutex_lock
>     get_hwpoison_page
>     TestClearPageHWPoison
>     put_page
>     put_page // freed
>     mutex_unlock
> 
>     // the unpoisoned page can be used for allocation
> 
>                                       mutex_lock
>                                       get_hwpoison_page // succeeds
>                                       ... // unpoison the !PageHWPoison page !?
> 
> 
> So I thought that we had better do the prechecks in mf_mutex.  Maybe the 2nd
> unpoison_memory() just get and put the page refcount by 1 even in this race,
> so the impact is not so big, but I feel like avoiding "unpoison the
> !PageHWPoison page" situation.
> 
> Does it make sense for you?

Yes, I missed the races inside unpoison_memory() itself.
Many thanks for your detailed explanation!

> 
> Thanks,
> Naoya Horiguchi
>
diff mbox series

Patch

diff --git v5.13-rc5/mm/memory-failure.c v5.13-rc5_patched/mm/memory-failure.c
index ae30fd6d575a..280eb6d6dd15 100644
--- v5.13-rc5/mm/memory-failure.c
+++ v5.13-rc5_patched/mm/memory-failure.c
@@ -1583,6 +1583,8 @@  static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	return rc;
 }
 
+static DEFINE_MUTEX(mf_mutex);
+
 /**
  * memory_failure - Handle memory failure of a page.
  * @pfn: Page Number of the corrupted page
@@ -1609,7 +1611,6 @@  int memory_failure(unsigned long pfn, int flags)
 	int res = 0;
 	unsigned long page_flags;
 	bool retry = true;
-	static DEFINE_MUTEX(mf_mutex);
 
 	if (!sysctl_memory_failure_recovery)
 		panic("Memory failure on page %lx", pfn);
@@ -1918,6 +1919,7 @@  int unpoison_memory(unsigned long pfn)
 	struct page *page;
 	struct page *p;
 	int freeit = 0;
+	int ret = 0;
 	unsigned long flags = 0;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
 					DEFAULT_RATELIMIT_BURST);
@@ -1928,28 +1930,30 @@  int unpoison_memory(unsigned long pfn)
 	p = pfn_to_page(pfn);
 	page = compound_head(p);
 
+	mutex_lock(&mf_mutex);
+
 	if (!PageHWPoison(p)) {
 		unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
 				 pfn, &unpoison_rs);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	if (page_count(page) > 1) {
 		unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
 				 pfn, &unpoison_rs);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	if (page_mapped(page)) {
 		unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
 				 pfn, &unpoison_rs);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	if (page_mapping(page)) {
 		unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
 				 pfn, &unpoison_rs);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	/*
@@ -1960,7 +1964,7 @@  int unpoison_memory(unsigned long pfn)
 	if (!PageHuge(page) && PageTransHuge(page)) {
 		unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
 				 pfn, &unpoison_rs);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	if (!get_hwpoison_page(p, flags)) {
@@ -1968,7 +1972,7 @@  int unpoison_memory(unsigned long pfn)
 			num_poisoned_pages_dec();
 		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
 				 pfn, &unpoison_rs);
-		return 0;
+		goto unlock_mutex;
 	}
 
 	lock_page(page);
@@ -1990,7 +1994,9 @@  int unpoison_memory(unsigned long pfn)
 	if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
 		put_page(page);
 
-	return 0;
+unlock_mutex:
+	mutex_unlock(&mf_mutex);
+	return ret;
 }
 EXPORT_SYMBOL(unpoison_memory);
 
@@ -2171,6 +2177,8 @@  int soft_offline_page(unsigned long pfn, int flags)
 		return -EIO;
 	}
 
+	mutex_lock(&mf_mutex);
+
 	if (PageHWPoison(page)) {
 		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
 		put_ref_page(ref_page);
@@ -2194,5 +2202,7 @@  int soft_offline_page(unsigned long pfn, int flags)
 			 __func__, pfn, page->flags, &page->flags);
 	}
 
+	mutex_unlock(&mf_mutex);
+
 	return ret;
 }