diff mbox series

mm/hwpoison: Check the subpage, not the head page

Message ID 20220130013042.1906881-1-willy@infradead.org (mailing list archive)
State New
Headers show
Series mm/hwpoison: Check the subpage, not the head page | expand

Commit Message

Matthew Wilcox (Oracle) Jan. 30, 2022, 1:30 a.m. UTC
Hardware poison is tracked on a per-page basis, not on the head page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/rmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

Comments

David Rientjes Jan. 30, 2022, 8:58 p.m. UTC | #1
On Sun, 30 Jan 2022, Matthew Wilcox (Oracle) wrote:

> Hardware poison is tracked on a per-page basis, not on the head page.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/rmap.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 6a1e8c7f6213..09b08888120e 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1553,7 +1553,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
>  		/* Update high watermark before we lower rss */
>  		update_hiwater_rss(mm);
>  
> -		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
> +		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
>  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
>  			if (PageHuge(page)) {
>  				hugetlb_count_sub(compound_nr(page), mm);
> @@ -1873,7 +1873,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
>  			 * memory are supported.
>  			 */
>  			subpage = page;
> -		} else if (PageHWPoison(page)) {
> +		} else if (PageHWPoison(subpage)) {
>  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
>  			if (PageHuge(page)) {
>  				hugetlb_count_sub(compound_nr(page), mm);

This looks correct.  Correct me if I'm wrong that this is for consistency 
and cleanup and that there is no bug being fixed by this, however.

Thanks!
Matthew Wilcox (Oracle) Jan. 30, 2022, 9:14 p.m. UTC | #2
On Sun, Jan 30, 2022 at 12:58:17PM -0800, David Rientjes wrote:
> On Sun, 30 Jan 2022, Matthew Wilcox (Oracle) wrote:
> 
> > Hardware poison is tracked on a per-page basis, not on the head page.
> > 
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > ---
> >  mm/rmap.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/mm/rmap.c b/mm/rmap.c
> > index 6a1e8c7f6213..09b08888120e 100644
> > --- a/mm/rmap.c
> > +++ b/mm/rmap.c
> > @@ -1553,7 +1553,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
> >  		/* Update high watermark before we lower rss */
> >  		update_hiwater_rss(mm);
> >  
> > -		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
> > +		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
> >  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
> >  			if (PageHuge(page)) {
> >  				hugetlb_count_sub(compound_nr(page), mm);
> > @@ -1873,7 +1873,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
> >  			 * memory are supported.
> >  			 */
> >  			subpage = page;
> > -		} else if (PageHWPoison(page)) {
> > +		} else if (PageHWPoison(subpage)) {
> >  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
> >  			if (PageHuge(page)) {
> >  				hugetlb_count_sub(compound_nr(page), mm);
> 
> This looks correct.  Correct me if I'm wrong that this is for consistency 
> and cleanup and that there is no bug being fixed by this, however.

Oh, no, I think there's a real bug here.  It's just that we're looking
at an uncommon & hence rarely-tested scenario -- a memory fault in the
middle of a THP (in mainline; obviously it'll be a little more common
with arbitrary sized folios).  I don't do HWPoison testing myself, so
this was by inspection and not from testing.  A scenario where things
would go wrong is that a memory error on a non-head page would go unnoticed
when migrating or unmapping.  Contrariwise, if there's a hardware error
on a head page, all the subpages get treated as poisoned, even though
they shouldn't be.
HORIGUCHI NAOYA(堀口 直也) Jan. 31, 2022, 5:44 a.m. UTC | #3
On Sun, Jan 30, 2022 at 09:14:21PM +0000, Matthew Wilcox wrote:
> On Sun, Jan 30, 2022 at 12:58:17PM -0800, David Rientjes wrote:
> > On Sun, 30 Jan 2022, Matthew Wilcox (Oracle) wrote:
> > 
> > > Hardware poison is tracked on a per-page basis, not on the head page.
> > > 
> > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > > ---
> > >  mm/rmap.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/mm/rmap.c b/mm/rmap.c
> > > index 6a1e8c7f6213..09b08888120e 100644
> > > --- a/mm/rmap.c
> > > +++ b/mm/rmap.c
> > > @@ -1553,7 +1553,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
> > >  		/* Update high watermark before we lower rss */
> > >  		update_hiwater_rss(mm);
> > >  
> > > -		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
> > > +		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
> > >  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
> > >  			if (PageHuge(page)) {
> > >  				hugetlb_count_sub(compound_nr(page), mm);
> > > @@ -1873,7 +1873,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
> > >  			 * memory are supported.
> > >  			 */
> > >  			subpage = page;
> > > -		} else if (PageHWPoison(page)) {
> > > +		} else if (PageHWPoison(subpage)) {
> > >  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
> > >  			if (PageHuge(page)) {
> > >  				hugetlb_count_sub(compound_nr(page), mm);
> > 
> > This looks correct.  Correct me if I'm wrong that this is for consistency 
> > and cleanup and that there is no bug being fixed by this, however.
> 
> Oh, no, I think there's a real bug here.  It's just that we're looking
> at an uncommon & hence rarely-tested scenario -- a memory fault in the
> middle of a THP (in mainline; obviously it'll be a little more common
> with arbitrary sized folios).  I don't do HWPoison testing myself, so
> this was by inspection and not from testing.  A scenario where things
> would go wrong is a memory error on a non-head-page would go unnoticed
> when migrating or unmapping.  Contrariwise, if there's a hardware error
> on a head page, all the subpages get treated as poisoned, even though
> they shouldn't be.

Thank you for reporting.  As you point out, the current check does not
handle thp properly.  The reason for checking the head page here is to handle
hwpoisoned hugetlb (which has PG_hwpoison on the head page even if the error
is on one of the tail pages).  So I think that the proper fix is to add a helper
function to check page type (normal, thp, or hugetlb) as well as PageHWPoison.

Thanks,
Naoya Horiguchi
Matthew Wilcox (Oracle) Jan. 31, 2022, 1:24 p.m. UTC | #4
On Mon, Jan 31, 2022 at 05:44:35AM +0000, HORIGUCHI NAOYA(堀口 直也) wrote:
> On Sun, Jan 30, 2022 at 09:14:21PM +0000, Matthew Wilcox wrote:
> > On Sun, Jan 30, 2022 at 12:58:17PM -0800, David Rientjes wrote:
> > > On Sun, 30 Jan 2022, Matthew Wilcox (Oracle) wrote:
> > > 
> > > > Hardware poison is tracked on a per-page basis, not on the head page.
> > > > 
> > > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > > > ---
> > > >  mm/rmap.c | 4 ++--
> > > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/mm/rmap.c b/mm/rmap.c
> > > > index 6a1e8c7f6213..09b08888120e 100644
> > > > --- a/mm/rmap.c
> > > > +++ b/mm/rmap.c
> > > > @@ -1553,7 +1553,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
> > > >  		/* Update high watermark before we lower rss */
> > > >  		update_hiwater_rss(mm);
> > > >  
> > > > -		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
> > > > +		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
> > > >  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
> > > >  			if (PageHuge(page)) {
> > > >  				hugetlb_count_sub(compound_nr(page), mm);
> > > > @@ -1873,7 +1873,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
> > > >  			 * memory are supported.
> > > >  			 */
> > > >  			subpage = page;
> > > > -		} else if (PageHWPoison(page)) {
> > > > +		} else if (PageHWPoison(subpage)) {
> > > >  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
> > > >  			if (PageHuge(page)) {
> > > >  				hugetlb_count_sub(compound_nr(page), mm);
> > > 
> > > This looks correct.  Correct me if I'm wrong that this is for consistency 
> > > and cleanup and that there is no bug being fixed by this, however.
> > 
> > Oh, no, I think there's a real bug here.  It's just that we're looking
> > at an uncommon & hence rarely-tested scenario -- a memory fault in the
> > middle of a THP (in mainline; obviously it'll be a little more common
> > with arbitrary sized folios).  I don't do HWPoison testing myself, so
> > this was by inspection and not from testing.  A scenario where things
> > would go wrong is a memory error on a non-head-page would go unnoticed
> > when migrating or unmapping.  Contrariwise, if there's a hardware error
> > on a head page, all the subpages get treated as poisoned, even though
> > they shouldn't be.
> 
> Thank you for reporting.  As you point out, the current check does not
> handle thp properly.  The reason of checking head page here is to handle
> hwpoisoned hugetlb (which has PG_hwpoison on the head page even if the error
> is on any of tail page).  So I think that the proper fix is to add a helper
> function to check page type (normal, thp, or hugetlb) as well as PageHWPoison.

I think this handles HugeTLB pages correctly:

                subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);

As I understand the HugeTLB code, pvmw.pte refers to the head page, not
the subpage (unlike a PTE-mapped THP page, where it would refer to the
precise page).  But I cheerfully admit that the intricacies of the
HugeTLB code are not something I'm an expert on.
Mike Kravetz Jan. 31, 2022, 7:24 p.m. UTC | #5
On 1/31/22 05:24, Matthew Wilcox wrote:
> On Mon, Jan 31, 2022 at 05:44:35AM +0000, HORIGUCHI NAOYA(堀口 直也) wrote:
>> On Sun, Jan 30, 2022 at 09:14:21PM +0000, Matthew Wilcox wrote:
>>> On Sun, Jan 30, 2022 at 12:58:17PM -0800, David Rientjes wrote:
>>>> On Sun, 30 Jan 2022, Matthew Wilcox (Oracle) wrote:
>>>>
>>>>> Hardware poison is tracked on a per-page basis, not on the head page.
>>>>>
>>>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>>>>> ---
>>>>>  mm/rmap.c | 4 ++--
>>>>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/mm/rmap.c b/mm/rmap.c
>>>>> index 6a1e8c7f6213..09b08888120e 100644
>>>>> --- a/mm/rmap.c
>>>>> +++ b/mm/rmap.c
>>>>> @@ -1553,7 +1553,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
>>>>>  		/* Update high watermark before we lower rss */
>>>>>  		update_hiwater_rss(mm);
>>>>>  
>>>>> -		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
>>>>> +		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
>>>>>  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
>>>>>  			if (PageHuge(page)) {
>>>>>  				hugetlb_count_sub(compound_nr(page), mm);
>>>>> @@ -1873,7 +1873,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
>>>>>  			 * memory are supported.
>>>>>  			 */
>>>>>  			subpage = page;
>>>>> -		} else if (PageHWPoison(page)) {
>>>>> +		} else if (PageHWPoison(subpage)) {
>>>>>  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
>>>>>  			if (PageHuge(page)) {
>>>>>  				hugetlb_count_sub(compound_nr(page), mm);
>>>>
>>>> This looks correct.  Correct me if I'm wrong that this is for consistency 
>>>> and cleanup and that there is no bug being fixed by this, however.
>>>
>>> Oh, no, I think there's a real bug here.  It's just that we're looking
>>> at an uncommon & hence rarely-tested scenario -- a memory fault in the
>>> middle of a THP (in mainline; obviously it'll be a little more common
>>> with arbitrary sized folios).  I don't do HWPoison testing myself, so
>>> this was by inspection and not from testing.  A scenario where things
>>> would go wrong is a memory error on a non-head-page would go unnoticed
>>> when migrating or unmapping.  Contrariwise, if there's a hardware error
>>> on a head page, all the subpages get treated as poisoned, even though
>>> they shouldn't be.
>>
>> Thank you for reporting.  As you point out, the current check does not
>> handle thp properly.  The reason of checking head page here is to handle
>> hwpoisoned hugetlb (which has PG_hwpoison on the head page even if the error
>> is on any of tail page).  So I think that the proper fix is to add a helper
>> function to check page type (normal, thp, or hugetlb) as well as PageHWPoison.
> 
> I think this handles HugeTLB pages correctly:
> 
>                 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
> 
> As I understand the HugeTLB code, pvmw.pte refers to the head page, not
> the subpage (unlike a PTE-mapped THP page, where it would refer to the
> precise page).  But I cheerfully admit that the intricacies of the
> HugeTLB code are not something I'm an expert on.

Your understanding is correct.  Here is the comment for the routine which
sets pvmw.pte.

 * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
 * regardless of which page table level the page is mapped at. @pvmw->pmd is
 * NULL.

Just another thought.  With the new hugetlb vmemmap optimizations, it is not
possible to set poison on hugetlb tail pages until after allocating struct
pages.
HORIGUCHI NAOYA(堀口 直也) Jan. 31, 2022, 11:04 p.m. UTC | #6
On Mon, Jan 31, 2022 at 11:24:45AM -0800, Mike Kravetz wrote:
> On 1/31/22 05:24, Matthew Wilcox wrote:
> > On Mon, Jan 31, 2022 at 05:44:35AM +0000, HORIGUCHI NAOYA(堀口 直也) wrote:
> >> On Sun, Jan 30, 2022 at 09:14:21PM +0000, Matthew Wilcox wrote:
> >>> On Sun, Jan 30, 2022 at 12:58:17PM -0800, David Rientjes wrote:
> >>>> On Sun, 30 Jan 2022, Matthew Wilcox (Oracle) wrote:
> >>>>
> >>>>> Hardware poison is tracked on a per-page basis, not on the head page.
> >>>>>
> >>>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> >>>>> ---
> >>>>>  mm/rmap.c | 4 ++--
> >>>>>  1 file changed, 2 insertions(+), 2 deletions(-)
> >>>>>
> >>>>> diff --git a/mm/rmap.c b/mm/rmap.c
> >>>>> index 6a1e8c7f6213..09b08888120e 100644
> >>>>> --- a/mm/rmap.c
> >>>>> +++ b/mm/rmap.c
> >>>>> @@ -1553,7 +1553,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
> >>>>>  		/* Update high watermark before we lower rss */
> >>>>>  		update_hiwater_rss(mm);
> >>>>>  
> >>>>> -		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
> >>>>> +		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
> >>>>>  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
> >>>>>  			if (PageHuge(page)) {
> >>>>>  				hugetlb_count_sub(compound_nr(page), mm);
> >>>>> @@ -1873,7 +1873,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
> >>>>>  			 * memory are supported.
> >>>>>  			 */
> >>>>>  			subpage = page;
> >>>>> -		} else if (PageHWPoison(page)) {
> >>>>> +		} else if (PageHWPoison(subpage)) {
> >>>>>  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
> >>>>>  			if (PageHuge(page)) {
> >>>>>  				hugetlb_count_sub(compound_nr(page), mm);
> >>>>
> >>>> This looks correct.  Correct me if I'm wrong that this is for consistency 
> >>>> and cleanup and that there is no bug being fixed by this, however.
> >>>
> >>> Oh, no, I think there's a real bug here.  It's just that we're looking
> >>> at an uncommon & hence rarely-tested scenario -- a memory fault in the
> >>> middle of a THP (in mainline; obviously it'll be a little more common
> >>> with arbitrary sized folios).  I don't do HWPoison testing myself, so
> >>> this was by inspection and not from testing.  A scenario where things
> >>> would go wrong is a memory error on a non-head-page would go unnoticed
> >>> when migrating or unmapping.  Contrariwise, if there's a hardware error
> >>> on a head page, all the subpages get treated as poisoned, even though
> >>> they shouldn't be.
> >>
> >> Thank you for reporting.  As you point out, the current check does not
> >> handle thp properly.  The reason of checking head page here is to handle
> >> hwpoisoned hugetlb (which has PG_hwpoison on the head page even if the error
> >> is on any of tail page).  So I think that the proper fix is to add a helper
> >> function to check page type (normal, thp, or hugetlb) as well as PageHWPoison.
> > 
> > I think this handles HugeTLB pages correctly:
> > 
> >                 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
> > 
> > As I understand the HugeTLB code, pvmw.pte refers to the head page, not
> > the subpage (unlike a PTE-mapped THP page, where it would refer to the
> > precise page).  But I cheerfully admit that the intricacies of the
> > HugeTLB code are not something I'm an expert on.

Sorry, you're right.

> 
> Your understanding is correct.  Here is the comment for the routine which
> sets pvmw.pte.
> 
>  * For HugeTLB pages, @pvmw->pte is set to the relevant page table entry
>  * regardless of which page table level the page is mapped at. @pvmw->pmd is
>  * NULL.

Thank you for the clarification.

> 
> Just another thought.  With new hugetlb vmmemmap optimizations, it is not
> possible to set poison on hugetlb tail pages until after allocating struct
> pages.

So the proposed patch looks fine to me.

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Yang Shi Feb. 1, 2022, 6:29 p.m. UTC | #7
On Sat, Jan 29, 2022 at 5:30 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
>
> Hardware poison is tracked on a per-page basis, not on the head page.

Looks correct to me. Reviewed-by: Yang Shi <shy828301@gmail.com>

>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  mm/rmap.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 6a1e8c7f6213..09b08888120e 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1553,7 +1553,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
>                 /* Update high watermark before we lower rss */
>                 update_hiwater_rss(mm);
>
> -               if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
> +               if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
>                         pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
>                         if (PageHuge(page)) {
>                                 hugetlb_count_sub(compound_nr(page), mm);
> @@ -1873,7 +1873,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
>                          * memory are supported.
>                          */
>                         subpage = page;
> -               } else if (PageHWPoison(page)) {
> +               } else if (PageHWPoison(subpage)) {
>                         pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
>                         if (PageHuge(page)) {
>                                 hugetlb_count_sub(compound_nr(page), mm);
> --
> 2.34.1
>
>
diff mbox series

Patch

diff --git a/mm/rmap.c b/mm/rmap.c
index 6a1e8c7f6213..09b08888120e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1553,7 +1553,7 @@  static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		/* Update high watermark before we lower rss */
 		update_hiwater_rss(mm);
 
-		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
+		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {
 			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
 			if (PageHuge(page)) {
 				hugetlb_count_sub(compound_nr(page), mm);
@@ -1873,7 +1873,7 @@  static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
 			 * memory are supported.
 			 */
 			subpage = page;
-		} else if (PageHWPoison(page)) {
+		} else if (PageHWPoison(subpage)) {
 			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
 			if (PageHuge(page)) {
 				hugetlb_count_sub(compound_nr(page), mm);