Message ID | 20170517152336.6052-2-punit.agrawal@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, May 17, 2017 at 04:23:34PM +0100, Punit Agrawal wrote: > When memory failure is enabled, a poisoned hugepage pte is marked as a > swap entry. huge_pte_offset() does not return the poisoned page table > entries when it encounters PUD/PMD hugepages. > > This behaviour of huge_pte_offset() leads to error such as below when > munmap is called on poisoned hugepages. > > [ 344.165544] mm/pgtable-generic.c:33: bad pmd 000000083af00074. > > Fix huge_pte_offset() to return the poisoned pte which is then > appropriately handled by the generic layer code. > > Signed-off-by: Punit Agrawal <punit.agrawal@arm.com> > Acked-by: Steve Capper <steve.capper@arm.com> > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: David Woods <dwoods@mellanox.com> > --- > arch/arm64/include/asm/pgtable.h | 2 +- > arch/arm64/mm/hugetlbpage.c | 29 ++++++++++------------------- > 2 files changed, 11 insertions(+), 20 deletions(-) > > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index c213fdbd056c..6eae342ced6b 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) > > #define pud_none(pud) (!pud_val(pud)) > #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) > -#define pud_present(pud) (pud_val(pud)) > +#define pud_present(pud) pte_present(pud_pte(pud)) > > static inline void set_pud(pud_t *pudp, pud_t pud) > { > diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c > index 7514a000e361..69b8200b1cfd 100644 > --- a/arch/arm64/mm/hugetlbpage.c > +++ b/arch/arm64/mm/hugetlbpage.c > @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) > { > pgd_t *pgd; > pud_t *pud; > - pmd_t *pmd = NULL; > - pte_t *pte = NULL; > + pmd_t *pmd; > > pgd = pgd_offset(mm, addr); > pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); > if (!pgd_present(*pgd)) > return NULL; > + > pud = pud_offset(pgd, addr); > - if (!pud_present(*pud)) > + if (pud_none(*pud)) > return NULL; Do you actually need this special case? > - > - if (pud_huge(*pud)) > + /* swap or huge page */ > + if (!pud_present(*pud) || pud_huge(*pud)) ... couldn't you just add a '|| pud_none(*pud)' in here? > return (pte_t *)pud; > + /* table; check the next level */ > + > pmd = pmd_offset(pud, addr); > - if (!pmd_present(*pmd)) > + if (pmd_none(*pmd)) > return NULL; > - > - if (pte_cont(pmd_pte(*pmd))) { > - pmd = pmd_offset( > - pud, (addr & CONT_PMD_MASK)); > - return (pte_t *)pmd; > - } > - if (pmd_huge(*pmd)) > + if (!pmd_present(*pmd) || pmd_huge(*pmd)) Similarly here. Will
On Wed, Jun 07, 2017 at 02:47:32PM +0100, Will Deacon wrote: > On Wed, May 17, 2017 at 04:23:34PM +0100, Punit Agrawal wrote: > > --- a/arch/arm64/mm/hugetlbpage.c > > +++ b/arch/arm64/mm/hugetlbpage.c > > @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) > > { > > pgd_t *pgd; > > pud_t *pud; > > - pmd_t *pmd = NULL; > > - pte_t *pte = NULL; > > + pmd_t *pmd; > > > > pgd = pgd_offset(mm, addr); > > pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); > > if (!pgd_present(*pgd)) > > return NULL; > > + > > pud = pud_offset(pgd, addr); > > - if (!pud_present(*pud)) > > + if (pud_none(*pud)) > > return NULL; > > Do you actually need this special case? > > > - > > - if (pud_huge(*pud)) > > + /* swap or huge page */ > > + if (!pud_present(*pud) || pud_huge(*pud)) > > ... couldn't you just add a '|| pud_none(*pud)' in here? > > > return (pte_t *)pud; But then you no longer return NULL if *pud == 0.
On Wed, Jun 07, 2017 at 03:30:37PM +0100, Catalin Marinas wrote: > On Wed, Jun 07, 2017 at 02:47:32PM +0100, Will Deacon wrote: > > On Wed, May 17, 2017 at 04:23:34PM +0100, Punit Agrawal wrote: > > > --- a/arch/arm64/mm/hugetlbpage.c > > > +++ b/arch/arm64/mm/hugetlbpage.c > > > @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) > > > { > > > pgd_t *pgd; > > > pud_t *pud; > > > - pmd_t *pmd = NULL; > > > - pte_t *pte = NULL; > > > + pmd_t *pmd; > > > > > > pgd = pgd_offset(mm, addr); > > > pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); > > > if (!pgd_present(*pgd)) > > > return NULL; > > > + > > > pud = pud_offset(pgd, addr); > > > - if (!pud_present(*pud)) > > > + if (pud_none(*pud)) > > > return NULL; > > > > Do you actually need this special case? > > > > > - > > > - if (pud_huge(*pud)) > > > + /* swap or huge page */ > > > + if (!pud_present(*pud) || pud_huge(*pud)) > > > > ... couldn't you just add a '|| pud_none(*pud)' in here? > > > > > return (pte_t *)pud; > > But then you no longer return NULL if *pud == 0. Does that actually matter? The bits of hugetlb code I looked at will deferenced the returned pud and handle the huge_pte_none case correctly. Will
On Wed, May 17, 2017 at 04:23:34PM +0100, Punit Agrawal wrote: > When memory failure is enabled, a poisoned hugepage pte is marked as a > swap entry. huge_pte_offset() does not return the poisoned page table > entries when it encounters PUD/PMD hugepages. > > This behaviour of huge_pte_offset() leads to error such as below when > munmap is called on poisoned hugepages. > > [ 344.165544] mm/pgtable-generic.c:33: bad pmd 000000083af00074. > > Fix huge_pte_offset() to return the poisoned pte which is then > appropriately handled by the generic layer code. > > Signed-off-by: Punit Agrawal <punit.agrawal@arm.com> > Acked-by: Steve Capper <steve.capper@arm.com> > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: David Woods <dwoods@mellanox.com> Since the patch matches my suggestions in v1: Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Will Deacon <will.deacon@arm.com> writes: > On Wed, Jun 07, 2017 at 03:30:37PM +0100, Catalin Marinas wrote: >> On Wed, Jun 07, 2017 at 02:47:32PM +0100, Will Deacon wrote: >> > On Wed, May 17, 2017 at 04:23:34PM +0100, Punit Agrawal wrote: >> > > --- a/arch/arm64/mm/hugetlbpage.c >> > > +++ b/arch/arm64/mm/hugetlbpage.c >> > > @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) >> > > { >> > > pgd_t *pgd; >> > > pud_t *pud; >> > > - pmd_t *pmd = NULL; >> > > - pte_t *pte = NULL; >> > > + pmd_t *pmd; >> > > >> > > pgd = pgd_offset(mm, addr); >> > > pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); >> > > if (!pgd_present(*pgd)) >> > > return NULL; >> > > + >> > > pud = pud_offset(pgd, addr); >> > > - if (!pud_present(*pud)) >> > > + if (pud_none(*pud)) >> > > return NULL; >> > >> > Do you actually need this special case? >> > >> > > - >> > > - if (pud_huge(*pud)) >> > > + /* swap or huge page */ >> > > + if (!pud_present(*pud) || pud_huge(*pud)) >> > >> > ... couldn't you just add a '|| pud_none(*pud)' in here? >> > I think an earlier version took this approach but... >> > > return (pte_t *)pud; >> >> But then you no longer return NULL if *pud == 0. > > Does that actually matter? The bits of hugetlb code I looked at will > deferenced the returned pud and handle the huge_pte_none case correctly. For hugetlb fault handling (hugetlb_fault()), returning NULL vs pointer to the pud/pmd results in different behaviour. If we return the pud when pud_none(), then we lose the resulting hugepage size check we get from huge_pte_alloc(). > > Will
On Wed, Jun 07, 2017 at 04:32:28PM +0100, Punit Agrawal wrote: > Will Deacon <will.deacon@arm.com> writes: > > > On Wed, Jun 07, 2017 at 03:30:37PM +0100, Catalin Marinas wrote: > >> On Wed, Jun 07, 2017 at 02:47:32PM +0100, Will Deacon wrote: > >> > On Wed, May 17, 2017 at 04:23:34PM +0100, Punit Agrawal wrote: > >> > > --- a/arch/arm64/mm/hugetlbpage.c > >> > > +++ b/arch/arm64/mm/hugetlbpage.c > >> > > @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) > >> > > { > >> > > pgd_t *pgd; > >> > > pud_t *pud; > >> > > - pmd_t *pmd = NULL; > >> > > - pte_t *pte = NULL; > >> > > + pmd_t *pmd; > >> > > > >> > > pgd = pgd_offset(mm, addr); > >> > > pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); > >> > > if (!pgd_present(*pgd)) > >> > > return NULL; > >> > > + > >> > > pud = pud_offset(pgd, addr); > >> > > - if (!pud_present(*pud)) > >> > > + if (pud_none(*pud)) > >> > > return NULL; > >> > > >> > Do you actually need this special case? > >> > > >> > > - > >> > > - if (pud_huge(*pud)) > >> > > + /* swap or huge page */ > >> > > + if (!pud_present(*pud) || pud_huge(*pud)) > >> > > >> > ... couldn't you just add a '|| pud_none(*pud)' in here? > >> > > > I think an earlier version took this approach but... > > >> > > return (pte_t *)pud; > >> > >> But then you no longer return NULL if *pud == 0. > > > > Does that actually matter? The bits of hugetlb code I looked at will > > deferenced the returned pud and handle the huge_pte_none case correctly. > > For hugetlb fault handling (hugetlb_fault()), returning NULL vs pointer > to the pud/pmd results in different behaviour. If we return the pud when > pud_none(), then we lose the resulting hugepage size check we get from > huge_pte_alloc(). Ok, so does that mean that many of the huge_pte_none checks in mm/hugetlb.c that operate on a huge_ptep_get of non-NULL output from huge_pte_offset are actually redundant? Will
On Wed, Jun 07, 2017 at 04:32:28PM +0100, Punit Agrawal wrote: > Will Deacon <will.deacon@arm.com> writes: > > On Wed, Jun 07, 2017 at 03:30:37PM +0100, Catalin Marinas wrote: > >> On Wed, Jun 07, 2017 at 02:47:32PM +0100, Will Deacon wrote: > >> > On Wed, May 17, 2017 at 04:23:34PM +0100, Punit Agrawal wrote: > >> > > --- a/arch/arm64/mm/hugetlbpage.c > >> > > +++ b/arch/arm64/mm/hugetlbpage.c > >> > > @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) > >> > > { > >> > > pgd_t *pgd; > >> > > pud_t *pud; > >> > > - pmd_t *pmd = NULL; > >> > > - pte_t *pte = NULL; > >> > > + pmd_t *pmd; > >> > > > >> > > pgd = pgd_offset(mm, addr); > >> > > pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); > >> > > if (!pgd_present(*pgd)) > >> > > return NULL; > >> > > + > >> > > pud = pud_offset(pgd, addr); > >> > > - if (!pud_present(*pud)) > >> > > + if (pud_none(*pud)) > >> > > return NULL; > >> > > >> > Do you actually need this special case? > >> > > >> > > - > >> > > - if (pud_huge(*pud)) > >> > > + /* swap or huge page */ > >> > > + if (!pud_present(*pud) || pud_huge(*pud)) > >> > > >> > ... couldn't you just add a '|| pud_none(*pud)' in here? > >> > > > I think an earlier version took this approach but... > > >> > > return (pte_t *)pud; > >> > >> But then you no longer return NULL if *pud == 0. > > > > Does that actually matter? The bits of hugetlb code I looked at will > > deferenced the returned pud and handle the huge_pte_none case correctly. > > For hugetlb fault handling (hugetlb_fault()), returning NULL vs pointer > to the pud/pmd results in different behaviour. If we return the pud when > pud_none(), then we lose the resulting hugepage size check we get from > huge_pte_alloc(). At a quick look, there are a few other places where not returning NULL has some other effects (though I don't think any of them are fatal): - copy_huge_tlb_page_range() - unnecessary allocation of a destination pud - huge_pmd_share() - do we actually need the subsequent get_page()? - page_vma_mapped_walk() - it even has a comment: "when pud is not present, pte will be NULL". Now, that's no longer true with swap entries but we'd never return NULL for a pud_none() case Current code behaviour is to return NULL when !p*d_present(). We are slightly relaxing this for swap entries while still returning NULL for the p*d_none() case but I wouldn't go that far as to never return NULL here.
Will Deacon <will.deacon@arm.com> writes: > On Wed, Jun 07, 2017 at 04:32:28PM +0100, Punit Agrawal wrote: >> Will Deacon <will.deacon@arm.com> writes: >> >> > On Wed, Jun 07, 2017 at 03:30:37PM +0100, Catalin Marinas wrote: >> >> On Wed, Jun 07, 2017 at 02:47:32PM +0100, Will Deacon wrote: >> >> > On Wed, May 17, 2017 at 04:23:34PM +0100, Punit Agrawal wrote: >> >> > > --- a/arch/arm64/mm/hugetlbpage.c >> >> > > +++ b/arch/arm64/mm/hugetlbpage.c >> >> > > @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) >> >> > > { >> >> > > pgd_t *pgd; >> >> > > pud_t *pud; >> >> > > - pmd_t *pmd = NULL; >> >> > > - pte_t *pte = NULL; >> >> > > + pmd_t *pmd; >> >> > > >> >> > > pgd = pgd_offset(mm, addr); >> >> > > pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); >> >> > > if (!pgd_present(*pgd)) >> >> > > return NULL; >> >> > > + >> >> > > pud = pud_offset(pgd, addr); >> >> > > - if (!pud_present(*pud)) >> >> > > + if (pud_none(*pud)) >> >> > > return NULL; >> >> > >> >> > Do you actually need this special case? >> >> > >> >> > > - >> >> > > - if (pud_huge(*pud)) >> >> > > + /* swap or huge page */ >> >> > > + if (!pud_present(*pud) || pud_huge(*pud)) >> >> > >> >> > ... couldn't you just add a '|| pud_none(*pud)' in here? >> >> > >> >> I think an earlier version took this approach but... >> >> >> > > return (pte_t *)pud; >> >> >> >> But then you no longer return NULL if *pud == 0. >> > >> > Does that actually matter? The bits of hugetlb code I looked at will >> > deferenced the returned pud and handle the huge_pte_none case correctly. >> >> For hugetlb fault handling (hugetlb_fault()), returning NULL vs pointer >> to the pud/pmd results in different behaviour. If we return the pud when >> pud_none(), then we lose the resulting hugepage size check we get from >> huge_pte_alloc(). > > Ok, so does that mean that many of the huge_pte_none checks in mm/hugetlb.c > that operate on a huge_ptep_get of non-NULL output from huge_pte_offset are > actually redundant? Summarising our offline discussion - the semantics of huge_pte_offset() are unclear in terms of when a pointer to (p*d) is returned vs when to return NULL. As part of enabling contiguous hugepage support[0][1], I have a patch to add a size argument to huge_pte_offset() that can then help disambiguate when we have a valid pte* vs when we have an error (NULL). I'll separately look at clarifying the semantics of the generic version of huge_pte_offse() and potentially also look at unifying the huge_pte_offset() implementations across the various architectures. [0] https://lkml.org/lkml/2017/5/24/463 [1] https://www.spinics.net/lists/arm-kernel/msg583367.html > > Will
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c213fdbd056c..6eae342ced6b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) -#define pud_present(pud) (pud_val(pud)) +#define pud_present(pud) pte_present(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 7514a000e361..69b8200b1cfd 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd = NULL; - pte_t *pte = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); if (!pgd_present(*pgd)) return NULL; + pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) + if (pud_none(*pud)) return NULL; - - if (pud_huge(*pud)) + /* swap or huge page */ + if (!pud_present(*pud) || pud_huge(*pud)) return (pte_t *)pud; + /* table; check the next level */ + pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) + if (pmd_none(*pmd)) return NULL; - - if (pte_cont(pmd_pte(*pmd))) { - pmd = pmd_offset( - pud, (addr & CONT_PMD_MASK)); - return (pte_t *)pmd; - } - if (pmd_huge(*pmd)) + if (!pmd_present(*pmd) || pmd_huge(*pmd)) return (pte_t *)pmd; - pte = pte_offset_kernel(pmd, addr); - if (pte_present(*pte) && pte_cont(*pte)) { - pte = pte_offset_kernel( - pmd, (addr & CONT_PTE_MASK)); - return pte; - } + return NULL; }