
[hmm,1/8] mm/hmm: add missing unmaps of the ptep during hmm_vma_handle_pte()

Message ID 20200311183506.3997-2-jgg@ziepe.ca
State New, archived
Series Various error case bug fixes for hmm_range_fault()

Commit Message

Jason Gunthorpe March 11, 2020, 6:34 p.m. UTC
From: Jason Gunthorpe <jgg@mellanox.com>

Many of the direct error returns skipped the pte_unmap(). All non-zero
exit paths must unmap the pte.

The pte_unmap() is split unnaturally like this because some of the error
exit paths trigger a sleep and must release the lock before sleeping.

Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem")
Fixes: 53f5c3f489ec ("mm/hmm: factor out pte and pmd handling to simplify hmm_vma_walk_pmd()")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 mm/hmm.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
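
For readers unfamiliar with the constraint the commit message enforces: on
32-bit kernels with CONFIG_HIGHPTE, pte_offset_map() takes an atomic kmap of
the page-table page, so every exit path taken after it must be paired with
exactly one pte_unmap(). A minimal sketch of the rule, using a hypothetical
walker (walker_fn() is an illustrative name, not a mm/hmm.c function):

/*
 * Illustrative sketch only. Once pte_offset_map() has succeeded, every
 * return path -- early errors included -- must call pte_unmap() exactly
 * once before returning.
 */
static int walker_fn(pmd_t *pmdp, unsigned long addr, u64 *pfn)
{
        pte_t *ptep = pte_offset_map(pmdp, addr); /* may kmap_atomic() */
        pte_t pte = *ptep;

        if (pte_none(pte)) {
                pte_unmap(ptep);        /* unmap on the error path */
                return -EFAULT;
        }

        *pfn = pte_pfn(pte);
        pte_unmap(ptep);                /* and on the success path */
        return 0;
}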

Comments

Ralph Campbell March 12, 2020, 1:28 a.m. UTC | #1
On 3/11/20 11:34 AM, Jason Gunthorpe wrote:
> From: Jason Gunthorpe <jgg@mellanox.com>
> 
> Many of the direct error returns skipped the pte_unmap(). All non-zero
> exit paths must unmap the pte.
> 
> The pte_unmap() is split unnaturally like this because some of the error
> exit paths trigger a sleep and must release the lock before sleeping.
> 
> Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem")
> Fixes: 53f5c3f489ec ("mm/hmm: factor out pte and pmd handling to simplify hmm_vma_walk_pmd()")
> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

The changes look OK to me, but there is one issue noted below.
In any case, you can add:
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>

> ---
>   mm/hmm.c | 8 ++++++--
>   1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 72e5a6d9a41756..35f85424176d14 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -325,6 +325,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>   		}
>   
>   		/* Report error for everything else */
> +		pte_unmap(ptep);
>   		*pfn = range->values[HMM_PFN_ERROR];
>   		return -EFAULT;
>   	} else {
> @@ -339,10 +340,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
>   	if (pte_devmap(pte)) {
>   		hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
>   					      hmm_vma_walk->pgmap);
> -		if (unlikely(!hmm_vma_walk->pgmap))
> +		if (unlikely(!hmm_vma_walk->pgmap)) {
> +			pte_unmap(ptep);
>   			return -EBUSY;
> +		}
>   	} else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) {
>   		if (!is_zero_pfn(pte_pfn(pte))) {
> +			pte_unmap(ptep);
>   			*pfn = range->values[HMM_PFN_SPECIAL];
>   			return -EFAULT;
>   		}
> @@ -437,7 +441,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
>   
>   		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
>   		if (r) {
> -			/* hmm_vma_handle_pte() did unmap pte directory */
> +			/* hmm_vma_handle_pte() did pte_unmap() */
>   			hmm_vma_walk->last = addr;
>   			return r;
>   		}
> 

I think there is a case where hmm_vma_handle_pte() is called and a fault is
requested: pte_unmap() and hmm_vma_walk_hole_() are called, the latter returns
zero (the fault was handled OK), but now the page table is unmapped for
successive loop iterations and pte_unmap(ptep - 1) is called at the loop end.
Since this isn't an issue on x86_64 but is on x86_32 (with CONFIG_HIGHPTE),
I can see how it could be missed.
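
For reference, the call site in hmm_vma_walk_pmd() looks roughly like this
(paraphrased and trimmed, not a verbatim copy of mm/hmm.c). If
hmm_vma_handle_pte() ever did pte_unmap() and then returned 0, the loop would
keep dereferencing the unmapped ptep and would unmap it a second time at the
end:

        ptep = pte_offset_map(pmdp, addr);
        for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
                int r;

                r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
                if (r) {
                        /* non-zero: hmm_vma_handle_pte() did pte_unmap() */
                        hmm_vma_walk->last = addr;
                        return r;
                }
        }
        pte_unmap(ptep - 1);    /* balances the pte_offset_map() above */
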
Jason Gunthorpe March 12, 2020, 2:24 p.m. UTC | #2
On Wed, Mar 11, 2020 at 06:28:30PM -0700, Ralph Campbell wrote:
> >   mm/hmm.c | 8 ++++++--
> >   1 file changed, 6 insertions(+), 2 deletions(-)
> > 
> > diff --git a/mm/hmm.c b/mm/hmm.c
> > index 72e5a6d9a41756..35f85424176d14 100644
> > --- a/mm/hmm.c
> > +++ b/mm/hmm.c
> > @@ -325,6 +325,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
> >   		}
> >   		/* Report error for everything else */
> > +		pte_unmap(ptep);
> >   		*pfn = range->values[HMM_PFN_ERROR];
> >   		return -EFAULT;
> >   	} else {
> > @@ -339,10 +340,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
> >   	if (pte_devmap(pte)) {
> >   		hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
> >   					      hmm_vma_walk->pgmap);
> > -		if (unlikely(!hmm_vma_walk->pgmap))
> > +		if (unlikely(!hmm_vma_walk->pgmap)) {
> > +			pte_unmap(ptep);
> >   			return -EBUSY;
> > +		}
> >   	} else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) {
> >   		if (!is_zero_pfn(pte_pfn(pte))) {
> > +			pte_unmap(ptep);
> >   			*pfn = range->values[HMM_PFN_SPECIAL];
> >   			return -EFAULT;
> >   		}
> > @@ -437,7 +441,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
> >   		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
> >   		if (r) {
> > -			/* hmm_vma_handle_pte() did unmap pte directory */
> > +			/* hmm_vma_handle_pte() did pte_unmap() */
> >   			hmm_vma_walk->last = addr;
> >   			return r;
> >   		}
> > 
> 
> I think there is a case where hmm_vma_handle_pte() is called and a fault is
> requested: pte_unmap() and hmm_vma_walk_hole_() are called, the latter returns
> zero (the fault was handled OK)

Not quite. hmm_vma_walk_hole_() never returns 0 when called with fault set:

        return (fault || write_fault) ? -EBUSY : 0;

And all the call sites of hmm_vma_walk_hole_() in hmm_vma_handle_pte()
are structured as:

                if (fault || write_fault)
                        goto fault;
fault:
        return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);

So, it never returns 0.
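
Condensed, the relevant path inside hmm_vma_handle_pte() is (paraphrased and
simplified, not verbatim):

        if (fault || write_fault)
                goto fault;
        /* ... non-faulting cases unmap and return above ... */
fault:
        pte_unmap(ptep);
        /* fault || write_fault is known true here, so this is -EBUSY: */
        return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);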

I already made a patch making this less twisty while fixing something
else:

https://github.com/jgunthorpe/linux/commit/078e10ca5919f2c263c245784fb5fe63ddbb61f4

Thanks,
Jason
Christoph Hellwig March 16, 2020, 8:58 a.m. UTC | #3
On Wed, Mar 11, 2020 at 03:34:59PM -0300, Jason Gunthorpe wrote:
> From: Jason Gunthorpe <jgg@mellanox.com>
> 
> Many of the direct error returns skipped the pte_unmap(). All non-zero
> exit paths must unmap the pte.
> 
> The pte_unmap() is split unnaturally like this because some of the error
> exit paths trigger a sleep and must release the lock before sleeping.
> 
> Fixes: 992de9a8b751 ("mm/hmm: allow to mirror vma of a file on a DAX backed filesystem")
> Fixes: 53f5c3f489ec ("mm/hmm: factor out pte and pmd handling to simplify hmm_vma_walk_pmd()")
> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

Probably papered over by the fact that hmm doesn't work on architectures
where pte_unmap() isn't a no-op right now.
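
Concretely, the x86 helpers of that era expand along these lines (paraphrased
from arch/x86/include/asm/pgtable_32.h and pgtable_64.h; exact macro bodies
vary by kernel version). An unbalanced pte_unmap() is invisible on x86_64,
but on a CONFIG_HIGHPTE x86_32 kernel it leaks an atomic kmap and leaves
preemption disabled:

#ifdef CONFIG_HIGHPTE   /* 32-bit, pte pages may live in highmem */
#define pte_offset_map(dir, address) \
        ((pte_t *)kmap_atomic(pmd_page(*(dir))) + pte_index(address))
#define pte_unmap(pte) kunmap_atomic((pte))     /* must balance the kmap */
#else                   /* e.g. x86_64 */
#define pte_offset_map(dir, address) \
        ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
#define pte_unmap(pte) ((void)(pte))            /* no-op */
#endif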

Patch

diff --git a/mm/hmm.c b/mm/hmm.c
index 72e5a6d9a41756..35f85424176d14 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -325,6 +325,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 		}
 
 		/* Report error for everything else */
+		pte_unmap(ptep);
 		*pfn = range->values[HMM_PFN_ERROR];
 		return -EFAULT;
 	} else {
@@ -339,10 +340,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	if (pte_devmap(pte)) {
 		hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
 					      hmm_vma_walk->pgmap);
-		if (unlikely(!hmm_vma_walk->pgmap))
+		if (unlikely(!hmm_vma_walk->pgmap)) {
+			pte_unmap(ptep);
 			return -EBUSY;
+		}
 	} else if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL) && pte_special(pte)) {
 		if (!is_zero_pfn(pte_pfn(pte))) {
+			pte_unmap(ptep);
 			*pfn = range->values[HMM_PFN_SPECIAL];
 			return -EFAULT;
 		}
@@ -437,7 +441,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 
 		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
 		if (r) {
-			/* hmm_vma_handle_pte() did unmap pte directory */
+			/* hmm_vma_handle_pte() did pte_unmap() */
 			hmm_vma_walk->last = addr;
 			return r;
 		}