diff mbox series

[v2,5/9] mm: Move FAULT_FLAG_VMA_LOCK check down in handle_pte_fault()

Message ID 20230711202047.3818697-6-willy@infradead.org (mailing list archive)
State New
Headers show
Series Avoid the mmap lock for fault-around | expand

Commit Message

Matthew Wilcox July 11, 2023, 8:20 p.m. UTC
Call do_pte_missing() under the VMA lock ... then immediately retry
in do_fault().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 mm/memory.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

Comments

Suren Baghdasaryan July 14, 2023, 3:26 a.m. UTC | #1
On Tue, Jul 11, 2023 at 1:21 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
>
> Call do_pte_missing() under the VMA lock ... then immediately retry
> in do_fault().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>

Reviewed-by: Suren Baghdasaryan <surenb@google.com>

> ---
>  mm/memory.c | 15 ++++++++++-----
>  1 file changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/mm/memory.c b/mm/memory.c
> index 52f7fdd78380..88cf9860f17e 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4661,6 +4661,11 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
>         struct mm_struct *vm_mm = vma->vm_mm;
>         vm_fault_t ret;
>
> +       if (vmf->flags & FAULT_FLAG_VMA_LOCK){

nit: missing space before {

> +               vma_end_read(vma);
> +               return VM_FAULT_RETRY;
> +       }
> +
>         /*
>          * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND
>          */
> @@ -4924,11 +4929,6 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
>  {
>         pte_t entry;
>
> -       if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
> -               vma_end_read(vmf->vma);
> -               return VM_FAULT_RETRY;
> -       }
> -

A comment a bit further down talks about "A regular pmd is established
and it can't morph into a huge pmd by anon khugepaged, since that takes
mmap_lock in write mode".
I assume this is about collapse_pte_mapped_thp(), and it does call
vma_start_write(vma), so I think we are ok.


>         if (unlikely(pmd_none(*vmf->pmd))) {
>                 /*
>                  * Leave __pte_alloc() until later: because vm_ops->fault may
> @@ -4961,6 +4961,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
>         if (!vmf->pte)
>                 return do_pte_missing(vmf);
>
> +       if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
> +               vma_end_read(vmf->vma);
> +               return VM_FAULT_RETRY;
> +       }
> +
>         if (!pte_present(vmf->orig_pte))
>                 return do_swap_page(vmf);
>
> --
> 2.39.2
>
Jann Horn July 24, 2023, 3:46 p.m. UTC | #2
On Tue, Jul 11, 2023 at 10:20 PM Matthew Wilcox (Oracle)
<willy@infradead.org> wrote:
> Call do_pte_missing() under the VMA lock ... then immediately retry
> in do_fault().
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
[...]
> @@ -4961,6 +4961,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
>         if (!vmf->pte)
>                 return do_pte_missing(vmf);
>
> +       if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
> +               vma_end_read(vmf->vma);
> +               return VM_FAULT_RETRY;
> +       }

At this point we can have vmf->pte mapped, right? Does this mean this
bailout leaks a kmap_local() on CONFIG_HIGHPTE?

>         if (!pte_present(vmf->orig_pte))
>                 return do_swap_page(vmf);
Matthew Wilcox July 24, 2023, 5:45 p.m. UTC | #3
On Mon, Jul 24, 2023 at 05:46:21PM +0200, Jann Horn wrote:
> > +       if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
> > +               vma_end_read(vmf->vma);
> > +               return VM_FAULT_RETRY;
> > +       }
> 
> At this point we can have vmf->pte mapped, right? Does this mean this
> bailout leaks a kmap_local() on CONFIG_HIGHPTE?

Yup.  Guess nobody's testing on 32-bit machines.  Thanks, fixed.
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index 52f7fdd78380..88cf9860f17e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4661,6 +4661,11 @@  static vm_fault_t do_fault(struct vm_fault *vmf)
 	struct mm_struct *vm_mm = vma->vm_mm;
 	vm_fault_t ret;
 
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK){
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+
 	/*
 	 * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND
 	 */
@@ -4924,11 +4929,6 @@  static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
 
-	if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
-		vma_end_read(vmf->vma);
-		return VM_FAULT_RETRY;
-	}
-
 	if (unlikely(pmd_none(*vmf->pmd))) {
 		/*
 		 * Leave __pte_alloc() until later: because vm_ops->fault may
@@ -4961,6 +4961,11 @@  static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	if (!vmf->pte)
 		return do_pte_missing(vmf);
 
+	if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
+		vma_end_read(vmf->vma);
+		return VM_FAULT_RETRY;
+	}
+
 	if (!pte_present(vmf->orig_pte))
 		return do_swap_page(vmf);