Message ID | 20230927052505.2855872-3-willy@infradead.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Handle more faults under the VMA lock | expand |
On Tue, Sep 26, 2023 at 10:25 PM Matthew Wilcox (Oracle) <willy@infradead.org> wrote: > > It is usually safe to call wp_page_copy() under the VMA lock. The only > unsafe situation is when no anon_vma has been allocated for this VMA, > and we have to look at adjacent VMAs to determine if their anon_vma can > be shared. Since this happens only for the first COW of a page in this > VMA, the majority of calls to wp_page_copy() do not need to fall back > to the mmap_sem. > > Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which > will return RETRY if we currently hold the VMA lock and need to allocate > an anon_vma. This lets us drop the check in do_wp_page(). > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > --- > mm/memory.c | 39 ++++++++++++++++++++++++++------------- > 1 file changed, 26 insertions(+), 13 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index 97f860d6cd2a..cff78c496728 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf) > count_vm_event(PGREUSE); > } > > +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) > +{ > + struct vm_area_struct *vma = vmf->vma; > + > + if (likely(vma->anon_vma)) > + return 0; > + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { I don't think the above condition will happen today because lock_vma_under_rcu() returns NULL and do_page_fault() falls back to taking mmap_lock when !vma->anon_vma (https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/memory.c#L5428). We would need to narrow down that check in lock_vma_under_rcu() to make this work here. > + vma_end_read(vma); > + return VM_FAULT_RETRY; > + } > + if (__anon_vma_prepare(vma)) > + return VM_FAULT_OOM; > + return 0; > +} > + > /* > * Handle the case of a page which we actually need to copy to a new page, > * either due to COW or unsharing. > @@ -3069,27 +3084,29 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) > pte_t entry; > int page_copied = 0; > struct mmu_notifier_range range; > - int ret; > + vm_fault_t ret; > > delayacct_wpcopy_start(); > > if (vmf->page) > old_folio = page_folio(vmf->page); > - if (unlikely(anon_vma_prepare(vma))) > - goto oom; > + ret = vmf_anon_prepare(vmf); > + if (unlikely(ret)) > + goto out; > > if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { > new_folio = vma_alloc_zeroed_movable_folio(vma, vmf->address); > if (!new_folio) > goto oom; > } else { > + int err; > new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, > vmf->address, false); > if (!new_folio) > goto oom; > > - ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); > - if (ret) { > + err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); > + if (err) { > /* > * COW failed, if the fault was solved by other, > * it's fine. If not, userspace would re-fault on > @@ -3102,7 +3119,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) > folio_put(old_folio); > > delayacct_wpcopy_end(); > - return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0; > + return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; > } > kmsan_copy_page_meta(&new_folio->page, vmf->page); > } > @@ -3212,11 +3229,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) > oom_free_new: > folio_put(new_folio); > oom: > + ret = VM_FAULT_OOM; > +out: > if (old_folio) > folio_put(old_folio); > > delayacct_wpcopy_end(); > - return VM_FAULT_OOM; > + return ret; > } > > /** > @@ -3458,12 +3477,6 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) > return 0; > } > copy: > - if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) { > - pte_unmap_unlock(vmf->pte, vmf->ptl); > - vma_end_read(vmf->vma); > - return VM_FAULT_RETRY; > - } > - > /* > * Ok, we need to copy. Oh, well.. > */ > -- > 2.40.1 >
On Wed, Sep 27, 2023 at 03:38:38PM -0700, Suren Baghdasaryan wrote: > On Tue, Sep 26, 2023 at 10:25 PM Matthew Wilcox (Oracle) > <willy@infradead.org> wrote: > > It is usually safe to call wp_page_copy() under the VMA lock. The only > > unsafe situation is when no anon_vma has been allocated for this VMA, > > and we have to look at adjacent VMAs to determine if their anon_vma can > > be shared. Since this happens only for the first COW of a page in this > > VMA, the majority of calls to wp_page_copy() do not need to fall back > > to the mmap_sem. > > > > Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which > > will return RETRY if we currently hold the VMA lock and need to allocate > > an anon_vma. This lets us drop the check in do_wp_page(). > > > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > > --- > > mm/memory.c | 39 ++++++++++++++++++++++++++------------- > > 1 file changed, 26 insertions(+), 13 deletions(-) > > > > diff --git a/mm/memory.c b/mm/memory.c > > index 97f860d6cd2a..cff78c496728 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf) > > count_vm_event(PGREUSE); > > } > > > > +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) > > +{ > > + struct vm_area_struct *vma = vmf->vma; > > + > > + if (likely(vma->anon_vma)) > > + return 0; > > + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { > > I don't think the above condition will happen today because > lock_vma_under_rcu() returns NULL and do_page_fault() falls back to > taking mmap_lock when !vma->anon_vma > (https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/memory.c#L5428). > We would need to narrow down that check in lock_vma_under_rcu() to > make this work here. That's only for anon VMAs. For file-backed VMAs, we can get here ... handle_pte_fault() if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { if (!pte_write(entry)) return do_wp_page(vmf); ie we we have a MAP_PRIVATE of a file, first take a read-fault on it, then write to it. That causes us to allocate an anon page in this file-backed VMA, so we need an anon_vma to exist.
On Wed, Sep 27, 2023 at 10:18 PM Matthew Wilcox <willy@infradead.org> wrote: > > On Wed, Sep 27, 2023 at 03:38:38PM -0700, Suren Baghdasaryan wrote: > > On Tue, Sep 26, 2023 at 10:25 PM Matthew Wilcox (Oracle) > > <willy@infradead.org> wrote: > > > It is usually safe to call wp_page_copy() under the VMA lock. The only > > > unsafe situation is when no anon_vma has been allocated for this VMA, > > > and we have to look at adjacent VMAs to determine if their anon_vma can > > > be shared. Since this happens only for the first COW of a page in this > > > VMA, the majority of calls to wp_page_copy() do not need to fall back > > > to the mmap_sem. > > > > > > Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which > > > will return RETRY if we currently hold the VMA lock and need to allocate > > > an anon_vma. This lets us drop the check in do_wp_page(). > > > > > > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> > > > --- > > > mm/memory.c | 39 ++++++++++++++++++++++++++------------- > > > 1 file changed, 26 insertions(+), 13 deletions(-) > > > > > > diff --git a/mm/memory.c b/mm/memory.c > > > index 97f860d6cd2a..cff78c496728 100644 > > > --- a/mm/memory.c > > > +++ b/mm/memory.c > > > @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf) > > > count_vm_event(PGREUSE); > > > } > > > > > > +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) > > > +{ > > > + struct vm_area_struct *vma = vmf->vma; > > > + > > > + if (likely(vma->anon_vma)) > > > + return 0; > > > + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { > > > > I don't think the above condition will happen today because > > lock_vma_under_rcu() returns NULL and do_page_fault() falls back to > > taking mmap_lock when !vma->anon_vma > > (https://elixir.bootlin.com/linux/v6.6-rc3/source/mm/memory.c#L5428). > > We would need to narrow down that check in lock_vma_under_rcu() to > > make this work here. > > That's only for anon VMAs. For file-backed VMAs, we can get here ... > > handle_pte_fault() > if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { > if (!pte_write(entry)) > return do_wp_page(vmf); > > ie we we have a MAP_PRIVATE of a file, first take a read-fault on it, > then write to it. That causes us to allocate an anon page in this > file-backed VMA, so we need an anon_vma to exist. Oh, sorry, I completely missed that this check was only for anon VMAs. Now it makes sense. Thanks!
diff --git a/mm/memory.c b/mm/memory.c index 97f860d6cd2a..cff78c496728 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3042,6 +3042,21 @@ static inline void wp_page_reuse(struct vm_fault *vmf) count_vm_event(PGREUSE); } +static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + + if (likely(vma->anon_vma)) + return 0; + if (vmf->flags & FAULT_FLAG_VMA_LOCK) { + vma_end_read(vma); + return VM_FAULT_RETRY; + } + if (__anon_vma_prepare(vma)) + return VM_FAULT_OOM; + return 0; +} + /* * Handle the case of a page which we actually need to copy to a new page, * either due to COW or unsharing. @@ -3069,27 +3084,29 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) pte_t entry; int page_copied = 0; struct mmu_notifier_range range; - int ret; + vm_fault_t ret; delayacct_wpcopy_start(); if (vmf->page) old_folio = page_folio(vmf->page); - if (unlikely(anon_vma_prepare(vma))) - goto oom; + ret = vmf_anon_prepare(vmf); + if (unlikely(ret)) + goto out; if (is_zero_pfn(pte_pfn(vmf->orig_pte))) { new_folio = vma_alloc_zeroed_movable_folio(vma, vmf->address); if (!new_folio) goto oom; } else { + int err; new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address, false); if (!new_folio) goto oom; - ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); - if (ret) { + err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); + if (err) { /* * COW failed, if the fault was solved by other, * it's fine. If not, userspace would re-fault on @@ -3102,7 +3119,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) folio_put(old_folio); delayacct_wpcopy_end(); - return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0; + return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; } kmsan_copy_page_meta(&new_folio->page, vmf->page); } @@ -3212,11 +3229,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) oom_free_new: folio_put(new_folio); oom: + ret = VM_FAULT_OOM; +out: if (old_folio) folio_put(old_folio); delayacct_wpcopy_end(); - return VM_FAULT_OOM; + return ret; } /** @@ -3458,12 +3477,6 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) return 0; } copy: - if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - vma_end_read(vmf->vma); - return VM_FAULT_RETRY; - } - /* * Ok, we need to copy. Oh, well.. */
It is usually safe to call wp_page_copy() under the VMA lock. The only unsafe situation is when no anon_vma has been allocated for this VMA, and we have to look at adjacent VMAs to determine if their anon_vma can be shared. Since this happens only for the first COW of a page in this VMA, the majority of calls to wp_page_copy() do not need to fall back to the mmap_sem. Add vmf_anon_prepare() as an alternative to anon_vma_prepare() which will return RETRY if we currently hold the VMA lock and need to allocate an anon_vma. This lets us drop the check in do_wp_page(). Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org> --- mm/memory.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-)