Message ID | 20240401202651.31440-4-vishal.moola@gmail.com (mailing list archive)
---|---
State | New
Series | Hugetlb fault path to use struct vm_fault
On Mon, Apr 01, 2024 at 01:26:51PM -0700, Vishal Moola (Oracle) wrote:
> hugetlb_wp() can use the struct vm_fault passed in from hugetlb_fault().
> This alleviates the stack by consolidating 5 variables into a single
> struct.
>
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>

Reviewed-by: Oscar Salvador <osalvador@suse.de>
On 2024/4/2 04:26, Vishal Moola (Oracle) wrote:
> hugetlb_wp() can use the struct vm_fault passed in from hugetlb_fault().
> This alleviates the stack by consolidating 5 variables into a single
> struct.
>
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
> ---
>   mm/hugetlb.c | 61 ++++++++++++++++++++++++++--------------------------
>   1 file changed, 30 insertions(+), 31 deletions(-)
>
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index aca2f11b4138..d4f26947173e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -5918,18 +5918,16 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
>    * Keep the pte_same checks anyway to make transition from the mutex easier.
>    */
>   static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
> -		unsigned long address, pte_t *ptep, unsigned int flags,
> -		struct folio *pagecache_folio, spinlock_t *ptl,
> +		struct folio *pagecache_folio,

The same as comment in the previous thread.

Muchun,
Thanks.

>   		struct vm_fault *vmf)
>   {
> -	const bool unshare = flags & FAULT_FLAG_UNSHARE;
> -	pte_t pte = huge_ptep_get(ptep);
> +	const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
> +	pte_t pte = huge_ptep_get(vmf->pte);
>   	struct hstate *h = hstate_vma(vma);
>   	struct folio *old_folio;
>   	struct folio *new_folio;
>   	int outside_reserve = 0;
>   	vm_fault_t ret = 0;
> -	unsigned long haddr = address & huge_page_mask(h);
>   	struct mmu_notifier_range range;
>
>   	/*
> @@ -5952,7 +5950,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
>
>   	/* Let's take out MAP_SHARED mappings first. */
>   	if (vma->vm_flags & VM_MAYSHARE) {
> -		set_huge_ptep_writable(vma, haddr, ptep);
> +		set_huge_ptep_writable(vma, vmf->address, vmf->pte);
>   		return 0;
>   	}
>
> @@ -5971,7 +5969,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
>   			SetPageAnonExclusive(&old_folio->page);
>   		}
>   		if (likely(!unshare))
> -			set_huge_ptep_writable(vma, haddr, ptep);
> +			set_huge_ptep_writable(vma, vmf->address, vmf->pte);
>
>   		delayacct_wpcopy_end();
>   		return 0;
> @@ -5998,8 +5996,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
>   	 * Drop page table lock as buddy allocator may be called. It will
>   	 * be acquired again before returning to the caller, as expected.
>   	 */
> -	spin_unlock(ptl);
> -	new_folio = alloc_hugetlb_folio(vma, haddr, outside_reserve);
> +	spin_unlock(vmf->ptl);
> +	new_folio = alloc_hugetlb_folio(vma, vmf->address, outside_reserve);
>
>   	if (IS_ERR(new_folio)) {
>   		/*
> @@ -6024,19 +6022,21 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
>   		 *
>   		 * Reacquire both after unmap operation.
>   		 */
> -		idx = vma_hugecache_offset(h, vma, haddr);
> +		idx = vma_hugecache_offset(h, vma, vmf->address);
>   		hash = hugetlb_fault_mutex_hash(mapping, idx);
>   		hugetlb_vma_unlock_read(vma);
>   		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
>
> -		unmap_ref_private(mm, vma, &old_folio->page, haddr);
> +		unmap_ref_private(mm, vma, &old_folio->page,
> +				vmf->address);
>
>   		mutex_lock(&hugetlb_fault_mutex_table[hash]);
>   		hugetlb_vma_lock_read(vma);
> -		spin_lock(ptl);
> -		ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
> -		if (likely(ptep &&
> -			   pte_same(huge_ptep_get(ptep), pte)))
> +		spin_lock(vmf->ptl);
> +		vmf->pte = hugetlb_walk(vma, vmf->address,
> +				huge_page_size(h));
> +		if (likely(vmf->pte &&
> +			   pte_same(huge_ptep_get(vmf->pte), pte)))
>   			goto retry_avoidcopy;
>   		/*
>   		 * race occurs while re-acquiring page table
> @@ -6058,37 +6058,38 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
>   	if (unlikely(ret))
>   		goto out_release_all;
>
> -	if (copy_user_large_folio(new_folio, old_folio, address, vma)) {
> +	if (copy_user_large_folio(new_folio, old_folio, vmf->real_address, vma)) {
>   		ret = VM_FAULT_HWPOISON_LARGE;
>   		goto out_release_all;
>   	}
>   	__folio_mark_uptodate(new_folio);
>
> -	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
> -				haddr + huge_page_size(h));
> +	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, vmf->address,
> +				vmf->address + huge_page_size(h));
>   	mmu_notifier_invalidate_range_start(&range);
>
>   	/*
>   	 * Retake the page table lock to check for racing updates
>   	 * before the page tables are altered
>   	 */
> -	spin_lock(ptl);
> -	ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
> -	if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
> +	spin_lock(vmf->ptl);
> +	vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h));
> +	if (likely(vmf->pte && pte_same(huge_ptep_get(vmf->pte), pte))) {
>   		pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
>
>   		/* Break COW or unshare */
> -		huge_ptep_clear_flush(vma, haddr, ptep);
> +		huge_ptep_clear_flush(vma, vmf->address, vmf->pte);
>   		hugetlb_remove_rmap(old_folio);
> -		hugetlb_add_new_anon_rmap(new_folio, vma, haddr);
> +		hugetlb_add_new_anon_rmap(new_folio, vma, vmf->address);
>   		if (huge_pte_uffd_wp(pte))
>   			newpte = huge_pte_mkuffd_wp(newpte);
> -		set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h));
> +		set_huge_pte_at(mm, vmf->address, vmf->pte, newpte,
> +				huge_page_size(h));
>   		folio_set_hugetlb_migratable(new_folio);
>   		/* Make the old page be freed below */
>   		new_folio = old_folio;
>   	}
> -	spin_unlock(ptl);
> +	spin_unlock(vmf->ptl);
>   	mmu_notifier_invalidate_range_end(&range);
>   out_release_all:
>   	/*
> @@ -6096,12 +6097,12 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
>   	 * unshare)
>   	 */
>   	if (new_folio != old_folio)
> -		restore_reserve_on_error(h, vma, haddr, new_folio);
> +		restore_reserve_on_error(h, vma, vmf->address, new_folio);
>   	folio_put(new_folio);
>   out_release_old:
>   	folio_put(old_folio);
>
> -	spin_lock(ptl);	/* Caller expects lock to be held */
> +	spin_lock(vmf->ptl);	/* Caller expects lock to be held */
>
>   	delayacct_wpcopy_end();
>   	return ret;
> @@ -6365,8 +6366,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
>   		hugetlb_count_add(pages_per_huge_page(h), mm);
>   	if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
>   		/* Optimization, do the COW without a second fault */
> -		ret = hugetlb_wp(mm, vma, vmf->real_address, vmf->pte,
> -				 vmf->flags, folio, vmf->ptl, vmf);
> +		ret = hugetlb_wp(mm, vma, folio, vmf);
>   	}
>
>   	spin_unlock(vmf->ptl);
> @@ -6579,8 +6579,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>
>   	if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
>   		if (!huge_pte_write(vmf.orig_pte)) {
> -			ret = hugetlb_wp(mm, vma, address, vmf.pte, flags,
> -					 pagecache_folio, vmf.ptl, &vmf);
> +			ret = hugetlb_wp(mm, vma, pagecache_folio, &vmf);
>   			goto out_put_page;
>   		} else if (likely(flags & FAULT_FLAG_WRITE)) {
>   			vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
On Sun, Apr 07, 2024 at 05:12:42PM +0800, Muchun Song wrote:
>
>
> On 2024/4/2 04:26, Vishal Moola (Oracle) wrote:
> > hugetlb_wp() can use the struct vm_fault passed in from hugetlb_fault().
> > This alleviates the stack by consolidating 5 variables into a single
> > struct.
> >
> > Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
> > ---
> >   mm/hugetlb.c | 61 ++++++++++++++++++++++++++--------------------------
> >   1 file changed, 30 insertions(+), 31 deletions(-)
> >
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index aca2f11b4138..d4f26947173e 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -5918,18 +5918,16 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
> >    * Keep the pte_same checks anyway to make transition from the mutex easier.
> >    */
> >   static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
> > -		unsigned long address, pte_t *ptep, unsigned int flags,
> > -		struct folio *pagecache_folio, spinlock_t *ptl,
> > +		struct folio *pagecache_folio,
>
> The same as comment in the previous thread.

And fold the attached patch into here as well please Andrew?

From f4adcf13ecc15a6733af43649756e53457078221 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Mon, 8 Apr 2024 10:21:44 -0700
Subject: [PATCH 2/2] hugetlb: Simplyfy hugetlb_wp() arguments

To simplify the function arguments, as suggested by
Oscar and Muchun.

Suggested-by: Muchun Song <muchun.song@linux.dev>
Suggested-by: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
---
 mm/hugetlb.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 05fe610f4699..0d96a41efde8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5915,10 +5915,11 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  * cannot race with other handlers or page migration.
  * Keep the pte_same checks anyway to make transition from the mutex easier.
  */
-static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
-		struct folio *pagecache_folio,
+static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 		struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
+	struct mm_struct *mm = vma->vm_mm;
 	const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
 	pte_t pte = huge_ptep_get(vmf->pte);
 	struct hstate *h = hstate_vma(vma);
@@ -6364,7 +6365,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping,
 		hugetlb_count_add(pages_per_huge_page(h), mm);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_wp(mm, vma, folio, vmf);
+		ret = hugetlb_wp(folio, vmf);
 	}
 
 	spin_unlock(vmf->ptl);
@@ -6577,7 +6578,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
 		if (!huge_pte_write(vmf.orig_pte)) {
-			ret = hugetlb_wp(mm, vma, pagecache_folio, &vmf);
+			ret = hugetlb_wp(pagecache_folio, &vmf);
 			goto out_put_page;
 		} else if (likely(flags & FAULT_FLAG_WRITE)) {
 			vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
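[Editorial note] The fix-up above follows a pattern worth spelling out: once a function
receives the fault descriptor, anything reachable from it (here the VMA and the mm) no
longer needs its own parameter and can be derived locally. A minimal stand-alone sketch
of that idea, using hypothetical struct names rather than the kernel's types:

/* Hypothetical stand-ins for illustration only; not the kernel structs. */
#include <stdio.h>

struct mm_ctx  { unsigned long nr_faults; };
struct vma_ctx { struct mm_ctx *vm_mm; };
struct fault_ctx {
	struct vma_ctx *vma;		/* plays the role of vmf->vma */
	unsigned long address;
};

/*
 * One argument; the VMA and mm are recovered from it, mirroring
 * vma = vmf->vma; mm = vma->vm_mm; in the fix-up above.
 */
static void handle_wp(struct fault_ctx *f)
{
	struct vma_ctx *vma = f->vma;
	struct mm_ctx *mm = vma->vm_mm;

	mm->nr_faults++;
	printf("fault at %#lx, total %lu\n", f->address, mm->nr_faults);
}

int main(void)
{
	struct mm_ctx mm = { 0 };
	struct vma_ctx vma = { .vm_mm = &mm };
	struct fault_ctx f = { .vma = &vma, .address = 0x200000 };

	handle_wp(&f);
	return 0;
}

The trade-off is an extra pointer dereference per access inside the callee in exchange
for a shorter argument list at every call site.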
On Mon, Apr 08, 2024 at 10:47:12AM -0700, Vishal Moola wrote:
> -static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
> -		struct folio *pagecache_folio,
> +static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
>  		struct vm_fault *vmf)
>  {
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct mm_struct *mm = vma->vm_mm;

I think 'vmf' should be the first parameter, not the second.  Compare:

static vm_fault_t do_page_mkwrite(struct vm_fault *vmf, struct folio *folio)
static inline void wp_page_reuse(struct vm_fault *vmf, struct folio *folio)
static vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf, struct folio *folio)
static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio)
vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
void set_pte_range(struct vm_fault *vmf, struct folio *folio,
		struct page *page, unsigned int nr, unsigned long addr)
int numa_migrate_prep(struct folio *folio, struct vm_fault *vmf,
		      unsigned long addr, int page_nid, int *flags)
static void numa_rebuild_single_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
					unsigned long fault_addr, pte_t *fault_pte,
					bool writable)
static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
				       struct folio *folio, pte_t fault_pte,
				       bool ignore_writable, bool pte_write_upgrade)
static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)

numa_migrate_prep() is the only one which doesn't have vmf as the first
param.  It's a subtle inconsistency, but one you notice after a while ...
then wish you'd done right the first time, but can't quite bring yourself
to submit a patch to fix ;-)
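[Editorial note] For reference, the reordering suggested above would give hugetlb_wp()
a prototype along these lines (a hypothetical sketch, not something posted in this
thread):

static vm_fault_t hugetlb_wp(struct vm_fault *vmf,
		struct folio *pagecache_folio);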
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index aca2f11b4138..d4f26947173e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5918,18 +5918,16 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
  * Keep the pte_same checks anyway to make transition from the mutex easier.
  */
 static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *ptep, unsigned int flags,
-		struct folio *pagecache_folio, spinlock_t *ptl,
+		struct folio *pagecache_folio,
 		struct vm_fault *vmf)
 {
-	const bool unshare = flags & FAULT_FLAG_UNSHARE;
-	pte_t pte = huge_ptep_get(ptep);
+	const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
+	pte_t pte = huge_ptep_get(vmf->pte);
 	struct hstate *h = hstate_vma(vma);
 	struct folio *old_folio;
 	struct folio *new_folio;
 	int outside_reserve = 0;
 	vm_fault_t ret = 0;
-	unsigned long haddr = address & huge_page_mask(h);
 	struct mmu_notifier_range range;
 
 	/*
@@ -5952,7 +5950,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Let's take out MAP_SHARED mappings first. */
 	if (vma->vm_flags & VM_MAYSHARE) {
-		set_huge_ptep_writable(vma, haddr, ptep);
+		set_huge_ptep_writable(vma, vmf->address, vmf->pte);
 		return 0;
 	}
 
@@ -5971,7 +5969,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 			SetPageAnonExclusive(&old_folio->page);
 		}
 		if (likely(!unshare))
-			set_huge_ptep_writable(vma, haddr, ptep);
+			set_huge_ptep_writable(vma, vmf->address, vmf->pte);
 
 		delayacct_wpcopy_end();
 		return 0;
@@ -5998,8 +5996,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Drop page table lock as buddy allocator may be called. It will
 	 * be acquired again before returning to the caller, as expected.
 	 */
-	spin_unlock(ptl);
-	new_folio = alloc_hugetlb_folio(vma, haddr, outside_reserve);
+	spin_unlock(vmf->ptl);
+	new_folio = alloc_hugetlb_folio(vma, vmf->address, outside_reserve);
 
 	if (IS_ERR(new_folio)) {
 		/*
@@ -6024,19 +6022,21 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 		 *
 		 * Reacquire both after unmap operation.
 		 */
-		idx = vma_hugecache_offset(h, vma, haddr);
+		idx = vma_hugecache_offset(h, vma, vmf->address);
 		hash = hugetlb_fault_mutex_hash(mapping, idx);
 		hugetlb_vma_unlock_read(vma);
 		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
-		unmap_ref_private(mm, vma, &old_folio->page, haddr);
+		unmap_ref_private(mm, vma, &old_folio->page,
+				vmf->address);
 
 		mutex_lock(&hugetlb_fault_mutex_table[hash]);
 		hugetlb_vma_lock_read(vma);
-		spin_lock(ptl);
-		ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
-		if (likely(ptep &&
-			   pte_same(huge_ptep_get(ptep), pte)))
+		spin_lock(vmf->ptl);
+		vmf->pte = hugetlb_walk(vma, vmf->address,
+				huge_page_size(h));
+		if (likely(vmf->pte &&
+			   pte_same(huge_ptep_get(vmf->pte), pte)))
 			goto retry_avoidcopy;
 		/*
 		 * race occurs while re-acquiring page table
@@ -6058,37 +6058,38 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (unlikely(ret))
 		goto out_release_all;
 
-	if (copy_user_large_folio(new_folio, old_folio, address, vma)) {
+	if (copy_user_large_folio(new_folio, old_folio, vmf->real_address, vma)) {
 		ret = VM_FAULT_HWPOISON_LARGE;
 		goto out_release_all;
 	}
 	__folio_mark_uptodate(new_folio);
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddr,
-				haddr + huge_page_size(h));
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, vmf->address,
+				vmf->address + huge_page_size(h));
 	mmu_notifier_invalidate_range_start(&range);
 
 	/*
 	 * Retake the page table lock to check for racing updates
 	 * before the page tables are altered
 	 */
-	spin_lock(ptl);
-	ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
-	if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
+	spin_lock(vmf->ptl);
+	vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h));
+	if (likely(vmf->pte && pte_same(huge_ptep_get(vmf->pte), pte))) {
 		pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
 
 		/* Break COW or unshare */
-		huge_ptep_clear_flush(vma, haddr, ptep);
+		huge_ptep_clear_flush(vma, vmf->address, vmf->pte);
 		hugetlb_remove_rmap(old_folio);
-		hugetlb_add_new_anon_rmap(new_folio, vma, haddr);
+		hugetlb_add_new_anon_rmap(new_folio, vma, vmf->address);
 		if (huge_pte_uffd_wp(pte))
 			newpte = huge_pte_mkuffd_wp(newpte);
-		set_huge_pte_at(mm, haddr, ptep, newpte, huge_page_size(h));
+		set_huge_pte_at(mm, vmf->address, vmf->pte, newpte,
+				huge_page_size(h));
 		folio_set_hugetlb_migratable(new_folio);
 		/* Make the old page be freed below */
 		new_folio = old_folio;
 	}
-	spin_unlock(ptl);
+	spin_unlock(vmf->ptl);
 	mmu_notifier_invalidate_range_end(&range);
 out_release_all:
 	/*
@@ -6096,12 +6097,12 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * unshare)
 	 */
 	if (new_folio != old_folio)
-		restore_reserve_on_error(h, vma, haddr, new_folio);
+		restore_reserve_on_error(h, vma, vmf->address, new_folio);
 	folio_put(new_folio);
 out_release_old:
 	folio_put(old_folio);
 
-	spin_lock(ptl);	/* Caller expects lock to be held */
+	spin_lock(vmf->ptl);	/* Caller expects lock to be held */
 
 	delayacct_wpcopy_end();
 	return ret;
@@ -6365,8 +6366,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		hugetlb_count_add(pages_per_huge_page(h), mm);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
 		/* Optimization, do the COW without a second fault */
-		ret = hugetlb_wp(mm, vma, vmf->real_address, vmf->pte,
-				 vmf->flags, folio, vmf->ptl, vmf);
+		ret = hugetlb_wp(mm, vma, folio, vmf);
 	}
 
 	spin_unlock(vmf->ptl);
@@ -6579,8 +6579,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) {
 		if (!huge_pte_write(vmf.orig_pte)) {
-			ret = hugetlb_wp(mm, vma, address, vmf.pte, flags,
-					 pagecache_folio, vmf.ptl, &vmf);
+			ret = hugetlb_wp(mm, vma, pagecache_folio, &vmf);
 			goto out_put_page;
 		} else if (likely(flags & FAULT_FLAG_WRITE)) {
 			vmf.orig_pte = huge_pte_mkdirty(vmf.orig_pte);
hugetlb_wp() can use the struct vm_fault passed in from hugetlb_fault().
This alleviates the stack by consolidating 5 variables into a single
struct.

Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
---
 mm/hugetlb.c | 61 ++++++++++++++++++++++++++--------------------------
 1 file changed, 30 insertions(+), 31 deletions(-)
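[Editorial note] For readers outside mm/, the shape of the change can be shown with a
small self-contained sketch (hypothetical names, not the kernel API): rather than
threading five related values through the call as separate scalars, the caller fills
one descriptor struct and passes a pointer to it, shrinking both the argument list and
the callee's spilled locals.

/*
 * Minimal sketch of the refactor pattern only; fault_ctx and the two
 * handle_wp() variants are hypothetical stand-ins, not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct fault_ctx {			/* plays the role of struct vm_fault */
	unsigned long address;		/* faulting address */
	unsigned int flags;		/* fault flags */
	void *pte;			/* page table entry pointer */
	void *ptl;			/* page table lock */
	bool unshare;
};

/* Before: five loosely related values passed (and spilled) separately. */
static int handle_wp_old(unsigned long address, void *pte, unsigned int flags,
			 void *ptl, bool unshare)
{
	printf("old: addr=%#lx flags=%#x unshare=%d\n", address, flags, unshare);
	return 0;
}

/* After: one pointer to a descriptor the caller already owns. */
static int handle_wp_new(struct fault_ctx *ctx)
{
	printf("new: addr=%#lx flags=%#x unshare=%d\n",
	       ctx->address, ctx->flags, ctx->unshare);
	return 0;
}

int main(void)
{
	struct fault_ctx ctx = {
		.address = 0x200000, .flags = 0x1, .pte = NULL, .ptl = NULL,
		.unshare = false,
	};

	handle_wp_old(ctx.address, ctx.pte, ctx.flags, ctx.ptl, ctx.unshare);
	handle_wp_new(&ctx);		/* one argument instead of five */
	return 0;
}

The callee still reads the same values, now through ctx->, so the saving is in
argument passing and duplicated locals rather than in the total amount of state.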