Message ID | 20240129143221.263763-2-david@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/memory: optimize unmap/zap with PTE-mapped THP | expand |
On 29/01/2024 14:32, David Hildenbrand wrote: > Let's prepare for further changes by factoring out processing of present > PTEs. > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > mm/memory.c | 92 ++++++++++++++++++++++++++++++----------------------- > 1 file changed, 52 insertions(+), 40 deletions(-) > > diff --git a/mm/memory.c b/mm/memory.c > index b05fd28dbce1..50a6c79c78fc 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, > pte_install_uffd_wp_if_needed(vma, addr, pte, pteval); > } > > +static inline void zap_present_pte(struct mmu_gather *tlb, > + struct vm_area_struct *vma, pte_t *pte, pte_t ptent, > + unsigned long addr, struct zap_details *details, > + int *rss, bool *force_flush, bool *force_break) > +{ > + struct mm_struct *mm = tlb->mm; > + bool delay_rmap = false; > + struct folio *folio; You need to init this to NULL otherwise its a random value when calling should_zap_folio() if vm_normal_page() returns NULL. 
> + struct page *page; > + > + page = vm_normal_page(vma, addr, ptent); > + if (page) > + folio = page_folio(page); > + > + if (unlikely(!should_zap_folio(details, folio))) > + return; > + ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); > + arch_check_zapped_pte(vma, ptent); > + tlb_remove_tlb_entry(tlb, pte, addr); > + zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent); > + if (unlikely(!page)) { > + ksm_might_unmap_zero_page(mm, ptent); > + return; > + } > + > + if (!folio_test_anon(folio)) { > + if (pte_dirty(ptent)) { > + folio_mark_dirty(folio); > + if (tlb_delay_rmap(tlb)) { > + delay_rmap = true; > + *force_flush = true; > + } > + } > + if (pte_young(ptent) && likely(vma_has_recency(vma))) > + folio_mark_accessed(folio); > + } > + rss[mm_counter(folio)]--; > + if (!delay_rmap) { > + folio_remove_rmap_pte(folio, page, vma); > + if (unlikely(page_mapcount(page) < 0)) > + print_bad_pte(vma, addr, ptent, page); > + } > + if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) { > + *force_flush = true; > + *force_break = true; > + } > +} > + > static unsigned long zap_pte_range(struct mmu_gather *tlb, > struct vm_area_struct *vma, pmd_t *pmd, > unsigned long addr, unsigned long end, > struct zap_details *details) > { > + bool force_flush = false, force_break = false; > struct mm_struct *mm = tlb->mm; > - int force_flush = 0; > int rss[NR_MM_COUNTERS]; > spinlock_t *ptl; > pte_t *start_pte; > @@ -1565,45 +1613,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, > break; > > if (pte_present(ptent)) { > - unsigned int delay_rmap; > - > - page = vm_normal_page(vma, addr, ptent); > - if (page) > - folio = page_folio(page); > - > - if (unlikely(!should_zap_folio(details, folio))) > - continue; > - ptent = ptep_get_and_clear_full(mm, addr, pte, > - tlb->fullmm); > - arch_check_zapped_pte(vma, ptent); > - tlb_remove_tlb_entry(tlb, pte, addr); > - zap_install_uffd_wp_if_needed(vma, addr, pte, details, > - ptent); > - if 
(unlikely(!page)) { > - ksm_might_unmap_zero_page(mm, ptent); > - continue; > - } > - > - delay_rmap = 0; > - if (!folio_test_anon(folio)) { > - if (pte_dirty(ptent)) { > - folio_mark_dirty(folio); > - if (tlb_delay_rmap(tlb)) { > - delay_rmap = 1; > - force_flush = 1; > - } > - } > - if (pte_young(ptent) && likely(vma_has_recency(vma))) > - folio_mark_accessed(folio); > - } > - rss[mm_counter(folio)]--; > - if (!delay_rmap) { > - folio_remove_rmap_pte(folio, page, vma); > - if (unlikely(page_mapcount(page) < 0)) > - print_bad_pte(vma, addr, ptent, page); > - } > - if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) { > - force_flush = 1; > + zap_present_pte(tlb, vma, pte, ptent, addr, details, > + rss, &force_flush, &force_break); > + if (unlikely(force_break)) { > addr += PAGE_SIZE; > break; > }
On 30.01.24 09:13, Ryan Roberts wrote: > On 29/01/2024 14:32, David Hildenbrand wrote: >> Let's prepare for further changes by factoring out processing of present >> PTEs. >> >> Signed-off-by: David Hildenbrand <david@redhat.com> >> --- >> mm/memory.c | 92 ++++++++++++++++++++++++++++++----------------------- >> 1 file changed, 52 insertions(+), 40 deletions(-) >> >> diff --git a/mm/memory.c b/mm/memory.c >> index b05fd28dbce1..50a6c79c78fc 100644 >> --- a/mm/memory.c >> +++ b/mm/memory.c >> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, >> pte_install_uffd_wp_if_needed(vma, addr, pte, pteval); >> } >> >> +static inline void zap_present_pte(struct mmu_gather *tlb, >> + struct vm_area_struct *vma, pte_t *pte, pte_t ptent, >> + unsigned long addr, struct zap_details *details, >> + int *rss, bool *force_flush, bool *force_break) >> +{ >> + struct mm_struct *mm = tlb->mm; >> + bool delay_rmap = false; >> + struct folio *folio; > > You need to init this to NULL otherwise its a random value when calling > should_zap_folio() if vm_normal_page() returns NULL. Right, and we can stop setting it to NULL in the original function. Patch #2 changes these checks, which is why it's only a problem in this patch. Will fix, thanks!
On 30/01/2024 08:41, David Hildenbrand wrote: > On 30.01.24 09:13, Ryan Roberts wrote: >> On 29/01/2024 14:32, David Hildenbrand wrote: >>> Let's prepare for further changes by factoring out processing of present >>> PTEs. >>> >>> Signed-off-by: David Hildenbrand <david@redhat.com> >>> --- >>> mm/memory.c | 92 ++++++++++++++++++++++++++++++----------------------- >>> 1 file changed, 52 insertions(+), 40 deletions(-) >>> >>> diff --git a/mm/memory.c b/mm/memory.c >>> index b05fd28dbce1..50a6c79c78fc 100644 >>> --- a/mm/memory.c >>> +++ b/mm/memory.c >>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct >>> *vma, >>> pte_install_uffd_wp_if_needed(vma, addr, pte, pteval); >>> } >>> +static inline void zap_present_pte(struct mmu_gather *tlb, >>> + struct vm_area_struct *vma, pte_t *pte, pte_t ptent, >>> + unsigned long addr, struct zap_details *details, >>> + int *rss, bool *force_flush, bool *force_break) >>> +{ >>> + struct mm_struct *mm = tlb->mm; >>> + bool delay_rmap = false; >>> + struct folio *folio; >> >> You need to init this to NULL otherwise its a random value when calling >> should_zap_folio() if vm_normal_page() returns NULL. > > Right, and we can stop setting it to NULL in the original function. Patch #2 > changes these checks, which is why it's only a problem in this patch. Yeah I only noticed that after sending out this reply and moving to the next patch. Still worth fixing this intermediate state I think. > > Will fix, thanks! >
On 30.01.24 09:46, Ryan Roberts wrote: > On 30/01/2024 08:41, David Hildenbrand wrote: >> On 30.01.24 09:13, Ryan Roberts wrote: >>> On 29/01/2024 14:32, David Hildenbrand wrote: >>>> Let's prepare for further changes by factoring out processing of present >>>> PTEs. >>>> >>>> Signed-off-by: David Hildenbrand <david@redhat.com> >>>> --- >>>> mm/memory.c | 92 ++++++++++++++++++++++++++++++----------------------- >>>> 1 file changed, 52 insertions(+), 40 deletions(-) >>>> >>>> diff --git a/mm/memory.c b/mm/memory.c >>>> index b05fd28dbce1..50a6c79c78fc 100644 >>>> --- a/mm/memory.c >>>> +++ b/mm/memory.c >>>> @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct >>>> *vma, >>>> pte_install_uffd_wp_if_needed(vma, addr, pte, pteval); >>>> } >>>> +static inline void zap_present_pte(struct mmu_gather *tlb, >>>> + struct vm_area_struct *vma, pte_t *pte, pte_t ptent, >>>> + unsigned long addr, struct zap_details *details, >>>> + int *rss, bool *force_flush, bool *force_break) >>>> +{ >>>> + struct mm_struct *mm = tlb->mm; >>>> + bool delay_rmap = false; >>>> + struct folio *folio; >>> >>> You need to init this to NULL otherwise its a random value when calling >>> should_zap_folio() if vm_normal_page() returns NULL. >> >> Right, and we can stop setting it to NULL in the original function. Patch #2 >> changes these checks, which is why it's only a problem in this patch. > > Yeah I only noticed that after sending out this reply and moving to the next > patch. Still worth fixing this intermediate state I think. Absolutely, I didn't do patch-by-patch compilation yet (I suspect the compiler would complain).
diff --git a/mm/memory.c b/mm/memory.c index b05fd28dbce1..50a6c79c78fc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1532,13 +1532,61 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, pte_install_uffd_wp_if_needed(vma, addr, pte, pteval); } +static inline void zap_present_pte(struct mmu_gather *tlb, + struct vm_area_struct *vma, pte_t *pte, pte_t ptent, + unsigned long addr, struct zap_details *details, + int *rss, bool *force_flush, bool *force_break) +{ + struct mm_struct *mm = tlb->mm; + bool delay_rmap = false; + struct folio *folio; + struct page *page; + + page = vm_normal_page(vma, addr, ptent); + if (page) + folio = page_folio(page); + + if (unlikely(!should_zap_folio(details, folio))) + return; + ptent = ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); + arch_check_zapped_pte(vma, ptent); + tlb_remove_tlb_entry(tlb, pte, addr); + zap_install_uffd_wp_if_needed(vma, addr, pte, details, ptent); + if (unlikely(!page)) { + ksm_might_unmap_zero_page(mm, ptent); + return; + } + + if (!folio_test_anon(folio)) { + if (pte_dirty(ptent)) { + folio_mark_dirty(folio); + if (tlb_delay_rmap(tlb)) { + delay_rmap = true; + *force_flush = true; + } + } + if (pte_young(ptent) && likely(vma_has_recency(vma))) + folio_mark_accessed(folio); + } + rss[mm_counter(folio)]--; + if (!delay_rmap) { + folio_remove_rmap_pte(folio, page, vma); + if (unlikely(page_mapcount(page) < 0)) + print_bad_pte(vma, addr, ptent, page); + } + if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) { + *force_flush = true; + *force_break = true; + } +} + static unsigned long zap_pte_range(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, struct zap_details *details) { + bool force_flush = false, force_break = false; struct mm_struct *mm = tlb->mm; - int force_flush = 0; int rss[NR_MM_COUNTERS]; spinlock_t *ptl; pte_t *start_pte; @@ -1565,45 +1613,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, break; if 
(pte_present(ptent)) { - unsigned int delay_rmap; - - page = vm_normal_page(vma, addr, ptent); - if (page) - folio = page_folio(page); - - if (unlikely(!should_zap_folio(details, folio))) - continue; - ptent = ptep_get_and_clear_full(mm, addr, pte, - tlb->fullmm); - arch_check_zapped_pte(vma, ptent); - tlb_remove_tlb_entry(tlb, pte, addr); - zap_install_uffd_wp_if_needed(vma, addr, pte, details, - ptent); - if (unlikely(!page)) { - ksm_might_unmap_zero_page(mm, ptent); - continue; - } - - delay_rmap = 0; - if (!folio_test_anon(folio)) { - if (pte_dirty(ptent)) { - folio_mark_dirty(folio); - if (tlb_delay_rmap(tlb)) { - delay_rmap = 1; - force_flush = 1; - } - } - if (pte_young(ptent) && likely(vma_has_recency(vma))) - folio_mark_accessed(folio); - } - rss[mm_counter(folio)]--; - if (!delay_rmap) { - folio_remove_rmap_pte(folio, page, vma); - if (unlikely(page_mapcount(page) < 0)) - print_bad_pte(vma, addr, ptent, page); - } - if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) { - force_flush = 1; + zap_present_pte(tlb, vma, pte, ptent, addr, details, + rss, &force_flush, &force_break); + if (unlikely(force_break)) { addr += PAGE_SIZE; break; }
Let's prepare for further changes by factoring out processing of present PTEs. Signed-off-by: David Hildenbrand <david@redhat.com> --- mm/memory.c | 92 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 40 deletions(-)