Message ID | 20240129143221.263763-6-david@redhat.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/memory: optimize unmap/zap with PTE-mapped THP | expand |
On 29/01/2024 14:32, David Hildenbrand wrote:
> We have two bits available in the encoded page pointer to store
> additional information. Currently, we use one bit to request delay of the
> rmap removal until after a TLB flush.
>
> We want to make use of the remaining bit internally for batching of
> multiple pages of the same folio, specifying that the next encoded page
> pointer in an array is actually "nr_pages". So pass page + delay_rmap flag
> instead of an encoded page, to handle the encoding internally.
>
> Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>

> ---
>  arch/s390/include/asm/tlb.h | 13 ++++++-------
>  include/asm-generic/tlb.h   | 12 ++++++------
>  mm/mmu_gather.c             |  7 ++++---
>  3 files changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
> index d1455a601adc..48df896d5b79 100644
> --- a/arch/s390/include/asm/tlb.h
> +++ b/arch/s390/include/asm/tlb.h
> @@ -25,8 +25,7 @@
>  void __tlb_remove_table(void *_table);
>  static inline void tlb_flush(struct mmu_gather *tlb);
>  static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
> -					  struct encoded_page *page,
> -					  int page_size);
> +		struct page *page, bool delay_rmap, int page_size);
>
>  #define tlb_flush tlb_flush
>  #define pte_free_tlb pte_free_tlb
> @@ -42,14 +41,14 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
>   * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
>   * has already been freed, so just do free_page_and_swap_cache.
>   *
> - * s390 doesn't delay rmap removal, so there is nothing encoded in
> - * the page pointer.
> + * s390 doesn't delay rmap removal.
>   */
>  static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
> -					  struct encoded_page *page,
> -					  int page_size)
> +		struct page *page, bool delay_rmap, int page_size)
>  {
> -	free_page_and_swap_cache(encoded_page_ptr(page));
> +	VM_WARN_ON_ONCE(delay_rmap);
> +
> +	free_page_and_swap_cache(page);
>  	return false;
>  }
>
> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
> index 129a3a759976..2eb7b0d4f5d2 100644
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -260,9 +260,8 @@ struct mmu_gather_batch {
>   */
>  #define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)
>
> -extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
> -				   struct encoded_page *page,
> -				   int page_size);
> +extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
> +		bool delay_rmap, int page_size);
>
>  #ifdef CONFIG_SMP
>  /*
> @@ -462,13 +461,14 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
>  static inline void tlb_remove_page_size(struct mmu_gather *tlb,
>  					struct page *page, int page_size)
>  {
> -	if (__tlb_remove_page_size(tlb, encode_page(page, 0), page_size))
> +	if (__tlb_remove_page_size(tlb, page, false, page_size))
>  		tlb_flush_mmu(tlb);
>  }
>
> -static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags)
> +static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb,
> +		struct page *page, bool delay_rmap)
>  {
> -	return __tlb_remove_page_size(tlb, encode_page(page, flags), PAGE_SIZE);
> +	return __tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE);
>  }
>
>  /* tlb_remove_page
> diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
> index 604ddf08affe..ac733d81b112 100644
> --- a/mm/mmu_gather.c
> +++ b/mm/mmu_gather.c
> @@ -116,7 +116,8 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
>  	tlb->local.next = NULL;
>  }
>
> -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size)
> +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
> +		bool delay_rmap, int page_size)
>  {
>  	struct mmu_gather_batch *batch;
>
> @@ -131,13 +132,13 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, i
>  	 * Add the page and check if we are full. If so
>  	 * force a flush.
>  	 */
> -	batch->encoded_pages[batch->nr++] = page;
> +	batch->encoded_pages[batch->nr++] = encode_page(page, delay_rmap);
>  	if (batch->nr == batch->max) {
>  		if (!tlb_next_batch(tlb))
>  			return true;
>  		batch = tlb->active;
>  	}
> -	VM_BUG_ON_PAGE(batch->nr > batch->max, encoded_page_ptr(page));
> +	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
>
>  	return false;
>  }
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index d1455a601adc..48df896d5b79 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -25,8 +25,7 @@
 void __tlb_remove_table(void *_table);
 static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct encoded_page *page,
-					  int page_size);
+		struct page *page, bool delay_rmap, int page_size);
 
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
@@ -42,14 +41,14 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  *
- * s390 doesn't delay rmap removal, so there is nothing encoded in
- * the page pointer.
+ * s390 doesn't delay rmap removal.
  */
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct encoded_page *page,
-					  int page_size)
+		struct page *page, bool delay_rmap, int page_size)
 {
-	free_page_and_swap_cache(encoded_page_ptr(page));
+	VM_WARN_ON_ONCE(delay_rmap);
+
+	free_page_and_swap_cache(page);
 	return false;
 }
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 129a3a759976..2eb7b0d4f5d2 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -260,9 +260,8 @@ struct mmu_gather_batch {
  */
 #define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)
 
-extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
-				   struct encoded_page *page,
-				   int page_size);
+extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+		bool delay_rmap, int page_size);
 
 #ifdef CONFIG_SMP
 /*
@@ -462,13 +461,14 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
 					struct page *page, int page_size)
 {
-	if (__tlb_remove_page_size(tlb, encode_page(page, 0), page_size))
+	if (__tlb_remove_page_size(tlb, page, false, page_size))
 		tlb_flush_mmu(tlb);
 }
 
-static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags)
+static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb,
+		struct page *page, bool delay_rmap)
 {
-	return __tlb_remove_page_size(tlb, encode_page(page, flags), PAGE_SIZE);
+	return __tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE);
 }
 
 /* tlb_remove_page
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 604ddf08affe..ac733d81b112 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -116,7 +116,8 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
 	tlb->local.next = NULL;
 }
 
-bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size)
+bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+		bool delay_rmap, int page_size)
 {
 	struct mmu_gather_batch *batch;
 
@@ -131,13 +132,13 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, i
 	 * Add the page and check if we are full. If so
 	 * force a flush.
 	 */
-	batch->encoded_pages[batch->nr++] = page;
+	batch->encoded_pages[batch->nr++] = encode_page(page, delay_rmap);
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return true;
 		batch = tlb->active;
 	}
-	VM_BUG_ON_PAGE(batch->nr > batch->max, encoded_page_ptr(page));
+	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
 
 	return false;
 }
We have two bits available in the encoded page pointer to store
additional information. Currently, we use one bit to request delay of the
rmap removal until after a TLB flush.

We want to make use of the remaining bit internally for batching of
multiple pages of the same folio, specifying that the next encoded page
pointer in an array is actually "nr_pages". So pass page + delay_rmap flag
instead of an encoded page, to handle the encoding internally.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/s390/include/asm/tlb.h | 13 ++++++-------
 include/asm-generic/tlb.h   | 12 ++++++------
 mm/mmu_gather.c             |  7 ++++---
 3 files changed, 16 insertions(+), 16 deletions(-)