Message ID | 20230501192829.17086-6-vishal.moola@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Split ptdesc from struct page | expand |
On Mon, May 01, 2023 at 12:28:00PM -0700, Vishal Moola (Oracle) wrote: > Introduce utility functions setting the foundation for ptdescs. These > will also assist in the splitting out of ptdesc from struct page. > > ptdesc_alloc() is defined to allocate new ptdesc pages as compound > pages. This is to standardize ptdescs by allowing for one allocation > and one free function, in contrast to 2 allocation and 2 free functions. > > Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> > --- > include/asm-generic/tlb.h | 11 ++++++++++ > include/linux/mm.h | 44 +++++++++++++++++++++++++++++++++++++++ > include/linux/pgtable.h | 12 +++++++++++ > 3 files changed, 67 insertions(+) > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > index b46617207c93..6bade9e0e799 100644 > --- a/include/asm-generic/tlb.h > +++ b/include/asm-generic/tlb.h > @@ -481,6 +481,17 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) > return tlb_remove_page_size(tlb, page, PAGE_SIZE); > } > > +static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) > +{ > + tlb_remove_table(tlb, pt); > +} > + > +/* Like tlb_remove_ptdesc, but for page-like page directories. */ > +static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) > +{ > + tlb_remove_page(tlb, ptdesc_page(pt)); > +} > + > static inline void tlb_change_page_size(struct mmu_gather *tlb, > unsigned int page_size) > { > diff --git a/include/linux/mm.h b/include/linux/mm.h > index b18848ae7e22..258f3b730359 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2744,6 +2744,45 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a > } > #endif /* CONFIG_MMU */ > > +static inline struct ptdesc *virt_to_ptdesc(const void *x) > +{ > + return page_ptdesc(virt_to_head_page(x)); Do we ever use compound pages for page tables? > +} > + > +static inline void *ptdesc_to_virt(const struct ptdesc *pt) > +{ > + return page_to_virt(ptdesc_page(pt)); > +} > + > +static inline void *ptdesc_address(const struct ptdesc *pt) > +{ > + return folio_address(ptdesc_folio(pt)); > +} > + > +static inline bool ptdesc_is_reserved(struct ptdesc *pt) > +{ > + return folio_test_reserved(ptdesc_folio(pt)); > +} > + > +static inline struct ptdesc *ptdesc_alloc(gfp_t gfp, unsigned int order) > +{ > + struct page *page = alloc_pages(gfp | __GFP_COMP, order); > + > + return page_ptdesc(page); > +} > + > +static inline void ptdesc_free(struct ptdesc *pt) > +{ > + struct page *page = ptdesc_page(pt); > + > + __free_pages(page, compound_order(page)); > +} The ptdesc_{alloc,free} API does not sound right to me. The name ptdesc_alloc() implies the allocation of the ptdesc itself, rather than allocation of page table page. The same goes for free. > + > +static inline void ptdesc_clear(void *x) > +{ > + clear_page(x); > +} > + > #if USE_SPLIT_PTE_PTLOCKS > #if ALLOC_SPLIT_PTLOCKS > void __init ptlock_cache_init(void); > @@ -2970,6 +3009,11 @@ static inline void mark_page_reserved(struct page *page) > adjust_managed_page_count(page, -1); > } > > +static inline void free_reserved_ptdesc(struct ptdesc *pt) > +{ > + free_reserved_page(ptdesc_page(pt)); > +} > + > /* > * Default method to free all the __init memory into the buddy system. > * The freed pages will be poisoned with pattern "poison" if it's within > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 5e0f51308724..b067ac10f3dd 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -1041,6 +1041,18 @@ TABLE_MATCH(ptl, ptl); > #undef TABLE_MATCH > static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); > > +#define ptdesc_page(pt) (_Generic((pt), \ > + const struct ptdesc *: (const struct page *)(pt), \ > + struct ptdesc *: (struct page *)(pt))) > + > +#define ptdesc_folio(pt) (_Generic((pt), \ > + const struct ptdesc *: (const struct folio *)(pt), \ > + struct ptdesc *: (struct folio *)(pt))) > + > +#define page_ptdesc(p) (_Generic((p), \ > + const struct page *: (const struct ptdesc *)(p), \ > + struct page *: (struct ptdesc *)(p))) > + > /* > * No-op macros that just return the current protection value. Defined here > * because these macros can be used even if CONFIG_MMU is not defined. > -- > 2.39.2 > >
On Thu, May 25, 2023 at 2:10 AM Mike Rapoport <rppt@kernel.org> wrote: > > On Mon, May 01, 2023 at 12:28:00PM -0700, Vishal Moola (Oracle) wrote: > > Introduce utility functions setting the foundation for ptdescs. These > > will also assist in the splitting out of ptdesc from struct page. > > > > ptdesc_alloc() is defined to allocate new ptdesc pages as compound > > pages. This is to standardize ptdescs by allowing for one allocation > > and one free function, in contrast to 2 allocation and 2 free functions. > > > > Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> > > --- > > include/asm-generic/tlb.h | 11 ++++++++++ > > include/linux/mm.h | 44 +++++++++++++++++++++++++++++++++++++++ > > include/linux/pgtable.h | 12 +++++++++++ > > 3 files changed, 67 insertions(+) > > > > diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h > > index b46617207c93..6bade9e0e799 100644 > > --- a/include/asm-generic/tlb.h > > +++ b/include/asm-generic/tlb.h > > @@ -481,6 +481,17 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) > > return tlb_remove_page_size(tlb, page, PAGE_SIZE); > > } > > > > +static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) > > +{ > > + tlb_remove_table(tlb, pt); > > +} > > + > > +/* Like tlb_remove_ptdesc, but for page-like page directories. */ > > +static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) > > +{ > > + tlb_remove_page(tlb, ptdesc_page(pt)); > > +} > > + > > static inline void tlb_change_page_size(struct mmu_gather *tlb, > > unsigned int page_size) > > { > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > index b18848ae7e22..258f3b730359 100644 > > --- a/include/linux/mm.h > > +++ b/include/linux/mm.h > > @@ -2744,6 +2744,45 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a > > } > > #endif /* CONFIG_MMU */ > > > > +static inline struct ptdesc *virt_to_ptdesc(const void *x) > > +{ > > + return page_ptdesc(virt_to_head_page(x)); > > Do we ever use compound pages for page tables? Mips and s390 crst tables use multi-order (but not compound) pages. The ptdesc api *should* change that, but until all the allocation/free paths are changed it may cause problems. Thanks for catching that, I'll change it in v3. > > +} > > + > > +static inline void *ptdesc_to_virt(const struct ptdesc *pt) > > +{ > > + return page_to_virt(ptdesc_page(pt)); > > +} > > + > > +static inline void *ptdesc_address(const struct ptdesc *pt) > > +{ > > + return folio_address(ptdesc_folio(pt)); > > +} > > + > > +static inline bool ptdesc_is_reserved(struct ptdesc *pt) > > +{ > > + return folio_test_reserved(ptdesc_folio(pt)); > > +} > > + > > +static inline struct ptdesc *ptdesc_alloc(gfp_t gfp, unsigned int order) > > +{ > > + struct page *page = alloc_pages(gfp | __GFP_COMP, order); > > + > > + return page_ptdesc(page); > > +} > > + > > +static inline void ptdesc_free(struct ptdesc *pt) > > +{ > > + struct page *page = ptdesc_page(pt); > > + > > + __free_pages(page, compound_order(page)); > > +} > > The ptdesc_{alloc,free} API does not sound right to me. The name > ptdesc_alloc() implies the allocation of the ptdesc itself, rather than > allocation of page table page. The same goes for free. I'm not sure I see the difference. Could you elaborate? > > + > > +static inline void ptdesc_clear(void *x) > > +{ > > + clear_page(x); > > +} > > + > > #if USE_SPLIT_PTE_PTLOCKS > > #if ALLOC_SPLIT_PTLOCKS > > void __init ptlock_cache_init(void); > > @@ -2970,6 +3009,11 @@ static inline void mark_page_reserved(struct page *page) > > adjust_managed_page_count(page, -1); > > } > > > > +static inline void free_reserved_ptdesc(struct ptdesc *pt) > > +{ > > + free_reserved_page(ptdesc_page(pt)); > > +} > > + > > /* > > * Default method to free all the __init memory into the buddy system. > > * The freed pages will be poisoned with pattern "poison" if it's within > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > > index 5e0f51308724..b067ac10f3dd 100644 > > --- a/include/linux/pgtable.h > > +++ b/include/linux/pgtable.h > > @@ -1041,6 +1041,18 @@ TABLE_MATCH(ptl, ptl); > > #undef TABLE_MATCH > > static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); > > > > +#define ptdesc_page(pt) (_Generic((pt), \ > > + const struct ptdesc *: (const struct page *)(pt), \ > > + struct ptdesc *: (struct page *)(pt))) > > + > > +#define ptdesc_folio(pt) (_Generic((pt), \ > > + const struct ptdesc *: (const struct folio *)(pt), \ > > + struct ptdesc *: (struct folio *)(pt))) > > + > > +#define page_ptdesc(p) (_Generic((p), \ > > + const struct page *: (const struct ptdesc *)(p), \ > > + struct page *: (struct ptdesc *)(p))) > > + > > /* > > * No-op macros that just return the current protection value. Defined here > > * because these macros can be used even if CONFIG_MMU is not defined. > > -- > > 2.39.2 > > > > > > -- > Sincerely yours, > Mike.
On Thu, May 25, 2023 at 11:04:28AM -0700, Vishal Moola wrote: > On Thu, May 25, 2023 at 2:10 AM Mike Rapoport <rppt@kernel.org> wrote: > > > + > > > +static inline struct ptdesc *ptdesc_alloc(gfp_t gfp, unsigned int order) > > > +{ > > > + struct page *page = alloc_pages(gfp | __GFP_COMP, order); > > > + > > > + return page_ptdesc(page); > > > +} > > > + > > > +static inline void ptdesc_free(struct ptdesc *pt) > > > +{ > > > + struct page *page = ptdesc_page(pt); > > > + > > > + __free_pages(page, compound_order(page)); > > > +} > > > > The ptdesc_{alloc,free} API does not sound right to me. The name > > ptdesc_alloc() implies the allocation of the ptdesc itself, rather than > > allocation of page table page. The same goes for free. > > I'm not sure I see the difference. Could you elaborate? I read ptdesc_alloc() as "allocate a ptdesc" rather than as "allocate a page for page table and return ptdesc pointing to that page". Seems very confusing to me already and it will be even more confusion when we'll start allocating actual ptdescs.
On Thu, May 25, 2023 at 1:26 PM Mike Rapoport <rppt@kernel.org> wrote: > > On Thu, May 25, 2023 at 11:04:28AM -0700, Vishal Moola wrote: > > On Thu, May 25, 2023 at 2:10 AM Mike Rapoport <rppt@kernel.org> wrote: > > > > + > > > > +static inline struct ptdesc *ptdesc_alloc(gfp_t gfp, unsigned int order) > > > > +{ > > > > + struct page *page = alloc_pages(gfp | __GFP_COMP, order); > > > > + > > > > + return page_ptdesc(page); > > > > +} > > > > + > > > > +static inline void ptdesc_free(struct ptdesc *pt) > > > > +{ > > > > + struct page *page = ptdesc_page(pt); > > > > + > > > > + __free_pages(page, compound_order(page)); > > > > +} > > > > > > The ptdesc_{alloc,free} API does not sound right to me. The name > > > ptdesc_alloc() implies the allocation of the ptdesc itself, rather than > > > allocation of page table page. The same goes for free. > > > > I'm not sure I see the difference. Could you elaborate? > > I read ptdesc_alloc() as "allocate a ptdesc" rather than as "allocate a > page for page table and return ptdesc pointing to that page". Seems very > confusing to me already and it will be even more confusion when we'll start > allocating actual ptdescs. Hmm, I see what you're saying. I'm envisioning this function evolving into one that allocates a ptdesc later. I don't see why we would need to have both a page table page AND ptdesc at any point, but that may be a lack of knowledge from my part. I was thinking later, if necessary, we could make another function (only to be used internally) to allocate page table pages.
On Thu, May 25, 2023 at 01:53:24PM -0700, Vishal Moola wrote: > On Thu, May 25, 2023 at 1:26 PM Mike Rapoport <rppt@kernel.org> wrote: > > > > On Thu, May 25, 2023 at 11:04:28AM -0700, Vishal Moola wrote: > > > On Thu, May 25, 2023 at 2:10 AM Mike Rapoport <rppt@kernel.org> wrote: > > > > > + > > > > > +static inline struct ptdesc *ptdesc_alloc(gfp_t gfp, unsigned int order) > > > > > +{ > > > > > + struct page *page = alloc_pages(gfp | __GFP_COMP, order); > > > > > + > > > > > + return page_ptdesc(page); > > > > > +} > > > > > + > > > > > +static inline void ptdesc_free(struct ptdesc *pt) > > > > > +{ > > > > > + struct page *page = ptdesc_page(pt); > > > > > + > > > > > + __free_pages(page, compound_order(page)); > > > > > +} > > > > > > > > The ptdesc_{alloc,free} API does not sound right to me. The name > > > > ptdesc_alloc() implies the allocation of the ptdesc itself, rather than > > > > allocation of page table page. The same goes for free. > > > > > > I'm not sure I see the difference. Could you elaborate? > > > > I read ptdesc_alloc() as "allocate a ptdesc" rather than as "allocate a > > page for page table and return ptdesc pointing to that page". Seems very > > confusing to me already and it will be even more confusion when we'll start > > allocating actual ptdescs. > > Hmm, I see what you're saying. I'm envisioning this function evolving into > one that allocates a ptdesc later. I don't see why we would need to have both a > page table page AND ptdesc at any point, but that may be a lack of knowledge > from my part. Sorry if I wasn't clear, by "page table page" I meant the page (or memory for that matter) for actual page table rather than struct page describing that memory. So what we allocate here is the actual memory for the page tables and not the memory for the metadata. That's why I think the name ptdesc_alloc is confusing. > I was thinking later, if necessary, we could make another function > (only to be used internally) to allocate page table pages.
On Sat, May 27, 2023 at 01:41:44PM +0300, Mike Rapoport wrote: > Sorry if I wasn't clear, by "page table page" I meant the page (or memory > for that matter) for actual page table rather than struct page describing > that memory. > > So what we allocate here is the actual memory for the page tables and not > the memory for the metadata. That's why I think the name ptdesc_alloc is > confusing. But that's going to be the common pattern in the Glorious Future. You allocate a folio and that includes both the folio memory descriptor and the 2^n pages of memory described by that folio. Similarly for all the other memory descriptors.
On Sat, May 27, 2023 at 04:09:31PM +0100, Matthew Wilcox wrote: > On Sat, May 27, 2023 at 01:41:44PM +0300, Mike Rapoport wrote: > > Sorry if I wasn't clear, by "page table page" I meant the page (or memory > > for that matter) for actual page table rather than struct page describing > > that memory. > > > > So what we allocate here is the actual memory for the page tables and not > > the memory for the metadata. That's why I think the name ptdesc_alloc is > > confusing. > > But that's going to be the common pattern in the Glorious Future. > You allocate a folio and that includes both the folio memory descriptor > and the 2^n pages of memory described by that folio. Similarly for all > the other memory descriptors. I'm not arguing with that, I'm not happy about the naming. IMO, the name should reflect that we allocate memory for page tables rather than for the descriptor of that memory, say pgtable_alloc() or page_table_alloc().
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index b46617207c93..6bade9e0e799 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -481,6 +481,17 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) return tlb_remove_page_size(tlb, page, PAGE_SIZE); } +static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) +{ + tlb_remove_table(tlb, pt); +} + +/* Like tlb_remove_ptdesc, but for page-like page directories. */ +static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) +{ + tlb_remove_page(tlb, ptdesc_page(pt)); +} + static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { diff --git a/include/linux/mm.h b/include/linux/mm.h index b18848ae7e22..258f3b730359 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2744,6 +2744,45 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU */ +static inline struct ptdesc *virt_to_ptdesc(const void *x) +{ + return page_ptdesc(virt_to_head_page(x)); +} + +static inline void *ptdesc_to_virt(const struct ptdesc *pt) +{ + return page_to_virt(ptdesc_page(pt)); +} + +static inline void *ptdesc_address(const struct ptdesc *pt) +{ + return folio_address(ptdesc_folio(pt)); +} + +static inline bool ptdesc_is_reserved(struct ptdesc *pt) +{ + return folio_test_reserved(ptdesc_folio(pt)); +} + +static inline struct ptdesc *ptdesc_alloc(gfp_t gfp, unsigned int order) +{ + struct page *page = alloc_pages(gfp | __GFP_COMP, order); + + return page_ptdesc(page); +} + +static inline void ptdesc_free(struct ptdesc *pt) +{ + struct page *page = ptdesc_page(pt); + + __free_pages(page, compound_order(page)); +} + +static inline void ptdesc_clear(void *x) +{ + clear_page(x); +} + #if USE_SPLIT_PTE_PTLOCKS #if ALLOC_SPLIT_PTLOCKS void __init ptlock_cache_init(void); @@ -2970,6 +3009,11 @@ static inline void mark_page_reserved(struct page *page) adjust_managed_page_count(page, -1); } +static inline void free_reserved_ptdesc(struct ptdesc *pt) +{ + free_reserved_page(ptdesc_page(pt)); +} + /* * Default method to free all the __init memory into the buddy system. * The freed pages will be poisoned with pattern "poison" if it's within diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5e0f51308724..b067ac10f3dd 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1041,6 +1041,18 @@ TABLE_MATCH(ptl, ptl); #undef TABLE_MATCH static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); +#define ptdesc_page(pt) (_Generic((pt), \ + const struct ptdesc *: (const struct page *)(pt), \ + struct ptdesc *: (struct page *)(pt))) + +#define ptdesc_folio(pt) (_Generic((pt), \ + const struct ptdesc *: (const struct folio *)(pt), \ + struct ptdesc *: (struct folio *)(pt))) + +#define page_ptdesc(p) (_Generic((p), \ + const struct page *: (const struct ptdesc *)(p), \ + struct page *: (struct ptdesc *)(p))) + /* * No-op macros that just return the current protection value. Defined here * because these macros can be used even if CONFIG_MMU is not defined.
Introduce utility functions setting the foundation for ptdescs. These will also assist in the splitting out of ptdesc from struct page. ptdesc_alloc() is defined to allocate new ptdesc pages as compound pages. This is to standardize ptdescs by allowing for one allocation and one free function, in contrast to 2 allocation and 2 free functions. Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> --- include/asm-generic/tlb.h | 11 ++++++++++ include/linux/mm.h | 44 +++++++++++++++++++++++++++++++++++++++ include/linux/pgtable.h | 12 +++++++++++ 3 files changed, 67 insertions(+)