Message ID | 20230627031431.29653-4-vishal.moola@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Split ptdesc from struct page | expand |
On Mon, Jun 26, 2023 at 08:14:01PM -0700, Vishal Moola (Oracle) wrote: > Currently, page table information is stored within struct page. As part > of simplifying struct page, create struct ptdesc for page table > information. > > Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> > Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> > --- > include/linux/pgtable.h | 68 +++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 68 insertions(+) > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 5063b482e34f..d46cb709ce08 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -987,6 +987,74 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, > #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ > #endif /* CONFIG_MMU */ > > + > +/** > + * struct ptdesc - Memory descriptor for page tables. > + * @__page_flags: Same as page flags. Unused for page tables. > + * @pt_rcu_head: For freeing page table pages. > + * @pt_list: List of used page tables. Used for s390 and x86. > + * @_pt_pad_1: Padding that aliases with page's compound head. > + * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. > + * @_pt_s390_gaddr: Aliases with page's mapping. Used for s390 gmap only. Should some arch-specific bits (and a few others) always be under some #ifdefs, so it shouldn't appear on other archs? > + * @pt_mm: Used for x86 pgds. > + * @pt_frag_refcount: For fragmented page table tracking. Powerpc and s390 only. > + * @ptl: Lock for the page table. > + * @__page_type: Same as page->page_type. Unused for page tables. > + * @_refcount: Same as page refcount. Used for s390 page tables. > + * @pt_memcg_data: Memcg data. Tracked for page tables here. > + * > + * This struct overlays struct page for now. Do not modify without a good > + * understanding of the issues. 
> + */ > +struct ptdesc { > + unsigned long __page_flags; > + > + union { > + struct rcu_head pt_rcu_head; > + struct list_head pt_list; > + struct { > + unsigned long _pt_pad_1; > + pgtable_t pmd_huge_pte; > + }; > + }; > + unsigned long _pt_s390_gaddr; > + > + union { > + struct mm_struct *pt_mm; > + atomic_t pt_frag_refcount; > + }; > + > + union { > + unsigned long _pt_pad_2; > +#if ALLOC_SPLIT_PTLOCKS > + spinlock_t *ptl; > +#else > + spinlock_t ptl; > +#endif > + }; > + unsigned int __page_type; > + atomic_t _refcount; > +#ifdef CONFIG_MEMCG > + unsigned long pt_memcg_data; > +#endif > +};
On Tue, Jun 27, 2023 at 9:07 AM Peter Xu <peterx@redhat.com> wrote: > > On Mon, Jun 26, 2023 at 08:14:01PM -0700, Vishal Moola (Oracle) wrote: > > Currently, page table information is stored within struct page. As part > > of simplifying struct page, create struct ptdesc for page table > > information. > > > > Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com> > > Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> > > --- > > include/linux/pgtable.h | 68 +++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 68 insertions(+) > > > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > > index 5063b482e34f..d46cb709ce08 100644 > > --- a/include/linux/pgtable.h > > +++ b/include/linux/pgtable.h > > @@ -987,6 +987,74 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, > > #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ > > #endif /* CONFIG_MMU */ > > > > + > > +/** > > + * struct ptdesc - Memory descriptor for page tables. > > + * @__page_flags: Same as page flags. Unused for page tables. > > + * @pt_rcu_head: For freeing page table pages. > > + * @pt_list: List of used page tables. Used for s390 and x86. > > + * @_pt_pad_1: Padding that aliases with page's compound head. > > + * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. > > + * @_pt_s390_gaddr: Aliases with page's mapping. Used for s390 gmap only. > > Should some arch-specific bits (and a few others) always under some > #ifdefs, so it shouldn't appear on other archs? Right now this struct completely overlays struct page, so the padding as well as any arch-specific fields have to stay. Whenever we get ptdescs independent of struct page, we can clean up any unnecessary fields, as well as omit unnecessary fields from unrelated architectures.
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 5063b482e34f..d46cb709ce08 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -987,6 +987,74 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ #endif /* CONFIG_MMU */ + +/** + * struct ptdesc - Memory descriptor for page tables. + * @__page_flags: Same as page flags. Unused for page tables. + * @pt_rcu_head: For freeing page table pages. + * @pt_list: List of used page tables. Used for s390 and x86. + * @_pt_pad_1: Padding that aliases with page's compound head. + * @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs. + * @_pt_s390_gaddr: Aliases with page's mapping. Used for s390 gmap only. + * @pt_mm: Used for x86 pgds. + * @pt_frag_refcount: For fragmented page table tracking. Powerpc and s390 only. + * @ptl: Lock for the page table. + * @__page_type: Same as page->page_type. Unused for page tables. + * @_refcount: Same as page refcount. Used for s390 page tables. + * @pt_memcg_data: Memcg data. Tracked for page tables here. + * + * This struct overlays struct page for now. Do not modify without a good + * understanding of the issues. 
+ */ +struct ptdesc { + unsigned long __page_flags; + + union { + struct rcu_head pt_rcu_head; + struct list_head pt_list; + struct { + unsigned long _pt_pad_1; + pgtable_t pmd_huge_pte; + }; + }; + unsigned long _pt_s390_gaddr; + + union { + struct mm_struct *pt_mm; + atomic_t pt_frag_refcount; + }; + + union { + unsigned long _pt_pad_2; +#if ALLOC_SPLIT_PTLOCKS + spinlock_t *ptl; +#else + spinlock_t ptl; +#endif + }; + unsigned int __page_type; + atomic_t _refcount; +#ifdef CONFIG_MEMCG + unsigned long pt_memcg_data; +#endif +}; + +#define TABLE_MATCH(pg, pt) \ + static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt)) +TABLE_MATCH(flags, __page_flags); +TABLE_MATCH(compound_head, pt_list); +TABLE_MATCH(compound_head, _pt_pad_1); +TABLE_MATCH(pmd_huge_pte, pmd_huge_pte); +TABLE_MATCH(mapping, _pt_s390_gaddr); +TABLE_MATCH(pt_mm, pt_mm); +TABLE_MATCH(ptl, ptl); +TABLE_MATCH(rcu_head, pt_rcu_head); +#ifdef CONFIG_MEMCG +TABLE_MATCH(memcg_data, pt_memcg_data); +#endif +#undef TABLE_MATCH +static_assert(sizeof(struct ptdesc) <= sizeof(struct page)); + /* * No-op macros that just return the current protection value. Defined here * because these macros can be used even if CONFIG_MMU is not defined.