| Message ID | 3818cc9a-9999-d064-d778-9c94c5911e6@google.com (mailing list archive) |
|---|---|
| State | New |
| Series | mm,huge,rmap: unify and speed up compound mapcounts |
On 11/2/22 6:48 PM, Hugh Dickins wrote:
> We want to declare one more int in the first tail of a compound page:
> that first tail page being valuable property, since every compound page
> has a first tail, but perhaps no more than that.
...
> @@ -260,13 +267,16 @@ struct page {
>  *    to find how many references there are to this folio.
>  * @memcg_data: Memory Control Group data.
>  * @_flags_1: For large folios, additional page flags.
> - * @__head: Points to the folio. Do not use.
> + * @_head_1: Points to the folio. Do not use.
...

Changes to my original patch set look good, this seems to be a cleaner
implementation.

Should the usage of page_1 and page_2 also be documented here?

Thanks,
Sidhartha Kumar
On Thu, 3 Nov 2022, Sidhartha Kumar wrote:
> On 11/2/22 6:48 PM, Hugh Dickins wrote:
...
> > Undo "mm: add private field of first tail to struct page and struct
> > folio"'s recent addition of private_1 to the folio tail: instead add
> > hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison
> > to a second tail page of the folio: THP has long been using several
> > fields of that tail, so make better use of it for hugetlb too.
> > This is not how a generic folio should be declared in future,
> > but it is an effective transitional way to make use of it.
...
> > @@ -260,13 +267,16 @@ struct page {
> >  *    to find how many references there are to this folio.
> >  * @memcg_data: Memory Control Group data.
> >  * @_flags_1: For large folios, additional page flags.
> > - * @__head: Points to the folio. Do not use.
> > + * @_head_1: Points to the folio. Do not use.
>
> Changes to my original patch set look good, this seems to be a cleaner
> implementation.

Thanks a lot, Sidhartha, I'm glad to hear that it works for you too.

I expect that it will be done differently in the future: maybe generalizing
the additional fields to further "private"s as you did, letting different
subsystems accessorize them differently; or removing them completely from
struct folio, letting subsystems declare their own struct folio containers.
I don't know how that will end up, but this for now seems good and clear.

> Should the usage of page_1 and page_2 also be documented here?

You must have something interesting in mind to document about them,
but I cannot guess what!  They are for field alignment, not for use.
(page_2 to help when/if someone needs to add another pageful.)

Do you mean that I should copy the
	/* private: the union with struct page is transitional */
comment from above the original "struct page page;" line I copied?
Or give all three of them a few underscores to imply not for use?

Thanks,
Hugh
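[Editorial note: the alignment role Hugh describes can be checked outside the kernel with a minimal sketch. Everything below (fake_page, fake_folio, the simplified fields) is a hypothetical stand-in for the real struct page and struct folio, not kernel code; it only demonstrates how a page-sized union member forces each group of named fields onto the next page-struct boundary, which static asserts in the FOLIO_MATCH style then verify at compile time.]

/*
 * Standalone sketch, C11: fake_page stands in for struct page.
 * Pairing each group of named fields with a page-sized union member
 * guarantees the next group starts exactly one "page struct" further
 * along; the asserts catch any drift when the build runs.
 */
#include <assert.h>
#include <stddef.h>

struct fake_page {		/* stand-in for struct page: fixed size */
	unsigned long flags;
	unsigned long data[7];
};

struct fake_folio {
	union {			/* overlays the head page */
		struct {
			unsigned long flags;
		};
		struct fake_page page;
	};
	union {			/* overlays the first tail page */
		struct {
			unsigned long _flags_1;
			unsigned long _head_1;
		};
		struct fake_page __page_1;	/* alignment only: do not use */
	};
	union {			/* overlays the second tail page */
		struct {
			unsigned long _flags_2;
			unsigned long _head_2;
			void *_hugetlb_subpool;
		};
		struct fake_page __page_2;	/* alignment only: do not use */
	};
};

/* The FOLIO_MATCH idea: each field must land at its page-array offset. */
static_assert(offsetof(struct fake_folio, _flags_1) ==
	      1 * sizeof(struct fake_page), "first tail misaligned");
static_assert(offsetof(struct fake_folio, _flags_2) ==
	      2 * sizeof(struct fake_page), "second tail misaligned");

If a field drifts out of place, the build fails at these asserts instead of tail-page state being silently corrupted at runtime.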
On Wed, Nov 02, 2022 at 06:48:45PM -0700, Hugh Dickins wrote:
> @@ -260,13 +267,16 @@ struct page {
>  *    to find how many references there are to this folio.
>  * @memcg_data: Memory Control Group data.
>  * @_flags_1: For large folios, additional page flags.
> - * @__head: Points to the folio. Do not use.
> + * @_head_1: Points to the folio. Do not use.
>  * @_folio_dtor: Which destructor to use for this folio.
>  * @_folio_order: Do not use directly, call folio_order().
>  * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
>  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
>  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
> - * @_private_1: Do not use directly, call folio_get_private_1().

Looks like it misses

+ * @_flags_2: For large folios, additional page flags.
+ * @_head_2: Points to the folio. Do not use.

to match the first tail page documentation.

Otherwise the patch looks good to me:

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
On 11/3/22 9:29 PM, Hugh Dickins wrote:
> On Thu, 3 Nov 2022, Sidhartha Kumar wrote:
>> On 11/2/22 6:48 PM, Hugh Dickins wrote:
> ...
>>> Undo "mm: add private field of first tail to struct page and struct
>>> folio"'s recent addition of private_1 to the folio tail: instead add
>>> hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison
>>> to a second tail page of the folio: THP has long been using several
>>> fields of that tail, so make better use of it for hugetlb too.
>>> This is not how a generic folio should be declared in future,
>>> but it is an effective transitional way to make use of it.
> ...
>>> @@ -260,13 +267,16 @@ struct page {
>>>  *    to find how many references there are to this folio.
>>>  * @memcg_data: Memory Control Group data.
>>>  * @_flags_1: For large folios, additional page flags.
>>> - * @__head: Points to the folio. Do not use.
>>> + * @_head_1: Points to the folio. Do not use.
>> Changes to my original patch set look good, this seems to be a cleaner
>> implementation.
> Thanks a lot, Sidhartha, I'm glad to hear that it works for you too.
>
> I expect that it will be done differently in the future: maybe generalizing
> the additional fields to further "private"s as you did, letting different
> subsystems accessorize them differently; or removing them completely from
> struct folio, letting subsystems declare their own struct folio containers.
> I don't know how that will end up, but this for now seems good and clear.
>
>> Should the usage of page_1 and page_2 also be documented here?
> You must have something interesting in mind to document about them,
> but I cannot guess what!  They are for field alignment, not for use.
> (page_2 to help when/if someone needs to add another pageful.)
>
> Do you mean that I should copy the
> 	/* private: the union with struct page is transitional */
> comment from above the original "struct page page;" line I copied?
> Or give all three of them a few underscores to imply not for use?

I think the underscores with a comment about not for use could be helpful.

Thanks,
Sidhartha Kumar

> Thanks,
> Hugh
On Sat, 5 Nov 2022, Kirill A. Shutemov wrote:
> On Wed, Nov 02, 2022 at 06:48:45PM -0700, Hugh Dickins wrote:
> > @@ -260,13 +267,16 @@ struct page {
> >  *    to find how many references there are to this folio.
> >  * @memcg_data: Memory Control Group data.
> >  * @_flags_1: For large folios, additional page flags.
> > - * @__head: Points to the folio. Do not use.
> > + * @_head_1: Points to the folio. Do not use.
> >  * @_folio_dtor: Which destructor to use for this folio.
> >  * @_folio_order: Do not use directly, call folio_order().
> >  * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
> >  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
> >  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
> > - * @_private_1: Do not use directly, call folio_get_private_1().
>
> Looks like it misses
>
> + * @_flags_2: For large folios, additional page flags.
> + * @_head_2: Points to the folio. Do not use.
>
> to match the first tail page documentation.
>
> Otherwise the patch looks good to me:
>
> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Many thanks for all your encouragement and reviews, Kirill.

Okay, I've added a couple of lines on those fields; but did not want to
recommend the _flags_2 field for use (by the time we run out in _flags_1,
I hope all this will be gone or look different).

I'm sending an incremental fix, once I've responded to Sidhartha.

Hugh
On Wed, 9 Nov 2022, Sidhartha Kumar wrote:
> On 11/3/22 9:29 PM, Hugh Dickins wrote:
> >
> >> Should the usage of page_1 and page_2 also be documented here?
> > You must have something interesting in mind to document about them,
> > but I cannot guess what!  They are for field alignment, not for use.
> > (page_2 to help when/if someone needs to add another pageful.)
> >
> > Do you mean that I should copy the
> > 	/* private: the union with struct page is transitional */
> > comment from above the original "struct page page;" line I copied?
> > Or give all three of them a few underscores to imply not for use?
>
> I think the underscores with a comment about not for use could be helpful.

I've given them two underscores (but not to the original "struct page
page", since a build showed that used as "page" elsewhere, not just for
alignment).

I'm sorry, but I've not given them any comment: I don't think they belong
in the commented fields section (_flags_1 etc), "page" is not there; and
I'm, let's be honest, terrified of dabbling in this kerneldoc area - feel
very fortunate to have escaped attack by a robot for my additions so far.
I'll leave adding comment to you or other cognoscenti.

Hugh
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 65ea34022aa2..03ecf1c5e46f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t;
 /*
  * For HugeTLB page, there are more metadata to save in the struct page. But
  * the head struct page cannot meet our needs, so we have to abuse other tail
- * struct page to store the metadata. In order to avoid conflicts caused by
- * subsequent use of more tail struct pages, we gather these discrete indexes
- * of tail struct page here.
+ * struct page to store the metadata.
  */
-enum {
-	SUBPAGE_INDEX_SUBPOOL = 1,	/* reuse page->private */
-#ifdef CONFIG_CGROUP_HUGETLB
-	SUBPAGE_INDEX_CGROUP,		/* reuse page->private */
-	SUBPAGE_INDEX_CGROUP_RSVD,	/* reuse page->private */
-	__MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
-	SUBPAGE_INDEX_HWPOISON,
-#endif
-	__NR_USED_SUBPAGE,
-};
+#define __NR_USED_SUBPAGE 3
 
 struct hugepage_subpool {
 	spinlock_t lock;
@@ -722,11 +709,11 @@ extern unsigned int default_hstate_idx;
 
 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
 {
-	return (void *)folio_get_private_1(folio);
+	return folio->_hugetlb_subpool;
 }
 
 /*
- * hugetlb page subpool pointer located in hpage[1].private
+ * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
  */
 static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 {
@@ -736,7 +723,7 @@ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 static inline void hugetlb_set_folio_subpool(struct folio *folio,
 					struct hugepage_subpool *subpool)
 {
-	folio_set_private_1(folio, (unsigned long)subpool);
+	folio->_hugetlb_subpool = subpool;
 }
 
 static inline void hugetlb_set_page_subpool(struct page *hpage,
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index c70f92fe493e..f706626a8063 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -24,12 +24,10 @@ struct file_region;
 #ifdef CONFIG_CGROUP_HUGETLB
 /*
  * Minimum page order trackable by hugetlb cgroup.
- * At least 4 pages are necessary for all the tracking information.
- * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
- * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
- * is the reservation usage cgroup.
+ * At least 3 pages are necessary for all the tracking information.
+ * The second tail page contains all of the hugetlb-specific fields.
  */
-#define HUGETLB_CGROUP_MIN_ORDER	order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
+#define HUGETLB_CGROUP_MIN_ORDER	order_base_2(__NR_USED_SUBPAGE)
 
 enum hugetlb_memory_event {
 	HUGETLB_MAX,
@@ -69,21 +67,13 @@ struct hugetlb_cgroup {
 static inline struct hugetlb_cgroup *
 __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
 {
-	struct page *tail;
-
 	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
 		return NULL;
-
-	if (rsvd) {
-		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD);
-		return (void *)page_private(tail);
-	}
-
-	else {
-		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP);
-		return (void *)page_private(tail);
-	}
+	if (rsvd)
+		return folio->_hugetlb_cgroup_rsvd;
+	else
+		return folio->_hugetlb_cgroup;
 }
 
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
@@ -101,15 +91,12 @@ static inline void __set_hugetlb_cgroup(struct folio *folio,
 				       struct hugetlb_cgroup *h_cg, bool rsvd)
 {
 	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
-
 	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
 		return;
 	if (rsvd)
-		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD),
-				 (unsigned long)h_cg);
+		folio->_hugetlb_cgroup_rsvd = h_cg;
 	else
-		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP),
-				 (unsigned long)h_cg);
+		folio->_hugetlb_cgroup = h_cg;
 }
 
 static inline void set_hugetlb_cgroup(struct folio *folio,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 834022721bc6..728eb6089bba 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -145,15 +145,22 @@ struct page {
 			atomic_t compound_pincount;
 #ifdef CONFIG_64BIT
 			unsigned int compound_nr; /* 1 << compound_order */
-			unsigned long _private_1;
 #endif
 		};
-		struct {	/* Second tail page of compound page */
+		struct {	/* Second tail page of transparent huge page */
 			unsigned long _compound_pad_1;	/* compound_head */
 			unsigned long _compound_pad_2;
 			/* For both global and memcg */
 			struct list_head deferred_list;
 		};
+		struct {	/* Second tail page of hugetlb page */
+			unsigned long _hugetlb_pad_1;	/* compound_head */
+			void *hugetlb_subpool;
+			void *hugetlb_cgroup;
+			void *hugetlb_cgroup_rsvd;
+			void *hugetlb_hwpoison;
+			/* No more space on 32-bit: use third tail if more */
+		};
 		struct {	/* Page table pages */
 			unsigned long _pt_pad_1;	/* compound_head */
 			pgtable_t pmd_huge_pte; /* protected by page->ptl */
@@ -260,13 +267,16 @@ struct page {
  *    to find how many references there are to this folio.
  * @memcg_data: Memory Control Group data.
  * @_flags_1: For large folios, additional page flags.
- * @__head: Points to the folio. Do not use.
+ * @_head_1: Points to the folio. Do not use.
  * @_folio_dtor: Which destructor to use for this folio.
  * @_folio_order: Do not use directly, call folio_order().
  * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
- * @_private_1: Do not use directly, call folio_get_private_1().
+ * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
+ * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
  *
  * A folio is a physically, virtually and logically contiguous set
  * of bytes.  It is a power-of-two in size, and it is aligned to that
@@ -305,16 +315,31 @@ struct folio {
 		};
 		struct page page;
 	};
-	unsigned long _flags_1;
-	unsigned long __head;
-	unsigned char _folio_dtor;
-	unsigned char _folio_order;
-	atomic_t _total_mapcount;
-	atomic_t _pincount;
+	union {
+		struct {
+			unsigned long _flags_1;
+			unsigned long _head_1;
+			unsigned char _folio_dtor;
+			unsigned char _folio_order;
+			atomic_t _total_mapcount;
+			atomic_t _pincount;
 #ifdef CONFIG_64BIT
-	unsigned int _folio_nr_pages;
+			unsigned int _folio_nr_pages;
 #endif
-	unsigned long _private_1;
+		};
+		struct page page_1;
+	};
+	union {
+		struct {
+			unsigned long _flags_2;
+			unsigned long _head_2;
+			void *_hugetlb_subpool;
+			void *_hugetlb_cgroup;
+			void *_hugetlb_cgroup_rsvd;
+			void *_hugetlb_hwpoison;
+		};
+		struct page page_2;
+	};
 };
 
 #define FOLIO_MATCH(pg, fl)						\
@@ -335,16 +360,25 @@ FOLIO_MATCH(memcg_data, memcg_data);
 	static_assert(offsetof(struct folio, fl) ==			\
 			offsetof(struct page, pg) + sizeof(struct page))
 FOLIO_MATCH(flags, _flags_1);
-FOLIO_MATCH(compound_head, __head);
+FOLIO_MATCH(compound_head, _head_1);
 FOLIO_MATCH(compound_dtor, _folio_dtor);
 FOLIO_MATCH(compound_order, _folio_order);
 FOLIO_MATCH(compound_mapcount, _total_mapcount);
 FOLIO_MATCH(compound_pincount, _pincount);
 #ifdef CONFIG_64BIT
 FOLIO_MATCH(compound_nr, _folio_nr_pages);
-FOLIO_MATCH(_private_1, _private_1);
 #endif
 #undef FOLIO_MATCH
+#define FOLIO_MATCH(pg, fl)						\
+	static_assert(offsetof(struct folio, fl) ==			\
+			offsetof(struct page, pg) + 2 * sizeof(struct page))
+FOLIO_MATCH(flags, _flags_2);
+FOLIO_MATCH(compound_head, _head_2);
+FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool);
+FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup);
+FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd);
+FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison);
+#undef FOLIO_MATCH
 
 static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
 {
@@ -388,16 +422,6 @@ static inline void *folio_get_private(struct folio *folio)
 	return folio->private;
 }
 
-static inline void folio_set_private_1(struct folio *folio, unsigned long private)
-{
-	folio->_private_1 = private;
-}
-
-static inline unsigned long folio_get_private_1(struct folio *folio)
-{
-	return folio->_private_1;
-}
-
 struct page_frag_cache {
 	void * va;
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/mm/Kconfig b/mm/Kconfig
index 57e1d8c5b505..bc7e7dacfcd5 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -775,7 +775,7 @@ endchoice
 
 config THP_SWAP
 	def_bool y
-	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP
+	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT
 	help
 	  Swap transparent huge pages in one piece, without splitting.
 	  XXX: For now, swap cluster backing transparent huge page
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 779a426d2cab..63d8501001c6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1687,8 +1687,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
 #ifdef CONFIG_HUGETLB_PAGE
 /*
  * Struct raw_hwp_page represents information about "raw error page",
- * constructing singly linked list originated from ->private field of
- * SUBPAGE_INDEX_HWPOISON-th tail page.
+ * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
  */
 struct raw_hwp_page {
 	struct llist_node node;
@@ -1697,7 +1696,7 @@ struct raw_hwp_page {
 
 static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
 {
-	return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON);
+	return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison;
 }
 
 static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
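[Editorial note: the raw_hwp_list_head() change above relies on struct llist_head being a single pointer, so the folio's _hugetlb_hwpoison slot is large enough to anchor a lock-less list of raw error pages. A sketch of that pattern follows; record_raw_hwp() is a hypothetical helper for illustration, not a function in memory-failure.c.]

#include <linux/errno.h>
#include <linux/llist.h>
#include <linux/slab.h>

struct raw_hwp_page {
	struct llist_node node;
	struct page *page;	/* the raw (sub)page that took the error */
};

/* Record one more poisoned subpage on the per-folio list. */
static int record_raw_hwp(struct llist_head *head, struct page *subpage)
{
	struct raw_hwp_page *raw_hwp = kmalloc(sizeof(*raw_hwp), GFP_ATOMIC);

	if (!raw_hwp)
		return -ENOMEM;
	raw_hwp->page = subpage;
	llist_add(&raw_hwp->node, head);	/* atomic, lock-less push */
	return 0;
}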
We want to declare one more int in the first tail of a compound page:
that first tail page being valuable property, since every compound page
has a first tail, but perhaps no more than that.

No problem on 64-bit: there is already space for it.  No problem with
32-bit THPs: 5.18 commit 5232c63f46fd ("mm: Make compound_pincount always
available") kindly cleared the space for it, apparently not realizing
that only 64-bit architectures enable CONFIG_THP_SWAP (whose use of tail
page->private might conflict) - but make sure of that in its Kconfig.

But hugetlb pages use tail page->private of the first tail page for a
subpool pointer, which will conflict; and they also use page->private
of the 2nd, 3rd and 4th tails.

Undo "mm: add private field of first tail to struct page and struct
folio"'s recent addition of private_1 to the folio tail: instead add
hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison
to a second tail page of the folio: THP has long been using several
fields of that tail, so make better use of it for hugetlb too.
This is not how a generic folio should be declared in future,
but it is an effective transitional way to make use of it.

Delete the SUBPAGE_INDEX stuff, but keep __NR_USED_SUBPAGE: now 3.

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 include/linux/hugetlb.h        | 23 +++--------
 include/linux/hugetlb_cgroup.h | 31 +++++----------
 include/linux/mm_types.h       | 72 ++++++++++++++++++++++------------
 mm/Kconfig                     |  2 +-
 mm/memory-failure.c            |  5 +--
 5 files changed, 65 insertions(+), 68 deletions(-)
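[Editorial note: a small arithmetic check on the SUBPAGE_INDEX deletion: with __NR_USED_SUBPAGE kept at 3, HUGETLB_CGROUP_MIN_ORDER stays at 2, the same value the old __MAX_CGROUP_SUBPAGE_INDEX expression produced. The order_base_2() below is a plain C stand-in for the kernel helper of the same name, assumed only for this standalone check.]

#include <assert.h>

/* plain C stand-in for the kernel's order_base_2(): ceil(log2(n)) */
static int order_base_2(unsigned int n)
{
	int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	/* Old form: subpages indexed 0..__MAX_CGROUP_SUBPAGE_INDEX (== 3). */
	assert(order_base_2(3 + 1) == 2);
	/* New form: __NR_USED_SUBPAGE == 3, head plus two tails. */
	assert(order_base_2(3) == 2);
	/* Either way, only order >= 2 folios have the second tail page. */
	return 0;
}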