diff mbox series

[1/3] mm,hugetlb: use folio fields in second tail page

Message ID 3818cc9a-9999-d064-d778-9c94c5911e6@google.com (mailing list archive)
State New
Headers show
Series mm,huge,rmap: unify and speed up compound mapcounts | expand

Commit Message

Hugh Dickins Nov. 3, 2022, 1:48 a.m. UTC
We want to declare one more int in the first tail of a compound page:
that first tail page being valuable property, since every compound page
has a first tail, but perhaps no more than that.

No problem on 64-bit: there is already space for it.  No problem with
32-bit THPs: 5.18 commit 5232c63f46fd ("mm: Make compound_pincount always
available") kindly cleared the space for it, apparently not realizing
that only 64-bit architectures enable CONFIG_THP_SWAP (whose use of tail
page->private might conflict) - but make sure of that in its Kconfig.

But hugetlb pages use tail page->private of the first tail page for a
subpool pointer, which will conflict; and they also use page->private
of the 2nd, 3rd and 4th tails.

Undo "mm: add private field of first tail to struct page and struct
folio"'s recent addition of private_1 to the folio tail: instead add
hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison
to a second tail page of the folio: THP has long been using several
fields of that tail, so make better use of it for hugetlb too.
This is not how a generic folio should be declared in future,
but it is an effective transitional way to make use of it.

Delete the SUBPAGE_INDEX stuff, but keep __NR_USED_SUBPAGE: now 3.

Signed-off-by: Hugh Dickins <hughd@google.com>
---
 include/linux/hugetlb.h        | 23 +++--------
 include/linux/hugetlb_cgroup.h | 31 +++++----------
 include/linux/mm_types.h       | 72 ++++++++++++++++++++++------------
 mm/Kconfig                     |  2 +-
 mm/memory-failure.c            |  5 +--
 5 files changed, 65 insertions(+), 68 deletions(-)

Comments

Sidhartha Kumar Nov. 3, 2022, 9:18 p.m. UTC | #1
On 11/2/22 6:48 PM, Hugh Dickins wrote:
> We want to declare one more int in the first tail of a compound page:
> that first tail page being valuable property, since every compound page
> has a first tail, but perhaps no more than that.
>
> No problem on 64-bit: there is already space for it.  No problem with
> 32-bit THPs: 5.18 commit 5232c63f46fd ("mm: Make compound_pincount always
> available") kindly cleared the space for it, apparently not realizing
> that only 64-bit architectures enable CONFIG_THP_SWAP (whose use of tail
> page->private might conflict) - but make sure of that in its Kconfig.
>
> But hugetlb pages use tail page->private of the first tail page for a
> subpool pointer, which will conflict; and they also use page->private
> of the 2nd, 3rd and 4th tails.
>
> Undo "mm: add private field of first tail to struct page and struct
> folio"'s recent addition of private_1 to the folio tail: instead add
> hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison
> to a second tail page of the folio: THP has long been using several
> fields of that tail, so make better use of it for hugetlb too.
> This is not how a generic folio should be declared in future,
> but it is an effective transitional way to make use of it.
>
> Delete the SUBPAGE_INDEX stuff, but keep __NR_USED_SUBPAGE: now 3.
>
> Signed-off-by: Hugh Dickins <hughd@google.com>
> ---
>   include/linux/hugetlb.h        | 23 +++--------
>   include/linux/hugetlb_cgroup.h | 31 +++++----------
>   include/linux/mm_types.h       | 72 ++++++++++++++++++++++------------
>   mm/Kconfig                     |  2 +-
>   mm/memory-failure.c            |  5 +--
>   5 files changed, 65 insertions(+), 68 deletions(-)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 65ea34022aa2..03ecf1c5e46f 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t;
>   /*
>    * For HugeTLB page, there are more metadata to save in the struct page. But
>    * the head struct page cannot meet our needs, so we have to abuse other tail
> - * struct page to store the metadata. In order to avoid conflicts caused by
> - * subsequent use of more tail struct pages, we gather these discrete indexes
> - * of tail struct page here.
> + * struct page to store the metadata.
>    */
> -enum {
> -	SUBPAGE_INDEX_SUBPOOL = 1,	/* reuse page->private */
> -#ifdef CONFIG_CGROUP_HUGETLB
> -	SUBPAGE_INDEX_CGROUP,		/* reuse page->private */
> -	SUBPAGE_INDEX_CGROUP_RSVD,	/* reuse page->private */
> -	__MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
> -#endif
> -#ifdef CONFIG_MEMORY_FAILURE
> -	SUBPAGE_INDEX_HWPOISON,
> -#endif
> -	__NR_USED_SUBPAGE,
> -};
> +#define __NR_USED_SUBPAGE 3
>   
>   struct hugepage_subpool {
>   	spinlock_t lock;
> @@ -722,11 +709,11 @@ extern unsigned int default_hstate_idx;
>   
>   static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
>   {
> -	return (void *)folio_get_private_1(folio);
> +	return folio->_hugetlb_subpool;
>   }
>   
>   /*
> - * hugetlb page subpool pointer located in hpage[1].private
> + * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
>    */
>   static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
>   {
> @@ -736,7 +723,7 @@ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
>   static inline void hugetlb_set_folio_subpool(struct folio *folio,
>   					struct hugepage_subpool *subpool)
>   {
> -	folio_set_private_1(folio, (unsigned long)subpool);
> +	folio->_hugetlb_subpool = subpool;
>   }
>   
>   static inline void hugetlb_set_page_subpool(struct page *hpage,
> diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
> index c70f92fe493e..f706626a8063 100644
> --- a/include/linux/hugetlb_cgroup.h
> +++ b/include/linux/hugetlb_cgroup.h
> @@ -24,12 +24,10 @@ struct file_region;
>   #ifdef CONFIG_CGROUP_HUGETLB
>   /*
>    * Minimum page order trackable by hugetlb cgroup.
> - * At least 4 pages are necessary for all the tracking information.
> - * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
> - * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
> - * is the reservation usage cgroup.
> + * At least 3 pages are necessary for all the tracking information.
> + * The second tail page contains all of the hugetlb-specific fields.
>    */
> -#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
> +#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
>   
>   enum hugetlb_memory_event {
>   	HUGETLB_MAX,
> @@ -69,21 +67,13 @@ struct hugetlb_cgroup {
>   static inline struct hugetlb_cgroup *
>   __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
>   {
> -	struct page *tail;
> -
>   	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
>   	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
>   		return NULL;
> -
> -	if (rsvd) {
> -		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD);
> -		return (void *)page_private(tail);
> -	}
> -
> -	else {
> -		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP);
> -		return (void *)page_private(tail);
> -	}
> +	if (rsvd)
> +		return folio->_hugetlb_cgroup_rsvd;
> +	else
> +		return folio->_hugetlb_cgroup;
>   }
>   
>   static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
> @@ -101,15 +91,12 @@ static inline void __set_hugetlb_cgroup(struct folio *folio,
>   				       struct hugetlb_cgroup *h_cg, bool rsvd)
>   {
>   	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
> -
>   	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
>   		return;
>   	if (rsvd)
> -		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD),
> -				 (unsigned long)h_cg);
> +		folio->_hugetlb_cgroup_rsvd = h_cg;
>   	else
> -		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP),
> -				 (unsigned long)h_cg);
> +		folio->_hugetlb_cgroup = h_cg;
>   }
>   
>   static inline void set_hugetlb_cgroup(struct folio *folio,
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 834022721bc6..728eb6089bba 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -145,15 +145,22 @@ struct page {
>   			atomic_t compound_pincount;
>   #ifdef CONFIG_64BIT
>   			unsigned int compound_nr; /* 1 << compound_order */
> -			unsigned long _private_1;
>   #endif
>   		};
> -		struct {	/* Second tail page of compound page */
> +		struct {	/* Second tail page of transparent huge page */
>   			unsigned long _compound_pad_1;	/* compound_head */
>   			unsigned long _compound_pad_2;
>   			/* For both global and memcg */
>   			struct list_head deferred_list;
>   		};
> +		struct {	/* Second tail page of hugetlb page */
> +			unsigned long _hugetlb_pad_1;	/* compound_head */
> +			void *hugetlb_subpool;
> +			void *hugetlb_cgroup;
> +			void *hugetlb_cgroup_rsvd;
> +			void *hugetlb_hwpoison;
> +			/* No more space on 32-bit: use third tail if more */
> +		};
>   		struct {	/* Page table pages */
>   			unsigned long _pt_pad_1;	/* compound_head */
>   			pgtable_t pmd_huge_pte; /* protected by page->ptl */
> @@ -260,13 +267,16 @@ struct page {
>    *    to find how many references there are to this folio.
>    * @memcg_data: Memory Control Group data.
>    * @_flags_1: For large folios, additional page flags.
> - * @__head: Points to the folio.  Do not use.
> + * @_head_1: Points to the folio.  Do not use.

Changes to my original patch set look good, this seems to be a cleaner 
implementation.

Should the usage of page_1 and page_2 also be documented here?

Thanks,

Sidhartha Kumar

>    * @_folio_dtor: Which destructor to use for this folio.
>    * @_folio_order: Do not use directly, call folio_order().
>    * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
>    * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
>    * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
> - * @_private_1: Do not use directly, call folio_get_private_1().
> + * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
> + * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
> + * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
> + * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
>    *
>    * A folio is a physically, virtually and logically contiguous set
>    * of bytes.  It is a power-of-two in size, and it is aligned to that
> @@ -305,16 +315,31 @@ struct folio {
>   		};
>   		struct page page;
>   	};
> -	unsigned long _flags_1;
> -	unsigned long __head;
> -	unsigned char _folio_dtor;
> -	unsigned char _folio_order;
> -	atomic_t _total_mapcount;
> -	atomic_t _pincount;
> +	union {
> +		struct {
> +			unsigned long _flags_1;
> +			unsigned long _head_1;
> +			unsigned char _folio_dtor;
> +			unsigned char _folio_order;
> +			atomic_t _total_mapcount;
> +			atomic_t _pincount;
>   #ifdef CONFIG_64BIT
> -	unsigned int _folio_nr_pages;
> +			unsigned int _folio_nr_pages;
>   #endif
> -	unsigned long _private_1;
> +		};
> +		struct page page_1;
> +	};
> +	union {
> +		struct {
> +			unsigned long _flags_2;
> +			unsigned long _head_2;
> +			void *_hugetlb_subpool;
> +			void *_hugetlb_cgroup;
> +			void *_hugetlb_cgroup_rsvd;
> +			void *_hugetlb_hwpoison;
> +		};
> +		struct page page_2;
> +	};
>   };
>   
>   #define FOLIO_MATCH(pg, fl)						\
> @@ -335,16 +360,25 @@ FOLIO_MATCH(memcg_data, memcg_data);
>   	static_assert(offsetof(struct folio, fl) ==			\
>   			offsetof(struct page, pg) + sizeof(struct page))
>   FOLIO_MATCH(flags, _flags_1);
> -FOLIO_MATCH(compound_head, __head);
> +FOLIO_MATCH(compound_head, _head_1);
>   FOLIO_MATCH(compound_dtor, _folio_dtor);
>   FOLIO_MATCH(compound_order, _folio_order);
>   FOLIO_MATCH(compound_mapcount, _total_mapcount);
>   FOLIO_MATCH(compound_pincount, _pincount);
>   #ifdef CONFIG_64BIT
>   FOLIO_MATCH(compound_nr, _folio_nr_pages);
> -FOLIO_MATCH(_private_1, _private_1);
>   #endif
>   #undef FOLIO_MATCH
> +#define FOLIO_MATCH(pg, fl)						\
> +	static_assert(offsetof(struct folio, fl) ==			\
> +			offsetof(struct page, pg) + 2 * sizeof(struct page))
> +FOLIO_MATCH(flags, _flags_2);
> +FOLIO_MATCH(compound_head, _head_2);
> +FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool);
> +FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup);
> +FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd);
> +FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison);
> +#undef FOLIO_MATCH
>   
>   static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
>   {
> @@ -388,16 +422,6 @@ static inline void *folio_get_private(struct folio *folio)
>   	return folio->private;
>   }
>   
> -static inline void folio_set_private_1(struct folio *folio, unsigned long private)
> -{
> -	folio->_private_1 = private;
> -}
> -
> -static inline unsigned long folio_get_private_1(struct folio *folio)
> -{
> -	return folio->_private_1;
> -}
> -
>   struct page_frag_cache {
>   	void * va;
>   #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 57e1d8c5b505..bc7e7dacfcd5 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -775,7 +775,7 @@ endchoice
>   
>   config THP_SWAP
>   	def_bool y
> -	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP
> +	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT
>   	help
>   	  Swap transparent huge pages in one piece, without splitting.
>   	  XXX: For now, swap cluster backing transparent huge page
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 779a426d2cab..63d8501001c6 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1687,8 +1687,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
>   #ifdef CONFIG_HUGETLB_PAGE
>   /*
>    * Struct raw_hwp_page represents information about "raw error page",
> - * constructing singly linked list originated from ->private field of
> - * SUBPAGE_INDEX_HWPOISON-th tail page.
> + * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
>    */
>   struct raw_hwp_page {
>   	struct llist_node node;
> @@ -1697,7 +1696,7 @@ struct raw_hwp_page {
>   
>   static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
>   {
> -	return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON);
> +	return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison;
>   }
>   
>   static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
Hugh Dickins Nov. 4, 2022, 4:29 a.m. UTC | #2
On Thu, 3 Nov 2022, Sidhartha Kumar wrote:
> On 11/2/22 6:48 PM, Hugh Dickins wrote:
...
> > Undo "mm: add private field of first tail to struct page and struct
> > folio"'s recent addition of private_1 to the folio tail: instead add
> > hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison
> > to a second tail page of the folio: THP has long been using several
> > fields of that tail, so make better use of it for hugetlb too.
> > This is not how a generic folio should be declared in future,
> > but it is an effective transitional way to make use of it.
...
> > @@ -260,13 +267,16 @@ struct page {
> >    *    to find how many references there are to this folio.
> >    * @memcg_data: Memory Control Group data.
> >    * @_flags_1: For large folios, additional page flags.
> > - * @__head: Points to the folio.  Do not use.
> > + * @_head_1: Points to the folio.  Do not use.
> 
> Changes to my original patch set look good, this seems to be a cleaner
> implementation.

Thanks a lot, Sidhartha, I'm glad to hear that it works for you too.

I expect that it will be done differently in the future: maybe generalizing
the additional fields to further "private"s as you did, letting different
subsystems accessorize them differently; or removing them completely from
struct folio, letting subsystems declare their own struct folio containers.
I don't know how that will end up, but this for now seems good and clear.

> 
> Should the usage of page_1 and page_2 also be documented here?

You must have something interesting in mind to document about them,
but I cannot guess what! They are for field alignment, not for use.
(page_2 to help when/if someone needs to add another pageful.)

Do you mean that I should copy the 
	/* private: the union with struct page is transitional */
comment from above the original "struct page page;" line I copied?
Or give all three of them a few underscores to imply not for use?

Thanks,
Hugh
Kirill A. Shutemov Nov. 5, 2022, 7:13 p.m. UTC | #3
On Wed, Nov 02, 2022 at 06:48:45PM -0700, Hugh Dickins wrote:
> @@ -260,13 +267,16 @@ struct page {
>   *    to find how many references there are to this folio.
>   * @memcg_data: Memory Control Group data.
>   * @_flags_1: For large folios, additional page flags.
> - * @__head: Points to the folio.  Do not use.
> + * @_head_1: Points to the folio.  Do not use.
>   * @_folio_dtor: Which destructor to use for this folio.
>   * @_folio_order: Do not use directly, call folio_order().
>   * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
>   * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
>   * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
> - * @_private_1: Do not use directly, call folio_get_private_1().

Looks like it misses

  + * @_flags_2: For large folios, additional page flags.
  + * @_head_2: Points to the folio.  Do not use.

to match the first tail page documentation.

Otherwise the patch looks good to me:

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Sidhartha Kumar Nov. 10, 2022, 12:11 a.m. UTC | #4
On 11/3/22 9:29 PM, Hugh Dickins wrote:
> On Thu, 3 Nov 2022, Sidhartha Kumar wrote:
>> On 11/2/22 6:48 PM, Hugh Dickins wrote:
> ...
>>> Undo "mm: add private field of first tail to struct page and struct
>>> folio"'s recent addition of private_1 to the folio tail: instead add
>>> hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison
>>> to a second tail page of the folio: THP has long been using several
>>> fields of that tail, so make better use of it for hugetlb too.
>>> This is not how a generic folio should be declared in future,
>>> but it is an effective transitional way to make use of it.
> ...
>>> @@ -260,13 +267,16 @@ struct page {
>>>     *    to find how many references there are to this folio.
>>>     * @memcg_data: Memory Control Group data.
>>>     * @_flags_1: For large folios, additional page flags.
>>> - * @__head: Points to the folio.  Do not use.
>>> + * @_head_1: Points to the folio.  Do not use.
>> Changes to my original patch set look good, this seems to be a cleaner
>> implementation.
> Thanks a lot, Sidhartha, I'm glad to hear that it works for you too.
>
> I expect that it will be done differently in the future: maybe generalizing
> the additional fields to further "private"s as you did, letting different
> subsystems accessorize them differently; or removing them completely from
> struct folio, letting subsystems declare their own struct folio containers.
> I don't know how that will end up, but this for now seems good and clear.
>
>> Should the usage of page_1 and page_2 also be documented here?
> You must have something interesting in mind to document about them,
> but I cannot guess what! They are for field alignment, not for use.
> (page_2 to help when/if someone needs to add another pageful.)
>
> Do you mean that I should copy the
> 	/* private: the union with struct page is transitional */
> comment from above the original "struct page page;" line I copied?
> Or give all three of them a few underscores to imply not for use?

I think the underscores with a comment about not for use could be helpful.

Thanks,

Sidhartha Kumar

> Thanks,
> Hugh
Hugh Dickins Nov. 10, 2022, 1:58 a.m. UTC | #5
On Sat, 5 Nov 2022, Kirill A. Shutemov wrote:
> On Wed, Nov 02, 2022 at 06:48:45PM -0700, Hugh Dickins wrote:
> > @@ -260,13 +267,16 @@ struct page {
> >   *    to find how many references there are to this folio.
> >   * @memcg_data: Memory Control Group data.
> >   * @_flags_1: For large folios, additional page flags.
> > - * @__head: Points to the folio.  Do not use.
> > + * @_head_1: Points to the folio.  Do not use.
> >   * @_folio_dtor: Which destructor to use for this folio.
> >   * @_folio_order: Do not use directly, call folio_order().
> >   * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
> >   * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
> >   * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
> > - * @_private_1: Do not use directly, call folio_get_private_1().
> 
> Looks like it misses
> 
>   + * @_flags_2: For large folios, additional page flags.
>   + * @_head_2: Points to the folio.  Do not use.
> 
> to match the first tail page documentation.
> 
> Otherwise the patch looks good to me:
> 
> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Many thanks for all your encouragement and reviews, Kirill.

Okay, I've added a couple of lines on those fields; but did not
want to recommend the _flags_2 field for use (by the time we run
out in _flags_1, I hope all this will be gone or look different).

I'm sending an incremental fix, once I've responded to Sidhartha.

Hugh
Hugh Dickins Nov. 10, 2022, 2:10 a.m. UTC | #6
On Wed, 9 Nov 2022, Sidhartha Kumar wrote:
> On 11/3/22 9:29 PM, Hugh Dickins wrote:
> >
> >> Should the usage of page_1 and page_2 also be documented here?
> > You must have something interesting in mind to document about them,
> > but I cannot guess what! They are for field alignment, not for use.
> > (page_2 to help when/if someone needs to add another pageful.)
> >
> > Do you mean that I should copy the
> > 	/* private: the union with struct page is transitional */
> > comment from above the original "struct page page;" line I copied?
> > Or give all three of them a few underscores to imply not for use?
> 
> I think the underscores with a comment about not for use could be helpful.

I've given them two underscores (but not to the original "struct page page",
since a build showed that used as "page" elsewhere, not just for alignment).

I'm sorry, but I've not given them any comment: I don't think they
belong in the commented fields section (_flags_1 etc), "page" is not
there; and I'm, let's be honest, terrified of dabbling in this kerneldoc
area - feel very fortunate to have escaped attack by a robot for my
additions so far.  I'll leave adding comment to you or other cognoscenti.

Hugh
diff mbox series

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 65ea34022aa2..03ecf1c5e46f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -33,22 +33,9 @@  typedef struct { unsigned long pd; } hugepd_t;
 /*
  * For HugeTLB page, there are more metadata to save in the struct page. But
  * the head struct page cannot meet our needs, so we have to abuse other tail
- * struct page to store the metadata. In order to avoid conflicts caused by
- * subsequent use of more tail struct pages, we gather these discrete indexes
- * of tail struct page here.
+ * struct page to store the metadata.
  */
-enum {
-	SUBPAGE_INDEX_SUBPOOL = 1,	/* reuse page->private */
-#ifdef CONFIG_CGROUP_HUGETLB
-	SUBPAGE_INDEX_CGROUP,		/* reuse page->private */
-	SUBPAGE_INDEX_CGROUP_RSVD,	/* reuse page->private */
-	__MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
-	SUBPAGE_INDEX_HWPOISON,
-#endif
-	__NR_USED_SUBPAGE,
-};
+#define __NR_USED_SUBPAGE 3
 
 struct hugepage_subpool {
 	spinlock_t lock;
@@ -722,11 +709,11 @@  extern unsigned int default_hstate_idx;
 
 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
 {
-	return (void *)folio_get_private_1(folio);
+	return folio->_hugetlb_subpool;
 }
 
 /*
- * hugetlb page subpool pointer located in hpage[1].private
+ * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
  */
 static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 {
@@ -736,7 +723,7 @@  static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 static inline void hugetlb_set_folio_subpool(struct folio *folio,
 					struct hugepage_subpool *subpool)
 {
-	folio_set_private_1(folio, (unsigned long)subpool);
+	folio->_hugetlb_subpool = subpool;
 }
 
 static inline void hugetlb_set_page_subpool(struct page *hpage,
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index c70f92fe493e..f706626a8063 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -24,12 +24,10 @@  struct file_region;
 #ifdef CONFIG_CGROUP_HUGETLB
 /*
  * Minimum page order trackable by hugetlb cgroup.
- * At least 4 pages are necessary for all the tracking information.
- * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
- * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
- * is the reservation usage cgroup.
+ * At least 3 pages are necessary for all the tracking information.
+ * The second tail page contains all of the hugetlb-specific fields.
  */
-#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
+#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
 
 enum hugetlb_memory_event {
 	HUGETLB_MAX,
@@ -69,21 +67,13 @@  struct hugetlb_cgroup {
 static inline struct hugetlb_cgroup *
 __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
 {
-	struct page *tail;
-
 	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
 		return NULL;
-
-	if (rsvd) {
-		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD);
-		return (void *)page_private(tail);
-	}
-
-	else {
-		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP);
-		return (void *)page_private(tail);
-	}
+	if (rsvd)
+		return folio->_hugetlb_cgroup_rsvd;
+	else
+		return folio->_hugetlb_cgroup;
 }
 
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
@@ -101,15 +91,12 @@  static inline void __set_hugetlb_cgroup(struct folio *folio,
 				       struct hugetlb_cgroup *h_cg, bool rsvd)
 {
 	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
-
 	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
 		return;
 	if (rsvd)
-		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD),
-				 (unsigned long)h_cg);
+		folio->_hugetlb_cgroup_rsvd = h_cg;
 	else
-		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP),
-				 (unsigned long)h_cg);
+		folio->_hugetlb_cgroup = h_cg;
 }
 
 static inline void set_hugetlb_cgroup(struct folio *folio,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 834022721bc6..728eb6089bba 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -145,15 +145,22 @@  struct page {
 			atomic_t compound_pincount;
 #ifdef CONFIG_64BIT
 			unsigned int compound_nr; /* 1 << compound_order */
-			unsigned long _private_1;
 #endif
 		};
-		struct {	/* Second tail page of compound page */
+		struct {	/* Second tail page of transparent huge page */
 			unsigned long _compound_pad_1;	/* compound_head */
 			unsigned long _compound_pad_2;
 			/* For both global and memcg */
 			struct list_head deferred_list;
 		};
+		struct {	/* Second tail page of hugetlb page */
+			unsigned long _hugetlb_pad_1;	/* compound_head */
+			void *hugetlb_subpool;
+			void *hugetlb_cgroup;
+			void *hugetlb_cgroup_rsvd;
+			void *hugetlb_hwpoison;
+			/* No more space on 32-bit: use third tail if more */
+		};
 		struct {	/* Page table pages */
 			unsigned long _pt_pad_1;	/* compound_head */
 			pgtable_t pmd_huge_pte; /* protected by page->ptl */
@@ -260,13 +267,16 @@  struct page {
  *    to find how many references there are to this folio.
  * @memcg_data: Memory Control Group data.
  * @_flags_1: For large folios, additional page flags.
- * @__head: Points to the folio.  Do not use.
+ * @_head_1: Points to the folio.  Do not use.
  * @_folio_dtor: Which destructor to use for this folio.
  * @_folio_order: Do not use directly, call folio_order().
  * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
- * @_private_1: Do not use directly, call folio_get_private_1().
+ * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
+ * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
  *
  * A folio is a physically, virtually and logically contiguous set
  * of bytes.  It is a power-of-two in size, and it is aligned to that
@@ -305,16 +315,31 @@  struct folio {
 		};
 		struct page page;
 	};
-	unsigned long _flags_1;
-	unsigned long __head;
-	unsigned char _folio_dtor;
-	unsigned char _folio_order;
-	atomic_t _total_mapcount;
-	atomic_t _pincount;
+	union {
+		struct {
+			unsigned long _flags_1;
+			unsigned long _head_1;
+			unsigned char _folio_dtor;
+			unsigned char _folio_order;
+			atomic_t _total_mapcount;
+			atomic_t _pincount;
 #ifdef CONFIG_64BIT
-	unsigned int _folio_nr_pages;
+			unsigned int _folio_nr_pages;
 #endif
-	unsigned long _private_1;
+		};
+		struct page page_1;
+	};
+	union {
+		struct {
+			unsigned long _flags_2;
+			unsigned long _head_2;
+			void *_hugetlb_subpool;
+			void *_hugetlb_cgroup;
+			void *_hugetlb_cgroup_rsvd;
+			void *_hugetlb_hwpoison;
+		};
+		struct page page_2;
+	};
 };
 
 #define FOLIO_MATCH(pg, fl)						\
@@ -335,16 +360,25 @@  FOLIO_MATCH(memcg_data, memcg_data);
 	static_assert(offsetof(struct folio, fl) ==			\
 			offsetof(struct page, pg) + sizeof(struct page))
 FOLIO_MATCH(flags, _flags_1);
-FOLIO_MATCH(compound_head, __head);
+FOLIO_MATCH(compound_head, _head_1);
 FOLIO_MATCH(compound_dtor, _folio_dtor);
 FOLIO_MATCH(compound_order, _folio_order);
 FOLIO_MATCH(compound_mapcount, _total_mapcount);
 FOLIO_MATCH(compound_pincount, _pincount);
 #ifdef CONFIG_64BIT
 FOLIO_MATCH(compound_nr, _folio_nr_pages);
-FOLIO_MATCH(_private_1, _private_1);
 #endif
 #undef FOLIO_MATCH
+#define FOLIO_MATCH(pg, fl)						\
+	static_assert(offsetof(struct folio, fl) ==			\
+			offsetof(struct page, pg) + 2 * sizeof(struct page))
+FOLIO_MATCH(flags, _flags_2);
+FOLIO_MATCH(compound_head, _head_2);
+FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool);
+FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup);
+FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd);
+FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison);
+#undef FOLIO_MATCH
 
 static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
 {
@@ -388,16 +422,6 @@  static inline void *folio_get_private(struct folio *folio)
 	return folio->private;
 }
 
-static inline void folio_set_private_1(struct folio *folio, unsigned long private)
-{
-	folio->_private_1 = private;
-}
-
-static inline unsigned long folio_get_private_1(struct folio *folio)
-{
-	return folio->_private_1;
-}
-
 struct page_frag_cache {
 	void * va;
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/mm/Kconfig b/mm/Kconfig
index 57e1d8c5b505..bc7e7dacfcd5 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -775,7 +775,7 @@  endchoice
 
 config THP_SWAP
 	def_bool y
-	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP
+	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT
 	help
 	  Swap transparent huge pages in one piece, without splitting.
 	  XXX: For now, swap cluster backing transparent huge page
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 779a426d2cab..63d8501001c6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1687,8 +1687,7 @@  EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
 #ifdef CONFIG_HUGETLB_PAGE
 /*
  * Struct raw_hwp_page represents information about "raw error page",
- * constructing singly linked list originated from ->private field of
- * SUBPAGE_INDEX_HWPOISON-th tail page.
+ * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
  */
 struct raw_hwp_page {
 	struct llist_node node;
@@ -1697,7 +1696,7 @@  struct raw_hwp_page {
 
 static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
 {
-	return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON);
+	return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison;
 }
 
 static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)