diff mbox series

[2/7] mm: add private field of first tail to struct page and struct folio

Message ID 20220829230014.384722-3-sidhartha.kumar@oracle.com (mailing list archive)
State New
Headers show
Series begin converting hugetlb code to folios | expand

Commit Message

Sidhartha Kumar Aug. 29, 2022, 11 p.m. UTC
Allows struct folio to store hugetlb metadata that is contained in the
private field of the first tail page.

Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
 include/linux/mm_types.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

Comments

Matthew Wilcox Aug. 30, 2022, 3:36 a.m. UTC | #1
On Mon, Aug 29, 2022 at 04:00:09PM -0700, Sidhartha Kumar wrote:
> +++ b/include/linux/mm_types.h
> @@ -144,6 +144,7 @@ struct page {
>  #ifdef CONFIG_64BIT
>  			unsigned int compound_nr; /* 1 << compound_order */
>  #endif
> +			unsigned long _private_1;
>  		};
>  		struct {	/* Second tail page of compound page */
>  			unsigned long _compound_pad_1;	/* compound_head */

Have you tested compiling this on 32-bit?  I think you need to move
the _private_1 inside the ifdef CONFIG_64BIT.

> @@ -251,6 +252,7 @@ struct page {
>   * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
>   * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
>   * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
> + * @_private_1: Do not use directly, call folio_get_private_1().
>   *
>   * A folio is a physically, virtually and logically contiguous set
>   * of bytes.  It is a power-of-two in size, and it is aligned to that
> @@ -298,6 +300,8 @@ struct folio {
>  #ifdef CONFIG_64BIT
>  	unsigned int _folio_nr_pages;
>  #endif
> +	unsigned long _private_1;

(but don't do that here!)

The intent is that _private_1 lines up with head[1].private on 32-bit.
It's a bit tricky, and I'm not sure that I'm thinking about it quite right.

>  };
>  
>  #define FOLIO_MATCH(pg, fl)						\
> @@ -325,6 +329,7 @@ FOLIO_MATCH(compound_mapcount, _total_mapcount);
>  FOLIO_MATCH(compound_pincount, _pincount);
>  #ifdef CONFIG_64BIT
>  FOLIO_MATCH(compound_nr, _folio_nr_pages);
> +FOLIO_MATCH(_private_1, _private_1);
>  #endif
>  #undef FOLIO_MATCH
>  
> @@ -370,6 +375,16 @@ static inline void *folio_get_private(struct folio *folio)
>  	return folio->private;
>  }
>  
> +static inline void folio_set_private_1(struct folio *folio, unsigned long private)
> +{
> +	folio->_private_1 = private;
> +}
> +
> +static inline unsigned long folio_get_private_1(struct folio *folio)
> +{
> +	return folio->_private_1;
> +}
> +
>  struct page_frag_cache {
>  	void * va;
>  #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> -- 
> 2.31.1
>
Mike Kravetz Sept. 1, 2022, 5:32 p.m. UTC | #2
On 08/29/22 16:00, Sidhartha Kumar wrote:
> Allows struct folio to store hugetlb metadata that is contained in the
> private field of the first tail page.
> 
> Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
> ---
>  include/linux/mm_types.h | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 8a9ee9d24973..726c5304172c 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -144,6 +144,7 @@ struct page {
>  #ifdef CONFIG_64BIT
>  			unsigned int compound_nr; /* 1 << compound_order */
>  #endif
> +			unsigned long _private_1;
>  		};
>  		struct {	/* Second tail page of compound page */
>  			unsigned long _compound_pad_1;	/* compound_head */
> @@ -251,6 +252,7 @@ struct page {
>   * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
>   * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
>   * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
> + * @_private_1: Do not use directly, call folio_get_private_1().
>   *
>   * A folio is a physically, virtually and logically contiguous set
>   * of bytes.  It is a power-of-two in size, and it is aligned to that

Not really an issue with this patch, but it made me read more of this
comment about folios.  It goes on to say ...

 * same power-of-two.  It is at least as large as %PAGE_SIZE.  If it is
 * in the page cache, it is at a file offset which is a multiple of that
 * power-of-two.  It may be mapped into userspace at an address which is
 * at an arbitrary page offset, but its kernel virtual address is aligned
 * to its size.
 */

This series is to begin converting hugetlb code to folios.  Just want to
note that 'hugetlb folios' have specific user space alignment restrictions.
So, I do not think the comment about arbitrary page offset would apply to
hugetlb.

Matthew, should we note that hugetlb is special in the comment?  Or, is it
not worth updating?

Also, folio_get_private_1 will be used for the hugetlb subpool pointer
which resides in page[1].private.  This is used in the next patch of
this series.  I'm sure you are aware that hugetlb also uses page private
in sub pages 2 and 3.  Can/will/should this method of accessing private
in sub pages be expanded to cover these as well?  Expansion can happen
later, but if this can not be expanded perhaps we should come up with
another scheme.
Matthew Wilcox Sept. 1, 2022, 6:32 p.m. UTC | #3
On Thu, Sep 01, 2022 at 10:32:43AM -0700, Mike Kravetz wrote:
> Not really an issue with this patch, but it made me read more of this
> comment about folios.  It goes on to say ...
> 
>  * same power-of-two.  It is at least as large as %PAGE_SIZE.  If it is
>  * in the page cache, it is at a file offset which is a multiple of that
>  * power-of-two.  It may be mapped into userspace at an address which is
>  * at an arbitrary page offset, but its kernel virtual address is aligned
>  * to its size.
>  */
> 
> This series is to begin converting hugetlb code to folios.  Just want to
> note that 'hugetlb folios' have specific user space alignment restrictions.
> So, I do not think the comment about arbitrary page offset would apply to
> hugetlb.
> 
> Matthew, should we note that hugetlb is special in the comment?  Or, is it
> not worth updating?

I'm open to updating it if we can find good wording.  What I'm trying
to get across there is that when dealing with folios, you can assume
that they're naturally aligned physically, logically (in the file) and
virtually (kernel address), but not necessarily virtually (user
address).  Hugetlb folios are special in that they are guaranteed to
be virtually aligned in user space, but I don't know if here is the
right place to document that.  It's an additional restriction, so code
which handles generic folios doesn't need to know it.

> Also, folio_get_private_1 will be used for the hugetlb subpool pointer
> which resides in page[1].private.  This is used in the next patch of
> this series.  I'm sure you are aware that hugetlb also uses page private
> in sub pages 2 and 3.  Can/will/should this method of accessing private
> in sub pages be expanded to cover these as well?  Expansion can happen
> later, but if this can not be expanded perhaps we should come up with
> another scheme.

There's a few ways of tackling this.  What I'm currently thinking is
that we change how hugetlbfs uses struct page to store its extra data.
It would end up looking something like this (in struct page):

+++ b/include/linux/mm_types.h
@@ -147,9 +147,10 @@ struct page {
                };
                struct {        /* Second tail page of compound page */
                        unsigned long _compound_pad_1;  /* compound_head */
-                       unsigned long _compound_pad_2;
                        /* For both global and memcg */
                        struct list_head deferred_list;
+                       unsigned long hugetlbfs_private_2;
+                       unsigned long hugetlbfs_private_3;
                };
                struct {        /* Page table pages */
                        unsigned long _pt_pad_1;        /* compound_head */

although we could use better names and/or types?  I haven't looked to
see what you're storing here yet.  And then we can make the
corresponding change to struct folio to add these elements at the
right place.

Does that sound sensible?
Mike Kravetz Sept. 1, 2022, 8:29 p.m. UTC | #4
On 09/01/22 19:32, Matthew Wilcox wrote:
> On Thu, Sep 01, 2022 at 10:32:43AM -0700, Mike Kravetz wrote:
> > Not really an issue with this patch, but it made me read more of this
> > comment about folios.  It goes on to say ...
> > 
> >  * same power-of-two.  It is at least as large as %PAGE_SIZE.  If it is
> >  * in the page cache, it is at a file offset which is a multiple of that
> >  * power-of-two.  It may be mapped into userspace at an address which is
> >  * at an arbitrary page offset, but its kernel virtual address is aligned
> >  * to its size.
> >  */
> > 
> > This series is to begin converting hugetlb code to folios.  Just want to
> > note that 'hugetlb folios' have specific user space alignment restrictions.
> > So, I do not think the comment about arbitrary page offset would apply to
> > hugetlb.
> > 
> > Matthew, should we note that hugetlb is special in the comment?  Or, is it
> > not worth updating?
> 
> I'm open to updating it if we can find good wording.  What I'm trying
> to get across there is that when dealing with folios, you can assume
> that they're naturally aligned physically, logically (in the file) and
> virtually (kernel address), but not necessarily virtually (user
> address).  Hugetlb folios are special in that they are guaranteed to
> be virtually aligned in user space, but I don't know if here is the
> right place to document that.  It's an additional restriction, so code
> which handles generic folios doesn't need to know it.

Fair enough.  No need to change.  It just caught my eye.

> > Also, folio_get_private_1 will be used for the hugetlb subpool pointer
> > which resides in page[1].private.  This is used in the next patch of
> > this series.  I'm sure you are aware that hugetlb also uses page private
> > in sub pages 2 and 3.  Can/will/should this method of accessing private
> > in sub pages be expanded to cover these as well?  Expansion can happen
> > later, but if this can not be expanded perhaps we should come up with
> > another scheme.
> 
> There's a few ways of tackling this.  What I'm currently thinking is
> that we change how hugetlbfs uses struct page to store its extra data.
> It would end up looking something like this (in struct page):
> 
> +++ b/include/linux/mm_types.h
> @@ -147,9 +147,10 @@ struct page {
>                 };
>                 struct {        /* Second tail page of compound page */
>                         unsigned long _compound_pad_1;  /* compound_head */
> -                       unsigned long _compound_pad_2;
>                         /* For both global and memcg */
>                         struct list_head deferred_list;
> +                       unsigned long hugetlbfs_private_2;
> +                       unsigned long hugetlbfs_private_3;
>                 };
>                 struct {        /* Page table pages */
>                         unsigned long _pt_pad_1;        /* compound_head */
> 
> although we could use better names and/or types?  I haven't looked to
> see what you're storing here yet.  And then we can make the
> corresponding change to struct folio to add these elements at the
> right place.

I am terrible at names.  hugetlb is storing pointers in the private fields.
FWICT, something like this would work.

> 
> Does that sound sensible?
diff mbox series

Patch

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8a9ee9d24973..726c5304172c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -144,6 +144,7 @@  struct page {
 #ifdef CONFIG_64BIT
 			unsigned int compound_nr; /* 1 << compound_order */
 #endif
+			unsigned long _private_1;
 		};
 		struct {	/* Second tail page of compound page */
 			unsigned long _compound_pad_1;	/* compound_head */
@@ -251,6 +252,7 @@  struct page {
  * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
+ * @_private_1: Do not use directly, call folio_get_private_1().
  *
  * A folio is a physically, virtually and logically contiguous set
  * of bytes.  It is a power-of-two in size, and it is aligned to that
@@ -298,6 +300,8 @@  struct folio {
 #ifdef CONFIG_64BIT
 	unsigned int _folio_nr_pages;
 #endif
+	unsigned long _private_1;
+
 };
 
 #define FOLIO_MATCH(pg, fl)						\
@@ -325,6 +329,7 @@  FOLIO_MATCH(compound_mapcount, _total_mapcount);
 FOLIO_MATCH(compound_pincount, _pincount);
 #ifdef CONFIG_64BIT
 FOLIO_MATCH(compound_nr, _folio_nr_pages);
+FOLIO_MATCH(_private_1, _private_1);
 #endif
 #undef FOLIO_MATCH
 
@@ -370,6 +375,16 @@  static inline void *folio_get_private(struct folio *folio)
 	return folio->private;
 }
 
+static inline void folio_set_private_1(struct folio *folio, unsigned long private)
+{
+	folio->_private_1 = private;
+}
+
+static inline unsigned long folio_get_private_1(struct folio *folio)
+{
+	return folio->_private_1;
+}
+
 struct page_frag_cache {
 	void * va;
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)