diff mbox series

[net-next,v20,06/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'

Message ID 20241008112049.2279307-7-linyunsheng@huawei.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Replace page_frag with page_frag_cache for sk_page_frag() | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 557 this patch: 557
netdev/build_tools success Errors and warnings before: 0 (+2) this patch: 0 (+2)
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 1039 this patch: 1039
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 14792 this patch: 14792
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Yunsheng Lin Oct. 8, 2024, 11:20 a.m. UTC
Currently there is one 'struct page_frag' for every 'struct
sock' and 'struct task_struct', we are about to replace the
'struct page_frag' with 'struct page_frag_cache' for them.
Before begin the replacing, we need to ensure the size of
'struct page_frag_cache' is not bigger than the size of
'struct page_frag', as there may be tens of thousands of
'struct sock' and 'struct task_struct' instances in the
system.

By or'ing the page order & pfmemalloc with lower bits of
'va' instead of using 'u16' or 'u32' for page size and 'u8'
for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
And page address & pfmemalloc & order is unchanged for the
same page in the same 'page_frag_cache' instance, it makes
sense to fit them together.

After this patch, the size of 'struct page_frag_cache' should be
the same as the size of 'struct page_frag'.

CC: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/mm_types_task.h   | 19 +++++----
 include/linux/page_frag_cache.h | 24 ++++++++++-
 mm/page_frag_cache.c            | 75 +++++++++++++++++++++++----------
 3 files changed, 86 insertions(+), 32 deletions(-)

Comments

Alexander Duyck Oct. 9, 2024, 11:50 p.m. UTC | #1
On Tue, Oct 8, 2024 at 4:27 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> Currently there is one 'struct page_frag' for every 'struct
> sock' and 'struct task_struct', we are about to replace the
> 'struct page_frag' with 'struct page_frag_cache' for them.
> Before begin the replacing, we need to ensure the size of
> 'struct page_frag_cache' is not bigger than the size of
> 'struct page_frag', as there may be tens of thousands of
> 'struct sock' and 'struct task_struct' instances in the
> system.
>
> By or'ing the page order & pfmemalloc with lower bits of
> 'va' instead of using 'u16' or 'u32' for page size and 'u8'
> for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
> And page address & pfmemalloc & order is unchanged for the
> same page in the same 'page_frag_cache' instance, it makes
> sense to fit them together.
>
> After this patch, the size of 'struct page_frag_cache' should be
> the same as the size of 'struct page_frag'.
>
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/linux/mm_types_task.h   | 19 +++++----
>  include/linux/page_frag_cache.h | 24 ++++++++++-
>  mm/page_frag_cache.c            | 75 +++++++++++++++++++++++----------
>  3 files changed, 86 insertions(+), 32 deletions(-)
>
> diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
> index 0ac6daebdd5c..a82aa80c0ba4 100644
> --- a/include/linux/mm_types_task.h
> +++ b/include/linux/mm_types_task.h
> @@ -47,18 +47,21 @@ struct page_frag {
>  #define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
>  #define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
>  struct page_frag_cache {
> -       void *va;
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +       /* encoded_page consists of the virtual address, pfmemalloc bit and
> +        * order of a page.
> +        */
> +       unsigned long encoded_page;
> +
> +       /* we maintain a pagecount bias, so that we dont dirty cache line
> +        * containing page->_refcount every time we allocate a fragment.
> +        */
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
>         __u16 offset;
> -       __u16 size;
> +       __u16 pagecnt_bias;
>  #else
>         __u32 offset;
> +       __u32 pagecnt_bias;
>  #endif
> -       /* we maintain a pagecount bias, so that we dont dirty cache line
> -        * containing page->_refcount every time we allocate a fragment.
> -        */
> -       unsigned int            pagecnt_bias;
> -       bool pfmemalloc;
>  };
>
>  /* Track pages that require TLB flushes */
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> index 0a52f7a179c8..dba2268e451a 100644
> --- a/include/linux/page_frag_cache.h
> +++ b/include/linux/page_frag_cache.h
> @@ -3,18 +3,38 @@
>  #ifndef _LINUX_PAGE_FRAG_CACHE_H
>  #define _LINUX_PAGE_FRAG_CACHE_H
>
> +#include <linux/bits.h>
>  #include <linux/log2.h>
>  #include <linux/mm_types_task.h>
>  #include <linux/types.h>
>
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +/* Use a full byte here to enable assembler optimization as the shift
> + * operation is usually expecting a byte.
> + */
> +#define PAGE_FRAG_CACHE_ORDER_MASK             GENMASK(7, 0)
> +#else
> +/* Compiler should be able to figure out we don't read things as any value
> + * ANDed with 0 is 0.
> + */
> +#define PAGE_FRAG_CACHE_ORDER_MASK             0
> +#endif
> +
> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         (PAGE_FRAG_CACHE_ORDER_MASK + 1)
> +
> +static inline bool page_frag_encoded_page_pfmemalloc(unsigned long encoded_page)
> +{
> +       return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
> +}
> +

Rather than calling this encoded_page_pfmemalloc you might just go
with decode_pfmemalloc. Also rather than passing the unsigned long we
might just want to pass the page_frag_cache pointer.

>  static inline void page_frag_cache_init(struct page_frag_cache *nc)
>  {
> -       nc->va = NULL;
> +       nc->encoded_page = 0;
>  }
>
>  static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
>  {
> -       return !!nc->pfmemalloc;
> +       return page_frag_encoded_page_pfmemalloc(nc->encoded_page);
>  }
>
>  void page_frag_cache_drain(struct page_frag_cache *nc);
> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> index 4c8e04379cb3..4bff4de58808 100644
> --- a/mm/page_frag_cache.c
> +++ b/mm/page_frag_cache.c
> @@ -12,6 +12,7 @@
>   * be used in the "frags" portion of skb_shared_info.
>   */
>
> +#include <linux/build_bug.h>
>  #include <linux/export.h>
>  #include <linux/gfp_types.h>
>  #include <linux/init.h>
> @@ -19,9 +20,41 @@
>  #include <linux/page_frag_cache.h>
>  #include "internal.h"
>
> +static unsigned long page_frag_encode_page(struct page *page, unsigned int order,
> +                                          bool pfmemalloc)
> +{
> +       BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
> +       BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
> +
> +       return (unsigned long)page_address(page) |
> +               (order & PAGE_FRAG_CACHE_ORDER_MASK) |
> +               ((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
> +}
> +
> +static unsigned long page_frag_encoded_page_order(unsigned long encoded_page)
> +{
> +       return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
> +}
> +
> +static void *page_frag_encoded_page_address(unsigned long encoded_page)
> +{
> +       return (void *)(encoded_page & PAGE_MASK);
> +}
> +
> +static struct page *page_frag_encoded_page_ptr(unsigned long encoded_page)
> +{
> +       return virt_to_page((void *)encoded_page);
> +}
> +

Same with these. Instead of calling it encoded_page_XXX we could
probably just go with decode_page, decode_order, and decode_address.
Also instead of passing an unsigned long it would make more sense to
be passing the page_frag_cache pointer, especially once you start
pulling these out of this block.

If you are wanting to just work with the raw unsigned long value in
the file it might make more sense to drop the "page_frag_" prefix from
it and just have functions for handling your "encoded_page_" value. In
that case you might rename page_frag_encode_page to
"encoded_page_encode" or something like that.


> +static unsigned int page_frag_cache_page_size(unsigned long encoded_page)
> +{
> +       return PAGE_SIZE << page_frag_encoded_page_order(encoded_page);
> +}
> +
>  static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>                                              gfp_t gfp_mask)
>  {
> +       unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
>         struct page *page = NULL;
>         gfp_t gfp = gfp_mask;
>
> @@ -30,23 +63,26 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
>                    __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
>         page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
>                                 PAGE_FRAG_CACHE_MAX_ORDER);
> -       nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
>  #endif
> -       if (unlikely(!page))
> +       if (unlikely(!page)) {
>                 page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
> +               order = 0;
> +       }
>
> -       nc->va = page ? page_address(page) : NULL;
> +       nc->encoded_page = page ?
> +               page_frag_encode_page(page, order, page_is_pfmemalloc(page)) : 0;
>
>         return page;
>  }
>
>  void page_frag_cache_drain(struct page_frag_cache *nc)
>  {
> -       if (!nc->va)
> +       if (!nc->encoded_page)
>                 return;
>
> -       __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
> -       nc->va = NULL;
> +       __page_frag_cache_drain(page_frag_encoded_page_ptr(nc->encoded_page),
> +                               nc->pagecnt_bias);
> +       nc->encoded_page = 0;
>  }
>  EXPORT_SYMBOL(page_frag_cache_drain);
>
> @@ -63,35 +99,29 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
>                               unsigned int fragsz, gfp_t gfp_mask,
>                               unsigned int align_mask)
>  {
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> -       unsigned int size = nc->size;
> -#else
> -       unsigned int size = PAGE_SIZE;
> -#endif
> -       unsigned int offset;
> +       unsigned long encoded_page = nc->encoded_page;
> +       unsigned int size, offset;
>         struct page *page;
>
> -       if (unlikely(!nc->va)) {
> +       if (unlikely(!encoded_page)) {
>  refill:
>                 page = __page_frag_cache_refill(nc, gfp_mask);
>                 if (!page)
>                         return NULL;
>
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> -               /* if size can vary use size else just use PAGE_SIZE */
> -               size = nc->size;
> -#endif
> +               encoded_page = nc->encoded_page;
> +
>                 /* Even if we own the page, we do not use atomic_set().
>                  * This would break get_page_unless_zero() users.
>                  */
>                 page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
>
>                 /* reset page count bias and offset to start of new frag */
> -               nc->pfmemalloc = page_is_pfmemalloc(page);
>                 nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
>                 nc->offset = 0;
>         }
>
> +       size = page_frag_cache_page_size(encoded_page);
>         offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
>         if (unlikely(offset + fragsz > size)) {
>                 if (unlikely(fragsz > PAGE_SIZE)) {
> @@ -107,13 +137,14 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
>                         return NULL;
>                 }
>
> -               page = virt_to_page(nc->va);
> +               page = page_frag_encoded_page_ptr(encoded_page);
>
>                 if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
>                         goto refill;
>
> -               if (unlikely(nc->pfmemalloc)) {
> -                       free_unref_page(page, compound_order(page));
> +               if (unlikely(page_frag_encoded_page_pfmemalloc(encoded_page))) {
> +                       free_unref_page(page,
> +                                       page_frag_encoded_page_order(encoded_page));
>                         goto refill;
>                 }
>
> @@ -128,7 +159,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
>         nc->pagecnt_bias--;
>         nc->offset = offset + fragsz;
>
> -       return nc->va + offset;
> +       return page_frag_encoded_page_address(encoded_page) + offset;
>  }
>  EXPORT_SYMBOL(__page_frag_alloc_align);
>
> --
> 2.33.0
>
Yunsheng Lin Oct. 10, 2024, 11:32 a.m. UTC | #2
On 2024/10/10 7:50, Alexander Duyck wrote:

...

>> +
>> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         (PAGE_FRAG_CACHE_ORDER_MASK + 1)
>> +
>> +static inline bool page_frag_encoded_page_pfmemalloc(unsigned long encoded_page)
>> +{
>> +       return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
>> +}
>> +
> 
> Rather than calling this encoded_page_pfmemalloc you might just go
> with decode_pfmemalloc. Also rather than passing the unsigned long we
> might just want to pass the page_frag_cache pointer.
As the page_frag_encoded_page_pfmemalloc() is also called in
__page_frag_alloc_align(), and __page_frag_alloc_align() uses a
local variable for 'nc->encoded_page' to avoid fetching from
page_frag_cache pointer multi-times, so passing an 'unsigned long'
is perferred here?

I am not sure if decode_pfmemalloc() is simple enough that it
might be conflicted with naming from other subsystem in the
future. I thought about adding a '__' prefix to it, but the naming
seems long enough that some inline helper' naming is over 80 characters.

> 
>>  static inline void page_frag_cache_init(struct page_frag_cache *nc)
>>  {
>> -       nc->va = NULL;
>> +       nc->encoded_page = 0;
>>  }
>>
>>  static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
>>  {
>> -       return !!nc->pfmemalloc;
>> +       return page_frag_encoded_page_pfmemalloc(nc->encoded_page);
>>  }
>>
>>  void page_frag_cache_drain(struct page_frag_cache *nc);
>> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
>> index 4c8e04379cb3..4bff4de58808 100644
>> --- a/mm/page_frag_cache.c
>> +++ b/mm/page_frag_cache.c
>> @@ -12,6 +12,7 @@
>>   * be used in the "frags" portion of skb_shared_info.
>>   */
>>
>> +#include <linux/build_bug.h>
>>  #include <linux/export.h>
>>  #include <linux/gfp_types.h>
>>  #include <linux/init.h>
>> @@ -19,9 +20,41 @@
>>  #include <linux/page_frag_cache.h>
>>  #include "internal.h"
>>
>> +static unsigned long page_frag_encode_page(struct page *page, unsigned int order,
>> +                                          bool pfmemalloc)
>> +{
>> +       BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
>> +       BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
>> +
>> +       return (unsigned long)page_address(page) |
>> +               (order & PAGE_FRAG_CACHE_ORDER_MASK) |
>> +               ((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
>> +}
>> +
>> +static unsigned long page_frag_encoded_page_order(unsigned long encoded_page)
>> +{
>> +       return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
>> +}
>> +
>> +static void *page_frag_encoded_page_address(unsigned long encoded_page)
>> +{
>> +       return (void *)(encoded_page & PAGE_MASK);
>> +}
>> +
>> +static struct page *page_frag_encoded_page_ptr(unsigned long encoded_page)
>> +{
>> +       return virt_to_page((void *)encoded_page);
>> +}
>> +
> 
> Same with these. Instead of calling it encoded_page_XXX we could
> probably just go with decode_page, decode_order, and decode_address.
> Also instead of passing an unsigned long it would make more sense to
> be passing the page_frag_cache pointer, especially once you start
> pulling these out of this block.

For the not passing the page_frag_cache pointer part, it is the same
as above, it is mainly to avoid fetching from pointer multi-times.

> 
> If you are wanting to just work with the raw unsigned long value in
> the file it might make more sense to drop the "page_frag_" prefix from
> it and just have functions for handling your "encoded_page_" value. In
> that case you might rename page_frag_encode_page to
> "encoded_page_encode" or something like that.

It am supposing you meant 'encoded_page_decode' here instead of
"encoded_page_encode"?
Something like below?
encoded_page_decode_pfmemalloc()
encoded_page_decode_order()
encoded_page_decode_page()
encoded_page_decode_virt()

> 
>
Alexander Duyck Oct. 10, 2024, 2:33 p.m. UTC | #3
On Thu, Oct 10, 2024 at 4:32 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> On 2024/10/10 7:50, Alexander Duyck wrote:
>
> ...
>
> >> +
> >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         (PAGE_FRAG_CACHE_ORDER_MASK + 1)
> >> +
> >> +static inline bool page_frag_encoded_page_pfmemalloc(unsigned long encoded_page)
> >> +{
> >> +       return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
> >> +}
> >> +
> >
> > Rather than calling this encoded_page_pfmemalloc you might just go
> > with decode_pfmemalloc. Also rather than passing the unsigned long we
> > might just want to pass the page_frag_cache pointer.
> As the page_frag_encoded_page_pfmemalloc() is also called in
> __page_frag_alloc_align(), and __page_frag_alloc_align() uses a
> local variable for 'nc->encoded_page' to avoid fetching from
> page_frag_cache pointer multi-times, so passing an 'unsigned long'
> is perferred here?
>
> I am not sure if decode_pfmemalloc() is simple enough that it
> might be conflicted with naming from other subsystem in the
> future. I thought about adding a '__' prefix to it, but the naming
> seems long enough that some inline helper' naming is over 80 characters.

What you might do is look at having a page_frag version of the
function and a encoded_page version as I called out below with the
naming. It would make sense to call the two out separately as this is
operating on an encoded page, not a page frag. With that we can avoid
any sort of naming confusion.

> >
> >>  static inline void page_frag_cache_init(struct page_frag_cache *nc)
> >>  {
> >> -       nc->va = NULL;
> >> +       nc->encoded_page = 0;
> >>  }
> >>
> >>  static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
> >>  {
> >> -       return !!nc->pfmemalloc;
> >> +       return page_frag_encoded_page_pfmemalloc(nc->encoded_page);
> >>  }
> >>
> >>  void page_frag_cache_drain(struct page_frag_cache *nc);
> >> diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
> >> index 4c8e04379cb3..4bff4de58808 100644
> >> --- a/mm/page_frag_cache.c
> >> +++ b/mm/page_frag_cache.c
> >> @@ -12,6 +12,7 @@
> >>   * be used in the "frags" portion of skb_shared_info.
> >>   */
> >>
> >> +#include <linux/build_bug.h>
> >>  #include <linux/export.h>
> >>  #include <linux/gfp_types.h>
> >>  #include <linux/init.h>
> >> @@ -19,9 +20,41 @@
> >>  #include <linux/page_frag_cache.h>
> >>  #include "internal.h"
> >>
> >> +static unsigned long page_frag_encode_page(struct page *page, unsigned int order,
> >> +                                          bool pfmemalloc)
> >> +{
> >> +       BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
> >> +       BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
> >> +
> >> +       return (unsigned long)page_address(page) |
> >> +               (order & PAGE_FRAG_CACHE_ORDER_MASK) |
> >> +               ((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
> >> +}
> >> +
> >> +static unsigned long page_frag_encoded_page_order(unsigned long encoded_page)
> >> +{
> >> +       return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
> >> +}
> >> +
> >> +static void *page_frag_encoded_page_address(unsigned long encoded_page)
> >> +{
> >> +       return (void *)(encoded_page & PAGE_MASK);
> >> +}
> >> +
> >> +static struct page *page_frag_encoded_page_ptr(unsigned long encoded_page)
> >> +{
> >> +       return virt_to_page((void *)encoded_page);
> >> +}
> >> +
> >
> > Same with these. Instead of calling it encoded_page_XXX we could
> > probably just go with decode_page, decode_order, and decode_address.
> > Also instead of passing an unsigned long it would make more sense to
> > be passing the page_frag_cache pointer, especially once you start
> > pulling these out of this block.
>
> For the not passing the page_frag_cache pointer part, it is the same
> as above, it is mainly to avoid fetching from pointer multi-times.
>
> >
> > If you are wanting to just work with the raw unsigned long value in
> > the file it might make more sense to drop the "page_frag_" prefix from
> > it and just have functions for handling your "encoded_page_" value. In
> > that case you might rename page_frag_encode_page to
> > "encoded_page_encode" or something like that.
>
> It am supposing you meant 'encoded_page_decode' here instead of
> "encoded_page_encode"?
> Something like below?
> encoded_page_decode_pfmemalloc()
> encoded_page_decode_order()
> encoded_page_decode_page()
> encoded_page_decode_virt()

For the decodes yes. I was referring to page_frag_encode_page.
Basically the output from that isn't anything page frag, it is your
encoded page type so you could probably just call it
encoded_page_encode, or encoded_page_create or something like that.
Yunsheng Lin Oct. 11, 2024, 11:40 a.m. UTC | #4
On 2024/10/10 22:33, Alexander Duyck wrote:

...

> 
> For the decodes yes. I was referring to page_frag_encode_page.
> Basically the output from that isn't anything page frag, it is your
> encoded page type so you could probably just call it
> encoded_page_encode, or encoded_page_create or something like that.

It is kind of confusing as there is some mix of encode/encoded/decode
here, but let's be more specific if it is something like below:

static unsigned long encoded_page_create(struct page *page, unsigned int order,
					 bool pfmemalloc)
{
	BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
	BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);

	return (unsigned long)page_address(page) |
		(order & PAGE_FRAG_CACHE_ORDER_MASK) |
		((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}

static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
{
	return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}

static unsigned long encoded_page_decode_order(unsigned long encoded_page)
{
	return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
}

static void *encoded_page_decode_virt(unsigned long encoded_page)
{
	return (void *)(encoded_page & PAGE_MASK);
}

static struct page *encoded_page_decode_page(unsigned long encoded_page)
{
	return virt_to_page((void *)encoded_page);
}
Alexander Duyck Oct. 11, 2024, 3:31 p.m. UTC | #5
On Fri, Oct 11, 2024 at 4:40 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> On 2024/10/10 22:33, Alexander Duyck wrote:
>
> ...
>
> >
> > For the decodes yes. I was referring to page_frag_encode_page.
> > Basically the output from that isn't anything page frag, it is your
> > encoded page type so you could probably just call it
> > encoded_page_encode, or encoded_page_create or something like that.
>
> It is kind of confusing as there is some mix of encode/encoded/decode
> here, but let's be more specific if it is something like below:
>
> static unsigned long encoded_page_create(struct page *page, unsigned int order,
>                                          bool pfmemalloc)
> {
>         BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
>         BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
>
>         return (unsigned long)page_address(page) |
>                 (order & PAGE_FRAG_CACHE_ORDER_MASK) |
>                 ((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
> }
>
> static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
> {
>         return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
> }
>
> static unsigned long encoded_page_decode_order(unsigned long encoded_page)
> {
>         return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
> }
>
> static void *encoded_page_decode_virt(unsigned long encoded_page)
> {
>         return (void *)(encoded_page & PAGE_MASK);
> }
>
> static struct page *encoded_page_decode_page(unsigned long encoded_page)
> {
>         return virt_to_page((void *)encoded_page);
> }

Yes, this is what I had in mind. Basically the encoded_page is the
object you are working on so it becomes the prefix for the function
name and the action is the suffix, so you are either doing a "create"
to put together the object, or performing a "decode" to get the
individual components.
diff mbox series

Patch

diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 0ac6daebdd5c..a82aa80c0ba4 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -47,18 +47,21 @@  struct page_frag {
 #define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 struct page_frag_cache {
-	void *va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	/* encoded_page consists of the virtual address, pfmemalloc bit and
+	 * order of a page.
+	 */
+	unsigned long encoded_page;
+
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_refcount every time we allocate a fragment.
+	 */
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
 	__u16 offset;
-	__u16 size;
+	__u16 pagecnt_bias;
 #else
 	__u32 offset;
+	__u32 pagecnt_bias;
 #endif
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_refcount every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
-	bool pfmemalloc;
 };
 
 /* Track pages that require TLB flushes */
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 0a52f7a179c8..dba2268e451a 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -3,18 +3,38 @@ 
 #ifndef _LINUX_PAGE_FRAG_CACHE_H
 #define _LINUX_PAGE_FRAG_CACHE_H
 
+#include <linux/bits.h>
 #include <linux/log2.h>
 #include <linux/mm_types_task.h>
 #include <linux/types.h>
 
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+/* Use a full byte here to enable assembler optimization as the shift
+ * operation is usually expecting a byte.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK		GENMASK(7, 0)
+#else
+/* Compiler should be able to figure out we don't read things as any value
+ * ANDed with 0 is 0.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK		0
+#endif
+
+#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT		(PAGE_FRAG_CACHE_ORDER_MASK + 1)
+
+static inline bool page_frag_encoded_page_pfmemalloc(unsigned long encoded_page)
+{
+	return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
+}
+
 static inline void page_frag_cache_init(struct page_frag_cache *nc)
 {
-	nc->va = NULL;
+	nc->encoded_page = 0;
 }
 
 static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
 {
-	return !!nc->pfmemalloc;
+	return page_frag_encoded_page_pfmemalloc(nc->encoded_page);
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc);
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
index 4c8e04379cb3..4bff4de58808 100644
--- a/mm/page_frag_cache.c
+++ b/mm/page_frag_cache.c
@@ -12,6 +12,7 @@ 
  * be used in the "frags" portion of skb_shared_info.
  */
 
+#include <linux/build_bug.h>
 #include <linux/export.h>
 #include <linux/gfp_types.h>
 #include <linux/init.h>
@@ -19,9 +20,41 @@ 
 #include <linux/page_frag_cache.h>
 #include "internal.h"
 
+static unsigned long page_frag_encode_page(struct page *page, unsigned int order,
+					   bool pfmemalloc)
+{
+	BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
+	BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
+
+	return (unsigned long)page_address(page) |
+		(order & PAGE_FRAG_CACHE_ORDER_MASK) |
+		((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
+}
+
+static unsigned long page_frag_encoded_page_order(unsigned long encoded_page)
+{
+	return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
+}
+
+static void *page_frag_encoded_page_address(unsigned long encoded_page)
+{
+	return (void *)(encoded_page & PAGE_MASK);
+}
+
+static struct page *page_frag_encoded_page_ptr(unsigned long encoded_page)
+{
+	return virt_to_page((void *)encoded_page);
+}
+
+static unsigned int page_frag_cache_page_size(unsigned long encoded_page)
+{
+	return PAGE_SIZE << page_frag_encoded_page_order(encoded_page);
+}
+
 static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 					     gfp_t gfp_mask)
 {
+	unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
 	struct page *page = NULL;
 	gfp_t gfp = gfp_mask;
 
@@ -30,23 +63,26 @@  static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
 	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
 				PAGE_FRAG_CACHE_MAX_ORDER);
-	nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
 #endif
-	if (unlikely(!page))
+	if (unlikely(!page)) {
 		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+		order = 0;
+	}
 
-	nc->va = page ? page_address(page) : NULL;
+	nc->encoded_page = page ?
+		page_frag_encode_page(page, order, page_is_pfmemalloc(page)) : 0;
 
 	return page;
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc)
 {
-	if (!nc->va)
+	if (!nc->encoded_page)
 		return;
 
-	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
-	nc->va = NULL;
+	__page_frag_cache_drain(page_frag_encoded_page_ptr(nc->encoded_page),
+				nc->pagecnt_bias);
+	nc->encoded_page = 0;
 }
 EXPORT_SYMBOL(page_frag_cache_drain);
 
@@ -63,35 +99,29 @@  void *__page_frag_alloc_align(struct page_frag_cache *nc,
 			      unsigned int fragsz, gfp_t gfp_mask,
 			      unsigned int align_mask)
 {
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	unsigned int size = nc->size;
-#else
-	unsigned int size = PAGE_SIZE;
-#endif
-	unsigned int offset;
+	unsigned long encoded_page = nc->encoded_page;
+	unsigned int size, offset;
 	struct page *page;
 
-	if (unlikely(!nc->va)) {
+	if (unlikely(!encoded_page)) {
 refill:
 		page = __page_frag_cache_refill(nc, gfp_mask);
 		if (!page)
 			return NULL;
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-		/* if size can vary use size else just use PAGE_SIZE */
-		size = nc->size;
-#endif
+		encoded_page = nc->encoded_page;
+
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
 		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
 
 		/* reset page count bias and offset to start of new frag */
-		nc->pfmemalloc = page_is_pfmemalloc(page);
 		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
 		nc->offset = 0;
 	}
 
+	size = page_frag_cache_page_size(encoded_page);
 	offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
 	if (unlikely(offset + fragsz > size)) {
 		if (unlikely(fragsz > PAGE_SIZE)) {
@@ -107,13 +137,14 @@  void *__page_frag_alloc_align(struct page_frag_cache *nc,
 			return NULL;
 		}
 
-		page = virt_to_page(nc->va);
+		page = page_frag_encoded_page_ptr(encoded_page);
 
 		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
 			goto refill;
 
-		if (unlikely(nc->pfmemalloc)) {
-			free_unref_page(page, compound_order(page));
+		if (unlikely(page_frag_encoded_page_pfmemalloc(encoded_page))) {
+			free_unref_page(page,
+					page_frag_encoded_page_order(encoded_page));
 			goto refill;
 		}
 
@@ -128,7 +159,7 @@  void *__page_frag_alloc_align(struct page_frag_cache *nc,
 	nc->pagecnt_bias--;
 	nc->offset = offset + fragsz;
 
-	return nc->va + offset;
+	return page_frag_encoded_page_address(encoded_page) + offset;
 }
 EXPORT_SYMBOL(__page_frag_alloc_align);