
[RFC,v1,04/57] mm/page_alloc: Make page_frag_cache boot-time page size compatible

Message ID: 20241014105912.3207374-4-ryan.roberts@arm.com (mailing list archive)
State: New, archived
Series: Boot-time page size selection for arm64

Commit Message

Ryan Roberts Oct. 14, 2024, 10:58 a.m. UTC
"struct page_frag_cache" has some optimizations that depend on page
size. Let's refactor it a bit so that those optimizations can be
determined at run-time for the case where page size is a boot-time
parameter. For compile-time page size, the compiler should dead code
strip and the result is very similar to before.

One wrinkle is that we don't know if we need the size member until
runtime. So remove the ifdeffery and always define offset as u32 (needed
if PAGE_SIZE is >= 64K) and size as u16 (only used when PAGE_SIZE <=
32K). We move the members around a bit so that the overall size of the
struct remains the same; 24 bytes for 64-bit and 16 bytes on 32 bit.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>

page_alloc
---

***NOTE***
Any confused maintainers may want to read the cover note here for context:
https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/

 include/linux/mm_types.h | 13 ++++++-------
 mm/page_alloc.c          | 31 ++++++++++++++++++-------------
 2 files changed, 24 insertions(+), 20 deletions(-)
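
[Editor's illustration] To make the size claim above concrete, here is a
standalone sketch (not part of the patch) that mirrors the new layout with
userspace stdint types and checks the stated sizes at compile time:

#include <stdbool.h>
#include <stdint.h>

/* Mirror of the reworked struct from the diff below, with __u32/__u16
 * swapped for stdint equivalents so it compiles outside the kernel. */
struct page_frag_cache {
	void *va;
	unsigned int pagecnt_bias;
	uint32_t offset;
	uint16_t size;
	bool pfmemalloc;
};

/* 24 bytes with 8-byte pointers, 16 bytes with 4-byte pointers. */
_Static_assert(sizeof(void *) != 8 ||
	       sizeof(struct page_frag_cache) == 24,
	       "64-bit layout grew past 24 bytes");
_Static_assert(sizeof(void *) != 4 ||
	       sizeof(struct page_frag_cache) == 16,
	       "32-bit layout grew past 16 bytes");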

Comments

Vlastimil Babka Nov. 14, 2024, 8:23 a.m. UTC | #1
On 10/14/24 12:58, Ryan Roberts wrote:
> "struct page_frag_cache" has some optimizations that depend on page
> size. Let's refactor it a bit so that those optimizations can be
> determined at run-time for the case where page size is a boot-time
> parameter. For compile-time page size, the compiler should dead code
> strip and the result is very similar to before.
> 
> One wrinkle is that we don't know if we need the size member until
> runtime. So remove the ifdeffery and always define offset as u32 (needed
> if PAGE_SIZE is >= 64K) and size as u16 (only used when PAGE_SIZE <=
> 32K). We move the members around a bit so that the overall size of the
> struct remains the same; 24 bytes for 64-bit and 16 bytes on 32 bit.
> 
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>

Looks ok, but ideally the PAGE_FRAG_CACHE_MAX_ORDER #define should also be
replaced by some variable that's populated just once. It can be static local
to page_alloc.c as nothing else seems to use it.
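
[Editor's illustration] A minimal sketch of that suggestion; the variable
and initcall names are hypothetical, not from the thread:

/* Hypothetical once-populated replacement for the #define, static and
 * local to mm/page_alloc.c. */
static unsigned int page_frag_cache_max_order __ro_after_init;

static int __init page_frag_cache_order_init(void)
{
	/* Computed once at boot from the (possibly boot-time) page size. */
	page_frag_cache_max_order = get_order(PAGE_FRAG_CACHE_MAX_SIZE);
	return 0;
}
core_initcall(page_frag_cache_order_init);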

Ryan Roberts Nov. 14, 2024, 9:36 a.m. UTC | #2
On 14/11/2024 08:23, Vlastimil Babka wrote:
> Looks ok, but ideally the PAGE_FRAG_CACHE_MAX_ORDER #define should also be
> replaced by some variable that's populated just once. It can be static local
> to page_alloc.c as nothing else seems to use it.

I can certainly do that, but wouldn't that be penalizing a compile-time page
size configuration? My current change means that PAGE_FRAG_CACHE_MAX_ORDER still
resolves to a compile-time constant in that situation and the compiler can
eliminate conditional branches it knows will never be taken. Or perhaps you're
suggesting I conditionally make it a variable if PAGE_SIZE_MIN != PAGE_SIZE_MAX?

Thanks,
Ryan
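
[Editor's illustration] A toy userspace example of Ryan's point: with a
compile-time PAGE_SIZE both operands are constants, the condition folds,
and the optimizer deletes the dead branch just as the old #if did; with a
boot-time page size it becomes a real test. The values below are examples
only, not the kernel's actual definitions:

#include <stdio.h>

#define PAGE_SIZE 65536u		/* example: 64K compile-time pages */
#define PAGE_FRAG_CACHE_MAX_SIZE 32768u

int main(void)
{
	unsigned int size = PAGE_SIZE;

	/* Constant-false here, so the test and body are eliminated. */
	if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
		size = 0;	/* stand-in for reading nc->size */

	printf("size = %u\n", size);
	return 0;
}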

Vlastimil Babka Nov. 14, 2024, 9:43 a.m. UTC | #3
On 11/14/24 10:36, Ryan Roberts wrote:
> On 14/11/2024 08:23, Vlastimil Babka wrote:
>> Looks ok, but ideally the PAGE_FRAG_CACHE_MAX_ORDER #define should also be
>> replaced by some variable that's populated just once. It can be static local
>> to page_alloc.c as nothing else seems to use it.
> 
> I can certainly do that, but wouldn't that be penalizing a compile-time page
> size configuration? My current change means that PAGE_FRAG_CACHE_MAX_ORDER still
> resolves to a compile-time constant in that situation and the compiler can
> eliminate conditional branches it knows will never be taken. Or perhaps you're

Ah, I see.

> suggesting I conditionally make it a variable if PAGE_SIZE_MIN != PAGE_SIZE_MAX?

Given the only place it's being used, it shouldn't be worth it after all.

You can add for this patch:

Acked-by: Vlastimil Babka <vbabka@suse.cz>

> Thanks,
> Ryan

Patch

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4854249792545..0844ed7cfaa53 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -544,16 +544,15 @@  static inline void *folio_get_private(struct folio *folio)
 
 struct page_frag_cache {
 	void * va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	__u16 offset;
-	__u16 size;
-#else
-	__u32 offset;
-#endif
 	/* we maintain a pagecount bias, so that we dont dirty cache line
 	 * containing page->_refcount every time we allocate a fragment.
 	 */
-	unsigned int		pagecnt_bias;
+	unsigned int pagecnt_bias;
+	__u32 offset;
+	/* size is only used when PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE, in
+	 * which case PAGE_FRAG_CACHE_MAX_SIZE is 32K and 16 bits are enough.
+	 */
+	__u16 size;
 	bool pfmemalloc;
 };
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 91ace8ca97e21..8678103b1b396 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4822,13 +4822,18 @@  static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 	struct page *page = NULL;
 	gfp_t gfp = gfp_mask;
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
-		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
-	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
-				PAGE_FRAG_CACHE_MAX_ORDER);
-	nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
-#endif
+	if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) {
+		gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) |  __GFP_COMP |
+			   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
+		page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
+					PAGE_FRAG_CACHE_MAX_ORDER);
+		/*
+		 * Cast to silence a truncation warning on the 16-bit nc->size.
+		 * No truncation can occur: PAGE_SIZE is only less than
+		 * PAGE_FRAG_CACHE_MAX_SIZE when the latter is 32K, which fits.
+		 */
+		nc->size = (__u16)(page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE);
+	}
 	if (unlikely(!page))
 		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
 
@@ -4870,10 +4875,10 @@  void *__page_frag_alloc_align(struct page_frag_cache *nc,
 		if (!page)
 			return NULL;
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
 		/* if size can vary use size else just use PAGE_SIZE */
-		size = nc->size;
-#endif
+		if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+			size = nc->size;
+
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
@@ -4897,10 +4902,10 @@  void *__page_frag_alloc_align(struct page_frag_cache *nc,
 			goto refill;
 		}
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
 		/* if size can vary use size else just use PAGE_SIZE */
-		size = nc->size;
-#endif
+		if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+			size = nc->size;
+
 		/* OK, page count is 0, we can safely set it */
 		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
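
[Editor's illustration] For context, a hedged sketch of how a typical
caller drives this cache, modelled loosely on networking users; the
function names are illustrative and error handling is trimmed:

/* One cache per CPU or per device is typical; static here for brevity. */
static struct page_frag_cache frag_cache;

static void *alloc_rx_buffer(unsigned int fragsz)
{
	/* Carves a fragsz-byte fragment from a cached high-order page,
	 * refilling via __page_frag_cache_refill() when it runs dry. */
	return page_frag_alloc(&frag_cache, fragsz, GFP_ATOMIC);
}

static void free_rx_buffer(void *buf)
{
	/* Drops the per-fragment reference taken at allocation time. */
	page_frag_free(buf);
}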