[net-next,v2,2/4] page_pool: add interface to manipulate frag count in page pool

Message ID 1628217982-53533-3-git-send-email-linyunsheng@huawei.com (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Series: add frag page support in page pool

Checks

Context Check Description
netdev/cover_letter success
netdev/fixes_present success
netdev/patch_count success
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success
netdev/cc_maintainers warning 4 maintainers not CCed: jhubbard@nvidia.com yuzhao@google.com aarcange@redhat.com thunder.leizhen@huawei.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 18259 this patch: 18259
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 99 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 17415 this patch: 17415
netdev/header_inline success

Commit Message

Yunsheng Lin Aug. 6, 2021, 2:46 a.m. UTC
For 32 bit systems with 64 bit dma, dma_addr[1] is used to
store the upper 32 bit dma addr; such systems should be rare
these days.

For normal systems, dma_addr[1] in 'struct page' is not
used, so we can reuse dma_addr[1] to store the frag count,
which means how many frags this page might be split into.

In order to simplify the page frag support in the page pool,
the PAGE_POOL_DMA_USE_PP_FRAG_COUNT macro is added to indicate
the 32 bit systems with 64 bit dma, and the page frag support
in page pool is disabled for such systems.

The newly added page_pool_set_frag_count() is called to reserve
the maximum frag count before any page frag is passed to the
user. The page_pool_atomic_sub_frag_count_return() is called
when the user is done with the page frag.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/mm_types.h | 18 +++++++++++++-----
 include/net/page_pool.h  | 46 +++++++++++++++++++++++++++++++++++++++-------
 net/core/page_pool.c     |  4 ++++
 3 files changed, 56 insertions(+), 12 deletions(-)
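
For reviewers, a rough sketch of how a driver might use the new
interface (hypothetical driver code, not part of this patch; the
RX_FRAG_SIZE constant and the drv_* helpers are made up for
illustration):

	/* The pool hands out one page split into several frags: the
	 * driver reserves the maximum frag count up front and drops
	 * one count as each frag is consumed.
	 */
	#define RX_FRAG_SIZE		2048	/* made-up frag size */
	#define RX_FRAGS_PER_PAGE	(PAGE_SIZE / RX_FRAG_SIZE)

	static struct page *drv_get_rx_page(struct page_pool *pool)
	{
		struct page *page = page_pool_dev_alloc_pages(pool);

		if (!page)
			return NULL;

		/* Reserve the maximum frag count before any page frag
		 * is passed to a user of the page.
		 */
		page_pool_set_frag_count(page, RX_FRAGS_PER_PAGE);
		return page;
	}

	static void drv_put_rx_frag(struct page_pool *pool, struct page *page)
	{
		/* The last user recycles the page back to the pool. */
		if (page_pool_atomic_sub_frag_count_return(page, 1) == 0)
			page_pool_put_full_page(pool, page, false);
	}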

Comments

Jesper Dangaard Brouer Aug. 10, 2021, 2:58 p.m. UTC | #1
On 06/08/2021 04.46, Yunsheng Lin wrote:
> For 32 bit systems with 64 bit dma, dma_addr[1] is used to
> store the upper 32 bit dma addr; such systems should be rare
> these days.
> 
> For normal systems, dma_addr[1] in 'struct page' is not
> used, so we can reuse dma_addr[1] to store the frag count,
> which means how many frags this page might be split into.
> 
> In order to simplify the page frag support in the page pool,
> the PAGE_POOL_DMA_USE_PP_FRAG_COUNT macro is added to indicate
> the 32 bit systems with 64 bit dma, and the page frag support
> in page pool is disabled for such systems.
> 
> The newly added page_pool_set_frag_count() is called to reserve
> the maximum frag count before any page frag is passed to the
> user. The page_pool_atomic_sub_frag_count_return() is called
> when the user is done with the page frag.
> 
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>   include/linux/mm_types.h | 18 +++++++++++++-----
>   include/net/page_pool.h  | 46 +++++++++++++++++++++++++++++++++++++++-------
>   net/core/page_pool.c     |  4 ++++
>   3 files changed, 56 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 52bbd2b..7f8ee09 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -103,11 +103,19 @@ struct page {
>   			unsigned long pp_magic;
>   			struct page_pool *pp;
>   			unsigned long _pp_mapping_pad;
> -			/**
> -			 * @dma_addr: might require a 64-bit value on
> -			 * 32-bit architectures.
> -			 */
> -			unsigned long dma_addr[2];
> +			unsigned long dma_addr;
> +			union {
> +				/**
> +				 * dma_addr_upper: might require a 64-bit
> +				 * value on 32-bit architectures.
> +				 */
> +				unsigned long dma_addr_upper;
> +				/**
> +				 * For frag page support, not supported in
> +				 * 32-bit architectures with 64-bit DMA.
> +				 */
> +				atomic_long_t pp_frag_count;
> +			};
>   		};
>   		struct {	/* slab, slob and slub */
>   			union {
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index 8d7744d..42e6997 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -45,7 +45,10 @@
>   					* Please note DMA-sync-for-CPU is still
>   					* device driver responsibility
>   					*/
> -#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
> +#define PP_FLAG_PAGE_FRAG	BIT(2) /* for page frag feature */
> +#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP |\
> +				 PP_FLAG_DMA_SYNC_DEV |\
> +				 PP_FLAG_PAGE_FRAG)
>   
>   /*
>    * Fast allocation side cache array/stack
> @@ -198,19 +201,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>   	page_pool_put_full_page(pool, page, true);
>   }
>   
> +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT	\
> +		(sizeof(dma_addr_t) > sizeof(unsigned long))
> +
>   static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>   {
> -	dma_addr_t ret = page->dma_addr[0];
> -	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> -		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
> +	dma_addr_t ret = page->dma_addr;
> +
> +	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
> +		ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;

I find the macro name confusing.

I think it would be easier to read the code if it was called:
  PAGE_POOL_DMA_CANNOT_USE_PP_FRAG_COUNT

> +
>   	return ret;
>   }
>   
>   static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>   {
> -	page->dma_addr[0] = addr;
> -	if (sizeof(dma_addr_t) > sizeof(unsigned long))
> -		page->dma_addr[1] = upper_32_bits(addr);
> +	page->dma_addr = addr;
> +	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
> +		page->dma_addr_upper = upper_32_bits(addr);
> +}
> +
> +static inline void page_pool_set_frag_count(struct page *page, long nr)
> +{
> +	atomic_long_set(&page->pp_frag_count, nr);
> +}
> +
> +static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
> +							  long nr)
> +{
> +	long ret;
> +
> +	/* As suggested by Alexander, atomic_long_read() may cover up the
> +	 * reference count errors, so avoid calling atomic_long_read() in
> +	 * the cases of freeing or draining the page_frags, where we would
> +	 * not expect it to match or that are slowpath anyway.
> +	 */
> +	if (__builtin_constant_p(nr) &&
> +	    atomic_long_read(&page->pp_frag_count) == nr)
> +		return 0;
> +
> +	ret = atomic_long_sub_return(nr, &page->pp_frag_count);
> +	WARN_ON(ret < 0);
> +	return ret;
>   }
>   
>   static inline bool is_page_pool_compiled_in(void)
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 78838c6..68fab94 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -67,6 +67,10 @@ static int page_pool_init(struct page_pool *pool,
>   		 */
>   	}
>   
> +	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
> +	    pool->p.flags & PP_FLAG_PAGE_FRAG)
> +		return -EINVAL;

I read this as: if the page_pool uses pp_frag_count and has the flag
set, then it is invalid/not allowed, which seems wrong.

I find this code more intuitive to read:

  +	if (PAGE_POOL_DMA_CANNOT_USE_PP_FRAG_COUNT &&
  +	    pool->p.flags & PP_FLAG_PAGE_FRAG)
  +		return -EINVAL;

--Jesper
Yunsheng Lin Aug. 11, 2021, 12:48 a.m. UTC | #2
On 2021/8/10 22:58, Jesper Dangaard Brouer wrote:
> 
> 
> On 06/08/2021 04.46, Yunsheng Lin wrote:
>> For 32 bit systems with 64 bit dma, dma_addr[1] is used to
>> store the upper 32 bit dma addr; such systems should be rare
>> these days.
>>
>> For normal systems, dma_addr[1] in 'struct page' is not
>> used, so we can reuse dma_addr[1] to store the frag count,
>> which means how many frags this page might be split into.
>>
>> In order to simplify the page frag support in the page pool,
>> the PAGE_POOL_DMA_USE_PP_FRAG_COUNT macro is added to indicate
>> the 32 bit systems with 64 bit dma, and the page frag support
>> in page pool is disabled for such systems.
>>
>> The newly added page_pool_set_frag_count() is called to reserve
>> the maximum frag count before any page frag is passed to the
>> user. The page_pool_atomic_sub_frag_count_return() is called
>> when the user is done with the page frag.
>>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>> ---
>>   include/linux/mm_types.h | 18 +++++++++++++-----
>>   include/net/page_pool.h  | 46 +++++++++++++++++++++++++++++++++++++++-------
>>   net/core/page_pool.c     |  4 ++++
>>   3 files changed, 56 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
>> index 52bbd2b..7f8ee09 100644
>> --- a/include/linux/mm_types.h
>> +++ b/include/linux/mm_types.h
>> @@ -103,11 +103,19 @@ struct page {
>>               unsigned long pp_magic;
>>               struct page_pool *pp;
>>               unsigned long _pp_mapping_pad;
>> -            /**
>> -             * @dma_addr: might require a 64-bit value on
>> -             * 32-bit architectures.
>> -             */
>> -            unsigned long dma_addr[2];
>> +            unsigned long dma_addr;
>> +            union {
>> +                /**
>> +                 * dma_addr_upper: might require a 64-bit
>> +                 * value on 32-bit architectures.
>> +                 */
>> +                unsigned long dma_addr_upper;
>> +                /**
>> +                 * For frag page support, not supported in
>> +                 * 32-bit architectures with 64-bit DMA.
>> +                 */
>> +                atomic_long_t pp_frag_count;
>> +            };
>>           };
>>           struct {    /* slab, slob and slub */
>>               union {
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index 8d7744d..42e6997 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -45,7 +45,10 @@
>>                       * Please note DMA-sync-for-CPU is still
>>                       * device driver responsibility
>>                       */
>> -#define PP_FLAG_ALL        (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
>> +#define PP_FLAG_PAGE_FRAG    BIT(2) /* for page frag feature */
>> +#define PP_FLAG_ALL        (PP_FLAG_DMA_MAP |\
>> +                 PP_FLAG_DMA_SYNC_DEV |\
>> +                 PP_FLAG_PAGE_FRAG)
>>     /*
>>    * Fast allocation side cache array/stack
>> @@ -198,19 +201,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>>       page_pool_put_full_page(pool, page, true);
>>   }
>>   +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT    \
>> +        (sizeof(dma_addr_t) > sizeof(unsigned long))
>> +
>>   static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>>   {
>> -    dma_addr_t ret = page->dma_addr[0];
>> -    if (sizeof(dma_addr_t) > sizeof(unsigned long))
>> -        ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
>> +    dma_addr_t ret = page->dma_addr;
>> +
>> +    if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
>> +        ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
> 
> I find the macro name confusing.
> 
> I think it would be easier to read the code if it was called:
>  PAGE_POOL_DMA_CANNOT_USE_PP_FRAG_COUNT

Actually, there is a *DMA* in the above macro, which means the DMA
addr uses the PP_FRAG_COUNT field.
Perhaps PAGE_POOL_DMA_ADDR_UPPER_USE_PP_FRAG_COUNT is more obvious
here?
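
To make the naming discussion concrete, this is how the condition
evaluates on typical configs (sizes assumed for common architectures,
not taken from this patch):

	/* PAGE_POOL_DMA_USE_PP_FRAG_COUNT is a compile-time constant,
	 * so the branches testing it are folded away by the compiler.
	 *
	 * 64-bit arch:  sizeof(dma_addr_t) == 8,
	 *               sizeof(unsigned long) == 8 -> macro is 0,
	 *               the union slot holds pp_frag_count.
	 *
	 * 32-bit arch with 64-bit DMA (e.g. ARM with CONFIG_ARM_LPAE):
	 *               sizeof(dma_addr_t) == 8,
	 *               sizeof(unsigned long) == 4 -> macro is 1,
	 *               the union slot holds dma_addr_upper, so
	 *               PP_FLAG_PAGE_FRAG has to be rejected.
	 */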

> 
>> +
>>       return ret;
>>   }
>>     static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>>   {
>> -    page->dma_addr[0] = addr;
>> -    if (sizeof(dma_addr_t) > sizeof(unsigned long))
>> -        page->dma_addr[1] = upper_32_bits(addr);
>> +    page->dma_addr = addr;
>> +    if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
>> +        page->dma_addr_upper = upper_32_bits(addr);
>> +}
>> +
>> +static inline void page_pool_set_frag_count(struct page *page, long nr)
>> +{
>> +    atomic_long_set(&page->pp_frag_count, nr);
>> +}
>> +
>> +static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
>> +                              long nr)
>> +{
>> +    long ret;
>> +
>> +    /* As suggested by Alexander, atomic_long_read() may cover up the
>> +     * reference count errors, so avoid calling atomic_long_read() in
>> +     * the cases of freeing or draining the page_frags, where we would
>> +     * not expect it to match or that are slowpath anyway.
>> +     */
>> +    if (__builtin_constant_p(nr) &&
>> +        atomic_long_read(&page->pp_frag_count) == nr)
>> +        return 0;
>> +
>> +    ret = atomic_long_sub_return(nr, &page->pp_frag_count);
>> +    WARN_ON(ret < 0);
>> +    return ret;
>>   }
>>     static inline bool is_page_pool_compiled_in(void)
>> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
>> index 78838c6..68fab94 100644
>> --- a/net/core/page_pool.c
>> +++ b/net/core/page_pool.c
>> @@ -67,6 +67,10 @@ static int page_pool_init(struct page_pool *pool,
>>            */
>>       }
>>   +    if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
>> +        pool->p.flags & PP_FLAG_PAGE_FRAG)
>> +        return -EINVAL;
> 
> I read this as: if the page_pool uses pp_frag_count and has the flag set, then it is invalid/not allowed, which seems wrong.
> 
> I find this code more intuitive to read:
> 
>  +    if (PAGE_POOL_DMA_CANNOT_USE_PP_FRAG_COUNT &&
>  +        pool->p.flags & PP_FLAG_PAGE_FRAG)
>  +        return -EINVAL;
> 
> --Jesper
Jesper Dangaard Brouer Aug. 12, 2021, 3:17 p.m. UTC | #3
On 06/08/2021 04.46, Yunsheng Lin wrote:
> +static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
> +							  long nr)
> +{
> +	long ret;
> +
> +	/* As suggested by Alexander, atomic_long_read() may cover up the
> +	 * reference count errors, so avoid calling atomic_long_read() in
> +	 * the cases of freeing or draining the page_frags, where we would
> +	 * not expect it to match or that are slowpath anyway.
> +	 */
> +	if (__builtin_constant_p(nr) &&
> +	    atomic_long_read(&page->pp_frag_count) == nr)
> +		return 0;
> +
> +	ret = atomic_long_sub_return(nr, &page->pp_frag_count);
> +	WARN_ON(ret < 0);

I worried about this WARN_ON() as it generates an 'ud2' instruction,
which influences I-cache fetching.  But I have disassembled (objdump)
the page_pool.o binary and the ud2 gets placed last in the main
function page_pool_put_page() that uses this inlined function.
Thus, I assume this is not a problem :-)
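
For anyone who wants to repeat the check, something along these lines
works from a built kernel tree (exact output depends on compiler and
config):

	$ objdump -d net/core/page_pool.o | grep -B2 ud2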


> +	return ret;

Patch

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 52bbd2b..7f8ee09 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -103,11 +103,19 @@  struct page {
 			unsigned long pp_magic;
 			struct page_pool *pp;
 			unsigned long _pp_mapping_pad;
-			/**
-			 * @dma_addr: might require a 64-bit value on
-			 * 32-bit architectures.
-			 */
-			unsigned long dma_addr[2];
+			unsigned long dma_addr;
+			union {
+				/**
+				 * dma_addr_upper: might require a 64-bit
+				 * value on 32-bit architectures.
+				 */
+				unsigned long dma_addr_upper;
+				/**
+				 * For frag page support, not supported in
+				 * 32-bit architectures with 64-bit DMA.
+				 */
+				atomic_long_t pp_frag_count;
+			};
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 8d7744d..42e6997 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -45,7 +45,10 @@ 
 					* Please note DMA-sync-for-CPU is still
 					* device driver responsibility
 					*/
-#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
+#define PP_FLAG_PAGE_FRAG	BIT(2) /* for page frag feature */
+#define PP_FLAG_ALL		(PP_FLAG_DMA_MAP |\
+				 PP_FLAG_DMA_SYNC_DEV |\
+				 PP_FLAG_PAGE_FRAG)
 
 /*
  * Fast allocation side cache array/stack
@@ -198,19 +201,48 @@  static inline void page_pool_recycle_direct(struct page_pool *pool,
 	page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT	\
+		(sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	dma_addr_t ret = page->dma_addr[0];
-	if (sizeof(dma_addr_t) > sizeof(unsigned long))
-		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+	dma_addr_t ret = page->dma_addr;
+
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+		ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
 	return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
-	page->dma_addr[0] = addr;
-	if (sizeof(dma_addr_t) > sizeof(unsigned long))
-		page->dma_addr[1] = upper_32_bits(addr);
+	page->dma_addr = addr;
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+		page->dma_addr_upper = upper_32_bits(addr);
+}
+
+static inline void page_pool_set_frag_count(struct page *page, long nr)
+{
+	atomic_long_set(&page->pp_frag_count, nr);
+}
+
+static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
+							  long nr)
+{
+	long ret;
+
+	/* As suggested by Alexander, atomic_long_read() may cover up the
+	 * reference count errors, so avoid calling atomic_long_read() in
+	 * the cases of freeing or draining the page_frags, where we would
+	 * not expect it to match or that are slowpath anyway.
+	 */
+	if (__builtin_constant_p(nr) &&
+	    atomic_long_read(&page->pp_frag_count) == nr)
+		return 0;
+
+	ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+	WARN_ON(ret < 0);
+	return ret;
 }
 
 static inline bool is_page_pool_compiled_in(void)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 78838c6..68fab94 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -67,6 +67,10 @@  static int page_pool_init(struct page_pool *pool,
 		 */
 	}
 
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+	    pool->p.flags & PP_FLAG_PAGE_FRAG)
+		return -EINVAL;
+
 	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
 		return -ENOMEM;
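
As a closing usage note on the new -EINVAL check: a driver opting in
to the frag feature would pass the flag at pool-creation time and must
handle the failure on 32 bit systems with 64 bit dma (a sketch only;
the parameter values and the pdev variable are made up):

	struct page_pool_params pp_params = {
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG,
		.order		= 0,
		.pool_size	= 1024,		/* made-up size */
		.nid		= NUMA_NO_NODE,
		.dev		= &pdev->dev,
		.dma_dir	= DMA_FROM_DEVICE,
	};
	struct page_pool *pool = page_pool_create(&pp_params);

	if (IS_ERR(pool)) {
		/* On 32 bit systems with 64 bit dma this is -EINVAL,
		 * since pp_frag_count shares its slot with
		 * dma_addr_upper and the frag feature is unavailable.
		 */
		return PTR_ERR(pool);
	}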