
[v6,net-next,1/5] net: add generic per-cpu page_pool allocator

Message ID 5b0222d3df382c22fe0fa96154ae7b27189f7ecd.1706451150.git.lorenzo@kernel.org (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series add multi-buff support for xdp running in generic mode

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1216 this patch: 1217
netdev/build_tools success Errors and warnings before: 1 this patch: 0
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1082 this patch: 1082
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1247 this patch: 1248
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 152 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Lorenzo Bianconi Jan. 28, 2024, 2:20 p.m. UTC
Introduce a generic per-cpu page_pool allocator.
Moreover, add page_pool_create_percpu() and a cpuid field in the
page_pool struct in order to recycle the page in the page_pool "hot"
cache if napi_pp_put_page() is running on the same cpu.
This is a preliminary patch to add xdp multi-buff support for xdp
running in generic mode.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 include/net/page_pool/types.h |  3 +++
 net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
 net/core/page_pool.c          | 23 ++++++++++++++++----
 net/core/skbuff.c             |  5 +++--
 4 files changed, 65 insertions(+), 6 deletions(-)
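
For illustration, a minimal sketch (not part of the patch) of how a
consumer such as generic XDP might pull a page from the per-cpu pool on
the local CPU; the per-cpu variable name "page_pool" follows this
revision, and page_pool_dev_alloc_pages() is the existing helper from
<net/page_pool/helpers.h>:

	/* Sketch only: must run in a context pinned to one CPU, e.g. softirq */
	struct page_pool *pp = this_cpu_read(page_pool);
	struct page *page = page_pool_dev_alloc_pages(pp); /* may return NULL */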

Comments

kernel test robot Jan. 29, 2024, 6:37 a.m. UTC | #1
Hi Lorenzo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Lorenzo-Bianconi/net-add-generic-per-cpu-page_pool-allocator/20240128-222506
base:   net-next/main
patch link:    https://lore.kernel.org/r/5b0222d3df382c22fe0fa96154ae7b27189f7ecd.1706451150.git.lorenzo%40kernel.org
patch subject: [PATCH v6 net-next 1/5] net: add generic per-cpu page_pool allocator
config: sparc-randconfig-r123-20240129 (https://download.01.org/0day-ci/archive/20240129/202401291436.jz59b9EZ-lkp@intel.com/config)
compiler: sparc-linux-gcc (GCC) 13.2.0
reproduce: (https://download.01.org/0day-ci/archive/20240129/202401291436.jz59b9EZ-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401291436.jz59b9EZ-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> net/core/dev.c:447:1: sparse: sparse: symbol '__pcpu_scope_page_pool' was not declared. Should it be static?
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 1 (different base types) @@     expected unsigned int [usertype] val @@     got restricted __wsum @@
   net/core/dev.c:3352:23: sparse:     expected unsigned int [usertype] val
   net/core/dev.c:3352:23: sparse:     got restricted __wsum
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: cast from restricted __wsum
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: cast from restricted __wsum
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: cast from restricted __wsum
   net/core/dev.c:3352:23: sparse: sparse: incorrect type in argument 4 (different base types) @@     expected restricted __wsum [usertype] csum @@     got unsigned int @@
   net/core/dev.c:3352:23: sparse:     expected restricted __wsum [usertype] csum
   net/core/dev.c:3352:23: sparse:     got unsigned int
   net/core/dev.c:3352:23: sparse: sparse: cast from restricted __wsum
   net/core/dev.c: note: in included file (through include/linux/mmzone.h, include/linux/gfp.h, include/linux/xarray.h, ...):
   include/linux/page-flags.h:242:46: sparse: sparse: self-comparison always evaluates to false
   net/core/dev.c:205:9: sparse: sparse: context imbalance in 'unlist_netdevice' - different lock contexts for basic block
   net/core/dev.c:3792:17: sparse: sparse: context imbalance in '__dev_queue_xmit' - different lock contexts for basic block
   net/core/dev.c:5172:17: sparse: sparse: context imbalance in 'net_tx_action' - different lock contexts for basic block
   net/core/dev.c:8833:38: sparse: sparse: self-comparison always evaluates to false

vim +/__pcpu_scope_page_pool +447 net/core/dev.c

   446	
 > 447	DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
   448
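
Sparse flags the definition because nothing declares it: either make the
variable static if it is only used in net/core/dev.c, or pair the
definition with a declaration in a shared header. A sketch of the latter
(the header location is an assumption, not from this thread):

	/* e.g. in a header shared by all users of the per-cpu pools: */
	DECLARE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
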
Yunsheng Lin Jan. 29, 2024, 12:05 p.m. UTC | #2
On 2024/1/28 22:20, Lorenzo Bianconi wrote:

>  #ifdef CONFIG_LOCKDEP
>  /*
>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
>   *
>   */
>  
> +#define SD_PAGE_POOL_RING_SIZE	256

I might have missed the reason we chose 256 here. Do we need to use a
different value for different page sizes? With a 64K page size this means
we might need to reserve 16MB (256 * 64KB) of memory for each CPU.

> +static int net_page_pool_alloc(int cpuid)
> +{
> +#if IS_ENABLED(CONFIG_PAGE_POOL)
> +	struct page_pool_params page_pool_params = {
> +		.pool_size = SD_PAGE_POOL_RING_SIZE,
> +		.nid = NUMA_NO_NODE,
> +	};
> +	struct page_pool *pp_ptr;
> +
> +	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
> +	if (IS_ERR(pp_ptr)) {
> +		pp_ptr = NULL;

unnecessary NULL setting?

> +		return -ENOMEM;
> +	}
> +
> +	per_cpu(page_pool, cpuid) = pp_ptr;
> +#endif
> +	return 0;
> +}
> +
>  /*
>   *       This is called single threaded during boot, so no need
>   *       to take the rtnl semaphore.
> @@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
>  		init_gro_hash(&sd->backlog);
>  		sd->backlog.poll = process_backlog;
>  		sd->backlog.weight = weight_p;
> +
> +		if (net_page_pool_alloc(i))
> +			goto out;
>  	}
>  
>  	dev_boot_phase = 0;
> @@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
>  	WARN_ON(rc < 0);
>  	rc = 0;
>  out:
> +	if (rc < 0) {
> +		for_each_possible_cpu(i) {
> +			struct page_pool *pp_ptr = this_cpu_read(page_pool);

this_cpu_read() -> per_cpu_ptr()?

> +
> +			if (!pp_ptr)
> +				continue;
> +
> +			page_pool_destroy(pp_ptr);
> +			per_cpu(page_pool, i) = NULL;
> +		}
> +	}
> +
>  	return rc;
>  }
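
Folding in Yunsheng's two suggestions, the error path would read roughly
like the sketch below: each CPU's slot is read explicitly with per_cpu()
instead of the local CPU's via this_cpu_read(), and the redundant NULL
assignment in net_page_pool_alloc() is dropped.

	if (rc < 0) {
		for_each_possible_cpu(i) {
			struct page_pool *pp_ptr = per_cpu(page_pool, i);

			if (!pp_ptr)
				continue;

			page_pool_destroy(pp_ptr);
			per_cpu(page_pool, i) = NULL;
		}
	}
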
Toke Høiland-Jørgensen Jan. 29, 2024, 12:45 p.m. UTC | #3
Lorenzo Bianconi <lorenzo@kernel.org> writes:

> Introduce a generic per-cpu page_pool allocator.
> Moreover, add page_pool_create_percpu() and a cpuid field in the
> page_pool struct in order to recycle the page in the page_pool "hot"
> cache if napi_pp_put_page() is running on the same cpu.
> This is a preliminary patch to add xdp multi-buff support for xdp
> running in generic mode.
>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  include/net/page_pool/types.h |  3 +++
>  net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
>  net/core/page_pool.c          | 23 ++++++++++++++++----
>  net/core/skbuff.c             |  5 +++--
>  4 files changed, 65 insertions(+), 6 deletions(-)
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 76481c465375..3828396ae60c 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -128,6 +128,7 @@ struct page_pool_stats {
>  struct page_pool {
>  	struct page_pool_params_fast p;
>  
> +	int cpuid;
>  	bool has_init_callback;
>  
>  	long frag_users;
> @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>  struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
>  				  unsigned int size, gfp_t gfp);
>  struct page_pool *page_pool_create(const struct page_pool_params *params);
> +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
> +					  int cpuid);
>  
>  struct xdp_mem_info;
>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index cb2dab0feee0..bf9ec740b09a 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -153,6 +153,8 @@
>  #include <linux/prandom.h>
>  #include <linux/once_lite.h>
>  #include <net/netdev_rx_queue.h>
> +#include <net/page_pool/types.h>
> +#include <net/page_pool/helpers.h>
>  
>  #include "dev.h"
>  #include "net-sysfs.h"
> @@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
>  DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
>  EXPORT_PER_CPU_SYMBOL(softnet_data);
>  
> +DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);

I think we should come up with a better name than just "page_pool" for
this global var. In the code below it looks like it's a local variable
that's being referenced. Maybe "global_page_pool" or "system_page_pool"
or something along those lines?

-Toke
Lorenzo Bianconi Jan. 29, 2024, 12:52 p.m. UTC | #4
> Lorenzo Bianconi <lorenzo@kernel.org> writes:
> 
> > Introduce a generic per-cpu page_pool allocator.
> > Moreover, add page_pool_create_percpu() and a cpuid field in the
> > page_pool struct in order to recycle the page in the page_pool "hot"
> > cache if napi_pp_put_page() is running on the same cpu.
> > This is a preliminary patch to add xdp multi-buff support for xdp
> > running in generic mode.
> >
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  include/net/page_pool/types.h |  3 +++
> >  net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
> >  net/core/page_pool.c          | 23 ++++++++++++++++----
> >  net/core/skbuff.c             |  5 +++--
> >  4 files changed, 65 insertions(+), 6 deletions(-)
> >
> > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> > index 76481c465375..3828396ae60c 100644
> > --- a/include/net/page_pool/types.h
> > +++ b/include/net/page_pool/types.h
> > @@ -128,6 +128,7 @@ struct page_pool_stats {
> >  struct page_pool {
> >  	struct page_pool_params_fast p;
> >  
> > +	int cpuid;
> >  	bool has_init_callback;
> >  
> >  	long frag_users;
> > @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
> >  struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
> >  				  unsigned int size, gfp_t gfp);
> >  struct page_pool *page_pool_create(const struct page_pool_params *params);
> > +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
> > +					  int cpuid);
> >  
> >  struct xdp_mem_info;
> >  
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index cb2dab0feee0..bf9ec740b09a 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -153,6 +153,8 @@
> >  #include <linux/prandom.h>
> >  #include <linux/once_lite.h>
> >  #include <net/netdev_rx_queue.h>
> > +#include <net/page_pool/types.h>
> > +#include <net/page_pool/helpers.h>
> >  
> >  #include "dev.h"
> >  #include "net-sysfs.h"
> > @@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
> >  DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
> >  EXPORT_PER_CPU_SYMBOL(softnet_data);
> >  
> > +DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
> 
> I think we should come up with a better name than just "page_pool" for
> this global var. In the code below it looks like it's a local variable
> that's being referenced. Maybe "global_page_pool" or "system_page_pool"
> or something along those lines?

ack, I will fix it. system_page_pool seems better, agree?

Regards,
Lorenzo

> 
> -Toke
>
Lorenzo Bianconi Jan. 29, 2024, 1:04 p.m. UTC | #5
> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> 
> >  #ifdef CONFIG_LOCKDEP
> >  /*
> >   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
> > @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
> >   *
> >   */
> >  
> > +#define SD_PAGE_POOL_RING_SIZE	256
> 
> I might have missed the reason we chose 256 here. Do we need to use a
> different value for different page sizes? With a 64K page size this means
> we might need to reserve 16MB (256 * 64KB) of memory for each CPU.

Honestly, I have not spent time on it; most of the current page_pool users
set pool_size to 256. Anyway, do you mean something like:

diff --git a/net/core/dev.c b/net/core/dev.c
index f70fb6cad2b2..3934a3fc5c45 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
  *
  */
 
-#define SD_PAGE_POOL_RING_SIZE	256
 static int net_page_pool_alloc(int cpuid)
 {
 #if IS_ENABLED(CONFIG_PAGE_POOL)
 	struct page_pool_params page_pool_params = {
-		.pool_size = SD_PAGE_POOL_RING_SIZE,
+		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,
 		.nid = NUMA_NO_NODE,
 	};
 	struct page_pool *pp_ptr;

> 
> > +static int net_page_pool_alloc(int cpuid)
> > +{
> > +#if IS_ENABLED(CONFIG_PAGE_POOL)
> > +	struct page_pool_params page_pool_params = {
> > +		.pool_size = SD_PAGE_POOL_RING_SIZE,
> > +		.nid = NUMA_NO_NODE,
> > +	};
> > +	struct page_pool *pp_ptr;
> > +
> > +	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
> > +	if (IS_ERR(pp_ptr)) {
> > +		pp_ptr = NULL;
> 
> unnecessary NULL setting?

ack, I will get rid of it.

> 
> > +		return -ENOMEM;
> > +	}
> > +
> > +	per_cpu(page_pool, cpuid) = pp_ptr;
> > +#endif
> > +	return 0;
> > +}
> > +
> >  /*
> >   *       This is called single threaded during boot, so no need
> >   *       to take the rtnl semaphore.
> > @@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
> >  		init_gro_hash(&sd->backlog);
> >  		sd->backlog.poll = process_backlog;
> >  		sd->backlog.weight = weight_p;
> > +
> > +		if (net_page_pool_alloc(i))
> > +			goto out;
> >  	}
> >  
> >  	dev_boot_phase = 0;
> > @@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
> >  	WARN_ON(rc < 0);
> >  	rc = 0;
> >  out:
> > +	if (rc < 0) {
> > +		for_each_possible_cpu(i) {
> > +			struct page_pool *pp_ptr = this_cpu_read(page_pool);
> 
> this_cpu_read() -> per_cpu_ptr()?

ack, I will fix it.

Regards,
Lorenzo

> 
> > +
> > +			if (!pp_ptr)
> > +				continue;
> > +
> > +			page_pool_destroy(pp_ptr);
> > +			per_cpu(page_pool, i) = NULL;
> > +		}
> > +	}
> > +
> >  	return rc;
> >  }
> 
>
Toke Høiland-Jørgensen Jan. 29, 2024, 3:44 p.m. UTC | #6
Lorenzo Bianconi <lorenzo.bianconi@redhat.com> writes:

>> Lorenzo Bianconi <lorenzo@kernel.org> writes:
>> 
>> > Introduce a generic per-cpu page_pool allocator.
>> > Moreover, add page_pool_create_percpu() and a cpuid field in the
>> > page_pool struct in order to recycle the page in the page_pool "hot"
>> > cache if napi_pp_put_page() is running on the same cpu.
>> > This is a preliminary patch to add xdp multi-buff support for xdp
>> > running in generic mode.
>> >
>> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>> > ---
>> >  include/net/page_pool/types.h |  3 +++
>> >  net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
>> >  net/core/page_pool.c          | 23 ++++++++++++++++----
>> >  net/core/skbuff.c             |  5 +++--
>> >  4 files changed, 65 insertions(+), 6 deletions(-)
>> >
>> > diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
>> > index 76481c465375..3828396ae60c 100644
>> > --- a/include/net/page_pool/types.h
>> > +++ b/include/net/page_pool/types.h
>> > @@ -128,6 +128,7 @@ struct page_pool_stats {
>> >  struct page_pool {
>> >  	struct page_pool_params_fast p;
>> >  
>> > +	int cpuid;
>> >  	bool has_init_callback;
>> >  
>> >  	long frag_users;
>> > @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>> >  struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
>> >  				  unsigned int size, gfp_t gfp);
>> >  struct page_pool *page_pool_create(const struct page_pool_params *params);
>> > +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
>> > +					  int cpuid);
>> >  
>> >  struct xdp_mem_info;
>> >  
>> > diff --git a/net/core/dev.c b/net/core/dev.c
>> > index cb2dab0feee0..bf9ec740b09a 100644
>> > --- a/net/core/dev.c
>> > +++ b/net/core/dev.c
>> > @@ -153,6 +153,8 @@
>> >  #include <linux/prandom.h>
>> >  #include <linux/once_lite.h>
>> >  #include <net/netdev_rx_queue.h>
>> > +#include <net/page_pool/types.h>
>> > +#include <net/page_pool/helpers.h>
>> >  
>> >  #include "dev.h"
>> >  #include "net-sysfs.h"
>> > @@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
>> >  DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
>> >  EXPORT_PER_CPU_SYMBOL(softnet_data);
>> >  
>> > +DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
>> 
>> I think we should come up with a better name than just "page_pool" for
>> this global var. In the code below it looks like it's a local variable
>> that's being referenced. Maybe "global_page_pool" or "system_page_pool"
>> or something along those lines?
>
> ack, I will fix it. system_page_pool seems better, agree?

Yeah, agreed :)

-Toke
Yunsheng Lin Jan. 30, 2024, 11:02 a.m. UTC | #7
On 2024/1/29 21:04, Lorenzo Bianconi wrote:
>> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
>>
>>>  #ifdef CONFIG_LOCKDEP
>>>  /*
>>>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
>>> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
>>>   *
>>>   */
>>>  
>>> +#define SD_PAGE_POOL_RING_SIZE	256
>>
>> I might have missed the reason we chose 256 here. Do we need to use a
>> different value for different page sizes? With a 64K page size this means
>> we might need to reserve 16MB (256 * 64KB) of memory for each CPU.
> 
> Honestly, I have not spent time on it; most of the current page_pool users
> set pool_size to 256. Anyway, do you mean something like:
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index f70fb6cad2b2..3934a3fc5c45 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
>   *
>   */
>  
> -#define SD_PAGE_POOL_RING_SIZE	256
>  static int net_page_pool_alloc(int cpuid)
>  {
>  #if IS_ENABLED(CONFIG_PAGE_POOL)

Isn't it better to have a config like CONFIG_PER_CPU_PAGE_POOL to enable
this feature? This config could then be selected by whoever needs the
feature.

>  	struct page_pool_params page_pool_params = {
> -		.pool_size = SD_PAGE_POOL_RING_SIZE,
> +		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,

What about other page sizes, like 16KB?
How about something like the below:
PAGE_SIZE << get_order(PER_CPU_PAGE_POOL_MAX_SIZE)

>  		.nid = NUMA_NO_NODE,
>  	};
>  	struct page_pool *pp_ptr;
Lorenzo Bianconi Jan. 30, 2024, 2:26 p.m. UTC | #8
> On 2024/1/29 21:04, Lorenzo Bianconi wrote:
> >> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> >>
> >>>  #ifdef CONFIG_LOCKDEP
> >>>  /*
> >>>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
> >>> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
> >>>   *
> >>>   */
> >>>  
> >>> +#define SD_PAGE_POOL_RING_SIZE	256
> >>
> >> I might have missed the reason we chose 256 here. Do we need to use a
> >> different value for different page sizes? With a 64K page size this means
> >> we might need to reserve 16MB (256 * 64KB) of memory for each CPU.
> > 
> > Honestly, I have not spent time on it; most of the current page_pool users
> > set pool_size to 256. Anyway, do you mean something like:
> > 
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index f70fb6cad2b2..3934a3fc5c45 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
> >   *
> >   */
> >  
> > -#define SD_PAGE_POOL_RING_SIZE	256
> >  static int net_page_pool_alloc(int cpuid)
> >  {
> >  #if IS_ENABLED(CONFIG_PAGE_POOL)
> 
> Isn't it better to have a config like CONFIG_PER_CPU_PAGE_POOL to enable
> this feature? This config could then be selected by whoever needs the
> feature.

Since it will be used for generic xdp (at least), I think this will be
enabled 99% of the time when we have bpf enabled, right?

> 
> >  	struct page_pool_params page_pool_params = {
> > -		.pool_size = SD_PAGE_POOL_RING_SIZE,
> > +		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,
> 
> What about other page sizes, like 16KB?
> How about something like the below:
> PAGE_SIZE << get_order(PER_CPU_PAGE_POOL_MAX_SIZE)

Since pool_size is the number of elements in the ptr_ring associated with
the pool, assuming we want to consume PER_CPU_PAGE_POOL_MAX_SIZE for each
cpu, something like:

PER_CPU_PAGE_POOL_MAX_SIZE / PAGE_SIZE

Regards,
Lorenzo

> 
> >  		.nid = NUMA_NO_NODE,
> >  	};
> >  	struct page_pool *pp_ptr;
>
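
A sketch of the sizing Lorenzo describes, where PER_CPU_PAGE_POOL_MAX_SIZE
is a hypothetical per-cpu memory budget (the macro name and the 1MB value
are assumptions, not from the patch):

	#define PER_CPU_PAGE_POOL_MAX_SIZE	SZ_1M	/* assumed budget */
	#define SD_PAGE_POOL_RING_SIZE \
		(PER_CPU_PAGE_POOL_MAX_SIZE / PAGE_SIZE)
	/* 4K pages -> 256 entries, 16K -> 64, 64K -> 16 */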
Jesper Dangaard Brouer Jan. 31, 2024, 12:28 p.m. UTC | #9
On 28/01/2024 15.20, Lorenzo Bianconi wrote:
> Introduce a generic per-cpu page_pool allocator.
> Moreover, add page_pool_create_percpu() and a cpuid field in the
> page_pool struct in order to recycle the page in the page_pool "hot"
> cache if napi_pp_put_page() is running on the same cpu.
> This is a preliminary patch to add xdp multi-buff support for xdp
> running in generic mode.
> 
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>   include/net/page_pool/types.h |  3 +++
>   net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
>   net/core/page_pool.c          | 23 ++++++++++++++++----
>   net/core/skbuff.c             |  5 +++--
>   4 files changed, 65 insertions(+), 6 deletions(-)
> 
[...]
> diff --git a/net/core/dev.c b/net/core/dev.c
> index cb2dab0feee0..bf9ec740b09a 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
[...]
> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
>    *
>    */
>   
> +#define SD_PAGE_POOL_RING_SIZE	256
> +static int net_page_pool_alloc(int cpuid)

I don't like the name net_page_pool_alloc().
It uses the page_pool_create APIs.

Let us rename it to net_page_pool_create()?


> +{
> +#if IS_ENABLED(CONFIG_PAGE_POOL)
> +	struct page_pool_params page_pool_params = {
> +		.pool_size = SD_PAGE_POOL_RING_SIZE,
> +		.nid = NUMA_NO_NODE,
> +	};
> +	struct page_pool *pp_ptr;
> +
> +	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
> +	if (IS_ERR(pp_ptr)) {
> +		pp_ptr = NULL;
> +		return -ENOMEM;
> +	}
> +
> +	per_cpu(page_pool, cpuid) = pp_ptr;
> +#endif
> +	return 0;
> +}
> +
>   /*
>    *       This is called single threaded during boot, so no need
>    *       to take the rtnl semaphore.
> @@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
>   		init_gro_hash(&sd->backlog);
>   		sd->backlog.poll = process_backlog;
>   		sd->backlog.weight = weight_p;
> +
> +		if (net_page_pool_alloc(i))
> +			goto out;
>   	}
>   
>   	dev_boot_phase = 0;
> @@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
>   	WARN_ON(rc < 0);
>   	rc = 0;
>   out:
> +	if (rc < 0) {
> +		for_each_possible_cpu(i) {
> +			struct page_pool *pp_ptr = this_cpu_read(page_pool);
> +
> +			if (!pp_ptr)
> +				continue;
> +
> +			page_pool_destroy(pp_ptr);
> +			per_cpu(page_pool, i) = NULL;
> +		}
> +	}
> +
>   	return rc;
>   }
>
Jesper Dangaard Brouer Jan. 31, 2024, 12:51 p.m. UTC | #10
On 29/01/2024 16.44, Toke Høiland-Jørgensen wrote:
> Lorenzo Bianconi <lorenzo.bianconi@redhat.com> writes:
> 
>>> Lorenzo Bianconi <lorenzo@kernel.org> writes:
>>>
>>>> Introduce a generic per-cpu page_pool allocator.
>>>> Moreover, add page_pool_create_percpu() and a cpuid field in the
>>>> page_pool struct in order to recycle the page in the page_pool "hot"
>>>> cache if napi_pp_put_page() is running on the same cpu.
>>>> This is a preliminary patch to add xdp multi-buff support for xdp
>>>> running in generic mode.
>>>>
>>>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>>>> ---
>>>>   include/net/page_pool/types.h |  3 +++
>>>>   net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
>>>>   net/core/page_pool.c          | 23 ++++++++++++++++----
>>>>   net/core/skbuff.c             |  5 +++--
>>>>   4 files changed, 65 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
>>>> index 76481c465375..3828396ae60c 100644
>>>> --- a/include/net/page_pool/types.h
>>>> +++ b/include/net/page_pool/types.h
>>>> @@ -128,6 +128,7 @@ struct page_pool_stats {
>>>>   struct page_pool {
>>>>   	struct page_pool_params_fast p;
>>>>   
>>>> +	int cpuid;
>>>>   	bool has_init_callback;
>>>>   
>>>>   	long frag_users;
>>>> @@ -203,6 +204,8 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
>>>>   struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
>>>>   				  unsigned int size, gfp_t gfp);
>>>>   struct page_pool *page_pool_create(const struct page_pool_params *params);
>>>> +struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
>>>> +					  int cpuid);
>>>>   
>>>>   struct xdp_mem_info;
>>>>   
>>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>>> index cb2dab0feee0..bf9ec740b09a 100644
>>>> --- a/net/core/dev.c
>>>> +++ b/net/core/dev.c
>>>> @@ -153,6 +153,8 @@
>>>>   #include <linux/prandom.h>
>>>>   #include <linux/once_lite.h>
>>>>   #include <net/netdev_rx_queue.h>
>>>> +#include <net/page_pool/types.h>
>>>> +#include <net/page_pool/helpers.h>
>>>>   
>>>>   #include "dev.h"
>>>>   #include "net-sysfs.h"
>>>> @@ -442,6 +444,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
>>>>   DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
>>>>   EXPORT_PER_CPU_SYMBOL(softnet_data);
>>>>   
>>>> +DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
>>>
>>> I think we should come up with a better name than just "page_pool" for
>>> this global var. In the code below it looks like it's a local variable
>>> that's being referenced. Maybe "global_page_pool" or "system_page_pool"
>>> or something along those lines?
>>
>> ack, I will fix it. system_page_pool seems better, agree?
> 
> Yeah, agreed :)

Naming it "system_page_pool" is good by me.

Should we add some comments about concurrency expectations when using this?
Or is this implied by the "PER_CPU" define?

The PP alloc side has a lockless array/stack, and the per_cpu machinery
already implies that only one CPU is accessing this; implicitly, we also
cannot handle reentrancy caused by preemption.  So the caller has the
responsibility to call this from an appropriate context.

--Jesper
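
A caller-side sketch of those expectations, assuming the per-cpu variable
is renamed system_page_pool as agreed earlier in the thread:

	struct page_pool *pp;

	local_bh_disable();	/* stay on this CPU, no softirq reentry */
	pp = this_cpu_read(system_page_pool);
	/* ... allocate or recycle through pp ... */
	local_bh_enable();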
Lorenzo Bianconi Jan. 31, 2024, 1:36 p.m. UTC | #11
> 
> 
> On 28/01/2024 15.20, Lorenzo Bianconi wrote:
> > Introduce a generic per-cpu page_pool allocator.
> > Moreover, add page_pool_create_percpu() and a cpuid field in the
> > page_pool struct in order to recycle the page in the page_pool "hot"
> > cache if napi_pp_put_page() is running on the same cpu.
> > This is a preliminary patch to add xdp multi-buff support for xdp
> > running in generic mode.
> > 
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >   include/net/page_pool/types.h |  3 +++
> >   net/core/dev.c                | 40 +++++++++++++++++++++++++++++++++++
> >   net/core/page_pool.c          | 23 ++++++++++++++++----
> >   net/core/skbuff.c             |  5 +++--
> >   4 files changed, 65 insertions(+), 6 deletions(-)
> > 
> [...]
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index cb2dab0feee0..bf9ec740b09a 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> [...]
> > @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
> >    *
> >    */
> > +#define SD_PAGE_POOL_RING_SIZE	256
> > +static int net_page_pool_alloc(int cpuid)
> 
> I don't like the name net_page_pool_alloc().
> It uses the page_pool_create APIs.
> 
> Let us rename it to net_page_pool_create()?

ack, I will fix it.

Regards,
Lorenzo

> 
> 
> > +{
> > +#if IS_ENABLED(CONFIG_PAGE_POOL)
> > +	struct page_pool_params page_pool_params = {
> > +		.pool_size = SD_PAGE_POOL_RING_SIZE,
> > +		.nid = NUMA_NO_NODE,
> > +	};
> > +	struct page_pool *pp_ptr;
> > +
> > +	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
> > +	if (IS_ERR(pp_ptr)) {
> > +		pp_ptr = NULL;
> > +		return -ENOMEM;
> > +	}
> > +
> > +	per_cpu(page_pool, cpuid) = pp_ptr;
> > +#endif
> > +	return 0;
> > +}
> > +
> >   /*
> >    *       This is called single threaded during boot, so no need
> >    *       to take the rtnl semaphore.
> > @@ -11738,6 +11763,9 @@ static int __init net_dev_init(void)
> >   		init_gro_hash(&sd->backlog);
> >   		sd->backlog.poll = process_backlog;
> >   		sd->backlog.weight = weight_p;
> > +
> > +		if (net_page_pool_alloc(i))
> > +			goto out;
> >   	}
> >   	dev_boot_phase = 0;
> > @@ -11765,6 +11793,18 @@ static int __init net_dev_init(void)
> >   	WARN_ON(rc < 0);
> >   	rc = 0;
> >   out:
> > +	if (rc < 0) {
> > +		for_each_possible_cpu(i) {
> > +			struct page_pool *pp_ptr = this_cpu_read(page_pool);
> > +
> > +			if (!pp_ptr)
> > +				continue;
> > +
> > +			page_pool_destroy(pp_ptr);
> > +			per_cpu(page_pool, i) = NULL;
> > +		}
> > +	}
> > +
> >   	return rc;
> >   }
Lorenzo Bianconi Feb. 1, 2024, 11:49 a.m. UTC | #12
> > On 2024/1/29 21:04, Lorenzo Bianconi wrote:
> > >> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
> > >>
> > >>>  #ifdef CONFIG_LOCKDEP
> > >>>  /*
> > >>>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
> > >>> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
> > >>>   *
> > >>>   */
> > >>>  
> > >>> +#define SD_PAGE_POOL_RING_SIZE	256
> > >>
> > >> I might have missed the reason we chose 256 here. Do we need to use a
> > >> different value for different page sizes? With a 64K page size this means
> > >> we might need to reserve 16MB (256 * 64KB) of memory for each CPU.
> > > 
> > > Honestly, I have not spent time on it; most of the current page_pool users
> > > set pool_size to 256. Anyway, do you mean something like:
> > > 
> > > diff --git a/net/core/dev.c b/net/core/dev.c
> > > index f70fb6cad2b2..3934a3fc5c45 100644
> > > --- a/net/core/dev.c
> > > +++ b/net/core/dev.c
> > > @@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
> > >   *
> > >   */
> > >  
> > > -#define SD_PAGE_POOL_RING_SIZE	256
> > >  static int net_page_pool_alloc(int cpuid)
> > >  {
> > >  #if IS_ENABLED(CONFIG_PAGE_POOL)
> > 
> > Isn't it better to have a config like CONFIG_PER_CPU_PAGE_POOL to enable
> > this feature? This config could then be selected by whoever needs the
> > feature.
> 
> Since it will be used for generic xdp (at least), I think this will be
> enabled 99% of the time when we have bpf enabled, right?
> 
> > 
> > >  	struct page_pool_params page_pool_params = {
> > > -		.pool_size = SD_PAGE_POOL_RING_SIZE,
> > > +		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,
> > 
> > What about other page sizes, like 16KB?
> > How about something like the below:
> > PAGE_SIZE << get_order(PER_CPU_PAGE_POOL_MAX_SIZE)
> 
> Since pool_size is the number of elements in the ptr_ring associated with
> the pool, assuming we want to consume PER_CPU_PAGE_POOL_MAX_SIZE for each
> cpu, something like:
> 
> PER_CPU_PAGE_POOL_MAX_SIZE / PAGE_SIZE
> 
> Regards,
> Lorenzo

Discussing with Jesper and Toke, we agreed the page_pool infrastructure
will need a way to release memory when the system is under memory
pressure, so we can defer this item to a subsequent series. What do you
think?

Regards,
Lorenzo

> 
> > 
> > >  		.nid = NUMA_NO_NODE,
> > >  	};
> > >  	struct page_pool *pp_ptr;
> >
Yunsheng Lin Feb. 1, 2024, 12:14 p.m. UTC | #13
On 2024/2/1 19:49, Lorenzo Bianconi wrote:
>>> On 2024/1/29 21:04, Lorenzo Bianconi wrote:
>>>>> On 2024/1/28 22:20, Lorenzo Bianconi wrote:
>>>>>
>>>>>>  #ifdef CONFIG_LOCKDEP
>>>>>>  /*
>>>>>>   * register_netdevice() inits txq->_xmit_lock and sets lockdep class
>>>>>> @@ -11686,6 +11690,27 @@ static void __init net_dev_struct_check(void)
>>>>>>   *
>>>>>>   */
>>>>>>  
>>>>>> +#define SD_PAGE_POOL_RING_SIZE	256
>>>>>
>>>>> I might have missed the reason we chose 256 here. Do we need to use a
>>>>> different value for different page sizes? With a 64K page size this means
>>>>> we might need to reserve 16MB (256 * 64KB) of memory for each CPU.
>>>>
>>>> Honestly, I have not spent time on it; most of the current page_pool users
>>>> set pool_size to 256. Anyway, do you mean something like:
>>>>
>>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>>> index f70fb6cad2b2..3934a3fc5c45 100644
>>>> --- a/net/core/dev.c
>>>> +++ b/net/core/dev.c
>>>> @@ -11806,12 +11806,11 @@ static void __init net_dev_struct_check(void)
>>>>   *
>>>>   */
>>>>  
>>>> -#define SD_PAGE_POOL_RING_SIZE	256
>>>>  static int net_page_pool_alloc(int cpuid)
>>>>  {
>>>>  #if IS_ENABLED(CONFIG_PAGE_POOL)
>>>
>>> Isn't it better to have a config like CONFIG_PER_CPU_PAGE_POOL to enable
>>> this feature? This config could then be selected by whoever needs the
>>> feature.
>>
>> Since it will be used for generic xdp (at least), I think this will be
>> enabled 99% of the time when we have bpf enabled, right?
>>
>>>
>>>>  	struct page_pool_params page_pool_params = {
>>>> -		.pool_size = SD_PAGE_POOL_RING_SIZE,
>>>> +		.pool_size = PAGE_SIZE < SZ_64K ? 256 : 16,
>>>
>>> What about other page sizes, like 16KB?
>>> How about something like the below:
>>> PAGE_SIZE << get_order(PER_CPU_PAGE_POOL_MAX_SIZE)
>>
>> Since pool_size is the number of elements in the ptr_ring associated with
>> the pool, assuming we want to consume PER_CPU_PAGE_POOL_MAX_SIZE for each
>> cpu, something like:
>>
>> PER_CPU_PAGE_POOL_MAX_SIZE / PAGE_SIZE

Using something like the above makes sense to me, thanks.

>>
>> Regards,
>> Lorenzo
> 
> Discussing with Jesper and Toke, we agreed the page_pool infrastructure
> will need a way to release memory when the system is under memory
> pressure, so we can defer this item to a subsequent series. What do you
> think?
>

Patch

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 76481c465375..3828396ae60c 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -128,6 +128,7 @@  struct page_pool_stats {
 struct page_pool {
 	struct page_pool_params_fast p;
 
+	int cpuid;
 	bool has_init_callback;
 
 	long frag_users;
@@ -203,6 +204,8 @@  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
 struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
 				  unsigned int size, gfp_t gfp);
 struct page_pool *page_pool_create(const struct page_pool_params *params);
+struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
+					  int cpuid);
 
 struct xdp_mem_info;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index cb2dab0feee0..bf9ec740b09a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,6 +153,8 @@ 
 #include <linux/prandom.h>
 #include <linux/once_lite.h>
 #include <net/netdev_rx_queue.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
 
 #include "dev.h"
 #include "net-sysfs.h"
@@ -442,6 +444,8 @@  static RAW_NOTIFIER_HEAD(netdev_chain);
 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
+DEFINE_PER_CPU_ALIGNED(struct page_pool *, page_pool);
+
 #ifdef CONFIG_LOCKDEP
 /*
  * register_netdevice() inits txq->_xmit_lock and sets lockdep class
@@ -11686,6 +11690,27 @@  static void __init net_dev_struct_check(void)
  *
  */
 
+#define SD_PAGE_POOL_RING_SIZE	256
+static int net_page_pool_alloc(int cpuid)
+{
+#if IS_ENABLED(CONFIG_PAGE_POOL)
+	struct page_pool_params page_pool_params = {
+		.pool_size = SD_PAGE_POOL_RING_SIZE,
+		.nid = NUMA_NO_NODE,
+	};
+	struct page_pool *pp_ptr;
+
+	pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid);
+	if (IS_ERR(pp_ptr)) {
+		pp_ptr = NULL;
+		return -ENOMEM;
+	}
+
+	per_cpu(page_pool, cpuid) = pp_ptr;
+#endif
+	return 0;
+}
+
 /*
  *       This is called single threaded during boot, so no need
  *       to take the rtnl semaphore.
@@ -11738,6 +11763,9 @@  static int __init net_dev_init(void)
 		init_gro_hash(&sd->backlog);
 		sd->backlog.poll = process_backlog;
 		sd->backlog.weight = weight_p;
+
+		if (net_page_pool_alloc(i))
+			goto out;
 	}
 
 	dev_boot_phase = 0;
@@ -11765,6 +11793,18 @@  static int __init net_dev_init(void)
 	WARN_ON(rc < 0);
 	rc = 0;
 out:
+	if (rc < 0) {
+		for_each_possible_cpu(i) {
+			struct page_pool *pp_ptr = this_cpu_read(page_pool);
+
+			if (!pp_ptr)
+				continue;
+
+			page_pool_destroy(pp_ptr);
+			per_cpu(page_pool, i) = NULL;
+		}
+	}
+
 	return rc;
 }
 
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 4933762e5a6b..89c835fcf094 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -171,13 +171,16 @@  static void page_pool_producer_unlock(struct page_pool *pool,
 }
 
 static int page_pool_init(struct page_pool *pool,
-			  const struct page_pool_params *params)
+			  const struct page_pool_params *params,
+			  int cpuid)
 {
 	unsigned int ring_qsize = 1024; /* Default */
 
 	memcpy(&pool->p, &params->fast, sizeof(pool->p));
 	memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
 
+	pool->cpuid = cpuid;
+
 	/* Validate only known flags were used */
 	if (pool->p.flags & ~(PP_FLAG_ALL))
 		return -EINVAL;
@@ -253,10 +256,12 @@  static void page_pool_uninit(struct page_pool *pool)
 }
 
 /**
- * page_pool_create() - create a page pool.
+ * page_pool_create_percpu() - create a page pool for a given cpu.
  * @params: parameters, see struct page_pool_params
+ * @cpuid: cpu identifier
  */
-struct page_pool *page_pool_create(const struct page_pool_params *params)
+struct page_pool *
+page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
 {
 	struct page_pool *pool;
 	int err;
@@ -265,7 +270,7 @@  struct page_pool *page_pool_create(const struct page_pool_params *params)
 	if (!pool)
 		return ERR_PTR(-ENOMEM);
 
-	err = page_pool_init(pool, params);
+	err = page_pool_init(pool, params, cpuid);
 	if (err < 0)
 		goto err_free;
 
@@ -282,6 +287,16 @@  struct page_pool *page_pool_create(const struct page_pool_params *params)
 	kfree(pool);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(page_pool_create_percpu);
+
+/**
+ * page_pool_create() - create a page pool
+ * @params: parameters, see struct page_pool_params
+ */
+struct page_pool *page_pool_create(const struct page_pool_params *params)
+{
+	return page_pool_create_percpu(params, -1);
+}
 EXPORT_SYMBOL(page_pool_create);
 
 static void page_pool_return_page(struct page_pool *pool, struct page *page);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index edbbef563d4d..9e5eb47b4025 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -923,9 +923,10 @@  bool napi_pp_put_page(struct page *page, bool napi_safe)
 	 */
 	if (napi_safe || in_softirq()) {
 		const struct napi_struct *napi = READ_ONCE(pp->p.napi);
+		unsigned int cpuid = smp_processor_id();
 
-		allow_direct = napi &&
-			READ_ONCE(napi->list_owner) == smp_processor_id();
+		allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
+		allow_direct |= (pp->cpuid == cpuid);
 	}
 
 	/* Driver set this to memory recycling info. Reset it on recycle.
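
For reference, a minimal example of the new API this patch adds; the pool
size and CPU id below are illustrative placeholders:

	#include <net/page_pool/types.h>

	/* Create a pool whose "hot"-cache recycling is tied to CPU 0. */
	static struct page_pool *example_percpu_pool(void)
	{
		struct page_pool_params params = {
			.pool_size	= 256,
			.nid		= NUMA_NO_NODE,
		};

		return page_pool_create_percpu(&params, 0);
	}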