@@ -422,7 +422,21 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
*/
static inline dma_addr_t page_pool_get_dma_addr(const struct page *page)
{
- return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page));
+ dma_addr_t ret = page->dma_addr;
+
+ if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+ ret <<= PAGE_SHIFT;
+
+ return ret;
+}
+
+static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool,
+ const dma_addr_t dma_addr,
+ u32 offset, u32 dma_sync_size)
+{
+ dma_sync_single_range_for_cpu(pool->p.dev, dma_addr,
+ offset + pool->p.offset, dma_sync_size,
+ page_pool_get_dma_dir(pool));
}
/**
@@ -441,10 +455,21 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool,
const struct page *page,
u32 offset, u32 dma_sync_size)
{
- dma_sync_single_range_for_cpu(pool->p.dev,
- page_pool_get_dma_addr(page),
- offset + pool->p.offset, dma_sync_size,
- page_pool_get_dma_dir(pool));
+ __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset,
+ dma_sync_size);
+}
+
+static inline void
+page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool,
+ const netmem_ref netmem, u32 offset,
+ u32 dma_sync_size)
+{
+ if (!pool->dma_sync_for_cpu)
+ return;
+
+ __page_pool_dma_sync_for_cpu(pool,
+ page_pool_get_dma_addr_netmem(netmem),
+ offset, dma_sync_size);
}
static inline bool page_pool_put(struct page_pool *pool)
@@ -164,7 +164,8 @@ struct page_pool {
bool has_init_callback:1; /* slow::init_callback is set */
bool dma_map:1; /* Perform DMA mapping */
- bool dma_sync:1; /* Perform DMA sync */
+ bool dma_sync:1; /* Perform DMA sync for device */
+ bool dma_sync_for_cpu:1; /* Perform DMA sync for cpu */
#ifdef CONFIG_PAGE_POOL_STATS
bool system:1; /* This is a global percpu pool */
#endif
@@ -335,6 +335,7 @@ int mp_dmabuf_devmem_init(struct page_pool *pool)
* dma_sync_for_cpu/device. Force disable dma_sync.
*/
pool->dma_sync = false;
+ pool->dma_sync_for_cpu = false;
if (pool->p.order != 0)
return -E2BIG;
@@ -201,6 +201,7 @@ static int page_pool_init(struct page_pool *pool,
memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow));
pool->cpuid = cpuid;
+ pool->dma_sync_for_cpu = true;
/* Validate only known flags were used */
if (pool->slow.flags & ~PP_FLAG_ALL)
dmabuf dma-addresses should not be dma_sync'd for CPU/device. Typically its the driver responsibility to dma_sync for CPU, but the driver should not dma_sync for CPU if the netmem is actually coming from a dmabuf memory provider. The page_pool already exposes a helper for dma_sync_for_cpu: page_pool_dma_sync_for_cpu. Upgrade this existing helper to handle netmem, and have it skip dma_sync if the memory is from a dmabuf memory provider. Drivers should migrate to using this helper when adding support for netmem. Also minimize the impact on the dma syncing performance for pages. Special case the dma-sync path for pages to not go through the overhead checks for dma-syncing and conversion to netmem. Cc: Alexander Lobakin <aleksander.lobakin@intel.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Signed-off-by: Mina Almasry <almasrymina@google.com> --- v4: - Special case pages to reduce perf overhead (Jakub) - Add page_pool flag to set dma syncing for cpu (Jakub) --- include/net/page_pool/helpers.h | 35 ++++++++++++++++++++++++++++----- include/net/page_pool/types.h | 3 ++- net/core/devmem.c | 1 + net/core/page_pool.c | 1 + 4 files changed, 34 insertions(+), 6 deletions(-)