diff mbox series

[rfc,v2,2/5] page_pool: add interface for getting and setting pagecnt_bias

Message ID 1625903002-31619-3-git-send-email-linyunsheng@huawei.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series add elevated refcnt support for page pool | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers warning 5 maintainers not CCed: yhs@fb.com kpsingh@kernel.org andrii@kernel.org kafai@fb.com songliubraving@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 6118 this patch: 6118
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 40 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 6175 this patch: 6175
netdev/header_inline success Link

Commit Message

Yunsheng Lin July 10, 2021, 7:43 a.m. UTC
As suggested by Alexander, "A DMA mapping should be page
aligned anyway so the lower 12 bits would be reserved 0",
so it might make more sense to repurpose the lower 12 bits
of the dma address to store the pagecnt_bias for elevated
refcnt case in page pool.

As newly added page_pool_get_pagecnt_bias() may be called
outside of the softirq context, so annotate the access to
page->dma_addr[0] with READ_ONCE() and WRITE_ONCE().

Other three interfaces using page->dma_addr[0] is only called
in the softirq context during normal rx processing, hopefully
the barrier in the rx processing will ensure the correct order
between getting and setting pagecnt_bias.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/net/page_pool.h | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

Comments

Alexander Duyck July 10, 2021, 4:55 p.m. UTC | #1
On Sat, Jul 10, 2021 at 12:44 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> As suggested by Alexander, "A DMA mapping should be page
> aligned anyway so the lower 12 bits would be reserved 0",
> so it might make more sense to repurpose the lower 12 bits
> of the dma address to store the pagecnt_bias for elevated
> refcnt case in page pool.
>
> As newly added page_pool_get_pagecnt_bias() may be called
> outside of the softirq context, so annotate the access to
> page->dma_addr[0] with READ_ONCE() and WRITE_ONCE().
>
> Other three interfaces using page->dma_addr[0] is only called
> in the softirq context during normal rx processing, hopefully
> the barrier in the rx processing will ensure the correct order
> between getting and setting pagecnt_bias.
>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/net/page_pool.h | 24 ++++++++++++++++++++++--
>  1 file changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
> index 8d7744d..5746f17 100644
> --- a/include/net/page_pool.h
> +++ b/include/net/page_pool.h
> @@ -200,7 +200,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -       dma_addr_t ret = page->dma_addr[0];
> +       dma_addr_t ret = READ_ONCE(page->dma_addr[0]) & PAGE_MASK;
>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>                 ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
>         return ret;
> @@ -208,11 +208,31 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>
>  static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>  {
> -       page->dma_addr[0] = addr;
> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
> +
> +       dma_addr_0 &= ~PAGE_MASK;
> +       dma_addr_0 |= (addr & PAGE_MASK);

So rather than doing all this testing and clearing it would probably
be better to add a return value to the function and do something like:

if (WARN_ON(dma_addr_0 & ~PAGE_MASK))
    return -1;

That way you could have page_pool_dma_map unmap, free the page, and
return false indicating that the DMA mapping failed with a visible
error in the event that our expectionat that the dma_addr is page
aligned is ever violated.

> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
> +
>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>                 page->dma_addr[1] = upper_32_bits(addr);
>  }
>
> +static inline int page_pool_get_pagecnt_bias(struct page *page)
> +{
> +       return (READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK);

You don't need the parenthesis around the READ_ONCE and PAGE_MASK.

> +}
> +
> +static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
> +{
> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
> +
> +       dma_addr_0 &= PAGE_MASK;
> +       dma_addr_0 |= (bias & ~PAGE_MASK);
> +
> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
> +}
> +
>  static inline bool is_page_pool_compiled_in(void)
>  {
>  #ifdef CONFIG_PAGE_POOL
> --
> 2.7.4
>
Yunsheng Lin July 12, 2021, 7:44 a.m. UTC | #2
On 2021/7/11 0:55, Alexander Duyck wrote:
> On Sat, Jul 10, 2021 at 12:44 AM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>>
>> As suggested by Alexander, "A DMA mapping should be page
>> aligned anyway so the lower 12 bits would be reserved 0",
>> so it might make more sense to repurpose the lower 12 bits
>> of the dma address to store the pagecnt_bias for elevated
>> refcnt case in page pool.
>>
>> As newly added page_pool_get_pagecnt_bias() may be called
>> outside of the softirq context, so annotate the access to
>> page->dma_addr[0] with READ_ONCE() and WRITE_ONCE().
>>
>> Other three interfaces using page->dma_addr[0] is only called
>> in the softirq context during normal rx processing, hopefully
>> the barrier in the rx processing will ensure the correct order
>> between getting and setting pagecnt_bias.
>>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>> ---
>>  include/net/page_pool.h | 24 ++++++++++++++++++++++--
>>  1 file changed, 22 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/net/page_pool.h b/include/net/page_pool.h
>> index 8d7744d..5746f17 100644
>> --- a/include/net/page_pool.h
>> +++ b/include/net/page_pool.h
>> @@ -200,7 +200,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
>>
>>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>>  {
>> -       dma_addr_t ret = page->dma_addr[0];
>> +       dma_addr_t ret = READ_ONCE(page->dma_addr[0]) & PAGE_MASK;
>>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>>                 ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
>>         return ret;
>> @@ -208,11 +208,31 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>>
>>  static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>>  {
>> -       page->dma_addr[0] = addr;
>> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
>> +
>> +       dma_addr_0 &= ~PAGE_MASK;
>> +       dma_addr_0 |= (addr & PAGE_MASK);
> 
> So rather than doing all this testing and clearing it would probably
> be better to add a return value to the function and do something like:
> 
> if (WARN_ON(dma_addr_0 & ~PAGE_MASK))
>     return -1;
> 
> That way you could have page_pool_dma_map unmap, free the page, and
> return false indicating that the DMA mapping failed with a visible
> error in the event that our expectionat that the dma_addr is page
> aligned is ever violated.

I suppose the above is based on that page_pool_set_dma_addr() is called
only once before page_pool_set_pagecnt_bias(), right? so we could:

static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
	if (WARN_ON(dma_addr_0 & ~PAGE_MASK))
		return false;

	page->dma_addr[0] = addr;
	if (sizeof(dma_addr_t) > sizeof(unsigned long))
		page->dma_addr[1] = upper_32_bits(addr);

	return true;
}

> 
>> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
>> +
>>         if (sizeof(dma_addr_t) > sizeof(unsigned long))
>>                 page->dma_addr[1] = upper_32_bits(addr);
>>  }
>>
>> +static inline int page_pool_get_pagecnt_bias(struct page *page)
>> +{
>> +       return (READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK);
> 
> You don't need the parenthesis around the READ_ONCE and PAGE_MASK.

ok.

> 
>> +}
>> +
>> +static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
>> +{
>> +       unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
>> +
>> +       dma_addr_0 &= PAGE_MASK;
>> +       dma_addr_0 |= (bias & ~PAGE_MASK);
>> +
>> +       WRITE_ONCE(page->dma_addr[0], dma_addr_0);
>> +}
>> +
>>  static inline bool is_page_pool_compiled_in(void)
>>  {
>>  #ifdef CONFIG_PAGE_POOL
>> --
>> 2.7.4
>>
> .
>
diff mbox series

Patch

diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 8d7744d..5746f17 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -200,7 +200,7 @@  static inline void page_pool_recycle_direct(struct page_pool *pool,
 
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	dma_addr_t ret = page->dma_addr[0];
+	dma_addr_t ret = READ_ONCE(page->dma_addr[0]) & PAGE_MASK;
 	if (sizeof(dma_addr_t) > sizeof(unsigned long))
 		ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
 	return ret;
@@ -208,11 +208,31 @@  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
-	page->dma_addr[0] = addr;
+	unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
+
+	dma_addr_0 &= ~PAGE_MASK;
+	dma_addr_0 |= (addr & PAGE_MASK);
+	WRITE_ONCE(page->dma_addr[0], dma_addr_0);
+
 	if (sizeof(dma_addr_t) > sizeof(unsigned long))
 		page->dma_addr[1] = upper_32_bits(addr);
 }
 
+static inline int page_pool_get_pagecnt_bias(struct page *page)
+{
+	return (READ_ONCE(page->dma_addr[0]) & ~PAGE_MASK);
+}
+
+static inline void page_pool_set_pagecnt_bias(struct page *page, int bias)
+{
+	unsigned long dma_addr_0 = READ_ONCE(page->dma_addr[0]);
+
+	dma_addr_0 &= PAGE_MASK;
+	dma_addr_0 |= (bias & ~PAGE_MASK);
+
+	WRITE_ONCE(page->dma_addr[0], dma_addr_0);
+}
+
 static inline bool is_page_pool_compiled_in(void)
 {
 #ifdef CONFIG_PAGE_POOL