diff mbox series

[net-next,v3,10/13] mm: page_frag: introduce prepare/probe/commit API

Message ID 20240508133408.54708-11-linyunsheng@huawei.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series First try to replace page_frag with page_frag_cache | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5763 this patch: 5763
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 3 of 3 maintainers
netdev/build_clang success Errors and warnings before: 1057 this patch: 1057
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6044 this patch: 6044
netdev/checkpatch warning WARNING: Do not crash the kernel unless it is absolutely unavoidable--use WARN_ON_ONCE() plus recovery code (if feasible) instead of BUG() or variants WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Yunsheng Lin May 8, 2024, 1:34 p.m. UTC
Many use cases need a minimum amount of memory in order to
make forward progress, but perform better if more memory is
available, or need to probe the cache info in order to use
any memory available for frag coalescing.

Currently the skb_page_frag_refill() API is used to handle the
above use cases, but the caller needs to know about the internal
details and access the data fields of 'struct page_frag' to meet
the requirements of those use cases, and its implementation is
similar to the one in the mm subsystem.

To unify those two page_frag implementations, introduce a
prepare API, which ensures the minimum memory requirement is
satisfied and returns how much memory is actually available to
the caller, and a probe API, which reports the currently
available memory to the caller without refilling the cache.
The caller then either calls the commit API to report how much
memory it actually used, or skips the commit if it decides not
to use any memory.

As the next patch is about to replace 'struct page_frag' with
'struct page_frag_cache' in linux/sched.h, which is included
by asm-offsets.s, using virt_to_page() in an inline helper
of page_frag_cache.h causes a "‘vmemmap’ undeclared"
compile error for asm-offsets.s; use a macro for the probe API
to avoid that compile error.

CC: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/page_frag_cache.h |  86 ++++++++++++++++++++++++
 mm/page_frag_cache.c            | 113 ++++++++++++++++++++++++++++++++
 2 files changed, 199 insertions(+)

Comments

Mat Martineau May 10, 2024, 5:38 p.m. UTC | #1
On Wed, 8 May 2024, Yunsheng Lin wrote:

> There are many use cases that need minimum memory in order
> for forward progressing, but more performant if more memory
> is available or need to probe the cache info to use any
> memory available for frag coalescing reason.
>
> Currently skb_page_frag_refill() API is used to solve the
> above usecases, caller need to know about the internal detail
> and access the data field of 'struct page_frag' to meet the
> requirement of the above use cases and its implementation is
> similar to the one in mm subsystem.
>
> To unify those two page_frag implementations, introduce a
> prepare API to ensure minimum memory is satisfied and return
> how much the actual memory is available to the caller and a
> probe API to report the current available memory to caller
> without doing cache refilling. The caller needs to either call
> the commit API to report how much memory it actually uses, or
> not do so if deciding to not use any memory.
>
> As next patch is about to replace 'struct page_frag' with
> 'struct page_frag_cache' in linux/sched.h, which is included
> by the asm-offsets.s, using the virt_to_page() in the inline
> helper of page_frag_cache.h cause a "‘vmemmap’ undeclared"
> compiling error for asm-offsets.s, use a macro for probe API
> to avoid that compiling error.
>
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
> include/linux/page_frag_cache.h |  86 ++++++++++++++++++++++++
> mm/page_frag_cache.c            | 113 ++++++++++++++++++++++++++++++++
> 2 files changed, 199 insertions(+)
>
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> index 88e91ee57b91..30893638155b 100644
> --- a/include/linux/page_frag_cache.h
> +++ b/include/linux/page_frag_cache.h
> @@ -71,6 +71,21 @@ static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
> 	return encoded_page_pfmemalloc(nc->encoded_va);
> }
>
> +static inline unsigned int page_frag_cache_page_size(struct encoded_va *encoded_va)
> +{
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +	return PAGE_SIZE << encoded_page_order(encoded_va);
> +#else
> +	return PAGE_SIZE;
> +#endif
> +}
> +
> +static inline unsigned int __page_frag_cache_page_offset(struct encoded_va *encoded_va,
> +							 unsigned int remaining)
> +{
> +	return page_frag_cache_page_size(encoded_va) - remaining;
> +}
> +
> void page_frag_cache_drain(struct page_frag_cache *nc);
> void __page_frag_cache_drain(struct page *page, unsigned int count);
> void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
> @@ -85,12 +100,83 @@ static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
> 	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, -align);
> }
>
> +static inline unsigned int page_frag_cache_page_offset(const struct page_frag_cache *nc)
> +{
> +	return __page_frag_cache_page_offset(nc->encoded_va, nc->remaining);
> +}
> +
> static inline void *page_frag_alloc_va(struct page_frag_cache *nc,
> 				       unsigned int fragsz, gfp_t gfp_mask)
> {
> 	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, ~0u);
> }
>
> +void *page_frag_alloc_va_prepare(struct page_frag_cache *nc, unsigned int *fragsz,
> +				 gfp_t gfp);
> +
> +static inline void *page_frag_alloc_va_prepare_align(struct page_frag_cache *nc,
> +						     unsigned int *fragsz,
> +						     gfp_t gfp,
> +						     unsigned int align)
> +{
> +	WARN_ON_ONCE(!is_power_of_2(align) || align > PAGE_SIZE);
> +	nc->remaining = nc->remaining & -align;
> +	return page_frag_alloc_va_prepare(nc, fragsz, gfp);
> +}
> +
> +struct page *page_frag_alloc_pg_prepare(struct page_frag_cache *nc,
> +					unsigned int *offset,
> +					unsigned int *fragsz, gfp_t gfp);
> +
> +struct page *page_frag_alloc_prepare(struct page_frag_cache *nc,
> +				     unsigned int *offset,
> +				     unsigned int *fragsz,
> +				     void **va, gfp_t gfp);
> +
> +static inline struct encoded_va *__page_frag_alloc_probe(struct page_frag_cache *nc,
> +							 unsigned int *offset,
> +							 unsigned int *fragsz,
> +							 void **va)
> +{
> +	struct encoded_va *encoded_va;
> +
> +	*fragsz = nc->remaining;
> +	encoded_va = nc->encoded_va;
> +	*offset = __page_frag_cache_page_offset(encoded_va, *fragsz);
> +	*va = encoded_page_address(encoded_va) + *offset;
> +
> +	return encoded_va;
> +}
> +
> +#define page_frag_alloc_probe(nc, offset, fragsz, va)			\
> +({									\
> +	struct encoded_va *__encoded_va;				\
> +	struct page *__page = NULL;					\
> +									\

Hi Yunsheng -

I made this suggestion for patch 13 (documentation), but want to clarify 
my request here:

> +	if (likely((nc)->remaining))					\

I think it would be more useful to change this line to

 	if ((nc)->remaining >= *fragsz)

That way the caller can use this function to "probe" for a specific amount 
of available space, rather than "nonzero" space. If the caller wants to 
check for available space, they can set *fragsz = 1.

In other words, I think the functionality you described in the 
documentation is better and the code should be changed to match!

- Mat

> +		__page = virt_to_page(__page_frag_alloc_probe(nc,	\
> +							      offset,	\
> +							      fragsz,	\
> +							      va));	\
> +									\
> +	__page;								\
> +})
> +
> +static inline void page_frag_alloc_commit(struct page_frag_cache *nc,
> +					  unsigned int fragsz)
> +{
> +	VM_BUG_ON(fragsz > nc->remaining || !nc->pagecnt_bias);
> +	nc->pagecnt_bias--;
> +	nc->remaining -= fragsz;
> +}
> +
> +static inline void page_frag_alloc_commit_noref(struct page_frag_cache *nc,
> +						unsigned int fragsz)
> +{
> +	VM_BUG_ON(fragsz > nc->remaining);
> +	nc->remaining -= fragsz;
> +}
> +
> void page_frag_free_va(void *addr);
>
> #endif
diff mbox series

Patch

diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 88e91ee57b91..30893638155b 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -71,6 +71,21 @@  static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
 	return encoded_page_pfmemalloc(nc->encoded_va);
 }
 
+static inline unsigned int page_frag_cache_page_size(struct encoded_va *encoded_va)
+{
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	return PAGE_SIZE << encoded_page_order(encoded_va);
+#else
+	return PAGE_SIZE;
+#endif
+}
+
+static inline unsigned int __page_frag_cache_page_offset(struct encoded_va *encoded_va,
+							 unsigned int remaining)
+{
+	return page_frag_cache_page_size(encoded_va) - remaining;
+}
+
 void page_frag_cache_drain(struct page_frag_cache *nc);
 void __page_frag_cache_drain(struct page *page, unsigned int count);
 void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
@@ -85,12 +100,83 @@  static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
 	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, -align);
 }
 
+static inline unsigned int page_frag_cache_page_offset(const struct page_frag_cache *nc)
+{
+	return __page_frag_cache_page_offset(nc->encoded_va, nc->remaining);
+}
+
 static inline void *page_frag_alloc_va(struct page_frag_cache *nc,
 				       unsigned int fragsz, gfp_t gfp_mask)
 {
 	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, ~0u);
 }
 
+void *page_frag_alloc_va_prepare(struct page_frag_cache *nc, unsigned int *fragsz,
+				 gfp_t gfp);
+
+static inline void *page_frag_alloc_va_prepare_align(struct page_frag_cache *nc,
+						     unsigned int *fragsz,
+						     gfp_t gfp,
+						     unsigned int align)
+{
+	WARN_ON_ONCE(!is_power_of_2(align) || align > PAGE_SIZE);
+	nc->remaining = nc->remaining & -align;
+	return page_frag_alloc_va_prepare(nc, fragsz, gfp);
+}
+
+struct page *page_frag_alloc_pg_prepare(struct page_frag_cache *nc,
+					unsigned int *offset,
+					unsigned int *fragsz, gfp_t gfp);
+
+struct page *page_frag_alloc_prepare(struct page_frag_cache *nc,
+				     unsigned int *offset,
+				     unsigned int *fragsz,
+				     void **va, gfp_t gfp);
+
+static inline struct encoded_va *__page_frag_alloc_probe(struct page_frag_cache *nc,
+							 unsigned int *offset,
+							 unsigned int *fragsz,
+							 void **va)
+{
+	struct encoded_va *encoded_va;
+
+	*fragsz = nc->remaining;
+	encoded_va = nc->encoded_va;
+	*offset = __page_frag_cache_page_offset(encoded_va, *fragsz);
+	*va = encoded_page_address(encoded_va) + *offset;
+
+	return encoded_va;
+}
+
+#define page_frag_alloc_probe(nc, offset, fragsz, va)			\
+({									\
+	struct encoded_va *__encoded_va;				\
+	struct page *__page = NULL;					\
+									\
+	if (likely((nc)->remaining))					\
+		__page = virt_to_page(__page_frag_alloc_probe(nc,	\
+							      offset,	\
+							      fragsz,	\
+							      va));	\
+									\
+	__page;								\
+})
+
+static inline void page_frag_alloc_commit(struct page_frag_cache *nc,
+					  unsigned int fragsz)
+{
+	VM_BUG_ON(fragsz > nc->remaining || !nc->pagecnt_bias);
+	nc->pagecnt_bias--;
+	nc->remaining -= fragsz;
+}
+
+static inline void page_frag_alloc_commit_noref(struct page_frag_cache *nc,
+						unsigned int fragsz)
+{
+	VM_BUG_ON(fragsz > nc->remaining);
+	nc->remaining -= fragsz;
+}
+
 void page_frag_free_va(void *addr);
 
 #endif
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
index 4542d72e7b01..eb8bf59b26bb 100644
--- a/mm/page_frag_cache.c
+++ b/mm/page_frag_cache.c
@@ -60,6 +60,119 @@  static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 	return NULL;
 }
 
+static struct page *page_frag_cache_refill(struct page_frag_cache *nc,
+					   gfp_t gfp_mask)
+{
+	struct encoded_va *encoded_va = nc->encoded_va;
+
+	if (likely(encoded_va)) {
+		struct page *page = virt_to_page(encoded_va);
+
+		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
+			return __page_frag_cache_refill(nc, gfp_mask);
+
+		if (unlikely(encoded_page_pfmemalloc(encoded_va))) {
+			free_unref_page(page, compound_order(page));
+			return __page_frag_cache_refill(nc, gfp_mask);
+		}
+
+		/* OK, page count is 0, we can safely set it */
+		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
+
+		/* reset page count bias and offset to start of new frag */
+		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
+		nc->remaining = page_frag_cache_page_size(encoded_va);
+
+		return page;
+	}
+
+	return __page_frag_cache_refill(nc, gfp_mask);
+}
+
+void *page_frag_alloc_va_prepare(struct page_frag_cache *nc,
+				 unsigned int *fragsz, gfp_t gfp)
+{
+	struct encoded_va *encoded_va;
+	unsigned int remaining;
+
+	remaining = nc->remaining;
+	if (unlikely(*fragsz > remaining)) {
+		if (WARN_ON_ONCE(*fragsz > PAGE_SIZE) ||
+		    !page_frag_cache_refill(nc, gfp))
+			return NULL;
+
+		remaining = nc->remaining;
+	}
+
+	encoded_va = nc->encoded_va;
+	*fragsz = remaining;
+	return encoded_page_address(encoded_va) +
+			__page_frag_cache_page_offset(encoded_va, remaining);
+}
+EXPORT_SYMBOL(page_frag_alloc_va_prepare);
+
+struct page *page_frag_alloc_pg_prepare(struct page_frag_cache *nc,
+					unsigned int *offset,
+					unsigned int *fragsz, gfp_t gfp)
+{
+	struct encoded_va *encoded_va;
+	unsigned int remaining;
+	struct page *page;
+
+	remaining = nc->remaining;
+	if (unlikely(*fragsz > remaining)) {
+		if (WARN_ON_ONCE(*fragsz > PAGE_SIZE)) {
+			*fragsz = 0;
+			return NULL;
+		}
+
+		page = page_frag_cache_refill(nc, gfp);
+		remaining = nc->remaining;
+		encoded_va = nc->encoded_va;
+	} else {
+		encoded_va = nc->encoded_va;
+		page = virt_to_page(encoded_va);
+	}
+
+	*offset = __page_frag_cache_page_offset(encoded_va, remaining);
+	*fragsz = remaining;
+
+	return page;
+}
+EXPORT_SYMBOL(page_frag_alloc_pg_prepare);
+
+struct page *page_frag_alloc_prepare(struct page_frag_cache *nc,
+				     unsigned int *offset,
+				     unsigned int *fragsz,
+				     void **va, gfp_t gfp)
+{
+	struct encoded_va *encoded_va;
+	unsigned int remaining;
+	struct page *page;
+
+	remaining = nc->remaining;
+	if (unlikely(*fragsz > remaining)) {
+		if (WARN_ON_ONCE(*fragsz > PAGE_SIZE)) {
+			*fragsz = 0;
+			return NULL;
+		}
+
+		page = page_frag_cache_refill(nc, gfp);
+		remaining = nc->remaining;
+		encoded_va = nc->encoded_va;
+	} else {
+		encoded_va = nc->encoded_va;
+		page = virt_to_page(encoded_va);
+	}
+
+	*offset = __page_frag_cache_page_offset(encoded_va, remaining);
+	*fragsz = remaining;
+	*va = encoded_page_address(encoded_va) + *offset;
+
+	return page;
+}
+EXPORT_SYMBOL(page_frag_alloc_prepare);
+
 void page_frag_cache_drain(struct page_frag_cache *nc)
 {
 	if (!nc->encoded_va)