diff mbox series

[bpf-next,v2,5/6] mm, bpf: Use __GFP_ACCOUNT in try_alloc_pages().

Message ID 20241210023936.46871-6-alexei.starovoitov@gmail.com (mailing list archive)
State New
Headers show
Series bpf, mm: Introduce __GFP_TRYLOCK | expand

Commit Message

Alexei Starovoitov Dec. 10, 2024, 2:39 a.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

Unconditionally use __GFP_ACCOUNT in try_alloc_pages().
The caller is responsible for setting up memcg correctly.
All BPF memory accounting is memcg based.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/gfp.h | 5 ++---
 mm/page_alloc.c     | 5 ++++-
 2 files changed, 6 insertions(+), 4 deletions(-)

Comments

Vlastimil Babka Dec. 11, 2024, 12:05 p.m. UTC | #1
On 12/10/24 03:39, Alexei Starovoitov wrote:
> From: Alexei Starovoitov <ast@kernel.org>
> 
> Unconditionally use __GFP_ACCOUNT in try_alloc_pages().
> The caller is responsible to setup memcg correctly.
> All BPF memory accounting is memcg based.

Hm I guess if there was later another user besides bpf that didn't want
memcg for some reason, a variant of the function (or a bool param, to avoid
passing full gfp param which would be thrown away besides __GFP_ACCOUNT)
could be easily created.

> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
> ---
>  include/linux/gfp.h | 5 ++---
>  mm/page_alloc.c     | 5 ++++-
>  2 files changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index dcae733ed006..820c4938c9cd 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -356,18 +356,17 @@ static inline struct page *try_alloc_pages_noprof(int nid, unsigned int order)
>  	 */
>  	if (preemptible() && !rcu_preempt_depth())
>  		return alloc_pages_node_noprof(nid,
> -					       GFP_NOWAIT | __GFP_ZERO,
> +					       GFP_NOWAIT | __GFP_ZERO | __GFP_ACCOUNT,
>  					       order);
>  	/*
>  	 * Best effort allocation from percpu free list.
>  	 * If it's empty attempt to spin_trylock zone->lock.
>  	 * Do not specify __GFP_KSWAPD_RECLAIM to avoid wakeup_kswapd
>  	 * that may need to grab a lock.
> -	 * Do not specify __GFP_ACCOUNT to avoid local_lock.
>  	 * Do not warn either.
>  	 */
>  	return alloc_pages_node_noprof(nid,
> -				       __GFP_TRYLOCK | __GFP_NOWARN | __GFP_ZERO,
> +				       __GFP_TRYLOCK | __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT,
>  				       order);
>  }
>  #define try_alloc_pages(...)			alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__))
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index a969a62ec0c3..1fada16b8a14 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4818,7 +4818,10 @@ struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
>  out:
>  	if (memcg_kmem_online() && (gfp & __GFP_ACCOUNT) && page &&
>  	    unlikely(__memcg_kmem_charge_page(page, gfp, order) != 0)) {
> -		__free_pages(page, order);
> +		if (unlikely(gfp & __GFP_TRYLOCK))
> +			free_pages_nolock(page, order);
> +		else
> +			__free_pages(page, order);
>  		page = NULL;
>  	}
>
Alexei Starovoitov Dec. 12, 2024, 2:54 a.m. UTC | #2
On Wed, Dec 11, 2024 at 4:05 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 12/10/24 03:39, Alexei Starovoitov wrote:
> > From: Alexei Starovoitov <ast@kernel.org>
> >
> > Unconditionally use __GFP_ACCOUNT in try_alloc_pages().
> > The caller is responsible to setup memcg correctly.
> > All BPF memory accounting is memcg based.
>
> Hm I guess if there was later another user besides bpf that didn't want
> memcg for some reason, a variant of the function (or a bool param, to avoid
> passing full gfp param which would be thrown away besides __GFP_ACCOUNT)
> could be easily created.

Let's cross that bridge when we get there.
tbh the code that doesn't play nice with memcg should be shamed.
I think CONFIG_MEMCG should disappear and become a default.
diff mbox series

Patch

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index dcae733ed006..820c4938c9cd 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -356,18 +356,17 @@  static inline struct page *try_alloc_pages_noprof(int nid, unsigned int order)
 	 */
 	if (preemptible() && !rcu_preempt_depth())
 		return alloc_pages_node_noprof(nid,
-					       GFP_NOWAIT | __GFP_ZERO,
+					       GFP_NOWAIT | __GFP_ZERO | __GFP_ACCOUNT,
 					       order);
 	/*
 	 * Best effort allocation from percpu free list.
 	 * If it's empty attempt to spin_trylock zone->lock.
 	 * Do not specify __GFP_KSWAPD_RECLAIM to avoid wakeup_kswapd
 	 * that may need to grab a lock.
-	 * Do not specify __GFP_ACCOUNT to avoid local_lock.
 	 * Do not warn either.
 	 */
 	return alloc_pages_node_noprof(nid,
-				       __GFP_TRYLOCK | __GFP_NOWARN | __GFP_ZERO,
+				       __GFP_TRYLOCK | __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT,
 				       order);
 }
 #define try_alloc_pages(...)			alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a969a62ec0c3..1fada16b8a14 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4818,7 +4818,10 @@  struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
 out:
 	if (memcg_kmem_online() && (gfp & __GFP_ACCOUNT) && page &&
 	    unlikely(__memcg_kmem_charge_page(page, gfp, order) != 0)) {
-		__free_pages(page, order);
+		if (unlikely(gfp & __GFP_TRYLOCK))
+			free_pages_nolock(page, order);
+		else
+			__free_pages(page, order);
 		page = NULL;
 	}