diff mbox series

mm: don't call should_failslab() for !CONFIG_FAILSLAB

Message ID e01e5e40-692a-519c-4cba-e3331f173c82@kernel.dk (mailing list archive)
State New
Headers show
Series mm: don't call should_failslab() for !CONFIG_FAILSLAB | expand

Commit Message

Jens Axboe Oct. 5, 2021, 3:31 p.m. UTC
Allocations can be a very hot path, and this out-of-line function
call is noticeable.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

Comments

Andrew Morton Oct. 5, 2021, 9:18 p.m. UTC | #1
On Tue, 5 Oct 2021 09:31:43 -0600 Jens Axboe <axboe@kernel.dk> wrote:

> Allocations can be a very hot path, and this out-of-line function
> call is noticeable.
> 
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
>  
>  struct kmem_cache;
>  
> -int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>  #ifdef CONFIG_FAILSLAB
> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>  extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>  #else
>  static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> diff --git a/mm/slab.h b/mm/slab.h
> index 58c01a34e5b8..92fd6fe01877 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>  
>  	might_alloc(flags);
>  
> +#ifdef CONFIG_FAILSLAB
>  	if (should_failslab(s, flags))
>  		return NULL;
> +#endif

Can we avoid the ifdefs here?

>  
>  	if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
>  		return NULL;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index ec2bb0beed75..c21bd447f237 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
>  EXPORT_TRACEPOINT_SYMBOL(kfree);
>  EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
>  
> +#ifdef CONFIG_FAILSLAB
>  int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>  {
>  	if (__should_failslab(s, gfpflags))
> @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>  	return 0;
>  }
>  ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
> +#endif

Like,

--- a/include/linux/fault-inject.h~mm-dont-call-should_failslab-for-config_failslab-fix
+++ a/include/linux/fault-inject.h
@@ -68,6 +68,10 @@ struct kmem_cache;
 int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #else
+static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+{
+	return 0;
+}
 static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
 	return false;
--- a/mm/slab.h~mm-dont-call-should_failslab-for-config_failslab-fix
+++ a/mm/slab.h
@@ -491,10 +491,8 @@ static inline struct kmem_cache *slab_pr
 
 	might_alloc(flags);
 
-#ifdef CONFIG_FAILSLAB
 	if (should_failslab(s, flags))
 		return NULL;
-#endif
 
 	if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
 		return NULL;
Jens Axboe Oct. 5, 2021, 9:20 p.m. UTC | #2
On 10/5/21 3:18 PM, Andrew Morton wrote:
> On Tue, 5 Oct 2021 09:31:43 -0600 Jens Axboe <axboe@kernel.dk> wrote:
> 
>> Allocations can be a very hot path, and this out-of-line function
>> call is noticeable.
>>
>> --- a/include/linux/fault-inject.h
>> +++ b/include/linux/fault-inject.h
>> @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
>>  
>>  struct kmem_cache;
>>  
>> -int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>>  #ifdef CONFIG_FAILSLAB
>> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>>  extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>>  #else
>>  static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> diff --git a/mm/slab.h b/mm/slab.h
>> index 58c01a34e5b8..92fd6fe01877 100644
>> --- a/mm/slab.h
>> +++ b/mm/slab.h
>> @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>>  
>>  	might_alloc(flags);
>>  
>> +#ifdef CONFIG_FAILSLAB
>>  	if (should_failslab(s, flags))
>>  		return NULL;
>> +#endif
> 
> Can we avoid the ifdefs here?
> 
>>  
>>  	if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
>>  		return NULL;
>> diff --git a/mm/slab_common.c b/mm/slab_common.c
>> index ec2bb0beed75..c21bd447f237 100644
>> --- a/mm/slab_common.c
>> +++ b/mm/slab_common.c
>> @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
>>  EXPORT_TRACEPOINT_SYMBOL(kfree);
>>  EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
>>  
>> +#ifdef CONFIG_FAILSLAB
>>  int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>  {
>>  	if (__should_failslab(s, gfpflags))
>> @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>  	return 0;
>>  }
>>  ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
>> +#endif
> 
> Like,
> 
> --- a/include/linux/fault-inject.h~mm-dont-call-should_failslab-for-config_failslab-fix
> +++ a/include/linux/fault-inject.h
> @@ -68,6 +68,10 @@ struct kmem_cache;
>  int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>  extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>  #else
> +static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> +{
> +	return 0;
> +}
>  static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>  {
>  	return false;
> --- a/mm/slab.h~mm-dont-call-should_failslab-for-config_failslab-fix
> +++ a/mm/slab.h
> @@ -491,10 +491,8 @@ static inline struct kmem_cache *slab_pr
>  
>  	might_alloc(flags);
>  
> -#ifdef CONFIG_FAILSLAB
>  	if (should_failslab(s, flags))
>  		return NULL;
> -#endif
>  
>  	if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
>  		return NULL;
> _

Yep, that'll work!
Vlastimil Babka Oct. 7, 2021, 3:32 p.m. UTC | #3
On 10/5/21 17:31, Jens Axboe wrote:
> Allocations can be a very hot path, and this out-of-line function
> call is noticeable.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>

It used to be inline b4 (hi, Konstantin!) and then was converted to be like
this intentionally :/

See 4f6923fbb352 ("mm: make should_failslab always available for fault
injection")

And now also kernel/bpf/verifier.c contains:
BTF_ID(func, should_failslab)

I think either your or Andrew's version will break this BTF_ID thing, at the
very least.

But I do strongly agree that putting unconditionally a non-inline call into
slab allocator fastpath sucks. Can we make it so that bpf can only do these
overrides when CONFIG_FAILSLAB is enabled?
I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing
a dummy that is always available (so that nothing breaks), but doesn't
actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled?

> ---
> 
> diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
> index e525f6957c49..3128d2c8b3b4 100644
> --- a/include/linux/fault-inject.h
> +++ b/include/linux/fault-inject.h
> @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name,
>  
>  struct kmem_cache;
>  
> -int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>  #ifdef CONFIG_FAILSLAB
> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>  extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
>  #else
>  static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> diff --git a/mm/slab.h b/mm/slab.h
> index 58c01a34e5b8..92fd6fe01877 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
>  
>  	might_alloc(flags);
>  
> +#ifdef CONFIG_FAILSLAB
>  	if (should_failslab(s, flags))
>  		return NULL;
> +#endif
>  
>  	if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
>  		return NULL;
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index ec2bb0beed75..c21bd447f237 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
>  EXPORT_TRACEPOINT_SYMBOL(kfree);
>  EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
>  
> +#ifdef CONFIG_FAILSLAB
>  int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>  {
>  	if (__should_failslab(s, gfpflags))
> @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>  	return 0;
>  }
>  ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
> +#endif
>
Jens Axboe Oct. 7, 2021, 3:50 p.m. UTC | #4
On 10/7/21 9:32 AM, Vlastimil Babka wrote:
> On 10/5/21 17:31, Jens Axboe wrote:
>> Allocations can be a very hot path, and this out-of-line function
>> call is noticeable.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> It used to be inline b4 (hi, Konstantin!) and then was converted to be like
> this intentionally :/
> 
> See 4f6923fbb352 ("mm: make should_failslab always available for fault
> injection")
> 
> And now also kernel/bpf/verifier.c contains:
> BTF_ID(func, should_failslab)
> 
> I think either your or Andrew's version will break this BTF_ID thing, at the
> very least.
> 
> But I do strongly agree that putting unconditionally a non-inline call into
> slab allocator fastpath sucks. Can we make it so that bpf can only do these
> overrides when CONFIG_FAILSLAB is enabled?
> I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing
> a dummy that is always available (so that nothing breaks), but doesn't
> actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled?

That seems to be the right approach: limiting it to when it is actually
enabled and is a function call.
Konstantin Ryabitsev Oct. 7, 2021, 4:36 p.m. UTC | #5
On Thu, Oct 07, 2021 at 05:32:52PM +0200, Vlastimil Babka wrote:
> On 10/5/21 17:31, Jens Axboe wrote:
> > Allocations can be a very hot path, and this out-of-line function
> > call is noticeable.
> > 
> > Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> It used to be inline b4 (hi, Konstantin!)

Congratulations, you made me look. :)

-K
Vlastimil Babka May 31, 2024, 9:36 a.m. UTC | #6
On 5/27/24 11:34 AM, Mateusz Guzik wrote:
> +cc Linus
> 
> On Thu, Oct 07, 2021 at 05:32:52PM +0200, Vlastimil Babka wrote:
>> On 10/5/21 17:31, Jens Axboe wrote:
>> > Allocations can be a very hot path, and this out-of-line function
>> > call is noticeable.
>> > 
>> > Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> 
>> It used to be inline b4 (hi, Konstantin!) and then was converted to be like
>> this intentionally :/
>> 
>> See 4f6923fbb352 ("mm: make should_failslab always available for fault
>> injection")
>> 
>> And now also kernel/bpf/verifier.c contains:
>> BTF_ID(func, should_failslab)
>> 
>> I think either your or Andrew's version will break this BTF_ID thing, at the
>> very least.
>> 
>> But I do strongly agree that putting unconditionally a non-inline call into
>> slab allocator fastpath sucks. Can we make it so that bpf can only do these
>> overrides when CONFIG_FAILSLAB is enabled?
>> I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing
>> a dummy that is always available (so that nothing breaks), but doesn't
>> actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled?
>> 
> 
> I just ran into it while looking at kmalloc + kfree pair.
> 
> A toy test which calls this in a loop like so:
> static long noinline custom_bench(void)
> {
>         void *buf;
> 
>         while (!signal_pending(current)) {
>                 buf = kmalloc(16, GFP_KERNEL);
>                 kfree(buf);
>                 cond_resched();
>         }
> 
>         return -EINTR;
> }
> 
> ... shows this with perf top:
>    57.88%  [kernel]           [k] kfree
>    31.38%  [kernel]           [k] kmalloc_trace_noprof
>     3.20%  [kernel]           [k] should_failslab.constprop.0
> 
> A side note is that I verified the majority of the time in kfree and
> kmalloc_trace_noprof is spent in cmpxchg16b, which is both good and bad news.
> 
> As for should_failslab, it compiles to an empty func on production
> kernels and is present even when there are no supported means of
> instrumenting it. As in everyone pays for its existence, even if there
> is no way to use it.
> 
> Also note there are 3 unrelated mechanisms to alter the return code,
> which imo is 2 too many. But more importantly they are not even
> coordinated.
> 
> A hard requirement for a long term solution is to not alter the fast
> path beyond nops for hot patching.
> 
> So far I think implementing this in a clean manner would require
> agreeing on some namespace for bpf ("failprobes"?) and coordinating
> hotpatching between different mechanisms. Maybe there is a better way, I
> don't know.

I've attempted something (not complete yet) here:

https://lore.kernel.org/all/20240531-fault-injection-statickeys-v1-0-a513fd0a9614@suse.cz/

> Here is the crux of my e-mail though:
> 1. turning should_failslab into a mandatory func call is an ok local
>    hack for the test farm, not a viable approach for production
> 2. as such it is up to the original submitter (or whoever else
>    who wants to pick up the slack) to implement something which
>    hotpatches the callsite as opposed to inducing a function call for
>    everyone
> 
> In the meantime the routine should disappear unless explicitly included
> in kernel config. The patch submitted here would be one way to do it.
diff mbox series

Patch

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index e525f6957c49..3128d2c8b3b4 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -64,8 +64,8 @@  static inline struct dentry *fault_create_debugfs_attr(const char *name,
 
 struct kmem_cache;
 
-int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #ifdef CONFIG_FAILSLAB
+int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #else
 static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
diff --git a/mm/slab.h b/mm/slab.h
index 58c01a34e5b8..92fd6fe01877 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -491,8 +491,10 @@  static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 
 	might_alloc(flags);
 
+#ifdef CONFIG_FAILSLAB
 	if (should_failslab(s, flags))
 		return NULL;
+#endif
 
 	if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags))
 		return NULL;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index ec2bb0beed75..c21bd447f237 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1323,6 +1323,7 @@  EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
 EXPORT_TRACEPOINT_SYMBOL(kfree);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
 
+#ifdef CONFIG_FAILSLAB
 int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
 	if (__should_failslab(s, gfpflags))
@@ -1330,3 +1331,4 @@  int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 	return 0;
 }
 ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
+#endif