Message ID | e01e5e40-692a-519c-4cba-e3331f173c82@kernel.dk (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm: don't call should_failslab() for !CONFIG_FAILSLAB | expand |
On Tue, 5 Oct 2021 09:31:43 -0600 Jens Axboe <axboe@kernel.dk> wrote: > Allocations can be a very hot path, and this out-of-line function > call is noticeable. > > --- a/include/linux/fault-inject.h > +++ b/include/linux/fault-inject.h > @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, > > struct kmem_cache; > > -int should_failslab(struct kmem_cache *s, gfp_t gfpflags); > #ifdef CONFIG_FAILSLAB > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags); > extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); > #else > static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) > diff --git a/mm/slab.h b/mm/slab.h > index 58c01a34e5b8..92fd6fe01877 100644 > --- a/mm/slab.h > +++ b/mm/slab.h > @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, > > might_alloc(flags); > > +#ifdef CONFIG_FAILSLAB > if (should_failslab(s, flags)) > return NULL; > +#endif Can we avoid the ifdefs here? 
> > if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags)) > return NULL; > diff --git a/mm/slab_common.c b/mm/slab_common.c > index ec2bb0beed75..c21bd447f237 100644 > --- a/mm/slab_common.c > +++ b/mm/slab_common.c > @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); > EXPORT_TRACEPOINT_SYMBOL(kfree); > EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); > > +#ifdef CONFIG_FAILSLAB > int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > { > if (__should_failslab(s, gfpflags)) > @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > return 0; > } > ALLOW_ERROR_INJECTION(should_failslab, ERRNO); > +#endif Like, --- a/include/linux/fault-inject.h~mm-dont-call-should_failslab-for-config_failslab-fix +++ a/include/linux/fault-inject.h @@ -68,6 +68,10 @@ struct kmem_cache; int should_failslab(struct kmem_cache *s, gfp_t gfpflags); extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else +static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) +{ + return 0; +} static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { return false; --- a/mm/slab.h~mm-dont-call-should_failslab-for-config_failslab-fix +++ a/mm/slab.h @@ -491,10 +491,8 @@ static inline struct kmem_cache *slab_pr might_alloc(flags); -#ifdef CONFIG_FAILSLAB if (should_failslab(s, flags)) return NULL; -#endif if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags)) return NULL;
On 10/5/21 3:18 PM, Andrew Morton wrote: > On Tue, 5 Oct 2021 09:31:43 -0600 Jens Axboe <axboe@kernel.dk> wrote: > >> Allocations can be a very hot path, and this out-of-line function >> call is noticeable. >> >> --- a/include/linux/fault-inject.h >> +++ b/include/linux/fault-inject.h >> @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, >> >> struct kmem_cache; >> >> -int should_failslab(struct kmem_cache *s, gfp_t gfpflags); >> #ifdef CONFIG_FAILSLAB >> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags); >> extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); >> #else >> static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) >> diff --git a/mm/slab.h b/mm/slab.h >> index 58c01a34e5b8..92fd6fe01877 100644 >> --- a/mm/slab.h >> +++ b/mm/slab.h >> @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, >> >> might_alloc(flags); >> >> +#ifdef CONFIG_FAILSLAB >> if (should_failslab(s, flags)) >> return NULL; >> +#endif > > Can we avoid the ifdefs here? 
> >> >> if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags)) >> return NULL; >> diff --git a/mm/slab_common.c b/mm/slab_common.c >> index ec2bb0beed75..c21bd447f237 100644 >> --- a/mm/slab_common.c >> +++ b/mm/slab_common.c >> @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); >> EXPORT_TRACEPOINT_SYMBOL(kfree); >> EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); >> >> +#ifdef CONFIG_FAILSLAB >> int should_failslab(struct kmem_cache *s, gfp_t gfpflags) >> { >> if (__should_failslab(s, gfpflags)) >> @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags) >> return 0; >> } >> ALLOW_ERROR_INJECTION(should_failslab, ERRNO); >> +#endif > > Like, > > --- a/include/linux/fault-inject.h~mm-dont-call-should_failslab-for-config_failslab-fix > +++ a/include/linux/fault-inject.h > @@ -68,6 +68,10 @@ struct kmem_cache; > int should_failslab(struct kmem_cache *s, gfp_t gfpflags); > extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); > #else > +static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > +{ > + return 0; > +} > static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) > { > return false; > --- a/mm/slab.h~mm-dont-call-should_failslab-for-config_failslab-fix > +++ a/mm/slab.h > @@ -491,10 +491,8 @@ static inline struct kmem_cache *slab_pr > > might_alloc(flags); > > -#ifdef CONFIG_FAILSLAB > if (should_failslab(s, flags)) > return NULL; > -#endif > > if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags)) > return NULL; > _ Yep, that'll work!
On 10/5/21 17:31, Jens Axboe wrote: > Allocations can be a very hot path, and this out-of-line function > call is noticeable. > > Signed-off-by: Jens Axboe <axboe@kernel.dk> It used to be inline b4 (hi, Konstantin!) and then was converted to be like this intentionally :/ See 4f6923fbb352 ("mm: make should_failslab always available for fault injection") And now also kernel/bpf/verifier.c contains: BTF_ID(func, should_failslab) I think either your or Andrew's version will break this BTF_ID thing, at the very least. But I do strongly agree that putting unconditionally a non-inline call into slab allocator fastpath sucks. Can we make it so that bpf can only do these overrides when CONFIG_FAILSLAB is enabled? I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing a dummy that is always available (so that nothing breaks), but doesn't actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled? > --- > > diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h > index e525f6957c49..3128d2c8b3b4 100644 > --- a/include/linux/fault-inject.h > +++ b/include/linux/fault-inject.h > @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, > > struct kmem_cache; > > -int should_failslab(struct kmem_cache *s, gfp_t gfpflags); > #ifdef CONFIG_FAILSLAB > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags); > extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); > #else > static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) > diff --git a/mm/slab.h b/mm/slab.h > index 58c01a34e5b8..92fd6fe01877 100644 > --- a/mm/slab.h > +++ b/mm/slab.h > @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, > > might_alloc(flags); > > +#ifdef CONFIG_FAILSLAB > if (should_failslab(s, flags)) > return NULL; > +#endif > > if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags)) > return NULL; > diff --git a/mm/slab_common.c 
b/mm/slab_common.c > index ec2bb0beed75..c21bd447f237 100644 > --- a/mm/slab_common.c > +++ b/mm/slab_common.c > @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); > EXPORT_TRACEPOINT_SYMBOL(kfree); > EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); > > +#ifdef CONFIG_FAILSLAB > int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > { > if (__should_failslab(s, gfpflags)) > @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > return 0; > } > ALLOW_ERROR_INJECTION(should_failslab, ERRNO); > +#endif >
On 10/7/21 9:32 AM, Vlastimil Babka wrote: > On 10/5/21 17:31, Jens Axboe wrote: >> Allocations can be a very hot path, and this out-of-line function >> call is noticeable. >> >> Signed-off-by: Jens Axboe <axboe@kernel.dk> > > It used to be inline b4 (hi, Konstantin!) and then was converted to be like > this intentionally :/ > > See 4f6923fbb352 ("mm: make should_failslab always available for fault > injection") > > And now also kernel/bpf/verifier.c contains: > BTF_ID(func, should_failslab) > > I think either your or Andrew's version will break this BTF_ID thing, at the > very least. > > But I do strongly agree that putting unconditionally a non-inline call into > slab allocator fastpath sucks. Can we make it so that bpf can only do these > overrides when CONFIG_FAILSLAB is enabled? > I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing > a dummy that is always available (so that nothing breaks), but doesn't > actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled? That seems to be the right approach: limiting the override to the case where CONFIG_FAILSLAB is actually enabled and should_failslab() is a real function call.
On Thu, Oct 07, 2021 at 05:32:52PM +0200, Vlastimil Babka wrote: > On 10/5/21 17:31, Jens Axboe wrote: > > Allocations can be a very hot path, and this out-of-line function > > call is noticeable. > > > > Signed-off-by: Jens Axboe <axboe@kernel.dk> > > It used to be inline b4 (hi, Konstantin!) Congratulations, you made me look. :) -K
On 5/27/24 11:34 AM, Mateusz Guzik wrote: > +cc Linus > > On Thu, Oct 07, 2021 at 05:32:52PM +0200, Vlastimil Babka wrote: >> On 10/5/21 17:31, Jens Axboe wrote: >> > Allocations can be a very hot path, and this out-of-line function >> > call is noticeable. >> > >> > Signed-off-by: Jens Axboe <axboe@kernel.dk> >> >> It used to be inline b4 (hi, Konstantin!) and then was converted to be like >> this intentionally :/ >> >> See 4f6923fbb352 ("mm: make should_failslab always available for fault >> injection") >> >> And now also kernel/bpf/verifier.c contains: >> BTF_ID(func, should_failslab) >> >> I think either your or Andrew's version will break this BTF_ID thing, at the >> very least. >> >> But I do strongly agree that putting unconditionally a non-inline call into >> slab allocator fastpath sucks. Can we make it so that bpf can only do these >> overrides when CONFIG_FAILSLAB is enabled? >> I don't know, perhaps putting this BTF_ID() in #ifdef as well, or providing >> a dummy that is always available (so that nothing breaks), but doesn't >> actually affect slab_pre_alloc_hook() unless CONFIG_FAILSLAB has been enabled? >> > > I just ran into it while looking at kmalloc + kfree pair. > > A toy test which calls this in a loop like so: > static long noinline custom_bench(void) > { > void *buf; > > while (!signal_pending(current)) { > buf = kmalloc(16, GFP_KERNEL); > kfree(buf); > cond_resched(); > } > > return -EINTR; > } > > ... shows this with perf top: > 57.88% [kernel] [k] kfree > 31.38% [kernel] [k] kmalloc_trace_noprof > 3.20% [kernel] [k] should_failslab.constprop.0 > > A side note is that I verified majority of the time in kfree and > kmalloc_trace_noprof is cmpxchg16b, which is both good and bad news. > > As for should_failslab, it compiles to an empty func on production > kernels and is present even when there are no supported means of > instrumenting it. As in everyone pays for its existence, even if there > is no way to use it. 
> > Also note there are 3 unrelated mechanisms to alter the return code, > which imo is 2 too many. But more importantly they are not even > coordinated. > > A hard requirement for a long term solution is to not alter the fast > path beyond nops for hot patching. > > So far I think implementing this in a clean manner would require > agreeing on some namespace for bpf ("failprobes"?) and coordinating > hotpatching between different mechanisms. Maybe there is a better way, I > don't know. I've attempted something (not complete yet) here: https://lore.kernel.org/all/20240531-fault-injection-statickeys-v1-0-a513fd0a9614@suse.cz/ > Here is the crux of my e-mail though: > 1. turning should_failslab into a mandatory func call is an ok local > hack for the test farm, not a viable approach for production > 2. as such it is up to the original submitter (or whoever else > wants to pick up the slack) to implement something which > hotpatches the callsite as opposed to inducing a function call for > everyone > > In the meantime the routine should disappear unless explicitly included > in kernel config. The patch submitted here would be one way to do it.
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index e525f6957c49..3128d2c8b3b4 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -64,8 +64,8 @@ static inline struct dentry *fault_create_debugfs_attr(const char *name, struct kmem_cache; -int should_failslab(struct kmem_cache *s, gfp_t gfpflags); #ifdef CONFIG_FAILSLAB +int should_failslab(struct kmem_cache *s, gfp_t gfpflags); extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) diff --git a/mm/slab.h b/mm/slab.h index 58c01a34e5b8..92fd6fe01877 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -491,8 +491,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, might_alloc(flags); +#ifdef CONFIG_FAILSLAB if (should_failslab(s, flags)) return NULL; +#endif if (!memcg_slab_pre_alloc_hook(s, objcgp, size, flags)) return NULL; diff --git a/mm/slab_common.c b/mm/slab_common.c index ec2bb0beed75..c21bd447f237 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1323,6 +1323,7 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); EXPORT_TRACEPOINT_SYMBOL(kfree); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); +#ifdef CONFIG_FAILSLAB int should_failslab(struct kmem_cache *s, gfp_t gfpflags) { if (__should_failslab(s, gfpflags)) @@ -1330,3 +1331,4 @@ int should_failslab(struct kmem_cache *s, gfp_t gfpflags) return 0; } ALLOW_ERROR_INJECTION(should_failslab, ERRNO); +#endif
Allocations can be a very hot path, and this out-of-line function call is noticeable. Signed-off-by: Jens Axboe <axboe@kernel.dk> ---