Message ID | 20221107033109.59709-1-zhengqi.arch@bytedance.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: fix unexpected changes to {failslab|fail_page_alloc}.attr | expand |
On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote: > @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) > return false; > > if (gfpflags & __GFP_NOWARN) > - failslab.attr.no_warn = true; > + flags |= FAULT_NOWARN; You should add a comment here about why this is required, to avoid deadlocking printk Jason
On 2022/11/7 20:42, Jason Gunthorpe wrote: > On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote: > >> @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) >> return false; >> >> if (gfpflags & __GFP_NOWARN) >> - failslab.attr.no_warn = true; >> + flags |= FAULT_NOWARN; > > You should add a comment here about why this is required, to avoid > deadlocking printk I think this comment should be placed where __GFP_NOWARN is specified instead of here. What do you think? :) Thanks, Qi > > Jason
On Mon, Nov 07, 2022 at 11:05:42PM +0800, Qi Zheng wrote: > > > On 2022/11/7 20:42, Jason Gunthorpe wrote: > > On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote: > > > > > @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) > > > return false; > > > if (gfpflags & __GFP_NOWARN) > > > - failslab.attr.no_warn = true; > > > + flags |= FAULT_NOWARN; > > > > You should add a comment here about why this is required, to avoid > > deadlocking printk > > I think this comment should be placed where __GFP_NOWARN is specified > instead of here. What do you think? :) NOWARN is clear what it does, it is this specifically that is very subtle about avoiding deadlock against allocations triggered by printk/etc code. Jason
On 2022/11/8 00:26, Jason Gunthorpe wrote: > On Mon, Nov 07, 2022 at 11:05:42PM +0800, Qi Zheng wrote: >> >> >> On 2022/11/7 20:42, Jason Gunthorpe wrote: >>> On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote: >>> >>>> @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) >>>> return false; >>>> if (gfpflags & __GFP_NOWARN) >>>> - failslab.attr.no_warn = true; >>>> + flags |= FAULT_NOWARN; >>> >>> You should add a comment here about why this is required, to avoid >>> deadlocking printk >> >> I think this comment should be placed where __GFP_NOWARN is specified >> instead of here. What do you think? :) > > NOWARN is clear what it does, it is this specifically that is very > subtle about avoiding deadlock against allocations triggered by > printk/etc code. Oh, maybe I understand your concern. Some people may think that this is just a print of fault injection information, not a warning. I'll add a comment explaining why in some cases there must be no printing. Thanks, Qi > > Jason
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ } #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4b8fafce415c..5971f7c3e49e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false; @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr); if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); 
return true; } +EXPORT_SYMBOL_GPL(should_fail_ex); + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..fc046f26606c 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0; + /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) return false; if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); } static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7192ded44ad0..e537d3a950a4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0; + if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3913,9 +3915,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) return false; if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); } #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for the current caller. But in __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected. Fix it by passing the no-warn request as a per-call FAULT_NOWARN flag to a new should_fail_ex() instead of storing it in the shared attr. Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov <dvyukov@google.com> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com> --- include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 14 +++++++++----- mm/failslab.c | 6 ++++-- mm/page_alloc.c | 6 ++++-- 4 files changed, 22 insertions(+), 11 deletions(-)