When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com --- include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 14 +++++++++----- mm/failslab.c | 6 ++++-- mm/page_alloc.c | 6 ++++-- 4 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; };
+enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ }
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4b8fafce415c..5971f7c3e49e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */
-bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false;
@@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false;
fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr);
if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times);
return true; } +EXPORT_SYMBOL_GPL(should_fail_ex); + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..fc046f26606c 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct {
bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0; + /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) return false;
if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN;
- return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); }
static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7192ded44ad0..e537d3a950a4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0; + if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3913,9 +3915,9 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) return false;
if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN;
- return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); }
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote:
@@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) return false; if (gfpflags & __GFP_NOWARN)
failslab.attr.no_warn = true;
flags |= FAULT_NOWARN;
You should add a comment here about why this is required, to avoid deadlocking printk
Jason
On 2022/11/7 20:42, Jason Gunthorpe wrote:
On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote:
@@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) return false; if (gfpflags & __GFP_NOWARN)
failslab.attr.no_warn = true;
flags |= FAULT_NOWARN;
You should add a comment here about why this is required, to avoid deadlocking printk
I think this comment should be placed where __GFP_NOWARN is specified instead of here. What do you think? :)
Thanks, Qi
Jason
On Mon, Nov 07, 2022 at 11:05:42PM +0800, Qi Zheng wrote:
On 2022/11/7 20:42, Jason Gunthorpe wrote:
On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote:
@@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) return false; if (gfpflags & __GFP_NOWARN)
failslab.attr.no_warn = true;
flags |= FAULT_NOWARN;
You should add a comment here about why this is required, to avoid deadlocking printk
I think this comment should be placed where __GFP_NOWARN is specified instead of here. What do you think? :)
NOWARN is clear what it does, it is this specifically that is very subtle about avoiding deadlock aginst allocations triggered by printk/etc code.
Jason
On 2022/11/8 00:26, Jason Gunthorpe wrote:
On Mon, Nov 07, 2022 at 11:05:42PM +0800, Qi Zheng wrote:
On 2022/11/7 20:42, Jason Gunthorpe wrote:
On Mon, Nov 07, 2022 at 11:31:09AM +0800, Qi Zheng wrote:
@@ -31,9 +33,9 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) return false; if (gfpflags & __GFP_NOWARN)
failslab.attr.no_warn = true;
flags |= FAULT_NOWARN;
You should add a comment here about why this is required, to avoid deadlocking printk
I think this comment should be placed where __GFP_NOWARN is specified instead of here. What do you think? :)
NOWARN is clear what it does, it is this specifically that is very subtle about avoiding deadlock aginst allocations triggered by printk/etc code.
Oh, maybe I understand your concern. Some people may think that this is just a print of fault injection information, not a warning. I'll add a comment explaining why in some cases there must be no printing.
Thanks, Qi
Jason
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com --- v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c...
Changelog in v1 -> v2: - add comment for __should_failslab() and __should_fail_alloc_page() (suggested by Jason)
include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 14 +++++++++----- mm/failslab.c | 12 ++++++++++-- mm/page_alloc.c | 7 +++++-- 4 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; };
+enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ }
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4b8fafce415c..5971f7c3e49e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */
-bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false;
@@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false;
fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr);
if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times);
return true; } +EXPORT_SYMBOL_GPL(should_fail_ex); + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct {
bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0; + /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false;
+ /* + * In some cases, it expects to specify __GFP_NOWARN + * to avoid printing any information(not just a warning), + * thus avoiding deadlocks. See commit 6b9dbedbe349 for + * details. + */ if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN;
- return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); }
static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7192ded44ad0..cb6fe715d983 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0; + if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false;
+ /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN;
- return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); }
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
Hi Zheng Qi,
On 2022/11/8 11:52, Qi Zheng wrote:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com
v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c...
Changelog in v1 -> v2:
- add comment for __should_failslab() and __should_fail_alloc_page() (suggested by Jason)
include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 14 +++++++++----- mm/failslab.c | 12 ++++++++++-- mm/page_alloc.c | 7 +++++-- 4 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter;
- bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end;
@@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags {
- FAULT_NOWARN = 1 << 0,
+};
#define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \
.no_warn = false, \
How about keep no_warn attr as it be, and export it to user?
When testing with fault injection, and each fault will print an backtrace. but not all of the testsuit can tell us which one is fault injection message or other is a real warning/crash like syzkaller do.
In my case, to make things simple, we usually used a regex to detect whether wanring/error happend. So we disabled the slab/page fault warning message by default, and only enable it when debug real issue.
Regards,
} #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4b8fafce415c..5971f7c3e49e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) {
- if (attr->no_warn)
return;
- if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
*/ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false; @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail:
- fail_dump(attr);
- if (!(flags & FAULT_NOWARN))
fail_dump(attr);
if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } +EXPORT_SYMBOL_GPL(should_fail_ex);
+bool should_fail(struct fault_attr *attr, ssize_t size) +{
- return should_fail_ex(attr, size, 0);
+} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) {
- int flags = 0;
- /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false;
@@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false;
- /*
* In some cases, it expects to specify __GFP_NOWARN
* to avoid printing any information(not just a warning),
* thus avoiding deadlocks. See commit 6b9dbedbe349 for
* details.
if (gfpflags & __GFP_NOWARN)*/
failslab.attr.no_warn = true;
flags |= FAULT_NOWARN;
- return should_fail(&failslab.attr, s->object_size);
- return should_fail_ex(&failslab.attr, s->object_size, flags);
} static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7192ded44ad0..cb6fe715d983 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) {
- int flags = 0;
- if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL)
@@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false;
- /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN)
fail_page_alloc.attr.no_warn = true;
flags |= FAULT_NOWARN;
- return should_fail(&fail_page_alloc.attr, 1 << order);
- return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags);
} #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
On 2022/11/8 16:44, Wei Yongjun wrote:
Hi Zheng Qi,
On 2022/11/8 11:52, Qi Zheng wrote:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com
v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c...
Changelog in v1 -> v2:
- add comment for __should_failslab() and __should_fail_alloc_page() (suggested by Jason)
include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 14 +++++++++----- mm/failslab.c | 12 ++++++++++-- mm/page_alloc.c | 7 +++++-- 4 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter;
- bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end;
@@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags {
- FAULT_NOWARN = 1 << 0,
+};
- #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \
@@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \
.no_warn = false, \
How about keep no_warn attr as it be, and export it to user?
When testing with fault injection, and each fault will print an backtrace. but not all of the testsuit can tell us which one is fault injection message or other is a real warning/crash like syzkaller do.
In my case, to make things simple, we usually used a regex to detect whether wanring/error happend. So we disabled the slab/page fault warning message by default, and only enable it when debug real issue.
So you want to set/clear this no_warn attr through the procfs or sysfs interface, so that you can easily disable/enable the slab/page fault warning message from the user mode. Right?
Seems reasonable to me. Anyone else has an opinion on this? If it is really needed, I can do it later.
Thanks, Qi
Regards,
} #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4b8fafce415c..5971f7c3e49e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) {
- if (attr->no_warn)
return;
- if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, "
@@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
*/ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false; @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail:
- fail_dump(attr);
- if (!(flags & FAULT_NOWARN))
fail_dump(attr);
if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } +EXPORT_SYMBOL_GPL(should_fail_ex);
+bool should_fail(struct fault_attr *attr, ssize_t size) +{
- return should_fail_ex(attr, size, 0);
+} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) {
- int flags = 0;
- /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false;
@@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false;
- /*
* In some cases, it expects to specify __GFP_NOWARN
* to avoid printing any information(not just a warning),
* thus avoiding deadlocks. See commit 6b9dbedbe349 for
* details.
if (gfpflags & __GFP_NOWARN)*/
failslab.attr.no_warn = true;
flags |= FAULT_NOWARN;
- return should_fail(&failslab.attr, s->object_size);
- return should_fail_ex(&failslab.attr, s->object_size, flags); }
static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7192ded44ad0..cb6fe715d983 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) {
- int flags = 0;
- if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL)
@@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false;
- /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN)
fail_page_alloc.attr.no_warn = true;
flags |= FAULT_NOWARN;
- return should_fail(&fail_page_alloc.attr, 1 << order);
- return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); }
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
On 2022/11/8 16:58, Qi Zheng wrote:
On 2022/11/8 16:44, Wei Yongjun wrote:
Hi Zheng Qi,
On 2022/11/8 11:52, Qi Zheng wrote:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com
v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c...
Changelog in v1 -> v2: - add comment for __should_failslab() and __should_fail_alloc_page() (suggested by Jason)
include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 14 +++++++++----- mm/failslab.c | 12 ++++++++++-- mm/page_alloc.c | 7 +++++-- 4 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +};
#define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \
How about keep no_warn attr as it be, and export it to user?
When testing with fault injection, and each fault will print an backtrace. but not all of the testsuit can tell us which one is fault injection message or other is a real warning/crash like syzkaller do.
In my case, to make things simple, we usually used a regex to detect whether wanring/error happend. So we disabled the slab/page fault warning message by default, and only enable it when debug real issue.
So you want to set/clear this no_warn attr through the procfs or sysfs interface, so that you can easily disable/enable the slab/page fault warning message from the user mode. Right?
Yes, just like:
echo 1 > /sys/kernel/debug/failslab/no_warn #disable message echo 0 > /sys/kernel/debug/failslab/no_warn #enable message
Regards Wei Yongjun
Seems reasonable to me. Anyone else has an opinion on this? If it is really needed, I can do it later.
Thanks, Qi
Regards,
} #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4b8fafce415c..5971f7c3e49e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return;
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false; @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr); if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } +EXPORT_SYMBOL_GPL(should_fail_ex);
+bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0;
/* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false; + /* + * In some cases, it expects to specify __GFP_NOWARN + * to avoid printing any information(not just a warning), + * thus avoiding deadlocks. See commit 6b9dbedbe349 for + * details. + */ if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); } static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7192ded44ad0..cb6fe715d983 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0;
if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; + /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); } #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
On 2022/11/8 17:32, Wei Yongjun wrote:
On 2022/11/8 16:58, Qi Zheng wrote:
On 2022/11/8 16:44, Wei Yongjun wrote:
Hi Zheng Qi,
On 2022/11/8 11:52, Qi Zheng wrote:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com
v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c...
Changelog in v1 -> v2: - add comment for __should_failslab() and __should_fail_alloc_page() (suggested by Jason)
include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 14 +++++++++----- mm/failslab.c | 12 ++++++++++-- mm/page_alloc.c | 7 +++++-- 4 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; }; +enum fault_flags { + FAULT_NOWARN = 1 << 0, +};
#define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \
How about keep no_warn attr as it be, and export it to user?
When testing with fault injection, and each fault will print an backtrace. but not all of the testsuit can tell us which one is fault injection message or other is a real warning/crash like syzkaller do.
In my case, to make things simple, we usually used a regex to detect whether wanring/error happend. So we disabled the slab/page fault warning message by default, and only enable it when debug real issue.
So you want to set/clear this no_warn attr through the procfs or sysfs interface, so that you can easily disable/enable the slab/page fault warning message from the user mode. Right?
Yes, just like:
echo 1 > /sys/kernel/debug/failslab/no_warn #disable message echo 0 > /sys/kernel/debug/failslab/no_warn #enable message
Got it. Let's wait for the other people's comments and suggestions. :)
Regards Wei Yongjun
Seems reasonable to me. Anyone else has an opinion on this? If it is really needed, I can do it later.
Thanks, Qi
Regards,
} #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4b8fafce415c..5971f7c3e49e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr); static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return;
if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */ -bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false; @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr); if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } +EXPORT_SYMBOL_GPL(should_fail_ex);
+bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct { bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0;
/* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false; + /* + * In some cases, it expects to specify __GFP_NOWARN + * to avoid printing any information(not just a warning), + * thus avoiding deadlocks. See commit 6b9dbedbe349 for + * details. + */ if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); } static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7192ded44ad0..cb6fe715d983 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3902,6 +3902,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc); static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0;
if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3912,10 +3914,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false; + /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN; - return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); } #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
On Tue, Nov 08, 2022 at 05:32:52PM +0800, Wei Yongjun wrote:
So you want to set/clear this no_warn attr through the procfs or sysfs interface, so that you can easily disable/enable the slab/page fault warning message from the user mode. Right?
Yes, just like:
echo 1 > /sys/kernel/debug/failslab/no_warn #disable message echo 0 > /sys/kernel/debug/failslab/no_warn #enable message
You can already do that:
echo 0 > /sys/kernel/debug/failslab/verbose #disable message
Jason
On 2022/11/8 20:04, Jason Gunthorpe wrote:
On Tue, Nov 08, 2022 at 05:32:52PM +0800, Wei Yongjun wrote:
So you want to set/clear this no_warn attr through the procfs or sysfs interface, so that you can easily disable/enable the slab/page fault warning message from the user mode. Right?
Yes, just like:
echo 1 > /sys/kernel/debug/failslab/no_warn #disable message echo 0 > /sys/kernel/debug/failslab/no_warn #enable message
You can already do that:
echo 0 > /sys/kernel/debug/failslab/verbose #disable message
Got it, thanks.
Wei Yongjun
2022年11月8日(火) 12:52 Qi Zheng zhengqi.arch@bytedance.com:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com
v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c...
Changelog in v1 -> v2:
- add comment for __should_failslab() and __should_fail_alloc_page() (suggested by Jason)
Looks good.
Reviewed-by: Akinobu Mita akinobu.mita@gmail.com
On 2022/11/9 01:36, Akinobu Mita wrote:
2022年11月8日(火) 12:52 Qi Zheng zhengqi.arch@bytedance.com:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com
v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c...
Changelog in v1 -> v2:
- add comment for __should_failslab() and __should_fail_alloc_page() (suggested by Jason)
Looks good.
Reviewed-by: Akinobu Mita akinobu.mita@gmail.com
Thanks. And hi Andrew, seems no action left for me, can this patch be applied to mm-unstable tree for testing? :)
linux-stable-mirror@lists.linaro.org