When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com Reviewed-by: Akinobu Mita akinobu.mita@gmail.com --- v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c... v2: https://lore.kernel.org/lkml/20221108035232.87180-1-zhengqi.arch@bytedance.c...
Changelog in v2 -> v3: - collect Reviewed-by - rebase onto the next-20221118
Changelog in v1 -> v2: - add comment for __should_failslab() and __should_fail_alloc_page() (suggested by Jason)
include/linux/fault-inject.h | 7 +++++-- lib/fault-inject.c | 14 +++++++++----- mm/failslab.c | 12 ++++++++++-- mm/page_alloc.c | 7 +++++-- 4 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 9f6e25467844..444236dadcf0 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -20,7 +20,6 @@ struct fault_attr { atomic_t space; unsigned long verbose; bool task_filter; - bool no_warn; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; @@ -32,6 +31,10 @@ struct fault_attr { struct dentry *dname; };
+enum fault_flags { + FAULT_NOWARN = 1 << 0, +}; + #define FAULT_ATTR_INITIALIZER { \ .interval = 1, \ .times = ATOMIC_INIT(1), \ @@ -40,11 +43,11 @@ struct fault_attr { .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \ .verbose = 2, \ .dname = NULL, \ - .no_warn = false, \ }
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER int setup_fault_attr(struct fault_attr *attr, char *str); +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags); bool should_fail(struct fault_attr *attr, ssize_t size);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4b8fafce415c..5971f7c3e49e 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -41,9 +41,6 @@ EXPORT_SYMBOL_GPL(setup_fault_attr);
static void fail_dump(struct fault_attr *attr) { - if (attr->no_warn) - return; - if (attr->verbose > 0 && __ratelimit(&attr->ratelimit_state)) { printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure.\n" "name %pd, interval %lu, probability %lu, " @@ -103,7 +100,7 @@ static inline bool fail_stacktrace(struct fault_attr *attr) * http://www.nongnu.org/failmalloc/ */
-bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false;
@@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false;
fail: - fail_dump(attr); + if (!(flags & FAULT_NOWARN)) + fail_dump(attr);
if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times);
return true; } +EXPORT_SYMBOL_GPL(should_fail_ex); + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + return should_fail_ex(attr, size, 0); +} EXPORT_SYMBOL_GPL(should_fail);
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS diff --git a/mm/failslab.c b/mm/failslab.c index 58df9789f1d2..ffc420c0e767 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -16,6 +16,8 @@ static struct {
bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { + int flags = 0; + /* No fault-injection for bootstrap cache */ if (unlikely(s == kmem_cache)) return false; @@ -30,10 +32,16 @@ bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) if (failslab.cache_filter && !(s->flags & SLAB_FAILSLAB)) return false;
+ /* + * In some cases, it expects to specify __GFP_NOWARN + * to avoid printing any information(not just a warning), + * thus avoiding deadlocks. See commit 6b9dbedbe349 for + * details. + */ if (gfpflags & __GFP_NOWARN) - failslab.attr.no_warn = true; + flags |= FAULT_NOWARN;
- return should_fail(&failslab.attr, s->object_size); + return should_fail_ex(&failslab.attr, s->object_size, flags); }
static int __init setup_failslab(char *str) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f7a63684e6c4..baf97166172c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3909,6 +3909,8 @@ __setup("fail_page_alloc=", setup_fail_page_alloc);
static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) { + int flags = 0; + if (order < fail_page_alloc.min_order) return false; if (gfp_mask & __GFP_NOFAIL) @@ -3919,10 +3921,11 @@ static bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) (gfp_mask & __GFP_DIRECT_RECLAIM)) return false;
+ /* See comment in __should_failslab() */ if (gfp_mask & __GFP_NOWARN) - fail_page_alloc.attr.no_warn = true; + flags |= FAULT_NOWARN;
- return should_fail(&fail_page_alloc.attr, 1 << order); + return should_fail_ex(&fail_page_alloc.attr, 1 << order, flags); }
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
On Fri, Nov 18, 2022 at 06:00:11PM +0800, Qi Zheng wrote:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com Reviewed-by: Akinobu Mita akinobu.mita@gmail.com
v1: https://lore.kernel.org/lkml/20221107033109.59709-1-zhengqi.arch@bytedance.c... v2: https://lore.kernel.org/lkml/20221108035232.87180-1-zhengqi.arch@bytedance.c...
Reviewed-by: Jason Gunthorpe jgg@nvidia.com
Jason
On Fri, 18 Nov 2022 18:00:11 +0800 Qi Zheng zhengqi.arch@bytedance.com wrote:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com Reviewed-by: Akinobu Mita akinobu.mita@gmail.com
...
-bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false; @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail:
- fail_dump(attr);
- if (!(flags & FAULT_NOWARN))
fail_dump(attr);
if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } +EXPORT_SYMBOL_GPL(should_fail_ex);
I don't see a need to export this?
On 2022/11/19 05:42, Andrew Morton wrote:
On Fri, 18 Nov 2022 18:00:11 +0800 Qi Zheng zhengqi.arch@bytedance.com wrote:
When we specify __GFP_NOWARN, we only expect that no warnings will be issued for current caller. But in the __should_failslab() and __should_fail_alloc_page(), the local GFP flags alter the global {failslab|fail_page_alloc}.attr, which is persistent and shared by all tasks. This is not what we expected, let's fix it.
Cc: stable@vger.kernel.org Fixes: 3f913fc5f974 ("mm: fix missing handler for __GFP_NOWARN") Reported-by: Dmitry Vyukov dvyukov@google.com Signed-off-by: Qi Zheng zhengqi.arch@bytedance.com Reviewed-by: Akinobu Mita akinobu.mita@gmail.com
...
-bool should_fail(struct fault_attr *attr, ssize_t size) +bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags) { bool stack_checked = false; @@ -152,13 +149,20 @@ bool should_fail(struct fault_attr *attr, ssize_t size) return false; fail:
- fail_dump(attr);
- if (!(flags & FAULT_NOWARN))
fail_dump(attr);
if (atomic_read(&attr->times) != -1) atomic_dec_not_zero(&attr->times); return true; } +EXPORT_SYMBOL_GPL(should_fail_ex);
I don't see a need to export this?
Yes, my initial thought was that there might be a driver using this function, but there really isn't one yet.
And I see you've helped remove this, thanks a lot. :)
linux-stable-mirror@lists.linaro.org