The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112421-hunchback-jinx-7a88@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
ebf6cb7c6e12 ("ext4: no need to generate from free list in mballoc")
ad635507b5b2 ("ext4: remove unnecessary return for void function")
5354b2af3406 ("ext4: allow ext4_get_group_info() to fail")
fa08a7b61dff ("ext4: fix WARNING in mb_find_extent")
01e4ca294517 ("ext4: allow to find by goal if EXT4_MB_HINT_GOAL_ONLY is set")
123e3016ee9b ("ext4: rename ext4_set_bits to mb_set_bits")
8ac3939db99f ("ext4: refactor ext4_free_blocks() to pull out ext4_mb_clear_bb()")
bfdc502a4a4c ("ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit")
a5c0e2fdf7ce ("ext4: correct cluster len and clusters changed accounting in ext4_mb_mark_bb")
196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
4b68f6df1059 ("ext4: add MB_NUM_ORDERS macro")
a6c75eaf1103 ("ext4: add mballoc stats proc file")
b237e3044450 ("ext4: add ability to return parsed options from parse_options")
67d251860461 ("ext4: drop s_mb_bal_lock and convert protected fields to atomic")
a72b38eebea4 ("ext4: handle dax mount option collision")
99c880decf27 ("ext4: cleanup fast commit mount options")
0f0672ffb61a ("ext4: add a mount opt to forcefully turn fast commits on")
8016e29f4362 ("ext4: fast commit recovery path")
5b849b5f96b4 ("jbd2: fast commit recovery path")
aa75f4d3daae ("ext4: main fast-commit commit path")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 Mon Sep 17 00:00:00 2001
From: Wang Jianjian <wangjianjian0(a)foxmail.com>
Date: Thu, 24 Aug 2023 23:56:31 +0800
Subject: [PATCH] ext4: no need to generate from free list in mballoc
Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in
buddy cache init") added a code to mark as used blocks in the list of not yet
committed freed blocks during initialization of a buddy page. However
ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a
reference to it so it cannot happen that ext4_mb_init_cache() is called
when efd list is non-empty. Just remove the
ext4_mb_generate_from_freelist() call.
Fixes: 7a2fcbf7f85('ext4: don't use blocks freed but not yet committed in buddy cache init')
Signed-off-by: Wang Jianjian <wangjianjian0(a)foxmail.com>
Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.c…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1d65c738c4c9..6e304c18d390 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -417,8 +417,6 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
@@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* We place the buddy block and bitmap block
* close together
*/
+ grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
- grinfo = ext4_get_group_info(sb, group);
- if (!grinfo) {
- err = -EFSCORRUPTED;
- goto out;
- }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/* mark all preallocated blks used in in-core bitmap */
ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
ext4_unlock_group(sb, group);
/* set incore so that the buddy information can be
@@ -4950,31 +4948,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return false;
}
-/*
- * the function goes through all block freed in the group
- * but not yet committed and marks them used in in-core bitmap.
- * buddy must be generated from this bitmap
- * Need to be called with the ext4 group lock held
- */
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct rb_node *n;
- struct ext4_group_info *grp;
- struct ext4_free_data *entry;
-
- grp = ext4_get_group_info(sb, group);
- if (!grp)
- return;
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
- entry = rb_entry(n, struct ext4_free_data, efd_node);
- mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
- n = rb_next(n);
- }
-}
-
/*
* the function goes through all preallocation in this group and marks them
* used in in-core bitmap. buddy must be generated from this bitmap
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112420-mumbling-backlog-4117@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
ebf6cb7c6e12 ("ext4: no need to generate from free list in mballoc")
ad635507b5b2 ("ext4: remove unnecessary return for void function")
5354b2af3406 ("ext4: allow ext4_get_group_info() to fail")
fa08a7b61dff ("ext4: fix WARNING in mb_find_extent")
01e4ca294517 ("ext4: allow to find by goal if EXT4_MB_HINT_GOAL_ONLY is set")
123e3016ee9b ("ext4: rename ext4_set_bits to mb_set_bits")
8ac3939db99f ("ext4: refactor ext4_free_blocks() to pull out ext4_mb_clear_bb()")
bfdc502a4a4c ("ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit")
a5c0e2fdf7ce ("ext4: correct cluster len and clusters changed accounting in ext4_mb_mark_bb")
196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
4b68f6df1059 ("ext4: add MB_NUM_ORDERS macro")
a6c75eaf1103 ("ext4: add mballoc stats proc file")
b237e3044450 ("ext4: add ability to return parsed options from parse_options")
67d251860461 ("ext4: drop s_mb_bal_lock and convert protected fields to atomic")
a72b38eebea4 ("ext4: handle dax mount option collision")
99c880decf27 ("ext4: cleanup fast commit mount options")
0f0672ffb61a ("ext4: add a mount opt to forcefully turn fast commits on")
8016e29f4362 ("ext4: fast commit recovery path")
5b849b5f96b4 ("jbd2: fast commit recovery path")
aa75f4d3daae ("ext4: main fast-commit commit path")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 Mon Sep 17 00:00:00 2001
From: Wang Jianjian <wangjianjian0(a)foxmail.com>
Date: Thu, 24 Aug 2023 23:56:31 +0800
Subject: [PATCH] ext4: no need to generate from free list in mballoc
Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in
buddy cache init") added a code to mark as used blocks in the list of not yet
committed freed blocks during initialization of a buddy page. However
ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a
reference to it so it cannot happen that ext4_mb_init_cache() is called
when efd list is non-empty. Just remove the
ext4_mb_generate_from_freelist() call.
Fixes: 7a2fcbf7f85('ext4: don't use blocks freed but not yet committed in buddy cache init')
Signed-off-by: Wang Jianjian <wangjianjian0(a)foxmail.com>
Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.c…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1d65c738c4c9..6e304c18d390 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -417,8 +417,6 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
@@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* We place the buddy block and bitmap block
* close together
*/
+ grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
- grinfo = ext4_get_group_info(sb, group);
- if (!grinfo) {
- err = -EFSCORRUPTED;
- goto out;
- }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/* mark all preallocated blks used in in-core bitmap */
ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
ext4_unlock_group(sb, group);
/* set incore so that the buddy information can be
@@ -4950,31 +4948,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return false;
}
-/*
- * the function goes through all block freed in the group
- * but not yet committed and marks them used in in-core bitmap.
- * buddy must be generated from this bitmap
- * Need to be called with the ext4 group lock held
- */
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct rb_node *n;
- struct ext4_group_info *grp;
- struct ext4_free_data *entry;
-
- grp = ext4_get_group_info(sb, group);
- if (!grp)
- return;
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
- entry = rb_entry(n, struct ext4_free_data, efd_node);
- mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
- n = rb_next(n);
- }
-}
-
/*
* the function goes through all preallocation in this group and marks them
* used in in-core bitmap. buddy must be generated from this bitmap
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112419-retrieval-wrongness-5ee3@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
ebf6cb7c6e12 ("ext4: no need to generate from free list in mballoc")
ad635507b5b2 ("ext4: remove unnecessary return for void function")
5354b2af3406 ("ext4: allow ext4_get_group_info() to fail")
fa08a7b61dff ("ext4: fix WARNING in mb_find_extent")
01e4ca294517 ("ext4: allow to find by goal if EXT4_MB_HINT_GOAL_ONLY is set")
123e3016ee9b ("ext4: rename ext4_set_bits to mb_set_bits")
8ac3939db99f ("ext4: refactor ext4_free_blocks() to pull out ext4_mb_clear_bb()")
bfdc502a4a4c ("ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit")
a5c0e2fdf7ce ("ext4: correct cluster len and clusters changed accounting in ext4_mb_mark_bb")
196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
4b68f6df1059 ("ext4: add MB_NUM_ORDERS macro")
a6c75eaf1103 ("ext4: add mballoc stats proc file")
b237e3044450 ("ext4: add ability to return parsed options from parse_options")
67d251860461 ("ext4: drop s_mb_bal_lock and convert protected fields to atomic")
a72b38eebea4 ("ext4: handle dax mount option collision")
99c880decf27 ("ext4: cleanup fast commit mount options")
0f0672ffb61a ("ext4: add a mount opt to forcefully turn fast commits on")
8016e29f4362 ("ext4: fast commit recovery path")
5b849b5f96b4 ("jbd2: fast commit recovery path")
aa75f4d3daae ("ext4: main fast-commit commit path")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 Mon Sep 17 00:00:00 2001
From: Wang Jianjian <wangjianjian0(a)foxmail.com>
Date: Thu, 24 Aug 2023 23:56:31 +0800
Subject: [PATCH] ext4: no need to generate from free list in mballoc
Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in
buddy cache init") added a code to mark as used blocks in the list of not yet
committed freed blocks during initialization of a buddy page. However
ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a
reference to it so it cannot happen that ext4_mb_init_cache() is called
when efd list is non-empty. Just remove the
ext4_mb_generate_from_freelist() call.
Fixes: 7a2fcbf7f85('ext4: don't use blocks freed but not yet committed in buddy cache init')
Signed-off-by: Wang Jianjian <wangjianjian0(a)foxmail.com>
Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.c…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1d65c738c4c9..6e304c18d390 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -417,8 +417,6 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
@@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* We place the buddy block and bitmap block
* close together
*/
+ grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
- grinfo = ext4_get_group_info(sb, group);
- if (!grinfo) {
- err = -EFSCORRUPTED;
- goto out;
- }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/* mark all preallocated blks used in in-core bitmap */
ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
ext4_unlock_group(sb, group);
/* set incore so that the buddy information can be
@@ -4950,31 +4948,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return false;
}
-/*
- * the function goes through all block freed in the group
- * but not yet committed and marks them used in in-core bitmap.
- * buddy must be generated from this bitmap
- * Need to be called with the ext4 group lock held
- */
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct rb_node *n;
- struct ext4_group_info *grp;
- struct ext4_free_data *entry;
-
- grp = ext4_get_group_info(sb, group);
- if (!grp)
- return;
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
- entry = rb_entry(n, struct ext4_free_data, efd_node);
- mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
- n = rb_next(n);
- }
-}
-
/*
* the function goes through all preallocation in this group and marks them
* used in in-core bitmap. buddy must be generated from this bitmap
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112418-embattled-polio-b4b2@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
ebf6cb7c6e12 ("ext4: no need to generate from free list in mballoc")
ad635507b5b2 ("ext4: remove unnecessary return for void function")
5354b2af3406 ("ext4: allow ext4_get_group_info() to fail")
fa08a7b61dff ("ext4: fix WARNING in mb_find_extent")
01e4ca294517 ("ext4: allow to find by goal if EXT4_MB_HINT_GOAL_ONLY is set")
123e3016ee9b ("ext4: rename ext4_set_bits to mb_set_bits")
8ac3939db99f ("ext4: refactor ext4_free_blocks() to pull out ext4_mb_clear_bb()")
bfdc502a4a4c ("ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit")
a5c0e2fdf7ce ("ext4: correct cluster len and clusters changed accounting in ext4_mb_mark_bb")
196e402adf2e ("ext4: improve cr 0 / cr 1 group scanning")
4b68f6df1059 ("ext4: add MB_NUM_ORDERS macro")
a6c75eaf1103 ("ext4: add mballoc stats proc file")
b237e3044450 ("ext4: add ability to return parsed options from parse_options")
67d251860461 ("ext4: drop s_mb_bal_lock and convert protected fields to atomic")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 Mon Sep 17 00:00:00 2001
From: Wang Jianjian <wangjianjian0(a)foxmail.com>
Date: Thu, 24 Aug 2023 23:56:31 +0800
Subject: [PATCH] ext4: no need to generate from free list in mballoc
Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in
buddy cache init") added a code to mark as used blocks in the list of not yet
committed freed blocks during initialization of a buddy page. However
ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a
reference to it so it cannot happen that ext4_mb_init_cache() is called
when efd list is non-empty. Just remove the
ext4_mb_generate_from_freelist() call.
Fixes: 7a2fcbf7f85('ext4: don't use blocks freed but not yet committed in buddy cache init')
Signed-off-by: Wang Jianjian <wangjianjian0(a)foxmail.com>
Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.c…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1d65c738c4c9..6e304c18d390 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -417,8 +417,6 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
@@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* We place the buddy block and bitmap block
* close together
*/
+ grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
- grinfo = ext4_get_group_info(sb, group);
- if (!grinfo) {
- err = -EFSCORRUPTED;
- goto out;
- }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/* mark all preallocated blks used in in-core bitmap */
ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
ext4_unlock_group(sb, group);
/* set incore so that the buddy information can be
@@ -4950,31 +4948,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return false;
}
-/*
- * the function goes through all block freed in the group
- * but not yet committed and marks them used in in-core bitmap.
- * buddy must be generated from this bitmap
- * Need to be called with the ext4 group lock held
- */
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct rb_node *n;
- struct ext4_group_info *grp;
- struct ext4_free_data *entry;
-
- grp = ext4_get_group_info(sb, group);
- if (!grp)
- return;
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
- entry = rb_entry(n, struct ext4_free_data, efd_node);
- mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
- n = rb_next(n);
- }
-}
-
/*
* the function goes through all preallocation in this group and marks them
* used in in-core bitmap. buddy must be generated from this bitmap
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112417-onscreen-purify-30ed@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
ebf6cb7c6e12 ("ext4: no need to generate from free list in mballoc")
ad635507b5b2 ("ext4: remove unnecessary return for void function")
5354b2af3406 ("ext4: allow ext4_get_group_info() to fail")
fa08a7b61dff ("ext4: fix WARNING in mb_find_extent")
01e4ca294517 ("ext4: allow to find by goal if EXT4_MB_HINT_GOAL_ONLY is set")
123e3016ee9b ("ext4: rename ext4_set_bits to mb_set_bits")
8ac3939db99f ("ext4: refactor ext4_free_blocks() to pull out ext4_mb_clear_bb()")
bfdc502a4a4c ("ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit")
a5c0e2fdf7ce ("ext4: correct cluster len and clusters changed accounting in ext4_mb_mark_bb")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 Mon Sep 17 00:00:00 2001
From: Wang Jianjian <wangjianjian0(a)foxmail.com>
Date: Thu, 24 Aug 2023 23:56:31 +0800
Subject: [PATCH] ext4: no need to generate from free list in mballoc
Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in
buddy cache init") added a code to mark as used blocks in the list of not yet
committed freed blocks during initialization of a buddy page. However
ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a
reference to it so it cannot happen that ext4_mb_init_cache() is called
when efd list is non-empty. Just remove the
ext4_mb_generate_from_freelist() call.
Fixes: 7a2fcbf7f85('ext4: don't use blocks freed but not yet committed in buddy cache init')
Signed-off-by: Wang Jianjian <wangjianjian0(a)foxmail.com>
Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.c…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1d65c738c4c9..6e304c18d390 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -417,8 +417,6 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
@@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* We place the buddy block and bitmap block
* close together
*/
+ grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
- grinfo = ext4_get_group_info(sb, group);
- if (!grinfo) {
- err = -EFSCORRUPTED;
- goto out;
- }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/* mark all preallocated blks used in in-core bitmap */
ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
ext4_unlock_group(sb, group);
/* set incore so that the buddy information can be
@@ -4950,31 +4948,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return false;
}
-/*
- * the function goes through all block freed in the group
- * but not yet committed and marks them used in in-core bitmap.
- * buddy must be generated from this bitmap
- * Need to be called with the ext4 group lock held
- */
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct rb_node *n;
- struct ext4_group_info *grp;
- struct ext4_free_data *entry;
-
- grp = ext4_get_group_info(sb, group);
- if (!grp)
- return;
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
- entry = rb_entry(n, struct ext4_free_data, efd_node);
- mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
- n = rb_next(n);
- }
-}
-
/*
* the function goes through all preallocation in this group and marks them
* used in in-core bitmap. buddy must be generated from this bitmap
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112416-unread-perky-d540@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
ebf6cb7c6e12 ("ext4: no need to generate from free list in mballoc")
ad635507b5b2 ("ext4: remove unnecessary return for void function")
5354b2af3406 ("ext4: allow ext4_get_group_info() to fail")
fa08a7b61dff ("ext4: fix WARNING in mb_find_extent")
01e4ca294517 ("ext4: allow to find by goal if EXT4_MB_HINT_GOAL_ONLY is set")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 Mon Sep 17 00:00:00 2001
From: Wang Jianjian <wangjianjian0(a)foxmail.com>
Date: Thu, 24 Aug 2023 23:56:31 +0800
Subject: [PATCH] ext4: no need to generate from free list in mballoc
Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in
buddy cache init") added a code to mark as used blocks in the list of not yet
committed freed blocks during initialization of a buddy page. However
ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a
reference to it so it cannot happen that ext4_mb_init_cache() is called
when efd list is non-empty. Just remove the
ext4_mb_generate_from_freelist() call.
Fixes: 7a2fcbf7f85('ext4: don't use blocks freed but not yet committed in buddy cache init')
Signed-off-by: Wang Jianjian <wangjianjian0(a)foxmail.com>
Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.c…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1d65c738c4c9..6e304c18d390 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -417,8 +417,6 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
@@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* We place the buddy block and bitmap block
* close together
*/
+ grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
- grinfo = ext4_get_group_info(sb, group);
- if (!grinfo) {
- err = -EFSCORRUPTED;
- goto out;
- }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/* mark all preallocated blks used in in-core bitmap */
ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
ext4_unlock_group(sb, group);
/* set incore so that the buddy information can be
@@ -4950,31 +4948,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return false;
}
-/*
- * the function goes through all block freed in the group
- * but not yet committed and marks them used in in-core bitmap.
- * buddy must be generated from this bitmap
- * Need to be called with the ext4 group lock held
- */
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct rb_node *n;
- struct ext4_group_info *grp;
- struct ext4_free_data *entry;
-
- grp = ext4_get_group_info(sb, group);
- if (!grp)
- return;
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
- entry = rb_entry(n, struct ext4_free_data, efd_node);
- mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
- n = rb_next(n);
- }
-}
-
/*
* the function goes through all preallocation in this group and marks them
* used in in-core bitmap. buddy must be generated from this bitmap
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112415-stabilize-splashy-9350@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
ebf6cb7c6e12 ("ext4: no need to generate from free list in mballoc")
ad635507b5b2 ("ext4: remove unnecessary return for void function")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ebf6cb7c6e1241984f75f29f1bdbfa2fe7168f88 Mon Sep 17 00:00:00 2001
From: Wang Jianjian <wangjianjian0(a)foxmail.com>
Date: Thu, 24 Aug 2023 23:56:31 +0800
Subject: [PATCH] ext4: no need to generate from free list in mballoc
Commit 7a2fcbf7f85 ("ext4: don't use blocks freed but not yet committed in
buddy cache init") added a code to mark as used blocks in the list of not yet
committed freed blocks during initialization of a buddy page. However
ext4_mb_free_metadata() makes sure buddy page is already loaded and takes a
reference to it so it cannot happen that ext4_mb_init_cache() is called
when efd list is non-empty. Just remove the
ext4_mb_generate_from_freelist() call.
Fixes: 7a2fcbf7f85('ext4: don't use blocks freed but not yet committed in buddy cache init')
Signed-off-by: Wang Jianjian <wangjianjian0(a)foxmail.com>
Link: https://lore.kernel.org/r/tencent_53CBCB1668358AE862684E453DF37B722008@qq.c…
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Cc: stable(a)kernel.org
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 1d65c738c4c9..6e304c18d390 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -417,8 +417,6 @@ static const char * const ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);
static bool ext4_mb_good_group(struct ext4_allocation_context *ac,
@@ -1361,17 +1359,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* We place the buddy block and bitmap block
* close together
*/
+ grinfo = ext4_get_group_info(sb, group);
+ if (!grinfo) {
+ err = -EFSCORRUPTED;
+ goto out;
+ }
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
group, page->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
- grinfo = ext4_get_group_info(sb, group);
- if (!grinfo) {
- err = -EFSCORRUPTED;
- goto out;
- }
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
sizeof(*grinfo->bb_counters) *
@@ -1398,7 +1396,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/* mark all preallocated blks used in in-core bitmap */
ext4_mb_generate_from_pa(sb, data, group);
- ext4_mb_generate_from_freelist(sb, data, group);
+ WARN_ON_ONCE(!RB_EMPTY_ROOT(&grinfo->bb_free_root));
ext4_unlock_group(sb, group);
/* set incore so that the buddy information can be
@@ -4950,31 +4948,6 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return false;
}
-/*
- * the function goes through all block freed in the group
- * but not yet committed and marks them used in in-core bitmap.
- * buddy must be generated from this bitmap
- * Need to be called with the ext4 group lock held
- */
-static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
- ext4_group_t group)
-{
- struct rb_node *n;
- struct ext4_group_info *grp;
- struct ext4_free_data *entry;
-
- grp = ext4_get_group_info(sb, group);
- if (!grp)
- return;
- n = rb_first(&(grp->bb_free_root));
-
- while (n) {
- entry = rb_entry(n, struct ext4_free_data, efd_node);
- mb_set_bits(bitmap, entry->efd_start_cluster, entry->efd_count);
- n = rb_next(n);
- }
-}
-
/*
* the function goes through all preallocation in this group and marks them
* used in in-core bitmap. buddy must be generated from this bitmap
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 745f17a4166e79315e4b7f33ce89d03e75a76983
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112459-footman-slicer-8193@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
745f17a4166e ("ext4: fix race between writepages and remount")
1b2924393309 ("Revert "ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled"")
eb1f822c76be ("ext4: enable the lazy init thread when remounting read/write")
4c0b4818b1f6 ("ext4: improve error recovery code paths in __ext4_remount()")
a44be64bbecb ("ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled")
3b50d5018ed0 ("ext4: reflect error codes from ext4_multi_mount_protect() to its callers")
7edfd85b1ffd ("ext4: Completely separate options parsing and sb setup")
6e47a3cc68fc ("ext4: get rid of super block and sbi from handle_mount_ops()")
b6bd243500b6 ("ext4: check ext2/3 compatibility outside handle_mount_opt()")
e6e268cb6822 ("ext4: move quota configuration out of handle_mount_opt()")
da812f611934 ("ext4: Allow sb to be NULL in ext4_msg()")
461c3af045d3 ("ext4: Change handle_mount_opt() to use fs_parameter")
4c94bff967d9 ("ext4: move option validation to a separate function")
3bbef91bdd21 ("ext4: remove an unused variable warning with CONFIG_QUOTA=n")
2e5fd489a4e5 ("Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 745f17a4166e79315e4b7f33ce89d03e75a76983 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1(a)huawei.com>
Date: Wed, 24 May 2023 15:25:38 +0800
Subject: [PATCH] ext4: fix race between writepages and remount
We got a WARNING in ext4_add_complete_io:
==================================================================
WARNING: at fs/ext4/page-io.c:231 ext4_put_io_end_defer+0x182/0x250
CPU: 10 PID: 77 Comm: ksoftirqd/10 Tainted: 6.3.0-rc2 #85
RIP: 0010:ext4_put_io_end_defer+0x182/0x250 [ext4]
[...]
Call Trace:
<TASK>
ext4_end_bio+0xa8/0x240 [ext4]
bio_endio+0x195/0x310
blk_update_request+0x184/0x770
scsi_end_request+0x2f/0x240
scsi_io_completion+0x75/0x450
scsi_finish_command+0xef/0x160
scsi_complete+0xa3/0x180
blk_complete_reqs+0x60/0x80
blk_done_softirq+0x25/0x40
__do_softirq+0x119/0x4c8
run_ksoftirqd+0x42/0x70
smpboot_thread_fn+0x136/0x3c0
kthread+0x140/0x1a0
ret_from_fork+0x2c/0x50
==================================================================
Above issue may happen as follows:
cpu1 cpu2
----------------------------|----------------------------
mount -o dioread_lock
ext4_writepages
ext4_do_writepages
*if (ext4_should_dioread_nolock(inode))*
// rsv_blocks is not assigned here
mount -o remount,dioread_nolock
ext4_journal_start_with_reserve
__ext4_journal_start
__ext4_journal_start_sb
jbd2__journal_start
*if (rsv_blocks)*
// h_rsv_handle is not initialized here
mpage_map_and_submit_extent
mpage_map_one_extent
dioread_nolock = ext4_should_dioread_nolock(inode)
if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN))
mpd->io_submit.io_end->handle = handle->h_rsv_handle
ext4_set_io_unwritten_flag
io_end->flag |= EXT4_IO_END_UNWRITTEN
// now io_end->handle is NULL but has EXT4_IO_END_UNWRITTEN flag
scsi_finish_command
scsi_io_completion
scsi_io_completion_action
scsi_end_request
blk_update_request
req_bio_endio
bio_endio
bio->bi_end_io > ext4_end_bio
ext4_put_io_end_defer
ext4_add_complete_io
// trigger WARN_ON(!io_end->handle && sbi->s_journal);
The immediate cause of this problem is that ext4_should_dioread_nolock()
function returns inconsistent values in the ext4_do_writepages() and
mpage_map_one_extent(). There are four conditions in this function that
can be changed at mount time to cause this problem. These four conditions
can be divided into two categories:
(1) journal_data and EXT4_EXTENTS_FL, which can be changed by ioctl
(2) DELALLOC and DIOREAD_NOLOCK, which can be changed by remount
The two in the first category have been fixed by commit c8585c6fcaf2
("ext4: fix races between changing inode journal mode and ext4_writepages")
and commit cb85f4d23f79 ("ext4: fix race between writepages and enabling
EXT4_EXTENTS_FL") respectively.
Two cases in the other category have not yet been fixed, and the above
issue is caused by this situation. We refer to the fix for the first
category, when applying options during remount, we grab s_writepages_rwsem
to avoid racing with writepages ops to trigger this problem.
Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io")
Cc: stable(a)vger.kernel.org
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20230524072538.2883391-1-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9418359b1d9d..cd4ccae1e28a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1676,7 +1676,8 @@ struct ext4_sb_info {
/*
* Barrier between writepages ops and changing any inode's JOURNAL_DATA
- * or EXTENTS flag.
+ * or EXTENTS flag or between writepages ops and changing DELALLOC or
+ * DIOREAD_NOLOCK mount options on remount.
*/
struct percpu_rw_semaphore s_writepages_rwsem;
struct dax_device *s_daxdev;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6f48dec19f4a..d062383ea50e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6443,6 +6443,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
struct ext4_mount_options old_opts;
ext4_group_t g;
int err = 0;
+ int alloc_ctx;
#ifdef CONFIG_QUOTA
int enable_quota = 0;
int i, j;
@@ -6483,7 +6484,16 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
+ /*
+ * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
+ * two calls to ext4_should_dioread_nolock() to return inconsistent
+ * values, triggering WARN_ON in ext4_add_complete_io(). we grab
+ * here s_writepages_rwsem to avoid race between writepages ops and
+ * remount.
+ */
+ alloc_ctx = ext4_writepages_down_write(sb);
ext4_apply_options(fc, sb);
+ ext4_writepages_up_write(sb, alloc_ctx);
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
@@ -6701,6 +6711,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
+
+ alloc_ctx = ext4_writepages_down_write(sb);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -6709,6 +6721,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
sbi->s_commit_interval = old_opts.s_commit_interval;
sbi->s_min_batch_time = old_opts.s_min_batch_time;
sbi->s_max_batch_time = old_opts.s_max_batch_time;
+ ext4_writepages_up_write(sb, alloc_ctx);
+
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 745f17a4166e79315e4b7f33ce89d03e75a76983
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112457-vowel-preppy-cf5d@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
745f17a4166e ("ext4: fix race between writepages and remount")
1b2924393309 ("Revert "ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled"")
eb1f822c76be ("ext4: enable the lazy init thread when remounting read/write")
4c0b4818b1f6 ("ext4: improve error recovery code paths in __ext4_remount()")
a44be64bbecb ("ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled")
3b50d5018ed0 ("ext4: reflect error codes from ext4_multi_mount_protect() to its callers")
7edfd85b1ffd ("ext4: Completely separate options parsing and sb setup")
6e47a3cc68fc ("ext4: get rid of super block and sbi from handle_mount_ops()")
b6bd243500b6 ("ext4: check ext2/3 compatibility outside handle_mount_opt()")
e6e268cb6822 ("ext4: move quota configuration out of handle_mount_opt()")
da812f611934 ("ext4: Allow sb to be NULL in ext4_msg()")
461c3af045d3 ("ext4: Change handle_mount_opt() to use fs_parameter")
4c94bff967d9 ("ext4: move option validation to a separate function")
3bbef91bdd21 ("ext4: remove an unused variable warning with CONFIG_QUOTA=n")
2e5fd489a4e5 ("Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 745f17a4166e79315e4b7f33ce89d03e75a76983 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1(a)huawei.com>
Date: Wed, 24 May 2023 15:25:38 +0800
Subject: [PATCH] ext4: fix race between writepages and remount
We got a WARNING in ext4_add_complete_io:
==================================================================
WARNING: at fs/ext4/page-io.c:231 ext4_put_io_end_defer+0x182/0x250
CPU: 10 PID: 77 Comm: ksoftirqd/10 Tainted: 6.3.0-rc2 #85
RIP: 0010:ext4_put_io_end_defer+0x182/0x250 [ext4]
[...]
Call Trace:
<TASK>
ext4_end_bio+0xa8/0x240 [ext4]
bio_endio+0x195/0x310
blk_update_request+0x184/0x770
scsi_end_request+0x2f/0x240
scsi_io_completion+0x75/0x450
scsi_finish_command+0xef/0x160
scsi_complete+0xa3/0x180
blk_complete_reqs+0x60/0x80
blk_done_softirq+0x25/0x40
__do_softirq+0x119/0x4c8
run_ksoftirqd+0x42/0x70
smpboot_thread_fn+0x136/0x3c0
kthread+0x140/0x1a0
ret_from_fork+0x2c/0x50
==================================================================
Above issue may happen as follows:
cpu1 cpu2
----------------------------|----------------------------
mount -o dioread_lock
ext4_writepages
ext4_do_writepages
*if (ext4_should_dioread_nolock(inode))*
// rsv_blocks is not assigned here
mount -o remount,dioread_nolock
ext4_journal_start_with_reserve
__ext4_journal_start
__ext4_journal_start_sb
jbd2__journal_start
*if (rsv_blocks)*
// h_rsv_handle is not initialized here
mpage_map_and_submit_extent
mpage_map_one_extent
dioread_nolock = ext4_should_dioread_nolock(inode)
if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN))
mpd->io_submit.io_end->handle = handle->h_rsv_handle
ext4_set_io_unwritten_flag
io_end->flag |= EXT4_IO_END_UNWRITTEN
// now io_end->handle is NULL but has EXT4_IO_END_UNWRITTEN flag
scsi_finish_command
scsi_io_completion
scsi_io_completion_action
scsi_end_request
blk_update_request
req_bio_endio
bio_endio
bio->bi_end_io > ext4_end_bio
ext4_put_io_end_defer
ext4_add_complete_io
// trigger WARN_ON(!io_end->handle && sbi->s_journal);
The immediate cause of this problem is that ext4_should_dioread_nolock()
function returns inconsistent values in the ext4_do_writepages() and
mpage_map_one_extent(). There are four conditions in this function that
can be changed at mount time to cause this problem. These four conditions
can be divided into two categories:
(1) journal_data and EXT4_EXTENTS_FL, which can be changed by ioctl
(2) DELALLOC and DIOREAD_NOLOCK, which can be changed by remount
The two in the first category have been fixed by commit c8585c6fcaf2
("ext4: fix races between changing inode journal mode and ext4_writepages")
and commit cb85f4d23f79 ("ext4: fix race between writepages and enabling
EXT4_EXTENTS_FL") respectively.
Two cases in the other category have not yet been fixed, and the above
issue is caused by this situation. We refer to the fix for the first
category, when applying options during remount, we grab s_writepages_rwsem
to avoid racing with writepages ops to trigger this problem.
Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io")
Cc: stable(a)vger.kernel.org
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20230524072538.2883391-1-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9418359b1d9d..cd4ccae1e28a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1676,7 +1676,8 @@ struct ext4_sb_info {
/*
* Barrier between writepages ops and changing any inode's JOURNAL_DATA
- * or EXTENTS flag.
+ * or EXTENTS flag or between writepages ops and changing DELALLOC or
+ * DIOREAD_NOLOCK mount options on remount.
*/
struct percpu_rw_semaphore s_writepages_rwsem;
struct dax_device *s_daxdev;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6f48dec19f4a..d062383ea50e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6443,6 +6443,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
struct ext4_mount_options old_opts;
ext4_group_t g;
int err = 0;
+ int alloc_ctx;
#ifdef CONFIG_QUOTA
int enable_quota = 0;
int i, j;
@@ -6483,7 +6484,16 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
+ /*
+ * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
+ * two calls to ext4_should_dioread_nolock() to return inconsistent
+ * values, triggering WARN_ON in ext4_add_complete_io(). we grab
+ * here s_writepages_rwsem to avoid race between writepages ops and
+ * remount.
+ */
+ alloc_ctx = ext4_writepages_down_write(sb);
ext4_apply_options(fc, sb);
+ ext4_writepages_up_write(sb, alloc_ctx);
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
@@ -6701,6 +6711,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
+
+ alloc_ctx = ext4_writepages_down_write(sb);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -6709,6 +6721,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
sbi->s_commit_interval = old_opts.s_commit_interval;
sbi->s_min_batch_time = old_opts.s_min_batch_time;
sbi->s_max_batch_time = old_opts.s_max_batch_time;
+ ext4_writepages_up_write(sb, alloc_ctx);
+
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 745f17a4166e79315e4b7f33ce89d03e75a76983
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112455-epileptic-throbbing-51d2@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
745f17a4166e ("ext4: fix race between writepages and remount")
1b2924393309 ("Revert "ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled"")
eb1f822c76be ("ext4: enable the lazy init thread when remounting read/write")
4c0b4818b1f6 ("ext4: improve error recovery code paths in __ext4_remount()")
a44be64bbecb ("ext4: don't clear SB_RDONLY when remounting r/w until quota is re-enabled")
3b50d5018ed0 ("ext4: reflect error codes from ext4_multi_mount_protect() to its callers")
7edfd85b1ffd ("ext4: Completely separate options parsing and sb setup")
6e47a3cc68fc ("ext4: get rid of super block and sbi from handle_mount_ops()")
b6bd243500b6 ("ext4: check ext2/3 compatibility outside handle_mount_opt()")
e6e268cb6822 ("ext4: move quota configuration out of handle_mount_opt()")
da812f611934 ("ext4: Allow sb to be NULL in ext4_msg()")
461c3af045d3 ("ext4: Change handle_mount_opt() to use fs_parameter")
4c94bff967d9 ("ext4: move option validation to a separate function")
3bbef91bdd21 ("ext4: remove an unused variable warning with CONFIG_QUOTA=n")
2e5fd489a4e5 ("Merge tag 'libnvdimm-for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 745f17a4166e79315e4b7f33ce89d03e75a76983 Mon Sep 17 00:00:00 2001
From: Baokun Li <libaokun1(a)huawei.com>
Date: Wed, 24 May 2023 15:25:38 +0800
Subject: [PATCH] ext4: fix race between writepages and remount
We got a WARNING in ext4_add_complete_io:
==================================================================
WARNING: at fs/ext4/page-io.c:231 ext4_put_io_end_defer+0x182/0x250
CPU: 10 PID: 77 Comm: ksoftirqd/10 Tainted: 6.3.0-rc2 #85
RIP: 0010:ext4_put_io_end_defer+0x182/0x250 [ext4]
[...]
Call Trace:
<TASK>
ext4_end_bio+0xa8/0x240 [ext4]
bio_endio+0x195/0x310
blk_update_request+0x184/0x770
scsi_end_request+0x2f/0x240
scsi_io_completion+0x75/0x450
scsi_finish_command+0xef/0x160
scsi_complete+0xa3/0x180
blk_complete_reqs+0x60/0x80
blk_done_softirq+0x25/0x40
__do_softirq+0x119/0x4c8
run_ksoftirqd+0x42/0x70
smpboot_thread_fn+0x136/0x3c0
kthread+0x140/0x1a0
ret_from_fork+0x2c/0x50
==================================================================
Above issue may happen as follows:
cpu1 cpu2
----------------------------|----------------------------
mount -o dioread_lock
ext4_writepages
ext4_do_writepages
*if (ext4_should_dioread_nolock(inode))*
// rsv_blocks is not assigned here
mount -o remount,dioread_nolock
ext4_journal_start_with_reserve
__ext4_journal_start
__ext4_journal_start_sb
jbd2__journal_start
*if (rsv_blocks)*
// h_rsv_handle is not initialized here
mpage_map_and_submit_extent
mpage_map_one_extent
dioread_nolock = ext4_should_dioread_nolock(inode)
if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN))
mpd->io_submit.io_end->handle = handle->h_rsv_handle
ext4_set_io_unwritten_flag
io_end->flag |= EXT4_IO_END_UNWRITTEN
// now io_end->handle is NULL but has EXT4_IO_END_UNWRITTEN flag
scsi_finish_command
scsi_io_completion
scsi_io_completion_action
scsi_end_request
blk_update_request
req_bio_endio
bio_endio
bio->bi_end_io > ext4_end_bio
ext4_put_io_end_defer
ext4_add_complete_io
// trigger WARN_ON(!io_end->handle && sbi->s_journal);
The immediate cause of this problem is that ext4_should_dioread_nolock()
function returns inconsistent values in the ext4_do_writepages() and
mpage_map_one_extent(). There are four conditions in this function that
can be changed at mount time to cause this problem. These four conditions
can be divided into two categories:
(1) journal_data and EXT4_EXTENTS_FL, which can be changed by ioctl
(2) DELALLOC and DIOREAD_NOLOCK, which can be changed by remount
The two in the first category have been fixed by commit c8585c6fcaf2
("ext4: fix races between changing inode journal mode and ext4_writepages")
and commit cb85f4d23f79 ("ext4: fix race between writepages and enabling
EXT4_EXTENTS_FL") respectively.
Two cases in the other category have not yet been fixed, and the above
issue is caused by this situation. We refer to the fix for the first
category, when applying options during remount, we grab s_writepages_rwsem
to avoid racing with writepages ops to trigger this problem.
Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io")
Cc: stable(a)vger.kernel.org
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20230524072538.2883391-1-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9418359b1d9d..cd4ccae1e28a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1676,7 +1676,8 @@ struct ext4_sb_info {
/*
* Barrier between writepages ops and changing any inode's JOURNAL_DATA
- * or EXTENTS flag.
+ * or EXTENTS flag or between writepages ops and changing DELALLOC or
+ * DIOREAD_NOLOCK mount options on remount.
*/
struct percpu_rw_semaphore s_writepages_rwsem;
struct dax_device *s_daxdev;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6f48dec19f4a..d062383ea50e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6443,6 +6443,7 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
struct ext4_mount_options old_opts;
ext4_group_t g;
int err = 0;
+ int alloc_ctx;
#ifdef CONFIG_QUOTA
int enable_quota = 0;
int i, j;
@@ -6483,7 +6484,16 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
}
+ /*
+ * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
+ * two calls to ext4_should_dioread_nolock() to return inconsistent
+ * values, triggering WARN_ON in ext4_add_complete_io(). we grab
+ * here s_writepages_rwsem to avoid race between writepages ops and
+ * remount.
+ */
+ alloc_ctx = ext4_writepages_down_write(sb);
ext4_apply_options(fc, sb);
+ ext4_writepages_up_write(sb, alloc_ctx);
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
@@ -6701,6 +6711,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
sb_any_quota_suspended(sb))
dquot_resume(sb, -1);
+
+ alloc_ctx = ext4_writepages_down_write(sb);
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_mount_opt2 = old_opts.s_mount_opt2;
@@ -6709,6 +6721,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
sbi->s_commit_interval = old_opts.s_commit_interval;
sbi->s_min_batch_time = old_opts.s_min_batch_time;
sbi->s_max_batch_time = old_opts.s_max_batch_time;
+ ext4_writepages_up_write(sb, alloc_ctx);
+
if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA