The fix from commit 2d29f6b96d8f introduced an unexpected performance regression when allocating blocks. Fix by rewriting the overly complicated wrap-around logic in gfs2_rbm_find in a more reasonable way. Discovered and verified with iozone.
Fixes: 2d29f6b96d8f ("gfs2: Fix loop in gfs2_rbm_find") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Andreas Gruenbacher agruenba@redhat.com --- fs/gfs2/rgrp.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 831d7cb5a49c4..3f16411d885d9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1730,25 +1730,15 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, const struct gfs2_inode *ip, bool nowrap) { struct buffer_head *bh; - int initial_bii; - u32 initial_offset; int first_bii = rbm->bii; u32 first_offset = rbm->offset; u32 offset; u8 *buffer; - int n = 0; - int iters = rbm->rgd->rd_length; + bool wrapped = false; int ret; struct gfs2_bitmap *bi; struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
- /* If we are not starting at the beginning of a bitmap, then we - * need to add one to the bitmap count to ensure that we search - * the starting bitmap twice. - */ - if (rbm->offset != 0) - iters++; - while(1) { bi = rbm_bi(rbm); if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) && @@ -1761,47 +1751,43 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, WARN_ON(!buffer_uptodate(bh)); if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset; - initial_offset = rbm->offset; offset = gfs2_bitfit(buffer, bi->bi_bytes, rbm->offset, state); - if (offset == BFITNOENT) - goto bitmap_full; + if (offset == BFITNOENT) { + if (state == GFS2_BLKST_FREE && rbm->offset == 0) + set_bit(GBF_FULL, &bi->bi_flags); + goto next_bitmap; + } rbm->offset = offset; if (ip == NULL) return 0;
- initial_bii = rbm->bii; ret = gfs2_reservation_check_and_update(rbm, ip, minext ? *minext : 0, &maxext); if (ret == 0) return 0; - if (ret > 0) { - n += (rbm->bii - initial_bii); + if (ret > 0) goto next_iter; - } if (ret == -E2BIG) { - n += rbm->bii - initial_bii; rbm->bii = 0; rbm->offset = 0; goto res_covered_end_of_rgrp; } return ret;
-bitmap_full: /* Mark bitmap as full and fall through */ - if ((state == GFS2_BLKST_FREE) && initial_offset == 0) - set_bit(GBF_FULL, &bi->bi_flags); - next_bitmap: /* Find next bitmap in the rgrp */ rbm->offset = 0; rbm->bii++; if (rbm->bii == rbm->rgd->rd_length) rbm->bii = 0; res_covered_end_of_rgrp: - if ((rbm->bii == 0) && nowrap) - break; - n++; + if (rbm->bii == 0) { + if (nowrap) + break; + wrapped = true; + } next_iter: - if (n >= iters) + if (wrapped && rbm->bii > first_bii) break; }
----- Original Message -----
The fix from commit 2d29f6b96d8f introduced an unexpected performance regression when allocating blocks. Fix by rewriting the overly complicated wrap-around logic in gfs2_rbm_find in a more reasonable way. Discovered and verified with iozone.
Fixes: 2d29f6b96d8f ("gfs2: Fix loop in gfs2_rbm_find") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Andreas Gruenbacher agruenba@redhat.com
fs/gfs2/rgrp.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 831d7cb5a49c4..3f16411d885d9 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1730,25 +1730,15 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, const struct gfs2_inode *ip, bool nowrap) { struct buffer_head *bh;
- int initial_bii;
- u32 initial_offset; int first_bii = rbm->bii; u32 first_offset = rbm->offset; u32 offset; u8 *buffer;
- int n = 0;
- int iters = rbm->rgd->rd_length;
- bool wrapped = false; int ret; struct gfs2_bitmap *bi; struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, };
- /* If we are not starting at the beginning of a bitmap, then we
* need to add one to the bitmap count to ensure that we search
* the starting bitmap twice.
*/
- if (rbm->offset != 0)
iters++;
- while(1) { bi = rbm_bi(rbm); if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
@@ -1761,47 +1751,43 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, WARN_ON(!buffer_uptodate(bh)); if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) buffer = bi->bi_clone + bi->bi_offset;
offset = gfs2_bitfit(buffer, bi->bi_bytes, rbm->offset, state);initial_offset = rbm->offset;
if (offset == BFITNOENT)
goto bitmap_full;
if (offset == BFITNOENT) {
if (state == GFS2_BLKST_FREE && rbm->offset == 0)
set_bit(GBF_FULL, &bi->bi_flags);
goto next_bitmap;
rbm->offset = offset; if (ip == NULL) return 0;}
ret = gfs2_reservation_check_and_update(rbm, ip, minext ? *minext : 0, &maxext); if (ret == 0) return 0;initial_bii = rbm->bii;
if (ret > 0) {
n += (rbm->bii - initial_bii);
if (ret > 0) goto next_iter;
if (ret == -E2BIG) {}
} return ret;n += rbm->bii - initial_bii; rbm->bii = 0; rbm->offset = 0; goto res_covered_end_of_rgrp;
-bitmap_full: /* Mark bitmap as full and fall through */
if ((state == GFS2_BLKST_FREE) && initial_offset == 0)
set_bit(GBF_FULL, &bi->bi_flags);
next_bitmap: /* Find next bitmap in the rgrp */ rbm->offset = 0; rbm->bii++; if (rbm->bii == rbm->rgd->rd_length) rbm->bii = 0; res_covered_end_of_rgrp:
if ((rbm->bii == 0) && nowrap)
break;
n++;
if (rbm->bii == 0) {
if (nowrap)
break;
wrapped = true;
}
next_iter:
if (n >= iters)
}if (wrapped && rbm->bii > first_bii) break;
2.20.1
Hi,
I think you're on the right track here, and I definitely like most of your improvements, but I'm concerned about one thing.
The reason we currently do (rd_length + 1) iterations is, for example, cases where space is really tight and the starting point of the search comes after the last free block.
So simplifying the test down to a tiny file system where there is only one rgrp, which contains the only bitmap. Assuming block size 4K, minus the rgrp header, you've got 0xf80 (3968) bytes possible, or 1984 blocks possible.
Let's say you have two processes, A and B. Let's say A wants to create a dinode, and its "hint" says to start at the (only) bitmap block 1500. But process B already has the rgrp locked because he's freeing a dinode at block 1000, so he marks block 1000 free and releases the lock. Now process A starts searching the bitmap at offset 1500 until the end. The only free block is 1000 (because of process B's free), so A's bitmap search reaches the end of the bitmap and gets BFITNOENT.
With this new patch, it will just go back next_bitmap, but there are no more bitmaps. It sets wrap, but since we're at the end, it returns -ENOSPC, when, in fact, there is a free block, no?
That's why we coded it the way we did: With today's algorithm, it would reach the end of the rgrp, wrap to the beginning again, and find free block 1000.
I know this scenario may seem absurd. My concern is not whether this particular scenario is broken with the patch, but more that the algorithm may cause:
(a) more file system fragmentation (b) more file fragmentation (c) improperly reporting of -ENOSPC (d) or something else.
Before we push this upstream, we should probably run a file system aging test, like sas calibration without unmounts, and compare filefrag results with older kernels that don't have 2d29f6b96d8f. And we should do it with very little free space after, say, 13 iterations. IOW, so it hits free space limitations for many different rgrps.
Regards,
Bob Peterson Red Hat File Systems
----- Original Message -----
----- Original Message -----
The fix from commit 2d29f6b96d8f introduced an unexpected performance regression when allocating blocks. Fix by rewriting the overly complicated wrap-around logic in gfs2_rbm_find in a more reasonable way. Discovered and verified with iozone.
Fixes: 2d29f6b96d8f ("gfs2: Fix loop in gfs2_rbm_find") Cc: stable@vger.kernel.org # v3.13+ Signed-off-by: Andreas Gruenbacher agruenba@redhat.com
(snip)
With this new patch, it will just go back next_bitmap, but there are no more bitmaps. It sets wrap, but since we're at the end, it returns -ENOSPC, when, in fact, there is a free block, no?
On closer inspection, the patch looks okay to me. I still want to test it before we push it.
Bob Peterson
Hi,
[This is an automated email]
This commit has been processed because it contains a "Fixes:" tag, fixing commit: 2d29f6b96d8f gfs2: Fix loop in gfs2_rbm_find.
The bot has tested the following trees: v4.20.5, v4.19.18, v4.14.96, v4.9.153, v4.4.172, v3.18.133.
v4.20.5: Build OK! v4.19.18: Failed to apply! Possible dependencies: 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can")
v4.14.96: Failed to apply! Possible dependencies: 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") dc8fbb03dcd6 ("GFS2: gfs2_free_extlen can return an extent that is too long") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can")
v4.9.153: Failed to apply! Possible dependencies: 0d1c7ae9d849 ("GFS2: Prevent BUG from occurring when normal Withdraws occur") 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") 9862ca056e65 ("GFS2: Switch tr_touched to flag in transaction") dc8fbb03dcd6 ("GFS2: gfs2_free_extlen can return an extent that is too long") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can") ed17545d01e4 ("GFS2: Allow glocks to be unlocked after withdraw")
v4.4.172: Failed to apply! Possible dependencies: 0d1c7ae9d849 ("GFS2: Prevent BUG from occurring when normal Withdraws occur") 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") 3e11e5304150 ("GFS2: ignore unlock failures after withdraw") 471f3db2786b ("gfs2: change gfs2 readdir cookie") 9862ca056e65 ("GFS2: Switch tr_touched to flag in transaction") dc8fbb03dcd6 ("GFS2: gfs2_free_extlen can return an extent that is too long") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can") ed17545d01e4 ("GFS2: Allow glocks to be unlocked after withdraw")
v3.18.133: Failed to apply! Possible dependencies: 0d1c7ae9d849 ("GFS2: Prevent BUG from occurring when normal Withdraws occur") 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") 2e60d7683c8d ("GFS2: update freeze code to use freeze/thaw_super on all nodes") 3cdcf63ed2d1 ("GFS2: use kvfree() instead of open-coding it") 3e11e5304150 ("GFS2: ignore unlock failures after withdraw") 471f3db2786b ("gfs2: change gfs2 readdir cookie") 9862ca056e65 ("GFS2: Switch tr_touched to flag in transaction") a3e3213676d8 ("gfs2: fix shadow warning in gfs2_rbm_find()") dc8fbb03dcd6 ("GFS2: gfs2_free_extlen can return an extent that is too long") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can") ed17545d01e4 ("GFS2: Allow glocks to be unlocked after withdraw")
How should we proceed with this patch?
-- Thanks, Sasha
----- Original Message -----
Hi,
[This is an automated email]
This commit has been processed because it contains a "Fixes:" tag, fixing commit: 2d29f6b96d8f gfs2: Fix loop in gfs2_rbm_find.
The bot has tested the following trees: v4.20.5, v4.19.18, v4.14.96, v4.9.153, v4.4.172, v3.18.133.
v4.20.5: Build OK! v4.19.18: Failed to apply! Possible dependencies: 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can")
v4.14.96: Failed to apply! Possible dependencies: 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") dc8fbb03dcd6 ("GFS2: gfs2_free_extlen can return an extent that is too long") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can")
v4.9.153: Failed to apply! Possible dependencies: 0d1c7ae9d849 ("GFS2: Prevent BUG from occurring when normal Withdraws occur") 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") 9862ca056e65 ("GFS2: Switch tr_touched to flag in transaction") dc8fbb03dcd6 ("GFS2: gfs2_free_extlen can return an extent that is too long") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can") ed17545d01e4 ("GFS2: Allow glocks to be unlocked after withdraw")
v4.4.172: Failed to apply! Possible dependencies: 0d1c7ae9d849 ("GFS2: Prevent BUG from occurring when normal Withdraws occur") 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") 3e11e5304150 ("GFS2: ignore unlock failures after withdraw") 471f3db2786b ("gfs2: change gfs2 readdir cookie") 9862ca056e65 ("GFS2: Switch tr_touched to flag in transaction") dc8fbb03dcd6 ("GFS2: gfs2_free_extlen can return an extent that is too long") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can") ed17545d01e4 ("GFS2: Allow glocks to be unlocked after withdraw")
v3.18.133: Failed to apply! Possible dependencies: 0d1c7ae9d849 ("GFS2: Prevent BUG from occurring when normal Withdraws occur") 281b4952d185 ("gfs2: Rename bitmap.bi_{len => bytes}") 2e60d7683c8d ("GFS2: update freeze code to use freeze/thaw_super on all nodes") 3cdcf63ed2d1 ("GFS2: use kvfree() instead of open-coding it") 3e11e5304150 ("GFS2: ignore unlock failures after withdraw") 471f3db2786b ("gfs2: change gfs2 readdir cookie") 9862ca056e65 ("GFS2: Switch tr_touched to flag in transaction") a3e3213676d8 ("gfs2: fix shadow warning in gfs2_rbm_find()") dc8fbb03dcd6 ("GFS2: gfs2_free_extlen can return an extent that is too long") e54c78a27fcd ("gfs2: Use fs_* functions instead of pr_* function where we can") ed17545d01e4 ("GFS2: Allow glocks to be unlocked after withdraw")
How should we proceed with this patch?
-- Thanks, Sasha
Hi Sasha,
We have found problems with the "gfs2: Fix loop in gfs2_rbm_find (2)" patch, so for now we would like to revert the patch it's trying to fix, which is "gfs2: Fix loop in gfs2_rbm_find" (the original).
Andreas Gruenbacher just sent a revert request for the broken patch to Linus, so we should just delete "gfs2: Fix loop in gfs2_rbm_find (2)" until we can get a proper replacement and test it thoroughly.
Regards,
Bob Peterson
linux-stable-mirror@lists.linaro.org