Let's reduce the complexity of mixed use of rb_tree in victim_entry from extent_cache and discard_cmd.
This should fix arm32 memory alignment issue caused by shared rb_entry.
[struct victim_entry] [struct rb_entry] [0] struct rb_node rb_node; [0] struct rb_node rb_node; union { struct { unsigned int ofs; unsigned int len; }; [16] unsigned long long mtime; [12] unsigned long long key; } __packed;
Cc: stable@vger.kernel.org Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection") Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org --- fs/f2fs/extent_cache.c | 36 +------------------- fs/f2fs/f2fs.h | 15 ++------- fs/f2fs/gc.c | 74 ++++++++++++++++++++++++++++++++++-------- fs/f2fs/gc.h | 14 ++------ fs/f2fs/segment.c | 4 +-- 5 files changed, 68 insertions(+), 75 deletions(-)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 28b12553f2b3..d1aa4609ca6b 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -204,29 +204,6 @@ struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, return re; }
-struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi, - struct rb_root_cached *root, - struct rb_node **parent, - unsigned long long key, bool *leftmost) -{ - struct rb_node **p = &root->rb_root.rb_node; - struct rb_entry *re; - - while (*p) { - *parent = *p; - re = rb_entry(*parent, struct rb_entry, rb_node); - - if (key < re->key) { - p = &(*p)->rb_left; - } else { - p = &(*p)->rb_right; - *leftmost = false; - } - } - - return p; -} - struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, struct rb_root_cached *root, struct rb_node **parent, @@ -335,7 +312,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, }
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root_cached *root, bool check_key) + struct rb_root_cached *root) { #ifdef CONFIG_F2FS_CHECK_FS struct rb_node *cur = rb_first_cached(root), *next; @@ -352,23 +329,12 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, cur_re = rb_entry(cur, struct rb_entry, rb_node); next_re = rb_entry(next, struct rb_entry, rb_node);
- if (check_key) { - if (cur_re->key > next_re->key) { - f2fs_info(sbi, "inconsistent rbtree, " - "cur(%llu) next(%llu)", - cur_re->key, next_re->key); - return false; - } - goto next; - } - if (cur_re->ofs + cur_re->len > next_re->ofs) { f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)", cur_re->ofs, cur_re->len, next_re->ofs, next_re->len); return false; } -next: cur = next; } #endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9c3ddebd28e3..9396549e112d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -630,13 +630,8 @@ enum extent_type {
struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ - union { - struct { - unsigned int ofs; /* start offset of the entry */ - unsigned int len; /* length of the entry */ - }; - unsigned long long key; /* 64-bits key */ - } __packed; + unsigned int ofs; /* start offset of the entry */ + unsigned int len; /* length of the entry */ };
struct extent_info { @@ -4139,10 +4134,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); bool sanity_check_extent_cache(struct inode *inode); struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs); -struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi, - struct rb_root_cached *root, - struct rb_node **parent, - unsigned long long key, bool *left_most); struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, struct rb_root_cached *root, struct rb_node **parent, @@ -4153,7 +4144,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, struct rb_node ***insert_p, struct rb_node **insert_parent, bool force, bool *leftmost); bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root_cached *root, bool check_key); + struct rb_root_cached *root); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 292a17d62f56..19a6d0c54581 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -398,8 +398,7 @@ static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi, struct atgc_management *am = &sbi->am; struct victim_entry *ve;
- ve = f2fs_kmem_cache_alloc(victim_entry_slab, - GFP_NOFS, true, NULL); + ve = f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS, true, NULL);
ve->mtime = mtime; ve->segno = segno; @@ -414,6 +413,29 @@ static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi, return ve; }
+static struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi, + struct rb_root_cached *root, + struct rb_node **parent, + unsigned long long mtime, bool *leftmost) +{ + struct rb_node **p = &root->rb_root.rb_node; + struct victim_entry *ve; + + while (*p) { + *parent = *p; + ve = rb_entry(*parent, struct victim_entry, rb_node); + + if (mtime < ve->mtime) { + p = &(*p)->rb_left; + } else { + p = &(*p)->rb_right; + *leftmost = false; + } + } + + return p; +} + static void insert_victim_entry(struct f2fs_sb_info *sbi, unsigned long long mtime, unsigned int segno) { @@ -481,7 +503,6 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi, struct atgc_management *am = &sbi->am; struct rb_root_cached *root = &am->root; struct rb_node *node; - struct rb_entry *re; struct victim_entry *ve; unsigned long long total_time; unsigned long long age, u, accu; @@ -508,12 +529,10 @@ static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
node = rb_first_cached(root); next: - re = rb_entry_safe(node, struct rb_entry, rb_node); - if (!re) + ve = rb_entry_safe(node, struct victim_entry, rb_node); + if (!ve) return;
- ve = (struct victim_entry *)re; - if (ve->mtime >= max_mtime || ve->mtime < min_mtime) goto skip;
@@ -556,7 +575,6 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi, struct sit_info *sit_i = SIT_I(sbi); struct atgc_management *am = &sbi->am; struct rb_node *node; - struct rb_entry *re; struct victim_entry *ve; unsigned long long age; unsigned long long max_mtime = sit_i->dirty_max_mtime; @@ -576,15 +594,13 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi, next_stage: node = lookup_central_victim(sbi, p); next_node: - re = rb_entry_safe(node, struct rb_entry, rb_node); - if (!re) { + ve = rb_entry_safe(node, struct victim_entry, rb_node); + if (!ve) { if (stage == 0) goto skip_stage; return; }
- ve = (struct victim_entry *)re; - if (ve->mtime >= max_mtime || ve->mtime < min_mtime) goto skip_node;
@@ -623,11 +639,41 @@ static void atssr_lookup_victim(struct f2fs_sb_info *sbi, goto next_stage; } } + +static bool f2fs_check_victim_tree(struct f2fs_sb_info *sbi, + struct rb_root_cached *root) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct rb_node *cur = rb_first_cached(root), *next; + struct victim_entry *cur_ve, *next_ve; + + if (!cur) + return true; + + while (cur) { + next = rb_next(cur); + if (!next) + return true; + + cur_ve = rb_entry(cur, struct victim_entry, rb_node); + next_ve = rb_entry(next, struct victim_entry, rb_node); + + if (cur_ve->mtime > next_ve->mtime) { + f2fs_info(sbi, "broken victim_rbtree, " + "cur_mtime(%llu) next_mtime(%llu)", + cur_ve->mtime, next_ve->mtime); + return false; + } + cur = next; + } +#endif + return true; +} + static void lookup_victim_by_age(struct f2fs_sb_info *sbi, struct victim_sel_policy *p) { - f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, - &sbi->am.root, true)); + f2fs_bug_on(sbi, !f2fs_check_victim_tree(sbi, &sbi->am.root));
if (p->gc_mode == GC_AT) atgc_lookup_victim(sbi, p); diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 15bd1d680f67..5ad6ac63e13f 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -55,20 +55,10 @@ struct gc_inode_list { struct radix_tree_root iroot; };
-struct victim_info { - unsigned long long mtime; /* mtime of section */ - unsigned int segno; /* section No. */ -}; - struct victim_entry { struct rb_node rb_node; /* rb node located in rb-tree */ - union { - struct { - unsigned long long mtime; /* mtime of section */ - unsigned int segno; /* segment No. */ - }; - struct victim_info vi; /* victim info */ - }; + unsigned long long mtime; /* mtime of section */ + unsigned int segno; /* segment No. */ struct list_head list; };
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 227e25836173..e98a12e8dca1 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1478,7 +1478,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, goto next; if (unlikely(dcc->rbtree_check)) f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, - &dcc->root, false)); + &dcc->root)); blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -2965,7 +2965,7 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, mutex_lock(&dcc->cmd_lock); if (unlikely(dcc->rbtree_check)) f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, - &dcc->root, false)); + &dcc->root));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, NULL, start,
This is a second part to remove the mixed use of rb_tree in discard_cmd from extent_cache.
This should also fix arm32 memory alignment issue caused by shared rb_entry.
[struct discard_cmd] [struct rb_entry] [0] struct rb_node rb_node; [0] struct rb_node rb_node; union { union { struct { struct { [16] block_t lstart; [12] unsigned int ofs; block_t len; unsigned int len; }; unsigned long long key; } __packed;
Cc: stable@vger.kernel.org Fixes: 004b68621897 ("f2fs: use rb-tree to track pending discard commands") Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org --- fs/f2fs/extent_cache.c | 36 +----- fs/f2fs/f2fs.h | 23 +--- fs/f2fs/segment.c | 255 +++++++++++++++++++++++++++-------------- 3 files changed, 172 insertions(+), 142 deletions(-)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index d1aa4609ca6b..5c206f941aac 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -192,7 +192,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root, return NULL; }
-struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, +static struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs) { struct rb_entry *re; @@ -204,7 +204,7 @@ struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, return re; }
-struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, +static struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, struct rb_root_cached *root, struct rb_node **parent, unsigned int ofs, bool *leftmost) @@ -238,7 +238,7 @@ struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, * in order to simplify the insertion after. * tree must stay unchanged between lookup and insertion. */ -struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, +static struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, struct rb_entry *cached_re, unsigned int ofs, struct rb_entry **prev_entry, @@ -311,36 +311,6 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, return re; }
-bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root_cached *root) -{ -#ifdef CONFIG_F2FS_CHECK_FS - struct rb_node *cur = rb_first_cached(root), *next; - struct rb_entry *cur_re, *next_re; - - if (!cur) - return true; - - while (cur) { - next = rb_next(cur); - if (!next) - return true; - - cur_re = rb_entry(cur, struct rb_entry, rb_node); - next_re = rb_entry(next, struct rb_entry, rb_node); - - if (cur_re->ofs + cur_re->len > next_re->ofs) { - f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)", - cur_re->ofs, cur_re->len, - next_re->ofs, next_re->len); - return false; - } - cur = next; - } -#endif - return true; -} - static struct kmem_cache *extent_tree_slab; static struct kmem_cache *extent_node_slab;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9396549e112d..6e04fea9c34f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -353,15 +353,7 @@ struct discard_info {
struct discard_cmd { struct rb_node rb_node; /* rb node located in rb-tree */ - union { - struct { - block_t lstart; /* logical start address */ - block_t len; /* length */ - block_t start; /* actual start address in dev */ - }; - struct discard_info di; /* discard info */ - - }; + struct discard_info di; /* discard info */ struct list_head list; /* command list */ struct completion wait; /* compleation */ struct block_device *bdev; /* bdev */ @@ -4132,19 +4124,6 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); * extent_cache.c */ bool sanity_check_extent_cache(struct inode *inode); -struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, - struct rb_entry *cached_re, unsigned int ofs); -struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, - struct rb_root_cached *root, - struct rb_node **parent, - unsigned int ofs, bool *leftmost); -struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, - struct rb_entry *cached_re, unsigned int ofs, - struct rb_entry **prev_entry, struct rb_entry **next_entry, - struct rb_node ***insert_p, struct rb_node **insert_parent, - bool force, bool *leftmost); -bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, - struct rb_root_cached *root); void f2fs_init_extent_tree(struct inode *inode); void f2fs_drop_extent_tree(struct inode *inode); void f2fs_destroy_extent_node(struct inode *inode); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e98a12e8dca1..961f5b149ee4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -933,9 +933,9 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL); INIT_LIST_HEAD(&dc->list); dc->bdev = bdev; - dc->lstart = lstart; - dc->start = start; - dc->len = len; + dc->di.lstart = lstart; + dc->di.start = start; + dc->di.len = len; dc->ref = 0; dc->state = D_PREP; dc->queued = 0; @@ -950,20 +950,111 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, return dc; }
-static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t lstart, - block_t start, block_t len, - struct rb_node *parent, struct rb_node **p, - bool leftmost) +static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct rb_node *cur = rb_first_cached(&dcc->root), *next; + struct discard_cmd *cur_dc, *next_dc; + + if (!cur) + return true; + + while (cur) { + next = rb_next(cur); + if (!next) + return true; + + cur_dc = rb_entry(cur, struct discard_cmd, rb_node); + next_dc = rb_entry(next, struct discard_cmd, rb_node); + + if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) { + f2fs_info(sbi, "broken discard_rbtree, " + "cur(%u, %u) next(%u, %u)", + cur_dc->di.lstart, cur_dc->di.len, + next_dc->di.lstart, next_dc->di.len); + return false; + } + cur = next; + } +#endif + return true; +} + +static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi, + block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct rb_node *node = dcc->root.rb_root.rb_node; struct discard_cmd *dc;
- dc = __create_discard_cmd(sbi, bdev, lstart, start, len); + while (node) { + dc = rb_entry(node, struct discard_cmd, rb_node);
- rb_link_node(&dc->rb_node, parent, p); - rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost); + if (blkaddr < dc->di.lstart) + node = node->rb_left; + else if (blkaddr >= dc->di.lstart + dc->di.len) + node = node->rb_right; + else + return dc; + } + return NULL; +} + +static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root, + block_t blkaddr, + struct discard_cmd **prev_entry, + struct discard_cmd **next_entry, + struct rb_node ***insert_p, + struct rb_node **insert_parent) +{ + struct rb_node **pnode = &root->rb_root.rb_node; + struct rb_node *parent = NULL, *tmp_node; + struct discard_cmd *dc; + + *insert_p = NULL; + *insert_parent = NULL; + *prev_entry = NULL; + *next_entry = NULL; + + if (RB_EMPTY_ROOT(&root->rb_root)) + return NULL; + + while (*pnode) { + parent = *pnode; + dc = rb_entry(*pnode, struct discard_cmd, rb_node); + + if (blkaddr < dc->di.lstart) + pnode = &(*pnode)->rb_left; + else if (blkaddr >= dc->di.lstart + dc->di.len) + pnode = &(*pnode)->rb_right; + else + goto lookup_neighbors; + }
+ *insert_p = pnode; + *insert_parent = parent; + + dc = rb_entry(parent, struct discard_cmd, rb_node); + tmp_node = parent; + if (parent && blkaddr > dc->di.lstart) + tmp_node = rb_next(parent); + *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node); + + tmp_node = parent; + if (parent && blkaddr < dc->di.lstart) + tmp_node = rb_prev(parent); + *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node); + return NULL; + +lookup_neighbors: + /* lookup prev node for merging backward later */ + tmp_node = rb_prev(&dc->rb_node); + *prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node); + + /* lookup next node for merging frontward later */ + tmp_node = rb_next(&dc->rb_node); + *next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node); return dc; }
@@ -975,7 +1066,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc,
list_del(&dc->list); rb_erase_cached(&dc->rb_node, &dcc->root); - dcc->undiscard_blks -= dc->len; + dcc->undiscard_blks -= dc->di.len;
kmem_cache_free(discard_cmd_slab, dc);
@@ -988,7 +1079,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; unsigned long flags;
- trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len); + trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);
spin_lock_irqsave(&dc->lock, flags); if (dc->bio_ref) { @@ -1006,7 +1097,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, printk_ratelimited( "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", KERN_INFO, sbi->sb->s_id, - dc->lstart, dc->start, dc->len, dc->error); + dc->di.lstart, dc->di.start, dc->di.len, dc->error); __detach_discard_cmd(dcc, dc); }
@@ -1122,14 +1213,14 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) return 0;
- trace_f2fs_issue_discard(bdev, dc->start, dc->len); + trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);
- lstart = dc->lstart; - start = dc->start; - len = dc->len; + lstart = dc->di.lstart; + start = dc->di.start; + len = dc->di.len; total_len = len;
- dc->len = 0; + dc->di.len = 0;
while (total_len && *issued < dpolicy->max_requests && !err) { struct bio *bio = NULL; @@ -1145,7 +1236,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, if (*issued == dpolicy->max_requests) last = true;
- dc->len += len; + dc->di.len += len;
if (time_to_inject(sbi, FAULT_DISCARD)) { err = -EIO; @@ -1207,34 +1298,41 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, return err; }
-static void __insert_discard_tree(struct f2fs_sb_info *sbi, +static void __insert_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, - block_t start, block_t len, - struct rb_node **insert_p, - struct rb_node *insert_parent) + block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct rb_node **p; + struct rb_node **p = &dcc->root.rb_root.rb_node; struct rb_node *parent = NULL; + struct discard_cmd *dc; bool leftmost = true;
- if (insert_p && insert_parent) { - parent = insert_parent; - p = insert_p; - goto do_insert; + /* look up rb tree to find parent node */ + while (*p) { + parent = *p; + dc = rb_entry(parent, struct discard_cmd, rb_node); + + if (lstart < dc->di.lstart) { + p = &(*p)->rb_left; + } else if (lstart >= dc->di.lstart + dc->di.len) { + p = &(*p)->rb_right; + leftmost = false; + } else { + f2fs_bug_on(sbi, 1); + } }
- p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, - lstart, &leftmost); -do_insert: - __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, - p, leftmost); + dc = __create_discard_cmd(sbi, bdev, lstart, start, len); + + rb_link_node(&dc->rb_node, parent, p); + rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost); }
static void __relocate_discard_cmd(struct discard_cmd_control *dcc, struct discard_cmd *dc) { - list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]); + list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]); }
static void __punch_discard_cmd(struct f2fs_sb_info *sbi, @@ -1244,7 +1342,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, struct discard_info di = dc->di; bool modified = false;
- if (dc->state == D_DONE || dc->len == 1) { + if (dc->state == D_DONE || dc->di.len == 1) { __remove_discard_cmd(sbi, dc); return; } @@ -1252,23 +1350,22 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, dcc->undiscard_blks -= di.len;
if (blkaddr > di.lstart) { - dc->len = blkaddr - dc->lstart; - dcc->undiscard_blks += dc->len; + dc->di.len = blkaddr - dc->di.lstart; + dcc->undiscard_blks += dc->di.len; __relocate_discard_cmd(dcc, dc); modified = true; }
if (blkaddr < di.lstart + di.len - 1) { if (modified) { - __insert_discard_tree(sbi, dc->bdev, blkaddr + 1, + __insert_discard_cmd(sbi, dc->bdev, blkaddr + 1, di.start + blkaddr + 1 - di.lstart, - di.lstart + di.len - 1 - blkaddr, - NULL, NULL); + di.lstart + di.len - 1 - blkaddr); } else { - dc->lstart++; - dc->len--; - dc->start++; - dcc->undiscard_blks += dc->len; + dc->di.lstart++; + dc->di.len--; + dc->di.start++; + dcc->undiscard_blks += dc->di.len; __relocate_discard_cmd(dcc, dc); } } @@ -1287,37 +1384,33 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev)); block_t end = lstart + len;
- dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, - NULL, lstart, - (struct rb_entry **)&prev_dc, - (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true, NULL); + dc = __lookup_discard_cmd_ret(&dcc->root, lstart, + &prev_dc, &next_dc, &insert_p, &insert_parent); if (dc) prev_dc = dc;
if (!prev_dc) { di.lstart = lstart; - di.len = next_dc ? next_dc->lstart - lstart : len; + di.len = next_dc ? next_dc->di.lstart - lstart : len; di.len = min(di.len, len); di.start = start; }
while (1) { struct rb_node *node; - bool merged = false; struct discard_cmd *tdc = NULL;
if (prev_dc) { - di.lstart = prev_dc->lstart + prev_dc->len; + di.lstart = prev_dc->di.lstart + prev_dc->di.len; if (di.lstart < lstart) di.lstart = lstart; if (di.lstart >= end) break;
- if (!next_dc || next_dc->lstart > end) + if (!next_dc || next_dc->di.lstart > end) di.len = end - di.lstart; else - di.len = next_dc->lstart - di.lstart; + di.len = next_dc->di.lstart - di.lstart; di.start = start + di.lstart - lstart; }
@@ -1333,7 +1426,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __relocate_discard_cmd(dcc, prev_dc); di = prev_dc->di; tdc = prev_dc; - merged = true; + goto next; }
if (next_dc && next_dc->state == D_PREP && @@ -1347,13 +1440,10 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); - merged = true; + goto next; }
- if (!merged) { - __insert_discard_tree(sbi, bdev, di.lstart, di.start, - di.len, NULL, NULL); - } + __insert_discard_cmd(sbi, bdev, di.lstart, di.start, di.len); next: prev_dc = next_dc; if (!prev_dc) @@ -1392,15 +1482,11 @@ static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, struct rb_node **insert_p = NULL, *insert_parent = NULL; struct discard_cmd *dc; struct blk_plug plug; - unsigned int pos = dcc->next_pos; bool io_interrupted = false;
mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, - NULL, pos, - (struct rb_entry **)&prev_dc, - (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true, NULL); + dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos, + &prev_dc, &next_dc, &insert_p, &insert_parent); if (!dc) dc = next_dc;
@@ -1418,7 +1504,7 @@ static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi, break; }
- dcc->next_pos = dc->lstart + dc->len; + dcc->next_pos = dc->di.lstart + dc->di.len; err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
if (*issued >= dpolicy->max_requests) @@ -1477,8 +1563,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (list_empty(pend_list)) goto next; if (unlikely(dcc->rbtree_check)) - f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, - &dcc->root)); + f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi)); blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -1556,7 +1641,7 @@ static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi, dc->ref--; if (!dc->ref) { if (!dc->error) - len = dc->len; + len = dc->di.len; __remove_discard_cmd(sbi, dc); } mutex_unlock(&dcc->cmd_lock); @@ -1579,14 +1664,15 @@ static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(iter, tmp, wait_list, list) { - if (iter->lstart + iter->len <= start || end <= iter->lstart) + if (iter->di.lstart + iter->di.len <= start || + end <= iter->di.lstart) continue; - if (iter->len < dpolicy->granularity) + if (iter->di.len < dpolicy->granularity) continue; if (iter->state == D_DONE && !iter->ref) { wait_for_completion_io(&iter->wait); if (!iter->error) - trimmed += iter->len; + trimmed += iter->di.len; __remove_discard_cmd(sbi, iter); } else { iter->ref++; @@ -1630,8 +1716,7 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) bool need_wait = false;
mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root, - NULL, blkaddr); + dc = __lookup_discard_cmd(sbi, blkaddr); if (dc) { if (dc->state == D_PREP) { __punch_discard_cmd(sbi, dc, blkaddr); @@ -2964,24 +3049,20 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
mutex_lock(&dcc->cmd_lock); if (unlikely(dcc->rbtree_check)) - f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, - &dcc->root)); - - dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root, - NULL, start, - (struct rb_entry **)&prev_dc, - (struct rb_entry **)&next_dc, - &insert_p, &insert_parent, true, NULL); + f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi)); + + dc = __lookup_discard_cmd_ret(&dcc->root, start, + &prev_dc, &next_dc, &insert_p, &insert_parent); if (!dc) dc = next_dc;
blk_start_plug(&plug);
- while (dc && dc->lstart <= end) { + while (dc && dc->di.lstart <= end) { struct rb_node *node; int err = 0;
- if (dc->len < dpolicy->granularity) + if (dc->di.len < dpolicy->granularity) goto skip;
if (dc->state != D_PREP) { @@ -2992,7 +3073,7 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi, err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
if (issued >= dpolicy->max_requests) { - start = dc->lstart + dc->len; + start = dc->di.lstart + dc->di.len;
if (err) __remove_discard_cmd(sbi, dc);
On Fri, Mar 10, 2023 at 01:04:53PM -0800, Jaegeuk Kim wrote:
+static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_F2FS_CHECK_FS
- struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
- struct rb_node *cur = rb_first_cached(&dcc->root), *next;
- struct discard_cmd *cur_dc, *next_dc;
- if (!cur)
return true;
- while (cur) {
The !cur check is redundant here.
- Eric
This is a last part to remove the memory sharing for rb_tree in extent_cache.
This should also fix arm32 memory alignment issue.
[struct extent_node] [struct rb_entry] [0] struct rb_node rb_node; [0] struct rb_node rb_node; union { union { struct { struct { [16] unsigned int fofs; [12] unsigned int ofs; unsigned int len; unsigned int len; }; unsigned long long key; } __packed;
Cc: stable@vger.kernel.org Fixes: 13054c548a1c ("f2fs: introduce infra macro and data structure of rb-tree extent cache") Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org --- fs/f2fs/extent_cache.c | 177 +++++++++++++++++------------------------ fs/f2fs/f2fs.h | 6 -- 2 files changed, 71 insertions(+), 112 deletions(-)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 5c206f941aac..9a8153895d20 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -161,95 +161,52 @@ static bool __is_front_mergeable(struct extent_info *cur, return __is_extent_mergeable(cur, front, type); }
-static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, - unsigned int ofs) -{ - if (cached_re) { - if (cached_re->ofs <= ofs && - cached_re->ofs + cached_re->len > ofs) { - return cached_re; - } - } - return NULL; -} - -static struct rb_entry *__lookup_rb_tree_slow(struct rb_root_cached *root, - unsigned int ofs) +static struct extent_node *__lookup_extent_node(struct rb_root_cached *root, + struct extent_node *cached_en, unsigned int fofs) { struct rb_node *node = root->rb_root.rb_node; - struct rb_entry *re; + struct extent_node *en; + + /* check a cached entry */ + if (cached_en && cached_en->ei.fofs <= fofs && + cached_en->ei.fofs + cached_en->ei.len > fofs) + return cached_en;
+ /* check rb_tree */ while (node) { - re = rb_entry(node, struct rb_entry, rb_node); + en = rb_entry(node, struct extent_node, rb_node);
- if (ofs < re->ofs) + if (fofs < en->ei.fofs) node = node->rb_left; - else if (ofs >= re->ofs + re->len) + else if (fofs >= en->ei.fofs + en->ei.len) node = node->rb_right; else - return re; + return en; } return NULL; }
-static struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root, - struct rb_entry *cached_re, unsigned int ofs) -{ - struct rb_entry *re; - - re = __lookup_rb_tree_fast(cached_re, ofs); - if (!re) - return __lookup_rb_tree_slow(root, ofs); - - return re; -} - -static struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, - struct rb_root_cached *root, - struct rb_node **parent, - unsigned int ofs, bool *leftmost) -{ - struct rb_node **p = &root->rb_root.rb_node; - struct rb_entry *re; - - while (*p) { - *parent = *p; - re = rb_entry(*parent, struct rb_entry, rb_node); - - if (ofs < re->ofs) { - p = &(*p)->rb_left; - } else if (ofs >= re->ofs + re->len) { - p = &(*p)->rb_right; - *leftmost = false; - } else { - f2fs_bug_on(sbi, 1); - } - } - - return p; -} - /* - * lookup rb entry in position of @ofs in rb-tree, + * lookup rb entry in position of @fofs in rb-tree, * if hit, return the entry, otherwise, return NULL - * @prev_ex: extent before ofs - * @next_ex: extent after ofs - * @insert_p: insert point for new extent at ofs + * @prev_ex: extent before fofs + * @next_ex: extent after fofs + * @insert_p: insert point for new extent at fofs * in order to simplify the insertion after. * tree must stay unchanged between lookup and insertion. */ -static struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, - struct rb_entry *cached_re, - unsigned int ofs, - struct rb_entry **prev_entry, - struct rb_entry **next_entry, +static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root, + struct extent_node *cached_en, + unsigned int fofs, + struct extent_node **prev_entry, + struct extent_node **next_entry, struct rb_node ***insert_p, struct rb_node **insert_parent, - bool force, bool *leftmost) + bool *leftmost) { struct rb_node **pnode = &root->rb_root.rb_node; struct rb_node *parent = NULL, *tmp_node; - struct rb_entry *re = cached_re; + struct extent_node *en = cached_en;
*insert_p = NULL; *insert_parent = NULL; @@ -259,24 +216,20 @@ static struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, if (RB_EMPTY_ROOT(&root->rb_root)) return NULL;
- if (re) { - if (re->ofs <= ofs && re->ofs + re->len > ofs) - goto lookup_neighbors; - } + if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs) + goto lookup_neighbors;
- if (leftmost) - *leftmost = true; + *leftmost = true;
while (*pnode) { parent = *pnode; - re = rb_entry(*pnode, struct rb_entry, rb_node); + en = rb_entry(*pnode, struct extent_node, rb_node);
- if (ofs < re->ofs) { + if (fofs < en->ei.fofs) { pnode = &(*pnode)->rb_left; - } else if (ofs >= re->ofs + re->len) { + } else if (fofs >= en->ei.fofs + en->ei.len) { pnode = &(*pnode)->rb_right; - if (leftmost) - *leftmost = false; + *leftmost = false; } else { goto lookup_neighbors; } @@ -285,30 +238,32 @@ static struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root, *insert_p = pnode; *insert_parent = parent;
- re = rb_entry(parent, struct rb_entry, rb_node); + en = rb_entry(parent, struct extent_node, rb_node); tmp_node = parent; - if (parent && ofs > re->ofs) + if (parent && fofs > en->ei.fofs) tmp_node = rb_next(parent); - *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + *next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
tmp_node = parent; - if (parent && ofs < re->ofs) + if (parent && fofs < en->ei.fofs) tmp_node = rb_prev(parent); - *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + *prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node); return NULL;
lookup_neighbors: - if (ofs == re->ofs || force) { + if (fofs == en->ei.fofs) { /* lookup prev node for merging backward later */ - tmp_node = rb_prev(&re->rb_node); - *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + tmp_node = rb_prev(&en->rb_node); + *prev_entry = rb_entry_safe(tmp_node, + struct extent_node, rb_node); } - if (ofs == re->ofs + re->len - 1 || force) { + if (fofs == en->ei.fofs + en->ei.len - 1) { /* lookup next node for merging frontward later */ - tmp_node = rb_next(&re->rb_node); - *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + tmp_node = rb_next(&en->rb_node); + *next_entry = rb_entry_safe(tmp_node, + struct extent_node, rb_node); } - return re; + return en; }
static struct kmem_cache *extent_tree_slab; @@ -523,8 +478,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; }
- en = (struct extent_node *)f2fs_lookup_rb_tree(&et->root, - (struct rb_entry *)et->cached_en, pgofs); + en = __lookup_extent_node(&et->root, et->cached_en, pgofs); if (!en) goto out;
@@ -598,7 +552,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, bool leftmost) { struct extent_tree_info *eti = &sbi->extent_tree[et->type]; - struct rb_node **p; + struct rb_node **p = &et->root.rb_root.rb_node; struct rb_node *parent = NULL; struct extent_node *en = NULL;
@@ -610,8 +564,21 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
leftmost = true;
- p = f2fs_lookup_rb_tree_for_insert(sbi, &et->root, &parent, - ei->fofs, &leftmost); + /* look up extent_node in the rb tree */ + while (*p) { + parent = *p; + en = rb_entry(parent, struct extent_node, rb_node); + + if (ei->fofs < en->ei.fofs) { + p = &(*p)->rb_left; + } else if (ei->fofs >= en->ei.fofs + en->ei.len) { + p = &(*p)->rb_right; + leftmost = false; + } else { + f2fs_bug_on(sbi, 1); + } + } + do_insert: en = __attach_extent_node(sbi, et, ei, parent, p, leftmost); if (!en) @@ -670,11 +637,10 @@ static void __update_extent_tree_range(struct inode *inode, }
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */ - en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, - (struct rb_entry *)et->cached_en, fofs, - (struct rb_entry **)&prev_en, - (struct rb_entry **)&next_en, - &insert_p, &insert_parent, false, + en = __lookup_extent_node_ret(&et->root, + et->cached_en, fofs, + &prev_en, &next_en, + &insert_p, &insert_parent, &leftmost); if (!en) en = next_en; @@ -812,12 +778,11 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
write_lock(&et->lock);
- en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root, - (struct rb_entry *)et->cached_en, fofs, - (struct rb_entry **)&prev_en, - (struct rb_entry **)&next_en, - &insert_p, &insert_parent, false, - &leftmost); + en = __lookup_extent_node_ret(&et->root, + et->cached_en, fofs, + &prev_en, &next_en, + &insert_p, &insert_parent, + &leftmost); if (en) goto unlock_out;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6e04fea9c34f..90a67feddcdc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -620,12 +620,6 @@ enum extent_type { NR_EXTENT_CACHES, };
-struct rb_entry { - struct rb_node rb_node; /* rb node located in rb-tree */ - unsigned int ofs; /* start offset of the entry */ - unsigned int len; /* length of the entry */ -}; - struct extent_info { unsigned int fofs; /* start offset in a file */ unsigned int len; /* length of the extent */
On Fri, Mar 10, 2023 at 01:04:52PM -0800, Jaegeuk Kim wrote:
Let's reduce the complexity of mixed use of rb_tree in victim_entry from extent_cache and discard_cmd.
This should fix arm32 memory alignment issue caused by shared rb_entry.
[struct victim_entry] [struct rb_entry] [0] struct rb_node rb_node; [0] struct rb_node rb_node; union { struct { unsigned int ofs; unsigned int len; }; [16] unsigned long long mtime; [12] unsigned long long key; } __packed;
Cc: stable@vger.kernel.org Fixes: 093749e296e2 ("f2fs: support age threshold based garbage collection") Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org
Thanks for fixing this properly. It looks much better than the weird type punning that was being done before...
+static struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
unsigned long long mtime, bool *leftmost)
Call this f2fs_lookup_victim_entry()?
+static bool f2fs_check_victim_tree(struct f2fs_sb_info *sbi,
struct rb_root_cached *root)
+{ +#ifdef CONFIG_F2FS_CHECK_FS
- struct rb_node *cur = rb_first_cached(root), *next;
- struct victim_entry *cur_ve, *next_ve;
- if (!cur)
return true;
- while (cur) {
The !cur check is redundant.
- Eric
Hello:
This series was applied to jaegeuk/f2fs.git (dev) by Jaegeuk Kim jaegeuk@kernel.org:
On Fri, 10 Mar 2023 13:04:52 -0800 you wrote:
Let's reduce the complexity of mixed use of rb_tree in victim_entry from extent_cache and discard_cmd.
This should fix arm32 memory alignment issue caused by shared rb_entry.
[struct victim_entry] [struct rb_entry] [0] struct rb_node rb_node; [0] struct rb_node rb_node; union { struct { unsigned int ofs; unsigned int len; }; [16] unsigned long long mtime; [12] unsigned long long key; } __packed;
[...]
Here is the summary with links: - [f2fs-dev,1/3] f2fs: factor out victim_entry usage from general rb_tree use (no matching commit) - [f2fs-dev,2/3] f2fs: factor out discard_cmd usage from general rb_tree use (no matching commit) - [f2fs-dev,3/3] f2fs: remove entire rb_entry sharing https://git.kernel.org/jaegeuk/f2fs/c/6b40bc364c10
You are awesome, thank you!
linux-stable-mirror@lists.linaro.org