4.4-stable review patch. If anyone has any objections, please let me know.
------------------
From: Jaegeuk Kim jaegeuk@kernel.org
commit a51311938e14c17f5a94d30baac9d7bec71f5858 upstream.
There was a subtle bug on nat cache management which incurs wrong nid allocation or wrong block addresses when try_to_free_nats is triggered heavily. This patch enlarges the previous coverage of nat_tree_lock to avoid data race.
Signed-off-by: Jaegeuk Kim jaegeuk@kernel.org Signed-off-by: Ben Hutchings ben.hutchings@codethink.co.uk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- fs/f2fs/node.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-)
--- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -261,13 +261,11 @@ static void cache_nat_entry(struct f2fs_ { struct nat_entry *e;
- down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); node_info_from_raw_nat(&e->ni, ne); } - up_write(&nm_i->nat_tree_lock); }
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -379,6 +377,8 @@ void get_node_info(struct f2fs_sb_info *
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
+ down_write(&nm_i->nat_tree_lock); + /* Check current segment summary */ mutex_lock(&curseg->curseg_mutex); i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0); @@ -399,6 +399,7 @@ void get_node_info(struct f2fs_sb_info * cache: /* cache nat entry */ cache_nat_entry(NM_I(sbi), nid, &ne); + up_write(&nm_i->nat_tree_lock); }
/* @@ -1440,13 +1441,10 @@ static int add_free_nid(struct f2fs_sb_i
if (build) { /* do not add allocated nids */ - down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); - if (ne && - (!get_nat_flag(ne, IS_CHECKPOINTED) || + if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1532,6 +1530,8 @@ static void build_free_nids(struct f2fs_ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true);
+ down_read(&nm_i->nat_tree_lock); + while (1) { struct page *page = get_current_nat_page(sbi, nid);
@@ -1560,6 +1560,7 @@ static void build_free_nids(struct f2fs_ remove_free_nid(nm_i, nid); } mutex_unlock(&curseg->curseg_mutex); + up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -1842,14 +1843,12 @@ static void remove_nats_in_journal(struc
raw_ne = nat_in_journal(sum, i);
- down_write(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (!ne) { ne = grab_nat_entry(nm_i, nid); node_info_from_raw_nat(&ne->ni, &raw_ne); } __set_nat_cache_dirty(nm_i, ne); - up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1883,7 +1882,6 @@ static void __flush_nat_entry_set(struct struct f2fs_nat_block *nat_blk; struct nat_entry *ne, *cur; struct page *page = NULL; - struct f2fs_nm_info *nm_i = NM_I(sbi);
/* * there are two steps to flush nat entries: @@ -1920,12 +1918,8 @@ static void __flush_nat_entry_set(struct raw_ne = &nat_blk->entries[nid - start_nid]; } raw_nat_from_node_info(raw_ne, &ne->ni); - - down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - up_write(&NM_I(sbi)->nat_tree_lock); - if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); } @@ -1937,9 +1931,7 @@ static void __flush_nat_entry_set(struct
f2fs_bug_on(sbi, set->entry_cnt);
- down_write(&nm_i->nat_tree_lock); radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - up_write(&nm_i->nat_tree_lock); kmem_cache_free(nat_entry_set_slab, set); }
@@ -1959,6 +1951,9 @@ void flush_nat_entries(struct f2fs_sb_in
if (!nm_i->dirty_nat_cnt) return; + + down_write(&nm_i->nat_tree_lock); + /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1967,7 +1962,6 @@ void flush_nat_entries(struct f2fs_sb_in if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi);
- down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_set(nm_i, set_idx, SETVEC_SIZE, setvec))) { unsigned idx; @@ -1976,12 +1970,13 @@ void flush_nat_entries(struct f2fs_sb_in __adjust_nat_entry_set(setvec[idx], &sets, MAX_NAT_JENTRIES(sum)); } - up_write(&nm_i->nat_tree_lock);
/* flush dirty nats in nat entry set */ list_for_each_entry_safe(set, tmp, &sets, set_list) __flush_nat_entry_set(sbi, set);
+ up_write(&nm_i->nat_tree_lock); + f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); }