April 2022 - Linux-stable-mirror

FAILED: patch "[PATCH] gfs2: gfs2_setattr_size error path fix" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 7336905a89f19173bf9301cd50a24421162f417c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher <agruenba(a)redhat.com> Date: Fri, 10 Dec 2021 14:43:36 +0100 Subject: [PATCH] gfs2: gfs2_setattr_size error path fix When gfs2_setattr_size() fails, it calls gfs2_rs_delete(ip, NULL) to get rid of any reservations the inode may have. Instead, it should pass in the inode's write count as the second parameter to allow gfs2_rs_delete() to figure out if the inode has any writers left. In a next step, there are two instances of gfs2_rs_delete(ip, NULL) left where we know that there can be no other users of the inode. Replace those with gfs2_rs_deltree(&ip->i_res) to avoid the unnecessary write count check. With that, gfs2_rs_delete() is only called with the inode's actual write count, so get rid of the second parameter. Fixes: a097dc7e24cb ("GFS2: Make rgrp reservations part of the gfs2_inode structure") Signed-off-by: Andreas Gruenbacher <agruenba(a)redhat.com> diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index d67108489148..fbdb7a30470a 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -2146,7 +2146,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) ret = do_shrink(inode, newsize); out: - gfs2_rs_delete(ip, NULL); + gfs2_rs_delete(ip); gfs2_qa_put(ip); return ret; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8c39a8571b1f..5bac4f6e8e05 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -706,7 +706,7 @@ static int gfs2_release(struct inode *inode, struct file *file) if (file->f_mode & FMODE_WRITE) { if (gfs2_rs_active(&ip->i_res)) - gfs2_rs_delete(ip, &inode->i_writecount); + gfs2_rs_delete(ip); gfs2_qa_put(ip); } return 0; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 89905f4f29bb..66a123306aec 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -793,7 +793,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (free_vfs_inode) /* else evict will do the put for us */ gfs2_glock_put(ip->i_gl); } - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_qa_put(ip); fail_free_acls: posix_acl_release(default_acl); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9b04a570c582..9df8a84f85a6 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -680,13 +680,14 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) /** * gfs2_rs_delete - delete a multi-block reservation * @ip: The inode for this reservation - * @wcount: The inode's write count, or NULL * */ -void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if ((wcount == NULL) || (atomic_read(wcount) <= 1)) + if (atomic_read(&inode->i_writecount) <= 1) gfs2_rs_deltree(&ip->i_res); up_write(&ip->i_rw_mutex); } diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3e2ca1fb4305..46dd94e9e085 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -45,7 +45,7 @@ extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, bool dinode, u64 *generation); extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs); -extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount); +extern void gfs2_rs_delete(struct gfs2_inode *ip); extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, u64 bstart, u32 blen, int meta); extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 64c67090f503..143a47359d1b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1396,7 +1396,7 @@ static void gfs2_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); - gfs2_rs_delete(ip, NULL); + gfs2_rs_deltree(&ip->i_res); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip);

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] rtc: check if __rtc_read_time was successful" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 915593a7a663b2ad08b895a5f3ba8b19d89d4ebf Mon Sep 17 00:00:00 2001 From: Tom Rix <trix(a)redhat.com> Date: Sat, 26 Mar 2022 12:42:36 -0700 Subject: [PATCH] rtc: check if __rtc_read_time was successful Clang static analysis reports this issue interface.c:810:8: warning: Passed-by-value struct argument contains uninitialized data now = rtc_tm_to_ktime(tm); ^~~~~~~~~~~~~~~~~~~ tm is set by a successful call to __rtc_read_time() but its return status is not checked. Check if it was successful before setting the enabled flag. Move the decl of err to function scope. Fixes: 2b2f5ff00f63 ("rtc: interface: ignore expired timers when enqueuing new timers") Signed-off-by: Tom Rix <trix(a)redhat.com> Signed-off-by: Alexandre Belloni <alexandre.belloni(a)bootlin.com> Link: https://lore.kernel.org/r/20220326194236.2916310-1-trix@redhat.com diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index d8e835798153..9edd662c69ac 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -804,9 +804,13 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); struct rtc_time tm; ktime_t now; + int err; + + err = __rtc_read_time(rtc, &tm); + if (err) + return err; timer->enabled = 1; - __rtc_read_time(rtc, &tm); now = rtc_tm_to_ktime(tm); /* Skip over expired timers */ @@ -820,7 +824,6 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) trace_rtc_timer_enqueue(timer); if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; - int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1;

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] rtc: check if __rtc_read_time was successful" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 915593a7a663b2ad08b895a5f3ba8b19d89d4ebf Mon Sep 17 00:00:00 2001 From: Tom Rix <trix(a)redhat.com> Date: Sat, 26 Mar 2022 12:42:36 -0700 Subject: [PATCH] rtc: check if __rtc_read_time was successful Clang static analysis reports this issue interface.c:810:8: warning: Passed-by-value struct argument contains uninitialized data now = rtc_tm_to_ktime(tm); ^~~~~~~~~~~~~~~~~~~ tm is set by a successful call to __rtc_read_time() but its return status is not checked. Check if it was successful before setting the enabled flag. Move the decl of err to function scope. Fixes: 2b2f5ff00f63 ("rtc: interface: ignore expired timers when enqueuing new timers") Signed-off-by: Tom Rix <trix(a)redhat.com> Signed-off-by: Alexandre Belloni <alexandre.belloni(a)bootlin.com> Link: https://lore.kernel.org/r/20220326194236.2916310-1-trix@redhat.com diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index d8e835798153..9edd662c69ac 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -804,9 +804,13 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); struct rtc_time tm; ktime_t now; + int err; + + err = __rtc_read_time(rtc, &tm); + if (err) + return err; timer->enabled = 1; - __rtc_read_time(rtc, &tm); now = rtc_tm_to_ktime(tm); /* Skip over expired timers */ @@ -820,7 +824,6 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) trace_rtc_timer_enqueue(timer); if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; - int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1;

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] rtc: check if __rtc_read_time was successful" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 915593a7a663b2ad08b895a5f3ba8b19d89d4ebf Mon Sep 17 00:00:00 2001 From: Tom Rix <trix(a)redhat.com> Date: Sat, 26 Mar 2022 12:42:36 -0700 Subject: [PATCH] rtc: check if __rtc_read_time was successful Clang static analysis reports this issue interface.c:810:8: warning: Passed-by-value struct argument contains uninitialized data now = rtc_tm_to_ktime(tm); ^~~~~~~~~~~~~~~~~~~ tm is set by a successful call to __rtc_read_time() but its return status is not checked. Check if it was successful before setting the enabled flag. Move the decl of err to function scope. Fixes: 2b2f5ff00f63 ("rtc: interface: ignore expired timers when enqueuing new timers") Signed-off-by: Tom Rix <trix(a)redhat.com> Signed-off-by: Alexandre Belloni <alexandre.belloni(a)bootlin.com> Link: https://lore.kernel.org/r/20220326194236.2916310-1-trix@redhat.com diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index d8e835798153..9edd662c69ac 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -804,9 +804,13 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); struct rtc_time tm; ktime_t now; + int err; + + err = __rtc_read_time(rtc, &tm); + if (err) + return err; timer->enabled = 1; - __rtc_read_time(rtc, &tm); now = rtc_tm_to_ktime(tm); /* Skip over expired timers */ @@ -820,7 +824,6 @@ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) trace_rtc_timer_enqueue(timer); if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; - int err; alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1;

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] ubifs: Fix to add refcount once page is set private" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 3b67db8a6ca83e6ff90b756d3da0c966f61cd37b Mon Sep 17 00:00:00 2001 From: Zhihao Cheng <chengzhihao1(a)huawei.com> Date: Mon, 27 Dec 2021 11:22:41 +0800 Subject: [PATCH] ubifs: Fix to add refcount once page is set private MM defined the rule [1] very clearly that once page was set with PG_private flag, we should increment the refcount in that page, also main flows like pageout(), migrate_page() will assume there is one additional page reference count if page_has_private() returns true. Otherwise, we may get a BUG in page migration: page:0000000080d05b9d refcount:-1 mapcount:0 mapping:000000005f4d82a8 index:0xe2 pfn:0x14c12 aops:ubifs_file_address_operations [ubifs] ino:8f1 dentry name:"f30e" flags: 0x1fffff80002405(locked|uptodate|owner_priv_1|private|node=0| zone=1|lastcpupid=0x1fffff) page dumped because: VM_BUG_ON_PAGE(page_count(page) != 0) ------------[ cut here ]------------ kernel BUG at include/linux/page_ref.h:184! invalid opcode: 0000 [#1] SMP CPU: 3 PID: 38 Comm: kcompactd0 Not tainted 5.15.0-rc5 RIP: 0010:migrate_page_move_mapping+0xac3/0xe70 Call Trace: ubifs_migrate_page+0x22/0xc0 [ubifs] move_to_new_page+0xb4/0x600 migrate_pages+0x1523/0x1cc0 compact_zone+0x8c5/0x14b0 kcompactd+0x2bc/0x560 kthread+0x18c/0x1e0 ret_from_fork+0x1f/0x30 Before the time, we should make clean a concept, what does refcount means in page gotten from grab_cache_page_write_begin(). There are 2 situations: Situation 1: refcount is 3, page is created by __page_cache_alloc. TYPE_A - the write process is using this page TYPE_B - page is assigned to one certain mapping by calling __add_to_page_cache_locked() TYPE_C - page is added into pagevec list corresponding current cpu by calling lru_cache_add() Situation 2: refcount is 2, page is gotten from the mapping's tree TYPE_B - page has been assigned to one certain mapping TYPE_A - the write process is using this page (by calling page_cache_get_speculative()) Filesystem releases one refcount by calling put_page() in xxx_write_end(), the released refcount corresponds to TYPE_A (write task is using it). If there are any processes using a page, page migration process will skip the page by judging whether expected_page_refs() equals to page refcount. The BUG is caused by following process: PA(cpu 0) kcompactd(cpu 1) compact_zone ubifs_write_begin page_a = grab_cache_page_write_begin add_to_page_cache_lru lru_cache_add pagevec_add // put page into cpu 0's pagevec (refcnf = 3, for page creation process) ubifs_write_end SetPagePrivate(page_a) // doesn't increase page count ! unlock_page(page_a) put_page(page_a) // refcnt = 2 [...] PB(cpu 0) filemap_read filemap_get_pages add_to_page_cache_lru lru_cache_add __pagevec_lru_add // traverse all pages in cpu 0's pagevec __pagevec_lru_add_fn SetPageLRU(page_a) isolate_migratepages isolate_migratepages_block get_page_unless_zero(page_a) // refcnt = 3 list_add(page_a, from_list) migrate_pages(from_list) __unmap_and_move move_to_new_page ubifs_migrate_page(page_a) migrate_page_move_mapping expected_page_refs get 3 (migration[1] + mapping[1] + private[1]) release_pages put_page_testzero(page_a) // refcnt = 3 page_ref_freeze // refcnt = 0 page_ref_dec_and_test(0 - 1 = -1) page_ref_unfreeze VM_BUG_ON_PAGE(-1 != 0, page) UBIFS doesn't increase the page refcount after setting private flag, which leads to page migration task believes the page is not used by any other processes, so the page is migrated. This causes concurrent accessing on page refcount between put_page() called by other process(eg. read process calls lru_cache_add) and page_ref_unfreeze() called by migration task. Actually zhangjun has tried to fix this problem [2] by recalculating page refcnt in ubifs_migrate_page(). It's better to follow MM rules [1], because just like Kirill suggested in [2], we need to check all users of page_has_private() helper. Like f2fs does in [3], fix it by adding/deleting refcount when setting/clearing private for a page. BTW, according to [4], we set 'page->private' as 1 because ubifs just simply SetPagePrivate(). And, [5] provided a common helper to set/clear page private, ubifs can use this helper following the example of iomap, afs, btrfs, etc. Jump [6] to find a reproducer. [1] https://lore.kernel.org/lkml/2b19b3c4-2bc4-15fa-15cc-27a13e5c7af1@aol.com [2] https://www.spinics.net/lists/linux-mtd/msg04018.html [3] http://lkml.iu.edu/hypermail/linux/kernel/1903.0/03313.html [4] https://lore.kernel.org/linux-f2fs-devel/20210422154705.GO3596236@casper.in… [5] https://lore.kernel.org/all/20200517214718.468-1-guoqing.jiang@cloud.ionos.… [6] https://bugzilla.kernel.org/show_bug.cgi?id=214961 Fixes: 1e51764a3c2ac0 ("UBIFS: add new flash file system") Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com> Signed-off-by: Richard Weinberger <richard(a)nod.at> diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5cfa28cd00cd..6b45a037a047 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -570,7 +570,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, } if (!PagePrivate(page)) { - SetPagePrivate(page); + attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); __set_page_dirty_nobuffers(page); } @@ -947,7 +947,7 @@ static int do_writepage(struct page *page, int len) release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); kunmap(page); @@ -1304,7 +1304,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset, release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); } @@ -1471,8 +1471,8 @@ static int ubifs_migrate_page(struct address_space *mapping, return rc; if (PagePrivate(page)) { - ClearPagePrivate(page); - SetPagePrivate(newpage); + detach_page_private(page); + attach_page_private(newpage, (void *)1); } if (mode != MIGRATE_SYNC_NO_COPY) @@ -1496,7 +1496,7 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) return 0; ubifs_assert(c, PagePrivate(page)); ubifs_assert(c, 0); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); return 1; } @@ -1567,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) else { if (!PageChecked(page)) ubifs_convert_page_budget(c); - SetPagePrivate(page); + attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); __set_page_dirty_nobuffers(page); }

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] ubifs: Fix to add refcount once page is set private" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 3b67db8a6ca83e6ff90b756d3da0c966f61cd37b Mon Sep 17 00:00:00 2001 From: Zhihao Cheng <chengzhihao1(a)huawei.com> Date: Mon, 27 Dec 2021 11:22:41 +0800 Subject: [PATCH] ubifs: Fix to add refcount once page is set private MM defined the rule [1] very clearly that once page was set with PG_private flag, we should increment the refcount in that page, also main flows like pageout(), migrate_page() will assume there is one additional page reference count if page_has_private() returns true. Otherwise, we may get a BUG in page migration: page:0000000080d05b9d refcount:-1 mapcount:0 mapping:000000005f4d82a8 index:0xe2 pfn:0x14c12 aops:ubifs_file_address_operations [ubifs] ino:8f1 dentry name:"f30e" flags: 0x1fffff80002405(locked|uptodate|owner_priv_1|private|node=0| zone=1|lastcpupid=0x1fffff) page dumped because: VM_BUG_ON_PAGE(page_count(page) != 0) ------------[ cut here ]------------ kernel BUG at include/linux/page_ref.h:184! invalid opcode: 0000 [#1] SMP CPU: 3 PID: 38 Comm: kcompactd0 Not tainted 5.15.0-rc5 RIP: 0010:migrate_page_move_mapping+0xac3/0xe70 Call Trace: ubifs_migrate_page+0x22/0xc0 [ubifs] move_to_new_page+0xb4/0x600 migrate_pages+0x1523/0x1cc0 compact_zone+0x8c5/0x14b0 kcompactd+0x2bc/0x560 kthread+0x18c/0x1e0 ret_from_fork+0x1f/0x30 Before the time, we should make clean a concept, what does refcount means in page gotten from grab_cache_page_write_begin(). There are 2 situations: Situation 1: refcount is 3, page is created by __page_cache_alloc. TYPE_A - the write process is using this page TYPE_B - page is assigned to one certain mapping by calling __add_to_page_cache_locked() TYPE_C - page is added into pagevec list corresponding current cpu by calling lru_cache_add() Situation 2: refcount is 2, page is gotten from the mapping's tree TYPE_B - page has been assigned to one certain mapping TYPE_A - the write process is using this page (by calling page_cache_get_speculative()) Filesystem releases one refcount by calling put_page() in xxx_write_end(), the released refcount corresponds to TYPE_A (write task is using it). If there are any processes using a page, page migration process will skip the page by judging whether expected_page_refs() equals to page refcount. The BUG is caused by following process: PA(cpu 0) kcompactd(cpu 1) compact_zone ubifs_write_begin page_a = grab_cache_page_write_begin add_to_page_cache_lru lru_cache_add pagevec_add // put page into cpu 0's pagevec (refcnf = 3, for page creation process) ubifs_write_end SetPagePrivate(page_a) // doesn't increase page count ! unlock_page(page_a) put_page(page_a) // refcnt = 2 [...] PB(cpu 0) filemap_read filemap_get_pages add_to_page_cache_lru lru_cache_add __pagevec_lru_add // traverse all pages in cpu 0's pagevec __pagevec_lru_add_fn SetPageLRU(page_a) isolate_migratepages isolate_migratepages_block get_page_unless_zero(page_a) // refcnt = 3 list_add(page_a, from_list) migrate_pages(from_list) __unmap_and_move move_to_new_page ubifs_migrate_page(page_a) migrate_page_move_mapping expected_page_refs get 3 (migration[1] + mapping[1] + private[1]) release_pages put_page_testzero(page_a) // refcnt = 3 page_ref_freeze // refcnt = 0 page_ref_dec_and_test(0 - 1 = -1) page_ref_unfreeze VM_BUG_ON_PAGE(-1 != 0, page) UBIFS doesn't increase the page refcount after setting private flag, which leads to page migration task believes the page is not used by any other processes, so the page is migrated. This causes concurrent accessing on page refcount between put_page() called by other process(eg. read process calls lru_cache_add) and page_ref_unfreeze() called by migration task. Actually zhangjun has tried to fix this problem [2] by recalculating page refcnt in ubifs_migrate_page(). It's better to follow MM rules [1], because just like Kirill suggested in [2], we need to check all users of page_has_private() helper. Like f2fs does in [3], fix it by adding/deleting refcount when setting/clearing private for a page. BTW, according to [4], we set 'page->private' as 1 because ubifs just simply SetPagePrivate(). And, [5] provided a common helper to set/clear page private, ubifs can use this helper following the example of iomap, afs, btrfs, etc. Jump [6] to find a reproducer. [1] https://lore.kernel.org/lkml/2b19b3c4-2bc4-15fa-15cc-27a13e5c7af1@aol.com [2] https://www.spinics.net/lists/linux-mtd/msg04018.html [3] http://lkml.iu.edu/hypermail/linux/kernel/1903.0/03313.html [4] https://lore.kernel.org/linux-f2fs-devel/20210422154705.GO3596236@casper.in… [5] https://lore.kernel.org/all/20200517214718.468-1-guoqing.jiang@cloud.ionos.… [6] https://bugzilla.kernel.org/show_bug.cgi?id=214961 Fixes: 1e51764a3c2ac0 ("UBIFS: add new flash file system") Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com> Signed-off-by: Richard Weinberger <richard(a)nod.at> diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 5cfa28cd00cd..6b45a037a047 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -570,7 +570,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, } if (!PagePrivate(page)) { - SetPagePrivate(page); + attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); __set_page_dirty_nobuffers(page); } @@ -947,7 +947,7 @@ static int do_writepage(struct page *page, int len) release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); kunmap(page); @@ -1304,7 +1304,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset, release_existing_page_budget(c); atomic_long_dec(&c->dirty_pg_cnt); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); } @@ -1471,8 +1471,8 @@ static int ubifs_migrate_page(struct address_space *mapping, return rc; if (PagePrivate(page)) { - ClearPagePrivate(page); - SetPagePrivate(newpage); + detach_page_private(page); + attach_page_private(newpage, (void *)1); } if (mode != MIGRATE_SYNC_NO_COPY) @@ -1496,7 +1496,7 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) return 0; ubifs_assert(c, PagePrivate(page)); ubifs_assert(c, 0); - ClearPagePrivate(page); + detach_page_private(page); ClearPageChecked(page); return 1; } @@ -1567,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf) else { if (!PageChecked(page)) ubifs_convert_page_budget(c); - SetPagePrivate(page); + attach_page_private(page, (void *)1); atomic_long_inc(&c->dirty_pg_cnt); __set_page_dirty_nobuffers(page); }

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock()" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 4f2262a334641e05f645364d5ade1f565c85f20b Mon Sep 17 00:00:00 2001 From: Zhihao Cheng <chengzhihao1(a)huawei.com> Date: Mon, 27 Dec 2021 11:22:40 +0800 Subject: [PATCH] ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock() Function ubifs_wbuf_write_nolock() may access buf out of bounds in following process: ubifs_wbuf_write_nolock(): aligned_len = ALIGN(len, 8); // Assume len = 4089, aligned_len = 4096 if (aligned_len <= wbuf->avail) ... // Not satisfy if (wbuf->used) { ubifs_leb_write() // Fill some data in avail wbuf len -= wbuf->avail; // len is still not 8-bytes aligned aligned_len -= wbuf->avail; } n = aligned_len >> c->max_write_shift; if (n) { n <<= c->max_write_shift; err = ubifs_leb_write(c, wbuf->lnum, buf + written, wbuf->offs, n); // n > len, read out of bounds less than 8(n-len) bytes } , which can be catched by KASAN: ========================================================= BUG: KASAN: slab-out-of-bounds in ecc_sw_hamming_calculate+0x1dc/0x7d0 Read of size 4 at addr ffff888105594ff8 by task kworker/u8:4/128 Workqueue: writeback wb_workfn (flush-ubifs_0_0) Call Trace: kasan_report.cold+0x81/0x165 nand_write_page_swecc+0xa9/0x160 ubifs_leb_write+0xf2/0x1b0 [ubifs] ubifs_wbuf_write_nolock+0x421/0x12c0 [ubifs] write_head+0xdc/0x1c0 [ubifs] ubifs_jnl_write_inode+0x627/0x960 [ubifs] wb_workfn+0x8af/0xb80 Function ubifs_wbuf_write_nolock() accepts that parameter 'len' is not 8 bytes aligned, the 'len' represents the true length of buf (which is allocated in 'ubifs_jnl_xxx', eg. ubifs_jnl_write_inode), so ubifs_wbuf_write_nolock() must handle the length read from 'buf' carefully to write leb safely. Fetch a reproducer in [Link]. Fixes: 1e51764a3c2ac0 ("UBIFS: add new flash file system") Link: https://bugzilla.kernel.org/show_bug.cgi?id=214785 Reported-by: Chengsong Ke <kechengsong(a)huawei.com> Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com> Signed-off-by: Richard Weinberger <richard(a)nod.at> diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 789a7813f3fa..1607a3c76681 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -854,16 +854,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ n = aligned_len >> c->max_write_shift; if (n) { - n <<= c->max_write_shift; + int m = n - 1; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n); + + if (m) { + /* '(n-1)<<c->max_write_shift < len' is always true. */ + m <<= c->max_write_shift; + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, m); + if (err) + goto out; + wbuf->offs += m; + aligned_len -= m; + len -= m; + written += m; + } + + /* + * The non-written len of buf may be less than 'n' because + * parameter 'len' is not 8 bytes aligned, so here we read + * min(len, n) bytes from buf. + */ + n = 1 << c->max_write_shift; + memcpy(wbuf->buf, buf + written, min(len, n)); + if (n > len) { + ubifs_assert(c, n - len < 8); + ubifs_pad(c, wbuf->buf + len, n - len); + } + + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); if (err) goto out; wbuf->offs += n; aligned_len -= n; - len -= n; + len -= min(len, n); written += n; }

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] ubifs: rename_whiteout: correct old_dir size computing" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 705757274599e2e064dd3054aabc74e8af31a095 Mon Sep 17 00:00:00 2001 From: Baokun Li <libaokun1(a)huawei.com> Date: Tue, 15 Feb 2022 12:07:36 +0800 Subject: [PATCH] ubifs: rename_whiteout: correct old_dir size computing When renaming the whiteout file, the old whiteout file is not deleted. Therefore, we add the old dentry size to the old dir like XFS. Otherwise, an error may be reported due to `fscki->calc_sz != fscki->size` in check_indes. Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT") Reported-by: Zhihao Cheng <chengzhihao1(a)huawei.com> Signed-off-by: Baokun Li <libaokun1(a)huawei.com> Signed-off-by: Richard Weinberger <richard(a)nod.at> diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index ae082a0be2a3..86151889548e 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1402,6 +1402,9 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, iput(whiteout); goto out_release; } + + /* Add the old_dentry size to the old_dir size. */ + old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); } lock_4_inodes(old_dir, new_dir, new_inode, whiteout);

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock()" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 4f2262a334641e05f645364d5ade1f565c85f20b Mon Sep 17 00:00:00 2001 From: Zhihao Cheng <chengzhihao1(a)huawei.com> Date: Mon, 27 Dec 2021 11:22:40 +0800 Subject: [PATCH] ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock() Function ubifs_wbuf_write_nolock() may access buf out of bounds in following process: ubifs_wbuf_write_nolock(): aligned_len = ALIGN(len, 8); // Assume len = 4089, aligned_len = 4096 if (aligned_len <= wbuf->avail) ... // Not satisfy if (wbuf->used) { ubifs_leb_write() // Fill some data in avail wbuf len -= wbuf->avail; // len is still not 8-bytes aligned aligned_len -= wbuf->avail; } n = aligned_len >> c->max_write_shift; if (n) { n <<= c->max_write_shift; err = ubifs_leb_write(c, wbuf->lnum, buf + written, wbuf->offs, n); // n > len, read out of bounds less than 8(n-len) bytes } , which can be catched by KASAN: ========================================================= BUG: KASAN: slab-out-of-bounds in ecc_sw_hamming_calculate+0x1dc/0x7d0 Read of size 4 at addr ffff888105594ff8 by task kworker/u8:4/128 Workqueue: writeback wb_workfn (flush-ubifs_0_0) Call Trace: kasan_report.cold+0x81/0x165 nand_write_page_swecc+0xa9/0x160 ubifs_leb_write+0xf2/0x1b0 [ubifs] ubifs_wbuf_write_nolock+0x421/0x12c0 [ubifs] write_head+0xdc/0x1c0 [ubifs] ubifs_jnl_write_inode+0x627/0x960 [ubifs] wb_workfn+0x8af/0xb80 Function ubifs_wbuf_write_nolock() accepts that parameter 'len' is not 8 bytes aligned, the 'len' represents the true length of buf (which is allocated in 'ubifs_jnl_xxx', eg. ubifs_jnl_write_inode), so ubifs_wbuf_write_nolock() must handle the length read from 'buf' carefully to write leb safely. Fetch a reproducer in [Link]. Fixes: 1e51764a3c2ac0 ("UBIFS: add new flash file system") Link: https://bugzilla.kernel.org/show_bug.cgi?id=214785 Reported-by: Chengsong Ke <kechengsong(a)huawei.com> Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com> Signed-off-by: Richard Weinberger <richard(a)nod.at> diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 789a7813f3fa..1607a3c76681 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -854,16 +854,42 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ n = aligned_len >> c->max_write_shift; if (n) { - n <<= c->max_write_shift; + int m = n - 1; + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, wbuf->offs); - err = ubifs_leb_write(c, wbuf->lnum, buf + written, - wbuf->offs, n); + + if (m) { + /* '(n-1)<<c->max_write_shift < len' is always true. */ + m <<= c->max_write_shift; + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, m); + if (err) + goto out; + wbuf->offs += m; + aligned_len -= m; + len -= m; + written += m; + } + + /* + * The non-written len of buf may be less than 'n' because + * parameter 'len' is not 8 bytes aligned, so here we read + * min(len, n) bytes from buf. + */ + n = 1 << c->max_write_shift; + memcpy(wbuf->buf, buf + written, min(len, n)); + if (n > len) { + ubifs_assert(c, n - len < 8); + ubifs_pad(c, wbuf->buf + len, n - len); + } + + err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); if (err) goto out; wbuf->offs += n; aligned_len -= n; - len -= n; + len -= min(len, n); written += n; }

3 years, 8 months

1
0
0 0

FAILED: patch "[PATCH] nvme: allow duplicate NSIDs for private namespaces" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ From 5974ea7ce0f9a5987fc8cf5e08ad6e3e70bb542e Mon Sep 17 00:00:00 2001 From: Sungup Moon <sungup.moon(a)samsung.com> Date: Mon, 14 Mar 2022 20:05:45 +0900 Subject: [PATCH] nvme: allow duplicate NSIDs for private namespaces A NVMe subsystem with multiple controller can have private namespaces that use the same NSID under some conditions: "If Namespace Management, ANA Reporting, or NVM Sets are supported, the NSIDs shall be unique within the NVM subsystem. If the Namespace Management, ANA Reporting, and NVM Sets are not supported, then NSIDs: a) for shared namespace shall be unique; and b) for private namespace are not required to be unique." Reference: Section 6.1.6 NSID and Namespace Usage; NVM Express 1.4c spec. Make sure this specific setup is supported in Linux. Fixes: 9ad1927a3bc2 ("nvme: always search for namespace head") Signed-off-by: Sungup Moon <sungup.moon(a)samsung.com> [hch: refactored and fixed the controller vs subsystem based naming conflict] Signed-off-by: Christoph Hellwig <hch(a)lst.de> Reviewed-by: Sagi Grimberg <sagi(a)grimberg.me> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ccc5877d514b..6dc05c1fa7db 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3627,15 +3627,20 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; -static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, +static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_head *h; - lockdep_assert_held(&subsys->lock); + lockdep_assert_held(&ctrl->subsys->lock); - list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id != nsid) + list_for_each_entry(h, &ctrl->subsys->nsheads, entry) { + /* + * Private namespaces can share NSIDs under some conditions. + * In that case we can't use the same ns_head for namespaces + * with the same NSID. + */ + if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h)) continue; if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; @@ -3829,7 +3834,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } mutex_lock(&ctrl->subsys->lock); - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl, nsid); if (!head) { ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids); if (ret) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index c97d7f843977..ba90555124c4 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -482,10 +482,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) /* * Add a multipath node if the subsystems supports multiple controllers. - * We also do this for private namespaces as the namespace sharing data could - * change after a rescan. + * We also do this for private namespaces as the namespace sharing flag + * could change after a rescan. */ - if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || + !nvme_is_unique_nsid(ctrl, head) || !multipath) return 0; head->disk = blk_alloc_disk(ctrl->numa_node); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1ea908d43e17..1552a48719d6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -722,6 +722,25 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, return queue_live; return __nvme_check_ready(ctrl, rq, queue_live); } + +/* + * NSID shall be unique for all shared namespaces, or if at least one of the + * following conditions is met: + * 1. Namespace Management is supported by the controller + * 2. ANA is supported by the controller + * 3. NVM Set are supported by the controller + * + * In other case, private namespace are not required to report a unique NSID. + */ +static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head) +{ + return head->shared || + (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) || + (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) || + (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); +} + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9dbc3ef4daf7..2dcee34d467d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -345,6 +345,7 @@ enum { NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,

3 years, 8 months

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror April 2022