The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to stable@vger.kernel.org.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 86432a6dca9bed79111990851df5756d3eb5f57c Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 4 Nov 2021 02:20:06 +0800
Subject: [PATCH] erofs: fix unsafe pagevec reuse of hooked pclusters
There are pclusters in runtime marked with Z_EROFS_PCLUSTER_TAIL before actual I/O submission. Thus, the decompression chain can be extended if the following pcluster chain hooks such tail pcluster.
As the related comment mentioned, if some page is made of a hooked pcluster and another followed pcluster, it can be reused for in-place I/O (since I/O should be submitted anyway):
 _______________________________________________________________
|  tail (partial) page |          head (partial) page           |
|_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
However, it is by no means safe to reuse such a page as pagevec, since such PRIMARY_HOOKED pclusters may finally move into the bypass chain without I/O submission. It's somewhat hard to reproduce with LZ4, and I only found it (a general protection fault) by ro_fsstressing an LZMA image for a long time.
I'm going to actively clean up related code together with multi-page folio adaption in the next few months. Let's address it directly for easier backporting for now.
Call trace for reference: z_erofs_decompress_pcluster+0x10a/0x8a0 [erofs] z_erofs_decompress_queue.isra.36+0x3c/0x60 [erofs] z_erofs_runqueue+0x5f3/0x840 [erofs] z_erofs_readahead+0x1e8/0x320 [erofs] read_pages+0x91/0x270 page_cache_ra_unbounded+0x18b/0x240 filemap_get_pages+0x10a/0x5f0 filemap_read+0xa9/0x330 new_sync_read+0x11b/0x1a0 vfs_read+0xf1/0x190
Link: https://lore.kernel.org/r/20211103182006.4040-1-xiang@kernel.org Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Cc: stable@vger.kernel.org # 4.19+ Reviewed-by: Chao Yu chao@kernel.org Signed-off-by: Gao Xiang hsiangkao@linux.alibaba.com
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 11c7a1aaebad..eb51df4a9f77 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -373,8 +373,8 @@ static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
/* callers must be with collection lock held */ static int z_erofs_attach_page(struct z_erofs_collector *clt, - struct page *page, - enum z_erofs_page_type type) + struct page *page, enum z_erofs_page_type type, + bool pvec_safereuse) { int ret;
@@ -384,9 +384,9 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt, z_erofs_try_inplace_io(clt, page)) return 0;
- ret = z_erofs_pagevec_enqueue(&clt->vector, page, type); + ret = z_erofs_pagevec_enqueue(&clt->vector, page, type, + pvec_safereuse); clt->cl->vcnt += (unsigned int)ret; - return ret ? 0 : -EAGAIN; }
@@ -729,7 +729,8 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
retry: - err = z_erofs_attach_page(clt, page, page_type); + err = z_erofs_attach_page(clt, page, page_type, + clt->mode >= COLLECT_PRIMARY_FOLLOWED); /* should allocate an additional short-lived page for pagevec */ if (err == -EAGAIN) { struct page *const newpage = @@ -737,7 +738,7 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE); err = z_erofs_attach_page(clt, newpage, - Z_EROFS_PAGE_TYPE_EXCLUSIVE); + Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); if (!err) goto retry; } diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h index dfd7fe0503bb..b05464f4a808 100644 --- a/fs/erofs/zpvec.h +++ b/fs/erofs/zpvec.h @@ -106,11 +106,18 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor, struct page *page, - enum z_erofs_page_type type) + enum z_erofs_page_type type, + bool pvec_safereuse) { - if (!ctor->next && type) - if (ctor->index + 1 == ctor->nr) + if (!ctor->next) { + /* some pages cannot be reused as pvec safely without I/O */ + if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse) + type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED; + + if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE && + ctor->index + 1 == ctor->nr) return false; + }
if (ctor->index >= ctor->nr) z_erofs_pagevec_ctor_pagedown(ctor, false);
From: Yue Hu huyue2@yulong.com
commit 7dea3de7d384f4c8156e8bd93112ba6db1eb276c upstream.
The variable 'occupied' is never used in z_erofs_attach_page(), which is the only caller of z_erofs_pagevec_enqueue().
Link: https://lore.kernel.org/r/20210419102623.2015-1-zbestahu@gmail.com Signed-off-by: Yue Hu huyue2@yulong.com Reviewed-by: Gao Xiang xiang@kernel.org Signed-off-by: Gao Xiang xiang@kernel.org [ Gao Xiang: handle 4.19 codebase conflicts manually. ] Signed-off-by: Gao Xiang hsiangkao@linux.alibaba.com --- Gao Xiang: Same to 5.4.y and 5.10.y, apply this trivial cleanup as well.
drivers/staging/erofs/unzip_pagevec.h | 5 +---- drivers/staging/erofs/unzip_vle.c | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/drivers/staging/erofs/unzip_pagevec.h b/drivers/staging/erofs/unzip_pagevec.h index 23856ba2742d..64724dd1e04e 100644 --- a/drivers/staging/erofs/unzip_pagevec.h +++ b/drivers/staging/erofs/unzip_pagevec.h @@ -117,10 +117,8 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor, static inline bool z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor, struct page *page, - enum z_erofs_page_type type, - bool *occupied) + enum z_erofs_page_type type) { - *occupied = false; if (unlikely(ctor->next == NULL && type)) if (ctor->index + 1 == ctor->nr) return false; @@ -135,7 +133,6 @@ z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor, /* should remind that collector->next never equal to 1, 2 */ if (type == (uintptr_t)ctor->next) { ctor->next = page; - *occupied = true; }
ctor->pages[ctor->index++] = diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index 0f1558c6747e..48c21a4d5dc8 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -234,7 +234,6 @@ static int z_erofs_vle_work_add_page( enum z_erofs_page_type type) { int ret; - bool occupied;
/* give priority for the compressed data storage */ if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY && @@ -242,8 +241,7 @@ static int z_erofs_vle_work_add_page( try_to_reuse_as_compressed_page(builder, page)) return 0;
- ret = z_erofs_pagevec_ctor_enqueue(&builder->vector, - page, type, &occupied); + ret = z_erofs_pagevec_ctor_enqueue(&builder->vector, page, type); builder->work->vcnt += (unsigned)ret;
return ret ? 0 : -EAGAIN;
commit 86432a6dca9bed79111990851df5756d3eb5f57c upstream.
There are pclusters in runtime marked with Z_EROFS_PCLUSTER_TAIL before actual I/O submission. Thus, the decompression chain can be extended if the following pcluster chain hooks such tail pcluster.
As the related comment mentioned, if some page is made of a hooked pcluster and another followed pcluster, it can be reused for in-place I/O (since I/O should be submitted anyway):
 _______________________________________________________________
|  tail (partial) page |          head (partial) page           |
|_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
However, it is by no means safe to reuse such a page as pagevec, since such PRIMARY_HOOKED pclusters may finally move into the bypass chain without I/O submission. It's somewhat hard to reproduce with LZ4, and I only found it (a general protection fault) by ro_fsstressing an LZMA image for a long time.
I'm going to actively clean up related code together with multi-page folio adaption in the next few months. Let's address it directly for easier backporting for now.
Call trace for reference: z_erofs_decompress_pcluster+0x10a/0x8a0 [erofs] z_erofs_decompress_queue.isra.36+0x3c/0x60 [erofs] z_erofs_runqueue+0x5f3/0x840 [erofs] z_erofs_readahead+0x1e8/0x320 [erofs] read_pages+0x91/0x270 page_cache_ra_unbounded+0x18b/0x240 filemap_get_pages+0x10a/0x5f0 filemap_read+0xa9/0x330 new_sync_read+0x11b/0x1a0 vfs_read+0xf1/0x190
Link: https://lore.kernel.org/r/20211103182006.4040-1-xiang@kernel.org Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Cc: stable@vger.kernel.org # 4.19+ Reviewed-by: Chao Yu chao@kernel.org Signed-off-by: Gao Xiang hsiangkao@linux.alibaba.com --- drivers/staging/erofs/unzip_pagevec.h | 13 ++++++++++--- drivers/staging/erofs/unzip_vle.c | 17 +++++++++-------- 2 files changed, 19 insertions(+), 11 deletions(-)
diff --git a/drivers/staging/erofs/unzip_pagevec.h b/drivers/staging/erofs/unzip_pagevec.h index 64724dd1e04e..efbf541e11bb 100644 --- a/drivers/staging/erofs/unzip_pagevec.h +++ b/drivers/staging/erofs/unzip_pagevec.h @@ -117,11 +117,18 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor, static inline bool z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor, struct page *page, - enum z_erofs_page_type type) + enum z_erofs_page_type type, + bool pvec_safereuse) { - if (unlikely(ctor->next == NULL && type)) - if (ctor->index + 1 == ctor->nr) + if (!ctor->next) { + /* some pages cannot be reused as pvec safely without I/O */ + if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse) + type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED; + + if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE && + ctor->index + 1 == ctor->nr) return false; + }
if (unlikely(ctor->index >= ctor->nr)) z_erofs_pagevec_ctor_pagedown(ctor, false); diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index 48c21a4d5dc8..83e4d9384bd2 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -228,10 +228,10 @@ static inline bool try_to_reuse_as_compressed_page( }
/* callers must be with work->lock held */ -static int z_erofs_vle_work_add_page( - struct z_erofs_vle_work_builder *builder, - struct page *page, - enum z_erofs_page_type type) +static int z_erofs_vle_work_add_page(struct z_erofs_vle_work_builder *builder, + struct page *page, + enum z_erofs_page_type type, + bool pvec_safereuse) { int ret;
@@ -241,9 +241,9 @@ static int z_erofs_vle_work_add_page( try_to_reuse_as_compressed_page(builder, page)) return 0;
- ret = z_erofs_pagevec_ctor_enqueue(&builder->vector, page, type); + ret = z_erofs_pagevec_ctor_enqueue(&builder->vector, page, type, + pvec_safereuse); builder->work->vcnt += (unsigned)ret; - return ret ? 0 : -EAGAIN; }
@@ -688,14 +688,15 @@ static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe, tight &= builder_is_followed(builder);
retry: - err = z_erofs_vle_work_add_page(builder, page, page_type); + err = z_erofs_vle_work_add_page(builder, page, page_type, + builder_is_followed(builder)); /* should allocate an additional staging page for pagevec */ if (err == -EAGAIN) { struct page *const newpage = __stagingpage_alloc(page_pool, GFP_NOFS);
err = z_erofs_vle_work_add_page(builder, - newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE); + newpage, Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); if (likely(!err)) goto retry; }
linux-stable-mirror@lists.linaro.org