Re: [PATCH 6.1 1/2] erofs: handle overlapped pclusters out of crafted images properly

2 Mar 2025

On Fri, 28. Feb 19:51, Alexey Panov wrote:
...
From: Gao Xiang hsiangkao@linux.alibaba.com
commit 9e2f9d34dd12e6e5b244ec488bcebd0c2d566c50 upstream.
syzbot reported a task hang issue due to a deadlock case where it is
waiting for the folio lock of a cached folio that will be used for
cache I/Os.
After looking into the crafted fuzzed image, I found it's formed with
several overlapped big pclusters as below:
Ext:   logical offset   |  length :     physical offset    |  length
   0:        0..   16384 |   16384 :     151552..    167936 |   16384
   1:    16384..   32768 |   16384 :     155648..    172032 |   16384
   2:    32768..   49152 |   16384 :  537223168.. 537239552 |   16384
...
Here, extent 0/1 are physically overlapped although it's entirely
_impossible_ for normal filesystem images generated by mkfs.
First, managed folios containing compressed data will be marked as
up-to-date and then unlocked immediately (unlike in-place folios) when
compressed I/Os are complete.  If physical blocks are not submitted in
the incremental order, there should be separate BIOs to avoid dependency
issues.  However, the current code mis-arranges z_erofs_fill_bio_vec()
and BIO submission which causes unexpected BIO waits.
Second, managed folios will be connected to their own pclusters for
efficient inter-queries.  However, this is somewhat hard to implement
easily if overlapped big pclusters exist.  Again, these only appear in
fuzzed images so let's simply fall back to temporary short-lived pages
for correctness.
Additionally, it justifies that referenced managed folios cannot be
truncated for now and reverts part of commit 2080ca1ed3e4 ("erofs: tidy
up `struct z_erofs_bvec`") for simplicity although it shouldn't be any
difference.
Reported-by: syzbot+4fc98ed414ae63d1ada2@syzkaller.appspotmail.com
Reported-by: syzbot+de04e06b28cfecf2281c@syzkaller.appspotmail.com
Reported-by: syzbot+c8c8238b394be4a1087d@syzkaller.appspotmail.com
Tested-by: syzbot+4fc98ed414ae63d1ada2@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/r/0000000000002fda01061e334873@google.com
Fixes: 8e6c8fa9f2e9 ("erofs: enable big pcluster feature")
Signed-off-by: Gao Xiang hsiangkao@linux.alibaba.com
Link: https://lore.kernel.org/r/20240910070847.3356592-1-hsiangkao@linux.alibaba.c...
[Alexey: minor fix to resolve merge conflict]
Urgh, it doesn't look so minor indeed. Backward struct folio -> struct
page conversions can be tricky sometimes. Please see several comments
below.
...
Signed-off-by: Alexey Panov apanov@astralinux.ru
Backport fix for CVE-2024-47736
fs/erofs/zdata.c | 59 +++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 94e9e0bf3bbd..ac01c0ede7f7 100644
I'm looking at the diff of upstream commit and the first thing it does
is to remove zeroing out the folio/page private field here:
// upstream commit 9e2f9d34dd12 ("erofs: handle overlapped pclusters out of crafted images properly")
  @@ -1450,7 +1451,6 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
           * file-backed folios will be used instead.
           */
          if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
  -               folio->private = 0;
                  tocache = true;
                  goto out_tocache;
          }
while in 6.1.129 the corresponding fragment seems untouched with the
backport patch. Is it intended?
...
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1346,14 +1346,13 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
   	goto out;
 
   lock_page(page);


/* only true if page reclaim goes wrong, should never happen */
DBG_BUGON(justfound && PagePrivate(page));

/* the page is still in manage cache */
if (page->mapping == mc) {


if (likely(page->mapping == mc)) {
WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);


/*


 * The cached folio is still in managed cache but without



Are the comments worth to be adapted as well? I don't think the "folio"
stuff would be useful while reading 6.1 source code.
...

 * a valid `->private` pcluster hint.  Let's reconnect them.


 */

if (!PagePrivate(page)) {
	/*
	 * impossible to be !PagePrivate(page) for

@@ -1367,22 +1366,24 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
   		SetPagePrivate(page);
   	}

/* no need to submit io if it is already up-to-date */


if (PageUptodate(page)) {


	unlock_page(page);


	page = NULL;




if (likely(page->private == (unsigned long)pcl)) {


	/* don't submit cache I/Os again if already uptodate */


	if (PageUptodate(page)) {


		unlock_page(page);


		page = NULL;



	}


	goto out;

}


goto out;




/*


 * Already linked with another pcluster, which only appears in


 * crafted images by fuzzers for now.  But handle this anyway.


 */


tocache = false;	/* use temporary short-lived pages */


} else {
DBG_BUGON(1); /* referenced managed folios can't be truncated */


tocache = true;

}



/*
* the managed page has been truncated, it's unsafe to


* reuse this one, let's allocate a new cache-managed page.


*/


DBG_BUGON(page->mapping);
DBG_BUGON(!justfound);

tocache = true;
unlock_page(page);
put_page(page);

out_allocpage:
There is a whole bunch of out path handling changes done for this function
in upstream commit, like
// upstream commit 9e2f9d34dd12 ("erofs: handle overlapped pclusters out of crafted images properly")
   out_allocfolio:
  -       zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
  +       page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
          spin_lock(&pcl->obj.lockref.lock);
  -       if (pcl->compressed_bvecs[nr].page) {
  -               erofs_pagepool_add(&f->pagepool, zbv.page);
  +       if (unlikely(pcl->compressed_bvecs[nr].page != zbv.page)) {
  +               erofs_pagepool_add(&f->pagepool, page);
                  spin_unlock(&pcl->obj.lockref.lock);
                  cond_resched();
                  goto repeat;
          }
  -       bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
  -       folio = page_folio(zbv.page);
  -       /* first mark it as a temporary shortlived folio (now 1 ref) */
  -       folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
  +       bvec->bv_page = pcl->compressed_bvecs[nr].page = page;
  +       folio = page_folio(page);
          spin_unlock(&pcl->obj.lockref.lock);
   out_tocache:
          if (!tocache || bs != PAGE_SIZE ||
  -           filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
  +           filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp)) {
  +               /* turn into a temporary shortlived folio (1 ref) */
  +               folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
                  return;
  +       }
          folio_attach_private(folio, pcl);
          /* drop a refcount added by allocpage (then 2 refs in total here) */
          folio_put(folio);
These changes are probably not relevant for the 6.1.y but this fact is
still usually worth a brief mentioning in the backporter's comment.
...
@@ -1536,16 +1537,11 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
   	end = cur + pcl->pclusterpages;
 
   	do {

	struct page *page;



	page = pickup_page_for_submission(pcl, i++, pagepool,


					  mc);


	if (!page)


		continue;




	struct page *page = NULL;



if (bio && (cur != last_index + 1 ||
   			    last_bdev != mdev.m_bdev)) {
-submit_bio_retry:
+drain_io:
   			submit_bio(bio);
   			if (memstall) {
   				psi_memstall_leave(&pflags);
@@ -1554,6 +1550,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
   			bio = NULL;
   		}

	if (!page) {


		page = pickup_page_for_submission(pcl, i++,


				pagepool, mc);


		if (!page)


			continue;


	}


if (unlikely(PageWorkingset(page)) && !memstall) {
	psi_memstall_enter(&pflags);
	memstall = 1;



@@ -1574,7 +1577,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
   		}
 
   		if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)

		goto submit_bio_retry;




		goto drain_io;



last_index = cur;
   		bypass = false;
-- 
2.39.5

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

Re: [PATCH 6.1 1/2] erofs: handle overlapped pclusters out of crafted images properly

Signed-off-by: Alexey Panov apanov@astralinux.ru