Commit e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed") skips allocating ff->release_args if the server does not implement open. However in doing so, fuse_prepare_release() now skips grabbing the reference on the inode, which makes it possible for an inode to be evicted from the dcache while there are inflight readahead requests. This causes a deadlock if the server triggers reclaim while servicing the readahead request and reclaim attempts to evict the inode of the file being read ahead. Since the folio is locked during readahead, when reclaim evicts the fuse inode and fuse_evict_inode() attempts to remove all folios associated with the inode from the page cache (truncate_inode_pages_range()), reclaim will block forever waiting for the lock since readahead cannot relinquish the lock because it is itself blocked in reclaim:
stack_trace(1504735)
folio_wait_bit_common (mm/filemap.c:1308:4) folio_lock (./include/linux/pagemap.h:1052:3) truncate_inode_pages_range (mm/truncate.c:336:10) fuse_evict_inode (fs/fuse/inode.c:161:2) evict (fs/inode.c:704:3) dentry_unlink_inode (fs/dcache.c:412:3) __dentry_kill (fs/dcache.c:615:3) shrink_kill (fs/dcache.c:1060:12) shrink_dentry_list (fs/dcache.c:1087:3) prune_dcache_sb (fs/dcache.c:1168:2) super_cache_scan (fs/super.c:221:10) do_shrink_slab (mm/shrinker.c:435:9) shrink_slab (mm/shrinker.c:626:10) shrink_node (mm/vmscan.c:5951:2) shrink_zones (mm/vmscan.c:6195:3) do_try_to_free_pages (mm/vmscan.c:6257:3) do_swap_page (mm/memory.c:4136:11) handle_pte_fault (mm/memory.c:5562:10) handle_mm_fault (mm/memory.c:5870:9) do_user_addr_fault (arch/x86/mm/fault.c:1338:10) handle_page_fault (arch/x86/mm/fault.c:1481:3) exc_page_fault (arch/x86/mm/fault.c:1539:2) asm_exc_page_fault+0x22/0x27
Fix this deadlock by allocating ff->release_args and grabbing the reference on the inode when preparing the file for release even if the server does not implement open. The inode reference will be dropped when the last reference on the fuse file is dropped (see fuse_file_put() -> fuse_release_end()).
Fixes: e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed") Cc: stable@vger.kernel.org Signed-off-by: Joanne Koong joannelkoong@gmail.com Reported-by: Omar Sandoval osandov@fb.com --- fs/fuse/file.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f1ef77a0be05..654e21ee93fb 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -100,7 +100,7 @@ static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args, kfree(ra); }
-static void fuse_file_put(struct fuse_file *ff, bool sync) +static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir) { if (refcount_dec_and_test(&ff->count)) { struct fuse_release_args *ra = &ff->args->release_args; @@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) fuse_file_io_release(ff, ra->inode);
if (!args) { - /* Do nothing when server does not implement 'open' */ + /* Do nothing when server does not implement 'opendir' */ + } else if (!isdir && ff->fm->fc->no_open) { + fuse_release_end(ff->fm, args, 0); } else if (sync) { fuse_simple_request(ff->fm, args); fuse_release_end(ff->fm, args, 0); @@ -131,8 +133,17 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, struct fuse_file *ff; int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; bool open = isdir ? !fc->no_opendir : !fc->no_open; + bool release = !isdir || open;
- ff = fuse_file_alloc(fm, open); + /* + * ff->args->release_args still needs to be allocated (so we can hold an + * inode reference while there are pending inflight file operations when + * ->release() is called, see fuse_prepare_release()) even if + * fc->no_open is set else it becomes possible for reclaim to deadlock + * if while servicing the readahead request the server triggers reclaim + * and reclaim evicts the inode of the file being read ahead. + */ + ff = fuse_file_alloc(fm, release); if (!ff) return ERR_PTR(-ENOMEM);
@@ -152,13 +163,14 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, fuse_file_free(ff); return ERR_PTR(err); } else { - /* No release needed */ - kfree(ff->args); - ff->args = NULL; - if (isdir) + if (isdir) { + /* No release needed */ + kfree(ff->args); + ff->args = NULL; fc->no_opendir = 1; - else + } else { fc->no_open = 1; + } } }
@@ -363,7 +375,7 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff, * own ref to the file, the IO completion has to drop the ref, which is * how the fuse server can end up closing its clients' files. */ - fuse_file_put(ff, false); + fuse_file_put(ff, false, isdir); }
void fuse_release_common(struct file *file, bool isdir) @@ -394,7 +406,7 @@ void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, { WARN_ON(refcount_read(&ff->count) > 1); fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true); - fuse_file_put(ff, true); + fuse_file_put(ff, true, false); } EXPORT_SYMBOL_GPL(fuse_sync_release);
@@ -891,7 +903,7 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, folio_put(ap->folios[i]); } if (ia->ff) - fuse_file_put(ia->ff, false); + fuse_file_put(ia->ff, false, false);
fuse_io_free(ia); } @@ -1815,7 +1827,7 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa) if (wpa->bucket) fuse_sync_bucket_dec(wpa->bucket);
- fuse_file_put(wpa->ia.ff, false); + fuse_file_put(wpa->ia.ff, false, false);
kfree(ap->folios); kfree(wpa); @@ -1968,7 +1980,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) ff = __fuse_write_file_get(fi); err = fuse_flush_times(inode, ff); if (ff) - fuse_file_put(ff, false); + fuse_file_put(ff, false, false);
return err; } @@ -2186,7 +2198,7 @@ static int fuse_iomap_writeback_submit(struct iomap_writepage_ctx *wpc, }
if (data->ff) - fuse_file_put(data->ff, false); + fuse_file_put(data->ff, false, false);
return error; }
On Fri, Oct 10, 2025 at 3:08 PM Joanne Koong joannelkoong@gmail.com wrote:
Commit e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed") skips allocating ff->release_args if the server does not implement open. However in doing so, fuse_prepare_release() now skips grabbing the reference on the inode, which makes it possible for an inode to be evicted from the dcache while there are inflight readahead requests. This causes a deadlock if the server triggers reclaim while servicing the readahead request and reclaim attempts to evict the inode of the file being read ahead. Since the folio is locked during readahead, when reclaim evicts the fuse inode and fuse_evict_inode() attempts to remove all folios associated with the inode from the page cache (truncate_inode_pages_range()), reclaim will block forever waiting for the lock since readahead cannot relinquish the lock because it is itself blocked in reclaim:
stack_trace(1504735)
folio_wait_bit_common (mm/filemap.c:1308:4) folio_lock (./include/linux/pagemap.h:1052:3) truncate_inode_pages_range (mm/truncate.c:336:10) fuse_evict_inode (fs/fuse/inode.c:161:2) evict (fs/inode.c:704:3) dentry_unlink_inode (fs/dcache.c:412:3) __dentry_kill (fs/dcache.c:615:3) shrink_kill (fs/dcache.c:1060:12) shrink_dentry_list (fs/dcache.c:1087:3) prune_dcache_sb (fs/dcache.c:1168:2) super_cache_scan (fs/super.c:221:10) do_shrink_slab (mm/shrinker.c:435:9) shrink_slab (mm/shrinker.c:626:10) shrink_node (mm/vmscan.c:5951:2) shrink_zones (mm/vmscan.c:6195:3) do_try_to_free_pages (mm/vmscan.c:6257:3) do_swap_page (mm/memory.c:4136:11) handle_pte_fault (mm/memory.c:5562:10) handle_mm_fault (mm/memory.c:5870:9) do_user_addr_fault (arch/x86/mm/fault.c:1338:10) handle_page_fault (arch/x86/mm/fault.c:1481:3) exc_page_fault (arch/x86/mm/fault.c:1539:2) asm_exc_page_fault+0x22/0x27
Fix this deadlock by allocating ff->release_args and grabbing the reference on the inode when preparing the file for release even if the server does not implement open. The inode reference will be dropped when the last reference on the fuse file is dropped (see fuse_file_put() -> fuse_release_end()).
Fixes: e26ee4efbc79 ("fuse: allocate ff->release_args only if release is needed") Cc: stable@vger.kernel.org Signed-off-by: Joanne Koong joannelkoong@gmail.com Reported-by: Omar Sandoval osandov@fb.com
fs/fuse/file.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-)
Miklos, does this approach look okay to you?
Thanks, Joanne
On Sat, 11 Oct 2025 at 00:08, Joanne Koong joannelkoong@gmail.com wrote:
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) fuse_file_io_release(ff, ra->inode);
if (!args) {
/* Do nothing when server does not implement 'open' */
/* Do nothing when server does not implement 'opendir' */} else if (!isdir && ff->fm->fc->no_open) {
How about (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) instead?
I think it's more readable here and also removes the need for multiple bool args, which can confusing.
No need to resend if you agree, I'll apply with this change.
Thanks, Miklos
On Tue, Nov 11, 2025 at 7:08 AM Miklos Szeredi miklos@szeredi.hu wrote:
On Sat, 11 Oct 2025 at 00:08, Joanne Koong joannelkoong@gmail.com wrote:
@@ -110,7 +110,9 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) fuse_file_io_release(ff, ra->inode);
if (!args) {
/* Do nothing when server does not implement 'open' */
/* Do nothing when server does not implement 'opendir' */} else if (!isdir && ff->fm->fc->no_open) {How about (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) instead?
I think it's more readable here and also removes the need for multiple bool args, which can confusing.
No need to resend if you agree, I'll apply with this change.
That's a great idea. I agree, using args->opcode == FUSE_RELEASE is much better.
Thanks, Joanne
Thanks, Miklos
linux-stable-mirror@lists.linaro.org