On 15 Jan 2025, at 10:36, Greg Kroah-Hartman <gregkh@linuxfoundation.org> wrote:
6.6-stable review patch. If anyone has any objections, please let me know.
From: Amir Goldstein <amir73il@gmail.com>
[ Upstream commit 5b02bfc1e7e3811c5bf7f0fa626a0694d0dbbd77 ]
When lower fs is a nested overlayfs, calling encode_fh() on a lower directory dentry may trigger copy up and take sb_writers on the upper fs of the lower nested overlayfs.
The lower nested overlayfs may have the same upper fs as this overlayfs, so nested sb_writers lock is illegal.
Move all the callers that encode lower fh to before ovl_want_write().
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Stable-dep-of: c45beebfde34 ("ovl: support encoding fid from inode with no alias")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Hi,
This patch seems to trigger the following warning on 6.6.72 when simply running "docker run --rm -it debian" (i.e. creating a container):
------------[ cut here ]------------ WARNING: CPU: 12 PID: 668 at fs/namespace.c:1245 cleanup_mnt+0x130/0x150 Modules linked in: xt_conntrack(E) nft_chain_nat(E) xt_MASQUERADE(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) bridge(E) stp(E) llc(E) xfrm_user(E) xfrm_algo(E) xt_addrtype(E) nft_compat(E) nf_tables(E) overlay(E) kvm_amd(E) ccp(E) kvm(E) irqbypass(E) crc32_pclmul(E) sha512_ssse3(E) sha256_ssse3(E) sha1_ssse3(E) aesni_intel(E) crypto_simd(E) cryptd(E) iTCO_wdt(E) virtio_console(E) virtio_balloon(E) iTCO_vendor_support(E) tiny_power_button(E) button(E) sch_fq_codel(E) fuse(E) nfnetlink(E) vsock_loopback(E) vmw_vsock_virtio_transport_common(E) vsock(E) efivarfs(E) ip_tables(E) x_tables(E) virtio_net(E) net_failover(E) virtio_blk(E) virtio_scsi(E) failover(E) crc32c_intel(E) i2c_i801(E) virtio_pci(E) virtio_pci_legacy_dev(E) i2c_smbus(E) lpc_ich(E) virtio_pci_modern_dev(E) mfd_core(E) virtio(E) virtio_ring(E) CPU: 12 PID: 668 Comm: dockerd Tainted: G E 6.6.71+ #18 Hardware name: KubeVirt None/RHEL, BIOS edk2-20230524-3.el9 05/24/2023 RIP: 0010:cleanup_mnt+0x130/0x150 Code: 2c 01 00 00 85 c0 75 16 e8 6d fb ff ff eb 8a c7 87 2c 01 00 00 00 00 00 00 e9 6a ff ff ff c7 87 2c 01 00 00 00 00 00 00 eb de <0f> 0b 48 83 bd 30 01 00 00 00 0f 84 e9 fe ff ff 48 89 ef e8 18 e7 RSP: 0018:ffffc9000095fec8 EFLAGS: 00010282 RAX: 00000000fffffffe RBX: 0000000000000000 RCX: 0000000000000010 RDX: 0000000000000010 RSI: 0000000000000010 RDI: 0000000000000010 RBP: ffff888109ea57c0 R08: ffffffffbc27ab60 R09: 0000000000000000 R10: 0000000000037420 R11: 0000000000000000 R12: ffff88810acba9bc R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 00007f1041ffb6c0(0000) GS:ffff88903fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000c000b7f02f CR3: 00000001034ca002 CR4: 0000000000770ee0 PKRU: 55555554 Call Trace: <TASK> ? cleanup_mnt+0x130/0x150 ? __warn+0x81/0x130 ? cleanup_mnt+0x130/0x150 ? 
report_bug+0x16f/0x1a0
? handle_bug+0x53/0x90
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? cleanup_mnt+0x130/0x150
? cleanup_mnt+0x13/0x150
task_work_run+0x5d/0x90
exit_to_user_mode_prepare+0xf8/0x100
syscall_exit_to_user_mode+0x21/0x40
? srso_alias_return_thunk+0x5/0xfbef5
do_syscall_64+0x45/0x90
entry_SYSCALL_64_after_hwframe+0x60/0xca
RIP: 0033:0x55d0e0726dee
Code: 48 83 ec 38 e8 13 00 00 00 48 83 c4 38 5d c3 cc cc cc cc cc cc cc cc cc cc cc cc cc 49 89 f2 48 89 fa 48 89 ce 48 89 df 0f 05 <48> 3d 01 f0 ff ff 76 15 48 f7 d8 48 89 c1 48 c7 c0 ff ff ff ff 48
RSP: 002b:000000c000145a10 EFLAGS: 00000216 ORIG_RAX: 00000000000000a6
RAX: 0000000000000000 RBX: 000000c000b7fce0 RCX: 000055d0e0726dee
RDX: 0000000000000000 RSI: 0000000000000002 RDI: 000000c000b7fce0
RBP: 000000c000145a50 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000216 R12: 000000c000b7fce0
R13: 0000000000000000 R14: 000000c000b06e00 R15: 1fffffffffffffff
</TASK>
---[ end trace 0000000000000000 ]---
Bisecting 6.6.71..6.6.72 pointed to this commit. To double-check, I reverted it on top of 6.6.72; to keep the tree compiling and to make the issue go away, I had to revert the following commits:
* a3f8a2b13a277d942c810d2ccc654d5bc824a430 ("ovl: pass realinode to ovl_encode_real_fh() instead of realdentry") [ Upstream commit 07aeefae7ff44d80524375253980b1bdee2396b0 ]
* 26423e18cd6f709ca4fe7194c29c11658cd0cdd0 ("ovl: do not encode lower fh with upper sb_writers held") [ Upstream commit 5b02bfc1e7e3811c5bf7f0fa626a0694d0dbbd77 ]
* a1a541fbfa7e97c1100144db34b57553d7164ce5 ("ovl: support encoding fid from inode with no alias") [ Upstream commit c45beebfde34aa71afbc48b2c54cdda623515037 ]
I can also confirm we don’t see this warning on the latest 6.12.10 release, so perhaps we have missed some dependencies in 6.6?
Ignat
fs/overlayfs/copy_up.c   | 53 +++++++++++++++++++++++++--------------
fs/overlayfs/namei.c     | 37 +++++++++++++++++++++-------
fs/overlayfs/overlayfs.h | 26 ++++++++++++++------
fs/overlayfs/super.c     | 20 ++++++++++-----
fs/overlayfs/util.c      | 10 ++++++++
5 files changed, 104 insertions(+), 42 deletions(-)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index ada3fcc9c6d5..5c9af24bae4a 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -426,29 +426,29 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, return ERR_PTR(err); }
-int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
- struct dentry *upper)
+struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin) {
- const struct ovl_fh *fh = NULL;
- int err;
/*
- When lower layer doesn't support export operations store a 'null' fh,
- so we can use the overlay.origin xattr to distignuish between a copy
- up and a pure upper inode.
*/
- if (ovl_can_decode_fh(lower->d_sb)) {
- fh = ovl_encode_real_fh(ofs, lower, false);
- if (IS_ERR(fh))
- return PTR_ERR(fh);
- }
- if (!ovl_can_decode_fh(origin->d_sb))
- return NULL;
- return ovl_encode_real_fh(ofs, origin, false);
+}
+int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
struct dentry *upper)
+{
- int err;
/*
- Do not fail when upper doesn't support xattrs.
*/ err = ovl_check_setxattr(ofs, upper, OVL_XATTR_ORIGIN, fh->buf, fh ? fh->fb.len : 0, 0);
- kfree(fh);
/* Ignore -EPERM from setting "user.*" on symlink/special */ return err == -EPERM ? 0 : err; @@ -476,7 +476,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
- Caller must hold i_mutex on indexdir.
*/ -static int ovl_create_index(struct dentry *dentry, struct dentry *origin, +static int ovl_create_index(struct dentry *dentry, const struct ovl_fh *fh, struct dentry *upper) { struct ovl_fs *ofs = OVL_FS(dentry->d_sb); @@ -502,7 +502,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin, if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry)))) return -EIO;
- err = ovl_get_index_name(ofs, origin, &name);
- err = ovl_get_index_name_fh(fh, &name);
if (err) return err;
@@ -541,6 +541,7 @@ struct ovl_copy_up_ctx { struct dentry *destdir; struct qstr destname; struct dentry *workdir;
- const struct ovl_fh *origin_fh;
bool origin; bool indexed; bool metacopy; @@ -637,7 +638,7 @@ static int ovl_copy_up_metadata(struct ovl_copy_up_ctx *c, struct dentry *temp)
- hard link.
*/ if (c->origin) {
- err = ovl_set_origin(ofs, c->lowerpath.dentry, temp);
- err = ovl_set_origin_fh(ofs, c->origin_fh, temp);
if (err) return err; } @@ -749,7 +750,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c) goto cleanup;
if (S_ISDIR(c->stat.mode) && c->indexed) {
- err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
- err = ovl_create_index(c->dentry, c->origin_fh, temp);
if (err) goto cleanup; } @@ -861,6 +862,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) { int err; struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
- struct dentry *origin = c->lowerpath.dentry;
- struct ovl_fh *fh = NULL;
bool to_index = false;
/* @@ -877,17 +880,25 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) to_index = true; }
- if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index)
- if (S_ISDIR(c->stat.mode) || c->stat.nlink == 1 || to_index) {
- fh = ovl_get_origin_fh(ofs, origin);
- if (IS_ERR(fh))
- return PTR_ERR(fh);
- /* origin_fh may be NULL */
- c->origin_fh = fh;
c->origin = true;
- }
if (to_index) { c->destdir = ovl_indexdir(c->dentry->d_sb);
- err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
- err = ovl_get_index_name(ofs, origin, &c->destname);
if (err)
- return err;
- goto out_free_fh;
} else if (WARN_ON(!c->parent)) { /* Disconnected dentry must be copied up to index dir */
- return -EIO;
- err = -EIO;
- goto out_free_fh;
} else { /*
- Mark parent "impure" because it may now contain non-pure
@@ -895,7 +906,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) */ err = ovl_set_impure(c->parent, c->destdir); if (err)
- return err;
- goto out_free_fh;
}
/* Should we copyup with O_TMPFILE or with workdir? */ @@ -927,6 +938,8 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c) out: if (to_index) kfree(c->destname.name); +out_free_fh:
- kfree(fh);
return err; }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 80391c687c2a..f10ac4ae35f0 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -507,6 +507,19 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry, return err; }
+int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox, const struct ovl_fh *fh,
bool is_upper, bool set)
+{
- int err;
- err = ovl_verify_fh(ofs, dentry, ox, fh);
- if (set && err == -ENODATA)
- err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
- return err;
+}
/*
- Verify that @real dentry matches the file handle stored in xattr @name.
@@ -515,9 +528,9 @@ static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
- Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
*/ -int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox, struct dentry *real, bool is_upper,
bool set)
+int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
- enum ovl_xattr ox, struct dentry *real,
- bool is_upper, bool set)
{ struct inode *inode; struct ovl_fh *fh; @@ -530,9 +543,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, goto fail; }
- err = ovl_verify_fh(ofs, dentry, ox, fh);
- if (set && err == -ENODATA)
- err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
- err = ovl_verify_set_fh(ofs, dentry, ox, fh, is_upper, set);
if (err) goto fail;
@@ -548,6 +559,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry, goto out; }
/* Get upper dentry from index */ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected) @@ -684,7 +696,7 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index) goto out; }
-static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name) +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name) { char *n, *s;
@@ -873,20 +885,27 @@ int ovl_path_next(int idx, struct dentry *dentry, struct path *path) static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry, struct dentry *lower, struct dentry *upper) {
- const struct ovl_fh *fh;
int err;
if (ovl_check_origin_xattr(ofs, upper)) return 0;
- fh = ovl_get_origin_fh(ofs, lower);
- if (IS_ERR(fh))
- return PTR_ERR(fh);
err = ovl_want_write(dentry); if (err)
- return err;
- goto out;
- err = ovl_set_origin(ofs, lower, upper);
- err = ovl_set_origin_fh(ofs, fh, upper);
if (!err) err = ovl_set_impure(dentry->d_parent, upper->d_parent);
ovl_drop_write(dentry); +out:
- kfree(fh);
return err; }
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 09ca82ed0f8c..61e03d664d7d 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -632,11 +632,15 @@ struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh, int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected, struct dentry *upperdentry, struct ovl_path **stackp); int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
enum ovl_xattr ox, struct dentry *real, bool is_upper,
bool set);
enum ovl_xattr ox, const struct ovl_fh *fh,
bool is_upper, bool set);
+int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
- enum ovl_xattr ox, struct dentry *real,
- bool is_upper, bool set);
struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index, bool connected); int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index); +int ovl_get_index_name_fh(const struct ovl_fh *fh, struct qstr *name); int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin, struct qstr *name); struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh); @@ -648,17 +652,24 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); bool ovl_lower_positive(struct dentry *dentry);
+static inline int ovl_verify_origin_fh(struct ovl_fs *ofs, struct dentry *upper,
const struct ovl_fh *fh, bool set)
+{
- return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, fh, false, set);
+}
static inline int ovl_verify_origin(struct ovl_fs *ofs, struct dentry *upper, struct dentry *origin, bool set) {
- return ovl_verify_set_fh(ofs, upper, OVL_XATTR_ORIGIN, origin,
- false, set);
- return ovl_verify_origin_xattr(ofs, upper, OVL_XATTR_ORIGIN, origin,
false, set);
}
static inline int ovl_verify_upper(struct ovl_fs *ofs, struct dentry *index, struct dentry *upper, bool set) {
- return ovl_verify_set_fh(ofs, index, OVL_XATTR_UPPER, upper, true, set);
- return ovl_verify_origin_xattr(ofs, index, OVL_XATTR_UPPER, upper,
true, set);
}
/* readdir.c */ @@ -823,8 +834,9 @@ int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentr int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat); struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real, bool is_upper); -int ovl_set_origin(struct ovl_fs *ofs, struct dentry *lower,
- struct dentry *upper);
+struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin); +int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
struct dentry *upper);
/* export.c */ extern const struct export_operations ovl_export_operations; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 2c056d737c27..e2574034c3fa 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -879,15 +879,20 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs, { struct vfsmount *mnt = ovl_upper_mnt(ofs); struct dentry *indexdir;
- struct dentry *origin = ovl_lowerstack(oe)->dentry;
- const struct ovl_fh *fh;
int err;
- fh = ovl_get_origin_fh(ofs, origin);
- if (IS_ERR(fh))
- return PTR_ERR(fh);
err = mnt_want_write(mnt); if (err)
- return err;
- goto out_free_fh;
/* Verify lower root is upper root origin */
- err = ovl_verify_origin(ofs, upperpath->dentry,
- ovl_lowerstack(oe)->dentry, true);
- err = ovl_verify_origin_fh(ofs, upperpath->dentry, fh, true);
if (err) { pr_err("failed to verify upper root origin\n"); goto out; @@ -919,9 +924,10 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
- directory entries.
*/ if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
- err = ovl_verify_set_fh(ofs, ofs->indexdir,
- OVL_XATTR_ORIGIN,
- upperpath->dentry, true, false);
- err = ovl_verify_origin_xattr(ofs, ofs->indexdir,
OVL_XATTR_ORIGIN,
upperpath->dentry, true,
false);
if (err) pr_err("failed to verify index dir 'origin' xattr\n"); } @@ -939,6 +945,8 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
out: mnt_drop_write(mnt); +out_free_fh:
- kfree(fh);
return err; }
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 0bf3ffcd072f..4e6b747e0f2e 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -976,12 +976,18 @@ static void ovl_cleanup_index(struct dentry *dentry) struct dentry *index = NULL; struct inode *inode; struct qstr name = { };
- bool got_write = false;
int err;
err = ovl_get_index_name(ofs, lowerdentry, &name); if (err) goto fail;
- err = ovl_want_write(dentry);
- if (err)
- goto fail;
- got_write = true;
inode = d_inode(upperdentry); if (!S_ISDIR(inode->i_mode) && inode->i_nlink != 1) { pr_warn_ratelimited("cleanup linked index (%pd2, ino=%lu, nlink=%u)\n", @@ -1019,6 +1025,8 @@ static void ovl_cleanup_index(struct dentry *dentry) goto fail;
out:
- if (got_write)
- ovl_drop_write(dentry);
kfree(name.name); dput(index); return; @@ -1089,6 +1097,8 @@ void ovl_nlink_end(struct dentry *dentry) { struct inode *inode = d_inode(dentry);
- ovl_drop_write(dentry);
if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) { const struct cred *old_cred;
-- 2.39.5