January 2021 - Linux-stable-mirror

FAILED: patch "[PATCH] fuse: fix bad inode" failed to apply to 4.9-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.9-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 5d069dbe8aaf2a197142558b6fb2978189ba3454 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <mszeredi(a)redhat.com> Date: Thu, 10 Dec 2020 15:33:14 +0100 Subject: [PATCH] fuse: fix bad inode Jan Kara's analysis of the syzbot report (edited): The reproducer opens a directory on FUSE filesystem, it then attaches dnotify mark to the open directory. After that a fuse_do_getattr() call finds that attributes returned by the server are inconsistent, and calls make_bad_inode() which, among other things does: inode->i_mode = S_IFREG; This then confuses dnotify which doesn't tear down its structures properly and eventually crashes. Avoid calling make_bad_inode() on a live inode: switch to a private flag on the fuse inode. Also add the test to ops which the bad_inode_ops would have caught. This bug goes back to the initial merge of fuse in 2.6.14... Reported-by: syzbot+f427adf9324b92652ccc(a)syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com> Tested-by: Jan Kara <jack(a)suse.cz> Cc: <stable(a)vger.kernel.org> diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 5a48cee6d7d3..f529075a2ce8 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (fuse_is_bad(inode)) + return ERR_PTR(-EIO); + if (!fc->posix_acl || fc->no_getxattr) return NULL; @@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fc->posix_acl || fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5d43af1169b7..78f9f209078c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -202,7 +202,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) int ret; inode = d_inode_rcu(entry); - if (inode && is_bad_inode(inode)) + if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { @@ -462,6 +462,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, bool outarg_valid = true; bool locked; + if (fuse_is_bad(dir)) + return ERR_PTR(-EIO); + locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); @@ -611,6 +614,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; + if (fuse_is_bad(dir)) + return -EIO; + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) @@ -659,6 +665,9 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, int err; struct fuse_forget_link *forget; + if (fuse_is_bad(dir)) + return -EIO; + forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; @@ -786,6 +795,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -822,6 +834,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -900,6 +915,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; + if (fuse_is_bad(olddir)) + return -EIO; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; @@ -1035,7 +1053,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, if (!err) { if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, @@ -1237,6 +1255,9 @@ static int fuse_permission(struct inode *inode, int mask) bool refreshed = false; int err = 0; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) return -EACCES; @@ -1332,7 +1353,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out_err; if (fc->cache_symlinks) @@ -1380,7 +1401,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (fc->no_fsyncdir) @@ -1679,7 +1700,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; goto error; } @@ -1742,6 +1763,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(get_fuse_conn(inode))) return -EACCES; @@ -1800,6 +1824,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) { if (!request_mask) { /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 349885353036..8cccecb55fb8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -232,6 +232,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) bool dax_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && FUSE_IS_DAX(inode); + if (fuse_is_bad(inode)) + return -EIO; + err = generic_file_open(inode, file); if (err) return err; @@ -469,7 +472,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) FUSE_ARGS(args); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; err = write_inode_now(inode, 1); @@ -541,7 +544,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; inode_lock(inode); @@ -865,7 +868,7 @@ static int fuse_readpage(struct file *file, struct page *page) int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; err = fuse_do_readpage(file, page); @@ -958,7 +961,7 @@ static void fuse_readahead(struct readahead_control *rac) struct fuse_conn *fc = get_fuse_conn(inode); unsigned int i, max_pages, nr_pages = 0; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return; max_pages = min_t(unsigned int, fc->max_pages, @@ -1570,7 +1573,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -1588,7 +1591,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -2187,7 +2190,7 @@ static int fuse_writepages(struct address_space *mapping, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; data.inode = inode; @@ -2972,7 +2975,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, if (!fuse_allow_current_process(fc)) return -EACCES; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d414c787e362..7c4b8cb93f9f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -172,6 +172,8 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, + /* Bad inode */ + FUSE_I_BAD, }; struct fuse_conn; @@ -859,6 +861,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } +static inline void fuse_make_bad(struct inode *inode) +{ + set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); +} + +static inline bool fuse_is_bad(struct inode *inode) +{ + return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36ab05315828..b0e18b470e91 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -132,7 +132,7 @@ static void fuse_evict_inode(struct inode *inode) fi->forget = NULL; } } - if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { + if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } @@ -352,7 +352,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, unlock_new_inode(inode); } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { /* Inode has changed type, any I/O on the old should fail */ - make_bad_inode(inode); + fuse_make_bad(inode); iput(inode); goto retry; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3b5e91045871..3441ffa740f3 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -207,7 +207,7 @@ static int fuse_direntplus_link(struct file *file, dput(dentry); goto retry; } - if (is_bad_inode(inode)) { + if (fuse_is_bad(inode)) { dput(dentry); return -EIO; } @@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; mutex_lock(&ff->readdir.lock); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 371bdcbc7233..cdea18de94f7 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fm->fc)) return -EACCES; @@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { + if (fuse_is_bad(inode)) + return -EIO; + return fuse_getxattr(inode, name, value, size); } @@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + if (fuse_is_bad(inode)) + return -EIO; + if (!value) return fuse_removexattr(inode, name);

4 years, 6 months

1
0
0 0

FAILED: patch "[PATCH] fuse: fix bad inode" failed to apply to 4.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 5d069dbe8aaf2a197142558b6fb2978189ba3454 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <mszeredi(a)redhat.com> Date: Thu, 10 Dec 2020 15:33:14 +0100 Subject: [PATCH] fuse: fix bad inode Jan Kara's analysis of the syzbot report (edited): The reproducer opens a directory on FUSE filesystem, it then attaches dnotify mark to the open directory. After that a fuse_do_getattr() call finds that attributes returned by the server are inconsistent, and calls make_bad_inode() which, among other things does: inode->i_mode = S_IFREG; This then confuses dnotify which doesn't tear down its structures properly and eventually crashes. Avoid calling make_bad_inode() on a live inode: switch to a private flag on the fuse inode. Also add the test to ops which the bad_inode_ops would have caught. This bug goes back to the initial merge of fuse in 2.6.14... Reported-by: syzbot+f427adf9324b92652ccc(a)syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com> Tested-by: Jan Kara <jack(a)suse.cz> Cc: <stable(a)vger.kernel.org> diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 5a48cee6d7d3..f529075a2ce8 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (fuse_is_bad(inode)) + return ERR_PTR(-EIO); + if (!fc->posix_acl || fc->no_getxattr) return NULL; @@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fc->posix_acl || fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5d43af1169b7..78f9f209078c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -202,7 +202,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) int ret; inode = d_inode_rcu(entry); - if (inode && is_bad_inode(inode)) + if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { @@ -462,6 +462,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, bool outarg_valid = true; bool locked; + if (fuse_is_bad(dir)) + return ERR_PTR(-EIO); + locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); @@ -611,6 +614,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; + if (fuse_is_bad(dir)) + return -EIO; + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) @@ -659,6 +665,9 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, int err; struct fuse_forget_link *forget; + if (fuse_is_bad(dir)) + return -EIO; + forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; @@ -786,6 +795,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -822,6 +834,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -900,6 +915,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; + if (fuse_is_bad(olddir)) + return -EIO; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; @@ -1035,7 +1053,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, if (!err) { if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, @@ -1237,6 +1255,9 @@ static int fuse_permission(struct inode *inode, int mask) bool refreshed = false; int err = 0; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) return -EACCES; @@ -1332,7 +1353,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out_err; if (fc->cache_symlinks) @@ -1380,7 +1401,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (fc->no_fsyncdir) @@ -1679,7 +1700,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; goto error; } @@ -1742,6 +1763,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(get_fuse_conn(inode))) return -EACCES; @@ -1800,6 +1824,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) { if (!request_mask) { /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 349885353036..8cccecb55fb8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -232,6 +232,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) bool dax_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && FUSE_IS_DAX(inode); + if (fuse_is_bad(inode)) + return -EIO; + err = generic_file_open(inode, file); if (err) return err; @@ -469,7 +472,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) FUSE_ARGS(args); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; err = write_inode_now(inode, 1); @@ -541,7 +544,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; inode_lock(inode); @@ -865,7 +868,7 @@ static int fuse_readpage(struct file *file, struct page *page) int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; err = fuse_do_readpage(file, page); @@ -958,7 +961,7 @@ static void fuse_readahead(struct readahead_control *rac) struct fuse_conn *fc = get_fuse_conn(inode); unsigned int i, max_pages, nr_pages = 0; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return; max_pages = min_t(unsigned int, fc->max_pages, @@ -1570,7 +1573,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -1588,7 +1591,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -2187,7 +2190,7 @@ static int fuse_writepages(struct address_space *mapping, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; data.inode = inode; @@ -2972,7 +2975,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, if (!fuse_allow_current_process(fc)) return -EACCES; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d414c787e362..7c4b8cb93f9f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -172,6 +172,8 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, + /* Bad inode */ + FUSE_I_BAD, }; struct fuse_conn; @@ -859,6 +861,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } +static inline void fuse_make_bad(struct inode *inode) +{ + set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); +} + +static inline bool fuse_is_bad(struct inode *inode) +{ + return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36ab05315828..b0e18b470e91 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -132,7 +132,7 @@ static void fuse_evict_inode(struct inode *inode) fi->forget = NULL; } } - if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { + if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } @@ -352,7 +352,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, unlock_new_inode(inode); } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { /* Inode has changed type, any I/O on the old should fail */ - make_bad_inode(inode); + fuse_make_bad(inode); iput(inode); goto retry; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3b5e91045871..3441ffa740f3 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -207,7 +207,7 @@ static int fuse_direntplus_link(struct file *file, dput(dentry); goto retry; } - if (is_bad_inode(inode)) { + if (fuse_is_bad(inode)) { dput(dentry); return -EIO; } @@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; mutex_lock(&ff->readdir.lock); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 371bdcbc7233..cdea18de94f7 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fm->fc)) return -EACCES; @@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { + if (fuse_is_bad(inode)) + return -EIO; + return fuse_getxattr(inode, name, value, size); } @@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + if (fuse_is_bad(inode)) + return -EIO; + if (!value) return fuse_removexattr(inode, name);

4 years, 6 months

1
0
0 0

FAILED: patch "[PATCH] fuse: fix bad inode" failed to apply to 4.14-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.14-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 5d069dbe8aaf2a197142558b6fb2978189ba3454 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <mszeredi(a)redhat.com> Date: Thu, 10 Dec 2020 15:33:14 +0100 Subject: [PATCH] fuse: fix bad inode Jan Kara's analysis of the syzbot report (edited): The reproducer opens a directory on FUSE filesystem, it then attaches dnotify mark to the open directory. After that a fuse_do_getattr() call finds that attributes returned by the server are inconsistent, and calls make_bad_inode() which, among other things does: inode->i_mode = S_IFREG; This then confuses dnotify which doesn't tear down its structures properly and eventually crashes. Avoid calling make_bad_inode() on a live inode: switch to a private flag on the fuse inode. Also add the test to ops which the bad_inode_ops would have caught. This bug goes back to the initial merge of fuse in 2.6.14... Reported-by: syzbot+f427adf9324b92652ccc(a)syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com> Tested-by: Jan Kara <jack(a)suse.cz> Cc: <stable(a)vger.kernel.org> diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 5a48cee6d7d3..f529075a2ce8 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (fuse_is_bad(inode)) + return ERR_PTR(-EIO); + if (!fc->posix_acl || fc->no_getxattr) return NULL; @@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fc->posix_acl || fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5d43af1169b7..78f9f209078c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -202,7 +202,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) int ret; inode = d_inode_rcu(entry); - if (inode && is_bad_inode(inode)) + if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { @@ -462,6 +462,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, bool outarg_valid = true; bool locked; + if (fuse_is_bad(dir)) + return ERR_PTR(-EIO); + locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); @@ -611,6 +614,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; + if (fuse_is_bad(dir)) + return -EIO; + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) @@ -659,6 +665,9 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, int err; struct fuse_forget_link *forget; + if (fuse_is_bad(dir)) + return -EIO; + forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; @@ -786,6 +795,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -822,6 +834,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -900,6 +915,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; + if (fuse_is_bad(olddir)) + return -EIO; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; @@ -1035,7 +1053,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, if (!err) { if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, @@ -1237,6 +1255,9 @@ static int fuse_permission(struct inode *inode, int mask) bool refreshed = false; int err = 0; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) return -EACCES; @@ -1332,7 +1353,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out_err; if (fc->cache_symlinks) @@ -1380,7 +1401,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (fc->no_fsyncdir) @@ -1679,7 +1700,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; goto error; } @@ -1742,6 +1763,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(get_fuse_conn(inode))) return -EACCES; @@ -1800,6 +1824,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) { if (!request_mask) { /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 349885353036..8cccecb55fb8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -232,6 +232,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) bool dax_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && FUSE_IS_DAX(inode); + if (fuse_is_bad(inode)) + return -EIO; + err = generic_file_open(inode, file); if (err) return err; @@ -469,7 +472,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) FUSE_ARGS(args); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; err = write_inode_now(inode, 1); @@ -541,7 +544,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; inode_lock(inode); @@ -865,7 +868,7 @@ static int fuse_readpage(struct file *file, struct page *page) int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; err = fuse_do_readpage(file, page); @@ -958,7 +961,7 @@ static void fuse_readahead(struct readahead_control *rac) struct fuse_conn *fc = get_fuse_conn(inode); unsigned int i, max_pages, nr_pages = 0; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return; max_pages = min_t(unsigned int, fc->max_pages, @@ -1570,7 +1573,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -1588,7 +1591,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -2187,7 +2190,7 @@ static int fuse_writepages(struct address_space *mapping, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; data.inode = inode; @@ -2972,7 +2975,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, if (!fuse_allow_current_process(fc)) return -EACCES; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d414c787e362..7c4b8cb93f9f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -172,6 +172,8 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, + /* Bad inode */ + FUSE_I_BAD, }; struct fuse_conn; @@ -859,6 +861,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } +static inline void fuse_make_bad(struct inode *inode) +{ + set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); +} + +static inline bool fuse_is_bad(struct inode *inode) +{ + return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36ab05315828..b0e18b470e91 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -132,7 +132,7 @@ static void fuse_evict_inode(struct inode *inode) fi->forget = NULL; } } - if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { + if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } @@ -352,7 +352,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, unlock_new_inode(inode); } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { /* Inode has changed type, any I/O on the old should fail */ - make_bad_inode(inode); + fuse_make_bad(inode); iput(inode); goto retry; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3b5e91045871..3441ffa740f3 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -207,7 +207,7 @@ static int fuse_direntplus_link(struct file *file, dput(dentry); goto retry; } - if (is_bad_inode(inode)) { + if (fuse_is_bad(inode)) { dput(dentry); return -EIO; } @@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; mutex_lock(&ff->readdir.lock); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 371bdcbc7233..cdea18de94f7 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fm->fc)) return -EACCES; @@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { + if (fuse_is_bad(inode)) + return -EIO; + return fuse_getxattr(inode, name, value, size); } @@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + if (fuse_is_bad(inode)) + return -EIO; + if (!value) return fuse_removexattr(inode, name);

4 years, 6 months

1
0
0 0

FAILED: patch "[PATCH] fuse: fix bad inode" failed to apply to 4.19-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 4.19-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 5d069dbe8aaf2a197142558b6fb2978189ba3454 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <mszeredi(a)redhat.com> Date: Thu, 10 Dec 2020 15:33:14 +0100 Subject: [PATCH] fuse: fix bad inode Jan Kara's analysis of the syzbot report (edited): The reproducer opens a directory on FUSE filesystem, it then attaches dnotify mark to the open directory. After that a fuse_do_getattr() call finds that attributes returned by the server are inconsistent, and calls make_bad_inode() which, among other things does: inode->i_mode = S_IFREG; This then confuses dnotify which doesn't tear down its structures properly and eventually crashes. Avoid calling make_bad_inode() on a live inode: switch to a private flag on the fuse inode. Also add the test to ops which the bad_inode_ops would have caught. This bug goes back to the initial merge of fuse in 2.6.14... Reported-by: syzbot+f427adf9324b92652ccc(a)syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com> Tested-by: Jan Kara <jack(a)suse.cz> Cc: <stable(a)vger.kernel.org> diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 5a48cee6d7d3..f529075a2ce8 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (fuse_is_bad(inode)) + return ERR_PTR(-EIO); + if (!fc->posix_acl || fc->no_getxattr) return NULL; @@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fc->posix_acl || fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5d43af1169b7..78f9f209078c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -202,7 +202,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) int ret; inode = d_inode_rcu(entry); - if (inode && is_bad_inode(inode)) + if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { @@ -462,6 +462,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, bool outarg_valid = true; bool locked; + if (fuse_is_bad(dir)) + return ERR_PTR(-EIO); + locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); @@ -611,6 +614,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; + if (fuse_is_bad(dir)) + return -EIO; + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) @@ -659,6 +665,9 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, int err; struct fuse_forget_link *forget; + if (fuse_is_bad(dir)) + return -EIO; + forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; @@ -786,6 +795,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -822,6 +834,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -900,6 +915,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; + if (fuse_is_bad(olddir)) + return -EIO; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; @@ -1035,7 +1053,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, if (!err) { if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, @@ -1237,6 +1255,9 @@ static int fuse_permission(struct inode *inode, int mask) bool refreshed = false; int err = 0; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) return -EACCES; @@ -1332,7 +1353,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out_err; if (fc->cache_symlinks) @@ -1380,7 +1401,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (fc->no_fsyncdir) @@ -1679,7 +1700,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; goto error; } @@ -1742,6 +1763,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(get_fuse_conn(inode))) return -EACCES; @@ -1800,6 +1824,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) { if (!request_mask) { /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 349885353036..8cccecb55fb8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -232,6 +232,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) bool dax_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && FUSE_IS_DAX(inode); + if (fuse_is_bad(inode)) + return -EIO; + err = generic_file_open(inode, file); if (err) return err; @@ -469,7 +472,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) FUSE_ARGS(args); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; err = write_inode_now(inode, 1); @@ -541,7 +544,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; inode_lock(inode); @@ -865,7 +868,7 @@ static int fuse_readpage(struct file *file, struct page *page) int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; err = fuse_do_readpage(file, page); @@ -958,7 +961,7 @@ static void fuse_readahead(struct readahead_control *rac) struct fuse_conn *fc = get_fuse_conn(inode); unsigned int i, max_pages, nr_pages = 0; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return; max_pages = min_t(unsigned int, fc->max_pages, @@ -1570,7 +1573,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -1588,7 +1591,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -2187,7 +2190,7 @@ static int fuse_writepages(struct address_space *mapping, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; data.inode = inode; @@ -2972,7 +2975,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, if (!fuse_allow_current_process(fc)) return -EACCES; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d414c787e362..7c4b8cb93f9f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -172,6 +172,8 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, + /* Bad inode */ + FUSE_I_BAD, }; struct fuse_conn; @@ -859,6 +861,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } +static inline void fuse_make_bad(struct inode *inode) +{ + set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); +} + +static inline bool fuse_is_bad(struct inode *inode) +{ + return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36ab05315828..b0e18b470e91 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -132,7 +132,7 @@ static void fuse_evict_inode(struct inode *inode) fi->forget = NULL; } } - if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { + if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } @@ -352,7 +352,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, unlock_new_inode(inode); } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { /* Inode has changed type, any I/O on the old should fail */ - make_bad_inode(inode); + fuse_make_bad(inode); iput(inode); goto retry; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3b5e91045871..3441ffa740f3 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -207,7 +207,7 @@ static int fuse_direntplus_link(struct file *file, dput(dentry); goto retry; } - if (is_bad_inode(inode)) { + if (fuse_is_bad(inode)) { dput(dentry); return -EIO; } @@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; mutex_lock(&ff->readdir.lock); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 371bdcbc7233..cdea18de94f7 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fm->fc)) return -EACCES; @@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { + if (fuse_is_bad(inode)) + return -EIO; + return fuse_getxattr(inode, name, value, size); } @@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + if (fuse_is_bad(inode)) + return -EIO; + if (!value) return fuse_removexattr(inode, name);

4 years, 6 months

1
0
0 0

FAILED: patch "[PATCH] fuse: fix bad inode" failed to apply to 5.4-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.4-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 5d069dbe8aaf2a197142558b6fb2978189ba3454 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <mszeredi(a)redhat.com> Date: Thu, 10 Dec 2020 15:33:14 +0100 Subject: [PATCH] fuse: fix bad inode Jan Kara's analysis of the syzbot report (edited): The reproducer opens a directory on FUSE filesystem, it then attaches dnotify mark to the open directory. After that a fuse_do_getattr() call finds that attributes returned by the server are inconsistent, and calls make_bad_inode() which, among other things does: inode->i_mode = S_IFREG; This then confuses dnotify which doesn't tear down its structures properly and eventually crashes. Avoid calling make_bad_inode() on a live inode: switch to a private flag on the fuse inode. Also add the test to ops which the bad_inode_ops would have caught. This bug goes back to the initial merge of fuse in 2.6.14... Reported-by: syzbot+f427adf9324b92652ccc(a)syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com> Tested-by: Jan Kara <jack(a)suse.cz> Cc: <stable(a)vger.kernel.org> diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 5a48cee6d7d3..f529075a2ce8 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (fuse_is_bad(inode)) + return ERR_PTR(-EIO); + if (!fc->posix_acl || fc->no_getxattr) return NULL; @@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fc->posix_acl || fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5d43af1169b7..78f9f209078c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -202,7 +202,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) int ret; inode = d_inode_rcu(entry); - if (inode && is_bad_inode(inode)) + if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { @@ -462,6 +462,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, bool outarg_valid = true; bool locked; + if (fuse_is_bad(dir)) + return ERR_PTR(-EIO); + locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); @@ -611,6 +614,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; + if (fuse_is_bad(dir)) + return -EIO; + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) @@ -659,6 +665,9 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, int err; struct fuse_forget_link *forget; + if (fuse_is_bad(dir)) + return -EIO; + forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; @@ -786,6 +795,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -822,6 +834,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -900,6 +915,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; + if (fuse_is_bad(olddir)) + return -EIO; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; @@ -1035,7 +1053,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, if (!err) { if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, @@ -1237,6 +1255,9 @@ static int fuse_permission(struct inode *inode, int mask) bool refreshed = false; int err = 0; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) return -EACCES; @@ -1332,7 +1353,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out_err; if (fc->cache_symlinks) @@ -1380,7 +1401,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (fc->no_fsyncdir) @@ -1679,7 +1700,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; goto error; } @@ -1742,6 +1763,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(get_fuse_conn(inode))) return -EACCES; @@ -1800,6 +1824,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) { if (!request_mask) { /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 349885353036..8cccecb55fb8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -232,6 +232,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) bool dax_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && FUSE_IS_DAX(inode); + if (fuse_is_bad(inode)) + return -EIO; + err = generic_file_open(inode, file); if (err) return err; @@ -469,7 +472,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) FUSE_ARGS(args); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; err = write_inode_now(inode, 1); @@ -541,7 +544,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; inode_lock(inode); @@ -865,7 +868,7 @@ static int fuse_readpage(struct file *file, struct page *page) int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; err = fuse_do_readpage(file, page); @@ -958,7 +961,7 @@ static void fuse_readahead(struct readahead_control *rac) struct fuse_conn *fc = get_fuse_conn(inode); unsigned int i, max_pages, nr_pages = 0; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return; max_pages = min_t(unsigned int, fc->max_pages, @@ -1570,7 +1573,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -1588,7 +1591,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -2187,7 +2190,7 @@ static int fuse_writepages(struct address_space *mapping, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; data.inode = inode; @@ -2972,7 +2975,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, if (!fuse_allow_current_process(fc)) return -EACCES; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d414c787e362..7c4b8cb93f9f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -172,6 +172,8 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, + /* Bad inode */ + FUSE_I_BAD, }; struct fuse_conn; @@ -859,6 +861,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } +static inline void fuse_make_bad(struct inode *inode) +{ + set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); +} + +static inline bool fuse_is_bad(struct inode *inode) +{ + return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36ab05315828..b0e18b470e91 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -132,7 +132,7 @@ static void fuse_evict_inode(struct inode *inode) fi->forget = NULL; } } - if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { + if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } @@ -352,7 +352,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, unlock_new_inode(inode); } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { /* Inode has changed type, any I/O on the old should fail */ - make_bad_inode(inode); + fuse_make_bad(inode); iput(inode); goto retry; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3b5e91045871..3441ffa740f3 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -207,7 +207,7 @@ static int fuse_direntplus_link(struct file *file, dput(dentry); goto retry; } - if (is_bad_inode(inode)) { + if (fuse_is_bad(inode)) { dput(dentry); return -EIO; } @@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; mutex_lock(&ff->readdir.lock); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 371bdcbc7233..cdea18de94f7 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fm->fc)) return -EACCES; @@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { + if (fuse_is_bad(inode)) + return -EIO; + return fuse_getxattr(inode, name, value, size); } @@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + if (fuse_is_bad(inode)) + return -EIO; + if (!value) return fuse_removexattr(inode, name);

4 years, 6 months

1
0
0 0

FAILED: patch "[PATCH] fuse: fix bad inode" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From 5d069dbe8aaf2a197142558b6fb2978189ba3454 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <mszeredi(a)redhat.com> Date: Thu, 10 Dec 2020 15:33:14 +0100 Subject: [PATCH] fuse: fix bad inode Jan Kara's analysis of the syzbot report (edited): The reproducer opens a directory on FUSE filesystem, it then attaches dnotify mark to the open directory. After that a fuse_do_getattr() call finds that attributes returned by the server are inconsistent, and calls make_bad_inode() which, among other things does: inode->i_mode = S_IFREG; This then confuses dnotify which doesn't tear down its structures properly and eventually crashes. Avoid calling make_bad_inode() on a live inode: switch to a private flag on the fuse inode. Also add the test to ops which the bad_inode_ops would have caught. This bug goes back to the initial merge of fuse in 2.6.14... Reported-by: syzbot+f427adf9324b92652ccc(a)syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi <mszeredi(a)redhat.com> Tested-by: Jan Kara <jack(a)suse.cz> Cc: <stable(a)vger.kernel.org> diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 5a48cee6d7d3..f529075a2ce8 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type) void *value = NULL; struct posix_acl *acl; + if (fuse_is_bad(inode)) + return ERR_PTR(-EIO); + if (!fc->posix_acl || fc->no_getxattr) return NULL; @@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type) const char *name; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fc->posix_acl || fc->no_setxattr) return -EOPNOTSUPP; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5d43af1169b7..78f9f209078c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -202,7 +202,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) int ret; inode = d_inode_rcu(entry); - if (inode && is_bad_inode(inode)) + if (inode && fuse_is_bad(inode)) goto invalid; else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) { @@ -462,6 +462,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, bool outarg_valid = true; bool locked; + if (fuse_is_bad(dir)) + return ERR_PTR(-EIO); + locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, &outarg, &inode); @@ -611,6 +614,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; + if (fuse_is_bad(dir)) + return -EIO; + if (d_in_lookup(entry)) { res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) @@ -659,6 +665,9 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, int err; struct fuse_forget_link *forget; + if (fuse_is_bad(dir)) + return -EIO; + forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; @@ -786,6 +795,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -822,6 +834,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); + if (fuse_is_bad(dir)) + return -EIO; + args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -900,6 +915,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); int err; + if (fuse_is_bad(olddir)) + return -EIO; + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; @@ -1035,7 +1053,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, if (!err) { if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; } else { fuse_change_attributes(inode, &outarg.attr, @@ -1237,6 +1255,9 @@ static int fuse_permission(struct inode *inode, int mask) bool refreshed = false; int err = 0; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) return -EACCES; @@ -1332,7 +1353,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out_err; if (fc->cache_symlinks) @@ -1380,7 +1401,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (fc->no_fsyncdir) @@ -1679,7 +1700,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (fuse_invalid_attr(&outarg.attr) || (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { - make_bad_inode(inode); + fuse_make_bad(inode); err = -EIO; goto error; } @@ -1742,6 +1763,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL; int ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(get_fuse_conn(inode))) return -EACCES; @@ -1800,6 +1824,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fc)) { if (!request_mask) { /* diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 349885353036..8cccecb55fb8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -232,6 +232,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) bool dax_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && FUSE_IS_DAX(inode); + if (fuse_is_bad(inode)) + return -EIO; + err = generic_file_open(inode, file); if (err) return err; @@ -469,7 +472,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) FUSE_ARGS(args); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; err = write_inode_now(inode, 1); @@ -541,7 +544,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; inode_lock(inode); @@ -865,7 +868,7 @@ static int fuse_readpage(struct file *file, struct page *page) int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; err = fuse_do_readpage(file, page); @@ -958,7 +961,7 @@ static void fuse_readahead(struct readahead_control *rac) struct fuse_conn *fc = get_fuse_conn(inode); unsigned int i, max_pages, nr_pages = 0; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return; max_pages = min_t(unsigned int, fc->max_pages, @@ -1570,7 +1573,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -1588,7 +1591,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct fuse_file *ff = file->private_data; struct inode *inode = file_inode(file); - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; if (FUSE_IS_DAX(inode)) @@ -2187,7 +2190,7 @@ static int fuse_writepages(struct address_space *mapping, int err; err = -EIO; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) goto out; data.inode = inode; @@ -2972,7 +2975,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, if (!fuse_allow_current_process(fc)) return -EACCES; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d414c787e362..7c4b8cb93f9f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -172,6 +172,8 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, + /* Bad inode */ + FUSE_I_BAD, }; struct fuse_conn; @@ -859,6 +861,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) return atomic64_read(&fc->attr_version); } +static inline void fuse_make_bad(struct inode *inode) +{ + set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); +} + +static inline bool fuse_is_bad(struct inode *inode) +{ + return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 36ab05315828..b0e18b470e91 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -132,7 +132,7 @@ static void fuse_evict_inode(struct inode *inode) fi->forget = NULL; } } - if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) { + if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { WARN_ON(!list_empty(&fi->write_files)); WARN_ON(!list_empty(&fi->queued_writes)); } @@ -352,7 +352,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, unlock_new_inode(inode); } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { /* Inode has changed type, any I/O on the old should fail */ - make_bad_inode(inode); + fuse_make_bad(inode); iput(inode); goto retry; } diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3b5e91045871..3441ffa740f3 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -207,7 +207,7 @@ static int fuse_direntplus_link(struct file *file, dput(dentry); goto retry; } - if (is_bad_inode(inode)) { + if (fuse_is_bad(inode)) { dput(dentry); return -EIO; } @@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); int err; - if (is_bad_inode(inode)) + if (fuse_is_bad(inode)) return -EIO; mutex_lock(&ff->readdir.lock); diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 371bdcbc7233..cdea18de94f7 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; + if (fuse_is_bad(inode)) + return -EIO; + if (!fuse_allow_current_process(fm->fc)) return -EACCES; @@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { + if (fuse_is_bad(inode)) + return -EIO; + return fuse_getxattr(inode, name, value, size); } @@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { + if (fuse_is_bad(inode)) + return -EIO; + if (!value) return fuse_removexattr(inode, name);

4 years, 6 months

1
0
0 0

FAILED: patch "[PATCH] RDMA/siw, rxe: Make emulated devices virtual in the device" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From a9d2e9ae953f0ddd0327479c81a085adaa76d903 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe <jgg(a)nvidia.com> Date: Fri, 6 Nov 2020 10:00:49 -0400 Subject: [PATCH] RDMA/siw,rxe: Make emulated devices virtual in the device tree This moves siw and rxe to be virtual devices in the device tree: lrwxrwxrwx 1 root root 0 Nov 6 13:55 /sys/class/infiniband/rxe0 -> ../../devices/virtual/infiniband/rxe0/ Previously they were trying to parent themselves to the physical device of their attached netdev, which doesn't make alot of sense. My hope is this will solve some weird syzkaller hits related to sysfs as it could be possible that the parent of a netdev is another netdev, eg under bonding or some other syzkaller found netdev configuration. Nesting a ib_device under anything but a physical device is going to cause inconsistencies in sysfs during destructions. Link: https://lore.kernel.org/r/0-v1-dcbfc68c4b4a+d6-virtual_dev_jgg@nvidia.com Signed-off-by: Jason Gunthorpe <jgg(a)nvidia.com> diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index ee3cf3af7cab..c4b06ced30a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -19,15 +19,6 @@ static struct rxe_recv_sockets recv_sockets; -struct device *rxe_dma_device(struct rxe_dev *rxe) -{ - struct net_device *ndev; - - ndev = rxe->ndev; - - return ndev->dev.parent; -} - int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { int err; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index a2bd91aaa5de..2fbea2b2d72a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1134,7 +1134,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = num_possible_cpus(); - dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index d9de062852c4..ee95cf29179d 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -305,24 +305,8 @@ static struct siw_device *siw_device_create(struct net_device *netdev) { struct siw_device *sdev = NULL; struct ib_device *base_dev; - struct device *parent = netdev->dev.parent; int rv; - if (!parent) { - /* - * The loopback device has no parent device, - * so it appears as a top-level device. To support - * loopback device connectivity, take this device - * as the parent device. Skip all other devices - * w/o parent device. - */ - if (netdev->type != ARPHRD_LOOPBACK) { - pr_warn("siw: device %s error: no parent device\n", - netdev->name); - return NULL; - } - parent = &netdev->dev; - } sdev = ib_alloc_device(siw_device, base_dev); if (!sdev) return NULL; @@ -359,7 +343,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev) * per physical port. */ base_dev->phys_port_cnt = 1; - base_dev->dev.parent = parent; base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); @@ -401,7 +384,7 @@ static struct siw_device *siw_device_create(struct net_device *netdev) atomic_set(&sdev->num_mr, 0); atomic_set(&sdev->num_pd, 0); - sdev->numa_node = dev_to_node(parent); + sdev->numa_node = dev_to_node(&netdev->dev); spin_lock_init(&sdev->lock); return sdev;

4 years, 6 months

1
0
0 0

FAILED: patch "[PATCH] scsi: ufs: Re-enable WriteBooster after device reset" failed to apply to 5.10-stable tree

by gregkh＠linuxfoundation.org

The patch below does not apply to the 5.10-stable tree. If someone wants it applied there, or to any other stable or longterm tree, then please email the backport, including the original git commit id to <stable(a)vger.kernel.org>. thanks, greg k-h ------------------ original commit in Linus's tree ------------------ >From bd14bf0e4a084514aa62d24d2109e0f09a93822f Mon Sep 17 00:00:00 2001 From: Stanley Chu <stanley.chu(a)mediatek.com> Date: Tue, 8 Dec 2020 21:56:34 +0800 Subject: [PATCH] scsi: ufs: Re-enable WriteBooster after device reset UFS 3.1 specification mentions that the WriteBooster flags listed below will be set to their default values, i.e. disabled, after power cycle or any type of reset event. Thus we need to reset the flag variables kept in struct hba to align with the device status and ensure that WriteBooster-related functions are configured properly after device reset. Without this fix, WriteBooster will not be enabled successfully after by ufshcd_wb_ctrl() after device reset because hba->wb_enabled remains true. Flags required to be reset to default values: - fWriteBoosterEn: hba->wb_enabled - fWriteBoosterBufferFlushEn: hba->wb_buf_flush_enabled - fWriteBoosterBufferFlushDuringHibernate: No variable mapped Link: https://lore.kernel.org/r/20201208135635.15326-2-stanley.chu@mediatek.com Fixes: 3d17b9b5ab11 ("scsi: ufs: Add write booster feature support") Reviewed-by: Bean Huo <beanhuo(a)micron.com> Signed-off-by: Stanley Chu <stanley.chu(a)mediatek.com> Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 08c8a591e6b0..36d367eb8139 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -1221,8 +1221,13 @@ static inline void ufshcd_vops_device_reset(struct ufs_hba *hba) if (hba->vops && hba->vops->device_reset) { int err = hba->vops->device_reset(hba); - if (!err) + if (!err) { ufshcd_set_ufs_dev_active(hba); + if (ufshcd_is_wb_allowed(hba)) { + hba->wb_enabled = false; + hba->wb_buf_flush_enabled = false; + } + } if (err != -EOPNOTSUPP) ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); }

4 years, 6 months

1
0
0 0

[PATCH 00/10] Cover letter: fix a race in release_task when flushing the dentry

by Wen Yang

The dentries such as /proc/<pid>/ns/ have the DCACHE_OP_DELETE flag, they should be deleted when the process exits. Suppose the following race appears： release_task dput -> proc_flush_task -> dentry->d_op->d_delete(dentry) -> __exit_signal -> dentry->d_lockref.count-- and return. In the proc_flush_task(), if another process is using this dentry, it will not be deleted. At the same time, in dput(), d_op->d_delete() can be executed before __exit_signal(pid has not been hashed), d_delete returns false, so this dentry still cannot be deleted. This dentry will always be cached (although its count is 0 and the DCACHE_OP_DELETE flag is set), its parent denry will also be cached too, and these dentries can only be deleted when drop_caches is manually triggered. This will result in wasted memory. What's more troublesome is that these dentries reference pid, according to the commit f333c700c610 ("pidns: Add a limit on the number of pid namespaces"), if the pid cannot be released, it may result in the inability to create a new pid_ns. This problem occurred in our cluster environment (Linux 4.9 LTS). We could reproduce it by manually constructing a test program + adding some debugging switches in the kernel: * A test program to open the directory (/proc/<pid>/ns) [1] * Adding some debugging switches to the kernel, adding a delay between proc_flush_task and __exit_signal in release_task() [2] The test process is as follows: A, terminal #1 Turn on the debug switch: echo 1> /proc/sys/vm/dentry_debug_trace Execute the following unshare command: sudo unshare --pid --fork --mount-proc bash B, terminal #2 Find the pid of the unshare process: # pstree -p | grep unshare | `-sshd(716)---bash(718)--sudo(816)---unshare(817)---bash(818) Find the corresponding dentry: # dmesg | grep pid=818 [70.424722] XXX proc_pid_instantiate:3119 pid=818 tid=818 entry=818/ffff8802c7b670e8 C, terminal #3 Execute the opendir program, it will always open the /proc/818/ns/ directory: # ./a.out /proc/818/ns/ pid: 876 . .. net uts ipc pid user mnt cgroup D, go back to terminal #2 Turn on the debugging switches to construct the race: # echo 818> /proc/sys/vm/dentry_debug_pid # echo 1> /proc/sys/vm/dentry_debug_delay Kill the unshare process (pid 818). Since the debugging switches have been turned on, it will get stuck in release_task(): # kill -9 818 Then kill the process that opened the /proc/818/ns/ directory: # kill -9 876 Then turn off these debugging switches to allow the 818 process to exit: # echo 0> /proc/sys/vm/dentry_debug_delay # echo 0> /proc/sys/vm/dentry_debug_pid Checking the dmesg, we will find that the dentry(/proc/818/ns) ’s count is 0, and the flag is 2800cc (#define DCACHE_OP_DELETE 0x00000008), but it is still cached: # dmesg | grep ffff8802a3999548 … [565.559156] XXX dput:853 dentry=ns/ffff8802bea7b528, flag=2800cc, cnt=0, inode=ffff8802b38c2010, pdentry=818/ffff8802c7b670e8, pflag=20008c, pcnt=1, pinode=ffff8802c7812010, keywords: be cached It could also be verified via the crash tool: crash> dentry.d_flags,d_iname,d_inode,d_lockref -x ffff8802bea7b528 d_flags = 0x2800cc d_iname = "ns\000kkkkkkkkkkkkkkkkkkkkkkkkkkkk" d_inode = 0xffff8802b38c2010 d_lockref = { { lock_count = 0x0, { lock = { { rlock = { raw_lock = { { val = { counter = 0x0 }, { locked = 0x0, pending = 0x0 }, { locked_pending = 0x0, tail = 0x0 } } } } } }, count = 0x0 } } } crash> kmem ffff8802bea7b528 CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME ffff8802dd5f5900 192 23663 26130 871 16k dentry SLAB MEMORY NODE TOTAL ALLOCATED FREE ffffea000afa9e00 ffff8802bea78000 0 30 25 5 FREE / [ALLOCATED] [ffff8802bea7b520] PAGE PHYSICAL MAPPING INDEX CNT FLAGS ffffea000afa9ec0 2bea7b000 dead000000000400 0 0 2fffff80000000 crash> This series of patches is to fix this issue. Regards, Wen Alexey Dobriyan (1): proc: use %u for pid printing and slightly less stack Andreas Gruenbacher (1): proc: Pass file mode to proc_pid_make_inode Christian Brauner (1): clone: add CLONE_PIDFD Eric W. Biederman (6): proc: Better ownership of files for non-dumpable tasks in user namespaces proc: Rename in proc_inode rename sysctl_inodes sibling_inodes proc: Generalize proc_sys_prune_dcache into proc_prune_siblings_dcache proc: Clear the pieces of proc_inode that proc_evict_inode cares about proc: Use d_invalidate in proc_prune_siblings_dcache proc: Use a list of inodes to flush from proc Joel Fernandes (Google) (1): pidfd: add polling support fs/proc/base.c | 242 ++++++++++++++++++++------------------------- fs/proc/fd.c | 20 +--- fs/proc/inode.c | 67 ++++++++++++- fs/proc/internal.h | 22 ++--- fs/proc/namespaces.c | 3 +- fs/proc/proc_sysctl.c | 45 ++------- fs/proc/self.c | 6 +- fs/proc/thread_self.c | 5 +- include/linux/pid.h | 5 + include/linux/proc_fs.h | 4 +- include/uapi/linux/sched.h | 1 + kernel/exit.c | 5 +- kernel/fork.c | 145 ++++++++++++++++++++++++++- kernel/pid.c | 3 + kernel/signal.c | 11 +++ security/selinux/hooks.c | 1 + 16 files changed, 357 insertions(+), 228 deletions(-) [1] A test program to open the directory (/proc/<pid>/ns) #include <stdio.h> #include <sys/types.h> #include <dirent.h> #include <errno.h> int main(int argc, char *argv[]) { DIR *dip; struct dirent *dit; if (argc < 2) { printf("Usage :%s <directory>\n", argv[0]); return -1; } if ((dip = opendir(argv[1])) == NULL) { perror("opendir"); return -1; } printf("pid: %d\n", getpid()); while((dit = readdir (dip)) != NULL) { printf("%s\n", dit->d_name); } while (1) sleep (1); return 0; } [2] Adding some debugging switches to the kernel, also adding a delay between proc_flush_task and __exit_signal in release_task(): diff --git a/fs/dcache.c b/fs/dcache.c index 05bad55..fafad37 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -84,6 +84,9 @@ int sysctl_vfs_cache_pressure __read_mostly = 100; EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); +int sysctl_dentry_debug_trace __read_mostly = 0; +EXPORT_SYMBOL_GPL(sysctl_dentry_debug_trace); + __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); EXPORT_SYMBOL(rename_lock); @@ -758,6 +761,26 @@ static inline bool fast_dput(struct dentry *dentry) return 0; } +#define DENTRY_DEBUG_TRACE(dentry, keywords) \ +do { \ + if (sysctl_dentry_debug_trace) \ + printk("XXX %s:%d " \ + "dentry=%s/%p, flag=%x, cnt=%d, inode=%p, " \ + "pdentry=%s/%p, pflag=%x, pcnt=%d, pinode=%p, " \ + "keywords: %s\n", \ + __func__, __LINE__, \ + dentry->d_name.name, \ + dentry, \ + dentry->d_flags, \ + dentry->d_lockref.count, \ + dentry->d_inode, \ + dentry->d_parent->d_name.name, \ + dentry->d_parent, \ + dentry->d_parent->d_flags, \ + dentry->d_parent->d_lockref.count, \ + dentry->d_parent->d_inode, \ + keywords); \ +} while (0) /* * This is dput @@ -804,6 +827,8 @@ void dput(struct dentry *dentry) WARN_ON(d_in_lookup(dentry)); + DENTRY_DEBUG_TRACE(dentry, "be checked"); + /* Unreachable? Get rid of it */ if (unlikely(d_unhashed(dentry))) goto kill_it; @@ -812,8 +837,10 @@ void dput(struct dentry *dentry) goto kill_it; if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { - if (dentry->d_op->d_delete(dentry)) + if (dentry->d_op->d_delete(dentry)) { + DENTRY_DEBUG_TRACE(dentry, "be killed"); goto kill_it; + } } if (!(dentry->d_flags & DCACHE_REFERENCED)) @@ -822,6 +849,9 @@ void dput(struct dentry *dentry) dentry->d_lockref.count--; spin_unlock(&dentry->d_lock); + + DENTRY_DEBUG_TRACE(dentry, "be cached"); + return; kill_it: diff --git a/fs/proc/base.c b/fs/proc/base.c index b9e4183..419a409 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3090,6 +3090,8 @@ void proc_flush_task(struct task_struct *task) } } +extern int sysctl_dentry_debug_trace; + static int proc_pid_instantiate(struct inode *dir, struct dentry * dentry, struct task_struct *task, const void *ptr) @@ -3111,6 +3113,12 @@ static int proc_pid_instantiate(struct inode *dir, d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); + + if (sysctl_dentry_debug_trace) + printk("XXX %s:%d pid=%d tid=%d entry=%s/%p\n", + __func__, __LINE__, task->pid, task->tgid, + dentry->d_name.name, dentry); + /* Close the race of the process dying before we return the dentry */ if (pid_revalidate(dentry, 0)) return 0; diff --git a/kernel/exit.c b/kernel/exit.c index 27f4168..2b3e1b6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -55,6 +55,8 @@ #include <linux/shm.h> #include <linux/kcov.h> +#include <linux/delay.h> + #include <asm/uaccess.h> #include <asm/unistd.h> #include <asm/pgtable.h> @@ -164,6 +166,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp) put_task_struct(tsk); } +int sysctl_dentry_debug_delay __read_mostly = 0; +int sysctl_dentry_debug_pid __read_mostly = 0; void release_task(struct task_struct *p) { @@ -178,6 +182,11 @@ void release_task(struct task_struct *p) proc_flush_task(p); + if (sysctl_dentry_debug_delay && p->pid == sysctl_dentry_debug_pid) { + while (sysctl_dentry_debug_delay) + mdelay(1); + } + write_lock_irq(&tasklist_lock); ptrace_release_task(p); __exit_signal(p); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 513e6da..27f1395 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -282,6 +282,10 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, static int max_extfrag_threshold = 1000; #endif +extern int sysctl_dentry_debug_trace; +extern int sysctl_dentry_debug_delay; +extern int sysctl_dentry_debug_pid; + static struct ctl_table kern_table[] = { { .procname = "sched_child_runs_first", @@ -1498,6 +1502,30 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, .proc_handler = proc_dointvec, .extra1 = &zero, }, + { + .procname = "dentry_debug_trace", + .data = &sysctl_dentry_debug_trace, + .maxlen = sizeof(sysctl_dentry_debug_trace), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &zero, + }, + { + .procname = "dentry_debug_delay", + .data = &sysctl_dentry_debug_delay, + .maxlen = sizeof(sysctl_dentry_debug_delay), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &zero, + }, + { + .procname = "dentry_debug_pid", + .data = &sysctl_dentry_debug_pid, + .maxlen = sizeof(sysctl_dentry_debug_pid), + .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = &zero, + }, #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT { .procname = "legacy_va_layout", Signed-off-by: Wen Yang <wenyang(a)linux.alibaba.com> Cc: Pavel Emelyanov <xemul(a)openvz.org> Cc: Oleg Nesterov <oleg(a)tv-sign.ru> Cc: Sukadev Bhattiprolu <sukadev(a)us.ibm.com> Cc: Paul Menage <menage(a)google.com> Cc: "Eric W. Biederman" <ebiederm(a)xmission.com> Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Cc: <stable(a)vger.kernel.org> -- 1.8.3.1

4 years, 6 months

4
18
0 0

[PATCH 4.19 1/7] mm: drop meminit_pfn_in_nid as it is redundant

by Pavel Tatashin

From: Alexander Duyck <alexander.h.duyck(a)linux.intel.com> From: Alexander Duyck <alexander.h.duyck(a)linux.intel.com> commit 56ec43d8b02719402c9fcf984feb52ec2300f8a5 upstream. As best as I can tell the meminit_pfn_in_nid call is completely redundant. The deferred memory initialization is already making use of for_each_free_mem_range which in turn will call into __next_mem_range which will only return a memory range if it matches the node ID provided assuming it is not NUMA_NO_NODE. I am operating on the assumption that there are no zones or pgdata_t structures that have a NUMA node of NUMA_NO_NODE associated with them. If that is the case then __next_mem_range will never return a memory range that doesn't match the zone's node ID and as such the check is redundant. So one piece I would like to verify on this is if this works for ia64. Technically it was using a different approach to get the node ID, but it seems to have the node ID also encoded into the memblock. So I am assuming this is okay, but would like to get confirmation on that. On my x86_64 test system with 384GB of memory per node I saw a reduction in initialization time from 2.80s to 1.85s as a result of this patch. Link: http://lkml.kernel.org/r/20190405221219.12227.93957.stgit@localhost.localdo… Signed-off-by: Alexander Duyck <alexander.h.duyck(a)linux.intel.com> Reviewed-by: Pavel Tatashin <pavel.tatashin(a)microsoft.com> Acked-by: Michal Hocko <mhocko(a)suse.com> Cc: Mike Rapoport <rppt(a)linux.ibm.com> Cc: Dan Williams <dan.j.williams(a)intel.com> Cc: Dave Jiang <dave.jiang(a)intel.com> Cc: David S. Miller <davem(a)davemloft.net> Cc: Ingo Molnar <mingo(a)kernel.org> Cc: Khalid Aziz <khalid.aziz(a)oracle.com> Cc: "Kirill A. Shutemov" <kirill.shutemov(a)linux.intel.com> Cc: Laurent Dufour <ldufour(a)linux.vnet.ibm.com> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Mel Gorman <mgorman(a)techsingularity.net> Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com> Cc: Pavel Tatashin <pasha.tatashin(a)soleen.com> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: <yi.z.zhang(a)linux.intel.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org> Signed-off-by: Pavel Tatashin <pasha.tatashin(a)soleen.com> --- mm/page_alloc.c | 51 ++++++++++++++----------------------------------- 1 file changed, 14 insertions(+), 37 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d8c3051387d1..c86a117acb5b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1321,36 +1321,22 @@ int __meminit early_pfn_to_nid(unsigned long pfn) #endif #ifdef CONFIG_NODES_SPAN_OTHER_NODES -static inline bool __meminit __maybe_unused -meminit_pfn_in_nid(unsigned long pfn, int node, - struct mminit_pfnnid_cache *state) +/* Only safe to use early in boot when initialisation is single-threaded */ +static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node) { int nid; - nid = __early_pfn_to_nid(pfn, state); + nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache); if (nid >= 0 && nid != node) return false; return true; } -/* Only safe to use early in boot when initialisation is single-threaded */ -static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node) -{ - return meminit_pfn_in_nid(pfn, node, &early_pfnnid_cache); -} - #else - static inline bool __meminit early_pfn_in_nid(unsigned long pfn, int node) { return true; } -static inline bool __meminit __maybe_unused -meminit_pfn_in_nid(unsigned long pfn, int node, - struct mminit_pfnnid_cache *state) -{ - return true; -} #endif @@ -1480,21 +1466,13 @@ static inline void __init pgdat_init_report_one_done(void) * * Then, we check if a current large page is valid by only checking the validity * of the head pfn. - * - * Finally, meminit_pfn_in_nid is checked on systems where pfns can interleave - * within a node: a pfn is between start and end of a node, but does not belong - * to this memory node. */ -static inline bool __init -deferred_pfn_valid(int nid, unsigned long pfn, - struct mminit_pfnnid_cache *nid_init_state) +static inline bool __init deferred_pfn_valid(unsigned long pfn) { if (!pfn_valid_within(pfn)) return false; if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn)) return false; - if (!meminit_pfn_in_nid(pfn, nid, nid_init_state)) - return false; return true; } @@ -1502,15 +1480,14 @@ deferred_pfn_valid(int nid, unsigned long pfn, * Free pages to buddy allocator. Try to free aligned pages in * pageblock_nr_pages sizes. */ -static void __init deferred_free_pages(int nid, int zid, unsigned long pfn, +static void __init deferred_free_pages(unsigned long pfn, unsigned long end_pfn) { - struct mminit_pfnnid_cache nid_init_state = { }; unsigned long nr_pgmask = pageblock_nr_pages - 1; unsigned long nr_free = 0; for (; pfn < end_pfn; pfn++) { - if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) { + if (!deferred_pfn_valid(pfn)) { deferred_free_range(pfn - nr_free, nr_free); nr_free = 0; } else if (!(pfn & nr_pgmask)) { @@ -1530,17 +1507,18 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn, * by performing it only once every pageblock_nr_pages. * Return number of pages initialized. */ -static unsigned long __init deferred_init_pages(int nid, int zid, +static unsigned long __init deferred_init_pages(struct zone *zone, unsigned long pfn, unsigned long end_pfn) { - struct mminit_pfnnid_cache nid_init_state = { }; unsigned long nr_pgmask = pageblock_nr_pages - 1; + int nid = zone_to_nid(zone); unsigned long nr_pages = 0; + int zid = zone_idx(zone); struct page *page = NULL; for (; pfn < end_pfn; pfn++) { - if (!deferred_pfn_valid(nid, pfn, &nid_init_state)) { + if (!deferred_pfn_valid(pfn)) { page = NULL; continue; } else if (!page || !(pfn & nr_pgmask)) { @@ -1603,12 +1581,12 @@ static int __init deferred_init_memmap(void *data) for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); - nr_pages += deferred_init_pages(nid, zid, spfn, epfn); + nr_pages += deferred_init_pages(zone, spfn, epfn); } for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa)); - deferred_free_pages(nid, zid, spfn, epfn); + deferred_free_pages(spfn, epfn); } pgdat_resize_unlock(pgdat, &flags); @@ -1640,7 +1618,6 @@ static int __init deferred_init_memmap(void *data) static noinline bool __init deferred_grow_zone(struct zone *zone, unsigned int order) { - int zid = zone_idx(zone); int nid = zone_to_nid(zone); pg_data_t *pgdat = NODE_DATA(nid); unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION); @@ -1690,7 +1667,7 @@ deferred_grow_zone(struct zone *zone, unsigned int order) while (spfn < epfn && nr_pages < nr_pages_needed) { t = ALIGN(spfn + PAGES_PER_SECTION, PAGES_PER_SECTION); first_deferred_pfn = min(t, epfn); - nr_pages += deferred_init_pages(nid, zid, spfn, + nr_pages += deferred_init_pages(zone, spfn, first_deferred_pfn); spfn = first_deferred_pfn; } @@ -1702,7 +1679,7 @@ deferred_grow_zone(struct zone *zone, unsigned int order) for_each_free_mem_range(i, nid, MEMBLOCK_NONE, &spa, &epa, NULL) { spfn = max_t(unsigned long, first_init_pfn, PFN_UP(spa)); epfn = min_t(unsigned long, first_deferred_pfn, PFN_DOWN(epa)); - deferred_free_pages(nid, zid, spfn, epfn); + deferred_free_pages(spfn, epfn); if (first_deferred_pfn == epfn) break; -- 2.25.1

4 years, 6 months

2
7
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror January 2021