From: Xiubo Li xiubli@redhat.com
When releasing the file locks the fl->fl_file memory could be already released by another thread in filp_close(), so we couldn't depend on fl->fl_file to get the inode. Just use a xarray to record the opened files for each inode.
Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/57986 Signed-off-by: Xiubo Li xiubli@redhat.com --- fs/ceph/file.c | 9 +++++++++ fs/ceph/inode.c | 4 ++++ fs/ceph/locks.c | 17 ++++++++++++++++- fs/ceph/super.h | 4 ++++ 4 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 85afcbbb5648..cb4a9c52df27 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -231,6 +231,13 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, fi->flags |= CEPH_F_SYNC;
file->private_data = fi; + + ret = xa_insert(&ci->i_opened_files, (unsigned long)file, + CEPH_FILP_AVAILABLE, GFP_KERNEL); + if (ret) { + kmem_cache_free(ceph_file_cachep, fi); + return ret; + } }
ceph_get_fmode(ci, fmode, 1); @@ -932,6 +939,8 @@ int ceph_release(struct inode *inode, struct file *file) dout("release inode %p regular file %p\n", inode, file); WARN_ON(!list_empty(&fi->rw_contexts));
+ xa_erase(&ci->i_opened_files, (unsigned long)file); + ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); ceph_put_fmode(ci, fi->fmode, 1);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 77b0cd9af370..554450838e44 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -619,6 +619,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock);
+ xa_init(&ci->i_opened_files); + ci->i_snap_realm = NULL; INIT_LIST_HEAD(&ci->i_snap_realm_item); INIT_LIST_HEAD(&ci->i_snap_flush_item); @@ -637,6 +639,8 @@ void ceph_free_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode);
+ xa_destroy(&ci->i_opened_files); + kfree(ci->i_symlink); #ifdef CONFIG_FS_ENCRYPTION kfree(ci->fscrypt_auth); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d8385dd0076e..a176a30badd0 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -42,9 +42,10 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
static void ceph_fl_release_lock(struct file_lock *fl) { - struct ceph_file_info *fi = fl->fl_file->private_data; struct inode *inode = fl->fl_u.ceph_fl.fl_inode; struct ceph_inode_info *ci; + struct ceph_file_info *fi; + void *val;
/* * If inode is NULL it should be a request file_lock, @@ -54,6 +55,20 @@ static void ceph_fl_release_lock(struct file_lock *fl) return;
ci = ceph_inode(inode); + + /* + * For Posix-style locks, it may race between filp_close()s, + * and it's possible that the 'file' memory pointed by + * 'fl->fl_file' has been released. If so just skip it. + */ + rcu_read_lock(); + val = xa_load(&ci->i_opened_files, (unsigned long)fl->fl_file); + if (val == CEPH_FILP_AVAILABLE) { + fi = fl->fl_file->private_data; + atomic_dec(&fi->num_locks); + } + rcu_read_unlock(); + if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7b75a84ba48d..b3e89192cbec 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -329,6 +329,8 @@ struct ceph_inode_xattrs_info { u64 version, index_version; };
+#define CEPH_FILP_AVAILABLE xa_mk_value(1) + /* * Ceph inode. */ @@ -434,6 +436,8 @@ struct ceph_inode_info { struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock;
+ struct xarray i_opened_files; + union { struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
Hi,
I love your patch! Perhaps something to improve:
[auto build test WARNING on ceph-client/testing] [also build test WARNING on ceph-client/for-linus linus/master v6.1-rc5 next-20221111] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/xiubli-redhat-com/ceph-fix-th... base: https://github.com/ceph/ceph-client.git testing patch link: https://lore.kernel.org/r/20221114051901.15371-3-xiubli%40redhat.com patch subject: [PATCH 2/2 v2] ceph: use a xarray to record all the opened files for each inode config: hexagon-randconfig-r041-20221114 compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 463da45892e2d2a262277b91b96f5f8c05dc25d0) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/232cc8f1dbeddb308946202a7c67ee... git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review xiubli-redhat-com/ceph-fix-the-use-after-free-bug-for-file_lock/20221114-132233 git checkout 232cc8f1dbeddb308946202a7c67ee4d20451ae7 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash fs/ceph/
If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot lkp@intel.com
All warnings (new ones prefixed by >>):
In file included from fs/ceph/locks.c:8: In file included from fs/ceph/super.h:8: In file included from include/linux/backing-dev.h:16: In file included from include/linux/writeback.h:13: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __raw_readb(PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu' #define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) ^ In file included from fs/ceph/locks.c:8: In file included from fs/ceph/super.h:8: In file included from include/linux/backing-dev.h:16: In file included from include/linux/writeback.h:13: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu' #define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) ^ In file included from fs/ceph/locks.c:8: In file included from fs/ceph/super.h:8: In file included from include/linux/backing-dev.h:16: In file included from include/linux/writeback.h:13: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writeb(value, PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^
fs/ceph/locks.c:66:6: warning: variable 'fi' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
if (val == CEPH_FILP_AVAILABLE) { ^~~~~~~~~~~~~~~~~~~~~~~~~~ fs/ceph/locks.c:79:14: note: uninitialized use occurs here atomic_dec(&fi->num_locks); ^~ fs/ceph/locks.c:66:2: note: remove the 'if' if its condition is always true if (val == CEPH_FILP_AVAILABLE) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/ceph/locks.c:47:27: note: initialize the variable 'fi' to silence this warning struct ceph_file_info *fi; ^ = NULL 7 warnings generated.
vim +66 fs/ceph/locks.c
42 43 static void ceph_fl_release_lock(struct file_lock *fl) 44 { 45 struct inode *inode = fl->fl_u.ceph_fl.fl_inode; 46 struct ceph_inode_info *ci; 47 struct ceph_file_info *fi; 48 void *val; 49 50 /* 51 * If inode is NULL it should be a request file_lock, 52 * nothing we can do. 53 */ 54 if (!inode) 55 return; 56 57 ci = ceph_inode(inode); 58 59 /* 60 * For Posix-style locks, it may race between filp_close()s, 61 * and it's possible that the 'file' memory pointed by 62 * 'fl->fl_file' has been released. If so just skip it. 63 */ 64 rcu_read_lock(); 65 val = xa_load(&ci->i_opened_files, (unsigned long)fl->fl_file);
66 if (val == CEPH_FILP_AVAILABLE) {
67 fi = fl->fl_file->private_data; 68 atomic_dec(&fi->num_locks); 69 } 70 rcu_read_unlock(); 71 72 if (atomic_dec_and_test(&ci->i_filelock_ref)) { 73 /* clear error when all locks are released */ 74 spin_lock(&ci->i_ceph_lock); 75 ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; 76 spin_unlock(&ci->i_ceph_lock); 77 } 78 iput(inode); 79 atomic_dec(&fi->num_locks); 80 } 81
Hi
Thanks for reporting this.
I will fix it in the next version.
- Xiubo
On 14/11/2022 16:54, kernel test robot wrote:
Hi,
I love your patch! Perhaps something to improve:
[auto build test WARNING on ceph-client/testing] [also build test WARNING on ceph-client/for-linus linus/master v6.1-rc5 next-20221111] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/xiubli-redhat-com/ceph-fix-th... base: https://github.com/ceph/ceph-client.git testing patch link: https://lore.kernel.org/r/20221114051901.15371-3-xiubli%40redhat.com patch subject: [PATCH 2/2 v2] ceph: use a xarray to record all the opened files for each inode config: hexagon-randconfig-r041-20221114 compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 463da45892e2d2a262277b91b96f5f8c05dc25d0) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/232cc8f1dbeddb308946202a7c67ee... git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review xiubli-redhat-com/ceph-fix-the-use-after-free-bug-for-file_lock/20221114-132233 git checkout 232cc8f1dbeddb308946202a7c67ee4d20451ae7 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=hexagon SHELL=/bin/bash fs/ceph/
If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot lkp@intel.com
All warnings (new ones prefixed by >>):
In file included from fs/ceph/locks.c:8: In file included from fs/ceph/super.h:8: In file included from include/linux/backing-dev.h:16: In file included from include/linux/writeback.h:13: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __raw_readb(PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu' #define __le16_to_cpu(x) ((__force __u16)(__le16)(x)) ^ In file included from fs/ceph/locks.c:8: In file included from fs/ceph/super.h:8: In file included from include/linux/backing-dev.h:16: In file included from include/linux/writeback.h:13: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr)); ~~~~~~~~~~ ^ include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu' #define __le32_to_cpu(x) ((__force __u32)(__le32)(x)) ^ In file included from fs/ceph/locks.c:8: In file included from fs/ceph/super.h:8: In file included from include/linux/backing-dev.h:16: In file included from include/linux/writeback.h:13: In file included from include/linux/blk_types.h:10: In file included from include/linux/bvec.h:10: In file included from include/linux/highmem.h:12: In file included from include/linux/hardirq.h:11: In file included from ./arch/hexagon/include/generated/asm/hardirq.h:1: In file included from include/asm-generic/hardirq.h:17: In file included from include/linux/irq.h:20: In file included from include/linux/io.h:13: In file included from arch/hexagon/include/asm/io.h:334: include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writeb(value, PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^ include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic] __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr); ~~~~~~~~~~ ^
fs/ceph/locks.c:66:6: warning: variable 'fi' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
if (val == CEPH_FILP_AVAILABLE) { ^~~~~~~~~~~~~~~~~~~~~~~~~~ fs/ceph/locks.c:79:14: note: uninitialized use occurs here atomic_dec(&fi->num_locks); ^~ fs/ceph/locks.c:66:2: note: remove the 'if' if its condition is always true if (val == CEPH_FILP_AVAILABLE) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fs/ceph/locks.c:47:27: note: initialize the variable 'fi' to silence this warning struct ceph_file_info *fi; ^ = NULL 7 warnings generated.
vim +66 fs/ceph/locks.c
42 43 static void ceph_fl_release_lock(struct file_lock *fl) 44 { 45 struct inode *inode = fl->fl_u.ceph_fl.fl_inode; 46 struct ceph_inode_info *ci; 47 struct ceph_file_info *fi; 48 void *val; 49 50 /* 51 * If inode is NULL it should be a request file_lock, 52 * nothing we can do. 53 */ 54 if (!inode) 55 return; 56 57 ci = ceph_inode(inode); 58 59 /* 60 * For Posix-style locks, it may race between filp_close()s, 61 * and it's possible that the 'file' memory pointed by 62 * 'fl->fl_file' has been released. If so just skip it. 63 */ 64 rcu_read_lock(); 65 val = xa_load(&ci->i_opened_files, (unsigned long)fl->fl_file);
66 if (val == CEPH_FILP_AVAILABLE) {
67 fi = fl->fl_file->private_data; 68 atomic_dec(&fi->num_locks); 69 } 70 rcu_read_unlock(); 71 72 if (atomic_dec_and_test(&ci->i_filelock_ref)) { 73 /* clear error when all locks are released */ 74 spin_lock(&ci->i_ceph_lock); 75 ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK; 76 spin_unlock(&ci->i_ceph_lock); 77 } 78 iput(inode); 79 atomic_dec(&fi->num_locks); 80 } 81
On Mon, 2022-11-14 at 13:19 +0800, xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
When releasing the file locks the fl->fl_file memory could be already released by another thread in filp_close(), so we couldn't depend on fl->fl_file to get the inode. Just use a xarray to record the opened files for each inode.
Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/57986 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/file.c | 9 +++++++++ fs/ceph/inode.c | 4 ++++ fs/ceph/locks.c | 17 ++++++++++++++++- fs/ceph/super.h | 4 ++++ 4 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 85afcbbb5648..cb4a9c52df27 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -231,6 +231,13 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, fi->flags |= CEPH_F_SYNC; file->private_data = fi;
ret = xa_insert(&ci->i_opened_files, (unsigned long)file,
CEPH_FILP_AVAILABLE, GFP_KERNEL);
if (ret) {
kmem_cache_free(ceph_file_cachep, fi);
return ret;
}}
ceph_get_fmode(ci, fmode, 1); @@ -932,6 +939,8 @@ int ceph_release(struct inode *inode, struct file *file) dout("release inode %p regular file %p\n", inode, file); WARN_ON(!list_empty(&fi->rw_contexts));
xa_erase(&ci->i_opened_files, (unsigned long)file);
- ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); ceph_put_fmode(ci, fi->fmode, 1);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 77b0cd9af370..554450838e44 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -619,6 +619,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock);
- xa_init(&ci->i_opened_files);
- ci->i_snap_realm = NULL; INIT_LIST_HEAD(&ci->i_snap_realm_item); INIT_LIST_HEAD(&ci->i_snap_flush_item);
@@ -637,6 +639,8 @@ void ceph_free_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode);
- xa_destroy(&ci->i_opened_files);
- kfree(ci->i_symlink);
#ifdef CONFIG_FS_ENCRYPTION kfree(ci->fscrypt_auth); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d8385dd0076e..a176a30badd0 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -42,9 +42,10 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) static void ceph_fl_release_lock(struct file_lock *fl) {
- struct ceph_file_info *fi = fl->fl_file->private_data; struct inode *inode = fl->fl_u.ceph_fl.fl_inode; struct ceph_inode_info *ci;
- struct ceph_file_info *fi;
- void *val;
/* * If inode is NULL it should be a request file_lock, @@ -54,6 +55,20 @@ static void ceph_fl_release_lock(struct file_lock *fl) return; ci = ceph_inode(inode);
- /*
* For Posix-style locks, it may race between filp_close()s,
* and it's possible that the 'file' memory pointed by
* 'fl->fl_file' has been released. If so just skip it.
*/
- rcu_read_lock();
- val = xa_load(&ci->i_opened_files, (unsigned long)fl->fl_file);
- if (val == CEPH_FILP_AVAILABLE) {
fi = fl->fl_file->private_data;
atomic_dec(&fi->num_locks);
Don't you need to remove the old atomic_dec from this function if you move it here?
- }
- rcu_read_unlock();
- if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7b75a84ba48d..b3e89192cbec 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -329,6 +329,8 @@ struct ceph_inode_xattrs_info { u64 version, index_version; }; +#define CEPH_FILP_AVAILABLE xa_mk_value(1)
/*
- Ceph inode.
*/ @@ -434,6 +436,8 @@ struct ceph_inode_info { struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock;
- struct xarray i_opened_files;
- union { struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
This looks like it'll work, but it's a lot of extra work, having to track this extra xarray just on the off chance that one of these fd's might have file locks. The num_locks field is only checked in one place in ceph_get_caps.
Here's what I'd recommend instead:
Have ceph_get_caps look at the lists in inode->i_flctx and see whether any of its locks have an fl_file that matches the @filp argument in that function. Most inodes never get any file locks, so in most cases this will turn out to just be a NULL pointer check for i_flctx anyway.
Then you can just remove the num_locks field and call the new helper from ceph_get_caps instead. I'll send along a proposed patch for the helper in a bit.
On 14/11/2022 21:39, Jeff Layton wrote:
On Mon, 2022-11-14 at 13:19 +0800, xiubli@redhat.com wrote:
From: Xiubo Li xiubli@redhat.com
When releasing the file locks the fl->fl_file memory could be already released by another thread in filp_close(), so we couldn't depend on fl->fl_file to get the inode. Just use a xarray to record the opened files for each inode.
Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/57986 Signed-off-by: Xiubo Li xiubli@redhat.com
fs/ceph/file.c | 9 +++++++++ fs/ceph/inode.c | 4 ++++ fs/ceph/locks.c | 17 ++++++++++++++++- fs/ceph/super.h | 4 ++++ 4 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 85afcbbb5648..cb4a9c52df27 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -231,6 +231,13 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, fi->flags |= CEPH_F_SYNC; file->private_data = fi;
ret = xa_insert(&ci->i_opened_files, (unsigned long)file,
CEPH_FILP_AVAILABLE, GFP_KERNEL);
if (ret) {
kmem_cache_free(ceph_file_cachep, fi);
return ret;
}}
ceph_get_fmode(ci, fmode, 1); @@ -932,6 +939,8 @@ int ceph_release(struct inode *inode, struct file *file) dout("release inode %p regular file %p\n", inode, file); WARN_ON(!list_empty(&fi->rw_contexts));
xa_erase(&ci->i_opened_files, (unsigned long)file);
- ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); ceph_put_fmode(ci, fi->fmode, 1);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 77b0cd9af370..554450838e44 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -619,6 +619,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_unsafe_iops); spin_lock_init(&ci->i_unsafe_lock);
- xa_init(&ci->i_opened_files);
- ci->i_snap_realm = NULL; INIT_LIST_HEAD(&ci->i_snap_realm_item); INIT_LIST_HEAD(&ci->i_snap_flush_item);
@@ -637,6 +639,8 @@ void ceph_free_inode(struct inode *inode) { struct ceph_inode_info *ci = ceph_inode(inode);
- xa_destroy(&ci->i_opened_files);
- kfree(ci->i_symlink); #ifdef CONFIG_FS_ENCRYPTION kfree(ci->fscrypt_auth);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d8385dd0076e..a176a30badd0 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -42,9 +42,10 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) static void ceph_fl_release_lock(struct file_lock *fl) {
- struct ceph_file_info *fi = fl->fl_file->private_data; struct inode *inode = fl->fl_u.ceph_fl.fl_inode; struct ceph_inode_info *ci;
- struct ceph_file_info *fi;
- void *val;
/* * If inode is NULL it should be a request file_lock, @@ -54,6 +55,20 @@ static void ceph_fl_release_lock(struct file_lock *fl) return; ci = ceph_inode(inode);
- /*
* For Posix-style locks, it may race between filp_close()s,
* and it's possible that the 'file' memory pointed by
* 'fl->fl_file' has been released. If so just skip it.
*/
- rcu_read_lock();
- val = xa_load(&ci->i_opened_files, (unsigned long)fl->fl_file);
- if (val == CEPH_FILP_AVAILABLE) {
fi = fl->fl_file->private_data;
atomic_dec(&fi->num_locks);
Don't you need to remove the old atomic_dec from this function if you move it here?
Yeah, I thought I have removed that. Not sure why I added it back.
- }
- rcu_read_unlock();
- if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7b75a84ba48d..b3e89192cbec 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -329,6 +329,8 @@ struct ceph_inode_xattrs_info { u64 version, index_version; }; +#define CEPH_FILP_AVAILABLE xa_mk_value(1)
- /*
*/
- Ceph inode.
@@ -434,6 +436,8 @@ struct ceph_inode_info { struct list_head i_unsafe_iops; /* uncommitted mds inode ops */ spinlock_t i_unsafe_lock;
- struct xarray i_opened_files;
- union { struct ceph_snap_realm *i_snap_realm; /* snap realm (if caps) */ struct ceph_snapid_map *i_snapid_map; /* snapid -> dev_t */
This looks like it'll work, but it's a lot of extra work, having to track this extra xarray just on the off chance that one of these fd's might have file locks. The num_locks field is only checked in one place in ceph_get_caps.
Here's what I'd recommend instead:
Have ceph_get_caps look at the lists in inode->i_flctx and see whether any of its locks have an fl_file that matches the @filp argument in that function. Most inodes never get any file locks, so in most cases this will turn out to just be a NULL pointer check for i_flctx anyway.
Then you can just remove the num_locks field and call the new helper from ceph_get_caps instead. I'll send along a proposed patch for the helper in a bit.
Sure, Thanks Jeff.
- Xiubo
linux-stable-mirror@lists.linaro.org