From: Xiubo Li <xiubli(a)redhat.com>
When ceph releases a file_lock, it tries to get the inode pointer
from fl->fl_file, whose memory could already have been released by
another thread in filp_close(), because in the VFS layer fl->fl_file
doesn't increase the file's reference counter.
Switch to using ceph-dedicated lock info to track the inode.
In ceph_fl_release_lock() we should skip all the operations if
fl->fl_u.ceph_fl.fl_inode is not set, which should only be the case for
a request file_lock. fl->fl_u.ceph_fl.fl_inode is set when the lock is
inserted into the inode lock list, which is when the lock is copied.
Cc: stable(a)vger.kernel.org
URL: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
fs/ceph/locks.c | 18 +++++++++++++++---
include/linux/ceph/ceph_fs_fl.h | 26 ++++++++++++++++++++++++++
include/linux/fs.h | 2 ++
3 files changed, 43 insertions(+), 3 deletions(-)
create mode 100644 include/linux/ceph/ceph_fs_fl.h
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 3e2843e86e27..d8385dd0076e 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -34,22 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
{
struct ceph_file_info *fi = dst->fl_file->private_data;
struct inode *inode = file_inode(dst->fl_file);
+
atomic_inc(&ceph_inode(inode)->i_filelock_ref);
atomic_inc(&fi->num_locks);
+ dst->fl_u.ceph_fl.fl_inode = igrab(inode);
}
static void ceph_fl_release_lock(struct file_lock *fl)
{
struct ceph_file_info *fi = fl->fl_file->private_data;
- struct inode *inode = file_inode(fl->fl_file);
- struct ceph_inode_info *ci = ceph_inode(inode);
- atomic_dec(&fi->num_locks);
+ struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
+ struct ceph_inode_info *ci;
+
+ /*
+ * If inode is NULL it should be a request file_lock,
+ * nothing we can do.
+ */
+ if (!inode)
+ return;
+
+ ci = ceph_inode(inode);
if (atomic_dec_and_test(&ci->i_filelock_ref)) {
/* clear error when all locks are released */
spin_lock(&ci->i_ceph_lock);
ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
spin_unlock(&ci->i_ceph_lock);
}
+ iput(inode);
+ atomic_dec(&fi->num_locks);
}
static const struct file_lock_operations ceph_fl_lock_ops = {
diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
new file mode 100644
index 000000000000..02a412b26095
--- /dev/null
+++ b/include/linux/ceph/ceph_fs_fl.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ceph_fs_fl.h - Ceph constants and data types to share between kernel and
+ * user space.
+ *
+ * Most types in this file are defined as little-endian, and are
+ * primarily intended to describe data structures that pass over the
+ * wire or that are stored on disk.
+ *
+ * LGPL2
+ */
+
+#ifndef CEPH_FS_FL_H
+#define CEPH_FS_FL_H
+
+#include <linux/fs.h>
+
+/*
+ * Ceph lock info
+ */
+
+struct ceph_lock_info {
+ struct inode *fl_inode;
+};
+
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e654435f1651..db4810d19e26 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
/* that will die - we need it for nfs_lock_info */
#include <linux/nfs_fs_i.h>
+#include <linux/ceph/ceph_fs_fl.h>
/*
* struct file_lock represents a generic "file lock". It's used to represent
@@ -1119,6 +1120,7 @@ struct file_lock {
int state; /* state of grant or error if -ve */
unsigned int debug_id;
} afs;
+ struct ceph_lock_info ceph_fl;
} fl_u;
} __randomize_layout;
--
2.31.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
Possible dependencies:
3a876060892b ("drm/amdkfd: Migrate in CPU page fault use current mm")
acac270d0982 ("drm/amdkfd: Add migration SMI event")
e0f1e65b836c ("drm/amdkfd: Add GPU recoverable fault SMI event")
9527b9caf82b ("drm/amdkfd: evict svm bo worker handle error")
7ad153db5859 ("drm/amdkfd: handle VMA remove race")
740a451b0797 ("drm/amdkfd: Handle incomplete migration to system memory")
33c6bd989d5e ("drm/amdkfd: debug message to count successfully migrated pages")
75fa98d6e458 ("drm/amdkfd: clarify the origin of cpages returned by migration functions")
ca432dcc27a1 ("drm/amdkfd: handle svm partial migration cpages 0")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3a876060892ba52dd67d197c78b955e62657d906 Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang(a)amd.com>
Date: Thu, 8 Sep 2022 17:56:09 -0400
Subject: [PATCH] drm/amdkfd: Migrate in CPU page fault use current mm
migrate_vma_setup shows the warning below because we don't hold the other
process's mm mmap_lock. We should use the current vmf->vma->vm_mm instead;
the caller already holds the current mmap lock inside the CPU page fault handler.
WARNING: CPU: 10 PID: 3054 at include/linux/mmap_lock.h:155 find_vma
Call Trace:
walk_page_range+0x76/0x150
migrate_vma_setup+0x18a/0x640
svm_migrate_vram_to_ram+0x245/0xa10 [amdgpu]
svm_migrate_to_ram+0x36f/0x470 [amdgpu]
do_swap_page+0xcfe/0xec0
__handle_mm_fault+0x96b/0x15e0
handle_mm_fault+0x13f/0x3e0
do_user_addr_fault+0x1e7/0x690
Fixes: e1f84eef313f ("drm/amdkfd: handle CPU fault on COW mapping")
Signed-off-by: Philip Yang <Philip.Yang(a)amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling(a)amd.com>
Signed-off-by: Alex Deucher <alexander.deucher(a)amd.com>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 929dec1da0e4..7522bf2d2f57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -949,7 +949,8 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
goto out_unlock_prange;
}
- r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
+ r = svm_migrate_vram_to_ram(prange, vmf->vma->vm_mm,
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
if (r)
pr_debug("failed %d migrate svms 0x%p range 0x%p [0x%lx 0x%lx]\n",
r, prange->svms, prange, prange->start, prange->last);