From: Xiubo Li <xiubli(a)redhat.com>
The lock order is incorrect between denty and its parent, we should
always make sure that the parent get the lock first.
Switch to use the 'dget_parent()' to get the parent dentry and also
keep holding the parent until the corresponding inode is not being
refereenced.
Cc: stable(a)vger.kernel.org
Reported-by: Al Viro <viro(a)zeniv.linux.org.uk>
URL: https://www.spinics.net/lists/ceph-devel/msg58622.html
Fixes: adf0d68701c7 ("ceph: fix unsafe dcache access in ceph_encode_dentry_release")
Cc: Jeff Layton <jlayton(a)kernel.org>
Signed-off-by: Xiubo Li <xiubli(a)redhat.com>
---
fs/ceph/caps.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 284424659329..6f7a56a800c2 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4923,6 +4923,11 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
int force = 0;
int ret;
+ if (!dir) {
+ parent = dget_parent(dentry);
+ dir = d_inode(parent);
+ }
+
/*
* force an record for the directory caps if we have a dentry lease.
* this is racy (can't take i_ceph_lock and d_lock together), but it
@@ -4932,14 +4937,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
- if (!dir) {
- parent = dget(dentry->d_parent);
- dir = d_inode(parent);
- }
spin_unlock(&dentry->d_lock);
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
- dput(parent);
cl = ceph_inode_to_client(dir);
spin_lock(&dentry->d_lock);
@@ -4952,8 +4952,10 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) {
int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p);
- if (ret2 < 0)
+ if (ret2 < 0) {
+ dput(parent);
return ret2;
+ }
rel->dname_len = cpu_to_le32(ret2);
*p += ret2;
@@ -4965,6 +4967,8 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
} else {
spin_unlock(&dentry->d_lock);
}
+
+ dput(parent);
return ret;
}
--
2.39.1
Hi Trond and Greg:
LTS 4.19 reported null-ptr-deref BUG as follows:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000080
Call Trace:
nfs_inode_add_request+0x1cc/0x5b8
nfs_setup_write_request+0x1fa/0x1fc
nfs_writepage_setup+0x2d/0x7d
nfs_updatepage+0x8b8/0x936
nfs_write_end+0x61d/0xd45
generic_perform_write+0x19a/0x3f0
nfs_file_write+0x2cc/0x6e5
new_sync_write+0x442/0x560
__vfs_write+0xda/0xef
vfs_write+0x176/0x48b
ksys_write+0x10a/0x1e9
__se_sys_write+0x24/0x29
__x64_sys_write+0x79/0x93
do_syscall_64+0x16d/0x4bb
entry_SYSCALL_64_after_hwframe+0x5c/0xc1
The reason is: generic_error_remove_page set page->mapping to NULL when
nfs server have a fatal error:
nfs_updatepage
nfs_writepage_setup
nfs_setup_write_request
nfs_try_to_update_request // return NULL
nfs_wb_page // return 0
nfs_writepage_locked // return 0
nfs_do_writepage // return 0
nfs_page_async_flush // return 0
nfs_error_is_fatal_on_server
generic_error_remove_page
truncate_inode_page
delete_from_page_cache
__delete_from_page_cache
page_cache_tree_delete
page->mapping = NULL // this is point
nfs_create_request
req->wb_page = page // the page is freed
nfs_inode_add_request
mapping = page_file_mapping(req->wb_page)
return page->mapping
spin_lock(&mapping->private_lock) // mapping is NULL
It is reasonable by reverting the patch "89047634f5ce NFS: Don't
interrupt file writeout due to fatal errors" to fix this bug?
This patch is one patch of patchset [Fix up soft mounts for
NFSv4.x](https://lore.kernel.org/all/20190407175912.23528-1-trond.myklebust…,
the patchset replace custom error reporting mechanism. it seams that we
should merge all the patchset to LTS 4.19, or all patchs should not be
merged. And the "Fixes:" label is not correct, this patch is a
refactoring patch, not for fixing bugs.
From: Tony Lindgren <tony(a)atomide.com>
[ Upstream commit fbb74e56378d8306f214658e3d525a8b3f000c5a ]
We need to check for an active device as otherwise we get warnings
for some mcbsp instances for "Runtime PM usage count underflow!".
Reported-by: Andreas Kemnade <andreas(a)kemnade.info>
Signed-off-by: Tony Lindgren <tony(a)atomide.com>
Link: https://lore.kernel.org/r/20231030052340.13415-1-tony@atomide.com
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
sound/soc/ti/omap-mcbsp.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/sound/soc/ti/omap-mcbsp.c b/sound/soc/ti/omap-mcbsp.c
index fdabed5133e83..b399d86f22777 100644
--- a/sound/soc/ti/omap-mcbsp.c
+++ b/sound/soc/ti/omap-mcbsp.c
@@ -74,14 +74,16 @@ static int omap2_mcbsp_set_clks_src(struct omap_mcbsp *mcbsp, u8 fck_src_id)
return 0;
}
- pm_runtime_put_sync(mcbsp->dev);
+ if (mcbsp->active)
+ pm_runtime_put_sync(mcbsp->dev);
r = clk_set_parent(mcbsp->fclk, fck_src);
if (r)
dev_err(mcbsp->dev, "CLKS: could not clk_set_parent() to %s\n",
src);
- pm_runtime_get_sync(mcbsp->dev);
+ if (mcbsp->active)
+ pm_runtime_get_sync(mcbsp->dev);
clk_put(fck_src);
--
2.42.0
#regzbot introduced: 89c290ea758911e660878e26270e084d862c03b0
#regzbot link: https://gitlab.freedesktop.org/drm/nouveau/-/issues/273
#regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=218124
## Reproducing
1. Boot system to framebuffer console.
2. Run `systemctl suspend`. If undocked without secondary display,
suspend fails. If docked with secondary display, suspend succeeds.
3. Resume from suspend if applicable.
4. System is now in a broken state.
## Testing
- culprit commit is 89c290ea758911e660878e26270e084d862c03b0
- v6.6 fails
- v6.6 with culprit commit reverted does not fail
- Compiled with
<https://gitlab.freedesktop.org/drm/nouveau/uploads/788d7faf22ba2884dcc09d7b…>
## Hardware
- ThinkPad W530 2438-52U
- Dock with Nvidia-connected DVI ports
- Secondary display connected via DVI
- Nvidia Optimus GPU switching system
```console
$ lspci | grep -i vga
00:02.0 VGA compatible controller: Intel Corporation 3rd Gen Core
processor Graphics Controller (rev 09)
01:00.0 VGA compatible controller: NVIDIA Corporation GK107GLM [Quadro
K2000M] (rev a1)
```
## Decoded logs from v6.6
- System is not docked and fails to suspend:
<https://gitlab.freedesktop.org/drm/nouveau/uploads/fb8fdf5a6bed1b1491d2544a…>
- System is docked and fails after resume:
<https://gitlab.freedesktop.org/drm/nouveau/uploads/cb3d5ac55c01f663cd80fa00…>
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x bb32500fb9b78215e4ef6ee8b4345c5f5d7eafb4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023110612-tribute-salvage-1b7f@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bb32500fb9b78215e4ef6ee8b4345c5f5d7eafb4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
Date: Tue, 31 Oct 2023 12:24:53 -0400
Subject: [PATCH] tracing: Have trace_event_file have ref counters
The following can crash the kernel:
# cd /sys/kernel/tracing
# echo 'p:sched schedule' > kprobe_events
# exec 5>>events/kprobes/sched/enable
# > kprobe_events
# exec 5>&-
The above commands:
1. Change directory to the tracefs directory
2. Create a kprobe event (doesn't matter what one)
3. Open bash file descriptor 5 on the enable file of the kprobe event
4. Delete the kprobe event (removes the files too)
5. Close the bash file descriptor 5
The above causes a crash!
BUG: kernel NULL pointer dereference, address: 0000000000000028
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: 0000 [#1] PREEMPT SMP PTI
CPU: 6 PID: 877 Comm: bash Not tainted 6.5.0-rc4-test-00008-g2c6b6b1029d4-dirty #186
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
RIP: 0010:tracing_release_file_tr+0xc/0x50
What happens here is that the kprobe event creates a trace_event_file
"file" descriptor that represents the file in tracefs to the event. It
maintains state of the event (is it enabled for the given instance?).
Opening the "enable" file gets a reference to the event "file" descriptor
via the open file descriptor. When the kprobe event is deleted, the file is
also deleted from the tracefs system which also frees the event "file"
descriptor.
But as the tracefs file is still opened by user space, it will not be
totally removed until the final dput() is called on it. But this is not
true with the event "file" descriptor that is already freed. If the user
does a write to or simply closes the file descriptor it will reference the
event "file" descriptor that was just freed, causing a use-after-free bug.
To solve this, add a ref count to the event "file" descriptor as well as a
new flag called "FREED". The "file" will not be freed until the last
reference is released. But the FREE flag will be set when the event is
removed to prevent any more modifications to that event from happening,
even if there's still a reference to the event "file" descriptor.
Link: https://lore.kernel.org/linux-trace-kernel/20231031000031.1e705592@gandalf.…
Link: https://lore.kernel.org/linux-trace-kernel/20231031122453.7a48b923@gandalf.…
Cc: stable(a)vger.kernel.org
Cc: Mark Rutland <mark.rutland(a)arm.com>
Fixes: f5ca233e2e66d ("tracing: Increase trace array ref count on enable and filter files")
Reported-by: Beau Belgrave <beaub(a)linux.microsoft.com>
Tested-by: Beau Belgrave <beaub(a)linux.microsoft.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 12207dc6722d..696f8dc4aa53 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -492,6 +492,7 @@ enum {
EVENT_FILE_FL_TRIGGER_COND_BIT,
EVENT_FILE_FL_PID_FILTER_BIT,
EVENT_FILE_FL_WAS_ENABLED_BIT,
+ EVENT_FILE_FL_FREED_BIT,
};
extern struct trace_event_file *trace_get_event_file(const char *instance,
@@ -630,6 +631,7 @@ extern int __kprobe_event_add_fields(struct dynevent_cmd *cmd, ...);
* TRIGGER_COND - When set, one or more triggers has an associated filter
* PID_FILTER - When set, the event is filtered based on pid
* WAS_ENABLED - Set when enabled to know to clear trace on module removal
+ * FREED - File descriptor is freed, all fields should be considered invalid
*/
enum {
EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT),
@@ -643,6 +645,7 @@ enum {
EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT),
EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT),
EVENT_FILE_FL_WAS_ENABLED = (1 << EVENT_FILE_FL_WAS_ENABLED_BIT),
+ EVENT_FILE_FL_FREED = (1 << EVENT_FILE_FL_FREED_BIT),
};
struct trace_event_file {
@@ -671,6 +674,7 @@ struct trace_event_file {
* caching and such. Which is mostly OK ;-)
*/
unsigned long flags;
+ atomic_t ref; /* ref count for opened files */
atomic_t sm_ref; /* soft-mode reference counter */
atomic_t tm_ref; /* trigger-mode reference counter */
};
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2539cfc20a97..9aebf904ff97 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4978,6 +4978,20 @@ int tracing_open_file_tr(struct inode *inode, struct file *filp)
if (ret)
return ret;
+ mutex_lock(&event_mutex);
+
+ /* Fail if the file is marked for removal */
+ if (file->flags & EVENT_FILE_FL_FREED) {
+ trace_array_put(file->tr);
+ ret = -ENODEV;
+ } else {
+ event_file_get(file);
+ }
+
+ mutex_unlock(&event_mutex);
+ if (ret)
+ return ret;
+
filp->private_data = inode->i_private;
return 0;
@@ -4988,6 +5002,7 @@ int tracing_release_file_tr(struct inode *inode, struct file *filp)
struct trace_event_file *file = inode->i_private;
trace_array_put(file->tr);
+ event_file_put(file);
return 0;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0e1405abf4f7..b7f4ea25a194 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1669,6 +1669,9 @@ extern void event_trigger_unregister(struct event_command *cmd_ops,
char *glob,
struct event_trigger_data *trigger_data);
+extern void event_file_get(struct trace_event_file *file);
+extern void event_file_put(struct trace_event_file *file);
+
/**
* struct event_trigger_ops - callbacks for trace event triggers
*
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f9e3e24d8796..f29e815ca5b2 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -990,13 +990,35 @@ static void remove_subsystem(struct trace_subsystem_dir *dir)
}
}
+void event_file_get(struct trace_event_file *file)
+{
+ atomic_inc(&file->ref);
+}
+
+void event_file_put(struct trace_event_file *file)
+{
+ if (WARN_ON_ONCE(!atomic_read(&file->ref))) {
+ if (file->flags & EVENT_FILE_FL_FREED)
+ kmem_cache_free(file_cachep, file);
+ return;
+ }
+
+ if (atomic_dec_and_test(&file->ref)) {
+ /* Count should only go to zero when it is freed */
+ if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
+ return;
+ kmem_cache_free(file_cachep, file);
+ }
+}
+
static void remove_event_file_dir(struct trace_event_file *file)
{
eventfs_remove_dir(file->ei);
list_del(&file->list);
remove_subsystem(file->system);
free_event_filter(file->filter);
- kmem_cache_free(file_cachep, file);
+ file->flags |= EVENT_FILE_FL_FREED;
+ event_file_put(file);
}
/*
@@ -1369,7 +1391,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
flags = file->flags;
mutex_unlock(&event_mutex);
- if (!file)
+ if (!file || flags & EVENT_FILE_FL_FREED)
return -ENODEV;
if (flags & EVENT_FILE_FL_ENABLED &&
@@ -1403,7 +1425,7 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
ret = -ENODEV;
mutex_lock(&event_mutex);
file = event_file_data(filp);
- if (likely(file)) {
+ if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) {
ret = tracing_update_buffers(file->tr);
if (ret < 0) {
mutex_unlock(&event_mutex);
@@ -1683,7 +1705,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
file = event_file_data(filp);
- if (file)
+ if (file && !(file->flags & EVENT_FILE_FL_FREED))
print_event_filter(file, s);
mutex_unlock(&event_mutex);
@@ -2902,6 +2924,7 @@ trace_create_new_event(struct trace_event_call *call,
atomic_set(&file->tm_ref, 0);
INIT_LIST_HEAD(&file->triggers);
list_add(&file->list, &tr->events);
+ event_file_get(file);
return file;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 33264e510d16..0c611b281a5b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -2349,6 +2349,9 @@ int apply_event_filter(struct trace_event_file *file, char *filter_string)
struct event_filter *filter = NULL;
int err;
+ if (file->flags & EVENT_FILE_FL_FREED)
+ return -ENODEV;
+
if (!strcmp(strstrip(filter_string), "0")) {
filter_disable(file);
filter = event_filter(file);