From: Andreas Gruenbacher agruenba@redhat.com
commit 0636b34b44589b142700ac137b5f69802cfe2e37 upstream.
When a DLM lockspace is released and there ares still locks in that lockspace, DLM will unlock those locks automatically. Commit fb6791d100d1b started exploiting this behavior to speed up filesystem unmount: gfs2 would simply free glocks it didn't want to unlock and then release the lockspace. This didn't take the bast callbacks for asynchronous lock contention notifications into account, which remain active until until a lock is unlocked or its lockspace is released.
To prevent those callbacks from accessing deallocated objects, put the glocks that should not be unlocked on the sd_dead_glocks list, release the lockspace, and only then free those glocks.
As an additional measure, ignore unexpected ast and bast callbacks if the receiving glock is dead.
Fixes: fb6791d100d1b ("GFS2: skip dlm_unlock calls in unmount") Signed-off-by: Andreas Gruenbacher agruenba@redhat.com Cc: David Teigland teigland@redhat.com
CVE: CVE-2024-38570
[Zhe: sd_glock_wait in gfs2_glock_free_later is not renamed to sd_kill_wait yet. So still use sd_glock_wait in gfs2_glock_free_later in this case.]
Signed-off-by: He Zhe zhe.he@windriver.com --- fs/gfs2/glock.c | 35 ++++++++++++++++++++++++++++++++--- fs/gfs2/glock.h | 1 + fs/gfs2/incore.h | 1 + fs/gfs2/lock_dlm.c | 13 +++++++++++-- fs/gfs2/ops_fstype.c | 1 + fs/gfs2/super.c | 3 --- 6 files changed, 46 insertions(+), 8 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b0f01a8e3776..11206d810344 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -159,19 +159,46 @@ static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) return true; }
-void gfs2_glock_free(struct gfs2_glock *gl) +static void __gfs2_glock_free(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); +} + +void gfs2_glock_free(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + __gfs2_glock_free(gl); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_glock_wait); }
+void gfs2_glock_free_later(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + spin_lock(&lru_lock); + list_add(&gl->gl_lru, &sdp->sd_dead_glocks); + spin_unlock(&lru_lock); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_glock_wait); +} + +static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp) +{ + struct list_head *list = &sdp->sd_dead_glocks; + + while(!list_empty(list)) { + struct gfs2_glock *gl; + + gl = list_first_entry(list, struct gfs2_glock, gl_lru); + list_del_init(&gl->gl_lru); + __gfs2_glock_free(gl); + } +} + /** * gfs2_glock_hold() - increment reference count on glock * @gl: The glock to hold @@ -2016,6 +2043,8 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) wait_event_timeout(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0, HZ * 600); + gfs2_lm_unmount(sdp); + gfs2_free_dead_glocks(sdp); glock_hash_walk(dump_glock_func, sdp); }
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 53813364517b..b81b369e7485 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -253,6 +253,7 @@ extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); extern void gfs2_glock_add_to_lru(struct gfs2_glock *gl); extern void gfs2_glock_free(struct gfs2_glock *gl); +extern void gfs2_glock_free_later(struct gfs2_glock *gl);
extern int __init gfs2_glock_init(void); extern void gfs2_glock_exit(void); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index f8858d995b24..44cee9a4eef6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -863,6 +863,7 @@ struct gfs2_sbd { struct gfs2_holder sd_freeze_gh; atomic_t sd_freeze_state; struct mutex sd_freeze_mutex; + struct list_head sd_dead_glocks;
char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 5564aa8b4592..9aad03f0dcdf 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -118,6 +118,11 @@ static void gdlm_ast(void *arg) struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state;
+ /* If the glock is dead, we only react to a dlm_unlock() reply. */ + if (__lockref_is_dead(&gl->gl_lockref) && + gl->gl_lksb.sb_status != -DLM_EUNLOCK) + return; + gfs2_update_reply_times(gl); BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
@@ -168,6 +173,9 @@ static void gdlm_bast(void *arg, int mode) { struct gfs2_glock *gl = arg;
+ if (__lockref_is_dead(&gl->gl_lockref)) + return; + switch (mode) { case DLM_LOCK_EX: gfs2_glock_cb(gl, LM_ST_UNLOCKED); @@ -286,6 +294,8 @@ static void gdlm_put_lock(struct gfs2_glock *gl) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error;
+ BUG_ON(!__lockref_is_dead(&gl->gl_lockref)); + if (gl->gl_lksb.sb_lkid == 0) { gfs2_glock_free(gl); return; @@ -305,7 +315,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && !gl->gl_lksb.sb_lvbptr) { - gfs2_glock_free(gl); + gfs2_glock_free_later(gl); return; }
@@ -315,7 +325,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl) fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n", gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, error); - return; } }
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 648f7336043f..4a8c070d14cf 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -141,6 +141,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_waitqueue_head(&sdp->sd_log_flush_wait); atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); mutex_init(&sdp->sd_freeze_mutex); + INIT_LIST_HEAD(&sdp->sd_dead_glocks);
return sdp;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 8cf4ef61cdc4..039d678b1689 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -662,10 +662,7 @@ static void gfs2_put_super(struct super_block *sb) gfs2_gl_hash_clear(sdp); truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); - /* Unmount the locking protocol */ - gfs2_lm_unmount(sdp);
- /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); free_sbd(sdp); }
From: Nikita Kiryushin kiryushin@ancud.ru
commit cc5645fddb0ce28492b15520306d092730dffa48 upstream.
There is a possibility of buffer overflow in show_rcu_tasks_trace_gp_kthread() if counters, passed to sprintf() are huge. Counter numbers, needed for this are unrealistically high, but buffer overflow is still possible.
Use snprintf() with buffer size instead of sprintf().
Found by Linux Verification Center (linuxtesting.org) with SVACE.
Fixes: edf3775f0ad6 ("rcu-tasks: Add count for idle tasks on offline CPUs") Signed-off-by: Nikita Kiryushin kiryushin@ancud.ru Reviewed-by: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Paul E. McKenney paulmck@kernel.org Signed-off-by: Uladzislau Rezki (Sony) urezki@gmail.com
CVE: CVE-2024-38577
Signed-off-by: He Zhe zhe.he@windriver.com --- kernel/rcu/tasks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 105fdc2bb004..bede3a4f108e 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1240,7 +1240,7 @@ static void show_rcu_tasks_trace_gp_kthread(void) { char buf[64];
- sprintf(buf, "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end), + snprintf(buf, sizeof(buf), "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end), data_race(n_heavy_reader_ofl_updates), data_race(n_heavy_reader_updates), data_race(n_heavy_reader_attempts));
On Fri, Oct 18, 2024 at 09:54:25PM +0800, He Zhe wrote:
From: Nikita Kiryushin kiryushin@ancud.ru
commit cc5645fddb0ce28492b15520306d092730dffa48 upstream.
Again, this is already in a release, 5.10.226, which came out well over a month ago. Are you sure you tested all of these properly? What tree are you working against? When was it last updated?
confused,
greg k-h
From: Zheng Yejian zhengyejian1@huawei.com
commit e60b613df8b6253def41215402f72986fee3fc8d upstream.
KASAN reports a bug:
BUG: KASAN: use-after-free in ftrace_location+0x90/0x120 Read of size 8 at addr ffff888141d40010 by task insmod/424 CPU: 8 PID: 424 Comm: insmod Tainted: G W 6.9.0-rc2+ [...] Call Trace: <TASK> dump_stack_lvl+0x68/0xa0 print_report+0xcf/0x610 kasan_report+0xb5/0xe0 ftrace_location+0x90/0x120 register_kprobe+0x14b/0xa40 kprobe_init+0x2d/0xff0 [kprobe_example] do_one_initcall+0x8f/0x2d0 do_init_module+0x13a/0x3c0 load_module+0x3082/0x33d0 init_module_from_file+0xd2/0x130 __x64_sys_finit_module+0x306/0x440 do_syscall_64+0x68/0x140 entry_SYSCALL_64_after_hwframe+0x71/0x79
The root cause is that, in lookup_rec(), ftrace record of some address is being searched in ftrace pages of some module, but those ftrace pages at the same time is being freed in ftrace_release_mod() as the corresponding module is being deleted:
CPU1 | CPU2 register_kprobes() { | delete_module() { check_kprobe_address_safe() { | arch_check_ftrace_location() { | ftrace_location() { | lookup_rec() // USE! | ftrace_release_mod() // Free!
To fix this issue: 1. Hold rcu lock as accessing ftrace pages in ftrace_location_range(); 2. Use ftrace_location_range() instead of lookup_rec() in ftrace_location(); 3. Call synchronize_rcu() before freeing any ftrace pages both in ftrace_process_locs()/ftrace_release_mod()/ftrace_free_mem().
Link: https://lore.kernel.org/linux-trace-kernel/20240509192859.1273558-1-zhengyej...
Cc: stable@vger.kernel.org Cc: mhiramat@kernel.org Cc: mark.rutland@arm.com Cc: mathieu.desnoyers@efficios.com Fixes: ae6aa16fdc16 ("kprobes: introduce ftrace based optimization") Suggested-by: Steven Rostedt rostedt@goodmis.org Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Signed-off-by: Steven Rostedt (Google) rostedt@goodmis.org
CVE: CVE-2024-38588
Signed-off-by: He Zhe zhe.he@windriver.com --- kernel/trace/ftrace.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 31fec924b7c4..8dcac51b492b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1566,12 +1566,15 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end) unsigned long ftrace_location_range(unsigned long start, unsigned long end) { struct dyn_ftrace *rec; + unsigned long ip = 0;
+ rcu_read_lock(); rec = lookup_rec(start, end); if (rec) - return rec->ip; + ip = rec->ip; + rcu_read_unlock();
- return 0; + return ip; }
/** @@ -6299,6 +6302,8 @@ static int ftrace_process_locs(struct module *mod, /* We should have used all pages unless we skipped some */ if (pg_unuse) { WARN_ON(!skipped); + /* Need to synchronize with ftrace_location_range() */ + synchronize_rcu(); ftrace_free_pages(pg_unuse); } return ret; @@ -6481,6 +6486,9 @@ void ftrace_release_mod(struct module *mod) out_unlock: mutex_unlock(&ftrace_lock);
+ /* Need to synchronize with ftrace_location_range() */ + if (tmp_page) + synchronize_rcu(); for (pg = tmp_page; pg; pg = tmp_page) {
/* Needs to be called outside of ftrace_lock */ @@ -6803,6 +6811,7 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) unsigned long start = (unsigned long)(start_ptr); unsigned long end = (unsigned long)(end_ptr); struct ftrace_page **last_pg = &ftrace_pages_start; + struct ftrace_page *tmp_page = NULL; struct ftrace_page *pg; struct dyn_ftrace *rec; struct dyn_ftrace key; @@ -6846,12 +6855,8 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) ftrace_update_tot_cnt--; if (!pg->index) { *last_pg = pg->next; - if (pg->records) { - free_pages((unsigned long)pg->records, pg->order); - ftrace_number_of_pages -= 1 << pg->order; - } - ftrace_number_of_groups--; - kfree(pg); + pg->next = tmp_page; + tmp_page = pg; pg = container_of(last_pg, struct ftrace_page, next); if (!(*last_pg)) ftrace_pages = pg; @@ -6868,6 +6873,11 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) clear_func_from_hashes(func); kfree(func); } + /* Need to synchronize with ftrace_location_range() */ + if (tmp_page) { + synchronize_rcu(); + ftrace_free_pages(tmp_page); + } }
void __init ftrace_free_init_mem(void)
On Fri, Oct 18, 2024 at 09:54:26PM +0800, He Zhe wrote:
From: Zheng Yejian zhengyejian1@huawei.com
commit e60b613df8b6253def41215402f72986fee3fc8d upstream.
This is already in the 5.15.227 release, why do you want it applied again?
From: Alex Deucher alexander.deucher@amd.com
commit be4a2a81b6b90d1a47eaeaace4cc8e2cb57b96c7 upstream.
We don't get the right offset in that case. The GPU has an unused 4K area of the register BAR space into which you can remap registers. We remap the HDP flush registers into this space to allow userspace (CPU or GPU) to flush the HDP when it updates VRAM. However, on systems with >4K pages, we end up exposing PAGE_SIZE of MMIO space.
Fixes: d8e408a82704 ("drm/amdkfd: Expose HDP registers to user space") Reviewed-by: Felix Kuehling felix.kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org
CVE: CVE-2024-41011
Signed-off-by: He Zhe zhe.he@windriver.com --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 799a91a064a1..9a444b17530a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1311,7 +1311,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_unlock; } offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); - if (!offset) { + if (!offset || (PAGE_SIZE > 4096)) { err = -ENOMEM; goto err_unlock; } @@ -1969,6 +1969,9 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, if (vma->vm_end - vma->vm_start != PAGE_SIZE) return -EINVAL;
+ if (PAGE_SIZE > 4096) + return -EINVAL; + address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
On Fri, Oct 18, 2024 at 09:54:27PM +0800, He Zhe wrote:
From: Alex Deucher alexander.deucher@amd.com
commit be4a2a81b6b90d1a47eaeaace4cc8e2cb57b96c7 upstream.
This is already in the 5.10.225 kernel release, why do you want to backport it again?
confused,
greg k-h
From: Duoming Zhou duoming@zju.edu.cn
commit 573601521277119f2e2ba5f28ae6e87fc594f4d4 upstream.
When the cpu5wdt module is removing, the origin code uses del_timer() to de-activate the timer. If the timer handler is running, del_timer() could not stop it and will return directly. If the port region is released by release_region() and then the timer handler cpu5wdt_trigger() calls outb() to write into the region that is released, the use-after-free bug will happen.
Change del_timer() to timer_shutdown_sync() in order that the timer handler could be finished before the port region is released.
Fixes: e09d9c3e9f85 ("watchdog: cpu5wdt.c: add missing del_timer call") Signed-off-by: Duoming Zhou duoming@zju.edu.cn Reviewed-by: Guenter Roeck linux@roeck-us.net Link: https://lore.kernel.org/r/20240324140444.119584-1-duoming@zju.edu.cn Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Wim Van Sebroeck wim@linux-watchdog.org
CVE: CVE-2024-38630
[Zhe: The function timer_shutdown_sync in the original fix is not introduced to 5.10 yet. As stated in f571faf6e443b6011ccb585d57866177af1f643c "timer_shutdown_sync() has the same functionality as timer_delete_sync() plus the NULL-ification of the timer function." So timer_delete_sync is enough for this case.]
Signed-off-by: He Zhe zhe.he@windriver.com --- drivers/watchdog/cpu5wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/watchdog/cpu5wdt.c b/drivers/watchdog/cpu5wdt.c index 9867a3a936df..91adfb55c972 100644 --- a/drivers/watchdog/cpu5wdt.c +++ b/drivers/watchdog/cpu5wdt.c @@ -252,7 +252,7 @@ static void cpu5wdt_exit(void) if (cpu5wdt_device.queue) { cpu5wdt_device.queue = 0; wait_for_completion(&cpu5wdt_device.stop); - del_timer(&cpu5wdt_device.timer); + timer_delete_sync(&cpu5wdt_device.timer); }
misc_deregister(&cpu5wdt_misc);
On Fri, Oct 18, 2024 at 09:54:28PM +0800, He Zhe wrote:
From: Duoming Zhou duoming@zju.edu.cn
commit 573601521277119f2e2ba5f28ae6e87fc594f4d4 upstream.
When the cpu5wdt module is removing, the origin code uses del_timer() to de-activate the timer. If the timer handler is running, del_timer() could not stop it and will return directly. If the port region is released by release_region() and then the timer handler cpu5wdt_trigger() calls outb() to write into the region that is released, the use-after-free bug will happen.
Change del_timer() to timer_shutdown_sync() in order that the timer handler could be finished before the port region is released.
Fixes: e09d9c3e9f85 ("watchdog: cpu5wdt.c: add missing del_timer call") Signed-off-by: Duoming Zhou duoming@zju.edu.cn Reviewed-by: Guenter Roeck linux@roeck-us.net Link: https://lore.kernel.org/r/20240324140444.119584-1-duoming@zju.edu.cn Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Wim Van Sebroeck wim@linux-watchdog.org
CVE: CVE-2024-38630
[Zhe: The function timer_shutdown_sync in the original fix is not introduced to 5.10 yet. As stated in f571faf6e443b6011ccb585d57866177af1f643c
Please refer to commits in the correct way, this would be f571faf6e443 ("timers: Provide timer_shutdown[_sync]()"), right?
thanks,
greg k-h
On Fri, Oct 18, 2024 at 09:54:24PM +0800, He Zhe wrote:
From: Andreas Gruenbacher agruenba@redhat.com
commit 0636b34b44589b142700ac137b5f69802cfe2e37 upstream.
Why are you sending this only for 5.10 when newer kernel trees do not have it? As the documentation says, we can't take changes only for old kernels, as when you upgrade, you would have a regression.
Please send patches for all relevent kernel trees and we will be glad to review them.
Also, please always cc: all of the developers involved in the patch, so they know what is going on.
And:
Fixes: fb6791d100d1b ("GFS2: skip dlm_unlock calls in unmount") Signed-off-by: Andreas Gruenbacher agruenba@redhat.com Cc: David Teigland teigland@redhat.com
CVE: CVE-2024-38570
No blank lines please.
And no need for the "CVE:" tag as our tools already call out what commit ids are for what CVE, don't duplicate it again here.
[Zhe: sd_glock_wait in gfs2_glock_free_later is not renamed to sd_kill_wait yet. So still use sd_glock_wait in gfs2_glock_free_later in this case.]
Again, no blank line.
Please fix this up for all of these patches and resubmit series for all relevant branches. I've dropped these from my queue now.
thanks,
greg k-h
linux-stable-mirror@lists.linaro.org