November 2023 - Linux-stable-mirror

[PATCH AUTOSEL 5.15 1/6] perf/core: Bail out early if the request AUX area is out of bound

by Sasha Levin

From: Shuai Xue <xueshuai(a)linux.alibaba.com> [ Upstream commit 54aee5f15b83437f23b2b2469bcf21bdd9823916 ] When perf-record with a large AUX area, e.g 4GB, it fails with: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) and it reveals a WARNING with __alloc_pages(): ------------[ cut here ]------------ WARNING: CPU: 44 PID: 17573 at mm/page_alloc.c:5568 __alloc_pages+0x1ec/0x248 Call trace: __alloc_pages+0x1ec/0x248 __kmalloc_large_node+0xc0/0x1f8 __kmalloc_node+0x134/0x1e8 rb_alloc_aux+0xe0/0x298 perf_mmap+0x440/0x660 mmap_region+0x308/0x8a8 do_mmap+0x3c0/0x528 vm_mmap_pgoff+0xf4/0x1b8 ksys_mmap_pgoff+0x18c/0x218 __arm64_sys_mmap+0x38/0x58 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x58/0x188 do_el0_svc+0x34/0x50 el0_svc+0x34/0x108 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x1a4/0x1a8 'rb->aux_pages' allocated by kcalloc() is a pointer array which is used to maintains AUX trace pages. The allocated page for this array is physically contiguous (and virtually contiguous) with an order of 0..MAX_ORDER. If the size of pointer array crosses the limitation set by MAX_ORDER, it reveals a WARNING. So bail out early with -ENOMEM if the request AUX area is out of bound, e.g.: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com> Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/events/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f40da32f5e753..6808873555f0d 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -696,6 +696,12 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, watermark = 0; } + /* + * kcalloc_node() is unable to allocate buffer if the size is larger + * than: PAGE_SIZE << MAX_ORDER; directly bail out in this case. + */ + if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER) + return -ENOMEM; rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL, node); if (!rb->aux_pages) -- 2.42.0

2 years, 1 month

1
5
0 0

[PATCH AUTOSEL 6.1 01/11] perf/core: Bail out early if the request AUX area is out of bound

by Sasha Levin

From: Shuai Xue <xueshuai(a)linux.alibaba.com> [ Upstream commit 54aee5f15b83437f23b2b2469bcf21bdd9823916 ] When perf-record with a large AUX area, e.g 4GB, it fails with: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) and it reveals a WARNING with __alloc_pages(): ------------[ cut here ]------------ WARNING: CPU: 44 PID: 17573 at mm/page_alloc.c:5568 __alloc_pages+0x1ec/0x248 Call trace: __alloc_pages+0x1ec/0x248 __kmalloc_large_node+0xc0/0x1f8 __kmalloc_node+0x134/0x1e8 rb_alloc_aux+0xe0/0x298 perf_mmap+0x440/0x660 mmap_region+0x308/0x8a8 do_mmap+0x3c0/0x528 vm_mmap_pgoff+0xf4/0x1b8 ksys_mmap_pgoff+0x18c/0x218 __arm64_sys_mmap+0x38/0x58 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x58/0x188 do_el0_svc+0x34/0x50 el0_svc+0x34/0x108 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x1a4/0x1a8 'rb->aux_pages' allocated by kcalloc() is a pointer array which is used to maintains AUX trace pages. The allocated page for this array is physically contiguous (and virtually contiguous) with an order of 0..MAX_ORDER. If the size of pointer array crosses the limitation set by MAX_ORDER, it reveals a WARNING. So bail out early with -ENOMEM if the request AUX area is out of bound, e.g.: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com> Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/events/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 273a0fe7910a5..45965f13757e4 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -699,6 +699,12 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, watermark = 0; } + /* + * kcalloc_node() is unable to allocate buffer if the size is larger + * than: PAGE_SIZE << MAX_ORDER; directly bail out in this case. + */ + if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER) + return -ENOMEM; rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL, node); if (!rb->aux_pages) -- 2.42.0

2 years, 1 month

1
10
0 0

[PATCH AUTOSEL 6.5 01/13] perf/core: Bail out early if the request AUX area is out of bound

by Sasha Levin

From: Shuai Xue <xueshuai(a)linux.alibaba.com> [ Upstream commit 54aee5f15b83437f23b2b2469bcf21bdd9823916 ] When perf-record with a large AUX area, e.g 4GB, it fails with: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) and it reveals a WARNING with __alloc_pages(): ------------[ cut here ]------------ WARNING: CPU: 44 PID: 17573 at mm/page_alloc.c:5568 __alloc_pages+0x1ec/0x248 Call trace: __alloc_pages+0x1ec/0x248 __kmalloc_large_node+0xc0/0x1f8 __kmalloc_node+0x134/0x1e8 rb_alloc_aux+0xe0/0x298 perf_mmap+0x440/0x660 mmap_region+0x308/0x8a8 do_mmap+0x3c0/0x528 vm_mmap_pgoff+0xf4/0x1b8 ksys_mmap_pgoff+0x18c/0x218 __arm64_sys_mmap+0x38/0x58 invoke_syscall+0x50/0x128 el0_svc_common.constprop.0+0x58/0x188 do_el0_svc+0x34/0x50 el0_svc+0x34/0x108 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x1a4/0x1a8 'rb->aux_pages' allocated by kcalloc() is a pointer array which is used to maintains AUX trace pages. The allocated page for this array is physically contiguous (and virtually contiguous) with an order of 0..MAX_ORDER. If the size of pointer array crosses the limitation set by MAX_ORDER, it reveals a WARNING. So bail out early with -ENOMEM if the request AUX area is out of bound, e.g.: #perf record -C 0 -m ,4G -e arm_spe_0// -- sleep 1 failed to mmap with 12 (Cannot allocate memory) Signed-off-by: Shuai Xue <xueshuai(a)linux.alibaba.com> Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/events/ring_buffer.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index a0433f37b0243..4a260ceed9c73 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -699,6 +699,12 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event, watermark = 0; } + /* + * kcalloc_node() is unable to allocate buffer if the size is larger + * than: PAGE_SIZE << MAX_ORDER; directly bail out in this case. + */ + if (get_order((unsigned long)nr_pages * sizeof(void *)) > MAX_ORDER) + return -ENOMEM; rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL, node); if (!rb->aux_pages) -- 2.42.0

2 years, 1 month

1
12
0 0

[PATCH AUTOSEL 4.14] locking/ww_mutex/test: Fix potential workqueue corruption

by Sasha Levin

From: John Stultz <jstultz(a)google.com> [ Upstream commit bccdd808902f8c677317cec47c306e42b93b849e ] In some cases running with the test-ww_mutex code, I was seeing odd behavior where sometimes it seemed flush_workqueue was returning before all the work threads were finished. Often this would cause strange crashes as the mutexes would be freed while they were being used. Looking at the code, there is a lifetime problem as the controlling thread that spawns the work allocates the "struct stress" structures that are passed to the workqueue threads. Then when the workqueue threads are finished, they free the stress struct that was passed to them. Unfortunately the workqueue work_struct node is in the stress struct. Which means the work_struct is freed before the work thread returns and while flush_workqueue is waiting. It seems like a better idea to have the controlling thread both allocate and free the stress structures, so that we can be sure we don't corrupt the workqueue by freeing the structure prematurely. So this patch reworks the test to do so, and with this change I no longer see the early flush_workqueue returns. Signed-off-by: John Stultz <jstultz(a)google.com> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Link: https://lore.kernel.org/r/20230922043616.19282-3-jstultz@google.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/locking/test-ww_mutex.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 654977862b06b..8489a01f943e8 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -439,7 +439,6 @@ static void stress_inorder_work(struct work_struct *work) } while (!time_after(jiffies, stress->timeout)); kfree(order); - kfree(stress); } struct reorder_lock { @@ -504,7 +503,6 @@ static void stress_reorder_work(struct work_struct *work) list_for_each_entry_safe(ll, ln, &locks, link) kfree(ll); kfree(order); - kfree(stress); } static void stress_one_work(struct work_struct *work) @@ -525,8 +523,6 @@ static void stress_one_work(struct work_struct *work) break; } } while (!time_after(jiffies, stress->timeout)); - - kfree(stress); } #define STRESS_INORDER BIT(0) @@ -537,15 +533,24 @@ static void stress_one_work(struct work_struct *work) static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; - int n; + struct stress *stress_array; + int n, count; locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); if (!locks) return -ENOMEM; + stress_array = kmalloc_array(nthreads, sizeof(*stress_array), + GFP_KERNEL); + if (!stress_array) { + kfree(locks); + return -ENOMEM; + } + for (n = 0; n < nlocks; n++) ww_mutex_init(&locks[n], &ww_class); + count = 0; for (n = 0; nthreads; n++) { struct stress *stress; void (*fn)(struct work_struct *work); @@ -569,9 +574,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) if (!fn) continue; - stress = kmalloc(sizeof(*stress), GFP_KERNEL); - if (!stress) - break; + stress = &stress_array[count++]; INIT_WORK(&stress->work, fn); stress->locks = locks; @@ -586,6 +589,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) for (n = 0; n < nlocks; n++) ww_mutex_destroy(&locks[n]); + kfree(stress_array); kfree(locks); return 0; -- 2.42.0

2 years, 1 month

1
0
0 0

[PATCH AUTOSEL 4.19] locking/ww_mutex/test: Fix potential workqueue corruption

by Sasha Levin

From: John Stultz <jstultz(a)google.com> [ Upstream commit bccdd808902f8c677317cec47c306e42b93b849e ] In some cases running with the test-ww_mutex code, I was seeing odd behavior where sometimes it seemed flush_workqueue was returning before all the work threads were finished. Often this would cause strange crashes as the mutexes would be freed while they were being used. Looking at the code, there is a lifetime problem as the controlling thread that spawns the work allocates the "struct stress" structures that are passed to the workqueue threads. Then when the workqueue threads are finished, they free the stress struct that was passed to them. Unfortunately the workqueue work_struct node is in the stress struct. Which means the work_struct is freed before the work thread returns and while flush_workqueue is waiting. It seems like a better idea to have the controlling thread both allocate and free the stress structures, so that we can be sure we don't corrupt the workqueue by freeing the structure prematurely. So this patch reworks the test to do so, and with this change I no longer see the early flush_workqueue returns. Signed-off-by: John Stultz <jstultz(a)google.com> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Link: https://lore.kernel.org/r/20230922043616.19282-3-jstultz@google.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/locking/test-ww_mutex.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 65a3b7e55b9fc..4fd05d9d5d6d1 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -439,7 +439,6 @@ static void stress_inorder_work(struct work_struct *work) } while (!time_after(jiffies, stress->timeout)); kfree(order); - kfree(stress); } struct reorder_lock { @@ -504,7 +503,6 @@ static void stress_reorder_work(struct work_struct *work) list_for_each_entry_safe(ll, ln, &locks, link) kfree(ll); kfree(order); - kfree(stress); } static void stress_one_work(struct work_struct *work) @@ -525,8 +523,6 @@ static void stress_one_work(struct work_struct *work) break; } } while (!time_after(jiffies, stress->timeout)); - - kfree(stress); } #define STRESS_INORDER BIT(0) @@ -537,15 +533,24 @@ static void stress_one_work(struct work_struct *work) static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; - int n; + struct stress *stress_array; + int n, count; locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); if (!locks) return -ENOMEM; + stress_array = kmalloc_array(nthreads, sizeof(*stress_array), + GFP_KERNEL); + if (!stress_array) { + kfree(locks); + return -ENOMEM; + } + for (n = 0; n < nlocks; n++) ww_mutex_init(&locks[n], &ww_class); + count = 0; for (n = 0; nthreads; n++) { struct stress *stress; void (*fn)(struct work_struct *work); @@ -569,9 +574,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) if (!fn) continue; - stress = kmalloc(sizeof(*stress), GFP_KERNEL); - if (!stress) - break; + stress = &stress_array[count++]; INIT_WORK(&stress->work, fn); stress->locks = locks; @@ -586,6 +589,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) for (n = 0; n < nlocks; n++) ww_mutex_destroy(&locks[n]); + kfree(stress_array); kfree(locks); return 0; -- 2.42.0

2 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.4] locking/ww_mutex/test: Fix potential workqueue corruption

by Sasha Levin

From: John Stultz <jstultz(a)google.com> [ Upstream commit bccdd808902f8c677317cec47c306e42b93b849e ] In some cases running with the test-ww_mutex code, I was seeing odd behavior where sometimes it seemed flush_workqueue was returning before all the work threads were finished. Often this would cause strange crashes as the mutexes would be freed while they were being used. Looking at the code, there is a lifetime problem as the controlling thread that spawns the work allocates the "struct stress" structures that are passed to the workqueue threads. Then when the workqueue threads are finished, they free the stress struct that was passed to them. Unfortunately the workqueue work_struct node is in the stress struct. Which means the work_struct is freed before the work thread returns and while flush_workqueue is waiting. It seems like a better idea to have the controlling thread both allocate and free the stress structures, so that we can be sure we don't corrupt the workqueue by freeing the structure prematurely. So this patch reworks the test to do so, and with this change I no longer see the early flush_workqueue returns. Signed-off-by: John Stultz <jstultz(a)google.com> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Link: https://lore.kernel.org/r/20230922043616.19282-3-jstultz@google.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/locking/test-ww_mutex.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 3e82f449b4ff7..da36997d8742c 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -426,7 +426,6 @@ static void stress_inorder_work(struct work_struct *work) } while (!time_after(jiffies, stress->timeout)); kfree(order); - kfree(stress); } struct reorder_lock { @@ -491,7 +490,6 @@ static void stress_reorder_work(struct work_struct *work) list_for_each_entry_safe(ll, ln, &locks, link) kfree(ll); kfree(order); - kfree(stress); } static void stress_one_work(struct work_struct *work) @@ -512,8 +510,6 @@ static void stress_one_work(struct work_struct *work) break; } } while (!time_after(jiffies, stress->timeout)); - - kfree(stress); } #define STRESS_INORDER BIT(0) @@ -524,15 +520,24 @@ static void stress_one_work(struct work_struct *work) static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; - int n; + struct stress *stress_array; + int n, count; locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); if (!locks) return -ENOMEM; + stress_array = kmalloc_array(nthreads, sizeof(*stress_array), + GFP_KERNEL); + if (!stress_array) { + kfree(locks); + return -ENOMEM; + } + for (n = 0; n < nlocks; n++) ww_mutex_init(&locks[n], &ww_class); + count = 0; for (n = 0; nthreads; n++) { struct stress *stress; void (*fn)(struct work_struct *work); @@ -556,9 +561,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) if (!fn) continue; - stress = kmalloc(sizeof(*stress), GFP_KERNEL); - if (!stress) - break; + stress = &stress_array[count++]; INIT_WORK(&stress->work, fn); stress->locks = locks; @@ -573,6 +576,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) for (n = 0; n < nlocks; n++) ww_mutex_destroy(&locks[n]); + kfree(stress_array); kfree(locks); return 0; -- 2.42.0

2 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.10] locking/ww_mutex/test: Fix potential workqueue corruption

by Sasha Levin

From: John Stultz <jstultz(a)google.com> [ Upstream commit bccdd808902f8c677317cec47c306e42b93b849e ] In some cases running with the test-ww_mutex code, I was seeing odd behavior where sometimes it seemed flush_workqueue was returning before all the work threads were finished. Often this would cause strange crashes as the mutexes would be freed while they were being used. Looking at the code, there is a lifetime problem as the controlling thread that spawns the work allocates the "struct stress" structures that are passed to the workqueue threads. Then when the workqueue threads are finished, they free the stress struct that was passed to them. Unfortunately the workqueue work_struct node is in the stress struct. Which means the work_struct is freed before the work thread returns and while flush_workqueue is waiting. It seems like a better idea to have the controlling thread both allocate and free the stress structures, so that we can be sure we don't corrupt the workqueue by freeing the structure prematurely. So this patch reworks the test to do so, and with this change I no longer see the early flush_workqueue returns. Signed-off-by: John Stultz <jstultz(a)google.com> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Link: https://lore.kernel.org/r/20230922043616.19282-3-jstultz@google.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/locking/test-ww_mutex.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 3e82f449b4ff7..da36997d8742c 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -426,7 +426,6 @@ static void stress_inorder_work(struct work_struct *work) } while (!time_after(jiffies, stress->timeout)); kfree(order); - kfree(stress); } struct reorder_lock { @@ -491,7 +490,6 @@ static void stress_reorder_work(struct work_struct *work) list_for_each_entry_safe(ll, ln, &locks, link) kfree(ll); kfree(order); - kfree(stress); } static void stress_one_work(struct work_struct *work) @@ -512,8 +510,6 @@ static void stress_one_work(struct work_struct *work) break; } } while (!time_after(jiffies, stress->timeout)); - - kfree(stress); } #define STRESS_INORDER BIT(0) @@ -524,15 +520,24 @@ static void stress_one_work(struct work_struct *work) static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; - int n; + struct stress *stress_array; + int n, count; locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); if (!locks) return -ENOMEM; + stress_array = kmalloc_array(nthreads, sizeof(*stress_array), + GFP_KERNEL); + if (!stress_array) { + kfree(locks); + return -ENOMEM; + } + for (n = 0; n < nlocks; n++) ww_mutex_init(&locks[n], &ww_class); + count = 0; for (n = 0; nthreads; n++) { struct stress *stress; void (*fn)(struct work_struct *work); @@ -556,9 +561,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) if (!fn) continue; - stress = kmalloc(sizeof(*stress), GFP_KERNEL); - if (!stress) - break; + stress = &stress_array[count++]; INIT_WORK(&stress->work, fn); stress->locks = locks; @@ -573,6 +576,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) for (n = 0; n < nlocks; n++) ww_mutex_destroy(&locks[n]); + kfree(stress_array); kfree(locks); return 0; -- 2.42.0

2 years, 1 month

1
0
0 0

[PATCH AUTOSEL 5.15] locking/ww_mutex/test: Fix potential workqueue corruption

by Sasha Levin

From: John Stultz <jstultz(a)google.com> [ Upstream commit bccdd808902f8c677317cec47c306e42b93b849e ] In some cases running with the test-ww_mutex code, I was seeing odd behavior where sometimes it seemed flush_workqueue was returning before all the work threads were finished. Often this would cause strange crashes as the mutexes would be freed while they were being used. Looking at the code, there is a lifetime problem as the controlling thread that spawns the work allocates the "struct stress" structures that are passed to the workqueue threads. Then when the workqueue threads are finished, they free the stress struct that was passed to them. Unfortunately the workqueue work_struct node is in the stress struct. Which means the work_struct is freed before the work thread returns and while flush_workqueue is waiting. It seems like a better idea to have the controlling thread both allocate and free the stress structures, so that we can be sure we don't corrupt the workqueue by freeing the structure prematurely. So this patch reworks the test to do so, and with this change I no longer see the early flush_workqueue returns. Signed-off-by: John Stultz <jstultz(a)google.com> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Link: https://lore.kernel.org/r/20230922043616.19282-3-jstultz@google.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/locking/test-ww_mutex.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 3e82f449b4ff7..da36997d8742c 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -426,7 +426,6 @@ static void stress_inorder_work(struct work_struct *work) } while (!time_after(jiffies, stress->timeout)); kfree(order); - kfree(stress); } struct reorder_lock { @@ -491,7 +490,6 @@ static void stress_reorder_work(struct work_struct *work) list_for_each_entry_safe(ll, ln, &locks, link) kfree(ll); kfree(order); - kfree(stress); } static void stress_one_work(struct work_struct *work) @@ -512,8 +510,6 @@ static void stress_one_work(struct work_struct *work) break; } } while (!time_after(jiffies, stress->timeout)); - - kfree(stress); } #define STRESS_INORDER BIT(0) @@ -524,15 +520,24 @@ static void stress_one_work(struct work_struct *work) static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; - int n; + struct stress *stress_array; + int n, count; locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); if (!locks) return -ENOMEM; + stress_array = kmalloc_array(nthreads, sizeof(*stress_array), + GFP_KERNEL); + if (!stress_array) { + kfree(locks); + return -ENOMEM; + } + for (n = 0; n < nlocks; n++) ww_mutex_init(&locks[n], &ww_class); + count = 0; for (n = 0; nthreads; n++) { struct stress *stress; void (*fn)(struct work_struct *work); @@ -556,9 +561,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) if (!fn) continue; - stress = kmalloc(sizeof(*stress), GFP_KERNEL); - if (!stress) - break; + stress = &stress_array[count++]; INIT_WORK(&stress->work, fn); stress->locks = locks; @@ -573,6 +576,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) for (n = 0; n < nlocks; n++) ww_mutex_destroy(&locks[n]); + kfree(stress_array); kfree(locks); return 0; -- 2.42.0

2 years, 1 month

1
0
0 0

[PATCH AUTOSEL 6.1 1/2] locking/ww_mutex/test: Fix potential workqueue corruption

by Sasha Levin

From: John Stultz <jstultz(a)google.com> [ Upstream commit bccdd808902f8c677317cec47c306e42b93b849e ] In some cases running with the test-ww_mutex code, I was seeing odd behavior where sometimes it seemed flush_workqueue was returning before all the work threads were finished. Often this would cause strange crashes as the mutexes would be freed while they were being used. Looking at the code, there is a lifetime problem as the controlling thread that spawns the work allocates the "struct stress" structures that are passed to the workqueue threads. Then when the workqueue threads are finished, they free the stress struct that was passed to them. Unfortunately the workqueue work_struct node is in the stress struct. Which means the work_struct is freed before the work thread returns and while flush_workqueue is waiting. It seems like a better idea to have the controlling thread both allocate and free the stress structures, so that we can be sure we don't corrupt the workqueue by freeing the structure prematurely. So this patch reworks the test to do so, and with this change I no longer see the early flush_workqueue returns. Signed-off-by: John Stultz <jstultz(a)google.com> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Link: https://lore.kernel.org/r/20230922043616.19282-3-jstultz@google.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/locking/test-ww_mutex.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 43efb2a041602..b1e25695185a4 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -466,7 +466,6 @@ static void stress_inorder_work(struct work_struct *work) } while (!time_after(jiffies, stress->timeout)); kfree(order); - kfree(stress); } struct reorder_lock { @@ -531,7 +530,6 @@ static void stress_reorder_work(struct work_struct *work) list_for_each_entry_safe(ll, ln, &locks, link) kfree(ll); kfree(order); - kfree(stress); } static void stress_one_work(struct work_struct *work) @@ -552,8 +550,6 @@ static void stress_one_work(struct work_struct *work) break; } } while (!time_after(jiffies, stress->timeout)); - - kfree(stress); } #define STRESS_INORDER BIT(0) @@ -564,15 +560,24 @@ static void stress_one_work(struct work_struct *work) static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; - int n; + struct stress *stress_array; + int n, count; locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); if (!locks) return -ENOMEM; + stress_array = kmalloc_array(nthreads, sizeof(*stress_array), + GFP_KERNEL); + if (!stress_array) { + kfree(locks); + return -ENOMEM; + } + for (n = 0; n < nlocks; n++) ww_mutex_init(&locks[n], &ww_class); + count = 0; for (n = 0; nthreads; n++) { struct stress *stress; void (*fn)(struct work_struct *work); @@ -596,9 +601,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) if (!fn) continue; - stress = kmalloc(sizeof(*stress), GFP_KERNEL); - if (!stress) - break; + stress = &stress_array[count++]; INIT_WORK(&stress->work, fn); stress->locks = locks; @@ -613,6 +616,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) for (n = 0; n < nlocks; n++) ww_mutex_destroy(&locks[n]); + kfree(stress_array); kfree(locks); return 0; -- 2.42.0

2 years, 1 month

1
1
0 0

[PATCH AUTOSEL 6.5 1/5] locking/ww_mutex/test: Fix potential workqueue corruption

by Sasha Levin

From: John Stultz <jstultz(a)google.com> [ Upstream commit bccdd808902f8c677317cec47c306e42b93b849e ] In some cases running with the test-ww_mutex code, I was seeing odd behavior where sometimes it seemed flush_workqueue was returning before all the work threads were finished. Often this would cause strange crashes as the mutexes would be freed while they were being used. Looking at the code, there is a lifetime problem as the controlling thread that spawns the work allocates the "struct stress" structures that are passed to the workqueue threads. Then when the workqueue threads are finished, they free the stress struct that was passed to them. Unfortunately the workqueue work_struct node is in the stress struct. Which means the work_struct is freed before the work thread returns and while flush_workqueue is waiting. It seems like a better idea to have the controlling thread both allocate and free the stress structures, so that we can be sure we don't corrupt the workqueue by freeing the structure prematurely. So this patch reworks the test to do so, and with this change I no longer see the early flush_workqueue returns. Signed-off-by: John Stultz <jstultz(a)google.com> Signed-off-by: Ingo Molnar <mingo(a)kernel.org> Link: https://lore.kernel.org/r/20230922043616.19282-3-jstultz@google.com Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/locking/test-ww_mutex.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 93cca6e698600..7c5a8f05497f2 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -466,7 +466,6 @@ static void stress_inorder_work(struct work_struct *work) } while (!time_after(jiffies, stress->timeout)); kfree(order); - kfree(stress); } struct reorder_lock { @@ -531,7 +530,6 @@ static void stress_reorder_work(struct work_struct *work) list_for_each_entry_safe(ll, ln, &locks, link) kfree(ll); kfree(order); - kfree(stress); } static void stress_one_work(struct work_struct *work) @@ -552,8 +550,6 @@ static void stress_one_work(struct work_struct *work) break; } } while (!time_after(jiffies, stress->timeout)); - - kfree(stress); } #define STRESS_INORDER BIT(0) @@ -564,15 +560,24 @@ static void stress_one_work(struct work_struct *work) static int stress(int nlocks, int nthreads, unsigned int flags) { struct ww_mutex *locks; - int n; + struct stress *stress_array; + int n, count; locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL); if (!locks) return -ENOMEM; + stress_array = kmalloc_array(nthreads, sizeof(*stress_array), + GFP_KERNEL); + if (!stress_array) { + kfree(locks); + return -ENOMEM; + } + for (n = 0; n < nlocks; n++) ww_mutex_init(&locks[n], &ww_class); + count = 0; for (n = 0; nthreads; n++) { struct stress *stress; void (*fn)(struct work_struct *work); @@ -596,9 +601,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) if (!fn) continue; - stress = kmalloc(sizeof(*stress), GFP_KERNEL); - if (!stress) - break; + stress = &stress_array[count++]; INIT_WORK(&stress->work, fn); stress->locks = locks; @@ -613,6 +616,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags) for (n = 0; n < nlocks; n++) ww_mutex_destroy(&locks[n]); + kfree(stress_array); kfree(locks); return 0; -- 2.42.0

2 years, 1 month

1
4
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror November 2023