The patch titled
Subject: mm,oom_reaper: avoid run queue_oom_reaper if task is not oom
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mmoom_reaper-avoid-run-queue_oom_reaper-if-task-is-not-oom.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mmoom_reaper-avoid-run-queue_oom_reaper-if-task-is-not-oom.patch
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: gaoxu <gaoxu2(a)hihonor.com>
Subject: mm,oom_reaper: avoid run queue_oom_reaper if task is not oom
Date: Wed, 22 Nov 2023 12:46:44 +0000
queue_oom_reaper() tests and sets tsk->signal->oom_mm->flags. However, it
is necessary to check if 'tsk' is an OOM victim before executing
'queue_oom_reaper' because tsk->signal->oom_mm may be NULL.
We encountered such an issue, and the log is as follows:
[3701:11_see]Out of memory: Killed process 3154 (system_server)
total-vm:23662044kB, anon-rss:0kB, file-rss:0kB, shmem-rss:0kB,
UID:1000 pgtables:4056kB oom_score_adj:-900
[3701:11_see][RB/E]rb_sreason_str_set: sreason_str set null_pointer
[3701:11_see][RB/E]rb_sreason_str_set: sreason_str set unknown_addr
[3701:11_see]Unable to handle kernel NULL pointer dereference at virtual
address 0000000000000328
[3701:11_see]user pgtable: 4k pages, 39-bit VAs, pgdp=00000000821de000
[3701:11_see][0000000000000328] pgd=0000000000000000,
p4d=0000000000000000,pud=0000000000000000
[3701:11_see]tracing off
[3701:11_see]Internal error: Oops: 96000005 [#1] PREEMPT SMP
[3701:11_see]Call trace:
[3701:11_see] queue_oom_reaper+0x30/0x170
[3701:11_see] __oom_kill_process+0x590/0x860
[3701:11_see] oom_kill_process+0x140/0x274
[3701:11_see] out_of_memory+0x2f4/0x54c
[3701:11_see] __alloc_pages_slowpath+0x5d8/0xaac
[3701:11_see] __alloc_pages+0x774/0x800
[3701:11_see] wp_page_copy+0xc4/0x116c
[3701:11_see] do_wp_page+0x4bc/0x6fc
[3701:11_see] handle_pte_fault+0x98/0x2a8
[3701:11_see] __handle_mm_fault+0x368/0x700
[3701:11_see] do_handle_mm_fault+0x160/0x2cc
[3701:11_see] do_page_fault+0x3e0/0x818
[3701:11_see] do_mem_abort+0x68/0x17c
[3701:11_see] el0_da+0x3c/0xa0
[3701:11_see] el0t_64_sync_handler+0xc4/0xec
[3701:11_see] el0t_64_sync+0x1b4/0x1b8
[3701:11_see]tracing off
Link: https://lkml.kernel.org/r/400d13bddb524ef6af37cb2220808c75@hihonor.com
Signed-off-by: Gao Xu <gaoxu2(a)hihonor.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/oom_kill.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/oom_kill.c~mmoom_reaper-avoid-run-queue_oom_reaper-if-task-is-not-oom
+++ a/mm/oom_kill.c
@@ -984,7 +984,7 @@ static void __oom_kill_process(struct ta
}
rcu_read_unlock();
- if (can_oom_reap)
+ if (can_oom_reap && tsk_is_oom_victim(victim))
queue_oom_reaper(victim);
mmdrop(mm);
_
Patches currently in -mm which might be from gaoxu2(a)hihonor.com are
mmoom_reaper-avoid-run-queue_oom_reaper-if-task-is-not-oom.patch
Not all LJCA chips implement SPI, and on chips without SPI, reading
the SPI descriptors will time out.
On laptop models like the Dell Latitude 9420, this is expected behavior
and not an error.
Modify the driver to continue without instantiating a SPI auxbus child,
instead of failing to probe() the whole LJCA chip.
Fixes: acd6199f195d ("usb: Add support for Intel LJCA device")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
Reviewed-by: Wentong Wu <wentong.wu(a)intel.com>
Link: https://lore.kernel.org/r/20231104175104.38786-1-hdegoede@redhat.com
---
Changes in v3:
- Fix commit-id in fixes tag
Changes in v2:
- Small commit msg + comment fixes
- Add Fixes tag + Cc: stable
---
drivers/usb/misc/usb-ljca.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c
index c9decd0396d4..a280d3a54b18 100644
--- a/drivers/usb/misc/usb-ljca.c
+++ b/drivers/usb/misc/usb-ljca.c
@@ -656,10 +656,11 @@ static int ljca_enumerate_spi(struct ljca_adapter *adap)
unsigned int i;
int ret;
+ /* Not all LJCA chips implement SPI, a timeout reading the descriptors is normal */
ret = ljca_send(adap, LJCA_CLIENT_MNG, LJCA_MNG_ENUM_SPI, NULL, 0, buf,
sizeof(buf), true, LJCA_ENUM_CLIENT_TIMEOUT_MS);
if (ret < 0)
- return ret;
+ return (ret == -ETIMEDOUT) ? 0 : ret;
/* check firmware response */
desc = (struct ljca_spi_descriptor *)buf;
--
2.41.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x cca42bd8eb1b54a4c9bbf48c79d120e66619a3e4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112203-roping-darling-f64a@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
cca42bd8eb1b ("rcutorture: Fix stuttering races and other issues")
10af43671e8b ("torture: Move stutter_wait() timeouts to hrtimers")
5d248bb39fe1 ("torture: Add lock_torture writer_fifo module parameter")
f8619c300f49 ("locktorture: Add long_hold to adjust lock-hold delays")
45bcf0bd8cbe ("locktorture: With nested locks, occasionally skip main lock")
b63343207da2 ("locktorture: Add nested_[un]lock() hooks and nlocks parameter")
c24501b24074 ("locktorture: Make the rt_boost factor a tunable")
e01f3a1a589e ("locktorture: Allow non-rtmutex lock types to be boosted")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cca42bd8eb1b54a4c9bbf48c79d120e66619a3e4 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel(a)joelfernandes.org>
Date: Sat, 29 Jul 2023 14:27:31 +0000
Subject: [PATCH] rcutorture: Fix stuttering races and other issues
The stuttering code isn't functioning as expected. Ideally, it should
pause the torture threads for a designated period before resuming. Yet,
it fails to halt the test for the correct duration. Additionally, a race
condition exists, potentially causing the stuttering code to pause for
an extended period if the 'spt' variable is non-zero due to the stutter
orchestration thread's inadequate CPU time.
Moreover, over-stuttering can hinder RCU's progress on TREE07 kernels.
This happens as the stuttering code may run within a softirq due to RCU
callbacks. Consequently, ksoftirqd keeps a CPU busy for several seconds,
thus obstructing RCU's progress. This situation triggers a warning
message in the logs:
[ 2169.481783] rcu_torture_writer: rtort_pipe_count: 9
This warning suggests that an RCU torture object, although invisible to
RCU readers, couldn't make it past the pipe array and be freed -- a
strong indication that there weren't enough grace periods during the
stutter interval.
To address these issues, this patch sets the "stutter end" time to an
absolute point in the future set by the main stutter thread. This is
then used for waiting in stutter_wait(). While the stutter thread still
defines this absolute time, the waiters' waiting logic doesn't rely on
the stutter thread receiving sufficient CPU time to halt the stuttering
as the halting is now self-controlled.
Cc: stable(a)vger.kernel.org
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
diff --git a/kernel/torture.c b/kernel/torture.c
index 6ba62e5993e7..fd353f98162f 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -720,7 +720,7 @@ static void torture_shutdown_cleanup(void)
* suddenly applied to or removed from the system.
*/
static struct task_struct *stutter_task;
-static int stutter_pause_test;
+static ktime_t stutter_till_abs_time;
static int stutter;
static int stutter_gap;
@@ -730,30 +730,16 @@ static int stutter_gap;
*/
bool stutter_wait(const char *title)
{
- unsigned int i = 0;
bool ret = false;
- int spt;
+ ktime_t till_ns;
cond_resched_tasks_rcu_qs();
- spt = READ_ONCE(stutter_pause_test);
- for (; spt; spt = READ_ONCE(stutter_pause_test)) {
- if (!ret && !rt_task(current)) {
- sched_set_normal(current, MAX_NICE);
- ret = true;
- }
- if (spt == 1) {
- torture_hrtimeout_jiffies(1, NULL);
- } else if (spt == 2) {
- while (READ_ONCE(stutter_pause_test)) {
- if (!(i++ & 0xffff))
- torture_hrtimeout_us(10, 0, NULL);
- cond_resched();
- }
- } else {
- torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL);
- }
- torture_shutdown_absorb(title);
+ till_ns = READ_ONCE(stutter_till_abs_time);
+ if (till_ns && ktime_before(ktime_get(), till_ns)) {
+ torture_hrtimeout_ns(till_ns, 0, HRTIMER_MODE_ABS, NULL);
+ ret = true;
}
+ torture_shutdown_absorb(title);
return ret;
}
EXPORT_SYMBOL_GPL(stutter_wait);
@@ -764,23 +750,16 @@ EXPORT_SYMBOL_GPL(stutter_wait);
*/
static int torture_stutter(void *arg)
{
- DEFINE_TORTURE_RANDOM(rand);
- int wtime;
+ ktime_t till_ns;
VERBOSE_TOROUT_STRING("torture_stutter task started");
do {
if (!torture_must_stop() && stutter > 1) {
- wtime = stutter;
- if (stutter > 2) {
- WRITE_ONCE(stutter_pause_test, 1);
- wtime = stutter - 3;
- torture_hrtimeout_jiffies(wtime, &rand);
- wtime = 2;
- }
- WRITE_ONCE(stutter_pause_test, 2);
- torture_hrtimeout_jiffies(wtime, NULL);
+ till_ns = ktime_add_ns(ktime_get(),
+ jiffies_to_nsecs(stutter));
+ WRITE_ONCE(stutter_till_abs_time, till_ns);
+ torture_hrtimeout_jiffies(stutter - 1, NULL);
}
- WRITE_ONCE(stutter_pause_test, 0);
if (!torture_must_stop())
torture_hrtimeout_jiffies(stutter_gap, NULL);
torture_shutdown_absorb("torture_stutter");
The patch below does not apply to the 6.5-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.5.y
git checkout FETCH_HEAD
git cherry-pick -x cca42bd8eb1b54a4c9bbf48c79d120e66619a3e4
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023112201-parakeet-trustless-649e@gregkh' --subject-prefix 'PATCH 6.5.y' HEAD^..
Possible dependencies:
cca42bd8eb1b ("rcutorture: Fix stuttering races and other issues")
10af43671e8b ("torture: Move stutter_wait() timeouts to hrtimers")
5d248bb39fe1 ("torture: Add lock_torture writer_fifo module parameter")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cca42bd8eb1b54a4c9bbf48c79d120e66619a3e4 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel(a)joelfernandes.org>
Date: Sat, 29 Jul 2023 14:27:31 +0000
Subject: [PATCH] rcutorture: Fix stuttering races and other issues
The stuttering code isn't functioning as expected. Ideally, it should
pause the torture threads for a designated period before resuming. Yet,
it fails to halt the test for the correct duration. Additionally, a race
condition exists, potentially causing the stuttering code to pause for
an extended period if the 'spt' variable is non-zero due to the stutter
orchestration thread's inadequate CPU time.
Moreover, over-stuttering can hinder RCU's progress on TREE07 kernels.
This happens as the stuttering code may run within a softirq due to RCU
callbacks. Consequently, ksoftirqd keeps a CPU busy for several seconds,
thus obstructing RCU's progress. This situation triggers a warning
message in the logs:
[ 2169.481783] rcu_torture_writer: rtort_pipe_count: 9
This warning suggests that an RCU torture object, although invisible to
RCU readers, couldn't make it past the pipe array and be freed -- a
strong indication that there weren't enough grace periods during the
stutter interval.
To address these issues, this patch sets the "stutter end" time to an
absolute point in the future set by the main stutter thread. This is
then used for waiting in stutter_wait(). While the stutter thread still
defines this absolute time, the waiters' waiting logic doesn't rely on
the stutter thread receiving sufficient CPU time to halt the stuttering
as the halting is now self-controlled.
Cc: stable(a)vger.kernel.org
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
diff --git a/kernel/torture.c b/kernel/torture.c
index 6ba62e5993e7..fd353f98162f 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -720,7 +720,7 @@ static void torture_shutdown_cleanup(void)
* suddenly applied to or removed from the system.
*/
static struct task_struct *stutter_task;
-static int stutter_pause_test;
+static ktime_t stutter_till_abs_time;
static int stutter;
static int stutter_gap;
@@ -730,30 +730,16 @@ static int stutter_gap;
*/
bool stutter_wait(const char *title)
{
- unsigned int i = 0;
bool ret = false;
- int spt;
+ ktime_t till_ns;
cond_resched_tasks_rcu_qs();
- spt = READ_ONCE(stutter_pause_test);
- for (; spt; spt = READ_ONCE(stutter_pause_test)) {
- if (!ret && !rt_task(current)) {
- sched_set_normal(current, MAX_NICE);
- ret = true;
- }
- if (spt == 1) {
- torture_hrtimeout_jiffies(1, NULL);
- } else if (spt == 2) {
- while (READ_ONCE(stutter_pause_test)) {
- if (!(i++ & 0xffff))
- torture_hrtimeout_us(10, 0, NULL);
- cond_resched();
- }
- } else {
- torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL);
- }
- torture_shutdown_absorb(title);
+ till_ns = READ_ONCE(stutter_till_abs_time);
+ if (till_ns && ktime_before(ktime_get(), till_ns)) {
+ torture_hrtimeout_ns(till_ns, 0, HRTIMER_MODE_ABS, NULL);
+ ret = true;
}
+ torture_shutdown_absorb(title);
return ret;
}
EXPORT_SYMBOL_GPL(stutter_wait);
@@ -764,23 +750,16 @@ EXPORT_SYMBOL_GPL(stutter_wait);
*/
static int torture_stutter(void *arg)
{
- DEFINE_TORTURE_RANDOM(rand);
- int wtime;
+ ktime_t till_ns;
VERBOSE_TOROUT_STRING("torture_stutter task started");
do {
if (!torture_must_stop() && stutter > 1) {
- wtime = stutter;
- if (stutter > 2) {
- WRITE_ONCE(stutter_pause_test, 1);
- wtime = stutter - 3;
- torture_hrtimeout_jiffies(wtime, &rand);
- wtime = 2;
- }
- WRITE_ONCE(stutter_pause_test, 2);
- torture_hrtimeout_jiffies(wtime, NULL);
+ till_ns = ktime_add_ns(ktime_get(),
+ jiffies_to_nsecs(stutter));
+ WRITE_ONCE(stutter_till_abs_time, till_ns);
+ torture_hrtimeout_jiffies(stutter - 1, NULL);
}
- WRITE_ONCE(stutter_pause_test, 0);
if (!torture_must_stop())
torture_hrtimeout_jiffies(stutter_gap, NULL);
torture_shutdown_absorb("torture_stutter");