Currently userspace can ask for an allocation of any uint32 size for
the SEV_GET_ID2 command. Limit this allocation to the largest size
that can be allocated physically contiguously, i.e. an order below
MAX_ORDER.
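For illustration (not part of the change), a minimal userspace sketch of
why the check below is sufficient: get_order() maps the requested byte
length to a buddy-allocator order, and any order >= MAX_ORDER cannot be
satisfied by a single physically contiguous allocation. The PAGE_SHIFT
and MAX_ORDER values here are assumptions for a typical x86-64 config,
and the get_order() below only mirrors the kernel helper.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed: 4 KiB pages */
#define MAX_ORDER  11	/* assumed: default buddy-allocator limit */

/* Mirrors the kernel's get_order(): smallest order covering len bytes. */
static unsigned int get_order(uint64_t len)
{
	unsigned int order = 0;

	len = (len - 1) >> PAGE_SHIFT;
	while (len) {
		order++;
		len >>= 1;
	}
	return order;
}

int main(void)
{
	uint32_t lengths[] = { 64, 1 << 20, 4u << 20, UINT32_MAX };
	size_t i;

	for (i = 0; i < sizeof(lengths) / sizeof(lengths[0]); i++) {
		unsigned int order = get_order(lengths[i]);

		printf("length %u -> order %u -> %s\n",
		       (unsigned)lengths[i], order,
		       order >= MAX_ORDER ? "rejected (-ENOMEM)" : "allowed");
	}
	return 0;
}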
Reported-by: Andy Nguyen <theflow(a)google.com>
Suggested-by: David Rientjes <rientjes(a)google.com>
Signed-off-by: Peter Gonda <pgonda(a)google.com>
Cc: stable(a)vger.kernel.org
Cc: Herbert Xu <herbert(a)gondor.apana.org.au>
Cc: linux-kernel(a)vger.kernel.org
Cc: linux-crypto(a)vger.kernel.org
Cc: John Allen <john.allen(a)amd.com>
Cc: Thomas Lendacky <thomas.lendacky(a)amd.com>
---
drivers/crypto/ccp/sev-dev.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 06fc7156c04f..5c16c4406764 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -878,6 +878,10 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
return -EFAULT;
+ /* Max length that can be allocated physically contiguously */
+ if (get_order(input.length) >= MAX_ORDER)
+ return -ENOMEM;
+
input_address = (void __user *)input.address;
if (input.address && input.length) {
--
2.39.0.rc1.256.g54fd8350bd-goog
From: "Paul E. McKenney" <paulmck(a)kernel.org>
commit 96017bf9039763a2e02dcc6adaa18592cd73a39d upstream.
Currently, trc_wait_for_one_reader() atomically increments
the trc_n_readers_need_end counter before sending the IPI
invoking trc_read_check_handler(). All failure paths out of
trc_read_check_handler() and also from the smp_call_function_single()
within trc_wait_for_one_reader() must carefully atomically decrement
this counter. This is more complex than it needs to be.
This commit therefore simplifies things and saves a few lines of
code by dispensing with the atomic decrements in favor of having
trc_read_check_handler() do the atomic increment only in the success case.
In theory, this represents no change in functionality.
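As an aside, a minimal userspace sketch (assumed, not kernel code) of
the resulting pattern: the counter of readers still to be waited on is
bumped only on the one path that actually needs waiting, so the early
exits no longer have to undo anything. The names need_end and
check_reader() are hypothetical stand-ins for trc_n_readers_need_end
and trc_read_check_handler().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int need_end;	/* stand-in for trc_n_readers_need_end */

/* Stand-in for trc_read_check_handler(): returns true only when the
 * "reader" really has to be waited on; only that path touches the
 * counter, so the early exits need no matching decrement. */
static bool check_reader(int nesting)
{
	if (nesting <= 0)
		return false;	/* not in a read-side critical section */

	atomic_fetch_add(&need_end, 1);	/* one more to wait on */
	return true;
}

int main(void)
{
	int samples[] = { 0, 2, -1, 1 };	/* assumed reader states */
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		check_reader(samples[i]);

	/* Only the two positive-nesting "readers" were counted. */
	printf("readers to wait for: %d\n", atomic_load(&need_end));
	return 0;
}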
Signed-off-by: Paul E. McKenney <paulmck(a)kernel.org>
Cc: <stable(a)vger.kernel.org> # 5.15.x
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
---
I confirmed that the patch fixes the following splat, which happens twice in the TRACE02 rcutorture test:
[ 765.941351] WARNING: CPU: 0 PID: 80 at kernel/rcu/tasks.h:895 trc_read_check_handler+0x61/0xe0
[ 765.949880] Modules linked in:
[ 765.953006] CPU: 0 PID: 80 Comm: rcu_torture_rea Not tainted 5.15.86-rc1+ #25
[ 765.959982] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014
[ 765.967964] RIP: 0010:trc_read_check_handler+0x61/0xe0
[ 765.973050] Code: 01 00 89 c0 48 03 2c c5 80 f8 a5 ae c6 45 00 00 [..]
[ 765.991768] RSP: 0000:ffffa64ac0003fb0 EFLAGS: 00010047
[ 765.997042] RAX: ffffffffad4f8610 RBX: ffffa26b41bd3000 RCX: ffffa26b5f4ac8c0
[ 766.004418] RDX: 0000000000000000 RSI: ffffffffae978121 RDI: ffffa26b41bd3000
[ 766.011502] RBP: ffffa26b41bd6000 R08: ffffa26b41bd3000 R09: 0000000000000000
[ 766.018778] R10: 0000000000000000 R11: ffffa64ac0003ff8 R12: 0000000000000000
[ 766.025943] R13: ffffa26b5f4ac8c0 R14: 0000000000000000 R15: 0000000000000000
[ 766.034383] FS: 0000000000000000(0000) GS:ffffa26b5f400000(0000) knlGS:0000000000000000
[ 766.042925] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 766.048775] CR2: 0000000000000000 CR3: 0000000001924000 CR4: 00000000000006f0
[ 766.055991] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 766.063135] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 766.070711] Call Trace:
[ 766.073515] <IRQ>
[ 766.075807] flush_smp_call_function_queue+0xec/0x1a0
[ 766.081087] __sysvec_call_function_single+0x3e/0x1d0
[ 766.086466] sysvec_call_function_single+0x89/0xc0
[ 766.091431] </IRQ>
[ 766.093713] <TASK>
[ 766.095930] asm_sysvec_call_function_single+0x16/0x20
kernel/rcu/tasks.h | 20 +++-----------------
1 file changed, 3 insertions(+), 17 deletions(-)
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index ae8396032b5d..4bd07cc3c0ea 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -892,32 +892,24 @@ static void trc_read_check_handler(void *t_in)
// If the task is no longer running on this CPU, leave.
if (unlikely(texp != t)) {
- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
- wake_up(&trc_wait);
goto reset_ipi; // Already on holdout list, so will check later.
}
// If the task is not in a read-side critical section, and
// if this is the last reader, awaken the grace-period kthread.
if (likely(!READ_ONCE(t->trc_reader_nesting))) {
- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
- wake_up(&trc_wait);
- // Mark as checked after decrement to avoid false
- // positives on the above WARN_ON_ONCE().
WRITE_ONCE(t->trc_reader_checked, true);
goto reset_ipi;
}
// If we are racing with an rcu_read_unlock_trace(), try again later.
- if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0)) {
- if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
- wake_up(&trc_wait);
+ if (unlikely(READ_ONCE(t->trc_reader_nesting) < 0))
goto reset_ipi;
- }
WRITE_ONCE(t->trc_reader_checked, true);
// Get here if the task is in a read-side critical section. Set
// its state so that it will awaken the grace-period kthread upon
// exit from that critical section.
+ atomic_inc(&trc_n_readers_need_end); // One more to wait on.
WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
@@ -1017,21 +1009,15 @@ static void trc_wait_for_one_reader(struct task_struct *t,
if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0)
return;
- atomic_inc(&trc_n_readers_need_end);
per_cpu(trc_ipi_to_cpu, cpu) = true;
t->trc_ipi_to_cpu = cpu;
rcu_tasks_trace.n_ipis++;
- if (smp_call_function_single(cpu,
- trc_read_check_handler, t, 0)) {
+ if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) {
// Just in case there is some other reason for
// failure than the target CPU being offline.
rcu_tasks_trace.n_ipis_fails++;
per_cpu(trc_ipi_to_cpu, cpu) = false;
t->trc_ipi_to_cpu = cpu;
- if (atomic_dec_and_test(&trc_n_readers_need_end)) {
- WARN_ON_ONCE(1);
- wake_up(&trc_wait);
- }
}
}
}
--
2.39.0.314.g84b9a713c41-goog
Since commit 07ec77a1d4e8 ("sched: Allow task CPU affinity to be
restricted on asymmetric systems"), the setting and clearing of
user_cpus_ptr are done under pi_lock for the arm64 architecture. However,
dup_user_cpus_ptr() accesses user_cpus_ptr without any lock
protection. Since sched_setaffinity() can be invoked from another
process, the process being modified may be undergoing fork() at
the same time. When racing with the clearing of user_cpus_ptr in
__set_cpus_allowed_ptr_locked(), this can lead to a use-after-free and
possibly a double-free in the arm64 kernel.
Commit 8f9ea86fdf99 ("sched: Always preserve the user requested
cpumask") fixes this problem as user_cpus_ptr, once set, will never
be cleared in a task's lifetime. However, this bug was re-introduced
in commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in
do_set_cpus_allowed()") which allows the clearing of user_cpus_ptr in
do_set_cpus_allowed(). This time, it will affect all arches.
Fix this bug by always clearing the user_cpus_ptr of the newly
cloned/forked task before the copying process starts and by checking
the user_cpus_ptr state of the source task under pi_lock.
Note to stable: this patch won't apply cleanly to stable releases;
just copy the new dup_user_cpus_ptr() function over.
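For reference, a minimal userspace sketch (assumed, not the kernel
code) of the allocate-outside-the-lock / publish-under-the-lock pattern
used here: the buffer is allocated before taking the lock, installed
only if the source pointer is still set while the lock is held, and
freed if the race was lost. mask_t, src_lock and dup_mask() are
hypothetical stand-ins for cpumask_t, pi_lock and dup_user_cpus_ptr().

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { unsigned long bits[2]; } mask_t;

static pthread_mutex_t src_lock = PTHREAD_MUTEX_INITIALIZER;
static mask_t *src_mask;		/* may be cleared concurrently */

static int dup_mask(mask_t **dst)
{
	mask_t *buf;

	*dst = NULL;			/* always start from a clean slate */

	if (!src_mask)			/* racy early exit, as in the patch */
		return 0;

	buf = malloc(sizeof(*buf));	/* allocate outside the lock */
	if (!buf)
		return -1;

	pthread_mutex_lock(&src_lock);
	if (src_mask) {			/* still set: publish the copy */
		*dst = buf;
		buf = NULL;
		memcpy(*dst, src_mask, sizeof(**dst));
	}
	pthread_mutex_unlock(&src_lock);

	free(buf);			/* no-op if the copy was published */
	return 0;
}

int main(void)
{
	mask_t parent = { { 0x5, 0x0 } };
	mask_t *child;

	src_mask = &parent;
	if (!dup_mask(&child) && child) {
		printf("copied mask word 0: %#lx\n", child->bits[0]);
		free(child);
	}
	return 0;
}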
Fixes: 07ec77a1d4e8 ("sched: Allow task CPU affinity to be restricted on asymmetric systems")
Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()")
CC: stable(a)vger.kernel.org
Reported-by: David Wang 王标 <wangbiao3(a)xiaomi.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
---
kernel/sched/core.c | 34 +++++++++++++++++++++++++++++-----
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 25b582b6ee5f..b93d030b9fd5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2612,19 +2612,43 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
int node)
{
+ cpumask_t *user_mask;
unsigned long flags;
- if (!src->user_cpus_ptr)
+ /*
+ * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's
+ * may differ by now due to racing.
+ */
+ dst->user_cpus_ptr = NULL;
+
+ /*
+ * This check is racy and losing the race is a valid situation.
+ * It is not worth the extra overhead of taking the pi_lock on
+ * every fork/clone.
+ */
+ if (data_race(!src->user_cpus_ptr))
return 0;
- dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
- if (!dst->user_cpus_ptr)
+ user_mask = kmalloc_node(cpumask_size(), GFP_KERNEL, node);
+ if (!user_mask)
return -ENOMEM;
- /* Use pi_lock to protect content of user_cpus_ptr */
+ /*
+ * Use pi_lock to protect content of user_cpus_ptr
+ *
+ * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent
+ * do_set_cpus_allowed().
+ */
raw_spin_lock_irqsave(&src->pi_lock, flags);
- cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+ if (src->user_cpus_ptr) {
+ swap(dst->user_cpus_ptr, user_mask);
+ cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
+ }
raw_spin_unlock_irqrestore(&src->pi_lock, flags);
+
+ if (unlikely(user_mask))
+ kfree(user_mask);
+
return 0;
}
--
2.31.1
From: "Paul E. McKenney" <paulmck(a)kernel.org>
commit 8d68e68a781db80606c8e8f3e4383be6974878fd upstream.
The "NOHZ tick-stop error: Non-RCU local softirq work is pending"
warning happens frequently and appears to be irrelevant to the various
torture tests. This commit therefore filters it out.
If there proves to be a need to pay attention to it, a later commit will
add an "advice" category to allow the user to immediately see that
although something happened, it was not an indictment of the system
being tortured.
Signed-off-by: Paul E. McKenney <paulmck(a)kernel.org>
Cc: <stable(a)vger.kernel.org> # 5.10.x
Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org>
---
tools/testing/selftests/rcutorture/bin/console-badness.sh | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
index 0e4c0b2eb7f0..80ae7f08b363 100755
--- a/tools/testing/selftests/rcutorture/bin/console-badness.sh
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -13,4 +13,5 @@
egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
grep -v 'ODEBUG: ' |
grep -v 'This means that this is a DEBUG kernel and it is' |
-grep -v 'Warning: unable to open an initial console'
+grep -v 'Warning: unable to open an initial console' |
+grep -v 'NOHZ tick-stop error: Non-RCU local softirq work is pending, handler'
--
2.39.0.314.g84b9a713c41-goog