Antgroup is using 5.10.y in product environment, we found several patches are
missing in 5.10.y tree. These patches are needed for us. So we backported them
to 5.10.y. Also backport to 5.15.y and 6.1.y to prevent regression.
Jiachen Zhang (1):
fuse: always revalidate rename target dentry
fs/fuse/dir.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--
2.40.0
Antgroup is using 5.10.y in product environment, we found several patches are
missing in 5.10.y tree. These patches are needed for us. So we backported them
to 5.10.y. Also backport to 5.15.y and 6.1.y to prevent regression.
Jiachen Zhang (1):
fuse: always revalidate rename target dentry
Miklos Szeredi (2):
fuse: fix attr version comparison in fuse_read_update_size()
fuse: fix deadlock between atomic O_TRUNC and page invalidation
fs/fuse/dir.c | 9 +++++++--
fs/fuse/file.c | 31 ++++++++++++++++++-------------
2 files changed, 25 insertions(+), 15 deletions(-)
--
2.40.0
Antgroup is using 5.10.y in product environment, we found several patches are
missing in 5.10.y tree. These patches are needed for us. So we backported them
to 5.10.y. Also backport to 5.15.y and 6.1.y to prevent regression.
Connor Kuehl (1):
virtiofs: split requests that exceed virtqueue size
Jiachen Zhang (1):
fuse: always revalidate rename target dentry
Miklos Szeredi (4):
virtiofs: clean up error handling in virtio_fs_get_tree()
fuse: check s_root when destroying sb
fuse: fix attr version comparison in fuse_read_update_size()
fuse: fix deadlock between atomic O_TRUNC and page invalidation
fs/fuse/dir.c | 7 ++++++-
fs/fuse/file.c | 31 +++++++++++++++++-------------
fs/fuse/fuse_i.h | 3 +++
fs/fuse/inode.c | 5 +++--
fs/fuse/virtio_fs.c | 46 +++++++++++++++++++++++++++++----------------
5 files changed, 60 insertions(+), 32 deletions(-)
--
2.40.0
Antgroup is using 5.10.y in product environment, we found several patches are
missing in 5.10.y tree. These patches are needed for us. So we backported them
to 5.10.y. Also backport to 5.15.y and 6.1.y to prevent regression.
Connor Kuehl (1):
virtiofs: split requests that exceed virtqueue size
Jiachen Zhang (1):
fuse: always revalidate rename target dentry
Miklos Szeredi (4):
virtiofs: clean up error handling in virtio_fs_get_tree()
fuse: check s_root when destroying sb
fuse: fix attr version comparison in fuse_read_update_size()
fuse: fix deadlock between atomic O_TRUNC and page invalidation
fs/fuse/dir.c | 7 ++++++-
fs/fuse/file.c | 31 +++++++++++++++++-------------
fs/fuse/fuse_i.h | 3 +++
fs/fuse/inode.c | 5 +++--
fs/fuse/virtio_fs.c | 46 +++++++++++++++++++++++++++++----------------
5 files changed, 60 insertions(+), 32 deletions(-)
--
2.40.0
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x f1581626071c8e37c58c5e8f0b4126b17172a211
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041926-clique-washout-2197@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
f1581626071c ("riscv: Do not set initial_boot_params to the linear address of the dtb")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f1581626071c8e37c58c5e8f0b4126b17172a211 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alexghiti(a)rivosinc.com>
Date: Wed, 29 Mar 2023 10:19:31 +0200
Subject: [PATCH] riscv: Do not set initial_boot_params to the linear address
of the dtb
early_init_dt_verify() is already called in parse_dtb() and since the dtb
address does not change anymore (it is now in the fixmap region), no need
to reset initial_boot_params by calling early_init_dt_verify() again.
Signed-off-by: Alexandre Ghiti <alexghiti(a)rivosinc.com>
Link: https://lore.kernel.org/r/20230329081932.79831-3-alexghiti@rivosinc.com
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 542eed85ad2c..a059b73f4ddb 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -278,10 +278,7 @@ void __init setup_arch(char **cmdline_p)
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
- if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa))))
- unflatten_device_tree();
- else
- pr_err("No DTB found in kernel mappings\n");
+ unflatten_device_tree();
#endif
misc_mem_init();
Per-vcpu flags are updated using a non-atomic RMW operation.
Which means it is possible to get preempted between the read and
write operations.
Another interesting thing to note is that preemption also updates
flags, as we have some flag manipulation in both the load and put
operations.
It is thus possible to lose information communicated by either
load or put, as the preempted flag update will overwrite the flags
when the thread is resumed. This is specially critical if either
load or put has stored information which depends on the physical
CPU the vcpu runs on.
This results in really elusive bugs, and kudos must be given to
Mostafa for the long hours of debugging, and finally spotting
the problem.
Fix it by disabling preemption during the RMW operation, which
ensures that the state stays consistent. Also upgrade vcpu_get_flag
path to use READ_ONCE() to make sure the field is always atomically
accessed.
Fixes: e87abb73e594 ("KVM: arm64: Add helpers to manipulate vcpu flags among a set")
Reported-by: Mostafa Saleh <smostafa(a)google.com>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
Notes:
v2: add READ_ONCE() on the read path, expand commit message
arch/arm64/include/asm/kvm_host.h | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bcd774d74f34..3dd691c85ca0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -576,9 +576,22 @@ struct kvm_vcpu_arch {
({ \
__build_check_flag(v, flagset, f, m); \
\
- v->arch.flagset & (m); \
+ READ_ONCE(v->arch.flagset) & (m); \
})
+/*
+ * Note that the set/clear accessors must be preempt-safe in order to
+ * avoid nesting them with load/put which also manipulate flags...
+ */
+#ifdef __KVM_NVHE_HYPERVISOR__
+/* the nVHE hypervisor is always non-preemptible */
+#define __vcpu_flags_preempt_disable()
+#define __vcpu_flags_preempt_enable()
+#else
+#define __vcpu_flags_preempt_disable() preempt_disable()
+#define __vcpu_flags_preempt_enable() preempt_enable()
+#endif
+
#define __vcpu_set_flag(v, flagset, f, m) \
do { \
typeof(v->arch.flagset) *fset; \
@@ -586,9 +599,11 @@ struct kvm_vcpu_arch {
__build_check_flag(v, flagset, f, m); \
\
fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
if (HWEIGHT(m) > 1) \
*fset &= ~(m); \
*fset |= (f); \
+ __vcpu_flags_preempt_enable(); \
} while (0)
#define __vcpu_clear_flag(v, flagset, f, m) \
@@ -598,7 +613,9 @@ struct kvm_vcpu_arch {
__build_check_flag(v, flagset, f, m); \
\
fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
*fset &= ~(m); \
+ __vcpu_flags_preempt_enable(); \
} while (0)
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
--
2.34.1
The following commit has been merged into the timers/core branch of tip:
Commit-ID: 1bb5b68fd3aabb6b9d6b9e9bb092bb8f3c2ade62
Gitweb: https://git.kernel.org/tip/1bb5b68fd3aabb6b9d6b9e9bb092bb8f3c2ade62
Author: Thomas Gleixner <tglx(a)linutronix.de>
AuthorDate: Mon, 17 Apr 2023 15:37:55 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Wed, 19 Apr 2023 10:29:00 +02:00
posix-cpu-timers: Implement the missing timer_wait_running callback
For some unknown reason the introduction of the timer_wait_running callback
missed to fixup posix CPU timers, which went unnoticed for almost four years.
Marco reported recently that the WARN_ON() in timer_wait_running()
triggers with a posix CPU timer test case.
Posix CPU timers have two execution models for expiring timers depending on
CONFIG_POSIX_CPU_TIMERS_TASK_WORK:
1) If not enabled, the expiry happens in hard interrupt context so
spin waiting on the remote CPU is reasonably time bound.
Implement an empty stub function for that case.
2) If enabled, the expiry happens in task work before returning to user
space or guest mode. The expired timers are marked as firing and moved
from the timer queue to a local list head with sighand lock held. Once
the timers are moved, sighand lock is dropped and the expiry happens in
fully preemptible context. That means the expiring task can be scheduled
out, migrated, interrupted etc. So spin waiting on it is more than
suboptimal.
The timer wheel has a timer_wait_running() mechanism for RT, which uses
a per CPU timer-base expiry lock which is held by the expiry code and the
task waiting for the timer function to complete blocks on that lock.
This does not work in the same way for posix CPU timers as there is no
timer base and expiry for process wide timers can run on any task
belonging to that process, but the concept of waiting on an expiry lock
can be used too in a slightly different way:
- Add a mutex to struct posix_cputimers_work. This struct is per task
and used to schedule the expiry task work from the timer interrupt.
- Add a task_struct pointer to struct cpu_timer which is used to store
a the task which runs the expiry. That's filled in when the task
moves the expired timers to the local expiry list. That's not
affecting the size of the k_itimer union as there are bigger union
members already
- Let the task take the expiry mutex around the expiry function
- Let the waiter acquire a task reference with rcu_read_lock() held and
block on the expiry mutex
This avoids spin-waiting on a task which might not even be on a CPU and
works nicely for RT too.
Fixes: ec8f954a40da ("posix-timers: Use a callback for cancel synchronization on PREEMPT_RT")
Reported-by: Marco Elver <elver(a)google.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Marco Elver <elver(a)google.com>
Tested-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/87zg764ojw.ffs@tglx
---
include/linux/posix-timers.h | 17 ++++---
kernel/time/posix-cpu-timers.c | 81 +++++++++++++++++++++++++++------
kernel/time/posix-timers.c | 4 ++-
3 files changed, 82 insertions(+), 20 deletions(-)
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 2c6e99c..d607f51 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,6 +4,7 @@
#include <linux/spinlock.h>
#include <linux/list.h>
+#include <linux/mutex.h>
#include <linux/alarmtimer.h>
#include <linux/timerqueue.h>
@@ -62,16 +63,18 @@ static inline int clockid_to_fd(const clockid_t clk)
* cpu_timer - Posix CPU timer representation for k_itimer
* @node: timerqueue node to queue in the task/sig
* @head: timerqueue head on which this timer is queued
- * @task: Pointer to target task
+ * @pid: Pointer to target task PID
* @elist: List head for the expiry list
* @firing: Timer is currently firing
+ * @handling: Pointer to the task which handles expiry
*/
struct cpu_timer {
- struct timerqueue_node node;
- struct timerqueue_head *head;
- struct pid *pid;
- struct list_head elist;
- int firing;
+ struct timerqueue_node node;
+ struct timerqueue_head *head;
+ struct pid *pid;
+ struct list_head elist;
+ int firing;
+ struct task_struct __rcu *handling;
};
static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
@@ -135,10 +138,12 @@ struct posix_cputimers {
/**
* posix_cputimers_work - Container for task work based posix CPU timer expiry
* @work: The task work to be scheduled
+ * @mutex: Mutex held around expiry in context of this task work
* @scheduled: @work has been scheduled already, no further processing
*/
struct posix_cputimers_work {
struct callback_head work;
+ struct mutex mutex;
unsigned int scheduled;
};
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 2f5e9b3..fb56e02 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -846,6 +846,8 @@ static u64 collect_timerqueue(struct timerqueue_head *head,
return expires;
ctmr->firing = 1;
+ /* See posix_cpu_timer_wait_running() */
+ rcu_assign_pointer(ctmr->handling, current);
cpu_timer_dequeue(ctmr);
list_add_tail(&ctmr->elist, firing);
}
@@ -1161,7 +1163,49 @@ static void handle_posix_cpu_timers(struct task_struct *tsk);
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
static void posix_cpu_timers_work(struct callback_head *work)
{
+ struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work);
+
+ mutex_lock(&cw->mutex);
handle_posix_cpu_timers(current);
+ mutex_unlock(&cw->mutex);
+}
+
+/*
+ * Invoked from the posix-timer core when a cancel operation failed because
+ * the timer is marked firing. The caller holds rcu_read_lock(), which
+ * protects the timer and the task which is expiring it from being freed.
+ */
+static void posix_cpu_timer_wait_running(struct k_itimer *timr)
+{
+ struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling);
+
+ /* Has the handling task completed expiry already? */
+ if (!tsk)
+ return;
+
+ /* Ensure that the task cannot go away */
+ get_task_struct(tsk);
+ /* Now drop the RCU protection so the mutex can be locked */
+ rcu_read_unlock();
+ /* Wait on the expiry mutex */
+ mutex_lock(&tsk->posix_cputimers_work.mutex);
+ /* Release it immediately again. */
+ mutex_unlock(&tsk->posix_cputimers_work.mutex);
+ /* Drop the task reference. */
+ put_task_struct(tsk);
+ /* Relock RCU so the callsite is balanced */
+ rcu_read_lock();
+}
+
+static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
+{
+ /* Ensure that timr->it.cpu.handling task cannot go away */
+ rcu_read_lock();
+ spin_unlock_irq(&timr->it_lock);
+ posix_cpu_timer_wait_running(timr);
+ rcu_read_unlock();
+ /* @timr is on stack and is valid */
+ spin_lock_irq(&timr->it_lock);
}
/*
@@ -1177,6 +1221,7 @@ void clear_posix_cputimers_work(struct task_struct *p)
sizeof(p->posix_cputimers_work.work));
init_task_work(&p->posix_cputimers_work.work,
posix_cpu_timers_work);
+ mutex_init(&p->posix_cputimers_work.mutex);
p->posix_cputimers_work.scheduled = false;
}
@@ -1255,6 +1300,18 @@ static inline void __run_posix_cpu_timers(struct task_struct *tsk)
lockdep_posixtimer_exit();
}
+static void posix_cpu_timer_wait_running(struct k_itimer *timr)
+{
+ cpu_relax();
+}
+
+static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
+{
+ spin_unlock_irq(&timer.it_lock);
+ cpu_relax();
+ spin_lock_irq(&timer.it_lock);
+}
+
static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
{
return false;
@@ -1363,6 +1420,8 @@ static void handle_posix_cpu_timers(struct task_struct *tsk)
*/
if (likely(cpu_firing >= 0))
cpu_timer_fire(timer);
+ /* See posix_cpu_timer_wait_running() */
+ rcu_assign_pointer(timer->it.cpu.handling, NULL);
spin_unlock(&timer->it_lock);
}
}
@@ -1497,23 +1556,16 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
expires = cpu_timer_getexpires(&timer.it.cpu);
error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
if (!error) {
- /*
- * Timer is now unarmed, deletion can not fail.
- */
+ /* Timer is now unarmed, deletion can not fail. */
posix_cpu_timer_del(&timer);
+ } else {
+ while (error == TIMER_RETRY) {
+ posix_cpu_timer_wait_running_nsleep(&timer);
+ error = posix_cpu_timer_del(&timer);
+ }
}
- spin_unlock_irq(&timer.it_lock);
- while (error == TIMER_RETRY) {
- /*
- * We need to handle case when timer was or is in the
- * middle of firing. In other cases we already freed
- * resources.
- */
- spin_lock_irq(&timer.it_lock);
- error = posix_cpu_timer_del(&timer);
- spin_unlock_irq(&timer.it_lock);
- }
+ spin_unlock_irq(&timer.it_lock);
if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
/*
@@ -1623,6 +1675,7 @@ const struct k_clock clock_posix_cpu = {
.timer_del = posix_cpu_timer_del,
.timer_get = posix_cpu_timer_get,
.timer_rearm = posix_cpu_timer_rearm,
+ .timer_wait_running = posix_cpu_timer_wait_running,
};
const struct k_clock clock_process = {
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0c8a87a..808a247 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -846,6 +846,10 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
rcu_read_lock();
unlock_timer(timer, *flags);
+ /*
+ * kc->timer_wait_running() might drop RCU lock. So @timer
+ * cannot be touched anymore after the function returns!
+ */
if (!WARN_ON_ONCE(!kc->timer_wait_running))
kc->timer_wait_running(timer);
Christoph Paasch reported a couple of issues found by syzkaller and
linked to operations done by the MPTCP worker on (un)accepted sockets.
Fixing these issues was not obvious and rather complex but Paolo Abeni
nicely managed to propose these excellent patches that seem to satisfy
syzkaller.
Patch 1 partially reverts a recent fix but while still providing a
solution for the previous issue, it also prevents the MPTCP worker from
running concurrently with inet_csk_listen_stop(). A warning is then
avoided. The partially reverted patch has been introduced in v6.3-rc3,
backported up to v6.1 and fixing an issue visible from v5.18.
Patch 2 prevents the MPTCP worker to race with mptcp_accept() causing a
UaF when a fallback to TCP is done while in parallel, the socket is
being accepted by the userspace. This is also a fix of a previous fix
introduced in v6.3-rc3, backported up to v6.1 but here fixing an issue
that is in theory there from v5.7. There is no need to backport it up
to here as it looks like it is only visible later, around v5.18, see the
previous cover-letter linked to this original fix.
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
---
Paolo Abeni (2):
mptcp: stops worker on unaccepted sockets at listener close
mptcp: fix accept vs worker race
net/mptcp/protocol.c | 74 ++++++++++++++++++++++++++++++++----------------
net/mptcp/protocol.h | 2 ++
net/mptcp/subflow.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++--
3 files changed, 129 insertions(+), 27 deletions(-)
---
base-commit: 338469d677e5d426f5ada88761f16f6d2c7c1981
change-id: 20230417-upstream-net-20230417-mptcp-worker-acceptw-31f35d7c3e9a
Best regards,
--
Matthieu Baerts <matthieu.baerts(a)tessares.net>
Commit b94f9ac79a7395c2d6171cc753cc27942df0be73 upstream.
Since commit 1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to
percpu_rwsem"), cpuset_mutex has been replaced by cpuset_rwsem which is
a percpu rwsem. However, the comments in kernel/cgroup/cpuset.c still
reference cpuset_mutex which are now incorrect.
Change all the references of cpuset_mutex to cpuset_rwsem.
Fixes: 1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to percpu_rwsem")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Signed-off-by: Tejun Heo <tj(a)kernel.org>
---
kernel/cgroup/cpuset.c | 56 ++++++++++++++++++++++--------------------
1 file changed, 29 insertions(+), 27 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 43270b07b2e0..fc6aedb84b80 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -299,17 +299,19 @@ static struct cpuset top_cpuset = {
if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
/*
- * There are two global locks guarding cpuset structures - cpuset_mutex and
+ * There are two global locks guarding cpuset structures - cpuset_rwsem and
* callback_lock. We also require taking task_lock() when dereferencing a
* task's cpuset pointer. See "The task_lock() exception", at the end of this
- * comment.
+ * comment. The cpuset code uses only cpuset_rwsem write lock. Other
+ * kernel subsystems can use cpuset_read_lock()/cpuset_read_unlock() to
+ * prevent change to cpuset structures.
*
* A task must hold both locks to modify cpusets. If a task holds
- * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
+ * cpuset_rwsem, it blocks others wanting that rwsem, ensuring that it
* is the only task able to also acquire callback_lock and be able to
* modify cpusets. It can perform various checks on the cpuset structure
* first, knowing nothing will change. It can also allocate memory while
- * just holding cpuset_mutex. While it is performing these checks, various
+ * just holding cpuset_rwsem. While it is performing these checks, various
* callback routines can briefly acquire callback_lock to query cpusets.
* Once it is ready to make the changes, it takes callback_lock, blocking
* everyone else.
@@ -380,7 +382,7 @@ static inline bool is_in_v2_mode(void)
* One way or another, we guarantee to return some non-empty subset
* of cpu_online_mask.
*
- * Call with callback_lock or cpuset_mutex held.
+ * Call with callback_lock or cpuset_rwsem held.
*/
static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
{
@@ -410,7 +412,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
* One way or another, we guarantee to return some non-empty subset
* of node_states[N_MEMORY].
*
- * Call with callback_lock or cpuset_mutex held.
+ * Call with callback_lock or cpuset_rwsem held.
*/
static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
{
@@ -422,7 +424,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
/*
* update task's spread flag if cpuset's page/slab spread flag is set
*
- * Call with callback_lock or cpuset_mutex held.
+ * Call with callback_lock or cpuset_rwsem held.
*/
static void cpuset_update_task_spread_flag(struct cpuset *cs,
struct task_struct *tsk)
@@ -443,7 +445,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
*
* One cpuset is a subset of another if all its allowed CPUs and
* Memory Nodes are a subset of the other, and its exclusive flags
- * are only set if the other's are set. Call holding cpuset_mutex.
+ * are only set if the other's are set. Call holding cpuset_rwsem.
*/
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
@@ -552,7 +554,7 @@ static inline void free_cpuset(struct cpuset *cs)
* If we replaced the flag and mask values of the current cpuset
* (cur) with those values in the trial cpuset (trial), would
* our various subset and exclusive rules still be valid? Presumes
- * cpuset_mutex held.
+ * cpuset_rwsem held.
*
* 'cur' is the address of an actual, in-use cpuset. Operations
* such as list traversal that depend on the actual address of the
@@ -675,7 +677,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
rcu_read_unlock();
}
-/* Must be called with cpuset_mutex held. */
+/* Must be called with cpuset_rwsem held. */
static inline int nr_cpusets(void)
{
/* jump label reference count + the top-level cpuset */
@@ -701,7 +703,7 @@ static inline int nr_cpusets(void)
* domains when operating in the severe memory shortage situations
* that could cause allocation failures below.
*
- * Must be called with cpuset_mutex held.
+ * Must be called with cpuset_rwsem held.
*
* The three key local variables below are:
* cp - cpuset pointer, used (together with pos_css) to perform a
@@ -980,7 +982,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
* 'cpus' is removed, then call this routine to rebuild the
* scheduler's dynamic sched domains.
*
- * Call with cpuset_mutex held. Takes get_online_cpus().
+ * Call with cpuset_rwsem held. Takes get_online_cpus().
*/
static void rebuild_sched_domains_locked(void)
{
@@ -1053,7 +1055,7 @@ void rebuild_sched_domains(void)
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
*
* Iterate through each task of @cs updating its cpus_allowed to the
- * effective cpuset's. As this function is called with cpuset_mutex held,
+ * effective cpuset's. As this function is called with cpuset_rwsem held,
* cpuset membership stays stable.
*/
static void update_tasks_cpumask(struct cpuset *cs)
@@ -1328,7 +1330,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
*
* On legacy hierachy, effective_cpus will be the same with cpu_allowed.
*
- * Called with cpuset_mutex held
+ * Called with cpuset_rwsem held
*/
static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
{
@@ -1688,12 +1690,12 @@ static void *cpuset_being_rebound;
* @cs: the cpuset in which each task's mems_allowed mask needs to be changed
*
* Iterate through each task of @cs updating its mems_allowed to the
- * effective cpuset's. As this function is called with cpuset_mutex held,
+ * effective cpuset's. As this function is called with cpuset_rwsem held,
* cpuset membership stays stable.
*/
static void update_tasks_nodemask(struct cpuset *cs)
{
- static nodemask_t newmems; /* protected by cpuset_mutex */
+ static nodemask_t newmems; /* protected by cpuset_rwsem */
struct css_task_iter it;
struct task_struct *task;
@@ -1706,7 +1708,7 @@ static void update_tasks_nodemask(struct cpuset *cs)
* take while holding tasklist_lock. Forks can happen - the
* mpol_dup() cpuset_being_rebound check will catch such forks,
* and rebind their vma mempolicies too. Because we still hold
- * the global cpuset_mutex, we know that no other rebind effort
+ * the global cpuset_rwsem, we know that no other rebind effort
* will be contending for the global variable cpuset_being_rebound.
* It's ok if we rebind the same mm twice; mpol_rebind_mm()
* is idempotent. Also migrate pages in each mm to new nodes.
@@ -1752,7 +1754,7 @@ static void update_tasks_nodemask(struct cpuset *cs)
*
* On legacy hiearchy, effective_mems will be the same with mems_allowed.
*
- * Called with cpuset_mutex held
+ * Called with cpuset_rwsem held
*/
static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
{
@@ -1805,7 +1807,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
* mempolicies and if the cpuset is marked 'memory_migrate',
* migrate the tasks pages to the new memory.
*
- * Call with cpuset_mutex held. May take callback_lock during call.
+ * Call with cpuset_rwsem held. May take callback_lock during call.
* Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
* lock each such tasks mm->mmap_lock, scan its vma's and rebind
* their mempolicies to the cpusets new mems_allowed.
@@ -1895,7 +1897,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
* @cs: the cpuset in which each task's spread flags needs to be changed
*
* Iterate through each task of @cs updating its spread flags. As this
- * function is called with cpuset_mutex held, cpuset membership stays
+ * function is called with cpuset_rwsem held, cpuset membership stays
* stable.
*/
static void update_tasks_flags(struct cpuset *cs)
@@ -1915,7 +1917,7 @@ static void update_tasks_flags(struct cpuset *cs)
* cs: the cpuset to update
* turning_on: whether the flag is being set or cleared
*
- * Call with cpuset_mutex held.
+ * Call with cpuset_rwsem held.
*/
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
@@ -1964,7 +1966,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
* cs: the cpuset to update
* new_prs: new partition root state
*
- * Call with cpuset_mutex held.
+ * Call with cpuset_rwsem held.
*/
static int update_prstate(struct cpuset *cs, int new_prs)
{
@@ -2145,7 +2147,7 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
-/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
+/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
@@ -2197,7 +2199,7 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
}
/*
- * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach()
+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
* but we can't allocate it dynamically there. Define it global and
* allocate from cpuset_init().
*/
@@ -2205,7 +2207,7 @@ static cpumask_var_t cpus_attach;
static void cpuset_attach(struct cgroup_taskset *tset)
{
- /* static buf protected by cpuset_mutex */
+ /* static buf protected by cpuset_rwsem */
static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
@@ -2398,7 +2400,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
* operation like this one can lead to a deadlock through kernfs
* active_ref protection. Let's break the protection. Losing the
* protection is okay as we check whether @cs is online after
- * grabbing cpuset_mutex anyway. This only happens on the legacy
+ * grabbing cpuset_rwsem anyway. This only happens on the legacy
* hierarchies.
*/
css_get(&cs->css);
@@ -3637,7 +3639,7 @@ void __cpuset_memory_pressure_bump(void)
* - Used for /proc/<pid>/cpuset.
* - No need to task_lock(tsk) on this tsk->cpuset reference, as it
* doesn't really matter if tsk->cpuset changes after we read it,
- * and we take cpuset_mutex, keeping cpuset_attach() from changing it
+ * and we take cpuset_rwsem, keeping cpuset_attach() from changing it
* anyway.
*/
int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
--
2.31.1
The quilt patch titled
Subject: scripts/gdb: fix lx-timerlist for Python3
has been removed from the -mm tree. Its filename was
scripts-gdb-fix-lx-timerlist-for-python3.patch
This patch was dropped because it was merged into the mm-nonmm-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Peng Liu <liupeng17(a)lenovo.com>
Subject: scripts/gdb: fix lx-timerlist for Python3
Date: Tue, 21 Mar 2023 14:19:29 +0800
Below incompatibilities between Python2 and Python3 made lx-timerlist fail
to run under Python3.
o xrange() is replaced by range() in Python3
o bytes and str are different types in Python3
o the return value of Inferior.read_memory() is memoryview object in
Python3
akpm: cc stable so that older kernels are properly debuggable under newer
Python.
Link: https://lkml.kernel.org/r/TYCP286MB2146EE1180A4D5176CBA8AB2C6819@TYCP286MB2…
Signed-off-by: Peng Liu <liupeng17(a)lenovo.com>
Reviewed-by: Jan Kiszka <jan.kiszka(a)siemens.com>
Cc: Florian Fainelli <f.fainelli(a)gmail.com>
Cc: Kieran Bingham <kbingham(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
scripts/gdb/linux/timerlist.py | 4 +++-
scripts/gdb/linux/utils.py | 5 ++++-
2 files changed, 7 insertions(+), 2 deletions(-)
--- a/scripts/gdb/linux/timerlist.py~scripts-gdb-fix-lx-timerlist-for-python3
+++ a/scripts/gdb/linux/timerlist.py
@@ -72,7 +72,7 @@ def print_cpu(hrtimer_bases, cpu, max_cl
ts = cpus.per_cpu(tick_sched_ptr, cpu)
text = "cpu: {}\n".format(cpu)
- for i in xrange(max_clock_bases):
+ for i in range(max_clock_bases):
text += " clock {}:\n".format(i)
text += print_base(cpu_base['clock_base'][i])
@@ -157,6 +157,8 @@ def pr_cpumask(mask):
num_bytes = (nr_cpu_ids + 7) / 8
buf = utils.read_memoryview(inf, bits, num_bytes).tobytes()
buf = binascii.b2a_hex(buf)
+ if type(buf) is not str:
+ buf=buf.decode()
chunks = []
i = num_bytes
--- a/scripts/gdb/linux/utils.py~scripts-gdb-fix-lx-timerlist-for-python3
+++ a/scripts/gdb/linux/utils.py
@@ -88,7 +88,10 @@ def get_target_endianness():
def read_memoryview(inf, start, length):
- return memoryview(inf.read_memory(start, length))
+ m = inf.read_memory(start, length)
+ if type(m) is memoryview:
+ return m
+ return memoryview(m)
def read_u16(buffer, offset):
_
Patches currently in -mm which might be from liupeng17(a)lenovo.com are
The quilt patch titled
Subject: nilfs2: initialize unused bytes in segment summary blocks
has been removed from the -mm tree. Its filename was
nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: initialize unused bytes in segment summary blocks
Date: Tue, 18 Apr 2023 02:35:13 +0900
Syzbot still reports uninit-value in nilfs_add_checksums_on_logs() for
KMSAN enabled kernels after applying commit 7397031622e0 ("nilfs2:
initialize "struct nilfs_binfo_dat"->bi_pad field").
This is because the unused bytes at the end of each block in segment
summaries are not initialized. So this fixes the issue by padding the
unused bytes with null bytes.
Link: https://lkml.kernel.org/r/20230417173513.12598-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+048585f3f4227bb2b49b(a)syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=048585f3f4227bb2b49b
Cc: Alexander Potapenko <glider(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
--- a/fs/nilfs2/segment.c~nilfs2-initialize-unused-bytes-in-segment-summary-blocks
+++ a/fs/nilfs2/segment.c
@@ -430,6 +430,23 @@ static int nilfs_segctor_reset_segment_b
return 0;
}
+/**
+ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
+ * @sci: segment constructor object
+ *
+ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
+ * the current segment summary block.
+ */
+static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
+{
+ struct nilfs_segsum_pointer *ssp;
+
+ ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
+ if (ssp->offset < ssp->bh->b_size)
+ memset(ssp->bh->b_data + ssp->offset, 0,
+ ssp->bh->b_size - ssp->offset);
+}
+
static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
@@ -438,6 +455,7 @@ static int nilfs_segctor_feed_segment(st
* The current segment is filled up
* (internal code)
*/
+ nilfs_segctor_zeropad_segsum(sci);
sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
return nilfs_segctor_reset_segment_buffer(sci);
}
@@ -542,6 +560,7 @@ static int nilfs_segctor_add_file_block(
goto retry;
}
if (unlikely(required)) {
+ nilfs_segctor_zeropad_segsum(sci);
err = nilfs_segbuf_extend_segsum(segbuf);
if (unlikely(err))
goto failed;
@@ -1533,6 +1552,7 @@ static int nilfs_segctor_collect(struct
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
+ nilfs_segctor_zeropad_segsum(sci);
nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
return 0;
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
The quilt patch titled
Subject: mm: page_alloc: skip regions with hugetlbfs pages when allocating 1G pages
has been removed from the -mm tree. Its filename was
mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Mel Gorman <mgorman(a)techsingularity.net>
Subject: mm: page_alloc: skip regions with hugetlbfs pages when allocating 1G pages
Date: Fri, 14 Apr 2023 15:14:29 +0100
A bug was reported by Yuanxi Liu where allocating 1G pages at runtime is
taking an excessive amount of time for large amounts of memory. Further
testing allocating huge pages that the cost is linear i.e. if allocating
1G pages in batches of 10 then the time to allocate nr_hugepages from
10->20->30->etc increases linearly even though 10 pages are allocated at
each step. Profiles indicated that much of the time is spent checking the
validity within already existing huge pages and then attempting a
migration that fails after isolating the range, draining pages and a whole
lot of other useless work.
Commit eb14d4eefdc4 ("mm,page_alloc: drop unnecessary checks from
pfn_range_valid_contig") removed two checks, one which ignored huge pages
for contiguous allocations as huge pages can sometimes migrate. While
there may be value on migrating a 2M page to satisfy a 1G allocation, it's
potentially expensive if the 1G allocation fails and it's pointless to try
moving a 1G page for a new 1G allocation or scan the tail pages for valid
PFNs.
Reintroduce the PageHuge check and assume any contiguous region with
hugetlbfs pages is unsuitable for a new 1G allocation.
The hpagealloc test allocates huge pages in batches and reports the
average latency per page over time. This test happens just after boot
when fragmentation is not an issue. Units are in milliseconds.
hpagealloc
6.3.0-rc6 6.3.0-rc6 6.3.0-rc6
vanilla hugeallocrevert-v1r1 hugeallocsimple-v1r2
Min Latency 26.42 ( 0.00%) 5.07 ( 80.82%) 18.94 ( 28.30%)
1st-qrtle Latency 356.61 ( 0.00%) 5.34 ( 98.50%) 19.85 ( 94.43%)
2nd-qrtle Latency 697.26 ( 0.00%) 5.47 ( 99.22%) 20.44 ( 97.07%)
3rd-qrtle Latency 972.94 ( 0.00%) 5.50 ( 99.43%) 20.81 ( 97.86%)
Max-1 Latency 26.42 ( 0.00%) 5.07 ( 80.82%) 18.94 ( 28.30%)
Max-5 Latency 82.14 ( 0.00%) 5.11 ( 93.78%) 19.31 ( 76.49%)
Max-10 Latency 150.54 ( 0.00%) 5.20 ( 96.55%) 19.43 ( 87.09%)
Max-90 Latency 1164.45 ( 0.00%) 5.53 ( 99.52%) 20.97 ( 98.20%)
Max-95 Latency 1223.06 ( 0.00%) 5.55 ( 99.55%) 21.06 ( 98.28%)
Max-99 Latency 1278.67 ( 0.00%) 5.57 ( 99.56%) 22.56 ( 98.24%)
Max Latency 1310.90 ( 0.00%) 8.06 ( 99.39%) 26.62 ( 97.97%)
Amean Latency 678.36 ( 0.00%) 5.44 * 99.20%* 20.44 * 96.99%*
6.3.0-rc6 6.3.0-rc6 6.3.0-rc6
vanilla revert-v1 hugeallocfix-v2
Duration User 0.28 0.27 0.30
Duration System 808.66 17.77 35.99
Duration Elapsed 830.87 18.08 36.33
The vanilla kernel is poor, taking up to 1.3 second to allocate a huge
page and almost 10 minutes in total to run the test. Reverting the
problematic commit reduces it to 8ms at worst and the patch takes 26ms.
This patch fixes the main issue with skipping huge pages but leaves the
page_count() out because a page with an elevated count potentially can
migrate.
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=217022
Link: https://lkml.kernel.org/r/20230414141429.pwgieuwluxwez3rj@techsingularity.n…
Fixes: eb14d4eefdc4 ("mm,page_alloc: drop unnecessary checks from pfn_range_valid_contig")
Signed-off-by: Mel Gorman <mgorman(a)techsingularity.net>
Reported-by: Yuanxi Liu <y.liu(a)naruida.com>
Acked-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Oscar Salvador <osalvador(a)suse.de>
Cc: Matthew Wilcox <willy(a)infradead.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 3 +++
1 file changed, 3 insertions(+)
--- a/mm/page_alloc.c~mm-page_alloc-skip-regions-with-hugetlbfs-pages-when-allocating-1g-pages
+++ a/mm/page_alloc.c
@@ -9466,6 +9466,9 @@ static bool pfn_range_valid_contig(struc
if (PageReserved(page))
return false;
+
+ if (PageHuge(page))
+ return false;
}
return true;
}
_
Patches currently in -mm which might be from mgorman(a)techsingularity.net are
The quilt patch titled
Subject: mm/mmap: regression fix for unmapped_area{_topdown}
has been removed from the -mm tree. Its filename was
mm-mmap-regression-fix-for-unmapped_area_topdown.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Subject: mm/mmap: regression fix for unmapped_area{_topdown}
Date: Fri, 14 Apr 2023 14:59:19 -0400
The maple tree limits the gap returned to a window that specifically fits
what was asked. This may not be optimal in the case of switching search
directions or a gap that does not satisfy the requested space for other
reasons. Fix the search by retrying the operation and limiting the search
window in the rare occasion that a conflict occurs.
Link: https://lkml.kernel.org/r/20230414185919.4175572-1-Liam.Howlett@oracle.com
Fixes: 3499a13168da ("mm/mmap: use maple tree for unmapped_area{_topdown}")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Reported-by: Rick Edgecombe <rick.p.edgecombe(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mmap.c | 48 +++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 43 insertions(+), 5 deletions(-)
--- a/mm/mmap.c~mm-mmap-regression-fix-for-unmapped_area_topdown
+++ a/mm/mmap.c
@@ -1518,7 +1518,8 @@ static inline int accountable_mapping(st
*/
static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
- unsigned long length, gap;
+ unsigned long length, gap, low_limit;
+ struct vm_area_struct *tmp;
MA_STATE(mas, ¤t->mm->mm_mt, 0, 0);
@@ -1527,12 +1528,29 @@ static unsigned long unmapped_area(struc
if (length < info->length)
return -ENOMEM;
- if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
- length))
+ low_limit = info->low_limit;
+retry:
+ if (mas_empty_area(&mas, low_limit, info->high_limit - 1, length))
return -ENOMEM;
gap = mas.index;
gap += (info->align_offset - gap) & info->align_mask;
+ tmp = mas_next(&mas, ULONG_MAX);
+ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (vm_start_gap(tmp) < gap + length - 1) {
+ low_limit = tmp->vm_end;
+ mas_reset(&mas);
+ goto retry;
+ }
+ } else {
+ tmp = mas_prev(&mas, 0);
+ if (tmp && vm_end_gap(tmp) > gap) {
+ low_limit = vm_end_gap(tmp);
+ mas_reset(&mas);
+ goto retry;
+ }
+ }
+
return gap;
}
@@ -1548,7 +1566,8 @@ static unsigned long unmapped_area(struc
*/
static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
- unsigned long length, gap;
+ unsigned long length, gap, high_limit, gap_end;
+ struct vm_area_struct *tmp;
MA_STATE(mas, ¤t->mm->mm_mt, 0, 0);
/* Adjust search length to account for worst case alignment overhead */
@@ -1556,12 +1575,31 @@ static unsigned long unmapped_area_topdo
if (length < info->length)
return -ENOMEM;
- if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+ high_limit = info->high_limit;
+retry:
+ if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1,
length))
return -ENOMEM;
gap = mas.last + 1 - info->length;
gap -= (gap - info->align_offset) & info->align_mask;
+ gap_end = mas.last;
+ tmp = mas_next(&mas, ULONG_MAX);
+ if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+ if (vm_start_gap(tmp) <= gap_end) {
+ high_limit = vm_start_gap(tmp);
+ mas_reset(&mas);
+ goto retry;
+ }
+ } else {
+ tmp = mas_prev(&mas, 0);
+ if (tmp && vm_end_gap(tmp) > gap) {
+ high_limit = tmp->vm_start;
+ mas_reset(&mas);
+ goto retry;
+ }
+ }
+
return gap;
}
_
Patches currently in -mm which might be from Liam.Howlett(a)oracle.com are
The quilt patch titled
Subject: maple_tree: fix mas_empty_area() search
has been removed from the -mm tree. Its filename was
maple_tree-fix-mas_empty_area-search.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Subject: maple_tree: fix mas_empty_area() search
Date: Fri, 14 Apr 2023 10:57:27 -0400
The internal function of mas_awalk() was incorrectly skipping the last
entry in a node, which could potentially be NULL. This is only a problem
for the left-most node in the tree - otherwise that NULL would not exist.
Fix mas_awalk() by using the metadata to obtain the end of the node for
the loop and the logical pivot as apposed to the raw pivot value.
Link: https://lkml.kernel.org/r/20230414145728.4067069-2-Liam.Howlett@oracle.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Reported-by: Rick Edgecombe <rick.p.edgecombe(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/maple_tree.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
--- a/lib/maple_tree.c~maple_tree-fix-mas_empty_area-search
+++ a/lib/maple_tree.c
@@ -5056,10 +5056,10 @@ static inline bool mas_anode_descend(str
{
enum maple_type type = mte_node_type(mas->node);
unsigned long pivot, min, gap = 0;
- unsigned char offset;
- unsigned long *gaps;
- unsigned long *pivots = ma_pivots(mas_mn(mas), type);
- void __rcu **slots = ma_slots(mas_mn(mas), type);
+ unsigned char offset, data_end;
+ unsigned long *gaps, *pivots;
+ void __rcu **slots;
+ struct maple_node *node;
bool found = false;
if (ma_is_dense(type)) {
@@ -5067,13 +5067,15 @@ static inline bool mas_anode_descend(str
return true;
}
- gaps = ma_gaps(mte_to_node(mas->node), type);
+ node = mas_mn(mas);
+ pivots = ma_pivots(node, type);
+ slots = ma_slots(node, type);
+ gaps = ma_gaps(node, type);
offset = mas->offset;
min = mas_safe_min(mas, pivots, offset);
- for (; offset < mt_slots[type]; offset++) {
- pivot = mas_safe_pivot(mas, pivots, offset, type);
- if (offset && !pivot)
- break;
+ data_end = ma_data_end(node, type, pivots, mas->max);
+ for (; offset <= data_end; offset++) {
+ pivot = mas_logical_pivot(mas, pivots, offset, type);
/* Not within lower bounds */
if (mas->index > pivot)
_
Patches currently in -mm which might be from Liam.Howlett(a)oracle.com are
The quilt patch titled
Subject: maple_tree: make maple state reusable after mas_empty_area_rev()
has been removed from the -mm tree. Its filename was
maple_tree-make-maple-state-reusable-after-mas_empty_area_rev.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Liam R. Howlett" <Liam.Howlett(a)oracle.com>
Subject: maple_tree: make maple state reusable after mas_empty_area_rev()
Date: Fri, 14 Apr 2023 10:57:26 -0400
Stop using maple state min/max for the range by passing through pointers
for those values. This will allow the maple state to be reused without
resetting.
Also add some logic to fail out early on searching with invalid
arguments.
Link: https://lkml.kernel.org/r/20230414145728.4067069-1-Liam.Howlett@oracle.com
Fixes: 54a611b60590 ("Maple Tree: add new data structure")
Signed-off-by: Liam R. Howlett <Liam.Howlett(a)oracle.com>
Reported-by: Rick Edgecombe <rick.p.edgecombe(a)intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/maple_tree.c | 27 +++++++++++++--------------
1 file changed, 13 insertions(+), 14 deletions(-)
--- a/lib/maple_tree.c~maple_tree-make-maple-state-reusable-after-mas_empty_area_rev
+++ a/lib/maple_tree.c
@@ -4965,7 +4965,8 @@ not_found:
* Return: True if found in a leaf, false otherwise.
*
*/
-static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
+static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
+ unsigned long *gap_min, unsigned long *gap_max)
{
enum maple_type type = mte_node_type(mas->node);
struct maple_node *node = mas_mn(mas);
@@ -5030,8 +5031,8 @@ static bool mas_rev_awalk(struct ma_stat
if (unlikely(ma_is_leaf(type))) {
mas->offset = offset;
- mas->min = min;
- mas->max = min + gap - 1;
+ *gap_min = min;
+ *gap_max = min + gap - 1;
return true;
}
@@ -5307,6 +5308,9 @@ int mas_empty_area(struct ma_state *mas,
unsigned long *pivots;
enum maple_type mt;
+ if (min >= max)
+ return -EINVAL;
+
if (mas_is_start(mas))
mas_start(mas);
else if (mas->offset >= 2)
@@ -5361,6 +5365,9 @@ int mas_empty_area_rev(struct ma_state *
{
struct maple_enode *last = mas->node;
+ if (min >= max)
+ return -EINVAL;
+
if (mas_is_start(mas)) {
mas_start(mas);
mas->offset = mas_data_end(mas);
@@ -5380,7 +5387,7 @@ int mas_empty_area_rev(struct ma_state *
mas->index = min;
mas->last = max;
- while (!mas_rev_awalk(mas, size)) {
+ while (!mas_rev_awalk(mas, size, &min, &max)) {
if (last == mas->node) {
if (!mas_rewind_node(mas))
return -EBUSY;
@@ -5395,17 +5402,9 @@ int mas_empty_area_rev(struct ma_state *
if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
return -EBUSY;
- /*
- * mas_rev_awalk() has set mas->min and mas->max to the gap values. If
- * the maximum is outside the window we are searching, then use the last
- * location in the search.
- * mas->max and mas->min is the range of the gap.
- * mas->index and mas->last are currently set to the search range.
- */
-
/* Trim the upper limit to the max. */
- if (mas->max <= mas->last)
- mas->last = mas->max;
+ if (max <= mas->last)
+ mas->last = max;
mas->index = mas->last - size + 1;
return 0;
_
Patches currently in -mm which might be from Liam.Howlett(a)oracle.com are
The quilt patch titled
Subject: mm: fix memory leak on mm_init error handling
has been removed from the -mm tree. Its filename was
mm-fix-memory-leak-on-mm_init-error-handling.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Subject: mm: fix memory leak on mm_init error handling
Date: Thu, 30 Mar 2023 09:38:22 -0400
commit f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter")
introduces a memory leak by missing a call to destroy_context() when a
percpu_counter fails to allocate.
Before introducing the per-cpu counter allocations, init_new_context() was
the last call that could fail in mm_init(), and thus there was no need to
ever invoke destroy_context() in the error paths. Adding the following
percpu counter allocations adds error paths after init_new_context(),
which means its associated destroy_context() needs to be called when
percpu counters fail to allocate.
Link: https://lkml.kernel.org/r/20230330133822.66271-1-mathieu.desnoyers@efficios…
Fixes: f1a7941243c1 ("mm: convert mm's rss stats into percpu_counter")
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Acked-by: Shakeel Butt <shakeelb(a)google.com>
Cc: Marek Szyprowski <m.szyprowski(a)samsung.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/fork.c | 1 +
1 file changed, 1 insertion(+)
--- a/kernel/fork.c~mm-fix-memory-leak-on-mm_init-error-handling
+++ a/kernel/fork.c
@@ -1174,6 +1174,7 @@ static struct mm_struct *mm_init(struct
fail_pcpu:
while (i > 0)
percpu_counter_destroy(&mm->rss_stat[--i]);
+ destroy_context(mm);
fail_nocontext:
mm_free_pgd(mm);
fail_nopgd:
_
Patches currently in -mm which might be from mathieu.desnoyers(a)efficios.com are
The quilt patch titled
Subject: mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock
has been removed from the -mm tree. Its filename was
mm-page_alloc-fix-potential-deadlock-on-zonelist_update_seq-seqlock.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Subject: mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock
Date: Tue, 4 Apr 2023 23:31:58 +0900
syzbot is reporting circular locking dependency which involves
zonelist_update_seq seqlock [1], for this lock is checked by memory
allocation requests which do not need to be retried.
One deadlock scenario is kmalloc(GFP_ATOMIC) from an interrupt handler.
CPU0
----
__build_all_zonelists() {
write_seqlock(&zonelist_update_seq); // makes zonelist_update_seq.seqcount odd
// e.g. timer interrupt handler runs at this moment
some_timer_func() {
kmalloc(GFP_ATOMIC) {
__alloc_pages_slowpath() {
read_seqbegin(&zonelist_update_seq) {
// spins forever because zonelist_update_seq.seqcount is odd
}
}
}
}
// e.g. timer interrupt handler finishes
write_sequnlock(&zonelist_update_seq); // makes zonelist_update_seq.seqcount even
}
This deadlock scenario can be easily eliminated by not calling
read_seqbegin(&zonelist_update_seq) from !__GFP_DIRECT_RECLAIM allocation
requests, for retry is applicable to only __GFP_DIRECT_RECLAIM allocation
requests. But Michal Hocko does not know whether we should go with this
approach.
Another deadlock scenario which syzbot is reporting is a race between
kmalloc(GFP_ATOMIC) from tty_insert_flip_string_and_push_buffer() with
port->lock held and printk() from __build_all_zonelists() with
zonelist_update_seq held.
CPU0 CPU1
---- ----
pty_write() {
tty_insert_flip_string_and_push_buffer() {
__build_all_zonelists() {
write_seqlock(&zonelist_update_seq);
build_zonelists() {
printk() {
vprintk() {
vprintk_default() {
vprintk_emit() {
console_unlock() {
console_flush_all() {
console_emit_next_record() {
con->write() = serial8250_console_write() {
spin_lock_irqsave(&port->lock, flags);
tty_insert_flip_string() {
tty_insert_flip_string_fixed_flag() {
__tty_buffer_request_room() {
tty_buffer_alloc() {
kmalloc(GFP_ATOMIC | __GFP_NOWARN) {
__alloc_pages_slowpath() {
zonelist_iter_begin() {
read_seqbegin(&zonelist_update_seq); // spins forever because zonelist_update_seq.seqcount is odd
spin_lock_irqsave(&port->lock, flags); // spins forever because port->lock is held
}
}
}
}
}
}
}
}
spin_unlock_irqrestore(&port->lock, flags);
// message is printed to console
spin_unlock_irqrestore(&port->lock, flags);
}
}
}
}
}
}
}
}
}
write_sequnlock(&zonelist_update_seq);
}
}
}
This deadlock scenario can be eliminated by
preventing interrupt context from calling kmalloc(GFP_ATOMIC)
and
preventing printk() from calling console_flush_all()
while zonelist_update_seq.seqcount is odd.
Since Petr Mladek thinks that __build_all_zonelists() can become a
candidate for deferring printk() [2], let's address this problem by
disabling local interrupts in order to avoid kmalloc(GFP_ATOMIC)
and
disabling synchronous printk() in order to avoid console_flush_all()
.
As a side effect of minimizing duration of zonelist_update_seq.seqcount
being odd by disabling synchronous printk(), latency at
read_seqbegin(&zonelist_update_seq) for both !__GFP_DIRECT_RECLAIM and
__GFP_DIRECT_RECLAIM allocation requests will be reduced. Although, from
lockdep perspective, not calling read_seqbegin(&zonelist_update_seq) (i.e.
do not record unnecessary locking dependency) from interrupt context is
still preferable, even if we don't allow calling kmalloc(GFP_ATOMIC)
inside
write_seqlock(&zonelist_update_seq)/write_sequnlock(&zonelist_update_seq)
section...
Link: https://lkml.kernel.org/r/8796b95c-3da3-5885-fddd-6ef55f30e4d3@I-love.SAKUR…
Fixes: 3d36424b3b58 ("mm/page_alloc: fix race condition between build_all_zonelists and page allocation")
Link: https://lkml.kernel.org/r/ZCrs+1cDqPWTDFNM@alley [2]
Reported-by: syzbot <syzbot+223c7461c58c58a4cb10(a)syzkaller.appspotmail.com>
Link: https://syzkaller.appspot.com/bug?extid=223c7461c58c58a4cb10 [1]
Signed-off-by: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Acked-by: Mel Gorman <mgorman(a)techsingularity.net>
Cc: Petr Mladek <pmladek(a)suse.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Ilpo J��rvinen <ilpo.jarvinen(a)linux.intel.com>
Cc: John Ogness <john.ogness(a)linutronix.de>
Cc: Patrick Daly <quic_pdaly(a)quicinc.com>
Cc: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: Steven Rostedt <rostedt(a)goodmis.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/page_alloc.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
--- a/mm/page_alloc.c~mm-page_alloc-fix-potential-deadlock-on-zonelist_update_seq-seqlock
+++ a/mm/page_alloc.c
@@ -6632,7 +6632,21 @@ static void __build_all_zonelists(void *
int nid;
int __maybe_unused cpu;
pg_data_t *self = data;
+ unsigned long flags;
+ /*
+ * Explicitly disable this CPU's interrupts before taking seqlock
+ * to prevent any IRQ handler from calling into the page allocator
+ * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
+ */
+ local_irq_save(flags);
+ /*
+ * Explicitly disable this CPU's synchronous printk() before taking
+ * seqlock to prevent any printk() from trying to hold port->lock, for
+ * tty_insert_flip_string_and_push_buffer() on other CPU might be
+ * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
+ */
+ printk_deferred_enter();
write_seqlock(&zonelist_update_seq);
#ifdef CONFIG_NUMA
@@ -6671,6 +6685,8 @@ static void __build_all_zonelists(void *
}
write_sequnlock(&zonelist_update_seq);
+ printk_deferred_exit();
+ local_irq_restore(flags);
}
static noinline void __init
_
Patches currently in -mm which might be from penguin-kernel(a)I-love.SAKURA.ne.jp are
The quilt patch titled
Subject: kernel/sys.c: fix and improve control flow in __sys_setres[ug]id()
has been removed from the -mm tree. Its filename was
kernel-sysc-fix-and-improve-control-flow-in-__sys_setresid.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ondrej Mosnacek <omosnace(a)redhat.com>
Subject: kernel/sys.c: fix and improve control flow in __sys_setres[ug]id()
Date: Fri, 17 Feb 2023 17:21:54 +0100
Linux Security Modules (LSMs) that implement the "capable" hook will
usually emit an access denial message to the audit log whenever they
"block" the current task from using the given capability based on their
security policy.
The occurrence of a denial is used as an indication that the given task
has attempted an operation that requires the given access permission, so
the callers of functions that perform LSM permission checks must take care
to avoid calling them too early (before it is decided if the permission is
actually needed to perform the requested operation).
The __sys_setres[ug]id() functions violate this convention by first
calling ns_capable_setid() and only then checking if the operation
requires the capability or not. It means that any caller that has the
capability granted by DAC (task's capability set) but not by MAC (LSMs)
will generate a "denied" audit record, even if is doing an operation for
which the capability is not required.
Fix this by reordering the checks such that ns_capable_setid() is checked
last and -EPERM is returned immediately if it returns false.
While there, also do two small optimizations:
* move the capability check before prepare_creds() and
* bail out early in case of a no-op.
Link: https://lkml.kernel.org/r/20230217162154.837549-1-omosnace@redhat.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Ondrej Mosnacek <omosnace(a)redhat.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/sys.c | 69 ++++++++++++++++++++++++++++---------------------
1 file changed, 40 insertions(+), 29 deletions(-)
--- a/kernel/sys.c~kernel-sysc-fix-and-improve-control-flow-in-__sys_setresid
+++ a/kernel/sys.c
@@ -664,6 +664,7 @@ long __sys_setresuid(uid_t ruid, uid_t e
struct cred *new;
int retval;
kuid_t kruid, keuid, ksuid;
+ bool ruid_new, euid_new, suid_new;
kruid = make_kuid(ns, ruid);
keuid = make_kuid(ns, euid);
@@ -678,25 +679,29 @@ long __sys_setresuid(uid_t ruid, uid_t e
if ((suid != (uid_t) -1) && !uid_valid(ksuid))
return -EINVAL;
+ old = current_cred();
+
+ /* check for no-op */
+ if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
+ (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
+ uid_eq(keuid, old->fsuid))) &&
+ (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
+ return 0;
+
+ ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
+ !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
+ euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
+ !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
+ suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
+ !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
+ if ((ruid_new || euid_new || suid_new) &&
+ !ns_capable_setid(old->user_ns, CAP_SETUID))
+ return -EPERM;
+
new = prepare_creds();
if (!new)
return -ENOMEM;
- old = current_cred();
-
- retval = -EPERM;
- if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
- if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
- !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
- goto error;
- if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
- !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
- goto error;
- if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
- !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
- goto error;
- }
-
if (ruid != (uid_t) -1) {
new->uid = kruid;
if (!uid_eq(kruid, old->uid)) {
@@ -761,6 +766,7 @@ long __sys_setresgid(gid_t rgid, gid_t e
struct cred *new;
int retval;
kgid_t krgid, kegid, ksgid;
+ bool rgid_new, egid_new, sgid_new;
krgid = make_kgid(ns, rgid);
kegid = make_kgid(ns, egid);
@@ -773,23 +779,28 @@ long __sys_setresgid(gid_t rgid, gid_t e
if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
return -EINVAL;
+ old = current_cred();
+
+ /* check for no-op */
+ if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
+ (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
+ gid_eq(kegid, old->fsgid))) &&
+ (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
+ return 0;
+
+ rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
+ !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
+ egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
+ !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
+ sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
+ !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
+ if ((rgid_new || egid_new || sgid_new) &&
+ !ns_capable_setid(old->user_ns, CAP_SETGID))
+ return -EPERM;
+
new = prepare_creds();
if (!new)
return -ENOMEM;
- old = current_cred();
-
- retval = -EPERM;
- if (!ns_capable_setid(old->user_ns, CAP_SETGID)) {
- if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
- !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
- goto error;
- if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
- !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
- goto error;
- if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
- !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
- goto error;
- }
if (rgid != (gid_t) -1)
new->gid = krgid;
_
Patches currently in -mm which might be from omosnace(a)redhat.com are
On Tue, Aug 23, 2022 at 07:20:14AM -0500, Bjorn Helgaas wrote:
> On Tue, Aug 23, 2022, 6:35 AM Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
> wrote:
>
> > From: Stefan Roese <sr(a)denx.de>
> >
> > [ Upstream commit 8795e182b02dc87e343c79e73af6b8b7f9c5e635 ]
> >
>
> There's an open regression related to this commit:
>
> https://bugzilla.kernel.org/show_bug.cgi?id=216373
This is already in the following released stable kernels:
5.10.137 5.15.61 5.18.18 5.19.2
I'll go drop it from the 4.19 and 5.4 queues, but when this gets
resolved in Linus's tree, make sure there's a cc: stable on the fix so
that we know to backport it to the above branches as well. Or at the
least, a "Fixes:" tag.
thanks,
greg k-h
The patch titled
Subject: mm/shmem: Fix race in shmem_undo_range w/THP
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-shmem-fix-race-in-shmem_undo_range-w-thp.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: David Stevens <stevensd(a)chromium.org>
Subject: mm/shmem: Fix race in shmem_undo_range w/THP
Date: Tue, 18 Apr 2023 17:40:31 +0900
Split folios during the second loop of shmem_undo_range. It's not
sufficient to only split folios when dealing with partial pages, since
it's possible for a THP to be faulted in after that point. Calling
truncate_inode_folio in that situation can result in throwing away data
outside of the range being targeted.
Link: https://lkml.kernel.org/r/20230418084031.3439795-1-stevensd@google.com
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Signed-off-by: David Stevens <stevensd(a)chromium.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Suleiman Souhlal <suleiman(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/shmem.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
--- a/mm/shmem.c~mm-shmem-fix-race-in-shmem_undo_range-w-thp
+++ a/mm/shmem.c
@@ -1022,7 +1022,22 @@ whole_folios:
}
VM_BUG_ON_FOLIO(folio_test_writeback(folio),
folio);
- truncate_inode_folio(mapping, folio);
+
+ if (!folio_test_large(folio)) {
+ truncate_inode_folio(mapping, folio);
+ } else if (truncate_inode_partial_folio(folio, lstart, lend)) {
+ /*
+ * If we split a page, reset the loop so that we
+ * pick up the new sub pages. Otherwise the THP
+ * was entirely dropped or the target range was
+ * zeroed, so just continue the loop as is.
+ */
+ if (!folio_test_large(folio)) {
+ folio_unlock(folio);
+ index = start;
+ break;
+ }
+ }
}
folio_unlock(folio);
}
_
Patches currently in -mm which might be from stevensd(a)chromium.org are
mm-shmem-fix-race-in-shmem_undo_range-w-thp.patch
mm-khugepaged-drain-lru-after-swapping-in-shmem.patch
mm-khugepaged-refactor-collapse_file-control-flow.patch
mm-khugepaged-skip-shmem-with-userfaultfd.patch
mm-khugepaged-maintain-page-cache-uptodate-flag.patch
The patch titled
Subject: kasan: hw_tags: avoid invalid virt_to_page()
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
kasan-hw_tags-avoid-invalid-virt_to_page.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Mark Rutland <mark.rutland(a)arm.com>
Subject: kasan: hw_tags: avoid invalid virt_to_page()
Date: Tue, 18 Apr 2023 17:42:12 +0100
When booting with 'kasan.vmalloc=off', a kernel configured with support
for KASAN_HW_TAGS will explode at boot time due to bogus use of
virt_to_page() on a vmalloc adddress. With CONFIG_DEBUG_VIRTUAL selected
this will be reported explicitly, and with or without CONFIG_DEBUG_VIRTUAL
the kernel will dereference a bogus address:
| ------------[ cut here ]------------
| virt_to_phys used for non-linear address: (____ptrval____) (0xffff800008000000)
| WARNING: CPU: 0 PID: 0 at arch/arm64/mm/physaddr.c:15 __virt_to_phys+0x78/0x80
| Modules linked in:
| CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.3.0-rc3-00073-g83865133300d-dirty #4
| Hardware name: linux,dummy-virt (DT)
| pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : __virt_to_phys+0x78/0x80
| lr : __virt_to_phys+0x78/0x80
| sp : ffffcd076afd3c80
| x29: ffffcd076afd3c80 x28: 0068000000000f07 x27: ffff800008000000
| x26: fffffbfff0000000 x25: fffffbffff000000 x24: ff00000000000000
| x23: ffffcd076ad3c000 x22: fffffc0000000000 x21: ffff800008000000
| x20: ffff800008004000 x19: ffff800008000000 x18: ffff800008004000
| x17: 666678302820295f x16: ffffffffffffffff x15: 0000000000000004
| x14: ffffcd076b009e88 x13: 0000000000000fff x12: 0000000000000003
| x11: 00000000ffffefff x10: c0000000ffffefff x9 : 0000000000000000
| x8 : 0000000000000000 x7 : 205d303030303030 x6 : 302e30202020205b
| x5 : ffffcd076b41d63f x4 : ffffcd076afd3827 x3 : 0000000000000000
| x2 : 0000000000000000 x1 : ffffcd076afd3a30 x0 : 000000000000004f
| Call trace:
| __virt_to_phys+0x78/0x80
| __kasan_unpoison_vmalloc+0xd4/0x478
| __vmalloc_node_range+0x77c/0x7b8
| __vmalloc_node+0x54/0x64
| init_IRQ+0x94/0xc8
| start_kernel+0x194/0x420
| __primary_switched+0xbc/0xc4
| ---[ end trace 0000000000000000 ]---
| Unable to handle kernel paging request at virtual address 03fffacbe27b8000
| Mem abort info:
| ESR = 0x0000000096000004
| EC = 0x25: DABT (current EL), IL = 32 bits
| SET = 0, FnV = 0
| EA = 0, S1PTW = 0
| FSC = 0x04: level 0 translation fault
| Data abort info:
| ISV = 0, ISS = 0x00000004
| CM = 0, WnR = 0
| swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000041bc5000
| [03fffacbe27b8000] pgd=0000000000000000, p4d=0000000000000000
| Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
| Modules linked in:
| CPU: 0 PID: 0 Comm: swapper/0 Tainted: G W 6.3.0-rc3-00073-g83865133300d-dirty #4
| Hardware name: linux,dummy-virt (DT)
| pstate: 200000c5 (nzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : __kasan_unpoison_vmalloc+0xe4/0x478
| lr : __kasan_unpoison_vmalloc+0xd4/0x478
| sp : ffffcd076afd3ca0
| x29: ffffcd076afd3ca0 x28: 0068000000000f07 x27: ffff800008000000
| x26: 0000000000000000 x25: 03fffacbe27b8000 x24: ff00000000000000
| x23: ffffcd076ad3c000 x22: fffffc0000000000 x21: ffff800008000000
| x20: ffff800008004000 x19: ffff800008000000 x18: ffff800008004000
| x17: 666678302820295f x16: ffffffffffffffff x15: 0000000000000004
| x14: ffffcd076b009e88 x13: 0000000000000fff x12: 0000000000000001
| x11: 0000800008000000 x10: ffff800008000000 x9 : ffffb2f8dee00000
| x8 : 000ffffb2f8dee00 x7 : 205d303030303030 x6 : 302e30202020205b
| x5 : ffffcd076b41d63f x4 : ffffcd076afd3827 x3 : 0000000000000000
| x2 : 0000000000000000 x1 : ffffcd076afd3a30 x0 : ffffb2f8dee00000
| Call trace:
| __kasan_unpoison_vmalloc+0xe4/0x478
| __vmalloc_node_range+0x77c/0x7b8
| __vmalloc_node+0x54/0x64
| init_IRQ+0x94/0xc8
| start_kernel+0x194/0x420
| __primary_switched+0xbc/0xc4
| Code: d34cfc08 aa1f03fa 8b081b39 d503201f (f9400328)
| ---[ end trace 0000000000000000 ]---
| Kernel panic - not syncing: Attempted to kill the idle task!
This is because init_vmalloc_pages() erroneously calls virt_to_page() on
a vmalloc address, while virt_to_page() is only valid for addresses in
the linear/direct map. Since init_vmalloc_pages() expects virtual
addresses in the vmalloc range, it must use vmalloc_to_page() rather
than virt_to_page().
We call init_vmalloc_pages() from __kasan_unpoison_vmalloc(), where we
check !is_vmalloc_or_module_addr(), suggesting that we might encounter a
non-vmalloc address. Luckily, this never happens. By design, we only
call __kasan_unpoison_vmalloc() on pointers in the vmalloc area, and I
have verified that we don't violate that expectation. Given that,
is_vmalloc_or_module_addr() must always be true for any legitimate
argument to __kasan_unpoison_vmalloc().
Correct init_vmalloc_pages() to use vmalloc_to_page(), and remove the
redundant and misleading use of is_vmalloc_or_module_addr() in
__kasan_unpoison_vmalloc().
Link: https://lkml.kernel.org/r/20230418164212.1775741-1-mark.rutland@arm.com
Fixes: 6c2f761dad7851d8 ("kasan: fix zeroing vmalloc memory with HW_TAGS")
Signed-off-by: Mark Rutland <mark.rutland(a)arm.com>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)google.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kasan/hw_tags.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- a/mm/kasan/hw_tags.c~kasan-hw_tags-avoid-invalid-virt_to_page
+++ a/mm/kasan/hw_tags.c
@@ -285,7 +285,7 @@ static void init_vmalloc_pages(const voi
const void *addr;
for (addr = start; addr < start + size; addr += PAGE_SIZE) {
- struct page *page = virt_to_page(addr);
+ struct page *page = vmalloc_to_page(addr);
clear_highpage_kasan_tagged(page);
}
@@ -297,7 +297,7 @@ void *__kasan_unpoison_vmalloc(const voi
u8 tag;
unsigned long redzone_start, redzone_size;
- if (!kasan_vmalloc_enabled() || !is_vmalloc_or_module_addr(start)) {
+ if (!kasan_vmalloc_enabled()) {
if (flags & KASAN_VMALLOC_INIT)
init_vmalloc_pages(start, size);
return (void *)start;
_
Patches currently in -mm which might be from mark.rutland(a)arm.com are
kasan-hw_tags-avoid-invalid-virt_to_page.patch
Hi all,
This series is a backport of upstream commit e89c2e815e76 ("riscv:
Handle zicsr/zifencei issues between clang and binutils") to
linux-5.10.y, with the necessary machinery for CONFIG_AS_IS_GNU and
CONFIG_AS_VERSION, which that commit requires.
While the middle two patches are not strictly necessary, they are good
clean ups that ensure consistency with mainline. The first three changes
are already present in 5.15, so there is no risk of a regression moving
forward.
If there are any issues, please let me know.
NOTE: I am sending this series with 'b4 send', as that is what I am used
to at this point. Please accept my apologies if this causes any issues.
---
Masahiro Yamada (2):
kbuild: check the minimum assembler version in Kconfig
kbuild: check CONFIG_AS_IS_LLVM instead of LLVM_IAS
Nathan Chancellor (2):
kbuild: Switch to 'f' variants of integrated assembler flag
riscv: Handle zicsr/zifencei issues between clang and binutils
Makefile | 8 +++---
arch/riscv/Kconfig | 22 ++++++++++++++++
arch/riscv/Makefile | 12 +++++----
init/Kconfig | 12 +++++++++
scripts/Kconfig.include | 6 +++++
scripts/as-version.sh | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
scripts/dummy-tools/gcc | 6 +++++
7 files changed, 127 insertions(+), 8 deletions(-)
---
base-commit: ca9787bdecfa2174b0a169a54916e22b89b0ef5b
change-id: 20230328-riscv-zifencei-zicsr-5-10-65596f2cac9e
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
On Mon, Apr 17, 2023 at 09:27:22PM -0400, Sasha Levin wrote:
> This is a note to let you know that I've just added the patch titled
>
> purgatory: fix disabling debug info
>
> to the 5.15-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
> The filename of the patch is:
> purgatory-fix-disabling-debug-info.patch
> and it can be found in the queue-5.15 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable(a)vger.kernel.org> know about it.
There's no need for this patch on 5.15, as the regression it fixes was
only introduced in 6.0. It won't do any harm though — is it considered
good practice to keep the code in sync between stable kernels to make
backports of other patches easier? If so, it would make sense to
backport after all.
> commit 618ea690941689fe28fa9c150f90bb096db5f8a5
> Author: Alyssa Ross <hi(a)alyssa.is>
> Date: Sun Mar 26 18:21:21 2023 +0000
>
> purgatory: fix disabling debug info
>
> [ Upstream commit d83806c4c0cccc0d6d3c3581a11983a9c186a138 ]
>
> Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS.
> Instead, it includes -g, the appropriate -gdwarf-* flag, and also the
> -Wa versions of both of those if building with Clang and GNU as. As a
> result, debug info was being generated for the purgatory objects, even
> though the intention was that it not be.
>
> Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files")
> Signed-off-by: Alyssa Ross <hi(a)alyssa.is>
> Cc: stable(a)vger.kernel.org
> Acked-by: Nick Desaulniers <ndesaulniers(a)google.com>
> Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
> Signed-off-by: Sasha Levin <sashal(a)kernel.org>
>
> diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
> index 95ea17a9d20cb..1d6ccd4214d5a 100644
> --- a/arch/x86/purgatory/Makefile
> +++ b/arch/x86/purgatory/Makefile
> @@ -64,8 +64,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
> CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
> CFLAGS_string.o += $(PURGATORY_CFLAGS)
>
> -AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
> -AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
> +asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
>
> $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
> $(call if_changed,ld)
When we try to unshare a pinned page for a private hugetlb, uffd-wp bit can
get lost during unsharing. Fix it by carrying it over.
This should be very rare, only if an unsharing happened on a private
hugetlb page with uffd-wp protected (e.g. in a child which shares the same
page with parent with UFFD_FEATURE_EVENT_FORK enabled).
Cc: linux-stable <stable(a)vger.kernel.org>
Fixes: 166f3ecc0daf ("mm/hugetlb: hook page faults for uffd write protection")
Reported-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Signed-off-by: Peter Xu <peterx(a)redhat.com>
---
mm/hugetlb.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0213efaf31be..cd3a9d8f4b70 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5637,13 +5637,16 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
spin_lock(ptl);
ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
+ pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
+
/* Break COW or unshare */
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
page_remove_rmap(old_page, vma, true);
hugepage_add_new_anon_rmap(new_folio, vma, haddr);
- set_huge_pte_at(mm, haddr, ptep,
- make_huge_pte(vma, &new_folio->page, !unshare));
+ if (huge_pte_uffd_wp(pte))
+ newpte = huge_pte_mkuffd_wp(newpte);
+ set_huge_pte_at(mm, haddr, ptep, newpte);
folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_folio = page_folio(old_page);
--
2.39.1
This is a resend of
https://lore.kernel.org/stable/20230308162207.2886641-1-qyousef@layalina.io/
Which was dropped because of build errors on 5.10 equivalent backport.
I extended the testing to make sure this series is not impacted like 5.10
backport. And update the cover letter to clarify there's no need to take
further backports which removes capacity inversion detection.
Portion of the fixes were ported in 5.15 but missed some.
This ports the remainder of the fixes.
Based on 5.15.98.
a2e90611b9f4 ("sched/fair: Remove capacity inversion detection") is not
necessary to backport because it has a dependency on e5ed0550c04c ("sched/fair:
unlink misfit task from cpu overutilized") which is nice to have but not
strictly required. It improves the search for best CPU under adverse thermal
pressure to try harder. And the new search effectively replaces the capacity
inversion detection, so it is removed afterwards.
Build tested on (cross compile when necessary; x86_64 otherwise):
1. default ubuntu config which has uclamp + smp
2. default ubuntu config without uclamp + smp
3. default ubunto config without smp (which automatically disables
uclamp)
4. reported riscv-allnoconfig, mips-randconfig, x86_64-randocnfigs
Boot tested on android 5.15 GKI with slight modifications due to other
conflicts there. I need more time to be able to do full functional testing on
5.15 - but since some patches were already taken - posting the remainder now.
Sorry due to job/email change I missed the emails when the other backports were
partially taken.
Qais Yousef (7):
sched/uclamp: Fix fits_capacity() check in feec()
sched/uclamp: Make cpu_overutilized() use util_fits_cpu()
sched/uclamp: Cater for uclamp in find_energy_efficient_cpu()'s early
exit condition
sched/fair: Detect capacity inversion
sched/fair: Consider capacity inversion in util_fits_cpu()
sched/uclamp: Fix a uninitialized variable warnings
sched/fair: Fixes for capacity inversion detection
kernel/sched/core.c | 10 ++--
kernel/sched/fair.c | 128 +++++++++++++++++++++++++++++++++++++------
kernel/sched/sched.h | 61 ++++++++++++++++++++-
3 files changed, 174 insertions(+), 25 deletions(-)
--
2.25.1
Portion of the fixes were ported to 6.1 but 3 were missed.
This ports the remainder of the fixes.
Based on 6.1.24.
a2e90611b9f4 ("sched/fair: Remove capacity inversion detection") is not
necessary to backport because it has a dependency on e5ed0550c04c ("sched/fair:
unlink misfit task from cpu overutilized") which is nice to have but not
strictly required. It improves the search for best CPU under adverse thermal
pressure to try harder. And the new search effectively replaces the capacity
inversion detection, so it is removed afterwards.
Build tested on (cross compile when necessary; x86_64 otherwise):
1. default ubuntu config which has uclamp + smp
2. default ubuntu config without uclamp + smp
3. default ubunto config without smp (which automatically disables
uclamp)
4. reported riscv-allnoconfig, mips-randconfig, x86_64-randocnfigs
Boot tested on x86 qemu environment only.
Qais Yousef (3):
sched/fair: Detect capacity inversion
sched/fair: Consider capacity inversion in util_fits_cpu()
sched/fair: Fixes for capacity inversion detection
kernel/sched/fair.c | 86 +++++++++++++++++++++++++++++++++++++++-----
kernel/sched/sched.h | 19 ++++++++++
2 files changed, 97 insertions(+), 8 deletions(-)
--
2.25.1
This is a resend of
https://lore.kernel.org/stable/20230308162207.2886641-1-qyousef@layalina.io/
Which was dropped because of build errors on 5.10 equivalent backport.
I extended the testing to make sure this series is not impacted like 5.10
backport. And update the cover letter to clarify there's no need to take
further backports which removes capacity inversion detection.
Portion of the fixes were ported in 5.15 but missed some.
This ports the remainder of the fixes.
Based on 5.15.98.
a2e90611b9f4 ("sched/fair: Remove capacity inversion detection") is not
necessary to backport because it has a dependency on e5ed0550c04c ("sched/fair:
unlink misfit task from cpu overutilized") which is nice to have but not
strictly required. It improves the search for best CPU under adverse thermal
pressure to try harder. And the new search effectively replaces the capacity
inversion detection, so it is removed afterwards.
Build tested on (cross compile when necessary; x86_64 otherwise):
1. default ubuntu config which has uclamp + smp
2. default ubuntu config without uclamp + smp
3. default ubunto config without smp (which automatically disables
uclamp)
4. reported riscv-allnoconfig, mips-randconfig, x86_64-randocnfigs
Boot tested on android 5.15 GKI with slight modifications due to other
conflicts there. I need more time to be able to do full functional testing on
5.15 - but since some patches were already taken - posting the remainder now.
Sorry due to job/email change I missed the emails when the other backports were
partially taken.
Qais Yousef (7):
sched/uclamp: Fix fits_capacity() check in feec()
sched/uclamp: Make cpu_overutilized() use util_fits_cpu()
sched/uclamp: Cater for uclamp in find_energy_efficient_cpu()'s early
exit condition
sched/fair: Detect capacity inversion
sched/fair: Consider capacity inversion in util_fits_cpu()
sched/uclamp: Fix a uninitialized variable warnings
sched/fair: Fixes for capacity inversion detection
kernel/sched/core.c | 10 ++--
kernel/sched/fair.c | 128 +++++++++++++++++++++++++++++++++++++------
kernel/sched/sched.h | 61 ++++++++++++++++++++-
3 files changed, 174 insertions(+), 25 deletions(-)
--
2.25.1
On Sun, Apr 16, 2023 at 03:22:18PM +0800, cuigaosheng wrote:
> On 2023/4/15 23:07, Greg KH wrote:
> > On Sat, Apr 15, 2023 at 06:11:55PM +0800, Gaosheng Cui wrote:
> > > From: Herbert Xu <herbert(a)gondor.apana.org.au>
> > >
> > > When complex algorithms that depend on other algorithms are built
> > > into the kernel, the order of registration must be done such that
> > > the underlying algorithms are ready before the ones on top are
> > > registered. As otherwise they would fail during the self-test
> > > which is required during registration.
> > >
> > > In the past we have used subsystem initialisation ordering to
> > > guarantee this. The number of such precedence levels are limited
> > > and they may cause ripple effects in other subsystems.
> > >
> > > This patch solves this problem by delaying all self-tests during
> > > boot-up for built-in algorithms. They will be tested either when
> > > something else in the kernel requests for them, or when we have
> > > finished registering all built-in algorithms, whichever comes
> > > earlier.
> > >
> > > Reported-by: Vladis Dronov <vdronov(a)redhat.com>
> > > Signed-off-by: Herbert Xu <herbert(a)gondor.apana.org.au>
> > > Signed-off-by: Gaosheng Cui <cuigaosheng1(a)huawei.com>
> > > ---
> > > crypto/algapi.c | 73 +++++++++++++++++++++++++++++++++--------------
> > > crypto/api.c | 52 +++++++++++++++++++++++++++++----
> > > crypto/internal.h | 10 +++++++
> > > 3 files changed, 108 insertions(+), 27 deletions(-)
> > What is the git commit id of this, and the other 3 patches, in Linus's
> > tree? That is required to have here, as you know.
> >
> > thanks,
> >
> > greg k-h
> > .
>
> Thanks for taking time to review these patch.
>
> These patches are in Linus's tree, reference as follows:
> Reference 1: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
> Reference 2: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
> Reference 3: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
> Reference 4: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
Please resend the patches with the git commit id in the changelog
somewhere, as is normally done (there are thousands of examples on the
mailing list.)
Also be sure that you are also backporting the patches to newer kernel
releases so that someone does not upgrade and have a regression (i.e. if
a patch is also needed in 5.15.y send a backport for that too.)
Thanks,
greg k-h
Changes in v2:
* Fix compilation error against patch 7 due to misiplace #endif to
protect against CONFIG_SMP which doesn't contain the newly added
field to struct rq.
Commit 2ff401441711 ("sched/uclamp: Fix relationship between uclamp and
migration margin") was cherry-picked into 5.10 kernels but missed the rest of
the series.
This ports the remainder of the fixes.
Based on 5.10.172.
NOTE:
a2e90611b9f4 ("sched/fair: Remove capacity inversion detection") is not
necessary to backport because it has a dependency on e5ed0550c04c ("sched/fair:
unlink misfit task from cpu overutilized") which is nice to have but not
strictly required. It improves the search for best CPU under adverse thermal
pressure to try harder. And the new search effectively replaces the capacity
inversion detection, so it is removed afterwards.
Build tested on (cross compile when necessary; x86_64 otherwise):
1. default ubuntu config which has uclamp + smp
2. default ubuntu config without uclamp + smp
3. default ubunto config without smp (which automatically disables
uclamp)
4. reported riscv-allnoconfig, mips-randconfig, x86_64-randocnfigs
Tested on 5.10 Android GKI kernel and android device (with slight modifications
due to other conflicts on there).
Qais Yousef (10):
sched/uclamp: Make task_fits_capacity() use util_fits_cpu()
sched/uclamp: Fix fits_capacity() check in feec()
sched/uclamp: Make select_idle_capacity() use util_fits_cpu()
sched/uclamp: Make asym_fits_capacity() use util_fits_cpu()
sched/uclamp: Make cpu_overutilized() use util_fits_cpu()
sched/uclamp: Cater for uclamp in find_energy_efficient_cpu()'s early
exit condition
sched/fair: Detect capacity inversion
sched/fair: Consider capacity inversion in util_fits_cpu()
sched/uclamp: Fix a uninitialized variable warnings
sched/fair: Fixes for capacity inversion detection
kernel/sched/core.c | 10 +--
kernel/sched/fair.c | 183 ++++++++++++++++++++++++++++++++++---------
kernel/sched/sched.h | 70 ++++++++++++++++-
3 files changed, 217 insertions(+), 46 deletions(-)
--
2.25.1
Hi Greg,
This 5.4.y backport series contains XFS fixes from v5.11 & v5.12. The
patchset has been acked by Darrick.
As a side note, where applicable, patches have been cherry picked from
5.10.y rather than from v5.11/v5.12.
Brian Foster (2):
xfs: consider shutdown in bmapbt cursor delete assert
xfs: don't reuse busy extents on extent trim
Christoph Hellwig (10):
xfs: merge the projid fields in struct xfs_icdinode
xfs: ensure that the inode uid/gid match values match the icdinode
ones
xfs: remove the icdinode di_uid/di_gid members
xfs: remove the kuid/kgid conversion wrappers
xfs: add a new xfs_sb_version_has_v3inode helper
xfs: only check the superblock version for dinode size calculation
xfs: simplify di_flags2 inheritance in xfs_ialloc
xfs: simplify a check in xfs_ioctl_setattr_check_cowextsize
xfs: remove the di_version field from struct icdinode
xfs: fix up non-directory creation in SGID directories
Darrick J. Wong (3):
xfs: report corruption only as a regular error
xfs: shut down the filesystem if we screw up quota reservation
xfs: force log and push AIL to clear pinned inodes when aborting mount
Jeffrey Mitchell (1):
xfs: set inode size after creating symlink
Kaixu Xia (1):
xfs: show the proper user quota options
fs/xfs/libxfs/xfs_attr_leaf.c | 5 +-
fs/xfs/libxfs/xfs_bmap.c | 10 ++--
fs/xfs/libxfs/xfs_btree.c | 30 +++++-------
fs/xfs/libxfs/xfs_format.h | 33 ++++++++++---
fs/xfs/libxfs/xfs_ialloc.c | 6 +--
fs/xfs/libxfs/xfs_inode_buf.c | 54 +++++++-------------
fs/xfs/libxfs/xfs_inode_buf.h | 8 +--
fs/xfs/libxfs/xfs_inode_fork.c | 2 +-
fs/xfs/libxfs/xfs_inode_fork.h | 9 +---
fs/xfs/libxfs/xfs_log_format.h | 10 ++--
fs/xfs/libxfs/xfs_trans_resv.c | 2 +-
fs/xfs/xfs_acl.c | 12 +++--
fs/xfs/xfs_bmap_util.c | 16 +++---
fs/xfs/xfs_buf_item.c | 2 +-
fs/xfs/xfs_dquot.c | 6 +--
fs/xfs/xfs_error.c | 2 +-
fs/xfs/xfs_extent_busy.c | 14 ------
fs/xfs/xfs_icache.c | 8 ++-
fs/xfs/xfs_inode.c | 61 ++++++++---------------
fs/xfs/xfs_inode.h | 21 +-------
fs/xfs/xfs_inode_item.c | 20 ++++----
fs/xfs/xfs_ioctl.c | 22 ++++-----
fs/xfs/xfs_iops.c | 11 +----
fs/xfs/xfs_itable.c | 8 +--
fs/xfs/xfs_linux.h | 32 +++---------
fs/xfs/xfs_log_recover.c | 6 +--
fs/xfs/xfs_mount.c | 90 +++++++++++++++++-----------------
fs/xfs/xfs_qm.c | 43 +++++++++-------
fs/xfs/xfs_qm_bhv.c | 2 +-
fs/xfs/xfs_quota.h | 4 +-
fs/xfs/xfs_super.c | 10 ++--
fs/xfs/xfs_symlink.c | 7 ++-
fs/xfs/xfs_trans_dquot.c | 16 ++++--
33 files changed, 248 insertions(+), 334 deletions(-)
--
2.39.1
Apparently despite it being marked inline, the compiler
may not inline __down_read_common() which makes it difficult
to identify the cause of lock contention, as the blocked
function will always be listed as __down_read_common().
So this patch adds __sched annotation to the function so
the calling function will instead be listed.
Cc: Minchan Kim <minchan(a)kernel.org>
Cc: Tim Murray <timmurray(a)google.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Will Deacon <will(a)kernel.org>
Cc: Waiman Long <longman(a)redhat.com>
Cc: Boqun Feng <boqun.feng(a)gmail.com>
Cc: kernel-team(a)android.com
Cc: stable(a)vger.kernel.org
Fixes: c995e638ccbb ("locking/rwsem: Fold __down_{read,write}*()")
Reported-by: Tim Murray <timmurray(a)google.com>
Signed-off-by: John Stultz <jstultz(a)google.com>
---
kernel/locking/rwsem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index acb5a50309a1..cde2f22e65a8 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1240,7 +1240,7 @@ static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
/*
* lock for reading
*/
-static inline int __down_read_common(struct rw_semaphore *sem, int state)
+static inline __sched int __down_read_common(struct rw_semaphore *sem, int state)
{
int ret = 0;
long count;
--
2.40.0.577.gac1e443424-goog
Hi!
The Tegra20 requires an enabled VDE power domain during startup. As the
VDE is currently not used, it is disabled during runtime.
Since 8f0c714ad9be, there is a workaround for the "normal restart path"
which enables the VDE before doing PMC's warm reboot. This workaround is
not executed in the "emergency restart path", leading to a hang-up
during start.
This series implements and registers a new pmic-based restart handler
for boards with tps6586x. This cold reboot ensures that the VDE power
domain is enabled during startup on tegra20-based boards.
Since bae1d3a05a8b, i2c transfers are non-atomic while preemption is
disabled (which is e.g. done during panic()). This could lead to
warnings ("Voluntary context switch within RCU") in i2c-based restart
handlers during emergency restart. The state of preemption should be
detected by i2c_in_atomic_xfer_mode() to use atomic i2c xfer when
required. Beside the new system_state check, the check is the same as
the one pre v5.2.
v4: https://lore.kernel.org/r/20230327-tegra-pmic-reboot-v4-0-b24af219fb47@skid…
v3: https://lore.kernel.org/r/20230327-tegra-pmic-reboot-v3-0-3c0ee3567e14@skid…
v2: https://lore.kernel.org/all/20230320220345.1463687-1-bbara93@gmail.com/
system_state: https://lore.kernel.org/all/20230320213230.1459532-1-bbara93@gmail.com/
v1: https://lore.kernel.org/all/20230316164703.1157813-1-bbara93@gmail.com/
v5:
- introduce new 3 & 4, therefore 3 -> 5, 4 -> 6
- 3: provide dev to sys_off handler, if it is known
- 4: return NOTIFY_DONE from sys_off_notify, to avoid skipping
- 5: drop Reviewed-by from Dmitry, add poweroff timeout
- 5,6: return notifier value instead of direct errno from handler
- 5,6: use new dev field instead of passing dev as cb_data
- 5,6: increase timeout values based on error observations
- 6: skip unsupported reboot modes in restart handler
v4:
- 1,2: add "Fixes" and adapt commit messages
- 4: reduce delay after requesting the restart (as suggested by Dmitry)
v3:
- bring system_state back in this series
- do atomic i2c xfer if not preemptible (as suggested by Dmitry)
- fix style issues mentioned by Dmitry
- add cc stable as suggested by Dmitry
- add explanation why this is needed for Jon
v2:
- use devm-based restart handler
- convert the existing power_off handler to a devm-based handler
- handle system_state in extra series
---
Benjamin Bara (6):
kernel/reboot: emergency_restart: set correct system_state
i2c: core: run atomic i2c xfer when !preemptible
kernel/reboot: add device to sys_off_handler
kernel/reboot: sys_off_notify: always return NOTIFY_DONE
mfd: tps6586x: use devm-based power off handler
mfd: tps6586x: register restart handler
drivers/i2c/i2c-core.h | 2 +-
drivers/mfd/tps6586x.c | 55 ++++++++++++++++++++++++++++++++++++++++++--------
include/linux/reboot.h | 3 +++
kernel/reboot.c | 11 +++++++++-
4 files changed, 61 insertions(+), 10 deletions(-)
---
base-commit: 197b6b60ae7bc51dd0814953c562833143b292aa
change-id: 20230327-tegra-pmic-reboot-4175ff814a4b
Best regards,
--
Benjamin Bara <benjamin.bara(a)skidata.com>
This is for pre-6.4 kernels, as scrub code goes through a huge rework.
[BUG]
Even before the scrub rework, if we have some corrupted metadata failed
to be repaired during replace, we still continue replace and let it
finish just as there is nothing wrong:
BTRFS info (device dm-4): dev_replace from /dev/mapper/test-scratch1 (devid 1) to /dev/mapper/test-scratch2 started
BTRFS warning (device dm-4): tree block 5578752 mirror 1 has bad csum, has 0x00000000 want 0xade80ca1
BTRFS warning (device dm-4): tree block 5578752 mirror 0 has bad csum, has 0x00000000 want 0xade80ca1
BTRFS warning (device dm-4): checksum error at logical 5578752 on dev /dev/mapper/test-scratch1, physical 5578752: metadata leaf (level 0) in tree 5
BTRFS warning (device dm-4): checksum error at logical 5578752 on dev /dev/mapper/test-scratch1, physical 5578752: metadata leaf (level 0) in tree 5
BTRFS error (device dm-4): bdev /dev/mapper/test-scratch1 errs: wr 0, rd 0, flush 0, corrupt 1, gen 0
BTRFS warning (device dm-4): tree block 5578752 mirror 1 has bad bytenr, has 0 want 5578752
BTRFS error (device dm-4): unable to fixup (regular) error at logical 5578752 on dev /dev/mapper/test-scratch1
BTRFS info (device dm-4): dev_replace from /dev/mapper/test-scratch1 (devid 1) to /dev/mapper/test-scratch2 finished
This can lead to unexpected problems for the result fs.
[CAUSE]
Btrfs reuses scrub code path for dev-replace to iterate all dev extents.
But unlike scrub, dev-replace doesn't really bother to check the scrub
progress, which records all the errors found during replace.
And even if we checks the progress, we can not really determine which
errors are minor, which are critical just by the plain numbers.
(remember we don't treat metadata/data checksum error differently).
This behavior is there from the very beginning.
[FIX]
Instead of continue the replace, just error out if we hit an unrepaired
metadata sector.
Now the dev-replace would be rejected with -EIO, to inform the user.
Although it also means, the fs has some metadata error which can not be
repaired, the user would be super upset anyway.
The new dmesg would look like this:
BTRFS info (device dm-4): dev_replace from /dev/mapper/test-scratch1 (devid 1) to /dev/mapper/test-scratch2 started
BTRFS warning (device dm-4): tree block 5578752 mirror 1 has bad csum, has 0x00000000 want 0xade80ca1
BTRFS warning (device dm-4): tree block 5578752 mirror 1 has bad csum, has 0x00000000 want 0xade80ca1
BTRFS error (device dm-4): unable to fixup (regular) error at logical 5570560 on dev /dev/mapper/test-scratch1 physical 5570560
BTRFS warning (device dm-4): header error at logical 5570560 on dev /dev/mapper/test-scratch1, physical 5570560: metadata leaf (level 0) in tree 5
BTRFS warning (device dm-4): header error at logical 5570560 on dev /dev/mapper/test-scratch1, physical 5570560: metadata leaf (level 0) in tree 5
BTRFS error (device dm-4): stripe 5570560 has unrepaired metadata sector at 5578752
BTRFS error (device dm-4): btrfs_scrub_dev(/dev/mapper/test-scratch1, 1, /dev/mapper/test-scratch2) failed -5
CC: stable(a)vger.kernel.org
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
I'm not sure how should we merge this patch.
The misc-next is already merging the new scrub code, but the problem is
there for all old kernels thus we need such fixes.
Maybe we can merge this fix before the scrub rework, then the rework,
and finally the better fix using reworked interface?
---
fs/btrfs/scrub.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index ef4046a2572c..71f64b9bcd9f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -195,6 +195,7 @@ struct scrub_ctx {
struct mutex wr_lock;
struct btrfs_device *wr_tgtdev;
bool flush_all_writes;
+ bool has_meta_failed;
/*
* statistics
@@ -1380,6 +1381,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
btrfs_err_rl_in_rcu(fs_info,
"unable to fixup (regular) error at logical %llu on dev %s",
logical, btrfs_dev_name(dev));
+ if (is_metadata)
+ sctx->has_meta_failed = true;
}
out:
@@ -3838,6 +3841,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
blk_finish_plug(&plug);
+ /*
+ * If we have metadata unable to be repaired, we should error
+ * out the dev-replace.
+ */
+ if (sctx->is_dev_replace && sctx->has_meta_failed && ret >= 0)
+ ret = -EIO;
if (sctx->is_dev_replace && ret >= 0) {
int ret2;
--
2.39.2
Antgroup is using 5.10.y in product environment, we found several patches are
missing in 5.10.y tree. These patches are needed for us. So we backported them
to 5.10.y
Connor Kuehl (1):
virtiofs: split requests that exceed virtqueue size
Jiachen Zhang (1):
fuse: always revalidate rename target dentry
Miklos Szeredi (4):
virtiofs: clean up error handling in virtio_fs_get_tree()
fuse: check s_root when destroying sb
fuse: fix attr version comparison in fuse_read_update_size()
fuse: fix deadlock between atomic O_TRUNC and page invalidation
fs/fuse/dir.c | 7 ++++++-
fs/fuse/file.c | 31 +++++++++++++++++-------------
fs/fuse/fuse_i.h | 3 +++
fs/fuse/inode.c | 5 +++--
fs/fuse/virtio_fs.c | 46 +++++++++++++++++++++++++++++----------------
5 files changed, 60 insertions(+), 32 deletions(-)
--
2.40.0
Hi stable maintainers,
This is a backport patch for commit df205b5c6328 ("KVM: arm64: Filter
out invalid core register IDs in KVM_GET_REG_LIST") to 4.14 and 4.19.
This commit was not applied to the 4.14-stable tree due to merge
conflict [1]. To backport this, cherry-picked commit be25bbb392fa
("KVM: arm64: Factor out core register ID enumeration") that has no
functional changes but makes it easy to merge, and commit 5d8d4af24460
("arm64: KVM: Fix system register enumeration") that is a fix patch for
the commit.
I'd appreciate it if you could consider backporting this to 4.14 and
4.19.
Best regards,
Takahiro
[1] https://lore.kernel.org/all/1560343489-22906-1-git-send-email-Dave.Martin@a…
Dave Martin (2):
KVM: arm64: Factor out core register ID enumeration
KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST
arch/arm64/kvm/guest.c | 79 ++++++++++++++++++++++++++++++++++--------
1 file changed, 65 insertions(+), 14 deletions(-)
--
2.39.2
v2 -> v3:
* Cherry-pick an additional fix patch.
* Link to v2: https://lore.kernel.org/all/20230404034649.77915-1-itazur@amazon.com/
v1 -> v2:
* Fix a compile error for core_reg_size_from_offset().
* Link to v1: https://lore.kernel.org/all/20230403223028.45131-1-itazur@amazon.com/
From: David Stevens <stevensd(a)chromium.org>
Split folios during the second loop of shmem_undo_range. It's not
sufficient to only split folios when dealing with partial pages, since
it's possible for a THP to be faulted in after that point. Calling
truncate_inode_folio in that situation can result in throwing away data
outside of the range being targeted.
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Cc: stable(a)vger.kernel.org
Signed-off-by: David Stevens <stevensd(a)chromium.org>
---
v1 -> v2:
- Actually drop pages after splitting a THP
mm/shmem.c | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 9218c955f482..226c94a257b1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1033,7 +1033,22 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
VM_BUG_ON_FOLIO(folio_test_writeback(folio),
folio);
- truncate_inode_folio(mapping, folio);
+
+ if (!folio_test_large(folio)) {
+ truncate_inode_folio(mapping, folio);
+ } else if (truncate_inode_partial_folio(folio, lstart, lend)) {
+ /*
+ * If we split a page, reset the loop so that we
+ * pick up the new sub pages. Otherwise the THP
+ * was entirely dropped or the target range was
+ * zeroed, so just continue the loop as is.
+ */
+ if (!folio_test_large(folio)) {
+ folio_unlock(folio);
+ index = start;
+ break;
+ }
+ }
}
folio_unlock(folio);
}
--
2.40.0.634.g4ca3ef3211-goog
From: David Stevens <stevensd(a)chromium.org>
Split folios during the second loop of shmem_undo_range. It's not
sufficient to only split folios when dealing with partial pages, since
it's possible for a THP to be faulted in after that point. Calling
truncate_inode_folio in that situation can result in throwing away data
outside of the range being targeted.
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Cc: stable(a)vger.kernel.org
Signed-off-by: David Stevens <stevensd(a)chromium.org>
---
mm/shmem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 9218c955f482..317cbeb0fb6b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1033,7 +1033,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
}
VM_BUG_ON_FOLIO(folio_test_writeback(folio),
folio);
- truncate_inode_folio(mapping, folio);
+ truncate_inode_partial_folio(folio, lstart, lend);
}
folio_unlock(folio);
}
--
2.40.0.634.g4ca3ef3211-goog
From: Damien Le Moal <dlemoal(a)kernel.org>
The address translation unit of the rockchip EP controller does not use
the lower 8 bits of a PCIe-space address to map local memory. Thus we
must set the align feature field to 256 to let the user know about this
constraint.
Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller")
Cc: stable(a)vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Rick Wertenbroek <rick.wertenbroek(a)gmail.com>
---
drivers/pci/controller/pcie-rockchip-ep.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
index edfced311a9f..0af0e965fb57 100644
--- a/drivers/pci/controller/pcie-rockchip-ep.c
+++ b/drivers/pci/controller/pcie-rockchip-ep.c
@@ -442,6 +442,7 @@ static const struct pci_epc_features rockchip_pcie_epc_features = {
.linkup_notifier = false,
.msi_capable = true,
.msix_capable = false,
+ .align = 256,
};
static const struct pci_epc_features*
--
2.25.1
The RK3399 PCIe endpoint controller cannot generate MSI-X IRQs.
This is documented in the RK3399 technical reference manual (TRM)
section 17.5.9 "Interrupt Support".
MSI-X capability should therefore not be advertised. Remove the
MSI-X capability by editing the capability linked-list. The
previous entry is the MSI capability, therefore get the next
entry from the MSI-X capability entry and set it as next entry
for the MSI capability. This in effect removes MSI-X from the list.
Linked list before : MSI cap -> MSI-X cap -> PCIe Device cap -> ...
Linked list now : MSI cap -> PCIe Device cap -> ...
Tested-by: Damien Le Moal <dlemoal(a)kernel.org>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller")
Cc: stable(a)vger.kernel.org
Signed-off-by: Rick Wertenbroek <rick.wertenbroek(a)gmail.com>
---
drivers/pci/controller/pcie-rockchip-ep.c | 24 +++++++++++++++++++++++
drivers/pci/controller/pcie-rockchip.h | 5 +++++
2 files changed, 29 insertions(+)
diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
index 63fbb379638b..edfced311a9f 100644
--- a/drivers/pci/controller/pcie-rockchip-ep.c
+++ b/drivers/pci/controller/pcie-rockchip-ep.c
@@ -507,6 +507,7 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev)
size_t max_regions;
struct pci_epc_mem_window *windows = NULL;
int err, i;
+ u32 cfg_msi, cfg_msix_cp;
ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL);
if (!ep)
@@ -582,6 +583,29 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev)
ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR;
+ /*
+ * MSI-X is not supported but the controller still advertises the MSI-X
+ * capability by default, which can lead to the Root Complex side
+ * allocating MSI-X vectors which cannot be used. Avoid this by skipping
+ * the MSI-X capability entry in the PCIe capabilities linked-list: get
+ * the next pointer from the MSI-X entry and set that in the MSI
+ * capability entry (which is the previous entry). This way the MSI-X
+ * entry is skipped (left out of the linked-list) and not advertised.
+ */
+ cfg_msi = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE +
+ ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+
+ cfg_msi &= ~ROCKCHIP_PCIE_EP_MSI_CP1_MASK;
+
+ cfg_msix_cp = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_BASE +
+ ROCKCHIP_PCIE_EP_MSIX_CAP_REG) &
+ ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK;
+
+ cfg_msi |= cfg_msix_cp;
+
+ rockchip_pcie_write(rockchip, cfg_msi,
+ PCIE_EP_CONFIG_BASE + ROCKCHIP_PCIE_EP_MSI_CTRL_REG);
+
rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE,
PCIE_CLIENT_CONFIG);
diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h
index 501d859420b4..fe0333778fd9 100644
--- a/drivers/pci/controller/pcie-rockchip.h
+++ b/drivers/pci/controller/pcie-rockchip.h
@@ -227,6 +227,8 @@
#define ROCKCHIP_PCIE_EP_CMD_STATUS 0x4
#define ROCKCHIP_PCIE_EP_CMD_STATUS_IS BIT(19)
#define ROCKCHIP_PCIE_EP_MSI_CTRL_REG 0x90
+#define ROCKCHIP_PCIE_EP_MSI_CP1_OFFSET 8
+#define ROCKCHIP_PCIE_EP_MSI_CP1_MASK GENMASK(15, 8)
#define ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET 16
#define ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET 17
#define ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK GENMASK(19, 17)
@@ -234,6 +236,9 @@
#define ROCKCHIP_PCIE_EP_MSI_CTRL_MME_MASK GENMASK(22, 20)
#define ROCKCHIP_PCIE_EP_MSI_CTRL_ME BIT(16)
#define ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP BIT(24)
+#define ROCKCHIP_PCIE_EP_MSIX_CAP_REG 0xb0
+#define ROCKCHIP_PCIE_EP_MSIX_CAP_CP_OFFSET 8
+#define ROCKCHIP_PCIE_EP_MSIX_CAP_CP_MASK GENMASK(15, 8)
#define ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR 0x1
#define ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR 0x3
#define ROCKCHIP_PCIE_EP_FUNC_BASE(fn) \
--
2.25.1
Assert PCI Configuration Enable bit after probe. When this bit is left to
0 in the endpoint mode, the RK3399 PCIe endpoint core will generate
configuration request retry status (CRS) messages back to the root complex.
Assert this bit after probe to allow the RK3399 PCIe endpoint core to reply
to configuration requests from the root complex.
This is documented in section 17.5.8.1.2 of the RK3399 TRM.
Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller")
Cc: stable(a)vger.kernel.org
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Tested-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Rick Wertenbroek <rick.wertenbroek(a)gmail.com>
---
drivers/pci/controller/pcie-rockchip-ep.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
index 9b835377bd9e..d00baed65eba 100644
--- a/drivers/pci/controller/pcie-rockchip-ep.c
+++ b/drivers/pci/controller/pcie-rockchip-ep.c
@@ -623,6 +623,9 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev)
ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR;
+ rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE,
+ PCIE_CLIENT_CONFIG);
+
return 0;
err_epc_mem_exit:
pci_epc_mem_exit(epc);
--
2.25.1
It was observed that there are hosts that may complete pending SETUP
transactions before the stop active transfers and controller halt occurs,
leading to lingering endxfer commands on DEPs on subsequent pullup/gadget
start iterations.
dwc3_gadget_ep_disable name=ep8in flags=0x3009 direction=1
dwc3_gadget_ep_disable name=ep4in flags=1 direction=1
dwc3_gadget_ep_disable name=ep3out flags=1 direction=0
usb_gadget_disconnect deactivated=0 connected=0 ret=0
The sequence shows that the USB gadget disconnect (dwc3_gadget_pullup(0))
routine completed successfully, allowing for the USB gadget to proceed with
a USB gadget connect. However, if this occurs the system runs into an
issue where:
BUG: spinlock already unlocked on CPU
spin_bug+0x0
dwc3_remove_requests+0x278
dwc3_ep0_out_start+0xb0
__dwc3_gadget_start+0x25c
This is due to the pending endxfers, leading to gadget start (w/o lock
held) to execute the remove requests, which will unlock the dwc3
spinlock as part of giveback.
To mitigate this, resolve the pending endxfers on the pullup disable
path by re-locating the SETUP phase check after stop active transfers, since
that is where the DWC3_EP_DELAY_STOP is potentially set. This also allows
for handling of a host that may be unresponsive by using the completion
timeout to trigger the stall and restart for EP0.
Fixes: c96683798e27 ("usb: dwc3: ep0: Don't prepare beyond Setup stage")
Cc: stable(a)vger.kernel.org
Signed-off-by: Wesley Cheng <quic_wcheng(a)quicinc.com>
---
drivers/usb/dwc3/gadget.c | 49 +++++++++++++++++++++++++--------------
1 file changed, 32 insertions(+), 17 deletions(-)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 3c63fa97a680..be84c133f0d7 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2528,29 +2528,17 @@ static int __dwc3_gadget_start(struct dwc3 *dwc);
static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
{
unsigned long flags;
+ int ret;
spin_lock_irqsave(&dwc->lock, flags);
dwc->connected = false;
/*
- * Per databook, when we want to stop the gadget, if a control transfer
- * is still in process, complete it and get the core into setup phase.
+ * Attempt to end pending SETUP status phase, and not wait for the
+ * function to do so.
*/
- if (dwc->ep0state != EP0_SETUP_PHASE) {
- int ret;
-
- if (dwc->delayed_status)
- dwc3_ep0_send_delayed_status(dwc);
-
- reinit_completion(&dwc->ep0_in_setup);
-
- spin_unlock_irqrestore(&dwc->lock, flags);
- ret = wait_for_completion_timeout(&dwc->ep0_in_setup,
- msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT));
- spin_lock_irqsave(&dwc->lock, flags);
- if (ret == 0)
- dev_warn(dwc->dev, "timed out waiting for SETUP phase\n");
- }
+ if (dwc->delayed_status)
+ dwc3_ep0_send_delayed_status(dwc);
/*
* In the Synopsys DesignWare Cores USB3 Databook Rev. 3.30a
@@ -2563,6 +2551,33 @@ static int dwc3_gadget_soft_disconnect(struct dwc3 *dwc)
__dwc3_gadget_stop(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
+ /*
+ * Per databook, when we want to stop the gadget, if a control transfer
+ * is still in process, complete it and get the core into setup phase.
+ * In case the host is unresponsive to a SETUP transaction, forcefully
+ * stall the transfer, and move back to the SETUP phase, so that any
+ * pending endxfers can be executed.
+ */
+ if (dwc->ep0state != EP0_SETUP_PHASE) {
+ reinit_completion(&dwc->ep0_in_setup);
+
+ ret = wait_for_completion_timeout(&dwc->ep0_in_setup,
+ msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT));
+ if (ret == 0) {
+ unsigned int dir;
+
+ dev_warn(dwc->dev, "wait for SETUP phase timed out\n");
+ spin_lock_irqsave(&dwc->lock, flags);
+ dir = !!dwc->ep0_expect_in;
+ if (dwc->ep0state == EP0_DATA_PHASE)
+ dwc3_ep0_end_control_data(dwc, dwc->eps[dir]);
+ else
+ dwc3_ep0_end_control_data(dwc, dwc->eps[!dir]);
+ dwc3_ep0_stall_and_restart(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+ }
+ }
+
/*
* Note: if the GEVNTCOUNT indicates events in the event buffer, the
* driver needs to acknowledge them before the controller can halt.
The patch titled
Subject: ia64: fix an addr to taddr in huge_pte_offset()
has been added to the -mm mm-nonmm-unstable branch. Its filename is
ia64-fix-an-addr-to-taddr-in-huge_pte_offset.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-nonmm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Hugh Dickins <hughd(a)google.com>
Subject: ia64: fix an addr to taddr in huge_pte_offset()
Date: Sun, 16 Apr 2023 22:17:05 -0700 (PDT)
I know nothing of ia64 htlbpage_to_page(), but guess that the p4d
line should be using taddr rather than addr, like everywhere else.
Link: https://lkml.kernel.org/r/732eae88-3beb-246-2c72-281de786740@google.com
Fixes: c03ab9e32a2c ("ia64: add support for folded p4d page tables")
Signed-off-by: Hugh Dickins <hughd(a)google.com
Cc: Ard Biesheuvel <ardb(a)kernel.org>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Mike Rapoport <rppt(a)linux.ibm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/ia64/mm/hugetlbpage.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/arch/ia64/mm/hugetlbpage.c~ia64-fix-an-addr-to-taddr-in-huge_pte_offset
+++ a/arch/ia64/mm/hugetlbpage.c
@@ -58,7 +58,7 @@ huge_pte_offset (struct mm_struct *mm, u
pgd = pgd_offset(mm, taddr);
if (pgd_present(*pgd)) {
- p4d = p4d_offset(pgd, addr);
+ p4d = p4d_offset(pgd, taddr);
if (p4d_present(*p4d)) {
pud = pud_offset(p4d, taddr);
if (pud_present(*pud)) {
_
Patches currently in -mm which might be from hughd(a)google.com are
ia64-fix-an-addr-to-taddr-in-huge_pte_offset.patch
Mi dispiace disturbarti e invadere la tua privacy. Sono single,
solitario e bisognoso di un compagno premuroso, amorevole e romantico.
Sono un ammiratore segreto e vorrei esplorare l'opportunità di farlo
saperne di più l'uno sull'altro. So che è strano contattarti
in questo modo e spero che tu possa perdonarmi. Sono una persona timida
e
questo è l'unico modo in cui so di poter attirare la tua attenzione.
Voglio semplicemente
per sapere cosa ne pensate e la mia intenzione non è di offendervi.
Spero che possiamo essere amici se è quello che vuoi, anche se lo vorrei
essere più di un semplice amico. So che hai alcune domande da fare
chiedi e spero di poter soddisfare alcune delle tue curiosità con alcuni
risposte.
Credo nel detto che "per il mondo sei solo una persona,
ma per qualcuno di speciale tu sei il mondo'. Tutto quello che voglio è
amore,
cure e attenzioni romantiche da una compagna speciale quale sono io
sperando saresti tu.
Spero che questo messaggio sia l'inizio di un lungo periodo
comunicazione tra di noi, è sufficiente inviare una risposta a questo
messaggio, it
mi renderà felice.
Baci e abbracci,
Mario.
On my RTW8821CU chipset rfe_option reads as 0x22. Looking at the
vendor driver suggests that the field width of rfe_option is 5 bit,
so rfe_option should be masked with 0x1f.
Without this the rfe_option comparisons with 2 further down the
driver evaluate as false when they should really evaluate as true.
The effect is that 2G channels do not work.
rfe_option is also used as an array index into rtw8821c_rfe_defs[].
rtw8821c_rfe_defs[34] (0x22) was added as part of adding USB support,
likely because rfe_option reads as 0x22. As this now becomes 0x2,
rtw8821c_rfe_defs[34] is no longer used and can be removed.
Note that this might not be the whole truth. In the vendor driver
there are indeed places where the unmasked rfe_option value is used.
However, the driver has several places where rfe_option is tested
with the pattern if (rfe_option == 2 || rfe_option == 0x22) or
if (rfe_option == 4 || rfe_option == 0x24), so that rfe_option BIT(5)
has no influence on the code path taken. We therefore mask BIT(5)
out from rfe_option entirely until this assumption is proved wrong
by some chip variant we do not know yet.
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
Tested-by: Alexandru gagniuc <mr.nuke.me(a)gmail.com>
Tested-by: Larry Finger <Larry.Finger(a)lwfinger.net>
Tested-by: ValdikSS <iam(a)valdikss.org.ru>
Cc: stable(a)vger.kernel.org
---
drivers/net/wireless/realtek/rtw88/rtw8821c.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
index 17f800f6efbd0..67efa58dd78ee 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
@@ -47,7 +47,7 @@ static int rtw8821c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
map = (struct rtw8821c_efuse *)log_map;
- efuse->rfe_option = map->rfe_option;
+ efuse->rfe_option = map->rfe_option & 0x1f;
efuse->rf_board_option = map->rf_board_option;
efuse->crystal_cap = map->xtal_k;
efuse->pa_type_2g = map->pa_type;
@@ -1537,7 +1537,6 @@ static const struct rtw_rfe_def rtw8821c_rfe_defs[] = {
[2] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2),
[4] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2),
[6] = RTW_DEF_RFE(8821c, 0, 0),
- [34] = RTW_DEF_RFE(8821c, 0, 0),
};
static struct rtw_hw_reg rtw8821c_dig[] = {
--
2.39.2
Hello Dear,
I am sorry to bother you and intrude on your privacy. I am single,
lonely and in need of a caring, loving, and romantic companion.
I am a secret admirer and would like to explore the opportunity to
learn more about each other. I know it is strange to contact you
this way and I hope you can forgive me. I am a shy person and
this is the only way I know I could get your attention. I just want
to know what you think and my intention is not to offend you.
I hope we can be friends if that is what you want, although I wish
to be more than just a friend. I know you have a few questions to
ask and I hope I can satisfy some of your curiosity with a few
answers.
I believe in the saying that 'to the world, you are just one person,
but to someone special, you are the world'. All I want is love,
romantic care and attention from a special companion which I am
hoping would be you.
I hope this message will be the beginning of a long term
communication between us, simply send a reply to this message, it
will make me happy.
Hugs and kisses,
Secret admirer.
Hej min kära,
Jag är ledsen att jag stör dig och inkräktar på din integritet. Jag är
singel, ensam och i behov av en omtänksam, kärleksfull och romantisk
följeslagare.
Jag är en hemlig beundrare och skulle vilja utforska möjligheten att
lära mig mer om varandra. Jag vet att det är konstigt att kontakta dig
på det här sättet och jag hoppas att du kan förlåta mig. Jag är en blyg
person och det är det enda sättet jag vet att jag kan få din
uppmärksamhet. Jag vill bara veta vad du tycker och min avsikt är inte
att förolämpa dig.
Jag hoppas att vi kan vara vänner om det är vad du vill, även om jag
vill vara mer än bara en vän. Jag vet att du har några frågor att ställa
och jag hoppas att jag kan tillfredsställa en del av din nyfikenhet med
några svar.
Jag tror på talesättet att "för världen är du bara en person, men för
någon speciell är du världen". Allt jag vill ha är kärlek, romantisk
omsorg och uppmärksamhet från en speciell följeslagare som jag hoppas
skulle vara du.
Jag hoppas att detta meddelande kommer att bli början på en långsiktig
kommunikation mellan oss, skicka bara ett svar på detta meddelande, det
kommer att göra mig glad.
Puss och kram,
Marion.
The patch titled
Subject: nilfs2: initialize unused bytes in segment summary blocks
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: initialize unused bytes in segment summary blocks
Date: Tue, 18 Apr 2023 02:35:13 +0900
Syzbot still reports uninit-value in nilfs_add_checksums_on_logs() for
KMSAN enabled kernels after applying commit 7397031622e0 ("nilfs2:
initialize "struct nilfs_binfo_dat"->bi_pad field").
This is because the unused bytes at the end of each block in segment
summaries are not initialized. So this fixes the issue by padding the
unused bytes with null bytes.
Link: https://lkml.kernel.org/r/20230417173513.12598-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+048585f3f4227bb2b49b(a)syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=048585f3f4227bb2b49b
Cc: Alexander Potapenko <glider(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
--- a/fs/nilfs2/segment.c~nilfs2-initialize-unused-bytes-in-segment-summary-blocks
+++ a/fs/nilfs2/segment.c
@@ -430,6 +430,23 @@ static int nilfs_segctor_reset_segment_b
return 0;
}
+/**
+ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
+ * @sci: segment constructor object
+ *
+ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
+ * the current segment summary block.
+ */
+static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
+{
+ struct nilfs_segsum_pointer *ssp;
+
+ ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
+ if (ssp->offset < ssp->bh->b_size)
+ memset(ssp->bh->b_data + ssp->offset, 0,
+ ssp->bh->b_size - ssp->offset);
+}
+
static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
@@ -438,6 +455,7 @@ static int nilfs_segctor_feed_segment(st
* The current segment is filled up
* (internal code)
*/
+ nilfs_segctor_zeropad_segsum(sci);
sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
return nilfs_segctor_reset_segment_buffer(sci);
}
@@ -542,6 +560,7 @@ static int nilfs_segctor_add_file_block(
goto retry;
}
if (unlikely(required)) {
+ nilfs_segctor_zeropad_segsum(sci);
err = nilfs_segbuf_extend_segsum(segbuf);
if (unlikely(err))
goto failed;
@@ -1533,6 +1552,7 @@ static int nilfs_segctor_collect(struct
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
+ nilfs_segctor_zeropad_segsum(sci);
nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
return 0;
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-initialize-unused-bytes-in-segment-summary-blocks.patch
The patch titled
Subject: mm/hugetlb: fix uffd-wp bit lost when unsharing happens
has been added to the -mm mm-unstable branch. Its filename is
mm-hugetlb-fix-uffd-wp-bit-lost-when-unsharing-happens.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/hugetlb: fix uffd-wp bit lost when unsharing happens
Date: Mon, 17 Apr 2023 15:53:13 -0400
When we try to unshare a pinned page for a private hugetlb, uffd-wp bit
can get lost during unsharing. Fix it by carrying it over.
This should be very rare, only if an unsharing happened on a private
hugetlb page with uffd-wp protected (e.g. in a child which shares the
same page with parent with UFFD_FEATURE_EVENT_FORK enabled).
Link: https://lkml.kernel.org/r/20230417195317.898696-3-peterx@redhat.com
Fixes: 166f3ecc0daf ("mm/hugetlb: hook page faults for uffd write protection")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reported-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Mika Penttil�� <mpenttil(a)redhat.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlb-fix-uffd-wp-bit-lost-when-unsharing-happens
+++ a/mm/hugetlb.c
@@ -5644,13 +5644,16 @@ retry_avoidcopy:
spin_lock(ptl);
ptep = hugetlb_walk(vma, haddr, huge_page_size(h));
if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
+ pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
+
/* Break COW or unshare */
huge_ptep_clear_flush(vma, haddr, ptep);
mmu_notifier_invalidate_range(mm, range.start, range.end);
page_remove_rmap(old_page, vma, true);
hugepage_add_new_anon_rmap(new_folio, vma, haddr);
- set_huge_pte_at(mm, haddr, ptep,
- make_huge_pte(vma, &new_folio->page, !unshare));
+ if (huge_pte_uffd_wp(pte))
+ newpte = huge_pte_mkuffd_wp(newpte);
+ set_huge_pte_at(mm, haddr, ptep, newpte);
folio_set_hugetlb_migratable(new_folio);
/* Make the old page be freed below */
new_folio = page_folio(old_page);
_
Patches currently in -mm which might be from peterx(a)redhat.com are
selftests-mm-update-gitignore-with-two-missing-tests.patch
selftests-mm-dump-a-summary-in-run_vmtestssh.patch
selftests-mm-merge-utilh-into-vm_utilh.patch
selftests-mm-use-test_gen_progs-where-proper.patch
selftests-mm-link-vm_utilc-always.patch
selftests-mm-merge-default_huge_page_size-into-one.patch
selftests-mm-use-pm_-macros-in-vm_utilsh.patch
selftests-mm-reuse-pagemap_get_entry-in-vm_utilh.patch
selftests-mm-test-uffdio_zeropage-only-when-hugetlb.patch
selftests-mm-drop-test_uffdio_zeropage_eexist.patch
selftests-mm-create-uffd-common.patch
selftests-mm-split-uffd-tests-into-uffd-stress-and-uffd-unit-tests.patch
selftests-mm-uffd_register.patch
selftests-mm-uffd_open_devsys.patch
selftests-mm-uffdio_api-test.patch
selftests-mm-drop-global-mem_fd-in-uffd-tests.patch
selftests-mm-drop-global-hpage_size-in-uffd-tests.patch
selftests-mm-rename-uffd_stats-to-uffd_args.patch
selftests-mm-let-uffd_handle_page_fault-take-wp-parameter.patch
selftests-mm-allow-allocate_area-to-fail-properly.patch
selftests-mm-add-framework-for-uffd-unit-test.patch
selftests-mm-move-uffd-pagemap-test-to-unit-test.patch
selftests-mm-move-uffd-minor-test-to-unit-test.patch
selftests-mm-move-uffd-sig-events-tests-into-uffd-unit-tests.patch
selftests-mm-move-zeropage-test-into-uffd-unit-tests.patch
selftests-mm-workaround-no-way-to-detect-uffd-minor-wp.patch
selftests-mm-allow-uffd-test-to-skip-properly-with-no-privilege.patch
selftests-mm-drop-sys-dev-test-in-uffd-stress-test.patch
selftests-mm-add-shmem-private-test-to-uffd-stress.patch
selftests-mm-add-uffdio-register-ioctls-test.patch
mm-hugetlb-fix-uffd-wp-during-fork.patch
mm-hugetlb-fix-uffd-wp-bit-lost-when-unsharing-happens.patch
selftests-mm-add-a-few-options-for-uffd-unit-test.patch
selftests-mm-extend-and-rename-uffd-pagemap-test.patch
selftests-mm-rename-cow_extra_libs-to-iouring_extra_libs.patch
selftests-mm-add-tests-for-ro-pinning-vs-fork.patch
The patch titled
Subject: mm/hugetlb: fix uffd-wp during fork()
has been added to the -mm mm-unstable branch. Its filename is
mm-hugetlb-fix-uffd-wp-during-fork.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/hugetlb: fix uffd-wp during fork()
Date: Mon, 17 Apr 2023 15:53:12 -0400
Patch series "mm/hugetlb: More fixes around uffd-wp vs fork() / RO pins",
v2.
This patch (of 6):
There're a bunch of things that were wrong:
- Reading uffd-wp bit from a swap entry should use pte_swp_uffd_wp()
rather than huge_pte_uffd_wp().
- When copying over a pte, we should drop uffd-wp bit when
!EVENT_FORK (aka, when !userfaultfd_wp(dst_vma)).
- When doing early CoW for private hugetlb (e.g. when the parent page was
pinned), uffd-wp bit should be properly carried over if necessary.
No bug reported probably because most people do not even care about these
corner cases, but they are still bugs and can be exposed by the recent unit
tests introduced, so fix all of them in one shot.
Link: https://lkml.kernel.org/r/20230417195317.898696-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20230417195317.898696-2-peterx@redhat.com
Fixes: bc70fbf269fd ("mm/hugetlb: handle uffd-wp during fork()")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Mika Penttil�� <mpenttil(a)redhat.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Nadav Amit <nadav.amit(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 24 +++++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)
--- a/mm/hugetlb.c~mm-hugetlb-fix-uffd-wp-during-fork
+++ a/mm/hugetlb.c
@@ -4953,11 +4953,15 @@ static bool is_hugetlb_entry_hwpoisoned(
static void
hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
- struct folio *new_folio)
+ struct folio *new_folio, pte_t old)
{
+ pte_t newpte = make_huge_pte(vma, &new_folio->page, 1);
+
__folio_mark_uptodate(new_folio);
hugepage_add_new_anon_rmap(new_folio, vma, addr);
- set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, &new_folio->page, 1));
+ if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old))
+ newpte = huge_pte_mkuffd_wp(newpte);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, newpte);
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
}
@@ -5032,14 +5036,12 @@ again:
*/
;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
- bool uffd_wp = huge_pte_uffd_wp(entry);
-
- if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
- bool uffd_wp = huge_pte_uffd_wp(entry);
+ bool uffd_wp = pte_swp_uffd_wp(entry);
if (!is_readable_migration_entry(swp_entry) && cow) {
/*
@@ -5050,10 +5052,10 @@ again:
swp_offset(swp_entry));
entry = swp_entry_to_pte(swp_entry);
if (userfaultfd_wp(src_vma) && uffd_wp)
- entry = huge_pte_mkuffd_wp(entry);
+ entry = pte_swp_mkuffd_wp(entry);
set_huge_pte_at(src, addr, src_pte, entry);
}
- if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
@@ -5118,7 +5120,8 @@ again:
/* huge_ptep of dst_pte won't change as in child */
goto again;
}
- hugetlb_install_folio(dst_vma, dst_pte, addr, new_folio);
+ hugetlb_install_folio(dst_vma, dst_pte, addr,
+ new_folio, src_pte_old);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
continue;
@@ -5136,6 +5139,9 @@ again:
entry = huge_pte_wrprotect(entry);
}
+ if (!userfaultfd_wp(dst_vma))
+ entry = huge_pte_clear_uffd_wp(entry);
+
set_huge_pte_at(dst, addr, dst_pte, entry);
hugetlb_count_add(npages, dst);
}
_
Patches currently in -mm which might be from peterx(a)redhat.com are
selftests-mm-update-gitignore-with-two-missing-tests.patch
selftests-mm-dump-a-summary-in-run_vmtestssh.patch
selftests-mm-merge-utilh-into-vm_utilh.patch
selftests-mm-use-test_gen_progs-where-proper.patch
selftests-mm-link-vm_utilc-always.patch
selftests-mm-merge-default_huge_page_size-into-one.patch
selftests-mm-use-pm_-macros-in-vm_utilsh.patch
selftests-mm-reuse-pagemap_get_entry-in-vm_utilh.patch
selftests-mm-test-uffdio_zeropage-only-when-hugetlb.patch
selftests-mm-drop-test_uffdio_zeropage_eexist.patch
selftests-mm-create-uffd-common.patch
selftests-mm-split-uffd-tests-into-uffd-stress-and-uffd-unit-tests.patch
selftests-mm-uffd_register.patch
selftests-mm-uffd_open_devsys.patch
selftests-mm-uffdio_api-test.patch
selftests-mm-drop-global-mem_fd-in-uffd-tests.patch
selftests-mm-drop-global-hpage_size-in-uffd-tests.patch
selftests-mm-rename-uffd_stats-to-uffd_args.patch
selftests-mm-let-uffd_handle_page_fault-take-wp-parameter.patch
selftests-mm-allow-allocate_area-to-fail-properly.patch
selftests-mm-add-framework-for-uffd-unit-test.patch
selftests-mm-move-uffd-pagemap-test-to-unit-test.patch
selftests-mm-move-uffd-minor-test-to-unit-test.patch
selftests-mm-move-uffd-sig-events-tests-into-uffd-unit-tests.patch
selftests-mm-move-zeropage-test-into-uffd-unit-tests.patch
selftests-mm-workaround-no-way-to-detect-uffd-minor-wp.patch
selftests-mm-allow-uffd-test-to-skip-properly-with-no-privilege.patch
selftests-mm-drop-sys-dev-test-in-uffd-stress-test.patch
selftests-mm-add-shmem-private-test-to-uffd-stress.patch
selftests-mm-add-uffdio-register-ioctls-test.patch
mm-hugetlb-fix-uffd-wp-during-fork.patch
mm-hugetlb-fix-uffd-wp-bit-lost-when-unsharing-happens.patch
selftests-mm-add-a-few-options-for-uffd-unit-test.patch
selftests-mm-extend-and-rename-uffd-pagemap-test.patch
selftests-mm-rename-cow_extra_libs-to-iouring_extra_libs.patch
selftests-mm-add-tests-for-ro-pinning-vs-fork.patch
There're a bunch of things that were wrong:
- Reading uffd-wp bit from a swap entry should use pte_swp_uffd_wp()
rather than huge_pte_uffd_wp().
- When copying over a pte, we should drop uffd-wp bit when
!EVENT_FORK (aka, when !userfaultfd_wp(dst_vma)).
- When doing early CoW for private hugetlb (e.g. when the parent page was
pinned), uffd-wp bit should be properly carried over if necessary.
No bug reported probably because most people do not even care about these
corner cases, but they are still bugs and can be exposed by the recent unit
tests introduced, so fix all of them in one shot.
Cc: linux-stable <stable(a)vger.kernel.org>
Fixes: bc70fbf269fd ("mm/hugetlb: handle uffd-wp during fork()")
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Signed-off-by: Peter Xu <peterx(a)redhat.com>
---
mm/hugetlb.c | 24 +++++++++++++++---------
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f16b25b1a6b9..0213efaf31be 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4953,11 +4953,15 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
static void
hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
- struct folio *new_folio)
+ struct folio *new_folio, pte_t old)
{
+ pte_t newpte = make_huge_pte(vma, &new_folio->page, 1);
+
__folio_mark_uptodate(new_folio);
hugepage_add_new_anon_rmap(new_folio, vma, addr);
- set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, &new_folio->page, 1));
+ if (userfaultfd_wp(vma) && huge_pte_uffd_wp(old))
+ newpte = huge_pte_mkuffd_wp(newpte);
+ set_huge_pte_at(vma->vm_mm, addr, ptep, newpte);
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
folio_set_hugetlb_migratable(new_folio);
}
@@ -5032,14 +5036,12 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
*/
;
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
- bool uffd_wp = huge_pte_uffd_wp(entry);
-
- if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
- bool uffd_wp = huge_pte_uffd_wp(entry);
+ bool uffd_wp = pte_swp_uffd_wp(entry);
if (!is_readable_migration_entry(swp_entry) && cow) {
/*
@@ -5050,10 +5052,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
swp_offset(swp_entry));
entry = swp_entry_to_pte(swp_entry);
if (userfaultfd_wp(src_vma) && uffd_wp)
- entry = huge_pte_mkuffd_wp(entry);
+ entry = pte_swp_mkuffd_wp(entry);
set_huge_pte_at(src, addr, src_pte, entry);
}
- if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ if (!userfaultfd_wp(dst_vma))
entry = huge_pte_clear_uffd_wp(entry);
set_huge_pte_at(dst, addr, dst_pte, entry);
} else if (unlikely(is_pte_marker(entry))) {
@@ -5114,7 +5116,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
/* huge_ptep of dst_pte won't change as in child */
goto again;
}
- hugetlb_install_folio(dst_vma, dst_pte, addr, new_folio);
+ hugetlb_install_folio(dst_vma, dst_pte, addr,
+ new_folio, src_pte_old);
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
continue;
@@ -5132,6 +5135,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
entry = huge_pte_wrprotect(entry);
}
+ if (!userfaultfd_wp(dst_vma))
+ entry = huge_pte_clear_uffd_wp(entry);
+
set_huge_pte_at(dst, addr, dst_pte, entry);
hugetlb_count_add(npages, dst);
}
--
2.39.1
Several calls to ci_dpm_fini() will attempt to free resources that
either have been freed before or haven't been allocated yet. This
may lead to undefined or dangerous behaviour.
For instance, if r600_parse_extended_power_table() fails, it might
call r600_free_extended_power_table() as will ci_dpm_fini() later
during error handling.
Fix this by only freeing pointers to objects previously allocated.
Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.
Fixes: cc8dbbb4f62a ("drm/radeon: add dpm support for CI dGPUs (v2)")
Cc: stable(a)vger.kernel.org
Co-developed-by: Natalia Petrova <n.petrova(a)fintech.ru>
Signed-off-by: Nikita Zhandarovich <n.zhandarovich(a)fintech.ru>
---
v2: free only resouces allocated prior, do not remove ci_dpm_fini()
or other deallocating calls altogether; fix commit message.
v1: https://lore.kernel.org/all/20230403182808.8699-1-n.zhandarovich@fintech.ru/
drivers/gpu/drm/radeon/ci_dpm.c | 28 ++++++++++++++++++++--------
1 file changed, 20 insertions(+), 8 deletions(-)
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 8ef25ab305ae..b8f4dac68d85 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -5517,6 +5517,7 @@ static int ci_parse_power_table(struct radeon_device *rdev)
u8 frev, crev;
u8 *power_state_offset;
struct ci_ps *ps;
+ int ret;
if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
&frev, &crev, &data_offset))
@@ -5546,11 +5547,15 @@ static int ci_parse_power_table(struct radeon_device *rdev)
non_clock_array_index = power_state->v2.nonClockInfoIndex;
non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
&non_clock_info_array->nonClockInfo[non_clock_array_index];
- if (!rdev->pm.power_state[i].clock_info)
- return -EINVAL;
+ if (!rdev->pm.power_state[i].clock_info) {
+ ret = -EINVAL;
+ goto err_free_ps;
+ }
ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL);
- if (ps == NULL)
- return -ENOMEM;
+ if (ps == NULL) {
+ ret = -ENOMEM;
+ goto err_free_ps;
+ }
rdev->pm.dpm.ps[i].ps_priv = ps;
ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i],
non_clock_info,
@@ -5590,6 +5595,12 @@ static int ci_parse_power_table(struct radeon_device *rdev)
}
return 0;
+
+err_free_ps:
+ for (i = 0; i < rdev->pm.dpm.num_ps; i++)
+ kfree(rdev->pm.dpm.ps[i].ps_priv);
+ kfree(rdev->pm.dpm.ps);
+ return ret;
}
static int ci_get_vbios_boot_values(struct radeon_device *rdev,
@@ -5678,25 +5689,26 @@ int ci_dpm_init(struct radeon_device *rdev)
ret = ci_get_vbios_boot_values(rdev, &pi->vbios_boot_state);
if (ret) {
- ci_dpm_fini(rdev);
+ kfree(rdev->pm.dpm.priv);
return ret;
}
ret = r600_get_platform_caps(rdev);
if (ret) {
- ci_dpm_fini(rdev);
+ kfree(rdev->pm.dpm.priv);
return ret;
}
ret = r600_parse_extended_power_table(rdev);
if (ret) {
- ci_dpm_fini(rdev);
+ kfree(rdev->pm.dpm.priv);
return ret;
}
ret = ci_parse_power_table(rdev);
if (ret) {
- ci_dpm_fini(rdev);
+ kfree(rdev->pm.dpm.priv);
+ r600_free_extended_power_table(rdev);
return ret;
}
Smatch detected a double free path because lpfc_nlp_not_used releases an
ndlp object before reaching lpfc_nlp_put at the end of
lpfc_cmpl_els_logo_acc.
Remove the outdated lpfc_nlp_not_used routine. In
lpfc_mbx_cmpl_ns_reg_login, replace the call with lpfc_nlp_put. In
lpfc_cmpl_els_logo_acc, replace the call with lpfc_unreg_rpi and keep the
lpfc_nlp_put at the end of the routine. If ndlp's rpi was registered, then
lpfc_unreg_rpi's completion routine performs the final ndlp clean up after
lpfc_nlp_put is called from lpfc_cmpl_els_logo_acc. Otherwise if ndlp has
no rpi registered, the lpfc_nlp_put at the end of lpfc_cmpl_els_logo_acc is
the final ndlp clean up.
Fixes: 4430f7fd09ec ("scsi: lpfc: Rework locations of ndlp reference taking")
Cc: <stable(a)vger.kernel.org> # v5.11+
Reported-by: Dan Carpenter <error27(a)gmail.com>
Link: https://lore.kernel.org/all/Y3OefhyyJNKH%2Fiaf@kili/
Signed-off-by: Justin Tee <justin.tee(a)broadcom.com>
---
drivers/scsi/lpfc/lpfc_crtn.h | 1 -
drivers/scsi/lpfc/lpfc_els.c | 30 +++++++-----------------------
drivers/scsi/lpfc/lpfc_hbadisc.c | 24 +++---------------------
3 files changed, 10 insertions(+), 45 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index b833b983e69d..0b9edde26abd 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -134,7 +134,6 @@ void lpfc_check_nlp_post_devloss(struct lpfc_vport *vport,
struct lpfc_nodelist *ndlp);
void lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
struct lpfc_iocbq *rspiocb);
-int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp);
struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_vport *, uint32_t);
void lpfc_disc_list_loopmap(struct lpfc_vport *);
void lpfc_disc_start(struct lpfc_vport *);
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 6a15f879e517..a3c8550e9985 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -5205,14 +5205,9 @@ lpfc_els_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *elsiocb)
*
* This routine is the completion callback function to the Logout (LOGO)
* Accept (ACC) Response ELS command. This routine is invoked to indicate
- * the completion of the LOGO process. It invokes the lpfc_nlp_not_used() to
- * release the ndlp if it has the last reference remaining (reference count
- * is 1). If succeeded (meaning ndlp released), it sets the iocb ndlp
- * field to NULL to inform the following lpfc_els_free_iocb() routine no
- * ndlp reference count needs to be decremented. Otherwise, the ndlp
- * reference use-count shall be decremented by the lpfc_els_free_iocb()
- * routine. Finally, the lpfc_els_free_iocb() is invoked to release the
- * IOCB data structure.
+ * the completion of the LOGO process. If the node has transitioned to NPR,
+ * this routine unregisters the RPI if it is still registered. The
+ * lpfc_els_free_iocb() is invoked to release the IOCB data structure.
**/
static void
lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
@@ -5253,19 +5248,9 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
(ndlp->nlp_last_elscmd == ELS_CMD_PLOGI))
goto out;
- /* NPort Recovery mode or node is just allocated */
- if (!lpfc_nlp_not_used(ndlp)) {
- /* A LOGO is completing and the node is in NPR state.
- * Just unregister the RPI because the node is still
- * required.
- */
+ if (ndlp->nlp_flag & NLP_RPI_REGISTERED)
lpfc_unreg_rpi(vport, ndlp);
- } else {
- /* Indicate the node has already released, should
- * not reference to it from within lpfc_els_free_iocb.
- */
- cmdiocb->ndlp = NULL;
- }
+
}
out:
/*
@@ -5285,9 +5270,8 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
* RPI (Remote Port Index) mailbox command to the @phba. It simply releases
* the associated lpfc Direct Memory Access (DMA) buffer back to the pool and
* decrements the ndlp reference count held for this completion callback
- * function. After that, it invokes the lpfc_nlp_not_used() to check
- * whether there is only one reference left on the ndlp. If so, it will
- * perform one more decrement and trigger the release of the ndlp.
+ * function. After that, it invokes the lpfc_drop_node to check
+ * whether it is appropriate to release the node.
**/
void
lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 5ba3a9ad9501..67bfdddb897c 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -4333,13 +4333,14 @@ lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
/* If the node is not registered with the scsi or nvme
* transport, remove the fabric node. The failed reg_login
- * is terminal.
+ * is terminal and forces the removal of the last node
+ * reference.
*/
if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) {
spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
spin_unlock_irq(&ndlp->lock);
- lpfc_nlp_not_used(ndlp);
+ lpfc_nlp_put(ndlp);
}
if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) {
@@ -6704,25 +6705,6 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp)
return ndlp ? kref_put(&ndlp->kref, lpfc_nlp_release) : 0;
}
-/* This routine free's the specified nodelist if it is not in use
- * by any other discovery thread. This routine returns 1 if the
- * ndlp has been freed. A return value of 0 indicates the ndlp is
- * not yet been released.
- */
-int
-lpfc_nlp_not_used(struct lpfc_nodelist *ndlp)
-{
- lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE,
- "node not used: did:x%x flg:x%x refcnt:x%x",
- ndlp->nlp_DID, ndlp->nlp_flag,
- kref_read(&ndlp->kref));
-
- if (kref_read(&ndlp->kref) == 1)
- if (lpfc_nlp_put(ndlp))
- return 1;
- return 0;
-}
-
/**
* lpfc_fcf_inuse - Check if FCF can be unregistered.
* @phba: Pointer to hba context object.
--
2.38.0
Previously, capability was checked using capable(), which verified that the
caller of the ioctl system call had the required capability. In addition,
the result of the check would be stored in the HCI_SOCK_TRUSTED flag,
making it persistent for the socket.
However, malicious programs can abuse this approach by deliberately sharing
an HCI socket with a privileged task. The HCI socket will be marked as
trusted when the privileged task occasionally makes an ioctl call.
This problem can be solved by using sk_capable() to check capability, which
ensures that not only the current task but also the socket opener has the
specified capability, thus reducing the risk of privilege escalation
through the previously identified vulnerability.
Cc: stable(a)vger.kernel.org
Fixes: f81f5b2db869 ("Bluetooth: Send control open and close messages for HCI raw sockets")
Signed-off-by: Ruihan Li <lrh2000(a)pku.edu.cn>
---
net/bluetooth/hci_sock.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 065812232..f597fe0db 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1003,7 +1003,14 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
if (hci_sock_gen_cookie(sk)) {
struct sk_buff *skb;
- if (capable(CAP_NET_ADMIN))
+ /* Perform careful checks before setting the HCI_SOCK_TRUSTED
+ * flag. Make sure that not only the current task but also
+ * the socket opener has the required capability, since
+ * privileged programs can be tricked into making ioctl calls
+ * on HCI sockets, and the socket should not be marked as
+ * trusted simply because the ioctl caller is privileged.
+ */
+ if (sk_capable(sk, CAP_NET_ADMIN))
hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
/* Send event to monitor */
--
2.40.0
commit ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 upstream.
After a successful cpuset_can_attach() call which increments the
attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach()
will be called later. In cpuset_attach(), tasks in cpuset_attach_wq,
if present, will be woken up at the end. That is not the case in
cpuset_cancel_attach(). So missed wakeup is possible if the attach
operation is somehow cancelled. Fix that by doing the wakeup in
cpuset_cancel_attach() as well.
Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Reviewed-by: Michal Koutný <mkoutny(a)suse.com>
Cc: stable(a)vger.kernel.org # v3.11+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
---
kernel/cgroup/cpuset.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 0b82e3857d33..20c1ccc6a0bb 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1508,7 +1508,9 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
cs = css_cs(css);
mutex_lock(&cpuset_mutex);
- css_cs(css)->attach_in_progress--;
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
mutex_unlock(&cpuset_mutex);
}
--
2.31.1
commit ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 upstream.
After a successful cpuset_can_attach() call which increments the
attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach()
will be called later. In cpuset_attach(), tasks in cpuset_attach_wq,
if present, will be woken up at the end. That is not the case in
cpuset_cancel_attach(). So missed wakeup is possible if the attach
operation is somehow cancelled. Fix that by doing the wakeup in
cpuset_cancel_attach() as well.
Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Reviewed-by: Michal Koutný <mkoutny(a)suse.com>
Cc: stable(a)vger.kernel.org # v3.11+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
---
kernel/cgroup/cpuset.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index c6d412cebc43..3067d3e5a51d 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -1504,7 +1504,9 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
cs = css_cs(css);
mutex_lock(&cpuset_mutex);
- css_cs(css)->attach_in_progress--;
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
mutex_unlock(&cpuset_mutex);
}
--
2.31.1
Hi,
please pick
1e020e1b96afd ("ubi: Fix failure attaching when vid_hdr offset equals to (sub)page size")
from linux-next to stable trees.
The commit fixes a problem with another recent commit
1b42b1a36fc94 ("ubi: ensure that VID header offset ... size")
which has already made it to stable trees and thereby broke attaching
UBI and hence renders devices unbootable.
As a temporary fix I have applied this patch downstream in OpenWrt.
Thank you!
Best regards
Daniel
----- Forwarded message from Daniel Golle <daniel(a)makrotopia.org> -----
Date: Sat, 15 Apr 2023 00:12:46 +0100
From: Daniel Golle <daniel(a)makrotopia.org>
To: Richard Weinberger <richard(a)nod.at>
Cc: Miquel Raynal <miquel.raynal(a)bootlin.com>, chengzhihao1 <chengzhihao1(a)huawei.com>, Nicolas Schichan <nschichan(a)freebox.fr>, George Kennedy <george.kennedy(a)oracle.com>, linux-kernel <linux-kernel(a)vger.kernel.org>, linux-mtd
<linux-mtd(a)lists.infradead.org>, Sascha Hauer <s.hauer(a)pengutronix.de>, yi zhang <yi.zhang(a)huawei.com>
Subject: Re: [PATCH -next v3] ubi: Fix failure attaching when vid_hdr offset equals to (sub)page size
Hi Richard,
On Wed, Mar 29, 2023 at 11:33:40PM +0200, Richard Weinberger wrote:
> ----- Ursprüngliche Mail -----
> > Von: "Miquel Raynal" <miquel.raynal(a)bootlin.com>
> >> Thanks for testing.
> >>
> >> > Tested-by: Nicolas Schichan <nschichan(a)freebox.fr>
> >
> > Same here.
> >
> > Tested-by: Miquel Raynal <miquel.raynal(a)bootlin.com> # v5.10, v4.19
>
> Applied to next, PR will follow soon.
As stable linux trees are affected I wonder when this will hit
linux-stable, ie. will it be part of 5.15.108, for example?
Cheers
Daniel
----- End forwarded message -----
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x eee87853794187f6adbe19533ed79c8b44b36a91
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041705-gauze-elated-d7c7@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
eee878537941 ("cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods")
42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly")
18f9a4d47527 ("cgroup/cpuset: Skip spread flags update on v2")
e2d59900d936 ("cgroup/cpuset: Allow no-task partition to have empty cpuset.cpus.effective")
18065ebe9b33 ("cgroup/cpuset: Miscellaneous cleanups & add helper functions")
f9da322e864e ("cgroup: cleanup comments")
8ca1b5a49885 ("mm/page_alloc: detect allocation forbidden by cpuset and bail out early")
b94f9ac79a73 ("cgroup/cpuset: Change references of cpuset_mutex to cpuset_rwsem")
69dc8010b8fc ("Merge branch 'for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eee87853794187f6adbe19533ed79c8b44b36a91 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman(a)redhat.com>
Date: Tue, 11 Apr 2023 09:35:59 -0400
Subject: [PATCH] cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork()
methods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
new tasks. It is too late to check that in cpuset_fork(). So we need
to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
pre-check it before we can allow attachment to a different cpuset.
We also need to set the attach_in_progress flag to alert other code
that a new task is going to be added to the cpuset.
Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
Suggested-by: Michal Koutný <mkoutny(a)suse.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org # v5.7+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 2ccfae74acf9..166a45019f66 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2453,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
+/*
+ * Check to see if a cpuset can accept a new task
+ * For v1, cpus_allowed and mems_allowed can't be empty.
+ * For v2, effective_cpus can't be empty.
+ * Note that in v1, effective_cpus = cpus_allowed.
+ */
+static int cpuset_can_attach_check(struct cpuset *cs)
+{
+ if (cpumask_empty(cs->effective_cpus) ||
+ (!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
+ return -ENOSPC;
+ return 0;
+}
+
/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
@@ -2467,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
percpu_down_write(&cpuset_rwsem);
- /* allow moving tasks into an empty cpuset if on default hierarchy */
- ret = -ENOSPC;
- if (!is_in_v2_mode() &&
- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
- goto out_unlock;
-
- /*
- * Task cannot be moved to a cpuset with empty effective cpus.
- */
- if (cpumask_empty(cs->effective_cpus))
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
goto out_unlock;
cgroup_taskset_for_each(task, css, tset) {
@@ -2493,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
- ret = 0;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
@@ -3264,6 +3270,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
percpu_up_write(&cpuset_rwsem);
}
+/*
+ * In case the child is cloned into a cpuset different from its parent,
+ * additional checks are done to see if the move is allowed.
+ */
+static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+ int ret;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return 0;
+
+ lockdep_assert_held(&cgroup_mutex);
+ percpu_down_write(&cpuset_rwsem);
+
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
+ goto out_unlock;
+
+ ret = task_can_attach(task, cs->effective_cpus);
+ if (ret)
+ goto out_unlock;
+
+ ret = security_task_setscheduler(task);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * Mark attach is in progress. This makes validate_change() fail
+ * changes which zero cpus/mems_allowed.
+ */
+ cs->attach_in_progress++;
+out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ return ret;
+}
+
+static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return;
+
+ percpu_down_write(&cpuset_rwsem);
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+ percpu_up_write(&cpuset_rwsem);
+}
+
/*
* Make sure the new task conform to the current state of its parent,
* which could have been changed by cpuset just after it inherits the
@@ -3292,6 +3360,11 @@ static void cpuset_fork(struct task_struct *task)
percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cpuset_attach_task(cs, task);
+
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+
percpu_up_write(&cpuset_rwsem);
}
@@ -3305,6 +3378,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
+ .can_fork = cpuset_can_fork,
+ .cancel_fork = cpuset_cancel_fork,
.fork = cpuset_fork,
.legacy_cftypes = legacy_files,
.dfl_cftypes = dfl_files,
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x eee87853794187f6adbe19533ed79c8b44b36a91
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041703-wildland-privacy-e6d6@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
eee878537941 ("cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods")
42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly")
18f9a4d47527 ("cgroup/cpuset: Skip spread flags update on v2")
e2d59900d936 ("cgroup/cpuset: Allow no-task partition to have empty cpuset.cpus.effective")
18065ebe9b33 ("cgroup/cpuset: Miscellaneous cleanups & add helper functions")
f9da322e864e ("cgroup: cleanup comments")
8ca1b5a49885 ("mm/page_alloc: detect allocation forbidden by cpuset and bail out early")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eee87853794187f6adbe19533ed79c8b44b36a91 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman(a)redhat.com>
Date: Tue, 11 Apr 2023 09:35:59 -0400
Subject: [PATCH] cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork()
methods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
new tasks. It is too late to check that in cpuset_fork(). So we need
to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
pre-check it before we can allow attachment to a different cpuset.
We also need to set the attach_in_progress flag to alert other code
that a new task is going to be added to the cpuset.
Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
Suggested-by: Michal Koutný <mkoutny(a)suse.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org # v5.7+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 2ccfae74acf9..166a45019f66 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2453,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
+/*
+ * Check to see if a cpuset can accept a new task
+ * For v1, cpus_allowed and mems_allowed can't be empty.
+ * For v2, effective_cpus can't be empty.
+ * Note that in v1, effective_cpus = cpus_allowed.
+ */
+static int cpuset_can_attach_check(struct cpuset *cs)
+{
+ if (cpumask_empty(cs->effective_cpus) ||
+ (!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
+ return -ENOSPC;
+ return 0;
+}
+
/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
@@ -2467,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
percpu_down_write(&cpuset_rwsem);
- /* allow moving tasks into an empty cpuset if on default hierarchy */
- ret = -ENOSPC;
- if (!is_in_v2_mode() &&
- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
- goto out_unlock;
-
- /*
- * Task cannot be moved to a cpuset with empty effective cpus.
- */
- if (cpumask_empty(cs->effective_cpus))
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
goto out_unlock;
cgroup_taskset_for_each(task, css, tset) {
@@ -2493,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
- ret = 0;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
@@ -3264,6 +3270,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
percpu_up_write(&cpuset_rwsem);
}
+/*
+ * In case the child is cloned into a cpuset different from its parent,
+ * additional checks are done to see if the move is allowed.
+ */
+static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+ int ret;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return 0;
+
+ lockdep_assert_held(&cgroup_mutex);
+ percpu_down_write(&cpuset_rwsem);
+
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
+ goto out_unlock;
+
+ ret = task_can_attach(task, cs->effective_cpus);
+ if (ret)
+ goto out_unlock;
+
+ ret = security_task_setscheduler(task);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * Mark attach is in progress. This makes validate_change() fail
+ * changes which zero cpus/mems_allowed.
+ */
+ cs->attach_in_progress++;
+out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ return ret;
+}
+
+static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return;
+
+ percpu_down_write(&cpuset_rwsem);
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+ percpu_up_write(&cpuset_rwsem);
+}
+
/*
* Make sure the new task conform to the current state of its parent,
* which could have been changed by cpuset just after it inherits the
@@ -3292,6 +3360,11 @@ static void cpuset_fork(struct task_struct *task)
percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cpuset_attach_task(cs, task);
+
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+
percpu_up_write(&cpuset_rwsem);
}
@@ -3305,6 +3378,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
+ .can_fork = cpuset_can_fork,
+ .cancel_fork = cpuset_cancel_fork,
.fork = cpuset_fork,
.legacy_cftypes = legacy_files,
.dfl_cftypes = dfl_files,
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x d83806c4c0cccc0d6d3c3581a11983a9c186a138
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041747-monogram-playpen-8e77@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
d83806c4c0cc ("purgatory: fix disabling debug info")
56e0790c7f9e ("RISC-V: add infrastructure to allow different str* implementations")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d83806c4c0cccc0d6d3c3581a11983a9c186a138 Mon Sep 17 00:00:00 2001
From: Alyssa Ross <hi(a)alyssa.is>
Date: Sun, 26 Mar 2023 18:21:21 +0000
Subject: [PATCH] purgatory: fix disabling debug info
Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS.
Instead, it includes -g, the appropriate -gdwarf-* flag, and also the
-Wa versions of both of those if building with Clang and GNU as. As a
result, debug info was being generated for the purgatory objects, even
though the intention was that it not be.
Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files")
Signed-off-by: Alyssa Ross <hi(a)alyssa.is>
Cc: stable(a)vger.kernel.org
Acked-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index d16bf715a586..5730797a6b40 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 17f09dc26381..82fec66d46d2 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x eee87853794187f6adbe19533ed79c8b44b36a91
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041702-vertebrae-bonsai-f1de@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
eee878537941 ("cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods")
42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly")
18f9a4d47527 ("cgroup/cpuset: Skip spread flags update on v2")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eee87853794187f6adbe19533ed79c8b44b36a91 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman(a)redhat.com>
Date: Tue, 11 Apr 2023 09:35:59 -0400
Subject: [PATCH] cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork()
methods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
new tasks. It is too late to check that in cpuset_fork(). So we need
to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
pre-check it before we can allow attachment to a different cpuset.
We also need to set the attach_in_progress flag to alert other code
that a new task is going to be added to the cpuset.
Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
Suggested-by: Michal Koutný <mkoutny(a)suse.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org # v5.7+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 2ccfae74acf9..166a45019f66 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2453,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)
static struct cpuset *cpuset_attach_old_cs;
+/*
+ * Check to see if a cpuset can accept a new task
+ * For v1, cpus_allowed and mems_allowed can't be empty.
+ * For v2, effective_cpus can't be empty.
+ * Note that in v1, effective_cpus = cpus_allowed.
+ */
+static int cpuset_can_attach_check(struct cpuset *cs)
+{
+ if (cpumask_empty(cs->effective_cpus) ||
+ (!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
+ return -ENOSPC;
+ return 0;
+}
+
/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
static int cpuset_can_attach(struct cgroup_taskset *tset)
{
@@ -2467,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
percpu_down_write(&cpuset_rwsem);
- /* allow moving tasks into an empty cpuset if on default hierarchy */
- ret = -ENOSPC;
- if (!is_in_v2_mode() &&
- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
- goto out_unlock;
-
- /*
- * Task cannot be moved to a cpuset with empty effective cpus.
- */
- if (cpumask_empty(cs->effective_cpus))
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
goto out_unlock;
cgroup_taskset_for_each(task, css, tset) {
@@ -2493,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
* changes which zero cpus/mems_allowed.
*/
cs->attach_in_progress++;
- ret = 0;
out_unlock:
percpu_up_write(&cpuset_rwsem);
return ret;
@@ -3264,6 +3270,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
percpu_up_write(&cpuset_rwsem);
}
+/*
+ * In case the child is cloned into a cpuset different from its parent,
+ * additional checks are done to see if the move is allowed.
+ */
+static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+ int ret;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return 0;
+
+ lockdep_assert_held(&cgroup_mutex);
+ percpu_down_write(&cpuset_rwsem);
+
+ /* Check to see if task is allowed in the cpuset */
+ ret = cpuset_can_attach_check(cs);
+ if (ret)
+ goto out_unlock;
+
+ ret = task_can_attach(task, cs->effective_cpus);
+ if (ret)
+ goto out_unlock;
+
+ ret = security_task_setscheduler(task);
+ if (ret)
+ goto out_unlock;
+
+ /*
+ * Mark attach is in progress. This makes validate_change() fail
+ * changes which zero cpus/mems_allowed.
+ */
+ cs->attach_in_progress++;
+out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ return ret;
+}
+
+static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
+{
+ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
+ bool same_cs;
+
+ rcu_read_lock();
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs)
+ return;
+
+ percpu_down_write(&cpuset_rwsem);
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+ percpu_up_write(&cpuset_rwsem);
+}
+
/*
* Make sure the new task conform to the current state of its parent,
* which could have been changed by cpuset just after it inherits the
@@ -3292,6 +3360,11 @@ static void cpuset_fork(struct task_struct *task)
percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cpuset_attach_task(cs, task);
+
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
+
percpu_up_write(&cpuset_rwsem);
}
@@ -3305,6 +3378,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
+ .can_fork = cpuset_can_fork,
+ .cancel_fork = cpuset_cancel_fork,
.fork = cpuset_fork,
.legacy_cftypes = legacy_files,
.dfl_cftypes = dfl_files,
The patch below does not apply to the 6.2-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.2.y
git checkout FETCH_HEAD
git cherry-pick -x d83806c4c0cccc0d6d3c3581a11983a9c186a138
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041746-daybed-posing-b660@gregkh' --subject-prefix 'PATCH 6.2.y' HEAD^..
Possible dependencies:
d83806c4c0cc ("purgatory: fix disabling debug info")
56e0790c7f9e ("RISC-V: add infrastructure to allow different str* implementations")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d83806c4c0cccc0d6d3c3581a11983a9c186a138 Mon Sep 17 00:00:00 2001
From: Alyssa Ross <hi(a)alyssa.is>
Date: Sun, 26 Mar 2023 18:21:21 +0000
Subject: [PATCH] purgatory: fix disabling debug info
Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS.
Instead, it includes -g, the appropriate -gdwarf-* flag, and also the
-Wa versions of both of those if building with Clang and GNU as. As a
result, debug info was being generated for the purgatory objects, even
though the intention was that it not be.
Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files")
Signed-off-by: Alyssa Ross <hi(a)alyssa.is>
Cc: stable(a)vger.kernel.org
Acked-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index d16bf715a586..5730797a6b40 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 17f09dc26381..82fec66d46d2 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
During a suspend/resume cycle the VO power domain will be disabled and
the VOP2 registers will reset to their default values. After that the
cached register values will be out of sync and the read/modify/write
operations we do on the window registers will result in bogus values
written. Fix this by marking the regcache as dirty each time we disable
the VOP2 and call regcache_sync() each time we enable it again. With
this the VOP2 will show a picture after a suspend/resume cycle whereas
without this the screen stays dark.
Fixes: 604be85547ce4 ("drm/rockchip: Add VOP2 driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
---
Changes since v1:
- Use regcache_mark_dirty()/regcache_sync() instead of regmap_reinit_cache()
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index ba3b817895091..293c228a83f90 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -839,6 +839,8 @@ static void vop2_enable(struct vop2 *vop2)
return;
}
+ regcache_sync(vop2->map);
+
if (vop2->data->soc_id == 3566)
vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
@@ -867,6 +869,8 @@ static void vop2_disable(struct vop2 *vop2)
pm_runtime_put_sync(vop2->dev);
+ regcache_mark_dirty(vop2->map);
+
clk_disable_unprepare(vop2->aclk);
clk_disable_unprepare(vop2->hclk);
}
--
2.39.2
Per-vcpu flags are updated using a non-atomic RMW operation.
Which means it is possible to get preempted between the read and
write operations.
Another interesting thing to note is that preemption also updates
flags, as we have some flag manipulation in both the load and put
operations.
It is thus possible to lose information communicated by either
load or put, as the preempted flag update will overwrite the flags
when the thread is resumed. This is specially critical if either
load or put has stored information which depends on the physical
CPU the vcpu runs on.
This results in really elusive bugs, and kudos must be given to
Mostafa for the long hours of debugging, and finally spotting
the problem.
Fixes: e87abb73e594 ("KVM: arm64: Add helpers to manipulate vcpu flags among a set")
Reported-by: Mostafa Saleh <smostafa(a)google.com>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
arch/arm64/include/asm/kvm_host.h | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index bcd774d74f34..d716cfd806e8 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -579,6 +579,19 @@ struct kvm_vcpu_arch {
v->arch.flagset & (m); \
})
+/*
+ * Note that the set/clear accessors must be preempt-safe in order to
+ * avoid nesting them with load/put which also manipulate flags...
+ */
+#ifdef __KVM_NVHE_HYPERVISOR__
+/* the nVHE hypervisor is always non-preemptible */
+#define __vcpu_flags_preempt_disable()
+#define __vcpu_flags_preempt_enable()
+#else
+#define __vcpu_flags_preempt_disable() preempt_disable()
+#define __vcpu_flags_preempt_enable() preempt_enable()
+#endif
+
#define __vcpu_set_flag(v, flagset, f, m) \
do { \
typeof(v->arch.flagset) *fset; \
@@ -586,9 +599,11 @@ struct kvm_vcpu_arch {
__build_check_flag(v, flagset, f, m); \
\
fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
if (HWEIGHT(m) > 1) \
*fset &= ~(m); \
*fset |= (f); \
+ __vcpu_flags_preempt_enable(); \
} while (0)
#define __vcpu_clear_flag(v, flagset, f, m) \
@@ -598,7 +613,9 @@ struct kvm_vcpu_arch {
__build_check_flag(v, flagset, f, m); \
\
fset = &v->arch.flagset; \
+ __vcpu_flags_preempt_disable(); \
*fset &= ~(m); \
+ __vcpu_flags_preempt_enable(); \
} while (0)
#define vcpu_get_flag(v, ...) __vcpu_get_flag((v), __VA_ARGS__)
--
2.34.1
On my RTW8821CU chipset rfe_option reads as 0x22. Looking at the
downstream driver suggests that the field width of rfe_option is 5 bit,
so rfe_option should be masked with 0x1f.
Without this the rfe_option comparisons with 2 further down the
driver evaluate as false when they should really evaluate as true.
The effect is that 2G channels do not work.
rfe_option is also used as an array index into rtw8821c_rfe_defs[].
rtw8821c_rfe_defs[34] (0x22) was added as part of adding USB support,
likely because rfe_option reads as 0x22. As this now becomes 0x2,
rtw8821c_rfe_defs[34] is no longer used and can be removed.
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
Tested-by: ValdikSS <iam(a)valdikss.org.ru>
Tested-by: Alexandru gagniuc <mr.nuke.me(a)gmail.com>
Tested-by: Larry Finger <Larry.Finger(a)lwfinger.net>
Cc: stable(a)vger.kernel.org
---
drivers/net/wireless/realtek/rtw88/rtw8821c.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.c b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
index 17f800f6efbd0..67efa58dd78ee 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8821c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.c
@@ -47,7 +47,7 @@ static int rtw8821c_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
map = (struct rtw8821c_efuse *)log_map;
- efuse->rfe_option = map->rfe_option;
+ efuse->rfe_option = map->rfe_option & 0x1f;
efuse->rf_board_option = map->rf_board_option;
efuse->crystal_cap = map->xtal_k;
efuse->pa_type_2g = map->pa_type;
@@ -1537,7 +1537,6 @@ static const struct rtw_rfe_def rtw8821c_rfe_defs[] = {
[2] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2),
[4] = RTW_DEF_RFE_EXT(8821c, 0, 0, 2),
[6] = RTW_DEF_RFE(8821c, 0, 0),
- [34] = RTW_DEF_RFE(8821c, 0, 0),
};
static struct rtw_hw_reg rtw8821c_dig[] = {
--
2.39.2
During a suspend/resume cycle the VO power domain will be disabled and
the VOP2 registers will reset to their default values. After that the
cached register values will be out of sync and the read/modify/write
operations we do on the window registers will result in bogus values
written. Fix this by re-initializing the register cache each time we
enable the VOP2. With this the VOP2 will show a picture after a
suspend/resume cycle whereas without this the screen stays dark.
Fixes: 604be85547ce4 ("drm/rockchip: Add VOP2 driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
---
drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index ba3b817895091..d9daa686b014d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -215,6 +215,8 @@ struct vop2 {
struct vop2_win win[];
};
+static const struct regmap_config vop2_regmap_config;
+
static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc)
{
return container_of(crtc, struct vop2_video_port, crtc);
@@ -839,6 +841,12 @@ static void vop2_enable(struct vop2 *vop2)
return;
}
+ ret = regmap_reinit_cache(vop2->map, &vop2_regmap_config);
+ if (ret) {
+ drm_err(vop2->drm, "failed to reinit cache: %d\n", ret);
+ return;
+ }
+
if (vop2->data->soc_id == 3566)
vop2_writel(vop2, RK3568_OTP_WIN_EN, 1);
--
2.39.2
Assert PCI Configuration Enable bit after probe. When this bit is left to
0 in the endpoint mode, the RK3399 PCIe endpoint core will generate
configuration request retry status (CRS) messages back to the root complex.
Assert this bit after probe to allow the RK3399 PCIe endpoint core to reply
to configuration requests from the root complex.
This is documented in section 17.5.8.1.2 of the RK3399 TRM.
Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller")
Cc: stable(a)vger.kernel.org
Signed-off-by: Rick Wertenbroek <rick.wertenbroek(a)gmail.com>
Reviewed-by: Damien Le Moal <dlemoal(a)kernel.org>
Tested-by: Damien Le Moal <dlemoal(a)kernel.org>
---
drivers/pci/controller/pcie-rockchip-ep.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c
index 9b835377bd9e..4c84e403e155 100644
--- a/drivers/pci/controller/pcie-rockchip-ep.c
+++ b/drivers/pci/controller/pcie-rockchip-ep.c
@@ -623,6 +623,8 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev)
ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR;
+ rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE, PCIE_CLIENT_CONFIG);
+
return 0;
err_epc_mem_exit:
pci_epc_mem_exit(epc);
--
2.25.1
brcmfmac has some messages that are KERN_ERR even though they are
harmless. This is spooking and confusing people, because they end up
being the *only* kernel messages on their boot console with common
error-only printk levels (at least on Apple Macs).
Then, when their system does not boot to a GUI for some other reason,
the brcmfmac errors are the only thing on their TTY (which also does
not show a login prompt on tty1 in typical systemd setups) and they are
thoroughly confused into believing their problem has something to do
with brcmfmac.
Seriously, I've had 10 or so people mention this by now, and multiple
confused Reddit threads about it. Let's fix it.
Signed-off-by: Hector Martin <marcan(a)marcan.st>
---
Hector Martin (2):
wifi: brcmfmac: Demote vendor-specific attach/detach messages to info
wifi: brcmfmac: Demote p2p unknown frame error to info (once)
drivers/net/wireless/broadcom/brcm80211/brcmfmac/bca/core.c | 4 ++--
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cyw/core.c | 4 ++--
drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 4 ++--
drivers/net/wireless/broadcom/brcm80211/brcmfmac/wcc/core.c | 4 ++--
4 files changed, 8 insertions(+), 8 deletions(-)
---
base-commit: fe15c26ee26efa11741a7b632e9f23b01aca4cc6
change-id: 20230416-brcmfmac-noise-2bae83836f2d
Best regards,
--
Hector Martin <marcan(a)marcan.st>
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 42a11bf5c5436e91b040aeb04063be1710bb9f9c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041751-latter-oaf-71e3@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly")
18f9a4d47527 ("cgroup/cpuset: Skip spread flags update on v2")
b94f9ac79a73 ("cgroup/cpuset: Change references of cpuset_mutex to cpuset_rwsem")
69dc8010b8fc ("Merge branch 'for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 42a11bf5c5436e91b040aeb04063be1710bb9f9c Mon Sep 17 00:00:00 2001
From: Waiman Long <longman(a)redhat.com>
Date: Tue, 11 Apr 2023 09:35:58 -0400
Subject: [PATCH] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP
properly
By default, the clone(2) syscall spawn a child process into the same
cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
into cgroups"), the child will be spawned into a different cgroup which
is somewhat similar to writing the child's tid into "cgroup.threads".
The current cpuset_fork() method does not properly handle the
CLONE_INTO_CGROUP case where the cpuset of the child may be different
from that of its parent. Update the cpuset_fork() method to treat the
CLONE_INTO_CGROUP case similar to cpuset_attach().
Since the newly cloned task has not been running yet, its actual
memory usage isn't known. So it is not necessary to make change to mm
in cpuset_fork().
Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
Reported-by: Giuseppe Scrivano <gscrivan(a)redhat.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org # v5.7+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ff7eb8e2efdc..2ccfae74acf9 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2515,16 +2515,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
}
/*
- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
* but we can't allocate it dynamically there. Define it global and
* allocate from cpuset_init().
*/
static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_to;
+
+static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
+{
+ percpu_rwsem_assert_held(&cpuset_rwsem);
+
+ if (cs != &top_cpuset)
+ guarantee_online_cpus(task, cpus_attach);
+ else
+ cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
+ /*
+ * can_attach beforehand should guarantee that this doesn't
+ * fail. TODO: have a better way to handle failure here
+ */
+ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+
+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+ cpuset_update_task_spread_flags(cs, task);
+}
static void cpuset_attach(struct cgroup_taskset *tset)
{
- /* static buf protected by cpuset_rwsem */
- static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
struct cgroup_subsys_state *css;
@@ -2555,20 +2572,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, css, tset) {
- if (cs != &top_cpuset)
- guarantee_online_cpus(task, cpus_attach);
- else
- cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
- /*
- * can_attach beforehand should guarantee that this doesn't
- * fail. TODO: have a better way to handle failure here
- */
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
- cpuset_update_task_spread_flags(cs, task);
- }
+ cgroup_taskset_for_each(task, css, tset)
+ cpuset_attach_task(cs, task);
/*
* Change mm for all threadgroup leaders. This is expensive and may
@@ -3266,11 +3271,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
*/
static void cpuset_fork(struct task_struct *task)
{
- if (task_css_is_root(task, cpuset_cgrp_id))
+ struct cpuset *cs;
+ bool same_cs;
+
+ rcu_read_lock();
+ cs = task_cs(task);
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs) {
+ if (cs == &top_cpuset)
+ return;
+
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
+ task->mems_allowed = current->mems_allowed;
return;
+ }
- set_cpus_allowed_ptr(task, current->cpus_ptr);
- task->mems_allowed = current->mems_allowed;
+ /* CLONE_INTO_CGROUP */
+ percpu_down_write(&cpuset_rwsem);
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+ cpuset_attach_task(cs, task);
+ percpu_up_write(&cpuset_rwsem);
}
struct cgroup_subsys cpuset_cgrp_subsys = {
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 42a11bf5c5436e91b040aeb04063be1710bb9f9c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041750-matchbook-operable-3285@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly")
18f9a4d47527 ("cgroup/cpuset: Skip spread flags update on v2")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 42a11bf5c5436e91b040aeb04063be1710bb9f9c Mon Sep 17 00:00:00 2001
From: Waiman Long <longman(a)redhat.com>
Date: Tue, 11 Apr 2023 09:35:58 -0400
Subject: [PATCH] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP
properly
By default, the clone(2) syscall spawn a child process into the same
cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
into cgroups"), the child will be spawned into a different cgroup which
is somewhat similar to writing the child's tid into "cgroup.threads".
The current cpuset_fork() method does not properly handle the
CLONE_INTO_CGROUP case where the cpuset of the child may be different
from that of its parent. Update the cpuset_fork() method to treat the
CLONE_INTO_CGROUP case similar to cpuset_attach().
Since the newly cloned task has not been running yet, its actual
memory usage isn't known. So it is not necessary to make change to mm
in cpuset_fork().
Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
Reported-by: Giuseppe Scrivano <gscrivan(a)redhat.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org # v5.7+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ff7eb8e2efdc..2ccfae74acf9 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2515,16 +2515,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
}
/*
- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
* but we can't allocate it dynamically there. Define it global and
* allocate from cpuset_init().
*/
static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_to;
+
+static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
+{
+ percpu_rwsem_assert_held(&cpuset_rwsem);
+
+ if (cs != &top_cpuset)
+ guarantee_online_cpus(task, cpus_attach);
+ else
+ cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
+ /*
+ * can_attach beforehand should guarantee that this doesn't
+ * fail. TODO: have a better way to handle failure here
+ */
+ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+
+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+ cpuset_update_task_spread_flags(cs, task);
+}
static void cpuset_attach(struct cgroup_taskset *tset)
{
- /* static buf protected by cpuset_rwsem */
- static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
struct cgroup_subsys_state *css;
@@ -2555,20 +2572,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, css, tset) {
- if (cs != &top_cpuset)
- guarantee_online_cpus(task, cpus_attach);
- else
- cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
- /*
- * can_attach beforehand should guarantee that this doesn't
- * fail. TODO: have a better way to handle failure here
- */
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
- cpuset_update_task_spread_flags(cs, task);
- }
+ cgroup_taskset_for_each(task, css, tset)
+ cpuset_attach_task(cs, task);
/*
* Change mm for all threadgroup leaders. This is expensive and may
@@ -3266,11 +3271,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
*/
static void cpuset_fork(struct task_struct *task)
{
- if (task_css_is_root(task, cpuset_cgrp_id))
+ struct cpuset *cs;
+ bool same_cs;
+
+ rcu_read_lock();
+ cs = task_cs(task);
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs) {
+ if (cs == &top_cpuset)
+ return;
+
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
+ task->mems_allowed = current->mems_allowed;
return;
+ }
- set_cpus_allowed_ptr(task, current->cpus_ptr);
- task->mems_allowed = current->mems_allowed;
+ /* CLONE_INTO_CGROUP */
+ percpu_down_write(&cpuset_rwsem);
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+ cpuset_attach_task(cs, task);
+ percpu_up_write(&cpuset_rwsem);
}
struct cgroup_subsys cpuset_cgrp_subsys = {
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 42a11bf5c5436e91b040aeb04063be1710bb9f9c
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041748-juniper-slogan-cfd2@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly")
18f9a4d47527 ("cgroup/cpuset: Skip spread flags update on v2")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 42a11bf5c5436e91b040aeb04063be1710bb9f9c Mon Sep 17 00:00:00 2001
From: Waiman Long <longman(a)redhat.com>
Date: Tue, 11 Apr 2023 09:35:58 -0400
Subject: [PATCH] cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP
properly
By default, the clone(2) syscall spawn a child process into the same
cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
into cgroups"), the child will be spawned into a different cgroup which
is somewhat similar to writing the child's tid into "cgroup.threads".
The current cpuset_fork() method does not properly handle the
CLONE_INTO_CGROUP case where the cpuset of the child may be different
from that of its parent. Update the cpuset_fork() method to treat the
CLONE_INTO_CGROUP case similar to cpuset_attach().
Since the newly cloned task has not been running yet, its actual
memory usage isn't known. So it is not necessary to make change to mm
in cpuset_fork().
Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
Reported-by: Giuseppe Scrivano <gscrivan(a)redhat.com>
Signed-off-by: Waiman Long <longman(a)redhat.com>
Cc: stable(a)vger.kernel.org # v5.7+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ff7eb8e2efdc..2ccfae74acf9 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2515,16 +2515,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
}
/*
- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
+ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
* but we can't allocate it dynamically there. Define it global and
* allocate from cpuset_init().
*/
static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_to;
+
+static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
+{
+ percpu_rwsem_assert_held(&cpuset_rwsem);
+
+ if (cs != &top_cpuset)
+ guarantee_online_cpus(task, cpus_attach);
+ else
+ cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
+ /*
+ * can_attach beforehand should guarantee that this doesn't
+ * fail. TODO: have a better way to handle failure here
+ */
+ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+
+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+ cpuset_update_task_spread_flags(cs, task);
+}
static void cpuset_attach(struct cgroup_taskset *tset)
{
- /* static buf protected by cpuset_rwsem */
- static nodemask_t cpuset_attach_nodemask_to;
struct task_struct *task;
struct task_struct *leader;
struct cgroup_subsys_state *css;
@@ -2555,20 +2572,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
- cgroup_taskset_for_each(task, css, tset) {
- if (cs != &top_cpuset)
- guarantee_online_cpus(task, cpus_attach);
- else
- cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
- /*
- * can_attach beforehand should guarantee that this doesn't
- * fail. TODO: have a better way to handle failure here
- */
- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
-
- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
- cpuset_update_task_spread_flags(cs, task);
- }
+ cgroup_taskset_for_each(task, css, tset)
+ cpuset_attach_task(cs, task);
/*
* Change mm for all threadgroup leaders. This is expensive and may
@@ -3266,11 +3271,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
*/
static void cpuset_fork(struct task_struct *task)
{
- if (task_css_is_root(task, cpuset_cgrp_id))
+ struct cpuset *cs;
+ bool same_cs;
+
+ rcu_read_lock();
+ cs = task_cs(task);
+ same_cs = (cs == task_cs(current));
+ rcu_read_unlock();
+
+ if (same_cs) {
+ if (cs == &top_cpuset)
+ return;
+
+ set_cpus_allowed_ptr(task, current->cpus_ptr);
+ task->mems_allowed = current->mems_allowed;
return;
+ }
- set_cpus_allowed_ptr(task, current->cpus_ptr);
- task->mems_allowed = current->mems_allowed;
+ /* CLONE_INTO_CGROUP */
+ percpu_down_write(&cpuset_rwsem);
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+ cpuset_attach_task(cs, task);
+ percpu_up_write(&cpuset_rwsem);
}
struct cgroup_subsys cpuset_cgrp_subsys = {
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041704-banister-dreamt-9dfd@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
ba9182a89626 ("cgroup/cpuset: Wake up cpuset_attach_wq tasks in cpuset_cancel_attach()")
1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to percpu_rwsem")
f9a25f776d78 ("cpusets: Rebuild root domain deadline accounting information")
933a90bf4f35 ("Merge branch 'work.mount0' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman(a)redhat.com>
Date: Tue, 11 Apr 2023 09:35:57 -0400
Subject: [PATCH] cgroup/cpuset: Wake up cpuset_attach_wq tasks in
cpuset_cancel_attach()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After a successful cpuset_can_attach() call which increments the
attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach()
will be called later. In cpuset_attach(), tasks in cpuset_attach_wq,
if present, will be woken up at the end. That is not the case in
cpuset_cancel_attach(). So missed wakeup is possible if the attach
operation is somehow cancelled. Fix that by doing the wakeup in
cpuset_cancel_attach() as well.
Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Reviewed-by: Michal Koutný <mkoutny(a)suse.com>
Cc: stable(a)vger.kernel.org # v3.11+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f310915d1257..ff7eb8e2efdc 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2502,11 +2502,15 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
+ struct cpuset *cs;
cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
percpu_down_write(&cpuset_rwsem);
- css_cs(css)->attach_in_progress--;
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
}
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041703-swoop-hankering-5a8c@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
ba9182a89626 ("cgroup/cpuset: Wake up cpuset_attach_wq tasks in cpuset_cancel_attach()")
1243dc518c9d ("cgroup/cpuset: Convert cpuset_mutex to percpu_rwsem")
f9a25f776d78 ("cpusets: Rebuild root domain deadline accounting information")
933a90bf4f35 ("Merge branch 'work.mount0' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ba9182a89626d5f83c2ee4594f55cb9c1e60f0e2 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman(a)redhat.com>
Date: Tue, 11 Apr 2023 09:35:57 -0400
Subject: [PATCH] cgroup/cpuset: Wake up cpuset_attach_wq tasks in
cpuset_cancel_attach()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After a successful cpuset_can_attach() call which increments the
attach_in_progress flag, either cpuset_cancel_attach() or cpuset_attach()
will be called later. In cpuset_attach(), tasks in cpuset_attach_wq,
if present, will be woken up at the end. That is not the case in
cpuset_cancel_attach(). So missed wakeup is possible if the attach
operation is somehow cancelled. Fix that by doing the wakeup in
cpuset_cancel_attach() as well.
Fixes: e44193d39e8d ("cpuset: let hotplug propagation work wait for task attaching")
Signed-off-by: Waiman Long <longman(a)redhat.com>
Reviewed-by: Michal Koutný <mkoutny(a)suse.com>
Cc: stable(a)vger.kernel.org # v3.11+
Signed-off-by: Tejun Heo <tj(a)kernel.org>
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f310915d1257..ff7eb8e2efdc 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2502,11 +2502,15 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
+ struct cpuset *cs;
cgroup_taskset_first(tset, &css);
+ cs = css_cs(css);
percpu_down_write(&cpuset_rwsem);
- css_cs(css)->attach_in_progress--;
+ cs->attach_in_progress--;
+ if (!cs->attach_in_progress)
+ wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
}
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x 74391b3e69855e7dd65a9cef36baf5fc1345affd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041708-unsavory-tapping-076a@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
74391b3e6985 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD")
1231363aec86 ("nvme-pci: mark Lexar NM760 as IGNORE_DEV_SUBNQN")
80b2624094c8 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM760")
200dccd07df2 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM610")
d6c52fa3e955 ("nvme-pci: Crucial P2 has bogus namespace ids")
6b961bce50e4 ("nvme-pci: avoid the deepest sleep state on ZHITAI TiPro7000 SSDs")
3765fad50896 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for ADATA XPG GAMMIX S50")
a98a945b80f8 ("nvme-pci: disable namespace identifiers for the MAXIO MAP1002/1202")
4bdf260362b3 ("nvme: add 48-bit DMA address quirk for Amazon NVMe controllers")
02ca079c9931 ("nvme-pci: disable Write Zeroes on Sandisk Skyhawk")
0b85f59d30b9 ("nvme-pci: Move enumeration by class to be last in the table")
5bedd3afee8e ("nvme: add a Identify Namespace Identification Descriptor list quirk")
5611ec2b9814 ("nvme-pci: prevent SK hynix PC400 from using Write Zeroes command")
ea43d9709f72 ("nvme: fix identify error status silent ignore")
59c7c3caaaf8 ("nvme: fix possible hang when ns scanning fails during error recovery")
fb314eb0cbb2 ("nvme: refactor nvme_identify_ns_descs error handling")
98f7b86a0bec ("nvme-pci: Use single IRQ vector for old Apple models")
22802bf742c2 ("nvme: Namepace identification descriptor list is optional")
6c6aa2f26c68 ("nvme: hwmon: add quirk to avoid changing temperature threshold")
52deba0f02a9 ("nvme: hwmon: provide temperature min and max values for each sensor")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 74391b3e69855e7dd65a9cef36baf5fc1345affd Mon Sep 17 00:00:00 2001
From: Duy Truong <dory(a)dory.moe>
Date: Thu, 13 Apr 2023 17:55:48 -0700
Subject: [PATCH] nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD
Added a quirk to fix the TeamGroup T-Force Cardea Zero Z330 SSDs reporting
duplicate NGUIDs.
Signed-off-by: Duy Truong <dory(a)dory.moe>
Cc: stable(a)vger.kernel.org
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 282d808400c5..cd7873de3121 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3443,6 +3443,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
.driver_data = NVME_QUIRK_BOGUS_NID |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 74391b3e69855e7dd65a9cef36baf5fc1345affd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041707-reprise-scallion-693d@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
74391b3e6985 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD")
1231363aec86 ("nvme-pci: mark Lexar NM760 as IGNORE_DEV_SUBNQN")
80b2624094c8 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM760")
200dccd07df2 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM610")
d6c52fa3e955 ("nvme-pci: Crucial P2 has bogus namespace ids")
6b961bce50e4 ("nvme-pci: avoid the deepest sleep state on ZHITAI TiPro7000 SSDs")
3765fad50896 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for ADATA XPG GAMMIX S50")
a98a945b80f8 ("nvme-pci: disable namespace identifiers for the MAXIO MAP1002/1202")
4bdf260362b3 ("nvme: add 48-bit DMA address quirk for Amazon NVMe controllers")
02ca079c9931 ("nvme-pci: disable Write Zeroes on Sandisk Skyhawk")
0b85f59d30b9 ("nvme-pci: Move enumeration by class to be last in the table")
5bedd3afee8e ("nvme: add a Identify Namespace Identification Descriptor list quirk")
5611ec2b9814 ("nvme-pci: prevent SK hynix PC400 from using Write Zeroes command")
ea43d9709f72 ("nvme: fix identify error status silent ignore")
59c7c3caaaf8 ("nvme: fix possible hang when ns scanning fails during error recovery")
fb314eb0cbb2 ("nvme: refactor nvme_identify_ns_descs error handling")
98f7b86a0bec ("nvme-pci: Use single IRQ vector for old Apple models")
22802bf742c2 ("nvme: Namepace identification descriptor list is optional")
6c6aa2f26c68 ("nvme: hwmon: add quirk to avoid changing temperature threshold")
52deba0f02a9 ("nvme: hwmon: provide temperature min and max values for each sensor")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 74391b3e69855e7dd65a9cef36baf5fc1345affd Mon Sep 17 00:00:00 2001
From: Duy Truong <dory(a)dory.moe>
Date: Thu, 13 Apr 2023 17:55:48 -0700
Subject: [PATCH] nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD
Added a quirk to fix the TeamGroup T-Force Cardea Zero Z330 SSDs reporting
duplicate NGUIDs.
Signed-off-by: Duy Truong <dory(a)dory.moe>
Cc: stable(a)vger.kernel.org
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 282d808400c5..cd7873de3121 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3443,6 +3443,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
.driver_data = NVME_QUIRK_BOGUS_NID |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 74391b3e69855e7dd65a9cef36baf5fc1345affd
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041706-brewery-reliant-ee72@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
74391b3e6985 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD")
1231363aec86 ("nvme-pci: mark Lexar NM760 as IGNORE_DEV_SUBNQN")
80b2624094c8 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM760")
200dccd07df2 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM610")
d6c52fa3e955 ("nvme-pci: Crucial P2 has bogus namespace ids")
6b961bce50e4 ("nvme-pci: avoid the deepest sleep state on ZHITAI TiPro7000 SSDs")
3765fad50896 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for ADATA XPG GAMMIX S50")
a98a945b80f8 ("nvme-pci: disable namespace identifiers for the MAXIO MAP1002/1202")
4bdf260362b3 ("nvme: add 48-bit DMA address quirk for Amazon NVMe controllers")
02ca079c9931 ("nvme-pci: disable Write Zeroes on Sandisk Skyhawk")
0b85f59d30b9 ("nvme-pci: Move enumeration by class to be last in the table")
5bedd3afee8e ("nvme: add a Identify Namespace Identification Descriptor list quirk")
5611ec2b9814 ("nvme-pci: prevent SK hynix PC400 from using Write Zeroes command")
ea43d9709f72 ("nvme: fix identify error status silent ignore")
59c7c3caaaf8 ("nvme: fix possible hang when ns scanning fails during error recovery")
fb314eb0cbb2 ("nvme: refactor nvme_identify_ns_descs error handling")
98f7b86a0bec ("nvme-pci: Use single IRQ vector for old Apple models")
22802bf742c2 ("nvme: Namepace identification descriptor list is optional")
6c6aa2f26c68 ("nvme: hwmon: add quirk to avoid changing temperature threshold")
52deba0f02a9 ("nvme: hwmon: provide temperature min and max values for each sensor")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 74391b3e69855e7dd65a9cef36baf5fc1345affd Mon Sep 17 00:00:00 2001
From: Duy Truong <dory(a)dory.moe>
Date: Thu, 13 Apr 2023 17:55:48 -0700
Subject: [PATCH] nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD
Added a quirk to fix the TeamGroup T-Force Cardea Zero Z330 SSDs reporting
duplicate NGUIDs.
Signed-off-by: Duy Truong <dory(a)dory.moe>
Cc: stable(a)vger.kernel.org
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 282d808400c5..cd7873de3121 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3443,6 +3443,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
.driver_data = NVME_QUIRK_BOGUS_NID |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
The patch below does not apply to the 4.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.14.y
git checkout FETCH_HEAD
git cherry-pick -x c8e22b7a1694bb8d025ea636816472739d859145
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041705-wages-cabbage-66be@gregkh' --subject-prefix 'PATCH 4.14.y' HEAD^..
Possible dependencies:
c8e22b7a1694 ("scsi: ses: Handle enclosure with just a primary component gracefully")
3fe97ff3d949 ("scsi: ses: Don't attach if enclosure has no components")
6396bb221514 ("treewide: kzalloc() -> kcalloc()")
5f85942c2ea2 ("Merge tag 'scsi-misc' of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From c8e22b7a1694bb8d025ea636816472739d859145 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina(a)suse.cz>
Date: Tue, 4 Apr 2023 21:23:42 +0200
Subject: [PATCH] scsi: ses: Handle enclosure with just a primary component
gracefully
This reverts commit 3fe97ff3d949 ("scsi: ses: Don't attach if enclosure
has no components") and introduces proper handling of case where there are
no detected secondary components, but primary component (enumerated in
num_enclosures) does exist. That fix was originally proposed by Ding Hui
<dinghui(a)sangfor.com.cn>.
Completely ignoring devices that have one primary enclosure and no
secondary one results in ses_intf_add() bailing completely
scsi 2:0:0:254: enclosure has no enumerated components
scsi 2:0:0:254: Failed to bind enclosure -12ven in valid configurations such
even on valid configurations with 1 primary and 0 secondary enclosures as
below:
# sg_ses /dev/sg0
3PARdata SES 3321
Supported diagnostic pages:
Supported Diagnostic Pages [sdp] [0x0]
Configuration (SES) [cf] [0x1]
Short Enclosure Status (SES) [ses] [0x8]
# sg_ses -p cf /dev/sg0
3PARdata SES 3321
Configuration diagnostic page:
number of secondary subenclosures: 0
generation code: 0x0
enclosure descriptor list
Subenclosure identifier: 0 [primary]
relative ES process id: 0, number of ES processes: 1
number of type descriptor headers: 1
enclosure logical identifier (hex): 20000002ac02068d
enclosure vendor: 3PARdata product: VV rev: 3321
type descriptor header and text list
Element type: Unspecified, subenclosure id: 0
number of possible elements: 1
The changelog for the original fix follows
=====
We can get a crash when disconnecting the iSCSI session,
the call trace like this:
[ffff00002a00fb70] kfree at ffff00000830e224
[ffff00002a00fba0] ses_intf_remove at ffff000001f200e4
[ffff00002a00fbd0] device_del at ffff0000086b6a98
[ffff00002a00fc50] device_unregister at ffff0000086b6d58
[ffff00002a00fc70] __scsi_remove_device at ffff00000870608c
[ffff00002a00fca0] scsi_remove_device at ffff000008706134
[ffff00002a00fcc0] __scsi_remove_target at ffff0000087062e4
[ffff00002a00fd10] scsi_remove_target at ffff0000087064c0
[ffff00002a00fd70] __iscsi_unbind_session at ffff000001c872c4
[ffff00002a00fdb0] process_one_work at ffff00000810f35c
[ffff00002a00fe00] worker_thread at ffff00000810f648
[ffff00002a00fe70] kthread at ffff000008116e98
In ses_intf_add, components count could be 0, and kcalloc 0 size scomp,
but not saved in edev->component[i].scratch
In this situation, edev->component[0].scratch is an invalid pointer,
when kfree it in ses_intf_remove_enclosure, a crash like above would happen
The call trace also could be other random cases when kfree cannot catch
the invalid pointer
We should not use edev->component[] array when the components count is 0
We also need check index when use edev->component[] array in
ses_enclosure_data_process
=====
Reported-by: Michal Kolar <mich.k(a)seznam.cz>
Originally-by: Ding Hui <dinghui(a)sangfor.com.cn>
Cc: stable(a)vger.kernel.org
Fixes: 3fe97ff3d949 ("scsi: ses: Don't attach if enclosure has no components")
Signed-off-by: Jiri Kosina <jkosina(a)suse.cz>
Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2304042122270.29760@cbobk.fhfr.pm
Tested-by: Michal Kolar <mich.k(a)seznam.cz>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index b11a9162e73a..b54f2c6c08c3 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -509,9 +509,6 @@ static int ses_enclosure_find_by_addr(struct enclosure_device *edev,
int i;
struct ses_component *scomp;
- if (!edev->component[0].scratch)
- return 0;
-
for (i = 0; i < edev->components; i++) {
scomp = edev->component[i].scratch;
if (scomp->addr != efd->addr)
@@ -602,8 +599,10 @@ static void ses_enclosure_data_process(struct enclosure_device *edev,
components++,
type_ptr[0],
name);
- else
+ else if (components < edev->components)
ecomp = &edev->component[components++];
+ else
+ ecomp = ERR_PTR(-EINVAL);
if (!IS_ERR(ecomp)) {
if (addl_desc_ptr) {
@@ -734,11 +733,6 @@ static int ses_intf_add(struct device *cdev,
components += type_ptr[1];
}
- if (components == 0) {
- sdev_printk(KERN_WARNING, sdev, "enclosure has no enumerated components\n");
- goto err_free;
- }
-
ses_dev->page1 = buf;
ses_dev->page1_len = len;
buf = NULL;
@@ -780,9 +774,11 @@ static int ses_intf_add(struct device *cdev,
buf = NULL;
}
page2_not_supported:
- scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
- if (!scomp)
- goto err_free;
+ if (components > 0) {
+ scomp = kcalloc(components, sizeof(struct ses_component), GFP_KERNEL);
+ if (!scomp)
+ goto err_free;
+ }
edev = enclosure_register(cdev->parent, dev_name(&sdev->sdev_gendev),
components, &ses_enclosure_callbacks);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x e7067a446264a7514fa1cfaa4052cdb6803bc6a2
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041754-retread-approach-96af@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
e7067a446264 ("ksmbd: avoid out of bounds access in decode_preauth_ctxt()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e7067a446264a7514fa1cfaa4052cdb6803bc6a2 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss(a)suse.de>
Date: Thu, 13 Apr 2023 23:49:57 +0900
Subject: [PATCH] ksmbd: avoid out of bounds access in decode_preauth_ctxt()
Confirm that the accessed pneg_ctxt->HashAlgorithms address sits within
the SMB request boundary; deassemble_neg_contexts() only checks that the
eight byte smb2_neg_context header + (client controlled) DataLength are
within the packet boundary, which is insufficient.
Checking for sizeof(struct smb2_preauth_neg_context) is overkill given
that the type currently assumes SMB311_SALT_SIZE bytes of trailing Salt.
Signed-off-by: David Disseldorp <ddiss(a)suse.de>
Acked-by: Namjae Jeon <linkinjeon(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 8af939a181be..67b7e766a06b 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -876,17 +876,21 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn,
}
static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
- struct smb2_preauth_neg_context *pneg_ctxt)
+ struct smb2_preauth_neg_context *pneg_ctxt,
+ int len_of_ctxts)
{
- __le32 err = STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP;
+ /*
+ * sizeof(smb2_preauth_neg_context) assumes SMB311_SALT_SIZE Salt,
+ * which may not be present. Only check for used HashAlgorithms[1].
+ */
+ if (len_of_ctxts < MIN_PREAUTH_CTXT_DATA_LEN)
+ return STATUS_INVALID_PARAMETER;
- if (pneg_ctxt->HashAlgorithms == SMB2_PREAUTH_INTEGRITY_SHA512) {
- conn->preauth_info->Preauth_HashId =
- SMB2_PREAUTH_INTEGRITY_SHA512;
- err = STATUS_SUCCESS;
- }
+ if (pneg_ctxt->HashAlgorithms != SMB2_PREAUTH_INTEGRITY_SHA512)
+ return STATUS_NO_PREAUTH_INTEGRITY_HASH_OVERLAP;
- return err;
+ conn->preauth_info->Preauth_HashId = SMB2_PREAUTH_INTEGRITY_SHA512;
+ return STATUS_SUCCESS;
}
static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
@@ -1014,7 +1018,8 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
break;
status = decode_preauth_ctxt(conn,
- (struct smb2_preauth_neg_context *)pctx);
+ (struct smb2_preauth_neg_context *)pctx,
+ len_of_ctxts);
if (status != STATUS_SUCCESS)
break;
} else if (pctx->ContextType == SMB2_ENCRYPTION_CAPABILITIES) {
This is the logical place to put the backlight device, and it also
fixes a kernel crash if the MIPI host is removed. Previously the
backlight device would be unregistered twice when this happened - once
as a child of the MIPI host through `mipi_dsi_host_unregister`, and
once when the panel device is destroyed.
Fixes: 12a6cbd4f3f1 ("drm/panel: otm8009a: Use new backlight API")
Signed-off-by: James Cowgill <james.cowgill(a)blaize.com>
Cc: stable(a)vger.kernel.org
---
drivers/gpu/drm/panel/panel-orisetech-otm8009a.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
index b4729a94c34a8..898b892f11439 100644
--- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
+++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c
@@ -471,7 +471,7 @@ static int otm8009a_probe(struct mipi_dsi_device *dsi)
DRM_MODE_CONNECTOR_DSI);
ctx->bl_dev = devm_backlight_device_register(dev, dev_name(dev),
- dsi->host->dev, ctx,
+ dev, ctx,
&otm8009a_backlight_ops,
NULL);
if (IS_ERR(ctx->bl_dev)) {
--
2.39.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x d83806c4c0cccc0d6d3c3581a11983a9c186a138
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041751-pod-deafness-ec04@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
d83806c4c0cc ("purgatory: fix disabling debug info")
56e0790c7f9e ("RISC-V: add infrastructure to allow different str* implementations")
736e30af583f ("RISC-V: Add purgatory")
6261586e0c91 ("RISC-V: Add kexec_file support")
8212f8986d31 ("kbuild: use more subdir- for visiting subdirectories while cleaning")
10c6ae274fe2 ("sh: remove meaningless archclean line")
b79bd0d5102b ("Merge tag 'riscv-for-linus-5.15-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d83806c4c0cccc0d6d3c3581a11983a9c186a138 Mon Sep 17 00:00:00 2001
From: Alyssa Ross <hi(a)alyssa.is>
Date: Sun, 26 Mar 2023 18:21:21 +0000
Subject: [PATCH] purgatory: fix disabling debug info
Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS.
Instead, it includes -g, the appropriate -gdwarf-* flag, and also the
-Wa versions of both of those if building with Clang and GNU as. As a
result, debug info was being generated for the purgatory objects, even
though the intention was that it not be.
Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files")
Signed-off-by: Alyssa Ross <hi(a)alyssa.is>
Cc: stable(a)vger.kernel.org
Acked-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index d16bf715a586..5730797a6b40 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 17f09dc26381..82fec66d46d2 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x d83806c4c0cccc0d6d3c3581a11983a9c186a138
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041750-shriek-pointy-db27@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
d83806c4c0cc ("purgatory: fix disabling debug info")
56e0790c7f9e ("RISC-V: add infrastructure to allow different str* implementations")
736e30af583f ("RISC-V: Add purgatory")
6261586e0c91 ("RISC-V: Add kexec_file support")
8212f8986d31 ("kbuild: use more subdir- for visiting subdirectories while cleaning")
10c6ae274fe2 ("sh: remove meaningless archclean line")
b79bd0d5102b ("Merge tag 'riscv-for-linus-5.15-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d83806c4c0cccc0d6d3c3581a11983a9c186a138 Mon Sep 17 00:00:00 2001
From: Alyssa Ross <hi(a)alyssa.is>
Date: Sun, 26 Mar 2023 18:21:21 +0000
Subject: [PATCH] purgatory: fix disabling debug info
Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS.
Instead, it includes -g, the appropriate -gdwarf-* flag, and also the
-Wa versions of both of those if building with Clang and GNU as. As a
result, debug info was being generated for the purgatory objects, even
though the intention was that it not be.
Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files")
Signed-off-by: Alyssa Ross <hi(a)alyssa.is>
Cc: stable(a)vger.kernel.org
Acked-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index d16bf715a586..5730797a6b40 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 17f09dc26381..82fec66d46d2 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x d83806c4c0cccc0d6d3c3581a11983a9c186a138
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041749-unburned-repacking-8dcc@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
d83806c4c0cc ("purgatory: fix disabling debug info")
56e0790c7f9e ("RISC-V: add infrastructure to allow different str* implementations")
736e30af583f ("RISC-V: Add purgatory")
6261586e0c91 ("RISC-V: Add kexec_file support")
8212f8986d31 ("kbuild: use more subdir- for visiting subdirectories while cleaning")
10c6ae274fe2 ("sh: remove meaningless archclean line")
b79bd0d5102b ("Merge tag 'riscv-for-linus-5.15-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d83806c4c0cccc0d6d3c3581a11983a9c186a138 Mon Sep 17 00:00:00 2001
From: Alyssa Ross <hi(a)alyssa.is>
Date: Sun, 26 Mar 2023 18:21:21 +0000
Subject: [PATCH] purgatory: fix disabling debug info
Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS.
Instead, it includes -g, the appropriate -gdwarf-* flag, and also the
-Wa versions of both of those if building with Clang and GNU as. As a
result, debug info was being generated for the purgatory objects, even
though the intention was that it not be.
Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files")
Signed-off-by: Alyssa Ross <hi(a)alyssa.is>
Cc: stable(a)vger.kernel.org
Acked-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index d16bf715a586..5730797a6b40 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 17f09dc26381..82fec66d46d2 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x d83806c4c0cccc0d6d3c3581a11983a9c186a138
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023041748-enchilada-greyhound-9209@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
d83806c4c0cc ("purgatory: fix disabling debug info")
56e0790c7f9e ("RISC-V: add infrastructure to allow different str* implementations")
736e30af583f ("RISC-V: Add purgatory")
6261586e0c91 ("RISC-V: Add kexec_file support")
8212f8986d31 ("kbuild: use more subdir- for visiting subdirectories while cleaning")
10c6ae274fe2 ("sh: remove meaningless archclean line")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From d83806c4c0cccc0d6d3c3581a11983a9c186a138 Mon Sep 17 00:00:00 2001
From: Alyssa Ross <hi(a)alyssa.is>
Date: Sun, 26 Mar 2023 18:21:21 +0000
Subject: [PATCH] purgatory: fix disabling debug info
Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS.
Instead, it includes -g, the appropriate -gdwarf-* flag, and also the
-Wa versions of both of those if building with Clang and GNU as. As a
result, debug info was being generated for the purgatory objects, even
though the intention was that it not be.
Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files")
Signed-off-by: Alyssa Ross <hi(a)alyssa.is>
Cc: stable(a)vger.kernel.org
Acked-by: Nick Desaulniers <ndesaulniers(a)google.com>
Signed-off-by: Masahiro Yamada <masahiroy(a)kernel.org>
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index d16bf715a586..5730797a6b40 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 17f09dc26381..82fec66d46d2 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_string.o += $(PURGATORY_CFLAGS)
-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
+asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
Please dear, did you receive the last email I sent to you
I am Aisha Muammar Gaddafi, the only daughter of the embattled former
president of Libya, Hon. Muammar Gaddafi.i have a business proposal
for you that worth $27.5M (Twenty Seven Million Five Hundred Thousand
Dollars)
Please reply for more information. Sorry for my pictures. I will
enclose it in my next mail when I hear from you okay.
Yours Sincerely
Best Regard,
Aisha Gaddafi
Hello,
I am writing to report an issue on a nfs mount that disappears due to
an inode revalide failure (already sent in January but probably banned
with html format...).
This very old commit
(https://github.com/torvalds/linux/commit/cc89684c9a265828ce061037f1f79f4a68…)
exactly show the problem I have and this old resolved issue
(https://bugzilla.kernel.org/show_bug.cgi?id=117651) is probably
failing again today
To sum up, I have a NFS mount inside another NFS mount (for example:
/opt/nfs/mount1 & /opt/nfs/mount1/mount2).
If I kill a task trying to get a file descriptor on
/opt/nfs/mount1/mount2 then it will be unmounted. My simple test code
to reproduce very easily:
int main(int argc, char *argv[]) {
while (1) {
close(open(argv[1], O_RDONLY));
}
}
In logs, I have: "nfs_revalidate_inode: (0:62/845965) getattr failed,
error=-512"
Tested on 5.19 and 6.1 kernel
Best regards,
Sylvain Menu
The quilt patch titled
Subject: Revert "userfaultfd: don't fail on unrecognized features"
has been removed from the -mm tree. Its filename was
revert-userfaultfd-dont-fail-on-unrecognized-features.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: Revert "userfaultfd: don't fail on unrecognized features"
Date: Wed, 12 Apr 2023 12:38:52 -0400
This is a proposal to revert commit 914eedcb9ba0ff53c33808.
I found this when writing a simple UFFDIO_API test to be the first unit
test in this set. Two things breaks with the commit:
- UFFDIO_API check was lost and missing. According to man page, the
kernel should reject ioctl(UFFDIO_API) if uffdio_api.api != 0xaa. This
check is needed if the api version will be extended in the future, or
user app won't be able to identify which is a new kernel.
- Feature flags checks were removed, which means UFFDIO_API with a
feature that does not exist will also succeed. According to the man
page, we should (and it makes sense) to reject ioctl(UFFDIO_API) if
unknown features passed in.
Link: https://lore.kernel.org/r/20220722201513.1624158-1-axelrasmussen@google.com
Link: https://lkml.kernel.org/r/20230412163922.327282-2-peterx@redhat.com
Fixes: 914eedcb9ba0 ("userfaultfd: don't fail on unrecognized features")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Dmitry Safonov <0x7f454c46(a)gmail.com>
Cc: Mike Kravetz <mike.kravetz(a)oracle.com>
Cc: Mike Rapoport (IBM) <rppt(a)kernel.org>
Cc: Zach O'Keefe <zokeefe(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/userfaultfd.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
--- a/fs/userfaultfd.c~revert-userfaultfd-dont-fail-on-unrecognized-features
+++ a/fs/userfaultfd.c
@@ -1955,8 +1955,10 @@ static int userfaultfd_api(struct userfa
ret = -EFAULT;
if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
goto out;
- /* Ignore unsupported features (userspace built against newer kernel) */
- features = uffdio_api.features & UFFD_API_FEATURES;
+ features = uffdio_api.features;
+ ret = -EINVAL;
+ if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+ goto err_out;
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
goto err_out;
_
Patches currently in -mm which might be from peterx(a)redhat.com are
selftests-mm-update-gitignore-with-two-missing-tests.patch
selftests-mm-dump-a-summary-in-run_vmtestssh.patch
selftests-mm-merge-utilh-into-vm_utilh.patch
selftests-mm-use-test_gen_progs-where-proper.patch
selftests-mm-link-vm_utilc-always.patch
selftests-mm-merge-default_huge_page_size-into-one.patch
selftests-mm-use-pm_-macros-in-vm_utilsh.patch
selftests-mm-reuse-pagemap_get_entry-in-vm_utilh.patch
selftests-mm-test-uffdio_zeropage-only-when-hugetlb.patch
selftests-mm-drop-test_uffdio_zeropage_eexist.patch
selftests-mm-create-uffd-common.patch
selftests-mm-split-uffd-tests-into-uffd-stress-and-uffd-unit-tests.patch
selftests-mm-uffd_register.patch
selftests-mm-uffd_open_devsys.patch
selftests-mm-uffdio_api-test.patch
selftests-mm-drop-global-mem_fd-in-uffd-tests.patch
selftests-mm-drop-global-hpage_size-in-uffd-tests.patch
selftests-mm-rename-uffd_stats-to-uffd_args.patch
selftests-mm-let-uffd_handle_page_fault-take-wp-parameter.patch
selftests-mm-allow-allocate_area-to-fail-properly.patch
selftests-mm-add-framework-for-uffd-unit-test.patch
selftests-mm-move-uffd-pagemap-test-to-unit-test.patch
selftests-mm-move-uffd-minor-test-to-unit-test.patch
selftests-mm-move-uffd-sig-events-tests-into-uffd-unit-tests.patch
selftests-mm-move-zeropage-test-into-uffd-unit-tests.patch
selftests-mm-workaround-no-way-to-detect-uffd-minor-wp.patch
selftests-mm-allow-uffd-test-to-skip-properly-with-no-privilege.patch
selftests-mm-drop-sys-dev-test-in-uffd-stress-test.patch
selftests-mm-add-shmem-private-test-to-uffd-stress.patch
selftests-mm-add-uffdio-register-ioctls-test.patch
The quilt patch titled
Subject: writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs
has been removed from the -mm tree. Its filename was
writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Baokun Li <libaokun1(a)huawei.com>
Subject: writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs
Date: Mon, 10 Apr 2023 21:08:26 +0800
KASAN report null-ptr-deref:
==================================================================
BUG: KASAN: null-ptr-deref in bdi_split_work_to_wbs+0x5c5/0x7b0
Write of size 8 at addr 0000000000000000 by task sync/943
CPU: 5 PID: 943 Comm: sync Tainted: 6.3.0-rc5-next-20230406-dirty #461
Call Trace:
<TASK>
dump_stack_lvl+0x7f/0xc0
print_report+0x2ba/0x340
kasan_report+0xc4/0x120
kasan_check_range+0x1b7/0x2e0
__kasan_check_write+0x24/0x40
bdi_split_work_to_wbs+0x5c5/0x7b0
sync_inodes_sb+0x195/0x630
sync_inodes_one_sb+0x3a/0x50
iterate_supers+0x106/0x1b0
ksys_sync+0x98/0x160
[...]
==================================================================
The race that causes the above issue is as follows:
cpu1 cpu2
-------------------------|-------------------------
inode_switch_wbs
INIT_WORK(&isw->work, inode_switch_wbs_work_fn)
queue_rcu_work(isw_wq, &isw->work)
// queue_work async
inode_switch_wbs_work_fn
wb_put_many(old_wb, nr_switched)
percpu_ref_put_many
ref->data->release(ref)
cgwb_release
queue_work(cgwb_release_wq, &wb->release_work)
// queue_work async
&wb->release_work
cgwb_release_workfn
ksys_sync
iterate_supers
sync_inodes_one_sb
sync_inodes_sb
bdi_split_work_to_wbs
kmalloc(sizeof(*work), GFP_ATOMIC)
// alloc memory failed
percpu_ref_exit
ref->data = NULL
kfree(data)
wb_get(wb)
percpu_ref_get(&wb->refcnt)
percpu_ref_get_many(ref, 1)
atomic_long_add(nr, &ref->data->count)
atomic64_add(i, v)
// trigger null-ptr-deref
bdi_split_work_to_wbs() traverses &bdi->wb_list to split work into all
wbs. If the allocation of new work fails, the on-stack fallback will be
used and the reference count of the current wb is increased afterwards.
If cgroup writeback membership switches occur before getting the reference
count and the current wb is released as old_wd, then calling wb_get() or
wb_put() will trigger the null pointer dereference above.
This issue was introduced in v4.3-rc7 (see fix tag1). Both
sync_inodes_sb() and __writeback_inodes_sb_nr() calls to
bdi_split_work_to_wbs() can trigger this issue. For scenarios called via
sync_inodes_sb(), originally commit 7fc5854f8c6e ("writeback: synchronize
sync(2) against cgroup writeback membership switches") reduced the
possibility of the issue by adding wb_switch_rwsem, but in v5.14-rc1 (see
fix tag2) removed the "inode_io_list_del_locked(inode, old_wb)" from
inode_switch_wbs_work_fn() so that wb->state contains WB_has_dirty_io,
thus old_wb is not skipped when traversing wbs in bdi_split_work_to_wbs(),
and the issue becomes easily reproducible again.
To solve this problem, percpu_ref_exit() is called under RCU protection to
avoid race between cgwb_release_workfn() and bdi_split_work_to_wbs().
Moreover, replace wb_get() with wb_tryget() in bdi_split_work_to_wbs(),
and skip the current wb if wb_tryget() fails because the wb has already
been shutdown.
Link: https://lkml.kernel.org/r/20230410130826.1492525-1-libaokun1@huawei.com
Fixes: b817525a4a80 ("writeback: bdi_writeback iteration must not skip dying ones")
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Acked-by: Tejun Heo <tj(a)kernel.org>
Cc: Alexander Viro <viro(a)zeniv.linux.org.uk>
Cc: Andreas Dilger <adilger.kernel(a)dilger.ca>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Dennis Zhou <dennis(a)kernel.org>
Cc: Hou Tao <houtao1(a)huawei.com>
Cc: yangerkun <yangerkun(a)huawei.com>
Cc: Zhang Yi <yi.zhang(a)huawei.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/fs-writeback.c | 17 ++++++++++-------
mm/backing-dev.c | 12 ++++++++++--
2 files changed, 20 insertions(+), 9 deletions(-)
--- a/fs/fs-writeback.c~writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs
+++ a/fs/fs-writeback.c
@@ -978,6 +978,16 @@ restart:
continue;
}
+ /*
+ * If wb_tryget fails, the wb has been shutdown, skip it.
+ *
+ * Pin @wb so that it stays on @bdi->wb_list. This allows
+ * continuing iteration from @wb after dropping and
+ * regrabbing rcu read lock.
+ */
+ if (!wb_tryget(wb))
+ continue;
+
/* alloc failed, execute synchronously using on-stack fallback */
work = &fallback_work;
*work = *base_work;
@@ -986,13 +996,6 @@ restart:
work->done = &fallback_work_done;
wb_queue_work(wb, work);
-
- /*
- * Pin @wb so that it stays on @bdi->wb_list. This allows
- * continuing iteration from @wb after dropping and
- * regrabbing rcu read lock.
- */
- wb_get(wb);
last_wb = wb;
rcu_read_unlock();
--- a/mm/backing-dev.c~writeback-cgroup-fix-null-ptr-deref-write-in-bdi_split_work_to_wbs
+++ a/mm/backing-dev.c
@@ -507,6 +507,15 @@ static LIST_HEAD(offline_cgwbs);
static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
+static void cgwb_free_rcu(struct rcu_head *rcu_head)
+{
+ struct bdi_writeback *wb = container_of(rcu_head,
+ struct bdi_writeback, rcu);
+
+ percpu_ref_exit(&wb->refcnt);
+ kfree(wb);
+}
+
static void cgwb_release_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@@ -529,11 +538,10 @@ static void cgwb_release_workfn(struct w
list_del(&wb->offline_node);
spin_unlock_irq(&cgwb_lock);
- percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
bdi_put(bdi);
WARN_ON_ONCE(!list_empty(&wb->b_attached));
- kfree_rcu(wb, rcu);
+ call_rcu(&wb->rcu, cgwb_free_rcu);
}
static void cgwb_release(struct percpu_ref *refcnt)
_
Patches currently in -mm which might be from libaokun1(a)huawei.com are