The version is fetched once in check_version(), which then does some
validation and then overwrites the version in userspace with the API
version supported by the kernel. copy_params() then fetches the version
from userspace *again*, and this time no validation is done. The result
is that the kernel's version number is completely controllable by
userspace, provided that userspace can win a race condition.
Fix this flaw by not copying the version back to the kernel the second
time. This is not exploitable as the version is not further used in the
kernel. However, it could become a problem if future patches start
relying on the version field.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi(a)invisiblethingslab.com>
---
drivers/md/dm-ioctl.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 491ef55b9e8662c3b02a2162b8c93ee086c078a1..20f452b6c61c1c4d20259fd0fc5443977e4454a0 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1873,12 +1873,13 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
* As well as checking the version compatibility this always
* copies the kernel interface version out.
*/
-static int check_version(unsigned int cmd, struct dm_ioctl __user *user)
+static int check_version(unsigned int cmd, struct dm_ioctl __user *user,
+ struct dm_ioctl *kernel_params)
{
- uint32_t version[3];
int r = 0;
+ uint32_t *version = kernel_params->version;
- if (copy_from_user(version, user->version, sizeof(version)))
+ if (copy_from_user(version, user->version, sizeof(user->version)))
return -EFAULT;
if ((version[0] != DM_VERSION_MAJOR) ||
@@ -1922,7 +1923,10 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kern
const size_t minimum_data_size = offsetof(struct dm_ioctl, data);
unsigned int noio_flag;
- if (copy_from_user(param_kernel, user, minimum_data_size))
+ /* Version has been copied from userspace already, avoid TOCTOU */
+ if (copy_from_user((char *)param_kernel + sizeof(param_kernel->version),
+ (char __user *)user + sizeof(param_kernel->version),
+ minimum_data_size - sizeof(param_kernel->version)))
return -EFAULT;
if (param_kernel->data_size < minimum_data_size) {
@@ -2034,7 +2038,7 @@ static int ctl_ioctl(struct file *file, uint command, struct dm_ioctl __user *us
* Check the interface version passed in. This also
* writes out the kernel's interface version.
*/
- r = check_version(cmd, user);
+ r = check_version(cmd, user, ¶m_kernel);
if (r)
return r;
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
Especially on 32-bit systems, it is possible for the pointer arithmetic
to overflow and cause a userspace pointer to be dereferenced in the
kernel.
Signed-off-by: Demi Marie Obenour <demi(a)invisiblethingslab.com>
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Cc: stable(a)vger.kernel.org
---
drivers/md/dm-ioctl.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 34fa74c6a70db8aa67aaba3f6a2fc4f38ef736bc..64e8f16d344c47057de5e2d29e3d63202197dca0 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1396,6 +1396,25 @@ static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
{
static_assert(_Alignof(struct dm_target_spec) <= 8,
"struct dm_target_spec has excessive alignment requirements");
+ static_assert(offsetof(struct dm_ioctl, data) >= sizeof(struct dm_target_spec),
+ "struct dm_target_spec too big");
+
+ /*
+ * Number of bytes remaining, starting with last. This is always
+ * sizeof(struct dm_target_spec) or more, as otherwise *last was
+ * out of bounds already.
+ */
+ size_t remaining = (char *)end - (char *)last;
+
+ /*
+ * There must be room for both the next target spec and the
+ * NUL-terminator of the target itself.
+ */
+ if (remaining - sizeof(struct dm_target_spec) <= next) {
+ DMERR("Target spec extends beyond end of parameters");
+ return -EINVAL;
+ }
+
if (next % 8) {
DMERR("Next target spec (offset %u) is not 8-byte aligned", next);
return -EINVAL;
--
Sincerely,
Demi Marie Obenour (she/her/hers)
Invisible Things Lab
From: Pierre Gondois <pierre.gondois(a)arm.com>
fetch_cache_info() tries to get the number of cache leaves/levels
for each CPU in order to pre-allocate memory for cacheinfo struct.
Allocating this memory later triggers a:
'BUG: sleeping function called from invalid context'
in PREEMPT_RT kernels.
If there is no cache related information available in DT or ACPI,
fetch_cache_info() fails and an error message is printed:
'Early cacheinfo failed, ret = ...'
Not having cache information should be a valid configuration.
Remove the error message if fetch_cache_info() fails with -ENOENT.
Suggested-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/all/20230404-hatred-swimmer-6fecdf33b57a@spud/
Signed-off-by: Pierre Gondois <pierre.gondois(a)arm.com>
Reviewed-by: Conor Dooley <conor.dooley(a)microchip.com>
Link: https://lore.kernel.org/r/20230414081453.244787-4-pierre.gondois@arm.com
Signed-off-by: Sudeep Holla <sudeep.holla(a)arm.com>
---
drivers/base/arch_topology.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 147fb7d4af96..b741b5ba82bd 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -843,10 +843,11 @@ void __init init_cpu_topology(void)
for_each_possible_cpu(cpu) {
ret = fetch_cache_info(cpu);
- if (ret) {
+ if (!ret)
+ continue;
+ else if (ret != -ENOENT)
pr_err("Early cacheinfo failed, ret = %d\n", ret);
- break;
- }
+ return;
}
}
--
2.25.1
The patch titled
Subject: epoll: ep_autoremove_wake_function should use list_del_init_careful
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
epoll-ep_autoremove_wake_function-should-use-list_del_init_careful.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Benjamin Segall <bsegall(a)google.com>
Subject: epoll: ep_autoremove_wake_function should use list_del_init_careful
Date: Tue, 30 May 2023 11:32:28 -0700
autoremove_wake_function uses list_del_init_careful, so should epoll's
more aggressive variant. It only doesn't because it was copied from an
older wait.c rather than the most recent.
Link: https://lkml.kernel.org/r/xm26pm6hvfer.fsf@google.com
Fixes: a16ceb139610 ("epoll: autoremove wakers even more aggressively")
Signed-off-by: Ben Segall <bsegall(a)google.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/eventpoll.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/eventpoll.c~epoll-ep_autoremove_wake_function-should-use-list_del_init_careful
+++ a/fs/eventpoll.c
@@ -1805,7 +1805,7 @@ static int ep_autoremove_wake_function(s
{
int ret = default_wake_function(wq_entry, mode, sync, key);
- list_del_init(&wq_entry->entry);
+ list_del_init_careful(&wq_entry->entry);
return ret;
}
_
Patches currently in -mm which might be from bsegall(a)google.com are
epoll-ep_autoremove_wake_function-should-use-list_del_init_careful.patch
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x bdc1c5fac982845a58d28690cdb56db8c88a530d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052806-sprite-program-51a5@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
bdc1c5fac982 ("binder: fix UAF caused by faulty buffer cleanup")
9864bb480133 ("Binder: add TF_UPDATE_TXN to replace outdated txn")
32e9f56a96d8 ("binder: don't detect sender/target during buffer cleanup")
5fdb55c1ac95 ("binder: make sure fd closes complete")
432ff1e91694 ("binder: BINDER_FREEZE ioctl")
0f966cba95c7 ("binder: add flag to clear buffer on txn complete")
421518a2740f ("binder: move structs from core file to header file")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bdc1c5fac982845a58d28690cdb56db8c88a530d Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas(a)google.com>
Date: Fri, 5 May 2023 20:30:20 +0000
Subject: [PATCH] binder: fix UAF caused by faulty buffer cleanup
In binder_transaction_buffer_release() the 'failed_at' offset indicates
the number of objects to clean up. However, this function was changed by
commit 44d8047f1d87 ("binder: use standard functions to allocate fds"),
to release all the objects in the buffer when 'failed_at' is zero.
This introduced an issue when a transaction buffer is released without
any objects having been processed so far. In this case, 'failed_at' is
indeed zero yet it is misinterpreted as releasing the entire buffer.
This leads to use-after-free errors where nodes are incorrectly freed
and subsequently accessed. Such is the case in the following KASAN
report:
==================================================================
BUG: KASAN: slab-use-after-free in binder_thread_read+0xc40/0x1f30
Read of size 8 at addr ffff4faf037cfc58 by task poc/474
CPU: 6 PID: 474 Comm: poc Not tainted 6.3.0-12570-g7df047b3f0aa #5
Hardware name: linux,dummy-virt (DT)
Call trace:
dump_backtrace+0x94/0xec
show_stack+0x18/0x24
dump_stack_lvl+0x48/0x60
print_report+0xf8/0x5b8
kasan_report+0xb8/0xfc
__asan_load8+0x9c/0xb8
binder_thread_read+0xc40/0x1f30
binder_ioctl+0xd9c/0x1768
__arm64_sys_ioctl+0xd4/0x118
invoke_syscall+0x60/0x188
[...]
Allocated by task 474:
kasan_save_stack+0x3c/0x64
kasan_set_track+0x2c/0x40
kasan_save_alloc_info+0x24/0x34
__kasan_kmalloc+0xb8/0xbc
kmalloc_trace+0x48/0x5c
binder_new_node+0x3c/0x3a4
binder_transaction+0x2b58/0x36f0
binder_thread_write+0x8e0/0x1b78
binder_ioctl+0x14a0/0x1768
__arm64_sys_ioctl+0xd4/0x118
invoke_syscall+0x60/0x188
[...]
Freed by task 475:
kasan_save_stack+0x3c/0x64
kasan_set_track+0x2c/0x40
kasan_save_free_info+0x38/0x5c
__kasan_slab_free+0xe8/0x154
__kmem_cache_free+0x128/0x2bc
kfree+0x58/0x70
binder_dec_node_tmpref+0x178/0x1fc
binder_transaction_buffer_release+0x430/0x628
binder_transaction+0x1954/0x36f0
binder_thread_write+0x8e0/0x1b78
binder_ioctl+0x14a0/0x1768
__arm64_sys_ioctl+0xd4/0x118
invoke_syscall+0x60/0x188
[...]
==================================================================
In order to avoid these issues, let's always calculate the intended
'failed_at' offset beforehand. This is renamed and wrapped in a helper
function to make it clear and convenient.
Fixes: 32e9f56a96d8 ("binder: don't detect sender/target during buffer cleanup")
Reported-by: Zi Fan Tan <zifantan(a)google.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
Acked-by: Todd Kjos <tkjos(a)google.com>
Link: https://lore.kernel.org/r/20230505203020.4101154-1-cmllamas@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index fb56bfc45096..8fb7672021ee 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1934,24 +1934,23 @@ static void binder_deferred_fd_close(int fd)
static void binder_transaction_buffer_release(struct binder_proc *proc,
struct binder_thread *thread,
struct binder_buffer *buffer,
- binder_size_t failed_at,
+ binder_size_t off_end_offset,
bool is_failure)
{
int debug_id = buffer->debug_id;
- binder_size_t off_start_offset, buffer_offset, off_end_offset;
+ binder_size_t off_start_offset, buffer_offset;
binder_debug(BINDER_DEBUG_TRANSACTION,
"%d buffer release %d, size %zd-%zd, failed at %llx\n",
proc->pid, buffer->debug_id,
buffer->data_size, buffer->offsets_size,
- (unsigned long long)failed_at);
+ (unsigned long long)off_end_offset);
if (buffer->target_node)
binder_dec_node(buffer->target_node, 1, 0);
off_start_offset = ALIGN(buffer->data_size, sizeof(void *));
- off_end_offset = is_failure && failed_at ? failed_at :
- off_start_offset + buffer->offsets_size;
+
for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
buffer_offset += sizeof(binder_size_t)) {
struct binder_object_header *hdr;
@@ -2111,6 +2110,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
}
}
+/* Clean up all the objects in the buffer */
+static inline void binder_release_entire_buffer(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_buffer *buffer,
+ bool is_failure)
+{
+ binder_size_t off_end_offset;
+
+ off_end_offset = ALIGN(buffer->data_size, sizeof(void *));
+ off_end_offset += buffer->offsets_size;
+
+ binder_transaction_buffer_release(proc, thread, buffer,
+ off_end_offset, is_failure);
+}
+
static int binder_translate_binder(struct flat_binder_object *fp,
struct binder_transaction *t,
struct binder_thread *thread)
@@ -2806,7 +2820,7 @@ static int binder_proc_transaction(struct binder_transaction *t,
t_outdated->buffer = NULL;
buffer->transaction = NULL;
trace_binder_transaction_update_buffer_release(buffer);
- binder_transaction_buffer_release(proc, NULL, buffer, 0, 0);
+ binder_release_entire_buffer(proc, NULL, buffer, false);
binder_alloc_free_buf(&proc->alloc, buffer);
kfree(t_outdated);
binder_stats_deleted(BINDER_STAT_TRANSACTION);
@@ -3775,7 +3789,7 @@ binder_free_buf(struct binder_proc *proc,
binder_node_inner_unlock(buf_node);
}
trace_binder_transaction_buffer_release(buffer);
- binder_transaction_buffer_release(proc, thread, buffer, 0, is_failure);
+ binder_release_entire_buffer(proc, thread, buffer, is_failure);
binder_alloc_free_buf(&proc->alloc, buffer);
}
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x bdc1c5fac982845a58d28690cdb56db8c88a530d
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052807-utopia-paddling-0f87@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
bdc1c5fac982 ("binder: fix UAF caused by faulty buffer cleanup")
9864bb480133 ("Binder: add TF_UPDATE_TXN to replace outdated txn")
32e9f56a96d8 ("binder: don't detect sender/target during buffer cleanup")
5fdb55c1ac95 ("binder: make sure fd closes complete")
432ff1e91694 ("binder: BINDER_FREEZE ioctl")
0f966cba95c7 ("binder: add flag to clear buffer on txn complete")
421518a2740f ("binder: move structs from core file to header file")
f3277cbfba76 ("binder: fix UAF when releasing todo list")
d35d3660e065 ("binder: fix null deref of proc->context")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From bdc1c5fac982845a58d28690cdb56db8c88a530d Mon Sep 17 00:00:00 2001
From: Carlos Llamas <cmllamas(a)google.com>
Date: Fri, 5 May 2023 20:30:20 +0000
Subject: [PATCH] binder: fix UAF caused by faulty buffer cleanup
In binder_transaction_buffer_release() the 'failed_at' offset indicates
the number of objects to clean up. However, this function was changed by
commit 44d8047f1d87 ("binder: use standard functions to allocate fds"),
to release all the objects in the buffer when 'failed_at' is zero.
This introduced an issue when a transaction buffer is released without
any objects having been processed so far. In this case, 'failed_at' is
indeed zero yet it is misinterpreted as releasing the entire buffer.
This leads to use-after-free errors where nodes are incorrectly freed
and subsequently accessed. Such is the case in the following KASAN
report:
==================================================================
BUG: KASAN: slab-use-after-free in binder_thread_read+0xc40/0x1f30
Read of size 8 at addr ffff4faf037cfc58 by task poc/474
CPU: 6 PID: 474 Comm: poc Not tainted 6.3.0-12570-g7df047b3f0aa #5
Hardware name: linux,dummy-virt (DT)
Call trace:
dump_backtrace+0x94/0xec
show_stack+0x18/0x24
dump_stack_lvl+0x48/0x60
print_report+0xf8/0x5b8
kasan_report+0xb8/0xfc
__asan_load8+0x9c/0xb8
binder_thread_read+0xc40/0x1f30
binder_ioctl+0xd9c/0x1768
__arm64_sys_ioctl+0xd4/0x118
invoke_syscall+0x60/0x188
[...]
Allocated by task 474:
kasan_save_stack+0x3c/0x64
kasan_set_track+0x2c/0x40
kasan_save_alloc_info+0x24/0x34
__kasan_kmalloc+0xb8/0xbc
kmalloc_trace+0x48/0x5c
binder_new_node+0x3c/0x3a4
binder_transaction+0x2b58/0x36f0
binder_thread_write+0x8e0/0x1b78
binder_ioctl+0x14a0/0x1768
__arm64_sys_ioctl+0xd4/0x118
invoke_syscall+0x60/0x188
[...]
Freed by task 475:
kasan_save_stack+0x3c/0x64
kasan_set_track+0x2c/0x40
kasan_save_free_info+0x38/0x5c
__kasan_slab_free+0xe8/0x154
__kmem_cache_free+0x128/0x2bc
kfree+0x58/0x70
binder_dec_node_tmpref+0x178/0x1fc
binder_transaction_buffer_release+0x430/0x628
binder_transaction+0x1954/0x36f0
binder_thread_write+0x8e0/0x1b78
binder_ioctl+0x14a0/0x1768
__arm64_sys_ioctl+0xd4/0x118
invoke_syscall+0x60/0x188
[...]
==================================================================
In order to avoid these issues, let's always calculate the intended
'failed_at' offset beforehand. This is renamed and wrapped in a helper
function to make it clear and convenient.
Fixes: 32e9f56a96d8 ("binder: don't detect sender/target during buffer cleanup")
Reported-by: Zi Fan Tan <zifantan(a)google.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
Acked-by: Todd Kjos <tkjos(a)google.com>
Link: https://lore.kernel.org/r/20230505203020.4101154-1-cmllamas@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index fb56bfc45096..8fb7672021ee 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1934,24 +1934,23 @@ static void binder_deferred_fd_close(int fd)
static void binder_transaction_buffer_release(struct binder_proc *proc,
struct binder_thread *thread,
struct binder_buffer *buffer,
- binder_size_t failed_at,
+ binder_size_t off_end_offset,
bool is_failure)
{
int debug_id = buffer->debug_id;
- binder_size_t off_start_offset, buffer_offset, off_end_offset;
+ binder_size_t off_start_offset, buffer_offset;
binder_debug(BINDER_DEBUG_TRANSACTION,
"%d buffer release %d, size %zd-%zd, failed at %llx\n",
proc->pid, buffer->debug_id,
buffer->data_size, buffer->offsets_size,
- (unsigned long long)failed_at);
+ (unsigned long long)off_end_offset);
if (buffer->target_node)
binder_dec_node(buffer->target_node, 1, 0);
off_start_offset = ALIGN(buffer->data_size, sizeof(void *));
- off_end_offset = is_failure && failed_at ? failed_at :
- off_start_offset + buffer->offsets_size;
+
for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
buffer_offset += sizeof(binder_size_t)) {
struct binder_object_header *hdr;
@@ -2111,6 +2110,21 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
}
}
+/* Clean up all the objects in the buffer */
+static inline void binder_release_entire_buffer(struct binder_proc *proc,
+ struct binder_thread *thread,
+ struct binder_buffer *buffer,
+ bool is_failure)
+{
+ binder_size_t off_end_offset;
+
+ off_end_offset = ALIGN(buffer->data_size, sizeof(void *));
+ off_end_offset += buffer->offsets_size;
+
+ binder_transaction_buffer_release(proc, thread, buffer,
+ off_end_offset, is_failure);
+}
+
static int binder_translate_binder(struct flat_binder_object *fp,
struct binder_transaction *t,
struct binder_thread *thread)
@@ -2806,7 +2820,7 @@ static int binder_proc_transaction(struct binder_transaction *t,
t_outdated->buffer = NULL;
buffer->transaction = NULL;
trace_binder_transaction_update_buffer_release(buffer);
- binder_transaction_buffer_release(proc, NULL, buffer, 0, 0);
+ binder_release_entire_buffer(proc, NULL, buffer, false);
binder_alloc_free_buf(&proc->alloc, buffer);
kfree(t_outdated);
binder_stats_deleted(BINDER_STAT_TRANSACTION);
@@ -3775,7 +3789,7 @@ binder_free_buf(struct binder_proc *proc,
binder_node_inner_unlock(buf_node);
}
trace_binder_transaction_buffer_release(buffer);
- binder_transaction_buffer_release(proc, thread, buffer, 0, is_failure);
+ binder_release_entire_buffer(proc, thread, buffer, is_failure);
binder_alloc_free_buf(&proc->alloc, buffer);
}
When dev_pm_opp_of_find_icc_paths() in _allocate_opp_table() returns
-EPROBE_DEFER, the opp_table is freed again, to wait until all the
interconnect paths are available.
However, if the OPP table is using required-opps then it may already
have been added to the global lazy_opp_tables list. The error path
does not remove the opp_table from the list again.
This can cause crashes later when the provider of the required-opps
is added, since we will iterate over OPP tables that have already been
freed. E.g.:
Unable to handle kernel NULL pointer dereference when read
CPU: 0 PID: 7 Comm: kworker/0:0 Not tainted 6.4.0-rc3
PC is at _of_add_opp_table_v2 (include/linux/of.h:949
drivers/opp/of.c:98 drivers/opp/of.c:344 drivers/opp/of.c:404
drivers/opp/of.c:1032) -> lazy_link_required_opp_table()
Fix this by removing the opp_table from the list before freeing it.
Cc: stable(a)vger.kernel.org
Fixes: 7eba0c7641b0 ("opp: Allow lazy-linking of required-opps")
Signed-off-by: Stephan Gerhold <stephan.gerhold(a)kernkonzept.com>
---
This fixes the crash I ran into after adding an OPP table with
both "required-opps" and interconnect paths (opp-peak-kBps).
By the way, the "lazy_opp_tables" does not seem to be protected by any
locks(?) so I could imagine that theoretically there could be a race
condition while adding/removing OPP tables there. This is unrelated
to the crash I saw, though.
---
drivers/opp/core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 85cbc8de407c..6a3a320be7df 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1358,6 +1358,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index)
return opp_table;
remove_opp_dev:
+ list_del(&opp_table->lazy);
_remove_opp_dev(opp_dev, opp_table);
err:
kfree(opp_table);
---
base-commit: 9e28f7a74581204807f20ae46568939038e327aa
change-id: 20230524-opp-lazy-uaf-60a004b385ec
Best regards,
--
Stephan Gerhold
Kernkonzept GmbH at Dresden, Germany, HRB 31129, CEO Dr.-Ing. Michael Hohmuth
From: Ruihan Li <lrh2000(a)pku.edu.cn>
commit 000c2fa2c144c499c881a101819cf1936a1f7cf2 upstream.
Previously, channel open messages were always sent to monitors on the first
ioctl() call for unbound HCI sockets, even if the command and arguments
were completely invalid. This can leave an exploitable hole with the abuse
of invalid ioctl calls.
This commit hardens the ioctl processing logic by first checking if the
command is valid, and immediately returning with an ENOIOCTLCMD error code
if it is not. This ensures that ioctl calls with invalid commands are free
of side effects, and increases the difficulty of further exploitation by
forcing exploitation to find a way to pass a valid command first.
Signed-off-by: Ruihan Li <lrh2000(a)pku.edu.cn>
Co-developed-by: Marcel Holtmann <marcel(a)holtmann.org>
Signed-off-by: Marcel Holtmann <marcel(a)holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Signed-off-by: Dragos-Marian Panait <dragos.panait(a)windriver.com>
---
net/bluetooth/hci_sock.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f597fe0db9f8..1d249d839819 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -987,6 +987,34 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
BT_DBG("cmd %x arg %lx", cmd, arg);
+ /* Make sure the cmd is valid before doing anything */
+ switch (cmd) {
+ case HCIGETDEVLIST:
+ case HCIGETDEVINFO:
+ case HCIGETCONNLIST:
+ case HCIDEVUP:
+ case HCIDEVDOWN:
+ case HCIDEVRESET:
+ case HCIDEVRESTAT:
+ case HCISETSCAN:
+ case HCISETAUTH:
+ case HCISETENCRYPT:
+ case HCISETPTYPE:
+ case HCISETLINKPOL:
+ case HCISETLINKMODE:
+ case HCISETACLMTU:
+ case HCISETSCOMTU:
+ case HCIINQUIRY:
+ case HCISETRAW:
+ case HCIGETCONNINFO:
+ case HCIGETAUTHINFO:
+ case HCIBLOCKADDR:
+ case HCIUNBLOCKADDR:
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
lock_sock(sk);
if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) {
--
2.40.1
From: Ruihan Li <lrh2000(a)pku.edu.cn>
commit 000c2fa2c144c499c881a101819cf1936a1f7cf2 upstream.
Previously, channel open messages were always sent to monitors on the first
ioctl() call for unbound HCI sockets, even if the command and arguments
were completely invalid. This can leave an exploitable hole with the abuse
of invalid ioctl calls.
This commit hardens the ioctl processing logic by first checking if the
command is valid, and immediately returning with an ENOIOCTLCMD error code
if it is not. This ensures that ioctl calls with invalid commands are free
of side effects, and increases the difficulty of further exploitation by
forcing exploitation to find a way to pass a valid command first.
Signed-off-by: Ruihan Li <lrh2000(a)pku.edu.cn>
Co-developed-by: Marcel Holtmann <marcel(a)holtmann.org>
Signed-off-by: Marcel Holtmann <marcel(a)holtmann.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Signed-off-by: Dragos-Marian Panait <dragos.panait(a)windriver.com>
---
net/bluetooth/hci_sock.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f597fe0db9f8..1d249d839819 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -987,6 +987,34 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
BT_DBG("cmd %x arg %lx", cmd, arg);
+ /* Make sure the cmd is valid before doing anything */
+ switch (cmd) {
+ case HCIGETDEVLIST:
+ case HCIGETDEVINFO:
+ case HCIGETCONNLIST:
+ case HCIDEVUP:
+ case HCIDEVDOWN:
+ case HCIDEVRESET:
+ case HCIDEVRESTAT:
+ case HCISETSCAN:
+ case HCISETAUTH:
+ case HCISETENCRYPT:
+ case HCISETPTYPE:
+ case HCISETLINKPOL:
+ case HCISETLINKMODE:
+ case HCISETACLMTU:
+ case HCISETSCOMTU:
+ case HCIINQUIRY:
+ case HCISETRAW:
+ case HCIGETCONNINFO:
+ case HCIGETAUTHINFO:
+ case HCIBLOCKADDR:
+ case HCIUNBLOCKADDR:
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
lock_sock(sk);
if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) {
--
2.40.1
The following commit is needed to harden against CVE-2023-2002:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
Ruihan Li (1):
bluetooth: Add cmd validity checks at the start of hci_sock_ioctl()
net/bluetooth/hci_sock.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
base-commit: b3f141a5bc7f877e96528dd31a139854ec4d6017
--
2.40.1
Syzbot reported a BUG_ON:
==================================================================
EXT4-fs (loop0): mounted filesystem without journal. Quota mode: none.
EXT4-fs error (device loop0): ext4_mb_generate_buddy:1098: group 0, block
bitmap and bg descriptor inconsistent: 25 vs 150994969 free clusters
------------[ cut here ]------------
kernel BUG at fs/ext4/ext4_jbd2.c:53!
invalid opcode: 0000 [#1] PREEMPT SMP KASAN
CPU: 1 PID: 494 Comm: syz-executor.0 6.1.0-rc7-syzkaller-ga4412fdd49dc #0
RIP: 0010:__ext4_journal_stop+0x1b3/0x1c0
[...]
Call Trace:
ext4_write_inline_data_end+0xa39/0xdf0
ext4_da_write_end+0x1e2/0x950
generic_perform_write+0x401/0x5f0
ext4_buffered_write_iter+0x35f/0x640
ext4_file_write_iter+0x198/0x1cd0
vfs_write+0x8b5/0xef0
[...]
==================================================================
The above BUG_ON is triggered by the following race:
cpu1 cpu2
________________________|________________________
ksys_write
vfs_write
new_sync_write
ext4_file_write_iter
ext4_buffered_write_iter
generic_perform_write
ext4_da_write_begin
do_fault
do_page_mkwrite
ext4_page_mkwrite
ext4_convert_inline_data
ext4_convert_inline_data_nolock
ext4_destroy_inline_data_nolock
//clear EXT4_STATE_MAY_INLINE_DATA
ext4_map_blocks --> return error
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)
ext4_block_write_begin
ext4_restore_inline_data
// set EXT4_STATE_MAY_INLINE_DATA
ext4_da_write_end
ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)
ext4_write_inline_data_end
handle=NULL
ext4_journal_stop(handle)
__ext4_journal_stop
ext4_put_nojournal(handle)
ref_cnt = (unsigned long)handle
BUG_ON(ref_cnt == 0) ---> BUG_ON
The root cause of this problem is that the ext4_convert_inline_data() in
ext4_page_mkwrite() does not grab i_rwsem, so it may race with
ext4_buffered_write_iter() and cause the write_begin() and write_end()
functions to be inconsistent and trigger BUG_ON.
To solve the above issue, we cannot add inode_lock directly to
ext4_page_mkwrite(), because this function is a hot path and frequent calls
to inode_lock will cause performance degradation for multi-threaded reads
and writes. Hence, we move ext4_convert_inline_data() to ext4_file_mmap(),
and only when inline_data is enabled and mmap a file in shared write mode,
we hold the lock to convert, which can reduce the impact on performance.
Reported-by: Jun Nie <jun.nie(a)linaro.org>
Closes: https://lore.kernel.org/lkml/63903521.5040307@huawei.com/t/
Reported-by: syzbot+a158d886ca08a3fecca4(a)syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?id=899b37f20ce4072bcdfecfe1647b39602e956e…
Fixes: 7b4cc9787fe3 ("ext4: evict inline data when writing to memory map")
CC: stable(a)vger.kernel.org # 4.12+
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
---
fs/ext4/file.c | 24 +++++++++++++++++++++++-
fs/ext4/inode.c | 4 ----
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d101b3b0c7da..7a04376c33f2 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -795,7 +795,8 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ struct super_block *sb = inode->i_sb;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
struct dax_device *dax_dev = sbi->s_daxdev;
if (unlikely(ext4_forced_shutdown(sbi)))
@@ -808,6 +809,27 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (!daxdev_mapping_supported(vma, dax_dev))
return -EOPNOTSUPP;
+ /*
+ * Writing via mmap has no logic to handle inline data, so we
+ * need to call ext4_convert_inline_data() to convert the inode
+ * to normal format before doing so, otherwise a BUG_ON will be
+ * triggered in ext4_writepages() due to the
+ * EXT4_STATE_MAY_INLINE_DATA flag. Moreover, we need to grab
+ * i_rwsem during conversion, since clearing and setting the
+ * inline data flag may race with ext4_buffered_write_iter()
+ * to trigger a BUG_ON.
+ */
+ if (ext4_has_feature_inline_data(sb) &&
+ vma->vm_flags & VM_SHARED && vma->vm_flags & VM_WRITE) {
+ int err;
+
+ inode_lock(inode);
+ err = ext4_convert_inline_data(inode);
+ inode_unlock(inode);
+ if (err)
+ return err;
+ }
+
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ce5f21b6c2b3..31844c4ec9fe 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6043,10 +6043,6 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
filemap_invalidate_lock_shared(mapping);
- err = ext4_convert_inline_data(inode);
- if (err)
- goto out_ret;
-
/*
* On data journalling we skip straight to the transaction handle:
* there's no delalloc; page truncated will be checked later; the
--
2.31.1
The following commit is needed to harden against CVE-2023-2002:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
Ruihan Li (1):
bluetooth: Add cmd validity checks at the start of hci_sock_ioctl()
net/bluetooth/hci_sock.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
base-commit: 3f57fb8b1bd06b277556601133823bec370d723f
--
2.40.1
The following commit is needed to harden against CVE-2023-2002:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
Ruihan Li (1):
bluetooth: Add cmd validity checks at the start of hci_sock_ioctl()
net/bluetooth/hci_sock.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
base-commit: fa74641fb6b93a19ccb50579886ecc98320230f9
--
2.40.1
The following commit is needed to harden against CVE-2023-2002:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
Ruihan Li (1):
bluetooth: Add cmd validity checks at the start of hci_sock_ioctl()
net/bluetooth/hci_sock.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
base-commit: f53660ec669f60c772fdf7d75d1c24d288547cee
--
2.40.1
The following commit is needed to harden against CVE-2023-2002:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
Ruihan Li (1):
bluetooth: Add cmd validity checks at the start of hci_sock_ioctl()
net/bluetooth/hci_sock.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
base-commit: 4c893ff55907c61456bcb917781c0dd687a1e123
--
2.40.1
Currently the locking order of inode locks for directories that are not
in ancestor relationship is not defined because all operations that
needed to lock two directories like this were serialized by
sb->s_vfs_rename_mutex. However some filesystems need to lock two
subdirectories for RENAME_EXCHANGE operations and for this we need the
locking order established even for two tree-unrelated directories.
Provide a helper function lock_two_inodes() that establishes lock
ordering for any two inodes and use it in lock_two_directories().
CC: stable(a)vger.kernel.org
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
fs/inode.c | 34 ++++++++++++++++++++++++++++++++++
fs/internal.h | 2 ++
fs/namei.c | 4 ++--
3 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index 577799b7855f..2015fa50d34a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1103,6 +1103,40 @@ void discard_new_inode(struct inode *inode)
}
EXPORT_SYMBOL(discard_new_inode);
+/**
+ * lock_two_inodes - lock two inodes (may be regular files but also dirs)
+ *
+ * Lock any non-NULL argument. The caller must make sure that if he is passing
+ * in two directories, one is not ancestor of the other. Zero, one or two
+ * objects may be locked by this function.
+ *
+ * @inode1: first inode to lock
+ * @inode2: second inode to lock
+ * @subclass1: inode lock subclass for the first lock obtained
+ * @subclass2: inode lock subclass for the second lock obtained
+ */
+void lock_two_inodes(struct inode *inode1, struct inode *inode2,
+ unsigned subclass1, unsigned subclass2)
+{
+ if (!inode1 || !inode2)
+ goto lock;
+
+ /*
+ * If one object is directory and the other is not, we must make sure
+ * to lock directory first as the other object may be its child.
+ */
+ if (S_ISDIR(inode2->i_mode) == S_ISDIR(inode1->i_mode)) {
+ if (inode1 > inode2)
+ swap(inode1, inode2);
+ } else if (!S_ISDIR(inode1->i_mode))
+ swap(inode1, inode2);
+lock:
+ if (inode1)
+ inode_lock_nested(inode1, subclass1);
+ if (inode2 && inode2 != inode1)
+ inode_lock_nested(inode2, subclass2);
+}
+
/**
* lock_two_nondirectories - take two i_mutexes on non-directory objects
*
diff --git a/fs/internal.h b/fs/internal.h
index bd3b2810a36b..377030a50aca 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -152,6 +152,8 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry);
bool in_group_or_capable(struct mnt_idmap *idmap,
const struct inode *inode, vfsgid_t vfsgid);
+void lock_two_inodes(struct inode *inode1, struct inode *inode2,
+ unsigned subclass1, unsigned subclass2);
/*
* fs-writeback.c
diff --git a/fs/namei.c b/fs/namei.c
index e4fe0879ae55..148570aabe74 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3028,8 +3028,8 @@ static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
return p;
}
- inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+ lock_two_inodes(p1->d_inode, p2->d_inode,
+ I_MUTEX_PARENT, I_MUTEX_PARENT2);
return NULL;
}
--
2.35.3
The following commit is needed to harden against CVE-2023-2002:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
Ruihan Li (1):
bluetooth: Add cmd validity checks at the start of hci_sock_ioctl()
net/bluetooth/hci_sock.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
base-commit: 1fe619a7d25218e9b9fdcce9fcac6a05cd62abed
--
2.40.1
Buenos días querido amigo, por favor, ¿ha recibido la compensación?
dinero que te envié a través de mi secretaria MR. PEDRO RAMOS?
....................................
Good morning dear friend, Please have you received the compensation
money I sent to you through my secretary MR. PETER RAMOS?
The patch below does not apply to the 6.3-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.3.y
git checkout FETCH_HEAD
git cherry-pick -x 3bf8c6307bad5c0cc09cde982e146d847859b651
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052834-enlighten-vacate-bfd6@gregkh' --subject-prefix 'PATCH 6.3.y' HEAD^..
Possible dependencies:
3bf8c6307bad ("cpufreq: amd-pstate: Update policy->cur in amd_pstate_adjust_perf()")
2dd6d0ebf740 ("cpufreq: amd-pstate: Add guided autonomous mode")
3e6e07805764 ("Documentation: cpufreq: amd-pstate: Move amd_pstate param to alphabetical order")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3bf8c6307bad5c0cc09cde982e146d847859b651 Mon Sep 17 00:00:00 2001
From: Wyes Karny <wyes.karny(a)amd.com>
Date: Thu, 18 May 2023 05:58:19 +0000
Subject: [PATCH] cpufreq: amd-pstate: Update policy->cur in
amd_pstate_adjust_perf()
Driver should update policy->cur after updating the frequency.
Currently amd_pstate doesn't update policy->cur when `adjust_perf`
is used. Which causes /proc/cpuinfo to show wrong cpu frequency.
Fix this by updating policy->cur with correct frequency value in
adjust_perf function callback.
- Before the fix: (setting min freq to 1.5 MHz)
[root@amd]# cat /proc/cpuinfo | grep "cpu MHz" | sort | uniq --count
1 cpu MHz : 1777.016
1 cpu MHz : 1797.160
1 cpu MHz : 1797.270
189 cpu MHz : 400.000
- After the fix: (setting min freq to 1.5 MHz)
[root@amd]# cat /proc/cpuinfo | grep "cpu MHz" | sort | uniq --count
1 cpu MHz : 1753.353
1 cpu MHz : 1756.838
1 cpu MHz : 1776.466
1 cpu MHz : 1776.873
1 cpu MHz : 1777.308
1 cpu MHz : 1779.900
183 cpu MHz : 1805.231
1 cpu MHz : 1956.815
1 cpu MHz : 2246.203
1 cpu MHz : 2259.984
Fixes: 1d215f0319c2 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State")
Signed-off-by: Wyes Karny <wyes.karny(a)amd.com>
[ rjw: Subject edits ]
Cc: 5.17+ <stable(a)vger.kernel.org> # 5.17+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index ac54779f5a49..ddd346a239e0 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -501,12 +501,14 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
unsigned long capacity)
{
unsigned long max_perf, min_perf, des_perf,
- cap_perf, lowest_nonlinear_perf;
+ cap_perf, lowest_nonlinear_perf, max_freq;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata = policy->driver_data;
+ unsigned int target_freq;
cap_perf = READ_ONCE(cpudata->highest_perf);
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
+ max_freq = READ_ONCE(cpudata->max_freq);
des_perf = cap_perf;
if (target_perf < capacity)
@@ -523,6 +525,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (max_perf < min_perf)
max_perf = min_perf;
+ des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
+ target_freq = div_u64(des_perf * max_freq, max_perf);
+ policy->cur = target_freq;
+
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
cpufreq_cpu_put(policy);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 3bf8c6307bad5c0cc09cde982e146d847859b651
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052835-scoring-scary-c0c6@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
3bf8c6307bad ("cpufreq: amd-pstate: Update policy->cur in amd_pstate_adjust_perf()")
2dd6d0ebf740 ("cpufreq: amd-pstate: Add guided autonomous mode")
3e6e07805764 ("Documentation: cpufreq: amd-pstate: Move amd_pstate param to alphabetical order")
5014603e409b ("Documentation: introduce amd pstate active mode kernel command line options")
ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors")
36c5014e5460 ("cpufreq: amd-pstate: optimize driver working mode selection in amd_pstate_param()")
4f3085f87b51 ("cpufreq: amd-pstate: fix kernel hang issue while amd-pstate unregistering")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3bf8c6307bad5c0cc09cde982e146d847859b651 Mon Sep 17 00:00:00 2001
From: Wyes Karny <wyes.karny(a)amd.com>
Date: Thu, 18 May 2023 05:58:19 +0000
Subject: [PATCH] cpufreq: amd-pstate: Update policy->cur in
amd_pstate_adjust_perf()
Driver should update policy->cur after updating the frequency.
Currently amd_pstate doesn't update policy->cur when `adjust_perf`
is used. Which causes /proc/cpuinfo to show wrong cpu frequency.
Fix this by updating policy->cur with correct frequency value in
adjust_perf function callback.
- Before the fix: (setting min freq to 1.5 MHz)
[root@amd]# cat /proc/cpuinfo | grep "cpu MHz" | sort | uniq --count
1 cpu MHz : 1777.016
1 cpu MHz : 1797.160
1 cpu MHz : 1797.270
189 cpu MHz : 400.000
- After the fix: (setting min freq to 1.5 MHz)
[root@amd]# cat /proc/cpuinfo | grep "cpu MHz" | sort | uniq --count
1 cpu MHz : 1753.353
1 cpu MHz : 1756.838
1 cpu MHz : 1776.466
1 cpu MHz : 1776.873
1 cpu MHz : 1777.308
1 cpu MHz : 1779.900
183 cpu MHz : 1805.231
1 cpu MHz : 1956.815
1 cpu MHz : 2246.203
1 cpu MHz : 2259.984
Fixes: 1d215f0319c2 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State")
Signed-off-by: Wyes Karny <wyes.karny(a)amd.com>
[ rjw: Subject edits ]
Cc: 5.17+ <stable(a)vger.kernel.org> # 5.17+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index ac54779f5a49..ddd346a239e0 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -501,12 +501,14 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
unsigned long capacity)
{
unsigned long max_perf, min_perf, des_perf,
- cap_perf, lowest_nonlinear_perf;
+ cap_perf, lowest_nonlinear_perf, max_freq;
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct amd_cpudata *cpudata = policy->driver_data;
+ unsigned int target_freq;
cap_perf = READ_ONCE(cpudata->highest_perf);
lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
+ max_freq = READ_ONCE(cpudata->max_freq);
des_perf = cap_perf;
if (target_perf < capacity)
@@ -523,6 +525,10 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
if (max_perf < min_perf)
max_perf = min_perf;
+ des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
+ target_freq = div_u64(des_perf * max_freq, max_perf);
+ policy->cur = target_freq;
+
amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true,
policy->governor->flags);
cpufreq_cpu_put(policy);
After a few years of increasing test coverage in the MPTCP selftests, we
realised [1] the last version of the selftests is supposed to run on old
kernels without issues.
Supporting older versions is not that easy for this MPTCP case: these
selftests are often validating the internals by checking packets that
are exchanged, when some MIB counters are incremented after some
actions, how connections are getting opened and closed in some cases,
etc. In other words, it is not limited to the socket interface between
the userspace and the kernelspace. In addition, the current selftests
run a lot of different sub-tests but the TAP13 protocol used in the
selftests don't support sub-tests: in other words, one failure in
sub-tests implies that the whole selftest is seen as failed at the end
because sub-tests are not tracked. It is then important to skip
sub-tests not supported by old kernels.
To minimise the modifications and reduce the complexity to support old
versions, the idea is to look at external signs and skip the whole
selftests or just some sub-tests before starting them.
This first part focuses on marking the different selftests as skipped
if MPTCP is not even supported. That's what is done in patches 2 to 8.
Patch 2/8 introduces a new file (mptcp_lib.sh) to be able to re-use some
helpers in the different selftests. The first MPTCP selftest has been
introduced in v5.6.
Patch 1/8 is a bit different but still linked: it modifies mptcp_join.sh
selftest not to use 'cmp --bytes' which is not supported by the BusyBox
implementation. It is apparently quite common to use BusyBox in CI
environments. This tool is needed for a subtest introduced in v6.1.
Link: https://lore.kernel.org/stable/CA+G9fYtDGpgT4dckXD-y-N92nqUxuvue_7AtDdBcHrb… [1]
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368
Signed-off-by: Matthieu Baerts <matthieu.baerts(a)tessares.net>
---
Matthieu Baerts (8):
selftests: mptcp: join: avoid using 'cmp --bytes'
selftests: mptcp: connect: skip if MPTCP is not supported
selftests: mptcp: pm nl: skip if MPTCP is not supported
selftests: mptcp: join: skip if MPTCP is not supported
selftests: mptcp: diag: skip if MPTCP is not supported
selftests: mptcp: simult flows: skip if MPTCP is not supported
selftests: mptcp: sockopt: skip if MPTCP is not supported
selftests: mptcp: userspace pm: skip if MPTCP is not supported
tools/testing/selftests/net/mptcp/Makefile | 2 +-
tools/testing/selftests/net/mptcp/diag.sh | 4 +++
tools/testing/selftests/net/mptcp/mptcp_connect.sh | 4 +++
tools/testing/selftests/net/mptcp/mptcp_join.sh | 17 +++++++--
tools/testing/selftests/net/mptcp/mptcp_lib.sh | 40 ++++++++++++++++++++++
tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 4 +++
tools/testing/selftests/net/mptcp/pm_netlink.sh | 4 +++
tools/testing/selftests/net/mptcp/simult_flows.sh | 4 +++
tools/testing/selftests/net/mptcp/userspace_pm.sh | 4 +++
9 files changed, 80 insertions(+), 3 deletions(-)
---
base-commit: 9b9e46aa07273ceb96866b2e812b46f1ee0b8d2f
change-id: 20230528-upstream-net-20230528-mptcp-selftests-support-old-kernels-part-1-305638f4dbc0
Best regards,
--
Matthieu Baerts <matthieu.baerts(a)tessares.net>
From: Alexandr Sapozhnikov <alsp705(a)gmail.com>
[ Upstream commit 7245e629dcaaf308f1868aeffa218e9849c77893 ]
After having been compared to NULL value at cirrus.c:455, pointer
'pipe->plane.state->fb' is passed as 1st parameter in call to function
'cirrus_fb_blit_rect' at cirrus.c:461, where it is dereferenced at
cirrus.c:316.
Found by Linux Verification Center (linuxtesting.org) with SVACE.
v2:
* aligned commit message to line-length limits
Signed-off-by: Alexandr Sapozhnikov <alsp705(a)gmail.com>
Reviewed-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann(a)suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230215171549.16305-1-alsp70…
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/gpu/drm/tiny/cirrus.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
index 678c2ef1cae70..ffa7e61dd1835 100644
--- a/drivers/gpu/drm/tiny/cirrus.c
+++ b/drivers/gpu/drm/tiny/cirrus.c
@@ -455,7 +455,7 @@ static void cirrus_pipe_update(struct drm_simple_display_pipe *pipe,
if (state->fb && cirrus->cpp != cirrus_cpp(state->fb))
cirrus_mode_set(cirrus, &crtc->mode, state->fb);
- if (drm_atomic_helper_damage_merged(old_state, state, &rect))
+ if (state->fb && drm_atomic_helper_damage_merged(old_state, state, &rect))
cirrus_fb_blit_rect(state->fb, &shadow_plane_state->data[0], &rect);
}
--
2.39.2
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 4badf2eb1e986bdbf34dd2f5d4c979553a86fe54
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052858-overgrown-profile-8066@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
4badf2eb1e98 ("cpufreq: amd-pstate: Add ->fast_switch() callback")
2dd6d0ebf740 ("cpufreq: amd-pstate: Add guided autonomous mode")
3e6e07805764 ("Documentation: cpufreq: amd-pstate: Move amd_pstate param to alphabetical order")
5014603e409b ("Documentation: introduce amd pstate active mode kernel command line options")
ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors")
36c5014e5460 ("cpufreq: amd-pstate: optimize driver working mode selection in amd_pstate_param()")
4f3085f87b51 ("cpufreq: amd-pstate: fix kernel hang issue while amd-pstate unregistering")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4badf2eb1e986bdbf34dd2f5d4c979553a86fe54 Mon Sep 17 00:00:00 2001
From: "Gautham R. Shenoy" <gautham.shenoy(a)amd.com>
Date: Wed, 17 May 2023 16:28:15 +0000
Subject: [PATCH] cpufreq: amd-pstate: Add ->fast_switch() callback
Schedutil normally calls the adjust_perf callback for drivers with
adjust_perf callback available and fast_switch_possible flag set.
However, when frequency invariance is disabled and schedutil tries to
invoke fast_switch. So, there is a chance of kernel crash if this
function pointer is not set. To protect against this scenario add
fast_switch callback to amd_pstate driver.
Fixes: 1d215f0319c2 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State")
Signed-off-by: Gautham R. Shenoy <gautham.shenoy(a)amd.com>
Signed-off-by: Wyes Karny <wyes.karny(a)amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 5a3d4aa0f45a..45711fc0a856 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -444,9 +444,8 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy)
return 0;
}
-static int amd_pstate_target(struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
+static int amd_pstate_update_freq(struct cpufreq_policy *policy,
+ unsigned int target_freq, bool fast_switch)
{
struct cpufreq_freqs freqs;
struct amd_cpudata *cpudata = policy->driver_data;
@@ -465,14 +464,37 @@ static int amd_pstate_target(struct cpufreq_policy *policy,
des_perf = DIV_ROUND_CLOSEST(target_freq * cap_perf,
cpudata->max_freq);
- cpufreq_freq_transition_begin(policy, &freqs);
+ WARN_ON(fast_switch && !policy->fast_switch_enabled);
+ /*
+ * If fast_switch is desired, then there aren't any registered
+ * transition notifiers. See comment for
+ * cpufreq_enable_fast_switch().
+ */
+ if (!fast_switch)
+ cpufreq_freq_transition_begin(policy, &freqs);
+
amd_pstate_update(cpudata, min_perf, des_perf,
- max_perf, false, policy->governor->flags);
- cpufreq_freq_transition_end(policy, &freqs, false);
+ max_perf, fast_switch, policy->governor->flags);
+
+ if (!fast_switch)
+ cpufreq_freq_transition_end(policy, &freqs, false);
return 0;
}
+static int amd_pstate_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation)
+{
+ return amd_pstate_update_freq(policy, target_freq, false);
+}
+
+static unsigned int amd_pstate_fast_switch(struct cpufreq_policy *policy,
+ unsigned int target_freq)
+{
+ return amd_pstate_update_freq(policy, target_freq, true);
+}
+
static void amd_pstate_adjust_perf(unsigned int cpu,
unsigned long _min_perf,
unsigned long target_perf,
@@ -715,6 +737,7 @@ static int amd_pstate_cpu_exit(struct cpufreq_policy *policy)
freq_qos_remove_request(&cpudata->req[1]);
freq_qos_remove_request(&cpudata->req[0]);
+ policy->fast_switch_possible = false;
kfree(cpudata);
return 0;
@@ -1309,6 +1332,7 @@ static struct cpufreq_driver amd_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
.verify = amd_pstate_verify,
.target = amd_pstate_target,
+ .fast_switch = amd_pstate_fast_switch,
.init = amd_pstate_cpu_init,
.exit = amd_pstate_cpu_exit,
.suspend = amd_pstate_cpu_suspend,
In RDDM EE, device can not process MHI reset issued by host. In case of MHI
power off, host is issuing MHI reset and polls for it to get cleared until
it times out. Since this timeout can not be avoided in case of RDDM, skip
the MHI reset in this scenarios.
Cc: <stable(a)vger.kernel.org>
Fixes: a6e2e3522f29 ("bus: mhi: core: Add support for PM state transitions")
Signed-off-by: Qiang Yu <quic_qianyu(a)quicinc.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo(a)quicinc.com>
---
v1->v2: use ~75 columns in commit text, add Fixes tag
v2->v3: update Fixes tag
v3->v4: add review tag and CC stable
drivers/bus/mhi/host/pm.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c
index 0834590..8a4362d 100644
--- a/drivers/bus/mhi/host/pm.c
+++ b/drivers/bus/mhi/host/pm.c
@@ -470,6 +470,10 @@ static void mhi_pm_disable_transition(struct mhi_controller *mhi_cntrl)
/* Trigger MHI RESET so that the device will not access host memory */
if (!MHI_PM_IN_FATAL_STATE(mhi_cntrl->pm_state)) {
+ /* Skip MHI RESET if in RDDM state */
+ if (mhi_cntrl->rddm_image && mhi_get_exec_env(mhi_cntrl) == MHI_EE_RDDM)
+ goto skip_mhi_reset;
+
dev_dbg(dev, "Triggering MHI Reset in device\n");
mhi_set_mhi_state(mhi_cntrl, MHI_STATE_RESET);
@@ -495,6 +499,7 @@ static void mhi_pm_disable_transition(struct mhi_controller *mhi_cntrl)
}
}
+skip_mhi_reset:
dev_dbg(dev,
"Waiting for all pending event ring processing to complete\n");
mhi_event = mhi_cntrl->mhi_event;
--
2.7.4
The apc->eth_stats.rx_cqes is one per NIC (vport), and it's on the
frequent and parallel code path of all queues. So, r/w into this
single shared variable by many threads on different CPUs creates a
lot caching and memory overhead, hence perf regression. And, it's
not accurate due to the high volume concurrent r/w.
For example, a workload is iperf with 128 threads, and with RPS
enabled. We saw perf regression of 25% with the previous patch
adding the counters. And this patch eliminates the regression.
Since the error path of mana_poll_rx_cq() already has warnings, so
keeping the counter and convert it to a per-queue variable is not
necessary. So, just remove this counter from this high frequency
code path.
Also, remove the tx_cqes counter for the same reason. We have
warnings & other counters for errors on that path, and don't need
to count every normal cqe processing.
Cc: stable(a)vger.kernel.org
Fixes: bd7fc6e1957c ("net: mana: Add new MANA VF performance counters for easier troubleshooting")
Signed-off-by: Haiyang Zhang <haiyangz(a)microsoft.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur(a)microchip.com>
Reviewed-by: Jiri Pirko <jiri(a)nvidia.com>
---
V3:
Add test example as suggested by Jakub Kicinski.
V2:
Same as V1, except adding more Cc's.
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 10 ----------
drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 2 --
include/net/mana/mana.h | 2 --
3 files changed, 14 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 06d6292e09b3..d907727c7b7a 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1279,8 +1279,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
if (comp_read < 1)
return;
- apc->eth_stats.tx_cqes = comp_read;
-
for (i = 0; i < comp_read; i++) {
struct mana_tx_comp_oob *cqe_oob;
@@ -1363,8 +1361,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
WARN_ON_ONCE(1);
cq->work_done = pkt_transmitted;
-
- apc->eth_stats.tx_cqes -= pkt_transmitted;
}
static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -1626,15 +1622,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
{
struct gdma_comp *comp = cq->gdma_comp_buf;
struct mana_rxq *rxq = cq->rxq;
- struct mana_port_context *apc;
int comp_read, i;
- apc = netdev_priv(rxq->ndev);
-
comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
- apc->eth_stats.rx_cqes = comp_read;
rxq->xdp_flush = false;
for (i = 0; i < comp_read; i++) {
@@ -1646,8 +1638,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
return;
mana_process_rx_cqe(rxq, cq, &comp[i]);
-
- apc->eth_stats.rx_cqes--;
}
if (rxq->xdp_flush)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index a64c81410dc1..0dc78679f620 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -13,11 +13,9 @@ static const struct {
} mana_eth_stats[] = {
{"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
{"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
- {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)},
{"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
{"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
tx_cqe_unknown_type)},
- {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)},
{"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
rx_coalesced_err)},
{"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index cd386aa7c7cc..9eef19972845 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -347,10 +347,8 @@ struct mana_tx_qp {
struct mana_ethtool_stats {
u64 stop_queue;
u64 wake_queue;
- u64 tx_cqes;
u64 tx_cqe_err;
u64 tx_cqe_unknown_type;
- u64 rx_cqes;
u64 rx_coalesced_err;
u64 rx_cqe_unknown_type;
};
--
2.25.1
Herzlichen Glückwunsch, Sie haben am 25. May, 2023 €650.000,00 bei den
monatlichen Euro Millions/Google Promo-Gewinnspielen gewonnen.
Bitte geben Sie die folgenden Informationen ein, damit Ihr
Gewinnbetrag an Sie überwiesen werden kann.
1.) Vollständiger Name:
2.) Telefon- und Mobilfunknummern:
3.) Postanschrift:
4.) Beruf:
5.) Geburtsdatum:
6.) Geschlecht:
Herr Anthony Deiderich
Online-Koordinator
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 368d3cb406cdd074d1df2ad9ec06d1bfcb664882
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2023052821-wired-primate-24c3@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
368d3cb406cd ("page_pool: fix inconsistency for page_pool_ring_[un]lock()")
542bcea4be86 ("net: page_pool: use in_softirq() instead")
590032a4d213 ("page_pool: Add recycle stats to page_pool_put_page_bulk")
6b95e3388b1e ("page_pool: Add function to batch and return stats")
ad6fa1e1ab1b ("page_pool: Add recycle stats")
8610037e8106 ("page_pool: Add allocation stats")
64693ec7774e ("page_pool: Store the XDP mem id")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 368d3cb406cdd074d1df2ad9ec06d1bfcb664882 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng(a)huawei.com>
Date: Mon, 22 May 2023 11:17:14 +0800
Subject: [PATCH] page_pool: fix inconsistency for page_pool_ring_[un]lock()
page_pool_ring_[un]lock() use in_softirq() to decide which
spin lock variant to use, and when they are called in the
context with in_softirq() being false, spin_lock_bh() is
called in page_pool_ring_lock() while spin_unlock() is
called in page_pool_ring_unlock(), because spin_lock_bh()
has disabled the softirq in page_pool_ring_lock(), which
causes inconsistency for spin lock pair calling.
This patch fixes it by returning in_softirq state from
page_pool_producer_lock(), and use it to decide which
spin lock variant to use in page_pool_producer_unlock().
As pool->ring has both producer and consumer lock, so
rename it to page_pool_producer_[un]lock() to reflect
the actual usage. Also move them to page_pool.c as they
are only used there, and remove the 'inline' as the
compiler may have better idea to do inlining or not.
Fixes: 7886244736a4 ("net: page_pool: Add bulk support for ptr_ring")
Signed-off-by: Yunsheng Lin <linyunsheng(a)huawei.com>
Acked-by: Jesper Dangaard Brouer <brouer(a)redhat.com>
Acked-by: Ilias Apalodimas <ilias.apalodimas(a)linaro.org>
Link: https://lore.kernel.org/r/20230522031714.5089-1-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index c8ec2f34722b..126f9e294389 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -399,22 +399,4 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
page_pool_update_nid(pool, new_nid);
}
-static inline void page_pool_ring_lock(struct page_pool *pool)
- __acquires(&pool->ring.producer_lock)
-{
- if (in_softirq())
- spin_lock(&pool->ring.producer_lock);
- else
- spin_lock_bh(&pool->ring.producer_lock);
-}
-
-static inline void page_pool_ring_unlock(struct page_pool *pool)
- __releases(&pool->ring.producer_lock)
-{
- if (in_softirq())
- spin_unlock(&pool->ring.producer_lock);
- else
- spin_unlock_bh(&pool->ring.producer_lock);
-}
-
#endif /* _NET_PAGE_POOL_H */
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index e212e9d7edcb..a3e12a61d456 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -134,6 +134,29 @@ EXPORT_SYMBOL(page_pool_ethtool_stats_get);
#define recycle_stat_add(pool, __stat, val)
#endif
+static bool page_pool_producer_lock(struct page_pool *pool)
+ __acquires(&pool->ring.producer_lock)
+{
+ bool in_softirq = in_softirq();
+
+ if (in_softirq)
+ spin_lock(&pool->ring.producer_lock);
+ else
+ spin_lock_bh(&pool->ring.producer_lock);
+
+ return in_softirq;
+}
+
+static void page_pool_producer_unlock(struct page_pool *pool,
+ bool in_softirq)
+ __releases(&pool->ring.producer_lock)
+{
+ if (in_softirq)
+ spin_unlock(&pool->ring.producer_lock);
+ else
+ spin_unlock_bh(&pool->ring.producer_lock);
+}
+
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
@@ -617,6 +640,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
int count)
{
int i, bulk_len = 0;
+ bool in_softirq;
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);
@@ -635,7 +659,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
return;
/* Bulk producer into ptr_ring page_pool cache */
- page_pool_ring_lock(pool);
+ in_softirq = page_pool_producer_lock(pool);
for (i = 0; i < bulk_len; i++) {
if (__ptr_ring_produce(&pool->ring, data[i])) {
/* ring full */
@@ -644,7 +668,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
}
recycle_stat_add(pool, ring, i);
- page_pool_ring_unlock(pool);
+ page_pool_producer_unlock(pool, in_softirq);
/* Hopefully all pages was return into ptr_ring */
if (likely(i == bulk_len))
gcc 13 may assign another type to enumeration constants than gcc 12. Split
the large enum at the top of source file stex.c such that the type of the
constants used in time expressions is changed back to the same type chosen
by gcc 12. This patch suppresses compiler warnings like this one:
In file included from ./include/linux/bitops.h:7,
from ./include/linux/kernel.h:22,
from drivers/scsi/stex.c:13:
drivers/scsi/stex.c: In function ‘stex_common_handshake’:
./include/linux/typecheck.h:12:25: error: comparison of distinct pointer types lacks a cast [-Werror]
12 | (void)(&__dummy == &__dummy2); \
| ^~
./include/linux/jiffies.h:106:10: note: in expansion of macro ‘typecheck’
106 | typecheck(unsigned long, b) && \
| ^~~~~~~~~
drivers/scsi/stex.c:1035:29: note: in expansion of macro ‘time_after’
1035 | if (time_after(jiffies, before + MU_MAX_DELAY * HZ)) {
| ^~~~~~~~~~
See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107405.
Cc: stable(a)vger.kernel.org
Acked-by: Randy Dunlap <rdunlap(a)infradead.org>
Tested-by: Randy Dunlap <rdunlap(a)infradead.org> # build-tested
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
drivers/scsi/stex.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/scsi/stex.c b/drivers/scsi/stex.c
index 5b230e149c3d..8ffb75be99bc 100644
--- a/drivers/scsi/stex.c
+++ b/drivers/scsi/stex.c
@@ -109,7 +109,9 @@ enum {
TASK_ATTRIBUTE_HEADOFQUEUE = 0x1,
TASK_ATTRIBUTE_ORDERED = 0x2,
TASK_ATTRIBUTE_ACA = 0x4,
+};
+enum {
SS_STS_NORMAL = 0x80000000,
SS_STS_DONE = 0x40000000,
SS_STS_HANDSHAKE = 0x20000000,
@@ -121,7 +123,9 @@ enum {
SS_I2H_REQUEST_RESET = 0x2000,
SS_MU_OPERATIONAL = 0x80000000,
+};
+enum {
STEX_CDB_LENGTH = 16,
STATUS_VAR_LEN = 128,