From: Kees Cook <keescook(a)chromium.org>
commit 398d5843c03261a2b68730f2f00643826bcec6ba upstream.
The kernel is globally removing the ambiguous 0-length and 1-element
arrays in favor of flexible arrays, so that we can gain both compile-time
and run-time array bounds checking[1].
While struct fealist is defined as a "fake" flexible array (via a
1-element array), it is only used for examination of the first array
element. Walking the list is performed separately, so there is no reason
to treat the "list" member of struct fealist as anything other than a
single entry. Adjust the struct and code to match.
Additionally, struct fea uses the "name" member either as a dynamic
string, or is manually calculated from the start of the struct. Redefine
the member as a flexible array.
No machine code output differences are produced after these changes.
[1] For lots of details, see both:
https://docs.kernel.org/process/deprecated.html#zero-length-and-one-element…https://people.kernel.org/kees/bounded-flexible-arrays-in-c
Cc: Steve French <sfrench(a)samba.org>
Cc: Paulo Alcantara <pc(a)cjr.nz>
Cc: Ronnie Sahlberg <lsahlber(a)redhat.com>
Cc: Shyam Prasad N <sprasad(a)microsoft.com>
Cc: Tom Talpey <tom(a)talpey.com>
Cc: linux-cifs(a)vger.kernel.org
Cc: samba-technical(a)lists.samba.org
Signed-off-by: Kees Cook <keescook(a)chromium.org>
Signed-off-by: Steve French <stfrench(a)microsoft.com>
[ vt: Tested to not break build on x86_64 over v6.1.78. Bug report at [1]. ]
Link: https://lore.kernel.org/all/qjyfz2xftsbch6aozgplxyjfyqnuhn7j44udrucls4pqa5e…
Cc: stable(a)vger.kernel.org # 6.1
Signed-off-by: Vitaly Chikunov <vt(a)altlinux.org>
---
fs/smb/client/cifspdu.h | 4 ++--
fs/smb/client/cifssmb.c | 16 ++++++++--------
2 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h
index 97bb1838555b..96ed0a4a2ce2 100644
--- a/fs/smb/client/cifspdu.h
+++ b/fs/smb/client/cifspdu.h
@@ -2593,7 +2593,7 @@ struct fea {
unsigned char EA_flags;
__u8 name_len;
__le16 value_len;
- char name[1];
+ char name[];
/* optionally followed by value */
} __attribute__((packed));
/* flags for _FEA.fEA */
@@ -2601,7 +2601,7 @@ struct fea {
struct fealist {
__le32 list_len;
- struct fea list[1];
+ struct fea list;
} __attribute__((packed));
/* used to hold an arbitrary blob of data */
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 67c5fc2b2db9..784fc5ba2c44 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -5697,7 +5697,7 @@ CIFSSMBQAllEAs(const unsigned int xid, struct cifs_tcon *tcon,
/* account for ea list len */
list_len -= 4;
- temp_fea = ea_response_data->list;
+ temp_fea = &ea_response_data->list;
temp_ptr = (char *)temp_fea;
while (list_len > 0) {
unsigned int name_len;
@@ -5812,7 +5812,7 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
else
name_len = strnlen(ea_name, 255);
- count = sizeof(*parm_data) + ea_value_len + name_len;
+ count = sizeof(*parm_data) + 1 + ea_value_len + name_len;
pSMB->MaxParameterCount = cpu_to_le16(2);
/* BB find max SMB PDU from sess */
pSMB->MaxDataCount = cpu_to_le16(1000);
@@ -5836,14 +5836,14 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
byte_count = 3 /* pad */ + params + count;
pSMB->DataCount = cpu_to_le16(count);
parm_data->list_len = cpu_to_le32(count);
- parm_data->list[0].EA_flags = 0;
+ parm_data->list.EA_flags = 0;
/* we checked above that name len is less than 255 */
- parm_data->list[0].name_len = (__u8)name_len;
+ parm_data->list.name_len = (__u8)name_len;
/* EA names are always ASCII */
if (ea_name)
- strncpy(parm_data->list[0].name, ea_name, name_len);
- parm_data->list[0].name[name_len] = 0;
- parm_data->list[0].value_len = cpu_to_le16(ea_value_len);
+ strncpy(parm_data->list.name, ea_name, name_len);
+ parm_data->list.name[name_len] = '\0';
+ parm_data->list.value_len = cpu_to_le16(ea_value_len);
/* caller ensures that ea_value_len is less than 64K but
we need to ensure that it fits within the smb */
@@ -5851,7 +5851,7 @@ CIFSSMBSetEA(const unsigned int xid, struct cifs_tcon *tcon,
negotiated SMB buffer size BB */
/* if (ea_value_len > buffer_size - 512 (enough for header)) */
if (ea_value_len)
- memcpy(parm_data->list[0].name+name_len+1,
+ memcpy(parm_data->list.name + name_len + 1,
ea_value, ea_value_len);
pSMB->TotalDataCount = pSMB->DataCount;
--
2.42.1
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021802-crunchy-presoak-d1f4@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
5571e41ec6e5 ("btrfs: don't drop extent_map for free space inode on write error")
4c0c8cfc8433 ("btrfs: move btrfs_drop_extent_cache() to extent_map.c")
cef7820d6abf ("btrfs: fix missed extent on fsync after dropping extent maps")
570eb97bace8 ("btrfs: unify the lock/unlock extent variants")
dbbf49928f2e ("btrfs: remove the wake argument from clear_extent_bits")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Wed, 31 Jan 2024 14:27:25 -0500
Subject: [PATCH] btrfs: don't drop extent_map for free space inode on write
error
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return a EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bcc1c03437a..d232eca1bbee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021800-willed-chug-3616@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
5571e41ec6e5 ("btrfs: don't drop extent_map for free space inode on write error")
4c0c8cfc8433 ("btrfs: move btrfs_drop_extent_cache() to extent_map.c")
cef7820d6abf ("btrfs: fix missed extent on fsync after dropping extent maps")
570eb97bace8 ("btrfs: unify the lock/unlock extent variants")
dbbf49928f2e ("btrfs: remove the wake argument from clear_extent_bits")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Wed, 31 Jan 2024 14:27:25 -0500
Subject: [PATCH] btrfs: don't drop extent_map for free space inode on write
error
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return a EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bcc1c03437a..d232eca1bbee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021858-sharpie-diffusive-52e1@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
5571e41ec6e5 ("btrfs: don't drop extent_map for free space inode on write error")
4c0c8cfc8433 ("btrfs: move btrfs_drop_extent_cache() to extent_map.c")
cef7820d6abf ("btrfs: fix missed extent on fsync after dropping extent maps")
570eb97bace8 ("btrfs: unify the lock/unlock extent variants")
dbbf49928f2e ("btrfs: remove the wake argument from clear_extent_bits")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Wed, 31 Jan 2024 14:27:25 -0500
Subject: [PATCH] btrfs: don't drop extent_map for free space inode on write
error
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return a EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bcc1c03437a..d232eca1bbee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021856-vigorous-supper-1ca1@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
5571e41ec6e5 ("btrfs: don't drop extent_map for free space inode on write error")
4c0c8cfc8433 ("btrfs: move btrfs_drop_extent_cache() to extent_map.c")
cef7820d6abf ("btrfs: fix missed extent on fsync after dropping extent maps")
570eb97bace8 ("btrfs: unify the lock/unlock extent variants")
dbbf49928f2e ("btrfs: remove the wake argument from clear_extent_bits")
e3974c669472 ("btrfs: move core extent_io_tree functions to extent-io-tree.c")
38830018387e ("btrfs: move a few exported extent_io_tree helpers to extent-io-tree.c")
04eba8932392 ("btrfs: temporarily export and then move extent state helpers")
91af24e48474 ("btrfs: temporarily export and move core extent_io_tree tree functions")
6962541e964f ("btrfs: move btrfs_debug_check_extent_io_range into extent-io-tree.c")
ec39e39bbf97 ("btrfs: export wait_extent_bit")
a66318872c41 ("btrfs: move simple extent bit helpers out of extent_io.c")
ad795329574c ("btrfs: convert BUG_ON(EXTENT_BIT_LOCKED) checks to ASSERT's")
83cf709a89fb ("btrfs: move extent state init and alloc functions to their own file")
c45379a20fbc ("btrfs: temporarily export alloc_extent_state helpers")
a40246e8afc0 ("btrfs: separate out the eb and extent state leak helpers")
a62a3bd9546b ("btrfs: separate out the extent state and extent buffer init code")
87c11705cc94 ("btrfs: convert the io_failure_tree to a plain rb_tree")
a2061748052c ("btrfs: unexport internal failrec functions")
0d0a762c419a ("btrfs: rename clean_io_failure and remove extraneous args")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef(a)toxicpanda.com>
Date: Wed, 31 Jan 2024 14:27:25 -0500
Subject: [PATCH] btrfs: don't drop extent_map for free space inode on write
error
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return a EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7bcc1c03437a..d232eca1bbee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3184,8 +3184,23 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop extent maps for the part of the extent we didn't write. */
- btrfs_drop_extent_map_range(inode, unwritten_start, end, false);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(inode))
+ btrfs_drop_extent_map_range(inode, unwritten_start,
+ end, false);
/*
* If the ordered extent had an IOERR or something else went
Hi,
there's been a bug in btrfs space reservation since 6.7 that is now affecting
quite some users. I'd like to ask to add the fix right after it got merged to
Linus' tree so it can possibly be released in 6.7.5.
All apply cleanly on top of current 6.7.x tree. Thanks.
1693d5442c458ae8d5b0d58463b873cd879569ed
f4a9f219411f318ae60d6ff7f129082a75686c6c
12c5128f101bfa47a08e4c0e1a75cfa2d0872bcd
2f6397e448e689adf57e6788c90f913abd7e1af8
Short ids with subjects:
1693d5442c45 btrfs: add and use helper to check if block group is used
f4a9f219411f btrfs: do not delete unused block group if it may be used soon
12c5128f101b btrfs: add new unused block groups to the list of unused block groups
2f6397e448e6 btrfs: don't refill whole delayed refs block reserve when starting transaction
[adding the stable team]
On 05.02.24 18:07, Yang Shi wrote:
> On Sat, Feb 3, 2024 at 1:24 AM Thorsten Leemhuis
> <regressions(a)leemhuis.info> wrote:
>> On 18.01.24 14:35, Yang Shi wrote:
>>>
>>> The commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP
>>> boundaries") caused two issues [1] [2] reported on 32 bit system or compat
>>> userspace.
>>>
>>> It doesn't make too much sense to force huge page alignment on 32 bit
>>> system due to the constrained virtual address space.
>>>
>>> [1] https://lore.kernel.org/linux-mm/CAHbLzkqa1SCBA10yjWTtA2mKCsoK5+M1BthSDL8RO…
>>> [2] https://lore.kernel.org/linux-mm/CAHbLzkqa1SCBA10yjWTtA2mKCsoK5+M1BthSDL8RO…
>>
>> [FWIW, this is now 4ef9ad19e17676 ("mm: huge_memory: don't force huge
>> page alignment on 32 bit") in mainline]
>>
>> Quick question: it it okay to ask Greg to pick this up for linux-6.7.y
>> series?
>
> Yes, definitely. Thanks for following up.
In that case: Greg, could you please consider picking up 4ef9ad19e17676
("mm: huge_memory: don't force huge page alignment on 32 bit") for the
next linux-6.7 rc round? tia!
Ohh, and btw: you might also want to pick up c4608d1bf7c653 ("mm: mmap:
map MAP_STACK to VM_NOHUGEPAGE") if you haven't already done so: its
stable tag contains a typo, hence I guess your scripts might have missed
it (I only noticed that by chance).
Ciao, Thorsten
>> I'm wondering because Jiri's report ([1] in above quote) sounded like
>> this is something that will affect and annoy quite a few people with the
>> linux-6.7.y.
>>
>> Ciao, Thorsten
>>
>>> Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries")
>>> Reported-by: Jiri Slaby <jirislaby(a)kernel.org>
>>> Reported-by: Suren Baghdasaryan <surenb(a)google.com>
>>> Tested-by: Jiri Slaby <jirislaby(a)kernel.org>
>>> Tested-by: Suren Baghdasaryan <surenb(a)google.com>
>>> Cc: Rik van Riel <riel(a)surriel.com>
>>> Cc: Matthew Wilcox <willy(a)infradead.org>
>>> Cc: Christopher Lameter <cl(a)linux.com>
>>> Signed-off-by: Yang Shi <yang(a)os.amperecomputing.com>
>>> ---
>>> mm/huge_memory.c | 9 +++++++++
>>> 1 file changed, 9 insertions(+)
>>>
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 94ef5c02b459..e9fbaccbe0c0 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -37,6 +37,7 @@
>>> #include <linux/page_owner.h>
>>> #include <linux/sched/sysctl.h>
>>> #include <linux/memory-tiers.h>
>>> +#include <linux/compat.h>
>>>
>>> #include <asm/tlb.h>
>>> #include <asm/pgalloc.h>
>>> @@ -811,6 +812,14 @@ static unsigned long __thp_get_unmapped_area(struct file *filp,
>>> loff_t off_align = round_up(off, size);
>>> unsigned long len_pad, ret;
>>>
>>> + /*
>>> + * It doesn't make too much sense to froce huge page alignment on
>>> + * 32 bit system or compat userspace due to the contrained virtual
>>> + * address space and address entropy.
>>> + */
>>> + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall())
>>> + return 0;
>>> +
>>> if (off_end <= off_align || (off_end - off_align) < size)
>>> return 0;
>>>
>
>
Xtensa has two-argument MAKE_PC_FROM_RA macro to convert a0 to an actual
return address because when windowed ABI is used call{,x}{4,8,12}
opcodes stuff encoded window size into the top 2 bits of the register
that becomes a return address in the called function. Second argument of
that macro is supposed to be an address having these 2 topmost bits set
correctly, but the comment suggested that that could be the stack
address. However the stack doesn't have to be in the same 1GByte region
as the code, especially in noMMU XIP configurations.
Fix the comment and use either _text or regs->pc as the second argument
for the MAKE_PC_FROM_RA macro.
Cc: stable(a)vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc(a)gmail.com>
---
arch/xtensa/include/asm/processor.h | 8 ++++----
arch/xtensa/include/asm/ptrace.h | 2 +-
arch/xtensa/kernel/process.c | 5 +++--
arch/xtensa/kernel/stacktrace.c | 3 ++-
4 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index d008a153a2b9..7ed1a2085bd7 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -115,9 +115,9 @@
#define MAKE_RA_FOR_CALL(ra,ws) (((ra) & 0x3fffffff) | (ws) << 30)
/* Convert return address to a valid pc
- * Note: We assume that the stack pointer is in the same 1GB ranges as the ra
+ * Note: 'text' is the address within the same 1GB range as the ra
*/
-#define MAKE_PC_FROM_RA(ra,sp) (((ra) & 0x3fffffff) | ((sp) & 0xc0000000))
+#define MAKE_PC_FROM_RA(ra, text) (((ra) & 0x3fffffff) | ((unsigned long)(text) & 0xc0000000))
#elif defined(__XTENSA_CALL0_ABI__)
@@ -127,9 +127,9 @@
#define MAKE_RA_FOR_CALL(ra, ws) (ra)
/* Convert return address to a valid pc
- * Note: We assume that the stack pointer is in the same 1GB ranges as the ra
+ * Note: 'text' is not used as 'ra' is always the full address
*/
-#define MAKE_PC_FROM_RA(ra, sp) (ra)
+#define MAKE_PC_FROM_RA(ra, text) (ra)
#else
#error Unsupported Xtensa ABI
diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h
index a270467556dc..86c70117371b 100644
--- a/arch/xtensa/include/asm/ptrace.h
+++ b/arch/xtensa/include/asm/ptrace.h
@@ -87,7 +87,7 @@ struct pt_regs {
# define user_mode(regs) (((regs)->ps & 0x00000020)!=0)
# define instruction_pointer(regs) ((regs)->pc)
# define return_pointer(regs) (MAKE_PC_FROM_RA((regs)->areg[0], \
- (regs)->areg[1]))
+ (regs)->pc))
# ifndef CONFIG_SMP
# define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index a815577d25fd..7bd66677f7b6 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -47,6 +47,7 @@
#include <asm/asm-offsets.h>
#include <asm/regs.h>
#include <asm/hw_breakpoint.h>
+#include <asm/sections.h>
#include <asm/traps.h>
extern void ret_from_fork(void);
@@ -380,7 +381,7 @@ unsigned long __get_wchan(struct task_struct *p)
int count = 0;
sp = p->thread.sp;
- pc = MAKE_PC_FROM_RA(p->thread.ra, p->thread.sp);
+ pc = MAKE_PC_FROM_RA(p->thread.ra, _text);
do {
if (sp < stack_page + sizeof(struct task_struct) ||
@@ -392,7 +393,7 @@ unsigned long __get_wchan(struct task_struct *p)
/* Stack layout: sp-4: ra, sp-3: sp' */
- pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), sp);
+ pc = MAKE_PC_FROM_RA(SPILL_SLOT(sp, 0), _text);
sp = SPILL_SLOT(sp, 1);
} while (count++ < 16);
return 0;
diff --git a/arch/xtensa/kernel/stacktrace.c b/arch/xtensa/kernel/stacktrace.c
index 831ffb648bda..ed324fdf2a2f 100644
--- a/arch/xtensa/kernel/stacktrace.c
+++ b/arch/xtensa/kernel/stacktrace.c
@@ -13,6 +13,7 @@
#include <linux/stacktrace.h>
#include <asm/ftrace.h>
+#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/traps.h>
#include <linux/uaccess.h>
@@ -189,7 +190,7 @@ void walk_stackframe(unsigned long *sp,
if (a1 <= (unsigned long)sp)
break;
- frame.pc = MAKE_PC_FROM_RA(a0, a1);
+ frame.pc = MAKE_PC_FROM_RA(a0, _text);
frame.sp = a1;
if (fn(&frame, data))
--
2.39.2
I didn't think to mark these for stable in the commits, but they
definitely should go into the stable queue, since it's a known
mis-compilation of the kvm nested guest code with gcc-11 otherwise.
The bug technically affects other gcc versions too, but apparently not
so that we'd actually notice.
It's two commits:
4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs")
68fb3ca0e408 ("update workarounds for gcc "asm goto" issue")
where the first one works around the problem, and the second one
("update") just ends up pinpointing exactly which gcc versions are
affected so that future gcc releases won't get the unnecessary
workaround.
Technically only the first one really needs to go into stable. The
second one is more of a judgement call - do you want to match
upstream, and do you care about the (very slight) code generation
improvement with updated gcc versions?
Linus
From: David Woodhouse <dwmw(a)amazon.co.uk>
Linux guests since commit b1c3497e604d ("x86/xen: Add support for
HVMOP_set_evtchn_upcall_vector") in v6.0 onwards will use the per-vCPU
upcall vector when it's advertised in the Xen CPUID leaves.
This upcall is injected through the guest's local APIC as an MSI, unlike
the older system vector which was merely injected by the hypervisor any
time the CPU was able to receive an interrupt and the upcall_pending
flags is set in its vcpu_info.
Effectively, that makes the per-CPU upcall edge triggered instead of
level triggered, which results in the upcall being lost if the MSI is
delivered when the local APIC is *disabled*.
Xen checks the vcpu_info->evtchn_upcall_pending flag when the local APIC
for a vCPU is software enabled (in fact, on any write to the SPIV
register which doesn't disable the APIC). Do the same in KVM since KVM
doesn't provide a way for userspace to intervene and trap accesses to
the SPIV register of a local APIC emulated by KVM.
Fixes: fde0451be8fb3 ("KVM: x86/xen: Support per-vCPU event channel upcall via local APIC")
Signed-off-by: David Woodhouse <dwmw(a)amazon.co.uk>
Reviewed-by: Paul Durrant <paul(a)xen.org>
Cc: stable(a)vger.kernel.org
---
arch/x86/kvm/lapic.c | 5 ++++-
arch/x86/kvm/xen.c | 2 +-
arch/x86/kvm/xen.h | 18 ++++++++++++++++++
3 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3242f3da2457..75bc7d3f0022 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -41,6 +41,7 @@
#include "ioapic.h"
#include "trace.h"
#include "x86.h"
+#include "xen.h"
#include "cpuid.h"
#include "hyperv.h"
#include "smm.h"
@@ -499,8 +500,10 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
}
/* Check if there are APF page ready requests pending */
- if (enabled)
+ if (enabled) {
kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
+ kvm_xen_sw_enable_lapic(apic->vcpu);
+ }
}
static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 4a24899bbcfa..847d9e75df6d 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -568,7 +568,7 @@ void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable);
}
-static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
+void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
{
struct kvm_lapic_irq irq = { };
int r;
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index f8f1fe22d090..f5841d9000ae 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -18,6 +18,7 @@ extern struct static_key_false_deferred kvm_xen_enabled;
int __kvm_xen_has_interrupt(struct kvm_vcpu *vcpu);
void kvm_xen_inject_pending_events(struct kvm_vcpu *vcpu);
+void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *vcpu);
int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
@@ -36,6 +37,19 @@ int kvm_xen_setup_evtchn(struct kvm *kvm,
const struct kvm_irq_routing_entry *ue);
void kvm_xen_update_tsc_info(struct kvm_vcpu *vcpu);
+static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
+{
+ /*
+ * The local APIC is being enabled. If the per-vCPU upcall vector is
+ * set and the vCPU's evtchn_upcall_pending flag is set, inject the
+ * interrupt.
+ */
+ if (static_branch_unlikely(&kvm_xen_enabled.key) &&
+ vcpu->arch.xen.vcpu_info_cache.active &&
+ vcpu->arch.xen.upcall_vector && __kvm_xen_has_interrupt(vcpu))
+ kvm_xen_inject_vcpu_vector(vcpu);
+}
+
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -101,6 +115,10 @@ static inline void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
{
}
+static inline void kvm_xen_sw_enable_lapic(struct kvm_vcpu *vcpu)
+{
+}
+
static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
{
return false;
--
2.43.0
During the PCI AER system's error recovery process, the kernel driver
may encounter a race condition with freeing the reset_data structure's
memory. If the device restart will take more than 10 seconds the function
scheduling that restart will exit due to a timeout, and the reset_data
structure will be freed. However, this data structure is used for
completion notification after the restart is completed, which leads
to a UAF bug.
This results in a KFENCE bug notice.
BUG: KFENCE: use-after-free read in adf_device_reset_worker+0x38/0xa0 [intel_qat]
Use-after-free read at 0x00000000bc56fddf (in kfence-#142):
adf_device_reset_worker+0x38/0xa0 [intel_qat]
process_one_work+0x173/0x340
To resolve this race condition, the memory associated to the container
of the work_struct is freed on the worker if the timeout expired,
otherwise on the function that schedules the worker.
The timeout detection can be done by checking if the caller is
still waiting for completion or not by using completion_done() function.
Fixes: d8cba25d2c68 ("crypto: qat - Intel(R) QAT driver framework")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Damian Muszynski <damian.muszynski(a)intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu(a)intel.com>
---
drivers/crypto/intel/qat/qat_common/adf_aer.c | 22 ++++++++++++++-----
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_aer.c b/drivers/crypto/intel/qat/qat_common/adf_aer.c
index 3597e7605a14..9da2278bd5b7 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_aer.c
@@ -130,7 +130,8 @@ static void adf_device_reset_worker(struct work_struct *work)
if (adf_dev_restart(accel_dev)) {
/* The device hanged and we can't restart it so stop here */
dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
- if (reset_data->mode == ADF_DEV_RESET_ASYNC)
+ if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
+ completion_done(&reset_data->compl))
kfree(reset_data);
WARN(1, "QAT: device restart failed. Device is unusable\n");
return;
@@ -146,11 +147,19 @@ static void adf_device_reset_worker(struct work_struct *work)
adf_dev_restarted_notify(accel_dev);
clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
- /* The dev is back alive. Notify the caller if in sync mode */
- if (reset_data->mode == ADF_DEV_RESET_SYNC)
- complete(&reset_data->compl);
- else
+ /*
+ * The dev is back alive. Notify the caller if in sync mode
+ *
+ * If device restart will take a more time than expected,
+ * the schedule_reset() function can timeout and exit. This can be
+ * detected by calling the completion_done() function. In this case
+ * the reset_data structure needs to be freed here.
+ */
+ if (reset_data->mode == ADF_DEV_RESET_ASYNC ||
+ completion_done(&reset_data->compl))
kfree(reset_data);
+ else
+ complete(&reset_data->compl);
}
static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
@@ -183,8 +192,9 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
dev_err(&GET_DEV(accel_dev),
"Reset device timeout expired\n");
ret = -EFAULT;
+ } else {
+ kfree(reset_data);
}
- kfree(reset_data);
return ret;
}
return 0;
base-commit: 86f2ff2d4ec09a7eea931a56fbed2105037ba2ee
--
2.43.0
The implementation of the Rate Limiting (RL) feature includes the cleanup
of all SLAs during device shutdown. For each SLA, the firmware is notified
of the removal through an admin message, the data structures that take
into account the budgets are updated and the memory is freed.
However, this explicit cleanup is not necessary as (1) the device is
reset, and the firmware state is lost and (2) all RL data structures
are freed anyway.
In addition, if the device is unresponsive, for example after a PCI
AER error is detected, the admin interface might not be available.
This might slow down the shutdown sequence and cause a timeout in
the recovery flows which in turn makes the driver believe that the
device is not recoverable.
Fix by replacing the explicit SLAs removal with just a free of the
SLA data structures.
Fixes: d9fb8408376e ("crypto: qat - add rate limiting feature to qat_4xxx")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Damian Muszynski <damian.muszynski(a)intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu(a)intel.com>
---
drivers/crypto/intel/qat/qat_common/adf_rl.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c
index de1b214dba1f..d4f2db3c53d8 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_rl.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c
@@ -788,6 +788,24 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla)
sla_type_arr[node_id] = NULL;
}
+static void free_all_sla(struct adf_accel_dev *accel_dev)
+{
+ struct adf_rl *rl_data = accel_dev->rate_limiting;
+ int sla_id;
+
+ mutex_lock(&rl_data->rl_lock);
+
+ for (sla_id = 0; sla_id < RL_NODES_CNT_MAX; sla_id++) {
+ if (!rl_data->sla[sla_id])
+ continue;
+
+ kfree(rl_data->sla[sla_id]);
+ rl_data->sla[sla_id] = NULL;
+ }
+
+ mutex_unlock(&rl_data->rl_lock);
+}
+
/**
* add_update_sla() - handles the creation and the update of an SLA
* @accel_dev: pointer to acceleration device structure
@@ -1155,7 +1173,7 @@ void adf_rl_stop(struct adf_accel_dev *accel_dev)
return;
adf_sysfs_rl_rm(accel_dev);
- adf_rl_remove_sla_all(accel_dev, true);
+ free_all_sla(accel_dev);
}
void adf_rl_exit(struct adf_accel_dev *accel_dev)
base-commit: 84c2f23ad68a847e36dc9cae44f21c5e321a321c
--
2.43.0
[BUG]
There is a bug report that with zoned device and sectorsize is smaller
than page size (aka, subpage), btrfs would crash with a very basic
workload:
# getconfig PAGESIZE
16384
# mkfs.btrfs -f $dev -s 4k
# mount $dev $mnt
# $fsstress -w -n 8 -s 1707820327 -v -d $mnt
# umount $mnt
The crash would look like this (with CONFIG_BTRFS_ASSERT enabled):
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1384
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1384!
CPU: 0 PID: 872 Comm: kworker/u9:2 Tainted: G OE 6.8.0-rc3-custom+ #7
Hardware name: QEMU KVM Virtual Machine, BIOS edk2-20231122-12.fc39 11/22/2023
Workqueue: writeback wb_workfn (flush-btrfs-8)
pc : __extent_writepage_io+0x404/0x460 [btrfs]
lr : __extent_writepage_io+0x404/0x460 [btrfs]
Call trace:
__extent_writepage_io+0x404/0x460 [btrfs]
extent_write_locked_range+0x16c/0x460 [btrfs]
run_delalloc_cow+0x88/0x118 [btrfs]
btrfs_run_delalloc_range+0x128/0x228 [btrfs]
writepage_delalloc+0xb8/0x178 [btrfs]
__extent_writepage+0xc8/0x3a0 [btrfs]
extent_write_cache_pages+0x1cc/0x460 [btrfs]
extent_writepages+0x8c/0x120 [btrfs]
btrfs_writepages+0x18/0x30 [btrfs]
do_writepages+0x94/0x1f8
__writeback_single_inode+0x4c/0x388
writeback_sb_inodes+0x208/0x4b0
wb_writeback+0x118/0x3c0
wb_do_writeback+0xbc/0x388
wb_workfn+0x80/0x240
process_one_work+0x154/0x3c8
worker_thread+0x2bc/0x3e0
kthread+0xf4/0x108
ret_from_fork+0x10/0x20
Code: 9102c021 90000be0 91378000 9402bf53 (d4210000)
---[ end trace 0000000000000000 ]---
[CAUSE]
There are several factors causing the problem:
1. __extent_writepage_io() requires all dirty ranges to have delalloc
executed
This can be solved by adding @start and @len parameter to only submit
IO for a subset of the page, and update several involved helpers to
do subpage checks.
So this is not a big deal.
2. Subpage only accepts for full page aligned ranges for
extent_write_locked_range()
For zoned device, regular COW is switched to utilize
extent_write_locked_range() to submit the IO.
But the caller, run_delalloc_cow() can be called to run on a subpage
range, e.g.
0 4K 8K 12K 16K
|/////| |/////|
Where |///| is the dirtied range.
In that case, btrfs_run_delalloc_range() would call run_delalloc_cow(),
which would call extent_write_locked_range() for [0, 4K), and unlock
the whole [0, 16K) page.
But btrfs_run_delalloc_range() would again be called for range [8K,
12K), as there are still dirty range left.
In that case, since the whole page is already unlocked by previous
iteration, and would cause different ASSERT()s inside
extent_write_locked_range().
That's also why compression for subpage cases require fully page
aligned range.
[WORKAROUND]
A proper fix requires some big changes to delalloc workload, to allow
extent_write_locked_range() to handle multiple different entries with
the same @locked_page.
So for now, disable read-write support for subpage zoned btrfs.
The problem can only be solved if subpage btrfs can handle subpage
compression, which need quite some work on the delalloc procedure for
the @locked_page handling.
Reported-by: HAN Yuwei <hrx(a)bupt.moe>
Link: https://lore.kernel.org/all/1ACD2E3643008A17+da260584-2c7f-432a-9e22-9d390a…
CC: stable(a)vger.kernel.org # 5.10+
Signed-off-by: Qu Wenruo <wqu(a)suse.com>
---
fs/btrfs/disk-io.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c3ab268533ca..85cd23aebdd6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3193,7 +3193,8 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
* part of @locked_page.
* That's also why compression for subpage only work for page aligned ranges.
*/
- if (fs_info->sectorsize < PAGE_SIZE && btrfs_is_zoned(fs_info) && is_rw_mount) {
+ if (fs_info->sectorsize < PAGE_SIZE &&
+ btrfs_fs_incompat(fs_info, ZONED) && is_rw_mount) {
btrfs_warn(fs_info,
"no zoned read-write support for page size %lu with sectorsize %u",
PAGE_SIZE, fs_info->sectorsize);
--
2.43.0
Before commit 07c30ea5861f ("serial: Do not hold the port lock when setting
rx-during-tx GPIO") the SER_RS485_RX_DURING_TX flag was only set if the
rx-during-tx mode was not controlled by a GPIO. Now the flag is set
unconditionally when RS485 is enabled. This results in an incorrect setting
if the rx-during-tx GPIO is not asserted.
Fix this by setting the flag only if the rx-during-tx mode is not
controlled by a GPIO and thus restore the correct behaviour.
Cc: <stable(a)vger.kernel.org> # 6.6+
Fixes: 07c30ea5861f ("serial: Do not hold the port lock when setting rx-during-tx GPIO")
Signed-off-by: Lino Sanfilippo <l.sanfilippo(a)kunbus.com>
---
drivers/tty/serial/stm32-usart.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 794b77512740..693e932d6feb 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -251,7 +251,9 @@ static int stm32_usart_config_rs485(struct uart_port *port, struct ktermios *ter
writel_relaxed(cr3, port->membase + ofs->cr3);
writel_relaxed(cr1, port->membase + ofs->cr1);
- rs485conf->flags |= SER_RS485_RX_DURING_TX;
+ if (!port->rs485_rx_during_tx_gpio)
+ rs485conf->flags |= SER_RS485_RX_DURING_TX;
+
} else {
stm32_usart_clr_bits(port, ofs->cr3,
USART_CR3_DEM | USART_CR3_DEP);
base-commit: 841c35169323cd833294798e58b9bf63fa4fa1de
--
2.43.0
From: Damien Le Moal <dlemoal(a)kernel.org>
For regular system shutdown, ata_dev_power_set_standby() will be
executed twice: once the scsi device is removed and another when
ata_pci_shutdown_one() executes and EH completes unloading the devices.
Make the second call to ata_dev_power_set_standby() do nothing by using
ata_dev_power_is_active() and return if the device is already in
standby.
Fixes: 2da4c5e24e86 ("ata: libata-core: Improve ata_dev_power_set_active()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal(a)kernel.org>
Signed-off-by: Niklas Cassel <cassel(a)kernel.org>
---
This fix was originally part of patch that contained both a fix and
a revert in a single patch:
https://lore.kernel.org/linux-ide/20240111115123.1258422-3-dlemoal@kernel.o…
This patch contains the only the fix (as it is valid even without the
revert), without the revert.
Updated the Fixes tag to point to a more appropriate commit, since we
no longer revert any code.
drivers/ata/libata-core.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index d9f80f4f70f5..af2334bc806d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -85,6 +85,7 @@ static unsigned int ata_dev_init_params(struct ata_device *dev,
static unsigned int ata_dev_set_xfermode(struct ata_device *dev);
static void ata_dev_xfermask(struct ata_device *dev);
static unsigned long ata_dev_blacklisted(const struct ata_device *dev);
+static bool ata_dev_power_is_active(struct ata_device *dev);
atomic_t ata_print_id = ATOMIC_INIT(0);
@@ -2017,8 +2018,9 @@ void ata_dev_power_set_standby(struct ata_device *dev)
struct ata_taskfile tf;
unsigned int err_mask;
- /* If the device is already sleeping, do nothing. */
- if (dev->flags & ATA_DFLAG_SLEEPING)
+ /* If the device is already sleeping or in standby, do nothing. */
+ if ((dev->flags & ATA_DFLAG_SLEEPING) ||
+ !ata_dev_power_is_active(dev))
return;
/*
--
2.43.1
After commit 4014ae236b1d ("platform/x86: x86-android-tablets: Stop using
gpiolib private APIs") the touchscreen in the keyboard half of
the Lenovo Yogabook1 X90 stopped working with the following error:
Goodix-TS i2c-goodix_ts: error -EBUSY: Failed to get irq GPIO
The problem is that when getting the IRQ for instantiated i2c_client-s
from a GPIO (rather then using an IRQ directly from the IOAPIC),
x86_acpi_irq_helper_get() now properly requests the GPIO, which disallows
other drivers from requesting it. Normally this is a good thing, but
the goodix touchscreen also uses the IRQ as an output during reset
to select which of its 2 possible I2C addresses should be used.
Add a new free_gpio flag to struct x86_acpi_irq_data to deal with this
and release the GPIO after getting the IRQ in this special case.
Fixes: 4014ae236b1d ("platform/x86: x86-android-tablets: Stop using gpiolib private APIs")
Cc: stable(a)vger.kernel.org
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/platform/x86/x86-android-tablets/core.c | 3 +++
drivers/platform/x86/x86-android-tablets/lenovo.c | 1 +
drivers/platform/x86/x86-android-tablets/x86-android-tablets.h | 1 +
3 files changed, 5 insertions(+)
diff --git a/drivers/platform/x86/x86-android-tablets/core.c b/drivers/platform/x86/x86-android-tablets/core.c
index f8221a15575b..f6547c9d7584 100644
--- a/drivers/platform/x86/x86-android-tablets/core.c
+++ b/drivers/platform/x86/x86-android-tablets/core.c
@@ -113,6 +113,9 @@ int x86_acpi_irq_helper_get(const struct x86_acpi_irq_data *data)
if (irq_type != IRQ_TYPE_NONE && irq_type != irq_get_trigger_type(irq))
irq_set_irq_type(irq, irq_type);
+ if (data->free_gpio)
+ devm_gpiod_put(&x86_android_tablet_device->dev, gpiod);
+
return irq;
case X86_ACPI_IRQ_TYPE_PMIC:
status = acpi_get_handle(NULL, data->chip, &handle);
diff --git a/drivers/platform/x86/x86-android-tablets/lenovo.c b/drivers/platform/x86/x86-android-tablets/lenovo.c
index f1c66a61bfc5..c297391955ad 100644
--- a/drivers/platform/x86/x86-android-tablets/lenovo.c
+++ b/drivers/platform/x86/x86-android-tablets/lenovo.c
@@ -116,6 +116,7 @@ static const struct x86_i2c_client_info lenovo_yb1_x90_i2c_clients[] __initconst
.trigger = ACPI_EDGE_SENSITIVE,
.polarity = ACPI_ACTIVE_LOW,
.con_id = "goodix_ts_irq",
+ .free_gpio = true,
},
}, {
/* Wacom Digitizer in keyboard half */
diff --git a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h
index 49fed9410adb..468993edfeee 100644
--- a/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h
+++ b/drivers/platform/x86/x86-android-tablets/x86-android-tablets.h
@@ -39,6 +39,7 @@ struct x86_acpi_irq_data {
int index;
int trigger; /* ACPI_EDGE_SENSITIVE / ACPI_LEVEL_SENSITIVE */
int polarity; /* ACPI_ACTIVE_HIGH / ACPI_ACTIVE_LOW / ACPI_ACTIVE_BOTH */
+ bool free_gpio; /* Release GPIO after getting IRQ (for TYPE_GPIOINT) */
const char *con_id;
};
--
2.43.0
DAMON_RECLAIM and DAMON_LRU_SORT is not preserving internal quota status
when applying new user parameters, and hence could cause temporal quota
accuracy degradation. Fix it by preserving the status.
SeongJae Park (2):
mm/damon/reclaim: fix quota stauts loss due to online tunings
mm/damon/lru_sort: fix quota status loss due to online tunings
mm/damon/lru_sort.c | 43 ++++++++++++++++++++++++++++++++++++-------
mm/damon/reclaim.c | 18 +++++++++++++++++-
2 files changed, 53 insertions(+), 8 deletions(-)
base-commit: 0f8cac70960349ba21deb424bd41bc4f4362c113
--
2.39.2
__setup() handlers should return 1 to obsolete_checksetup() in
init/main.c to indicate that the boot option has been handled.
A return of 0 causes the boot option/value to be listed as an Unknown
kernel parameter and added to init's (limited) argument or environment
strings. Also, error return codes don't mean anything to
obsolete_checksetup() -- only non-zero (usually 1) or zero.
So return 1 from vdso_setup().
Fixes: 9a08862a5d2e ("vDSO for sparc")
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Reported-by: Igor Zhbanov <izh1979(a)gmail.com>
Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: sparclinux(a)vger.kernel.org
Cc: Dan Carpenter <dan.carpenter(a)oracle.com>
Cc: Nick Alcock <nick.alcock(a)oracle.com>
Cc: Sam Ravnborg <sam(a)ravnborg.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Andreas Larsson <andreas(a)gaisler.com>
---
v2: correct the Fixes: tag (Dan Carpenter)
v3: add more Cc's;
correct Igor's email address;
change From: Igor to Reported-by: Igor;
v4: add Arnd to Cc: list
v5: add Andreas to Cc: list
arch/sparc/vdso/vma.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff -- a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c
--- a/arch/sparc/vdso/vma.c
+++ b/arch/sparc/vdso/vma.c
@@ -449,9 +449,8 @@ static __init int vdso_setup(char *s)
unsigned long val;
err = kstrtoul(s, 10, &val);
- if (err)
- return err;
- vdso_enabled = val;
- return 0;
+ if (!err)
+ vdso_enabled = val;
+ return 1;
}
__setup("vdso=", vdso_setup);
__setup() handlers should return 1 to obsolete_checksetup() in
init/main.c to indicate that the boot option has been handled.
A return of 0 causes the boot option/value to be listed as an Unknown
kernel parameter and added to init's (limited) argument or environment
strings. Also, error return codes don't mean anything to
obsolete_checksetup() -- only non-zero (usually 1) or zero.
So return 1 from setup_nmi_watchdog().
Fixes: e5553a6d0442 ("sparc64: Implement NMI watchdog on capable cpus.")
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Reported-by: Igor Zhbanov <izh1979(a)gmail.com>
Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru
Cc: "David S. Miller" <davem(a)davemloft.net>
Cc: sparclinux(a)vger.kernel.org
Cc: Sam Ravnborg <sam(a)ravnborg.org>
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Andreas Larsson <andreas(a)gaisler.com>
---
v2: change From: Igor to Reported-by:
add more Cc's
v3: use Igor's current email address
v4: add Arnd to Cc: list
v5: add Andreas to Cc: list
arch/sparc/kernel/nmi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -- a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -279,7 +279,7 @@ static int __init setup_nmi_watchdog(cha
if (!strncmp(str, "panic", 5))
panic_on_timeout = 1;
- return 0;
+ return 1;
}
__setup("nmi_watchdog=", setup_nmi_watchdog);
Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full
page") removed the logic which checks whether a VPD page is present on
the supported pages list before asking for the page itself. That was
done because SPC helpfully states "The Supported VPD Pages VPD page
list may or may not include all the VPD pages that are able to be
returned by the device server". Testing had revealed a few devices
that supported some of the 0xBn pages but didn't actually list them in
page 0.
Julian Sikorski bisected a problem with his drive resetting during
discovery to the commit above. As it turns out, this particular drive
firmware will crash if we attempt to fetch page 0xB9.
Various approaches were attempted to work around this. In the end,
reinstating the logic that consults VPD page 0 before fetching any
other page was the path of least resistance. A firmware update for the
devices which originally compelled us to remove the check has since
been released.
Cc: stable(a)vger.kernel.org
Cc: Bart Van Assche <bvanassche(a)acm.org>
Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page")
Reported-by: Julian Sikorski <belegdol(a)gmail.com>
Tested-by: Julian Sikorski <belegdol(a)gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
---
v2: Address Bart's comments.
---
drivers/scsi/scsi.c | 22 ++++++++++++++++++++--
include/scsi/scsi_device.h | 4 ----
2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 76d369343c7a..8cad9792a562 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -328,21 +328,39 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer,
return result + 4;
}
+enum scsi_vpd_parameters {
+ SCSI_VPD_HEADER_SIZE = 4,
+ SCSI_VPD_LIST_SIZE = 36,
+};
+
static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)
{
- unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4);
+ unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4);
int result;
if (sdev->no_vpd_size)
return SCSI_DEFAULT_VPD_LEN;
+ /*
+ * Fetch the supported pages VPD and validate that the requested page
+ * number is present.
+ */
+ if (page != 0) {
+ result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd));
+ if (result < SCSI_VPD_HEADER_SIZE)
+ return 0;
+
+ result -= SCSI_VPD_HEADER_SIZE;
+ if (!memchr(&vpd[SCSI_VPD_HEADER_SIZE], page, result))
+ return 0;
+ }
/*
* Fetch the VPD page header to find out how big the page
* is. This is done to prevent problems on legacy devices
* which can not handle allocation lengths as large as
* potentially requested by the caller.
*/
- result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header));
+ result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE);
if (result < 0)
return 0;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index cb019c80763b..72a6b3923fc7 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -100,10 +100,6 @@ struct scsi_vpd {
unsigned char data[];
};
-enum scsi_vpd_parameters {
- SCSI_VPD_HEADER_SIZE = 4,
-};
-
struct scsi_device {
struct Scsi_Host *host;
struct request_queue *request_queue;
--
2.42.1
I've found a regression somewhere between 6.7.4 and 6.8.0-rc0 that causes my Framework 7840 AMD laptop to freeze after waking it from a suspend. I can reliably trigger the issue, but unfortunately cannot provide useful logs for now and hope to get some help with doing so.
How to reproduce
----------------
The last working kernel release was 6.7.4, the issue appeared in all 6.8 release candidates from 1-4.
1. normally boot the system with a 6.8 kernel
2. suspend and resume the system 2 to 4 times. Usually, the freeze already occurs at the 2nd resume.
2.1 graphical approach: I've reproduced this directly from the "Suspend" button in my SDDM display manager (X11)
2.2 TTY approach: You can also directly swith to a tty after boot, log in, and then issue `systemctl suspend` 2 to 4 times
3. After resume number 2 or later, the system freezes while resuming and cannot be used anymore. The screen is switched on and displays something, but no inputs are processed and the image is static.
3.1 graphical approach: When suspending and resuming by closing and opening the laptop lid, the screen is black with the cursor displayed. When doing so while keeping the lid open, the display manager's background image and a cursor are displayed, but only statically frozen. No keyboard or touchpad input is processed.
3.2 on a TTY: When suspending and resuming from a tty, a few kernel messages still manage to be printed, but after that no new information is displayed. E.g. when keeping a `journalctl -f` session open in another tmux pane, that journal output does not update anymore. Keyboard inputs are not processed anymore. Nonetheless, the cursor continues to blink regulary.
I've attached two screenshots showing the situation with kernels 6.7.4 (working) and 6.8 (broken).
Detailed Description
--------------------
In most cases, the freeze already occurs after the 2nd suspend-resume-cycle.
Unfortunately, this freeze also appears to block IO, as after a forced hard reboot I cannot retrieve relevant information from `journalctl -b "-1"`. The last retrievable log messages are from the successful suspend action.
I welcome any recommendation on how to retrieve valuable information. I have not yet played around with cmdline parameters relevant for debug.
System Details
--------------
Distro: NixOS 23.11, kernel 6.8 rc1-4 compiled manually
kernel: Linux version 6.8.0-rc4 (nixbld@localhost) (gcc (GCC) 12.3.0, GNU ld (GNU Binutils) 2.40) #1-NixOS SMP PREEMPT_DYNAMIC Sun Feb 11 20:18:13 UTC 2024
hardware: Framework 13 laptop, 7840 AMD series, CPU AMD Ryzen 7 7840U x86_64
In the attachments you find:
- the used kernel config
- my cmdline params
- 2 screenshots from the bug occuring when triggered from a tty
Next Steps
----------
I intend to bisect the issue, but this can take a while due to the need for manual testing and a kernel compile cycle requiring >30min for now.
I am mostly reporting this now already to still raise awareness in the RC phase and to have something where I can point other Framework laptop users towards for reproduction of the bug.
All the best
schmittlauch
Disclaimer:
I read the kernel docs on reporting issues, IMHO these were a bit unclear on whether RC kernels are in the scope of the stable and regressions lists.
If they aren't please point me to where to do this. So far I had only reported bugs via the bugzilla.
In current scenario if Plug-out and Plug-In performed continuously
there could be a chance while checking for dwc->gadget_driver in
dwc3_gadget_suspend, a NULL pointer dereference may occur.
Call Stack:
CPU1: CPU2:
gadget_unbind_driver dwc3_suspend_common
dwc3_gadget_stop dwc3_gadget_suspend
dwc3_disconnect_gadget
CPU1 basically clears the variable and CPU2 checks the variable.
Consider CPU1 is running and right before gadget_driver is cleared
and in parallel CPU2 executes dwc3_gadget_suspend where it finds
dwc->gadget_driver which is not NULL and resumes execution and then
CPU1 completes execution. CPU2 executes dwc3_disconnect_gadget where
it checks dwc->gadget_driver is already NULL because of which the
NULL pointer deference occur.
Cc: <stable(a)vger.kernel.org>
Fixes: 9772b47a4c29 ("usb: dwc3: gadget: Fix suspend/resume during device mode")
Acked-by: Thinh Nguyen <Thinh.Nguyen(a)synopsys.com>
Signed-off-by: Uttkarsh Aggarwal <quic_uaggarwa(a)quicinc.com>
---
changes in v3:
Corrected fixes tag and typo mistake in commit message dw3_gadget_stop -> dwc3_gadget_stop.
Link to v2:
https://lore.kernel.org/linux-usb/CAKzKK0r8RUqgXy1o5dndU21KuTKtyZ5rn5Fb9sZq…
Changes in v2:
Added cc and fixes tag missing in v1.
Link to v1:
https://lore.kernel.org/linux-usb/20240110095532.4776-1-quic_uaggarwa@quici…
drivers/usb/dwc3/gadget.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 019368f8e9c4..564976b3e2b9 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4709,15 +4709,13 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
unsigned long flags;
int ret;
- if (!dwc->gadget_driver)
- return 0;
-
ret = dwc3_gadget_soft_disconnect(dwc);
if (ret)
goto err;
spin_lock_irqsave(&dwc->lock, flags);
- dwc3_disconnect_gadget(dwc);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
spin_unlock_irqrestore(&dwc->lock, flags);
return 0;
--
2.17.1
This is the start of the stable review cycle for the 6.1.78 release.
There are 65 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Fri, 16 Feb 2024 14:28:54 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.1.78-rc2…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.1.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.1.78-rc2
Furong Xu <0x1207(a)gmail.com>
net: stmmac: xgmac: fix a typo of register name in DPP safety handling
Takashi Iwai <tiwai(a)suse.de>
ALSA: usb-audio: Sort quirk table entries
Simon Horman <horms(a)kernel.org>
net: stmmac: xgmac: use #define for string constants
Jiri Wiesner <jwiesner(a)suse.de>
clocksource: Skip watchdog check for large watchdog intervals
Jens Axboe <axboe(a)kernel.dk>
block: treat poll queue enter similarly to timeouts
Sheng Yong <shengyong(a)oppo.com>
f2fs: add helper to check compression level
Mike Marciniszyn <mike.marciniszyn(a)intel.com>
RDMA/irdma: Fix support for 64k pages
Prathu Baronia <prathubaronia2011(a)gmail.com>
vhost: use kzalloc() instead of kmalloc() followed by memset()
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "ASoC: amd: Add new dmi entries for acp5x platform"
Jens Axboe <axboe(a)kernel.dk>
io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL and buffers
Hans de Goede <hdegoede(a)redhat.com>
Input: atkbd - skip ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID
Werner Sembach <wse(a)tuxedocomputers.com>
Input: i8042 - fix strange behavior of touchpad on Clevo NS70PU
Frederic Weisbecker <frederic(a)kernel.org>
hrtimer: Report offline hrtimer enqueue
Prashanth K <quic_prashk(a)quicinc.com>
usb: host: xhci-plat: Add support for XHCI_SG_TRB_CACHE_SIZE_QUIRK
Prashanth K <quic_prashk(a)quicinc.com>
usb: dwc3: host: Set XHCI_SG_TRB_CACHE_SIZE_QUIRK
Leonard Dallmayr <leonard.dallmayr(a)mailbox.org>
USB: serial: cp210x: add ID for IMST iM871A-USB
Puliang Lu <puliang.lu(a)fibocom.com>
USB: serial: option: add Fibocom FM101-GL variant
JackBB Wu <wojackbb(a)gmail.com>
USB: serial: qcserial: add new usb-id for Dell Wireless DW5826e
Sean Young <sean(a)mess.org>
ALSA: usb-audio: add quirk for RODE NT-USB+
Julian Sikorski <belegdol+github(a)gmail.com>
ALSA: usb-audio: Add a quirk for Yamaha YIT-W12TX transmitter
Alexander Tsoy <alexander(a)tsoy.me>
ALSA: usb-audio: Add delay quirk for MOTU M Series 2nd revision
Francesco Dolcini <francesco.dolcini(a)toradex.com>
mtd: parsers: ofpart: add workaround for #size-cells 0
Alexander Aring <aahringo(a)redhat.com>
fs: dlm: don't put dlm_local_addrs on heap
Tejun Heo <tj(a)kernel.org>
blk-iocost: Fix an UBSAN shift-out-of-bounds warning
Ming Lei <ming.lei(a)redhat.com>
scsi: core: Move scsi_host_busy() out of host lock if it is for per-command
Dan Carpenter <dan.carpenter(a)linaro.org>
fs/ntfs3: Fix an NULL dereference bug
Florian Westphal <fw(a)strlen.de>
netfilter: nft_set_pipapo: remove scratch_aligned pointer
Florian Westphal <fw(a)strlen.de>
netfilter: nft_set_pipapo: add helper to release pcpu scratch area
Florian Westphal <fw(a)strlen.de>
netfilter: nft_set_pipapo: store index in scratch maps
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nft_ct: reject direction for ct id
Srinivasan Shanmugam <srinivasan.shanmugam(a)amd.com>
drm/amd/display: Implement bounds check for stream encoder creation in DCN301
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nft_compat: restrict match/target protocol to u16
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nft_compat: reject unused compat flag
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nft_compat: narrow down revision to unsigned 8-bits
Jakub Kicinski <kuba(a)kernel.org>
selftests: cmsg_ipv6: repeat the exact packet
Eric Dumazet <edumazet(a)google.com>
ppp_async: limit MRU to 64K
Kuniyuki Iwashima <kuniyu(a)amazon.com>
af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC.
Shigeru Yoshida <syoshida(a)redhat.com>
tipc: Check the bearer type before calling tipc_udp_nl_bearer_add()
David Howells <dhowells(a)redhat.com>
rxrpc: Fix response to PING RESPONSE ACKs to a dead call
Dan Carpenter <dan.carpenter(a)linaro.org>
drm/i915/gvt: Fix uninitialized variable in handle_mmio()
Eric Dumazet <edumazet(a)google.com>
inet: read sk->sk_family once in inet_recv_error()
Zhang Rui <rui.zhang(a)intel.com>
hwmon: (coretemp) Fix bogus core_id to attr name mapping
Zhang Rui <rui.zhang(a)intel.com>
hwmon: (coretemp) Fix out-of-bounds memory access
Loic Prylli <lprylli(a)netflix.com>
hwmon: (aspeed-pwm-tacho) mutex for tach reading
Zhipeng Lu <alexious(a)zju.edu.cn>
octeontx2-pf: Fix a memleak otx2_sq_init
Zhipeng Lu <alexious(a)zju.edu.cn>
atm: idt77252: fix a memleak in open_card_ubr0
Antoine Tenart <atenart(a)kernel.org>
tunnels: fix out of bounds access when building IPv6 PMTU error
Paolo Abeni <pabeni(a)redhat.com>
selftests: net: avoid just another constant wait
Paolo Abeni <pabeni(a)redhat.com>
selftests: net: cut more slack for gro fwd tests.
Ivan Vecera <ivecera(a)redhat.com>
net: atlantic: Fix DMA mapping for PTP hwts ring
Eric Dumazet <edumazet(a)google.com>
netdevsim: avoid potential loop in nsim_dev_trap_report_work()
Johannes Berg <johannes.berg(a)intel.com>
wifi: mac80211: fix waiting for beacons logic
Furong Xu <0x1207(a)gmail.com>
net: stmmac: xgmac: fix handling of DPP safety error for DMA channels
Abhinav Kumar <quic_abhinavk(a)quicinc.com>
drm/msm/dpu: check for valid hw_pp in dpu_encoder_helper_phys_cleanup
Kuogee Hsieh <quic_khsieh(a)quicinc.com>
drm/msm/dp: return correct Colorimetry for DP_TEST_DYNAMIC_RANGE_CEA case
Kuogee Hsieh <quic_khsieh(a)quicinc.com>
drm/msms/dp: fixed link clock divider bits be over written in BPC unknown case
Shyam Prasad N <sprasad(a)microsoft.com>
cifs: failure to add channel on iface should bump up weight
Tony Lindgren <tony(a)atomide.com>
phy: ti: phy-omap-usb2: Fix NULL pointer dereference for SRP
Frank Li <Frank.Li(a)nxp.com>
dmaengine: fix is_slave_direction() return false when DMA_DEV_TO_DEV
Yoshihiro Shimoda <yoshihiro.shimoda.uh(a)renesas.com>
phy: renesas: rcar-gen3-usb2: Fix returning wrong error code
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
dmaengine: fsl-qdma: Fix a memory leak related to the queue command DMA
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
dmaengine: fsl-qdma: Fix a memory leak related to the status queue DMA
Jai Luthra <j-luthra(a)ti.com>
dmaengine: ti: k3-udma: Report short packet errors
Guanhua Gao <guanhua.gao(a)nxp.com>
dmaengine: fsl-dpaa2-qdma: Fix the size of dma pools
Baokun Li <libaokun1(a)huawei.com>
ext4: regenerate buddy after block freeing failed if under fc replay
-------------
Diffstat:
Makefile | 4 +-
block/blk-core.c | 11 ++-
block/blk-iocost.c | 7 ++
drivers/atm/idt77252.c | 2 +
drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 10 +-
drivers/dma/fsl-qdma.c | 27 ++----
drivers/dma/ti/k3-udma.c | 10 +-
.../drm/amd/display/dc/dcn301/dcn301_resource.c | 2 +-
drivers/gpu/drm/i915/gvt/handlers.c | 3 +-
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 +-
drivers/gpu/drm/msm/dp/dp_ctrl.c | 5 -
drivers/gpu/drm/msm/dp/dp_link.c | 22 +++--
drivers/gpu/drm/msm/dp/dp_reg.h | 3 +
drivers/hwmon/aspeed-pwm-tacho.c | 7 ++
drivers/hwmon/coretemp.c | 40 ++++----
drivers/infiniband/hw/irdma/verbs.c | 2 +-
drivers/input/keyboard/atkbd.c | 13 ++-
drivers/input/serio/i8042-acpipnpio.h | 6 ++
drivers/mtd/parsers/ofpart_core.c | 19 ++++
drivers/net/ethernet/aquantia/atlantic/aq_ptp.c | 4 +-
drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 +++
drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 1 +
.../ethernet/marvell/octeontx2/nic/otx2_common.c | 14 ++-
drivers/net/ethernet/stmicro/stmmac/common.h | 1 +
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 3 +
.../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 58 ++++++++++-
drivers/net/netdevsim/dev.c | 8 +-
drivers/net/ppp/ppp_async.c | 4 +
drivers/phy/renesas/phy-rcar-gen3-usb2.c | 4 -
drivers/phy/ti/phy-omap-usb2.c | 4 +-
drivers/scsi/scsi_error.c | 3 +-
drivers/scsi/scsi_lib.c | 4 +-
drivers/usb/dwc3/host.c | 4 +-
drivers/usb/host/xhci-plat.c | 3 +
drivers/usb/serial/cp210x.c | 1 +
drivers/usb/serial/option.c | 1 +
drivers/usb/serial/qcserial.c | 2 +
drivers/vhost/vhost.c | 5 +-
fs/dlm/lowcomms.c | 38 +++-----
fs/ext4/mballoc.c | 20 ++++
fs/f2fs/compress.c | 27 ++++++
fs/f2fs/f2fs.h | 2 +
fs/f2fs/super.c | 4 +-
fs/ntfs3/ntfs_fs.h | 2 +-
fs/smb/client/sess.c | 2 +
include/linux/dmaengine.h | 3 +-
include/linux/hrtimer.h | 4 +-
include/uapi/linux/netfilter/nf_tables.h | 2 +
io_uring/net.c | 1 +
kernel/time/clocksource.c | 25 ++++-
kernel/time/hrtimer.c | 3 +
net/ipv4/af_inet.c | 6 +-
net/ipv4/ip_tunnel_core.c | 2 +-
net/mac80211/mlme.c | 3 +-
net/netfilter/nft_compat.c | 17 +++-
net/netfilter/nft_ct.c | 3 +
net/netfilter/nft_set_pipapo.c | 108 ++++++++++-----------
net/netfilter/nft_set_pipapo.h | 18 +++-
net/netfilter/nft_set_pipapo_avx2.c | 17 ++--
net/rxrpc/conn_event.c | 8 ++
net/tipc/bearer.c | 6 ++
net/unix/garbage.c | 11 +++
sound/soc/amd/acp-config.c | 15 +--
sound/usb/quirks.c | 38 +++++---
tools/testing/selftests/net/cmsg_ipv6.sh | 4 +-
tools/testing/selftests/net/pmtu.sh | 18 +++-
tools/testing/selftests/net/udpgro_fwd.sh | 14 ++-
tools/testing/selftests/net/udpgso_bench_rx.c | 2 +-
68 files changed, 517 insertions(+), 240 deletions(-)
I have a Framework 13 with a 7840U and started having massive GPU
driver issues a few weeks ago (including system freezes).
Unfortunately the information of when exactly this started to happen
is gone, but It should be somewhere in between 6.6.0 and 6.7.4.
I got many different and random dmesg-errors and system behaviors, but
I currently can only reproduce one, so let's focus on that for now.
First some basic info:
I'm on Arch Linux using the `linux` kernel package.(currently at 6.7.4).
I have an external monitor connected via a thinkpad thunderbolt 4 dock.
I am using amdgpu.sg_display=0 and VRAM sharing is configured to
UMA_GAME_OPTIMIZED in the firmware settings.
If I start playing a youtube video in firefox with hardware
acceleration enabled, it stutters until it stops playing after a few
seconds. I can see this in the kernel log. I see this multiple times
for many different addresses.
[ 5641.070540] amdgpu 0000:c1:00.0: amdgpu: [mmhub] page fault
(src_id:0 ring:40 vmid:1 pasid:32786, for process RDD Process pid 3680
thread firefox-bi:cs0 pid 3852)
[ 5641.070549] amdgpu 0000:c1:00.0: amdgpu: in page starting at
address 0x0000000000020000 from client 18
[ 5641.070553] amdgpu 0000:c1:00.0: amdgpu:
MMVM_L2_PROTECTION_FAULT_STATUS:0x00143A51
[ 5641.070556] amdgpu 0000:c1:00.0: amdgpu: Faulty UTCL2 client
ID: unknown (0x1d)
[ 5641.070559] amdgpu 0000:c1:00.0: amdgpu: MORE_FAULTS: 0x1
[ 5641.070561] amdgpu 0000:c1:00.0: amdgpu: WALKER_ERROR: 0x0
[ 5641.070563] amdgpu 0000:c1:00.0: amdgpu: PERMISSION_FAULTS: 0x5
[ 5641.070565] amdgpu 0000:c1:00.0: amdgpu: MAPPING_ERROR: 0x0
[ 5641.070567] amdgpu 0000:c1:00.0: amdgpu: RW: 0x1
Thanks
Michael
There is a corner case here where start/end is after/before the block
range we are currently checking. If so we need to be sure that splitting
the block will eventually give use the block size we need. To do that we
should adjust the block range to account for the start/end, and only
continue with the split if the size/alignment will fit the requested
size. Not doing so can result in leaving split blocks unmerged when it
eventually fails.
Fixes: afea229fe102 ("drm: improve drm_buddy_alloc function")
Signed-off-by: Matthew Auld <matthew.auld(a)intel.com>
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam(a)amd.com>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: <stable(a)vger.kernel.org> # v5.18+
---
drivers/gpu/drm/drm_buddy.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index c1a99bf4dffd..d09540d4065b 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -332,6 +332,7 @@ alloc_range_bias(struct drm_buddy *mm,
u64 start, u64 end,
unsigned int order)
{
+ u64 req_size = mm->chunk_size << order;
struct drm_buddy_block *block;
struct drm_buddy_block *buddy;
LIST_HEAD(dfs);
@@ -367,6 +368,15 @@ alloc_range_bias(struct drm_buddy *mm,
if (drm_buddy_block_is_allocated(block))
continue;
+ if (block_start < start || block_end > end) {
+ u64 adjusted_start = max(block_start, start);
+ u64 adjusted_end = min(block_end, end);
+
+ if (round_down(adjusted_end + 1, req_size) <=
+ round_up(adjusted_start, req_size))
+ continue;
+ }
+
if (contains(start, end, block_start, block_end) &&
order == drm_buddy_block_order(block)) {
/*
--
2.43.0
Hi,
I hope you are the right person to discuss about *Healthcare Leads*, which
includes complete contact details, and tele-verified email addresses.
Please find the Leads Breakdown Chart below:
*Criteria*
*Counts*
*Criteria*
*Counts*
*Criteria*
*Counts*
Allergy immunology
5,064
Healthcare Technology
20,540
Plastic surgery
8,371
Anesthesiology
30,155
Nephrology
6,606
Preventive medicine
6,642
Cardiology
24,577
Neurological surgery
7,066
Psychiatry
4,315
Dermatology
8,467
Neurology
13,354
Radiology
32,763
Emergency medicine
22,300
Obgyn
35,163
Surgery
39,517
Endocrinology diabetes metabolism
3,756
Oncology
17,881
Urology
10,135
Family practice1
62,544
Ophthalmology
15,237
Physician
100,000
Gastroenterology
11,913
Orthopedics
22,145
Doctors
128,000
General practice
12,957
Other
15,559
Dentists
150,200
Geriatrics Doctors
9,634
Otolaryngology
9,539
Osteopathic
25,000
Infectious disease
5,677
Pathology
15,467
Acupuncture
5,000
Internal medicine1
120,029
Pediatrics
55,684
Chiropractors
11,000
Haematology Doctors
12,850
Physical medicine
8,437
Rheumatology
5,000
*Data Fields:* Every lead includes Name, Company, Job Title, Website,
Physical Address, Industry, *Phone Number and Verified/Opt-In Email
Address.* Please let me know if you have any queries about our custom
opt-in list and I would love to answer them.
Kindly share your thoughts.
Warm Regards,
*Deanna Holder*
*Marketing Executive *
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
We respect your privacy, if you want to remove it from this list. Please
reply with the subject line as “Leave Out”.
commit 67b8bcbaed4777871bb0dcc888fb02a614a98ab1 upstream.
The helper function nilfs_recovery_copy_block() of
nilfs_recovery_dsync_blocks(), which recovers data from logs created by
data sync writes during a mount after an unclean shutdown, incorrectly
calculates the on-page offset when copying repair data to the file's page
cache. In environments where the block size is smaller than the page
size, this flaw can cause data corruption and leak uninitialized memory
bytes during the recovery process.
Fix these issues by correcting this byte offset calculation on the page.
Link: https://lkml.kernel.org/r/20240124121936.10575-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
Please apply this patch to the stable trees.
This patch is identical to the upstream commit.
I have confirmed that the bug this patch fixes reproduces in all these
stable trees, so I believe it should be applied to them.
I have also confirmed that the build passes and the issue is fixed on
all target stable trees.
Thanks,
Ryusuke Konishi
fs/nilfs2/recovery.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 0955b657938f..a9b8d77c8c1d 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct page *page)
{
struct buffer_head *bh_org;
+ size_t from = pos & ~PAGE_MASK;
void *kaddr;
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
return -EIO;
kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+ memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
if (unlikely(err))
goto failed_page;
--
2.39.3
Each RPMh VRM accelerator resource has 3 or 4 contiguous 4-byte aligned
addresses associated with it. These control voltage, enable state, mode,
and in legacy targets, voltage headroom. The current in-flight request
checking logic looks for exact address matches. Requests for different
addresses of the same RPMh resource as thus not detected as in-flight.
Add new cmd-db API cmd_db_match_resource_addr() to enhance the in-flight
request check for VRM requests by ignoring the address offset.
This ensures that only one request is allowed to be in-flight for a given
VRM resource. This is needed to avoid scenarios where request commands are
carried out by RPMh hardware out-of-order leading to LDO regulator
over-current protection triggering.
Fixes: 658628e7ef78 ("drivers: qcom: rpmh-rsc: add RPMH controller for QCOM SoCs")
cc: stable(a)vger.kernel.org
Reviewed-by: Konrad Dybcio <konrad.dybcio(a)linaro.org>
Signed-off-by: Elliot Berman <quic_eberman(a)quicinc.com>
Signed-off-by: Maulik Shah <quic_mkshah(a)quicinc.com>
---
Changes in v3:
- Fix s-o-b chain
- Add cmd-db API to compare addresses
- Reuse already defined resource types in cmd-db
- Add Fixes tag and Cc to stable
- Retain Reviewed-by tag of v2
- Link to v2: https://lore.kernel.org/r/20240119-rpmh-rsc-fixes-v2-1-e42c0a9e36f0@quicinc…
Changes in v2:
- Use GENMASK() and FIELD_GET()
- Link to v1: https://lore.kernel.org/r/20240117-rpmh-rsc-fixes-v1-1-71ee4f8f72a4@quicinc…
---
drivers/soc/qcom/cmd-db.c | 41 +++++++++++++++++++++++++++++++++++------
drivers/soc/qcom/rpmh-rsc.c | 3 ++-
include/soc/qcom/cmd-db.h | 10 +++++++++-
3 files changed, 46 insertions(+), 8 deletions(-)
diff --git a/drivers/soc/qcom/cmd-db.c b/drivers/soc/qcom/cmd-db.c
index a5fd68411bed..e87682b9755e 100644
--- a/drivers/soc/qcom/cmd-db.c
+++ b/drivers/soc/qcom/cmd-db.c
@@ -1,6 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2016-2018, 2020, The Linux Foundation. All rights reserved. */
+/*
+ * Copyright (c) 2016-2018, 2020, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+#include <linux/bitfield.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -15,8 +19,8 @@
#define NUM_PRIORITY 2
#define MAX_SLV_ID 8
-#define SLAVE_ID_MASK 0x7
-#define SLAVE_ID_SHIFT 16
+#define SLAVE_ID(addr) FIELD_GET(GENMASK(19, 16), addr)
+#define VRM_ADDR(addr) FIELD_GET(GENMASK(19, 4), addr)
/**
* struct entry_header: header for each entry in cmddb
@@ -221,9 +225,34 @@ const void *cmd_db_read_aux_data(const char *id, size_t *len)
EXPORT_SYMBOL_GPL(cmd_db_read_aux_data);
/**
- * cmd_db_read_slave_id - Get the slave ID for a given resource address
+ * cmd_db_match_resource_addr - Compare if both Resource addresses are same
+ *
+ * @addr1: Resource address to compare
+ * @addr2: Resource address to compare
+ *
+ * Return: true on matching addresses, false otherwise
+ */
+bool cmd_db_match_resource_addr(u32 addr1, u32 addr2)
+{
+ /*
+ * Each RPMh VRM accelerator resource has 3 or 4 contiguous 4-byte
+ * aligned addresses associated with it. Ignore the offset to check
+ * for VRM requests.
+ */
+ if (SLAVE_ID(addr1) == CMD_DB_HW_VRM
+ && VRM_ADDR(addr1) == VRM_ADDR(addr2))
+ return true;
+ else if (addr1 == addr2)
+ return true;
+ else
+ return false;
+}
+EXPORT_SYMBOL_GPL(cmd_db_match_resource_addr);
+
+/**
+ * cmd_db_read_slave_id - Get the slave ID for a given resource name
*
- * @id: Resource id to query the DB for version
+ * @id: Resource id to query the DB for slave id
*
* Return: cmd_db_hw_type enum on success, CMD_DB_HW_INVALID on error
*/
@@ -238,7 +267,7 @@ enum cmd_db_hw_type cmd_db_read_slave_id(const char *id)
return CMD_DB_HW_INVALID;
addr = le32_to_cpu(ent->addr);
- return (addr >> SLAVE_ID_SHIFT) & SLAVE_ID_MASK;
+ return SLAVE_ID(addr);
}
EXPORT_SYMBOL_GPL(cmd_db_read_slave_id);
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index a021dc71807b..daf64be966fe 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2023-2024, Qualcomm Innovation Center, Inc. All rights reserved.
*/
#define pr_fmt(fmt) "%s " fmt, KBUILD_MODNAME
@@ -557,7 +558,7 @@ static int check_for_req_inflight(struct rsc_drv *drv, struct tcs_group *tcs,
for_each_set_bit(j, &curr_enabled, MAX_CMDS_PER_TCS) {
addr = read_tcs_cmd(drv, drv->regs[RSC_DRV_CMD_ADDR], i, j);
for (k = 0; k < msg->num_cmds; k++) {
- if (addr == msg->cmds[k].addr)
+ if (cmd_db_match_resource_addr(msg->cmds[k].addr, addr))
return -EBUSY;
}
}
diff --git a/include/soc/qcom/cmd-db.h b/include/soc/qcom/cmd-db.h
index c8bb56e6852a..47a6cab75e63 100644
--- a/include/soc/qcom/cmd-db.h
+++ b/include/soc/qcom/cmd-db.h
@@ -1,5 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved. */
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved.
+ */
#ifndef __QCOM_COMMAND_DB_H__
#define __QCOM_COMMAND_DB_H__
@@ -21,6 +24,8 @@ u32 cmd_db_read_addr(const char *resource_id);
const void *cmd_db_read_aux_data(const char *resource_id, size_t *len);
+bool cmd_db_match_resource_addr(u32 addr1, u32 addr2);
+
enum cmd_db_hw_type cmd_db_read_slave_id(const char *resource_id);
int cmd_db_ready(void);
@@ -31,6 +36,9 @@ static inline u32 cmd_db_read_addr(const char *resource_id)
static inline const void *cmd_db_read_aux_data(const char *resource_id, size_t *len)
{ return ERR_PTR(-ENODEV); }
+static inline bool cmd_db_match_resource_addr(u32 addr1, u32 addr2)
+{ return false; }
+
static inline enum cmd_db_hw_type cmd_db_read_slave_id(const char *resource_id)
{ return -ENODEV; }
---
base-commit: 615d300648869c774bd1fe54b4627bb0c20faed4
change-id: 20240210-rpmh-rsc-fixes-372a79ab364b
Best regards,
--
Maulik Shah <quic_mkshah(a)quicinc.com>
Commit c92a6b5d6335 ("scsi: core: Query VPD size before getting full
page") removed the logic which checks whether a VPD page is present on
the supported pages list before asking for the page itself. That was
done because SPC helpfully states "The Supported VPD Pages VPD page
list may or may not include all the VPD pages that are able to be
returned by the device server". Testing had revealed a few devices
that supported some of the 0xBn pages but didn't actually list them in
page 0.
Julian Sikorski bisected a problem with his drive resetting during
discovery to the commit above. As it turns out, this particular drive
firmware will crash if we attempt to fetch page 0xB9.
Various approaches were attempted to work around this. In the end,
reinstating the logic that consults VPD page 0 before fetching any
other page was the path of least resistance. A firmware update for the
devices which originally compelled us to remove the check has since
been released.
Cc: <stable(a)vger.kernel.org>
Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page")
Reported-by: Julian Sikorski <belegdol(a)gmail.com>
Tested-by: Julian Sikorski <belegdol(a)gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
---
drivers/scsi/scsi.c | 21 +++++++++++++++++++--
include/scsi/scsi_device.h | 1 +
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 76d369343c7a..5ef5fcf022ed 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -330,19 +330,36 @@ static int scsi_vpd_inquiry(struct scsi_device *sdev, unsigned char *buffer,
static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)
{
- unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4);
+ unsigned char vpd[SCSI_VPD_LIST_SIZE] __aligned(4);
int result;
if (sdev->no_vpd_size)
return SCSI_DEFAULT_VPD_LEN;
+ /*
+ * Fetch the supported pages VPD and validate that the requested page
+ * number is present.
+ */
+ if (page != 0) {
+ result = scsi_vpd_inquiry(sdev, vpd, 0, sizeof(vpd));
+ if (result < SCSI_VPD_HEADER_SIZE)
+ return 0;
+
+ for (unsigned int i = SCSI_VPD_HEADER_SIZE ; i < result ; i++) {
+ if (vpd[i] == page)
+ goto found;
+ }
+
+ return 0;
+ }
+found:
/*
* Fetch the VPD page header to find out how big the page
* is. This is done to prevent problems on legacy devices
* which can not handle allocation lengths as large as
* potentially requested by the caller.
*/
- result = scsi_vpd_inquiry(sdev, vpd_header, page, sizeof(vpd_header));
+ result = scsi_vpd_inquiry(sdev, vpd, page, SCSI_VPD_HEADER_SIZE);
if (result < 0)
return 0;
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index cb019c80763b..6673885565e3 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -102,6 +102,7 @@ struct scsi_vpd {
enum scsi_vpd_parameters {
SCSI_VPD_HEADER_SIZE = 4,
+ SCSI_VPD_LIST_SIZE = 36,
};
struct scsi_device {
--
2.42.1
Commit 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF")
changed the ELF type of .btf.vmlinux.bin.o to ET_REL via dd, which works
fine for little endian platforms:
00000000 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 |.ELF............|
-00000010 03 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................|
+00000010 01 00 b7 00 01 00 00 00 00 00 00 80 00 80 ff ff |................|
However, for big endian platforms, it changes the wrong byte, resulting
in an invalid ELF file type, which ld.lld rejects:
00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............|
-00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................|
+00000010 01 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................|
Type: <unknown>: 103
ld.lld: error: .btf.vmlinux.bin.o: unknown file type
Fix this by updating the entire 16-bit e_type field rather than just a
single byte, so that everything works correctly for all platforms and
linkers.
00000000 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 |.ELF............|
-00000010 00 03 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................|
+00000010 00 01 00 16 00 00 00 01 00 00 00 00 00 10 00 00 |................|
Type: REL (Relocatable file)
While in the area, update the comment to mention that binutils 2.35+
matches LLD's behavior of rejecting an ET_EXEC input, which occurred
after the comment was added.
Cc: stable(a)vger.kernel.org
Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF")
Link: https://github.com/llvm/llvm-project/pull/75643
Suggested-by: Masahiro Yamada <masahiroy(a)kernel.org>
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
---
Changes in v2:
- Rather than change the seek value for dd, update the entire e_type
field (Masahiro). Due to this change, I did not carry forward the
tags of v1.
- Slightly update commit message to remove mention of ET_EXEC, which
does not match the dump (Masahiro).
- Update comment to mention binutils 2.35+ has the same behavior as LLD
(Fangrui).
- Link to v1: https://lore.kernel.org/r/20240208-fix-elf-type-btf-vmlinux-bin-o-big-endia…
---
scripts/link-vmlinux.sh | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index a432b171be82..7862a8101747 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -135,8 +135,13 @@ gen_btf()
${OBJCOPY} --only-section=.BTF --set-section-flags .BTF=alloc,readonly \
--strip-all ${1} ${2} 2>/dev/null
# Change e_type to ET_REL so that it can be used to link final vmlinux.
- # Unlike GNU ld, lld does not allow an ET_EXEC input.
- printf '\1' | dd of=${2} conv=notrunc bs=1 seek=16 status=none
+ # GNU ld 2.35+ and lld do not allow an ET_EXEC input.
+ if is_enabled CONFIG_CPU_BIG_ENDIAN; then
+ et_rel='\0\1'
+ else
+ et_rel='\1\0'
+ fi
+ printf "${et_rel}" | dd of=${2} conv=notrunc bs=1 seek=16 status=none
}
# Create ${2} .S file with all symbols from the ${1} object file
---
base-commit: 54be6c6c5ae8e0d93a6c4641cb7528eb0b6ba478
change-id: 20240208-fix-elf-type-btf-vmlinux-bin-o-big-endian-dbc55a1e1296
Best regards,
--
Nathan Chancellor <nathan(a)kernel.org>
From: Johannes Berg <johannes.berg(a)intel.com>
It's currently possible to change the mesh ID when the
interface isn't yet in mesh mode, at the same time as
changing it into mesh mode. This leads to an overwrite
of data in the wdev->u union for the interface type it
currently has, causing cfg80211_change_iface() to do
wrong things when switching.
We could probably allow setting an interface to mesh
while setting the mesh ID at the same time by doing a
different order of operations here, but realistically
there's no userspace that's going to do this, so just
disallow changes in iftype when setting mesh ID.
Cc: stable(a)vger.kernel.org
Fixes: 29cbe68c516a ("cfg80211/mac80211: add mesh join/leave commands")
Reported-by: syzbot+dd4779978217b1973180(a)syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
---
net/wireless/nl80211.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b09700400d09..bd54a928bab4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4197,6 +4197,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
+ if (otype != NL80211_IFTYPE_MESH_POINT)
+ return -EINVAL;
if (netif_running(dev))
return -EBUSY;
--
2.43.0
From: Wayne Lin <wayne.lin(a)amd.com>
[Why]
Observe error message "Can't retrieve aconnector in hpd_rx_irq_offload_work"
when boot up with a mst tbt4 dock connected. After analyzing, there are few
parts needed to be adjusted:
1. hpd_rx_offload_wq[].aconnector is not initialzed before the dmub outbox
hpd_irq handler get registered which causes the error message.
2. registeration of hpd and hpd_rx_irq event for usb4 dp tunneling is not
aligned with legacy interface sequence
[How]
Put DMUB_NOTIFICATION_HPD and DMUB_NOTIFICATION_HPD_IRQ handler
registration into register_hpd_handlers() to align other interfaces and
get hpd_rx_offload_wq[].aconnector initialized earlier than that.
Leave DMUB_NOTIFICATION_AUX_REPLY registered as it was since we need that
while calling dc_link_detect(). USB4 connection status will be proactively
detected by dc_link_detect_connection_type() in amdgpu_dm_initialize_drm_device()
Cc: Stable <stable(a)vger.kernel.org>
Reviewed-by: Aurabindo Pillai <aurabindo.pillai(a)amd.com>
Acked-by: Rodrigo Siqueira <rodrigo.siqueira(a)amd.com>
Signed-off-by: Wayne Lin <wayne.lin(a)amd.com>
---
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 37 +++++++++----------
1 file changed, 18 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b9ac3d2f8029..ed0ad44dd1d8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1843,21 +1843,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
DRM_ERROR("amdgpu: fail to register dmub aux callback");
goto error;
}
- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true)) {
- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
- goto error;
- }
- if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true)) {
- DRM_ERROR("amdgpu: fail to register dmub hpd callback");
- goto error;
- }
- }
-
- /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
- * It is expected that DMUB will resend any pending notifications at this point, for
- * example HPD from DPIA.
- */
- if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+ * It is expected that DMUB will resend any pending notifications at this point. Note
+ * that hpd and hpd_irq handler registration are deferred to register_hpd_handlers() to
+ * align legacy interface initialization sequence. Connection status will be proactivly
+ * detected once in the amdgpu_dm_initialize_drm_device.
+ */
dc_enable_dmub_outbox(adev->dm.dc);
/* DPIA trace goes to dmesg logs only if outbox is enabled */
@@ -3546,6 +3537,14 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD, dmub_hpd_callback, true))
+ DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+
+ if (!register_dmub_notify_callback(adev, DMUB_NOTIFICATION_HPD_IRQ, dmub_hpd_callback, true))
+ DRM_ERROR("amdgpu: fail to register dmub hpd callback");
+ }
+
list_for_each_entry(connector,
&dev->mode_config.connector_list, head) {
@@ -3574,10 +3573,6 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
handle_hpd_rx_irq,
(void *) aconnector);
}
-
- if (adev->dm.hpd_rx_offload_wq)
- adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
- aconnector;
}
}
@@ -4589,6 +4584,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev)
goto fail;
}
+ if (dm->hpd_rx_offload_wq)
+ dm->hpd_rx_offload_wq[aconnector->base.index].aconnector =
+ aconnector;
+
if (!dc_link_detect_connection_type(link, &new_connection_type))
DRM_ERROR("KMS: Failed to detect connector\n");
--
2.43.0
There's a syzbot report that device name buffers passed to device
replace are not properly checked for string termination which could lead
to a read out of bounds in getname_kernel().
Add a helper that validates both source and target device name buffers.
For devid as the source initialize the buffer to empty string in case
something tries to read it later.
This was originally analyzed and fixed in a different way by Edward Adam
Davis (see links).
Link: https://lore.kernel.org/linux-btrfs/000000000000d1a1d1060cc9c5e7@google.com/
Link: https://lore.kernel.org/linux-btrfs/tencent_44CA0665C9836EF9EEC80CB9E7E206D…
CC: stable(a)vger.kernel.org # 4.19+
CC: Edward Adam Davis <eadavis(a)qq.com>
Reported-and-tested-by: syzbot+33f23b49ac24f986c9e8(a)syzkaller.appspotmail.com
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/dev-replace.c | 24 ++++++++++++++++++++----
1 file changed, 20 insertions(+), 4 deletions(-)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 1502d664c892..79c4293ddf37 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -725,6 +725,23 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
return ret;
}
+static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args)
+{
+ if (args->start.srcdevid == 0) {
+ if (memchr(args->start.srcdev_name, 0,
+ sizeof(args->start.srcdev_name)) == NULL)
+ return -ENAMETOOLONG;
+ } else {
+ args->start.srcdev_name[0] = 0;
+ }
+
+ if (memchr(args->start.tgtdev_name, 0,
+ sizeof(args->start.tgtdev_name)) == NULL)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
+
int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args)
{
@@ -737,10 +754,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
default:
return -EINVAL;
}
-
- if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
- args->start.tgtdev_name[0] == '\0')
- return -EINVAL;
+ ret = btrfs_check_replace_dev_names(args);
+ if (ret < 0)
+ return ret;
ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name,
args->start.srcdevid,
--
2.42.1
commit 38296afe3c6ee07319e01bb249aa4bb47c07b534 upstream.
Syzbot reported a hang issue in migrate_pages_batch() called by mbind()
and nilfs_lookup_dirty_data_buffers() called in the log writer of nilfs2.
While migrate_pages_batch() locks a folio and waits for the writeback to
complete, the log writer thread that should bring the writeback to
completion picks up the folio being written back in
nilfs_lookup_dirty_data_buffers() that it calls for subsequent log
creation and was trying to lock the folio. Thus causing a deadlock.
In the first place, it is unexpected that folios/pages in the middle of
writeback will be updated and become dirty. Nilfs2 adds a checksum to
verify the validity of the log being written and uses it for recovery at
mount, so data changes during writeback are suppressed. Since this is
broken, an unclean shutdown could potentially cause recovery to fail.
Investigation revealed that the root cause is that the wait for writeback
completion in nilfs_page_mkwrite() is conditional, and if the backing
device does not require stable writes, data may be modified without
waiting.
Fix these issues by making nilfs_page_mkwrite() wait for writeback to
finish regardless of the stable write requirement of the backing device.
Link: https://lkml.kernel.org/r/20240131145657.4209-1-konishi.ryusuke@gmail.com
Fixes: 1d1d1a767206 ("mm: only enforce stable page writes if the backing device requires it")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+ee2ae68da3b22d04cd8d(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/00000000000047d819061004ad6c@google.com
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
Please apply this patch to the stable trees indicated by the subject line
prefix.
This patch is tailored to account for page/folio conversion and an fs-wide
change around page_mkwrite, and is applicable as-is to all versions from
v3.9 (where the issue was introduced) to v6.5.
Also, all the builds and retests I did on each stable tree passed.
Thanks,
Ryusuke Konishi
fs/nilfs2/file.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index a265d391ffe9..822e8d95d31e 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -105,7 +105,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- wait_for_stable_page(page);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ wait_on_page_writeback(page);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
--
2.39.3
This is the start of the stable review cycle for the 6.1.78 release.
There are 64 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Thu, 15 Feb 2024 17:18:29 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.1.78-rc1…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.1.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 6.1.78-rc1
Jiri Wiesner <jwiesner(a)suse.de>
clocksource: Skip watchdog check for large watchdog intervals
Jens Axboe <axboe(a)kernel.dk>
block: treat poll queue enter similarly to timeouts
Sheng Yong <shengyong(a)oppo.com>
f2fs: add helper to check compression level
Mike Marciniszyn <mike.marciniszyn(a)intel.com>
RDMA/irdma: Fix support for 64k pages
Prathu Baronia <prathubaronia2011(a)gmail.com>
vhost: use kzalloc() instead of kmalloc() followed by memset()
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "ASoC: amd: Add new dmi entries for acp5x platform"
Jens Axboe <axboe(a)kernel.dk>
io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL and buffers
Hans de Goede <hdegoede(a)redhat.com>
Input: atkbd - skip ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID
Werner Sembach <wse(a)tuxedocomputers.com>
Input: i8042 - fix strange behavior of touchpad on Clevo NS70PU
Frederic Weisbecker <frederic(a)kernel.org>
hrtimer: Report offline hrtimer enqueue
Michal Pecio <michal.pecio(a)gmail.com>
xhci: handle isoc Babble and Buffer Overrun events properly
Mathias Nyman <mathias.nyman(a)linux.intel.com>
xhci: process isoc TD properly when there was a transaction error mid TD.
Prashanth K <quic_prashk(a)quicinc.com>
usb: host: xhci-plat: Add support for XHCI_SG_TRB_CACHE_SIZE_QUIRK
Prashanth K <quic_prashk(a)quicinc.com>
usb: dwc3: host: Set XHCI_SG_TRB_CACHE_SIZE_QUIRK
Leonard Dallmayr <leonard.dallmayr(a)mailbox.org>
USB: serial: cp210x: add ID for IMST iM871A-USB
Puliang Lu <puliang.lu(a)fibocom.com>
USB: serial: option: add Fibocom FM101-GL variant
JackBB Wu <wojackbb(a)gmail.com>
USB: serial: qcserial: add new usb-id for Dell Wireless DW5826e
Sean Young <sean(a)mess.org>
ALSA: usb-audio: add quirk for RODE NT-USB+
Julian Sikorski <belegdol+github(a)gmail.com>
ALSA: usb-audio: Add a quirk for Yamaha YIT-W12TX transmitter
Alexander Tsoy <alexander(a)tsoy.me>
ALSA: usb-audio: Add delay quirk for MOTU M Series 2nd revision
Francesco Dolcini <francesco.dolcini(a)toradex.com>
mtd: parsers: ofpart: add workaround for #size-cells 0
Alexander Aring <aahringo(a)redhat.com>
fs: dlm: don't put dlm_local_addrs on heap
Tejun Heo <tj(a)kernel.org>
blk-iocost: Fix an UBSAN shift-out-of-bounds warning
Ming Lei <ming.lei(a)redhat.com>
scsi: core: Move scsi_host_busy() out of host lock if it is for per-command
Dan Carpenter <dan.carpenter(a)linaro.org>
fs/ntfs3: Fix an NULL dereference bug
Florian Westphal <fw(a)strlen.de>
netfilter: nft_set_pipapo: remove scratch_aligned pointer
Florian Westphal <fw(a)strlen.de>
netfilter: nft_set_pipapo: add helper to release pcpu scratch area
Florian Westphal <fw(a)strlen.de>
netfilter: nft_set_pipapo: store index in scratch maps
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nft_ct: reject direction for ct id
Srinivasan Shanmugam <srinivasan.shanmugam(a)amd.com>
drm/amd/display: Implement bounds check for stream encoder creation in DCN301
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nft_compat: restrict match/target protocol to u16
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nft_compat: reject unused compat flag
Pablo Neira Ayuso <pablo(a)netfilter.org>
netfilter: nft_compat: narrow down revision to unsigned 8-bits
Jakub Kicinski <kuba(a)kernel.org>
selftests: cmsg_ipv6: repeat the exact packet
Eric Dumazet <edumazet(a)google.com>
ppp_async: limit MRU to 64K
Kuniyuki Iwashima <kuniyu(a)amazon.com>
af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC.
Shigeru Yoshida <syoshida(a)redhat.com>
tipc: Check the bearer type before calling tipc_udp_nl_bearer_add()
David Howells <dhowells(a)redhat.com>
rxrpc: Fix response to PING RESPONSE ACKs to a dead call
Dan Carpenter <dan.carpenter(a)linaro.org>
drm/i915/gvt: Fix uninitialized variable in handle_mmio()
Eric Dumazet <edumazet(a)google.com>
inet: read sk->sk_family once in inet_recv_error()
Zhang Rui <rui.zhang(a)intel.com>
hwmon: (coretemp) Fix bogus core_id to attr name mapping
Zhang Rui <rui.zhang(a)intel.com>
hwmon: (coretemp) Fix out-of-bounds memory access
Loic Prylli <lprylli(a)netflix.com>
hwmon: (aspeed-pwm-tacho) mutex for tach reading
Zhipeng Lu <alexious(a)zju.edu.cn>
octeontx2-pf: Fix a memleak otx2_sq_init
Zhipeng Lu <alexious(a)zju.edu.cn>
atm: idt77252: fix a memleak in open_card_ubr0
Antoine Tenart <atenart(a)kernel.org>
tunnels: fix out of bounds access when building IPv6 PMTU error
Paolo Abeni <pabeni(a)redhat.com>
selftests: net: avoid just another constant wait
Paolo Abeni <pabeni(a)redhat.com>
selftests: net: cut more slack for gro fwd tests.
Ivan Vecera <ivecera(a)redhat.com>
net: atlantic: Fix DMA mapping for PTP hwts ring
Eric Dumazet <edumazet(a)google.com>
netdevsim: avoid potential loop in nsim_dev_trap_report_work()
Johannes Berg <johannes.berg(a)intel.com>
wifi: mac80211: fix waiting for beacons logic
Furong Xu <0x1207(a)gmail.com>
net: stmmac: xgmac: fix handling of DPP safety error for DMA channels
Abhinav Kumar <quic_abhinavk(a)quicinc.com>
drm/msm/dpu: check for valid hw_pp in dpu_encoder_helper_phys_cleanup
Kuogee Hsieh <quic_khsieh(a)quicinc.com>
drm/msm/dp: return correct Colorimetry for DP_TEST_DYNAMIC_RANGE_CEA case
Kuogee Hsieh <quic_khsieh(a)quicinc.com>
drm/msms/dp: fixed link clock divider bits be over written in BPC unknown case
Shyam Prasad N <sprasad(a)microsoft.com>
cifs: failure to add channel on iface should bump up weight
Tony Lindgren <tony(a)atomide.com>
phy: ti: phy-omap-usb2: Fix NULL pointer dereference for SRP
Frank Li <Frank.Li(a)nxp.com>
dmaengine: fix is_slave_direction() return false when DMA_DEV_TO_DEV
Yoshihiro Shimoda <yoshihiro.shimoda.uh(a)renesas.com>
phy: renesas: rcar-gen3-usb2: Fix returning wrong error code
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
dmaengine: fsl-qdma: Fix a memory leak related to the queue command DMA
Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
dmaengine: fsl-qdma: Fix a memory leak related to the status queue DMA
Jai Luthra <j-luthra(a)ti.com>
dmaengine: ti: k3-udma: Report short packet errors
Guanhua Gao <guanhua.gao(a)nxp.com>
dmaengine: fsl-dpaa2-qdma: Fix the size of dma pools
Baokun Li <libaokun1(a)huawei.com>
ext4: regenerate buddy after block freeing failed if under fc replay
-------------
Diffstat:
Makefile | 4 +-
block/blk-core.c | 11 ++-
block/blk-iocost.c | 7 ++
drivers/atm/idt77252.c | 2 +
drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 10 +-
drivers/dma/fsl-qdma.c | 27 ++----
drivers/dma/ti/k3-udma.c | 10 +-
.../drm/amd/display/dc/dcn301/dcn301_resource.c | 2 +-
drivers/gpu/drm/i915/gvt/handlers.c | 3 +-
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 +-
drivers/gpu/drm/msm/dp/dp_ctrl.c | 5 -
drivers/gpu/drm/msm/dp/dp_link.c | 22 +++--
drivers/gpu/drm/msm/dp/dp_reg.h | 3 +
drivers/hwmon/aspeed-pwm-tacho.c | 7 ++
drivers/hwmon/coretemp.c | 40 ++++----
drivers/infiniband/hw/irdma/verbs.c | 2 +-
drivers/input/keyboard/atkbd.c | 13 ++-
drivers/input/serio/i8042-acpipnpio.h | 6 ++
drivers/mtd/parsers/ofpart_core.c | 19 ++++
drivers/net/ethernet/aquantia/atlantic/aq_ptp.c | 4 +-
drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 +++
drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 1 +
.../ethernet/marvell/octeontx2/nic/otx2_common.c | 14 ++-
drivers/net/ethernet/stmicro/stmmac/common.h | 1 +
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 3 +
.../net/ethernet/stmicro/stmmac/dwxgmac2_core.c | 57 ++++++++++-
drivers/net/netdevsim/dev.c | 8 +-
drivers/net/ppp/ppp_async.c | 4 +
drivers/phy/renesas/phy-rcar-gen3-usb2.c | 4 -
drivers/phy/ti/phy-omap-usb2.c | 4 +-
drivers/scsi/scsi_error.c | 3 +-
drivers/scsi/scsi_lib.c | 4 +-
drivers/usb/dwc3/host.c | 4 +-
drivers/usb/host/xhci-plat.c | 3 +
drivers/usb/host/xhci-ring.c | 80 ++++++++++++---
drivers/usb/host/xhci.h | 1 +
drivers/usb/serial/cp210x.c | 1 +
drivers/usb/serial/option.c | 1 +
drivers/usb/serial/qcserial.c | 2 +
drivers/vhost/vhost.c | 5 +-
fs/dlm/lowcomms.c | 38 +++-----
fs/ext4/mballoc.c | 20 ++++
fs/f2fs/compress.c | 27 ++++++
fs/f2fs/f2fs.h | 2 +
fs/f2fs/super.c | 4 +-
fs/ntfs3/ntfs_fs.h | 2 +-
fs/smb/client/sess.c | 2 +
include/linux/dmaengine.h | 3 +-
include/linux/hrtimer.h | 4 +-
include/uapi/linux/netfilter/nf_tables.h | 2 +
io_uring/net.c | 1 +
kernel/time/clocksource.c | 25 ++++-
kernel/time/hrtimer.c | 3 +
net/ipv4/af_inet.c | 6 +-
net/ipv4/ip_tunnel_core.c | 2 +-
net/mac80211/mlme.c | 3 +-
net/netfilter/nft_compat.c | 17 +++-
net/netfilter/nft_ct.c | 3 +
net/netfilter/nft_set_pipapo.c | 108 ++++++++++-----------
net/netfilter/nft_set_pipapo.h | 18 +++-
net/netfilter/nft_set_pipapo_avx2.c | 17 ++--
net/rxrpc/conn_event.c | 8 ++
net/tipc/bearer.c | 6 ++
net/unix/garbage.c | 11 +++
sound/soc/amd/acp-config.c | 15 +--
sound/usb/quirks.c | 6 ++
tools/testing/selftests/net/cmsg_ipv6.sh | 4 +-
tools/testing/selftests/net/pmtu.sh | 18 +++-
tools/testing/selftests/net/udpgro_fwd.sh | 14 ++-
tools/testing/selftests/net/udpgso_bench_rx.c | 2 +-
70 files changed, 566 insertions(+), 239 deletions(-)
This is a note to let you know that I've just added the patch titled
iio: accel: bma400: Fix a compilation problem
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 4cb81840d8f29b66d9d05c6d7f360c9560f7e2f4 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello(a)amd.com>
Date: Wed, 31 Jan 2024 16:52:46 -0600
Subject: iio: accel: bma400: Fix a compilation problem
The kernel fails when compiling without `CONFIG_REGMAP_I2C` but with
`CONFIG_BMA400`.
```
ld: drivers/iio/accel/bma400_i2c.o: in function `bma400_i2c_probe':
bma400_i2c.c:(.text+0x23): undefined reference to `__devm_regmap_init_i2c'
```
Link: https://download.01.org/0day-ci/archive/20240131/202401311634.FE5CBVwe-lkp@…
Fixes: 465c811f1f20 ("iio: accel: Add driver for the BMA400")
Fixes: 9bea10642396 ("iio: accel: bma400: add support for bma400 spi")
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://lore.kernel.org/r/20240131225246.14169-1-mario.limonciello@amd.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/accel/Kconfig | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index 91adcac875a4..c9d7afe489e8 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -219,10 +219,12 @@ config BMA400
config BMA400_I2C
tristate
+ select REGMAP_I2C
depends on BMA400
config BMA400_SPI
tristate
+ select REGMAP_SPI
depends on BMA400
config BMC150_ACCEL
--
2.43.1
This is a note to let you know that I've just added the patch titled
iio: commom: st_sensors: ensure proper DMA alignment
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 862cf85fef85becc55a173387527adb4f076fab0 Mon Sep 17 00:00:00 2001
From: Nuno Sa <nuno.sa(a)analog.com>
Date: Wed, 31 Jan 2024 10:16:47 +0100
Subject: iio: commom: st_sensors: ensure proper DMA alignment
Aligning the buffer to the L1 cache is not sufficient in some platforms
as they might have larger cacheline sizes for caches after L1 and thus,
we can't guarantee DMA safety.
That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same
for st_sensors common buffer.
While at it, moved the odr_lock before buffer_data as we definitely
don't want any other data to share a cacheline with the buffer.
[1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/
Fixes: e031d5f558f1 ("iio:st_sensors: remove buffer allocation at each buffer enable")
Signed-off-by: Nuno Sa <nuno.sa(a)analog.com>
Cc: <Stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20240131-dev_dma_safety_stm-v2-1-580c07fae51b@ana…
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
include/linux/iio/common/st_sensors.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 607c3a89a647..f9ae5cdd884f 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -258,9 +258,9 @@ struct st_sensor_data {
bool hw_irq_trigger;
s64 hw_timestamp;
- char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned;
-
struct mutex odr_lock;
+
+ char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] __aligned(IIO_DMA_MINALIGN);
};
#ifdef CONFIG_IIO_BUFFER
--
2.43.1
This is a note to let you know that I've just added the patch titled
iio: hid-sensor-als: Return 0 for HID_USAGE_SENSOR_TIME_TIMESTAMP
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 621c6257128149e45b36ffb973a01c3f3461b893 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Date: Sun, 4 Feb 2024 04:56:17 -0800
Subject: iio: hid-sensor-als: Return 0 for HID_USAGE_SENSOR_TIME_TIMESTAMP
When als_capture_sample() is called with usage ID
HID_USAGE_SENSOR_TIME_TIMESTAMP, return 0. The HID sensor core ignores
the return value for capture_sample() callback, so return value doesn't
make difference. But correct the return value to return success instead
of -EINVAL.
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240204125617.2635574-1-srinivas.pandruvada@linu…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/light/hid-sensor-als.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 5cd27f04b45e..b6c4bef2a7bb 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -226,6 +226,7 @@ static int als_capture_sample(struct hid_sensor_hub_device *hsdev,
case HID_USAGE_SENSOR_TIME_TIMESTAMP:
als_state->timestamp = hid_sensor_convert_timestamp(&als_state->common_attributes,
*(s64 *)raw_data);
+ ret = 0;
break;
default:
break;
--
2.43.1
This is a note to let you know that I've just added the patch titled
staging: iio: ad5933: fix type mismatch regression
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 6db053cd949fcd6254cea9f2cd5d39f7bd64379c Mon Sep 17 00:00:00 2001
From: David Schiller <david.schiller(a)jku.at>
Date: Mon, 22 Jan 2024 14:49:17 +0100
Subject: staging: iio: ad5933: fix type mismatch regression
Commit 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse
warning") fixed a compiler warning, but introduced a bug that resulted
in one of the two 16 bit IIO channels always being zero (when both are
enabled).
This is because int is 32 bits wide on most architectures and in the
case of a little-endian machine the two most significant bytes would
occupy the buffer for the second channel as 'val' is being passed as a
void pointer to 'iio_push_to_buffers()'.
Fix by defining 'val' as u16. Tested working on ARM64.
Fixes: 4c3577db3e4f ("Staging: iio: impedance-analyzer: Fix sparse warning")
Signed-off-by: David Schiller <david.schiller(a)jku.at>
Link: https://lore.kernel.org/r/20240122134916.2137957-1-david.schiller@jku.at
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/staging/iio/impedance-analyzer/ad5933.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index e748a5d04e97..9149d41fe65b 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -608,7 +608,7 @@ static void ad5933_work(struct work_struct *work)
struct ad5933_state, work.work);
struct iio_dev *indio_dev = i2c_get_clientdata(st->client);
__be16 buf[2];
- int val[2];
+ u16 val[2];
unsigned char status;
int ret;
--
2.43.1
This is a note to let you know that I've just added the patch titled
iio: adc: ad_sigma_delta: ensure proper DMA alignment
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 59598510be1d49e1cff7fd7593293bb8e1b2398b Mon Sep 17 00:00:00 2001
From: Nuno Sa <nuno.sa(a)analog.com>
Date: Wed, 17 Jan 2024 13:41:03 +0100
Subject: iio: adc: ad_sigma_delta: ensure proper DMA alignment
Aligning the buffer to the L1 cache is not sufficient in some platforms
as they might have larger cacheline sizes for caches after L1 and thus,
we can't guarantee DMA safety.
That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same
for the sigma_delta ADCs.
[1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/
Fixes: 0fb6ee8d0b5e ("iio: ad_sigma_delta: Don't put SPI transfer buffer on the stack")
Signed-off-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://lore.kernel.org/r/20240117-dev_sigma_delta_no_irq_flags-v1-1-db3926…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
include/linux/iio/adc/ad_sigma_delta.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h
index 7852f6c9a714..719cf9cc6e1a 100644
--- a/include/linux/iio/adc/ad_sigma_delta.h
+++ b/include/linux/iio/adc/ad_sigma_delta.h
@@ -8,6 +8,8 @@
#ifndef __AD_SIGMA_DELTA_H__
#define __AD_SIGMA_DELTA_H__
+#include <linux/iio/iio.h>
+
enum ad_sigma_delta_mode {
AD_SD_MODE_CONTINUOUS = 0,
AD_SD_MODE_SINGLE = 1,
@@ -99,7 +101,7 @@ struct ad_sigma_delta {
* 'rx_buf' is up to 32 bits per sample + 64 bit timestamp,
* rounded to 16 bytes to take into account padding.
*/
- uint8_t tx_buf[4] ____cacheline_aligned;
+ uint8_t tx_buf[4] __aligned(IIO_DMA_MINALIGN);
uint8_t rx_buf[16] __aligned(8);
};
--
2.43.1
This is a note to let you know that I've just added the patch titled
iio: imu: adis: ensure proper DMA alignment
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 8e98b87f515d8c4bae521048a037b2cc431c3fd5 Mon Sep 17 00:00:00 2001
From: Nuno Sa <nuno.sa(a)analog.com>
Date: Wed, 17 Jan 2024 14:10:49 +0100
Subject: iio: imu: adis: ensure proper DMA alignment
Aligning the buffer to the L1 cache is not sufficient in some platforms
as they might have larger cacheline sizes for caches after L1 and thus,
we can't guarantee DMA safety.
That was the whole reason to introduce IIO_DMA_MINALIGN in [1]. Do the same
for the sigma_delta ADCs.
[1]: https://lore.kernel.org/linux-iio/20220508175712.647246-2-jic23@kernel.org/
Fixes: ccd2b52f4ac6 ("staging:iio: Add common ADIS library")
Signed-off-by: Nuno Sa <nuno.sa(a)analog.com>
Link: https://lore.kernel.org/r/20240117-adis-improv-v1-1-7f90e9fad200@analog.com
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
include/linux/iio/imu/adis.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index dc9ea299e088..8898966bc0f0 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -11,6 +11,7 @@
#include <linux/spi/spi.h>
#include <linux/interrupt.h>
+#include <linux/iio/iio.h>
#include <linux/iio/types.h>
#define ADIS_WRITE_REG(reg) ((0x80 | (reg)))
@@ -131,7 +132,7 @@ struct adis {
unsigned long irq_flag;
void *buffer;
- u8 tx[10] ____cacheline_aligned;
+ u8 tx[10] __aligned(IIO_DMA_MINALIGN);
u8 rx[4];
};
--
2.43.1
This is a note to let you know that I've just added the patch titled
iio: imu: bno055: serdev requires REGMAP
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 35ec2d03b282a939949090bd8c39eb37a5856721 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap(a)infradead.org>
Date: Wed, 10 Jan 2024 10:56:11 -0800
Subject: iio: imu: bno055: serdev requires REGMAP
There are a ton of build errors when REGMAP is not set, so select
REGMAP to fix all of them.
Examples (not all of them):
../drivers/iio/imu/bno055/bno055_ser_core.c:495:15: error: variable 'bno055_ser_regmap_bus' has initializer but incomplete type
495 | static struct regmap_bus bno055_ser_regmap_bus = {
../drivers/iio/imu/bno055/bno055_ser_core.c:496:10: error: 'struct regmap_bus' has no member named 'write'
496 | .write = bno055_ser_write_reg,
../drivers/iio/imu/bno055/bno055_ser_core.c:497:10: error: 'struct regmap_bus' has no member named 'read'
497 | .read = bno055_ser_read_reg,
../drivers/iio/imu/bno055/bno055_ser_core.c: In function 'bno055_ser_probe':
../drivers/iio/imu/bno055/bno055_ser_core.c:532:18: error: implicit declaration of function 'devm_regmap_init'; did you mean 'vmem_map_init'? [-Werror=implicit-function-declaration]
532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus,
../drivers/iio/imu/bno055/bno055_ser_core.c:532:16: warning: assignment to 'struct regmap *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
532 | regmap = devm_regmap_init(&serdev->dev, &bno055_ser_regmap_bus,
../drivers/iio/imu/bno055/bno055_ser_core.c: At top level:
../drivers/iio/imu/bno055/bno055_ser_core.c:495:26: error: storage size of 'bno055_ser_regmap_bus' isn't known
495 | static struct regmap_bus bno055_ser_regmap_bus = {
Fixes: 2eef5a9cc643 ("iio: imu: add BNO055 serdev driver")
Signed-off-by: Randy Dunlap <rdunlap(a)infradead.org>
Cc: Andrea Merello <andrea.merello(a)iit.it>
Cc: Jonathan Cameron <jic23(a)kernel.org>
Cc: Lars-Peter Clausen <lars(a)metafoo.de>
Cc: linux-iio(a)vger.kernel.org
Cc: <Stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/20240110185611.19723-1-rdunlap@infradead.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/imu/bno055/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/iio/imu/bno055/Kconfig b/drivers/iio/imu/bno055/Kconfig
index 83e53acfbe88..c7f5866a177d 100644
--- a/drivers/iio/imu/bno055/Kconfig
+++ b/drivers/iio/imu/bno055/Kconfig
@@ -8,6 +8,7 @@ config BOSCH_BNO055
config BOSCH_BNO055_SERIAL
tristate "Bosch BNO055 attached via UART"
depends on SERIAL_DEV_BUS
+ select REGMAP
select BOSCH_BNO055
help
Enable this to support Bosch BNO055 IMUs attached via UART.
--
2.43.1
This is a note to let you know that I've just added the patch titled
iio: pressure: bmp280: Add missing bmp085 to SPI id table
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From b67f3e653e305abf1471934d7b9fdb9ad2df3eef Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko(a)linaro.org>
Date: Wed, 20 Dec 2023 12:47:53 -0600
Subject: iio: pressure: bmp280: Add missing bmp085 to SPI id table
"bmp085" is missing in bmp280_spi_id[] table, which leads to the next
warning in dmesg:
SPI driver bmp280 has no spi_device_id for bosch,bmp085
Add "bmp085" to bmp280_spi_id[] by mimicking its existing description in
bmp280_of_spi_match[] table to fix the above warning.
Signed-off-by: Sam Protsenko <semen.protsenko(a)linaro.org>
Fixes: b26b4e91700f ("iio: pressure: bmp280: add SPI interface driver")
Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/pressure/bmp280-spi.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/iio/pressure/bmp280-spi.c b/drivers/iio/pressure/bmp280-spi.c
index 433d6fac83c4..e8a5fed07e88 100644
--- a/drivers/iio/pressure/bmp280-spi.c
+++ b/drivers/iio/pressure/bmp280-spi.c
@@ -87,6 +87,7 @@ static const struct of_device_id bmp280_of_spi_match[] = {
MODULE_DEVICE_TABLE(of, bmp280_of_spi_match);
static const struct spi_device_id bmp280_spi_id[] = {
+ { "bmp085", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp180", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp181", (kernel_ulong_t)&bmp180_chip_info },
{ "bmp280", (kernel_ulong_t)&bmp280_chip_info },
--
2.43.1
This is a note to let you know that I've just added the patch titled
iio: magnetometer: rm3100: add boundary check for the value read from
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 792595bab4925aa06532a14dd256db523eb4fa5e Mon Sep 17 00:00:00 2001
From: "zhili.liu" <zhili.liu(a)ucas.com.cn>
Date: Tue, 2 Jan 2024 09:07:11 +0800
Subject: iio: magnetometer: rm3100: add boundary check for the value read from
RM3100_REG_TMRC
Recently, we encounter kernel crash in function rm3100_common_probe
caused by out of bound access of array rm3100_samp_rates (because of
underlying hardware failures). Add boundary check to prevent out of
bound access.
Fixes: 121354b2eceb ("iio: magnetometer: Add driver support for PNI RM3100")
Suggested-by: Zhouyi Zhou <zhouzhouyi(a)gmail.com>
Signed-off-by: zhili.liu <zhili.liu(a)ucas.com.cn>
Link: https://lore.kernel.org/r/1704157631-3814-1-git-send-email-zhouzhouyi@gmail…
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/magnetometer/rm3100-core.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/iio/magnetometer/rm3100-core.c b/drivers/iio/magnetometer/rm3100-core.c
index 69938204456f..42b70cd42b39 100644
--- a/drivers/iio/magnetometer/rm3100-core.c
+++ b/drivers/iio/magnetometer/rm3100-core.c
@@ -530,6 +530,7 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq)
struct rm3100_data *data;
unsigned int tmp;
int ret;
+ int samp_rate_index;
indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
if (!indio_dev)
@@ -586,9 +587,14 @@ int rm3100_common_probe(struct device *dev, struct regmap *regmap, int irq)
ret = regmap_read(regmap, RM3100_REG_TMRC, &tmp);
if (ret < 0)
return ret;
+
+ samp_rate_index = tmp - RM3100_TMRC_OFFSET;
+ if (samp_rate_index < 0 || samp_rate_index >= RM3100_SAMP_NUM) {
+ dev_err(dev, "The value read from RM3100_REG_TMRC is invalid!\n");
+ return -EINVAL;
+ }
/* Initializing max wait time, which is double conversion time. */
- data->conversion_time = rm3100_samp_rates[tmp - RM3100_TMRC_OFFSET][2]
- * 2;
+ data->conversion_time = rm3100_samp_rates[samp_rate_index][2] * 2;
/* Cycle count values may not be what we want. */
if ((tmp - RM3100_TMRC_OFFSET) == 0)
--
2.43.1
This is a note to let you know that I've just added the patch titled
iio: core: fix memleak in iio_device_register_sysfs
to my char-misc git tree which can be found at
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
in the char-misc-linus branch.
The patch will show up in the next release of the linux-next tree
(usually sometime within the next 24 hours during the week.)
The patch will hopefully also be merged in Linus's tree for the
next -rc kernel release.
If you have any questions about this process, please let me know.
From 95a0d596bbd0552a78e13ced43f2be1038883c81 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu(a)zju.edu.cn>
Date: Fri, 8 Dec 2023 15:31:19 +0800
Subject: iio: core: fix memleak in iio_device_register_sysfs
When iio_device_register_sysfs_group() fails, we should
free iio_dev_opaque->chan_attr_group.attrs to prevent
potential memleak.
Fixes: 32f171724e5c ("iio: core: rework iio device group creation")
Signed-off-by: Dinghao Liu <dinghao.liu(a)zju.edu.cn>
Link: https://lore.kernel.org/r/20231208073119.29283-1-dinghao.liu@zju.edu.cn
Cc: <Stable(a)vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
---
drivers/iio/industrialio-core.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 9a85752124dd..173dc00762a1 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1584,10 +1584,13 @@ static int iio_device_register_sysfs(struct iio_dev *indio_dev)
ret = iio_device_register_sysfs_group(indio_dev,
&iio_dev_opaque->chan_attr_group);
if (ret)
- goto error_clear_attrs;
+ goto error_free_chan_attrs;
return 0;
+error_free_chan_attrs:
+ kfree(iio_dev_opaque->chan_attr_group.attrs);
+ iio_dev_opaque->chan_attr_group.attrs = NULL;
error_clear_attrs:
iio_free_chan_devattr_list(&iio_dev_opaque->channel_attr_list);
--
2.43.1
From: Maxime Jayat <maxime.jayat(a)mobile-devices.fr>
The TDCO calculation was done using the currently applied data bittiming,
instead of the newly computed data bittiming, which means that the TDCO
had an invalid value unless setting the same data bittiming twice.
Fixes: d99755f71a80 ("can: netlink: add interface for CAN-FD Transmitter Delay Compensation (TDC)")
Signed-off-by: Maxime Jayat <maxime.jayat(a)mobile-devices.fr>
Reviewed-by: Vincent Mailhol <mailhol.vincent(a)wanadoo.fr>
Link: https://lore.kernel.org/all/40579c18-63c0-43a4-8d4c-f3a6c1c0b417@munic.io
Cc: stable(a)vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl(a)pengutronix.de>
---
drivers/net/can/dev/netlink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index 036d85ef07f5..dfdc039d92a6 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -346,7 +346,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
/* Neither of TDC parameters nor TDC flags are
* provided: do calculation
*/
- can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming,
+ can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt,
&priv->ctrlmode, priv->ctrlmode_supported);
} /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly
* turned off. TDC is disabled: do nothing
--
2.43.0
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.
The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.
Cc: <stable(a)vger.kernel.org> # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-…
Acked-by: Christian König <christian.koenig(a)amd.com>
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam(a)amd.com>
---
drivers/gpu/drm/drm_buddy.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
list_splice_tail(&allocated, blocks);
+
+ if (total_allocated < size) {
+ err = -ENOSPC;
+ goto err_free;
+ }
+
return 0;
err_undo:
base-commit: b6ddaa63f728d26c12048aed76be99c24f435c41
--
2.25.1
It has been observed that some USB/UAS devices return generic
properties hardcoded in firmware for mode pages and vital product data
for a period of time after a device has been discovered. The reported
properties are either garbage or they do not accurately reflect the
properties of the physical storage device attached in the case of a
bridge.
Prior to commit 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to
avoid calling revalidate twice") we would call revalidate several
times during device discovery. As a result, incorrect values would
eventually get replaced with ones accurately describing the attached
storage. When we did away with the redundant revalidate pass, several
cases were reported where devices reported nonsensical values or would
end up in write-protected state.
An initial attempt at addressing this issue involved introducing a
delayed second revalidate invocation. However, this approach still
left some devices reporting incorrect characteristics.
Tasos Sahanidis debugged the problem further and identified that
introducing a READ operation prior to MODE SENSE fixed the problem and
that it wasn't a timing issue. Issuing a READ appears to cause the
devices to update their SCSI pages to reflect the actual properties of
the storage media. Device properties like vendor, model, and storage
capacity appear to be correctly reported from the get-go. It is
unclear why these device defer populating the remaining
characteristics.
Match the behavior of a well known commercial operating system and
trigger a READ operation prior to querying device characteristics to
force the device to populate mode pages and VPDs.
The additional READ is triggered by a flag set in the USB storage and
UAS drivers. We avoid issuing the READ for other transport classes
since some storage devices identify Linux through our particular
discovery command sequence.
Cc: <stable(a)vger.kernel.org>
Fixes: 1e029397d12f ("scsi: sd: Reorganize DIF/DIX code to avoid calling revalidate twice")
Reported-by: Tasos Sahanidis <tasos(a)tasossah.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
---
drivers/scsi/sd.c | 27 ++++++++++++++++++++++++++-
drivers/usb/storage/scsiglue.c | 7 +++++++
drivers/usb/storage/uas.c | 7 +++++++
include/scsi/scsi_device.h | 1 +
4 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 530918cbfce2..c284628f702c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3405,6 +3405,24 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp,
return true;
}
+static void sd_read_block_zero(struct scsi_disk *sdkp)
+{
+ unsigned int buf_len = sdkp->device->sector_size;
+ char *buffer, cmd[10] = { };
+
+ buffer = kmalloc(buf_len, GFP_KERNEL);
+ if (!buffer)
+ return;
+
+ cmd[0] = READ_10;
+ put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */
+ put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */
+
+ scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len,
+ SD_TIMEOUT, sdkp->max_retries, NULL);
+ kfree(buffer);
+}
+
/**
* sd_revalidate_disk - called the first time a new disk is seen,
* performs disk spin up, read_capacity, etc.
@@ -3444,7 +3462,14 @@ static int sd_revalidate_disk(struct gendisk *disk)
*/
if (sdkp->media_present) {
sd_read_capacity(sdkp, buffer);
-
+ /*
+ * Some USB/UAS devices return generic values for mode pages
+ * and VPDs until the media has been accessed. Trigger a READ
+ * operation to force the device to populate mode pages and
+ * VPDs.
+ */
+ if (sdp->read_before_ms)
+ sd_read_block_zero(sdkp);
/*
* set the default to rotational. All non-rotational devices
* support the block characteristics VPD page, which will
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index c54e9805da53..12cf9940e5b6 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -179,6 +179,13 @@ static int slave_configure(struct scsi_device *sdev)
*/
sdev->use_192_bytes_for_3f = 1;
+ /*
+ * Some devices report generic values until the media has been
+ * accessed. Force a READ(10) prior to querying device
+ * characteristics.
+ */
+ sdev->read_before_ms = 1;
+
/*
* Some devices don't like MODE SENSE with page=0x3f,
* which is the command used for checking if a device
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 696bb0b23599..299a6767b7b3 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -878,6 +878,13 @@ static int uas_slave_configure(struct scsi_device *sdev)
if (devinfo->flags & US_FL_CAPACITY_HEURISTICS)
sdev->guess_capacity = 1;
+ /*
+ * Some devices report generic values until the media has been
+ * accessed. Force a READ(10) prior to querying device
+ * characteristics.
+ */
+ sdev->read_before_ms = 1;
+
/*
* Some devices don't like MODE SENSE with page=0x3f,
* which is the command used for checking if a device
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 10480eb582b2..cb019c80763b 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -202,6 +202,7 @@ struct scsi_device {
unsigned use_10_for_rw:1; /* first try 10-byte read / write */
unsigned use_10_for_ms:1; /* first try 10-byte mode sense/select */
unsigned set_dbd_for_ms:1; /* Set "DBD" field in mode sense */
+ unsigned read_before_ms:1; /* perform a READ before MODE SENSE */
unsigned no_report_opcodes:1; /* no REPORT SUPPORTED OPERATION CODES */
unsigned no_write_same:1; /* no WRITE SAME command */
unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */
--
2.42.1
It's already into 6.6 and fixes the Syzkaller issue
Link: https://syzkaller.appspot.com/bug?id=e4aaa78795e490421c79f76ec3679006c8ff4c…
Theoretically the issue boils down to
commit 51ae846cff56 ("ext4: fix warning in ext4_iomap_begin
as race between bmap and write")
so it should be in 5.10, 5.15 and 6.1 kernels.
But we at Linux Verification Center can reproduce it with 5.15 and 6.1 only
so I'm asking to apply the fix for those two.
Theodore Ts'o (1):
ext4, jbd2: add an optimized bmap for the journal inode
fs/ext4/super.c | 23 +++++++++++++++++++++++
fs/jbd2/journal.c | 9 ++++++---
include/linux/jbd2.h | 8 ++++++++
3 files changed, 37 insertions(+), 3 deletions(-)
--
2.34.1
The comedi_test devices have a couple of timers (ai_timer and ao_timer)
that can be started to simulate hardware interrupts. Their expiry
functions normally reschedule the timer. The driver code calls either
del_timer_sync() or del_timer() to delete the timers from the queue, but
does not currently prevent the timers from rescheduling themselves so
synchronized deletion may be ineffective.
Add a couple of boolean members (one for each timer: ai_timer_enable and
ao_timer_enable) to the device private data structure to indicate
whether the timers are allowed to reschedule themselves. Set the member
to true when adding the timer to the queue, and to false when deleting
the timer from the queue in the waveform_ai_cancel() and
waveform_ao_cancel() functions.
The del_timer_sync() function is also called from the waveform_detach()
function, but the timer enable members will already be set to false when
that function is called, so no change is needed there.
Fixes: 403fe7f34e33 ("staging: comedi: comedi_test: fix timer race conditions")
Cc: <stable(a)vger.kernel.org> # 4.4+
Signed-off-by: Ian Abbott <abbotti(a)mev.co.uk>
---
drivers/comedi/drivers/comedi_test.c | 37 +++++++++++++++++++++++++---
1 file changed, 33 insertions(+), 4 deletions(-)
diff --git a/drivers/comedi/drivers/comedi_test.c b/drivers/comedi/drivers/comedi_test.c
index 30ea8b53ebf8..7fefe0de0bcc 100644
--- a/drivers/comedi/drivers/comedi_test.c
+++ b/drivers/comedi/drivers/comedi_test.c
@@ -87,6 +87,8 @@ struct waveform_private {
struct comedi_device *dev; /* parent comedi device */
u64 ao_last_scan_time; /* time of previous AO scan in usec */
unsigned int ao_scan_period; /* AO scan period in usec */
+ bool ai_timer_enable:1; /* should AI timer be running? */
+ bool ao_timer_enable:1; /* should AO timer be running? */
unsigned short ao_loopbacks[N_CHANS];
};
@@ -232,12 +234,18 @@ static void waveform_ai_timer(struct timer_list *t)
if (cmd->stop_src == TRIG_COUNT && async->scans_done >= cmd->stop_arg) {
async->events |= COMEDI_CB_EOA;
} else {
+ unsigned long flags;
+
if (devpriv->ai_convert_time > now)
time_increment = devpriv->ai_convert_time - now;
else
time_increment = 1;
- mod_timer(&devpriv->ai_timer,
- jiffies + usecs_to_jiffies(time_increment));
+ spin_lock_irqsave(&dev->spinlock, flags);
+ if (devpriv->ai_timer_enable) {
+ mod_timer(&devpriv->ai_timer,
+ jiffies + usecs_to_jiffies(time_increment));
+ }
+ spin_unlock_irqrestore(&dev->spinlock, flags);
}
overrun:
@@ -352,6 +360,7 @@ static int waveform_ai_cmd(struct comedi_device *dev,
struct comedi_cmd *cmd = &s->async->cmd;
unsigned int first_convert_time;
u64 wf_current;
+ unsigned long flags;
if (cmd->flags & CMDF_PRIORITY) {
dev_err(dev->class_dev,
@@ -393,9 +402,12 @@ static int waveform_ai_cmd(struct comedi_device *dev,
* Seem to need an extra jiffy here, otherwise timer expires slightly
* early!
*/
+ spin_lock_irqsave(&dev->spinlock, flags);
+ devpriv->ai_timer_enable = true;
devpriv->ai_timer.expires =
jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1;
add_timer(&devpriv->ai_timer);
+ spin_unlock_irqrestore(&dev->spinlock, flags);
return 0;
}
@@ -403,7 +415,11 @@ static int waveform_ai_cancel(struct comedi_device *dev,
struct comedi_subdevice *s)
{
struct waveform_private *devpriv = dev->private;
+ unsigned long flags;
+ spin_lock_irqsave(&dev->spinlock, flags);
+ devpriv->ai_timer_enable = false;
+ spin_unlock_irqrestore(&dev->spinlock, flags);
if (in_softirq()) {
/* Assume we were called from the timer routine itself. */
del_timer(&devpriv->ai_timer);
@@ -494,9 +510,14 @@ static void waveform_ao_timer(struct timer_list *t)
} else {
unsigned int time_inc = devpriv->ao_last_scan_time +
devpriv->ao_scan_period - now;
+ unsigned long flags;
- mod_timer(&devpriv->ao_timer,
- jiffies + usecs_to_jiffies(time_inc));
+ spin_lock_irqsave(&dev->spinlock, flags);
+ if (devpriv->ao_timer_enable) {
+ mod_timer(&devpriv->ao_timer,
+ jiffies + usecs_to_jiffies(time_inc));
+ }
+ spin_unlock_irqrestore(&dev->spinlock, flags);
}
underrun:
@@ -510,6 +531,7 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev,
struct waveform_private *devpriv = dev->private;
struct comedi_async *async = s->async;
struct comedi_cmd *cmd = &async->cmd;
+ unsigned long flags;
if (trig_num != cmd->start_arg)
return -EINVAL;
@@ -517,9 +539,12 @@ static int waveform_ao_inttrig_start(struct comedi_device *dev,
async->inttrig = NULL;
devpriv->ao_last_scan_time = ktime_to_us(ktime_get());
+ spin_lock_irqsave(&dev->spinlock, flags);
+ devpriv->ao_timer_enable = true;
devpriv->ao_timer.expires =
jiffies + usecs_to_jiffies(devpriv->ao_scan_period);
add_timer(&devpriv->ao_timer);
+ spin_unlock_irqrestore(&dev->spinlock, flags);
return 1;
}
@@ -602,8 +627,12 @@ static int waveform_ao_cancel(struct comedi_device *dev,
struct comedi_subdevice *s)
{
struct waveform_private *devpriv = dev->private;
+ unsigned long flags;
s->async->inttrig = NULL;
+ spin_lock_irqsave(&dev->spinlock, flags);
+ devpriv->ao_timer_enable = false;
+ spin_unlock_irqrestore(&dev->spinlock, flags);
if (in_softirq()) {
/* Assume we were called from the timer routine itself. */
del_timer(&devpriv->ao_timer);
--
2.43.0
The value of the [ms]envcfg CSR is lost when entering a nonretentive
idle state, so the CSR must be rewritten when resuming the CPU.
The [ms]envcfg CSR was added in version 1.12 of the privileged ISA, and
is used by extensions other than Zicboz. However, the kernel currenly
has no way to determine the privileged ISA version. Since Zicboz is the
only in-kernel user of this CSR so far, use it as a proxy for
determining if the CSR is implemented.
Cc: <stable(a)vger.kernel.org> # v6.7+
Fixes: 43c16d51a19b ("RISC-V: Enable cbo.zero in usermode")
Signed-off-by: Samuel Holland <samuel.holland(a)sifive.com>
---
Changes in v3:
- Check for Zicboz instead of the privileged ISA version
Changes in v2:
- Check for privileged ISA v1.12 instead of the specific CSR
- Use riscv_has_extension_likely() instead of new ALTERNATIVE()s
arch/riscv/include/asm/suspend.h | 1 +
arch/riscv/kernel/suspend.c | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/arch/riscv/include/asm/suspend.h b/arch/riscv/include/asm/suspend.h
index 02f87867389a..491296a335d0 100644
--- a/arch/riscv/include/asm/suspend.h
+++ b/arch/riscv/include/asm/suspend.h
@@ -14,6 +14,7 @@ struct suspend_context {
struct pt_regs regs;
/* Saved and restored by high-level functions */
unsigned long scratch;
+ unsigned long envcfg;
unsigned long tvec;
unsigned long ie;
#ifdef CONFIG_MMU
diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c
index 239509367e42..28166006688e 100644
--- a/arch/riscv/kernel/suspend.c
+++ b/arch/riscv/kernel/suspend.c
@@ -15,6 +15,8 @@
void suspend_save_csrs(struct suspend_context *context)
{
context->scratch = csr_read(CSR_SCRATCH);
+ if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
+ context->envcfg = csr_read(CSR_ENVCFG);
context->tvec = csr_read(CSR_TVEC);
context->ie = csr_read(CSR_IE);
@@ -36,6 +38,8 @@ void suspend_save_csrs(struct suspend_context *context)
void suspend_restore_csrs(struct suspend_context *context)
{
csr_write(CSR_SCRATCH, context->scratch);
+ if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
+ csr_write(CSR_ENVCFG, context->envcfg);
csr_write(CSR_TVEC, context->tvec);
csr_write(CSR_IE, context->ie);
--
2.43.0
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.
The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.
Cc: <stable(a)vger.kernel.org> # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-…
Acked-by: Christian König <christian.koenig(a)amd.com>
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam(a)amd.com>
---
drivers/gpu/drm/drm_buddy.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
list_splice_tail(&allocated, blocks);
+
+ if (total_allocated < size) {
+ err = -ENOSPC;
+ goto err_free;
+ }
+
return 0;
err_undo:
base-commit: 2c80a2b715df75881359d07dbaacff8ad411f40e
--
2.25.1
Backport e11dea8 ("dlm: use kernel_connect() and kernel_bind()") to
Linux stable 6.1 caused a regression. The original patch expected
dlm_local_addrs[0] to be of type sockaddr_storage, because c51c9cd ("fs:
dlm: don't put dlm_local_addrs on heap") changed its type from
sockaddr_storage* to sockaddr_storage in Linux 6.5+ while in older Linux
versions this is still the original sockaddr_storage*.
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1063338
Cc: <stable(a)vger.kernel.org> # 6.1.x
Fixes: e11dea8f5033 ("dlm: use kernel_connect() and kernel_bind()")
Signed-off-by: Jordan Rife <jrife(a)google.com>
---
fs/dlm/lowcomms.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 72f34f96d0155..8426073e73cf2 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1900,7 +1900,7 @@ static int dlm_tcp_listen_bind(struct socket *sock)
/* Bind to our port */
make_sockaddr(dlm_local_addr[0], dlm_config.ci_tcp_port, &addr_len);
- return kernel_bind(sock, (struct sockaddr *)&dlm_local_addr[0],
+ return kernel_bind(sock, (struct sockaddr *)dlm_local_addr[0],
addr_len);
}
--
2.43.0.687.g38aa6559b0-goog
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 76b367a2d83163cf19173d5cb0b562acbabc8eac
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021330-twice-pacify-2be5@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
76b367a2d831 ("io_uring/net: limit inline multishot retries")
91e5d765a82f ("io_uring/net: un-indent mshot retry path in io_recv_finish()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76b367a2d83163cf19173d5cb0b562acbabc8eac Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Mon, 29 Jan 2024 12:00:58 -0700
Subject: [PATCH] io_uring/net: limit inline multishot retries
If we have multiple clients and some/all are flooding the receives to
such an extent that we can retry a LOT handling multishot receives, then
we can be starving some clients and hence serving traffic in an
imbalanced fashion.
Limit multishot retry attempts to some arbitrary value, whose only
purpose serves to ensure that we don't keep serving a single connection
for way too long. We default to 32 retries, which should be more than
enough to provide fairness, yet not so small that we'll spend too much
time requeuing rather than handling traffic.
Cc: stable(a)vger.kernel.org
Depends-on: 704ea888d646 ("io_uring/poll: add requeue return code from poll multishot handling")
Depends-on: 1e5d765a82f ("io_uring/net: un-indent mshot retry path in io_recv_finish()")
Depends-on: e84b01a880f6 ("io_uring/poll: move poll execution helpers higher up")
Fixes: b3fdea6ecb55 ("io_uring: multishot recv")
Fixes: 9bb66906f23e ("io_uring: support multishot in recvmsg")
Link: https://github.com/axboe/liburing/issues/1043
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index 740c6bfa5b59..a12ff69e6843 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -60,6 +60,7 @@ struct io_sr_msg {
unsigned len;
unsigned done_io;
unsigned msg_flags;
+ unsigned nr_multishot_loops;
u16 flags;
/* initialised and used only by !msg send variants */
u16 addr_len;
@@ -70,6 +71,13 @@ struct io_sr_msg {
struct io_kiocb *notif;
};
+/*
+ * Number of times we'll try and do receives if there's more data. If we
+ * exceed this limit, then add us to the back of the queue and retry from
+ * there. This helps fairness between flooding clients.
+ */
+#define MULTISHOT_MAX_RETRY 32
+
static inline bool io_check_multishot(struct io_kiocb *req,
unsigned int issue_flags)
{
@@ -611,6 +619,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
sr->done_io = 0;
+ sr->nr_multishot_loops = 0;
return 0;
}
@@ -654,12 +663,20 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
*/
if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
*ret, cflags | IORING_CQE_F_MORE)) {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
+
io_recv_prep_retry(req);
/* Known not-empty or unknown state, retry */
- if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1)
- return false;
+ if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
+ if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
+ return false;
+ /* mshot retries exceeded, force a requeue */
+ sr->nr_multishot_loops = 0;
+ mshot_retry_ret = IOU_REQUEUE;
+ }
if (issue_flags & IO_URING_F_MULTISHOT)
- *ret = IOU_ISSUE_SKIP_COMPLETE;
+ *ret = mshot_retry_ret;
else
*ret = -EAGAIN;
return true;
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x dad6a09f3148257ac1773cd90934d721d68ab595
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021300-sagging-enhance-9113@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
dad6a09f3148 ("hrtimer: Report offline hrtimer enqueue")
5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
f61eff83cec9 ("hrtimer: Prepare support for PREEMPT_RT")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From dad6a09f3148257ac1773cd90934d721d68ab595 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic(a)kernel.org>
Date: Mon, 29 Jan 2024 15:56:36 -0800
Subject: [PATCH] hrtimer: Report offline hrtimer enqueue
The hrtimers migration on CPU-down hotplug process has been moved
earlier, before the CPU actually goes to die. This leaves a small window
of opportunity to queue an hrtimer in a blind spot, leaving it ignored.
For example a practical case has been reported with RCU waking up a
SCHED_FIFO task right before the CPUHP_AP_IDLE_DEAD stage, queuing that
way a sched/rt timer to the local offline CPU.
Make sure such situations never go unnoticed and warn when that happens.
Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
Reported-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Frederic Weisbecker <frederic(a)kernel.org>
Signed-off-by: Paul E. McKenney <paulmck(a)kernel.org>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240129235646.3171983-4-boqun.feng@gmail.com
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 87e3bedf8eb0..641c4567cfa7 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -157,6 +157,7 @@ enum hrtimer_base_type {
* @max_hang_time: Maximum time spent in hrtimer_interrupt
* @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
* expired
+ * @online: CPU is online from an hrtimers point of view
* @timer_waiters: A hrtimer_cancel() invocation waits for the timer
* callback to finish.
* @expires_next: absolute time of the next event, is required for remote
@@ -179,7 +180,8 @@ struct hrtimer_cpu_base {
unsigned int hres_active : 1,
in_hrtirq : 1,
hang_detected : 1,
- softirq_activated : 1;
+ softirq_activated : 1,
+ online : 1;
#ifdef CONFIG_HIGH_RES_TIMERS
unsigned int nr_events;
unsigned short nr_retries;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 760793998cdd..edb0f821dcea 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1085,6 +1085,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
enum hrtimer_mode mode)
{
debug_activate(timer, mode);
+ WARN_ON_ONCE(!base->cpu_base->online);
base->cpu_base->active_bases |= 1 << base->index;
@@ -2183,6 +2184,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
cpu_base->softirq_next_timer = NULL;
cpu_base->expires_next = KTIME_MAX;
cpu_base->softirq_expires_next = KTIME_MAX;
+ cpu_base->online = 1;
hrtimer_cpu_base_init_expiry_lock(cpu_base);
return 0;
}
@@ -2250,6 +2252,7 @@ int hrtimers_cpu_dying(unsigned int dying_cpu)
smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
raw_spin_unlock(&new_base->lock);
+ old_base->online = 0;
raw_spin_unlock(&old_base->lock);
return 0;
The WCN6855 firmware on the Lenovo ThinkPad X13s expects the Bluetooth
device address in MSB order when setting it using the
EDL_WRITE_BD_ADDR_OPCODE command.
Presumably, this is the case for all non-ROME devices which all use the
EDL_WRITE_BD_ADDR_OPCODE command for this (unlike the ROME devices which
use a different command and expect the address in LSB order).
Reverse the little-endian address before setting it to make sure that
the address can be configured using tools like btmgmt or using the
'local-bd-address' devicetree property.
Note that this can potentially break systems with boot firmware which
has started relying on the broken behaviour and is incorrectly passing
the address via devicetree in MSB order.
Fixes: 5c0a1001c8be ("Bluetooth: hci_qca: Add helper to set device address")
Cc: stable(a)vger.kernel.org # 5.1
Cc: Balakrishna Godavarthi <quic_bgodavar(a)quicinc.com>
Cc: Matthias Kaehlcke <mka(a)chromium.org>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
Hi Qualcomm people,
Could you please verify with your documentation that all non-ROME
devices expect the address provided in the EDL_WRITE_BD_ADDR_OPCODE
command in MSB order?
I assume this is not something that anyone would change between firmware
revisions, but if that turns out to be the case, we'd need to reverse
the address based on firmware revision or similar.
Johan
drivers/bluetooth/btqca.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index fdb0fae88d1c..29035daf21bc 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -826,11 +826,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup);
int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
{
+ bdaddr_t bdaddr_swapped;
struct sk_buff *skb;
int err;
- skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr,
- HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+ baswap(&bdaddr_swapped, bdaddr);
+
+ skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6,
+ &bdaddr_swapped, HCI_EV_VENDOR,
+ HCI_INIT_TIMEOUT);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err);
--
2.41.0
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 644649553508b9bacf0fc7a5bdc4f9e0165576a5
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024012905-carry-revolt-b8d5@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
644649553508 ("clocksource: Skip watchdog check for large watchdog intervals")
c37e85c135ce ("clocksource: Loosen clocksource watchdog constraints")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 644649553508b9bacf0fc7a5bdc4f9e0165576a5 Mon Sep 17 00:00:00 2001
From: Jiri Wiesner <jwiesner(a)suse.de>
Date: Mon, 22 Jan 2024 18:23:50 +0100
Subject: [PATCH] clocksource: Skip watchdog check for large watchdog intervals
There have been reports of the watchdog marking clocksources unstable on
machines with 8 NUMA nodes:
clocksource: timekeeping watchdog on CPU373:
Marking clocksource 'tsc' as unstable because the skew is too large:
clocksource: 'hpet' wd_nsec: 14523447520
clocksource: 'tsc' cs_nsec: 14524115132
The measured clocksource skew - the absolute difference between cs_nsec
and wd_nsec - was 668 microseconds:
cs_nsec - wd_nsec = 14524115132 - 14523447520 = 667612
The kernel used 200 microseconds for the uncertainty_margin of both the
clocksource and watchdog, resulting in a threshold of 400 microseconds (the
md variable). Both the cs_nsec and the wd_nsec value indicate that the
readout interval was circa 14.5 seconds. The observed behaviour is that
watchdog checks failed for large readout intervals on 8 NUMA node
machines. This indicates that the size of the skew was directly proportinal
to the length of the readout interval on those machines. The measured
clocksource skew, 668 microseconds, was evaluated against a threshold (the
md variable) that is suited for readout intervals of roughly
WATCHDOG_INTERVAL, i.e. HZ >> 1, which is 0.5 second.
The intention of 2e27e793e280 ("clocksource: Reduce clocksource-skew
threshold") was to tighten the threshold for evaluating skew and set the
lower bound for the uncertainty_margin of clocksources to twice
WATCHDOG_MAX_SKEW. Later in c37e85c135ce ("clocksource: Loosen clocksource
watchdog constraints"), the WATCHDOG_MAX_SKEW constant was increased to
125 microseconds to fit the limit of NTP, which is able to use a
clocksource that suffers from up to 500 microseconds of skew per second.
Both the TSC and the HPET use default uncertainty_margin. When the
readout interval gets stretched the default uncertainty_margin is no
longer a suitable lower bound for evaluating skew - it imposes a limit
that is far stricter than the skew with which NTP can deal.
The root causes of the skew being directly proportinal to the length of
the readout interval are:
* the inaccuracy of the shift/mult pairs of clocksources and the watchdog
* the conversion to nanoseconds is imprecise for large readout intervals
Prevent this by skipping the current watchdog check if the readout
interval exceeds 2 * WATCHDOG_INTERVAL. Considering the maximum readout
interval of 2 * WATCHDOG_INTERVAL, the current default uncertainty margin
(of the TSC and HPET) corresponds to a limit on clocksource skew of 250
ppm (microseconds of skew per second). To keep the limit imposed by NTP
(500 microseconds of skew per second) for all possible readout intervals,
the margins would have to be scaled so that the threshold value is
proportional to the length of the actual readout interval.
As for why the readout interval may get stretched: Since the watchdog is
executed in softirq context the expiration of the watchdog timer can get
severely delayed on account of a ksoftirqd thread not getting to run in a
timely manner. Surely, a system with such belated softirq execution is not
working well and the scheduling issue should be looked into but the
clocksource watchdog should be able to deal with it accordingly.
Fixes: 2e27e793e280 ("clocksource: Reduce clocksource-skew threshold")
Suggested-by: Feng Tang <feng.tang(a)intel.com>
Signed-off-by: Jiri Wiesner <jwiesner(a)suse.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Tested-by: Paul E. McKenney <paulmck(a)kernel.org>
Reviewed-by: Feng Tang <feng.tang(a)intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240122172350.GA740@incl
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c108ed8a9804..3052b1f1168e 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -99,6 +99,7 @@ static u64 suspend_start;
* Interval: 0.5sec.
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
+#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ))
/*
* Threshold: 0.0312s, when doubled: 0.0625s.
@@ -134,6 +135,7 @@ static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
+static int64_t watchdog_max_interval;
static inline void clocksource_watchdog_lock(unsigned long *flags)
{
@@ -399,8 +401,8 @@ static inline void clocksource_reset_watchdog(void)
static void clocksource_watchdog(struct timer_list *unused)
{
u64 csnow, wdnow, cslast, wdlast, delta;
+ int64_t wd_nsec, cs_nsec, interval;
int next_cpu, reset_pending;
- int64_t wd_nsec, cs_nsec;
struct clocksource *cs;
enum wd_read_status read_ret;
unsigned long extra_wait = 0;
@@ -470,6 +472,27 @@ static void clocksource_watchdog(struct timer_list *unused)
if (atomic_read(&watchdog_reset_pending))
continue;
+ /*
+ * The processing of timer softirqs can get delayed (usually
+ * on account of ksoftirqd not getting to run in a timely
+ * manner), which causes the watchdog interval to stretch.
+ * Skew detection may fail for longer watchdog intervals
+ * on account of fixed margins being used.
+ * Some clocksources, e.g. acpi_pm, cannot tolerate
+ * watchdog intervals longer than a few seconds.
+ */
+ interval = max(cs_nsec, wd_nsec);
+ if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) {
+ if (system_state > SYSTEM_SCHEDULING &&
+ interval > 2 * watchdog_max_interval) {
+ watchdog_max_interval = interval;
+ pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n",
+ cs_nsec, wd_nsec);
+ }
+ watchdog_timer.expires = jiffies;
+ continue;
+ }
+
/* Check the deviation from the watchdog clocksource. */
md = cs->uncertainty_margin + watchdog->uncertainty_margin;
if (abs(cs_nsec - wd_nsec) > md) {
Please backport to stable 6.7 the following patch which was merged
upstream as
commit fe92f874f09145a6951deacaa4961390238bbe0d
Author: Michael Lass <bevan(a)bi-co.net>
Date: Wed Jan 31 16:52:20 2024 +0100
net: Fix from address in memcpy_to_iter_csum()
While inlining csum_and_memcpy() into memcpy_to_iter_csum(), the from
address passed to csum_partial_copy_nocheck() was accidentally changed.
This causes a regression in applications using UDP, as for example
OpenAFS, causing loss of datagrams.
Fixes: dc32bff195b4 ("iov_iter, net: Fold in csum_and_memcpy()")
Cc: David Howells <dhowells(a)redhat.com>
Cc: stable(a)vger.kernel.org
Cc: regressions(a)lists.linux.dev
Signed-off-by: Michael Lass <bevan(a)bi-co.net>
Reviewed-by: Jeffrey Altman <jaltman(a)auristor.com>
Acked-by: David Howells <dhowells(a)redhat.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Thank you.
-------- Forwarded Message --------
Subject: [PATCH] net: Fix from address in memcpy_to_iter_csum()
Date: Wed, 31 Jan 2024 16:52:20 +0100
From: Michael Lass <bevan(a)bi-co.net>
To: netdev(a)vger.kernel.org
CC: David Howells <dhowells(a)redhat.com>, regressions(a)lists.linux.dev
While inlining csum_and_memcpy() into memcpy_to_iter_csum(), the from
address passed to csum_partial_copy_nocheck() was accidentally changed.
This causes a regression in applications using UDP, as for example
OpenAFS, causing loss of datagrams.
Fixes: dc32bff195b4 ("iov_iter, net: Fold in csum_and_memcpy()")
Cc: David Howells <dhowells(a)redhat.com>
Cc: stable(a)vger.kernel.org
Cc: regressions(a)lists.linux.dev
Signed-off-by: Michael Lass <bevan(a)bi-co.net>
---
net/core/datagram.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 103d46fa0eeb..a8b625abe242 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -751,7 +751,7 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t
progress,
size_t len, void *from, void *priv2)
{
__wsum *csum = priv2;
- __wsum next = csum_partial_copy_nocheck(from, iter_to, len);
+ __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);
*csum = csum_block_add(*csum, next, progress);
return 0;
--
2.43.0
Hi,
Can you cherry pick commit 33391eecd631 to 6.1-stable? Looks like we
never got that one marked appropriately. For full reference:
commit 33391eecd63158536fb5257fee5be3a3bdc30e3c
Author: Jens Axboe <axboe(a)kernel.dk>
Date: Fri Jan 20 07:51:07 2023 -0700
block: treat poll queue enter similarly to timeouts
Thanks,
--
Jens Axboe
Hi Greg, Sasha, and David,
I noticed a regression report in bugzilla.kernel.org that seems to be
specific to the linux-6.6.y series:
Quoting from https://bugzilla.kernel.org/show_bug.cgi?id=218484 :
> After upgrading to version 6.6.16, the kernel compilation on a i586
> arch (on a 32bit chroot in a 64bit host) fails with a message:
>
> virtual memory exhausted: Cannot allocate memory
>
> this happens even lowering the number of parallel compilation
> threads. On a x86_64 arch the same problem doesn't occur. It's not
> clear whether some weird recursion is triggered that exhausts the
> memory, but it seems that the problem is caused by the patchset
> 'minmax' added to the 6.6.16 version, in particular it seems caused
> by these patches:
>
> - minmax-allow-min-max-clamp-if-the-arguments-have-the-same-signedness.patch
> - minmax-fix-indentation-of-__cmp_once-and-__clamp_once.patch
> - minmax-allow-comparisons-of-int-against-unsigned-char-short.patch
> - minmax-relax-check-to-allow-comparison-between-unsigned-arguments-and-signed-constants.patch
>
> Reverting those patches fixes the memory exhaustion problem during compilation.
The reporter later added:
> From a quick test the same problem doesn't occur in 6.8-rc4.
See the ticket for more details.
Note, you have to use bugzilla to reach the reporter, as I sadly[1] can
not CCed them in mails like this.
[TLDR for the rest of this mail: I'm adding this report to the list of
tracked Linux kernel regressions; the text you find below is based on a
few templates paragraphs you might have encountered already in similar
form.]
BTW, let me use this mail to also add the report to the list of tracked
regressions to ensure it's doesn't fall through the cracks:
#regzbot introduced: 204c653d5d0c79940..9487d93f172acef
https://bugzilla.kernel.org/show_bug.cgi?id=218484
#regzbot title: minmax: virtual memory exhausted in 6.6.16 with i586 chroot
#regzbot ignore-activity
Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
If I did something stupid, please tell me, as explained on that page.
[1] because bugzilla.kernel.org tells users upon registration their
"email address will never be displayed to logged out users"
The patch below does not apply to the 6.7-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.7.y
git checkout FETCH_HEAD
git cherry-pick -x 76b367a2d83163cf19173d5cb0b562acbabc8eac
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021328-washboard-crevice-aaa0@gregkh' --subject-prefix 'PATCH 6.7.y' HEAD^..
Possible dependencies:
76b367a2d831 ("io_uring/net: limit inline multishot retries")
91e5d765a82f ("io_uring/net: un-indent mshot retry path in io_recv_finish()")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76b367a2d83163cf19173d5cb0b562acbabc8eac Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Mon, 29 Jan 2024 12:00:58 -0700
Subject: [PATCH] io_uring/net: limit inline multishot retries
If we have multiple clients and some/all are flooding the receives to
such an extent that we can retry a LOT handling multishot receives, then
we can be starving some clients and hence serving traffic in an
imbalanced fashion.
Limit multishot retry attempts to some arbitrary value, whose only
purpose serves to ensure that we don't keep serving a single connection
for way too long. We default to 32 retries, which should be more than
enough to provide fairness, yet not so small that we'll spend too much
time requeuing rather than handling traffic.
Cc: stable(a)vger.kernel.org
Depends-on: 704ea888d646 ("io_uring/poll: add requeue return code from poll multishot handling")
Depends-on: 1e5d765a82f ("io_uring/net: un-indent mshot retry path in io_recv_finish()")
Depends-on: e84b01a880f6 ("io_uring/poll: move poll execution helpers higher up")
Fixes: b3fdea6ecb55 ("io_uring: multishot recv")
Fixes: 9bb66906f23e ("io_uring: support multishot in recvmsg")
Link: https://github.com/axboe/liburing/issues/1043
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index 740c6bfa5b59..a12ff69e6843 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -60,6 +60,7 @@ struct io_sr_msg {
unsigned len;
unsigned done_io;
unsigned msg_flags;
+ unsigned nr_multishot_loops;
u16 flags;
/* initialised and used only by !msg send variants */
u16 addr_len;
@@ -70,6 +71,13 @@ struct io_sr_msg {
struct io_kiocb *notif;
};
+/*
+ * Number of times we'll try and do receives if there's more data. If we
+ * exceed this limit, then add us to the back of the queue and retry from
+ * there. This helps fairness between flooding clients.
+ */
+#define MULTISHOT_MAX_RETRY 32
+
static inline bool io_check_multishot(struct io_kiocb *req,
unsigned int issue_flags)
{
@@ -611,6 +619,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
sr->done_io = 0;
+ sr->nr_multishot_loops = 0;
return 0;
}
@@ -654,12 +663,20 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
*/
if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
*ret, cflags | IORING_CQE_F_MORE)) {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
+
io_recv_prep_retry(req);
/* Known not-empty or unknown state, retry */
- if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1)
- return false;
+ if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
+ if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
+ return false;
+ /* mshot retries exceeded, force a requeue */
+ sr->nr_multishot_loops = 0;
+ mshot_retry_ret = IOU_REQUEUE;
+ }
if (issue_flags & IO_URING_F_MULTISHOT)
- *ret = IOU_ISSUE_SKIP_COMPLETE;
+ *ret = mshot_retry_ret;
else
*ret = -EAGAIN;
return true;
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 72bd80252feeb3bef8724230ee15d9f7ab541c6e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021339-flick-facsimile-65c3@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
72bd80252fee ("io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL and buffers")
f9ead18c1058 ("io_uring: split network related opcodes into its own file")
e0da14def1ee ("io_uring: move statx handling to its own file")
a9c210cebe13 ("io_uring: move epoll handler to its own file")
4cf90495281b ("io_uring: add a dummy -EOPNOTSUPP prep handler")
99f15d8d6136 ("io_uring: move uring_cmd handling to its own file")
cd40cae29ef8 ("io_uring: split out open/close operations")
453b329be5ea ("io_uring: separate out file table handling code")
f4c163dd7d4b ("io_uring: split out fadvise/madvise operations")
0d5847274037 ("io_uring: split out fs related sync/fallocate functions")
531113bbd5bf ("io_uring: split out splice related operations")
11aeb71406dd ("io_uring: split out filesystem related operations")
e28683bdfc2f ("io_uring: move nop into its own file")
5e2a18d93fec ("io_uring: move xattr related opcodes to its own file")
97b388d70b53 ("io_uring: handle completions in the core")
de23077eda61 ("io_uring: set completion results upfront")
e27f928ee1cb ("io_uring: add io_uring_types.h")
4d4c9cff4f70 ("io_uring: define a request type cleanup handler")
890968dc0336 ("io_uring: unify struct io_symlink and io_hardlink")
9a3a11f977f9 ("io_uring: convert iouring_cmd to io_cmd_type")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 72bd80252feeb3bef8724230ee15d9f7ab541c6e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Thu, 1 Feb 2024 06:42:36 -0700
Subject: [PATCH] io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL
and buffers
If we use IORING_OP_RECV with provided buffers and pass in '0' as the
length of the request, the length is retrieved from the selected buffer.
If MSG_WAITALL is also set and we get a short receive, then we may hit
the retry path which decrements sr->len and increments the buffer for
a retry. However, the length is still zero at this point, which means
that sr->len now becomes huge and import_ubuf() will cap it to
MAX_RW_COUNT and subsequently return -EFAULT for the range as a whole.
Fix this by always assigning sr->len once the buffer has been selected.
Cc: stable(a)vger.kernel.org
Fixes: 7ba89d2af17a ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly")
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index a12ff69e6843..43bc9a5f96f9 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -923,6 +923,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
if (!buf)
return -ENOBUFS;
sr->buf = buf;
+ sr->len = len;
}
ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 76b367a2d83163cf19173d5cb0b562acbabc8eac
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021331-stool-hash-b0c4@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
76b367a2d831 ("io_uring/net: limit inline multishot retries")
91e5d765a82f ("io_uring/net: un-indent mshot retry path in io_recv_finish()")
b6b2bb58a754 ("io_uring: never overflow io_aux_cqe")
b2e74db55dd9 ("io_uring/net: don't overflow multishot recv")
1bfed2334971 ("io_uring/net: don't overflow multishot accept")
b65db9211ecb ("io_uring/net: use proper value for msg_inq")
0aa69d53ac7c ("Merge tag 'for-6.5/io_uring-2023-06-23' of git://git.kernel.dk/linux")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 76b367a2d83163cf19173d5cb0b562acbabc8eac Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Mon, 29 Jan 2024 12:00:58 -0700
Subject: [PATCH] io_uring/net: limit inline multishot retries
If we have multiple clients and some/all are flooding the receives to
such an extent that we can retry a LOT handling multishot receives, then
we can be starving some clients and hence serving traffic in an
imbalanced fashion.
Limit multishot retry attempts to some arbitrary value, whose only
purpose serves to ensure that we don't keep serving a single connection
for way too long. We default to 32 retries, which should be more than
enough to provide fairness, yet not so small that we'll spend too much
time requeuing rather than handling traffic.
Cc: stable(a)vger.kernel.org
Depends-on: 704ea888d646 ("io_uring/poll: add requeue return code from poll multishot handling")
Depends-on: 1e5d765a82f ("io_uring/net: un-indent mshot retry path in io_recv_finish()")
Depends-on: e84b01a880f6 ("io_uring/poll: move poll execution helpers higher up")
Fixes: b3fdea6ecb55 ("io_uring: multishot recv")
Fixes: 9bb66906f23e ("io_uring: support multishot in recvmsg")
Link: https://github.com/axboe/liburing/issues/1043
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index 740c6bfa5b59..a12ff69e6843 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -60,6 +60,7 @@ struct io_sr_msg {
unsigned len;
unsigned done_io;
unsigned msg_flags;
+ unsigned nr_multishot_loops;
u16 flags;
/* initialised and used only by !msg send variants */
u16 addr_len;
@@ -70,6 +71,13 @@ struct io_sr_msg {
struct io_kiocb *notif;
};
+/*
+ * Number of times we'll try and do receives if there's more data. If we
+ * exceed this limit, then add us to the back of the queue and retry from
+ * there. This helps fairness between flooding clients.
+ */
+#define MULTISHOT_MAX_RETRY 32
+
static inline bool io_check_multishot(struct io_kiocb *req,
unsigned int issue_flags)
{
@@ -611,6 +619,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
sr->done_io = 0;
+ sr->nr_multishot_loops = 0;
return 0;
}
@@ -654,12 +663,20 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
*/
if (io_fill_cqe_req_aux(req, issue_flags & IO_URING_F_COMPLETE_DEFER,
*ret, cflags | IORING_CQE_F_MORE)) {
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ int mshot_retry_ret = IOU_ISSUE_SKIP_COMPLETE;
+
io_recv_prep_retry(req);
/* Known not-empty or unknown state, retry */
- if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1)
- return false;
+ if (cflags & IORING_CQE_F_SOCK_NONEMPTY || msg->msg_inq == -1) {
+ if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
+ return false;
+ /* mshot retries exceeded, force a requeue */
+ sr->nr_multishot_loops = 0;
+ mshot_retry_ret = IOU_REQUEUE;
+ }
if (issue_flags & IO_URING_F_MULTISHOT)
- *ret = IOU_ISSUE_SKIP_COMPLETE;
+ *ret = mshot_retry_ret;
else
*ret = -EAGAIN;
return true;
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 7c4650ded49e5b88929ecbbb631efb8b0838e811
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021308-hardness-undercook-6840@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
7c4650ded49e ("xhci: handle isoc Babble and Buffer Overrun events properly")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7c4650ded49e5b88929ecbbb631efb8b0838e811 Mon Sep 17 00:00:00 2001
From: Michal Pecio <michal.pecio(a)gmail.com>
Date: Thu, 25 Jan 2024 17:27:37 +0200
Subject: [PATCH] xhci: handle isoc Babble and Buffer Overrun events properly
xHCI 4.9 explicitly forbids assuming that the xHC has released its
ownership of a multi-TRB TD when it reports an error on one of the
early TRBs. Yet the driver makes such assumption and releases the TD,
allowing the remaining TRBs to be freed or overwritten by new TDs.
The xHC should also report completion of the final TRB due to its IOC
flag being set by us, regardless of prior errors. This event cannot
be recognized if the TD has already been freed earlier, resulting in
"Transfer event TRB DMA ptr not part of current TD" error message.
Fix this by reusing the logic for processing isoc Transaction Errors.
This also handles hosts which fail to report the final completion.
Fix transfer length reporting on Babble errors. They may be caused by
device malfunction, no guarantee that the buffer has been filled.
Signed-off-by: Michal Pecio <michal.pecio(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.inte…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 41be7d31a36e..f0d8a607ff21 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2394,9 +2394,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
case COMP_BANDWIDTH_OVERRUN_ERROR:
frame->status = -ECOMM;
break;
- case COMP_ISOCH_BUFFER_OVERRUN:
case COMP_BABBLE_DETECTED_ERROR:
+ sum_trbs_for_length = true;
+ fallthrough;
+ case COMP_ISOCH_BUFFER_OVERRUN:
frame->status = -EOVERFLOW;
+ if (ep_trb != td->last_trb)
+ td->error_mid_td = true;
break;
case COMP_INCOMPATIBLE_DEVICE_ERROR:
case COMP_STALL_ERROR:
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.
The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.
Cc: <stable(a)vger.kernel.org> # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-…
Acked-by: Christian König <christian.koenig(a)amd.com>
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam(a)amd.com>
---
drivers/gpu/drm/drm_buddy.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
list_splice_tail(&allocated, blocks);
+
+ if (total_allocated < size) {
+ err = -ENOSPC;
+ goto err_free;
+ }
+
return 0;
err_undo:
base-commit: 2c80a2b715df75881359d07dbaacff8ad411f40e
--
2.25.1
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 1e560864159d002b453da42bd2c13a1805515a20
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021339-undesired-juicy-366a@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
1e560864159d ("PCI/ASPM: Fix deadlock when enabling ASPM")
ac865f00af29 ("Merge tag 'pci-v6.7-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 1e560864159d002b453da42bd2c13a1805515a20 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro(a)kernel.org>
Date: Tue, 30 Jan 2024 11:02:43 +0100
Subject: [PATCH] PCI/ASPM: Fix deadlock when enabling ASPM
A last minute revert in 6.7-final introduced a potential deadlock when
enabling ASPM during probe of Qualcomm PCIe controllers as reported by
lockdep:
============================================
WARNING: possible recursive locking detected
6.7.0 #40 Not tainted
--------------------------------------------
kworker/u16:5/90 is trying to acquire lock:
ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc
but task is already holding lock:
ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(pci_bus_sem);
lock(pci_bus_sem);
*** DEADLOCK ***
Call trace:
print_deadlock_bug+0x25c/0x348
__lock_acquire+0x10a4/0x2064
lock_acquire+0x1e8/0x318
down_read+0x60/0x184
pcie_aspm_pm_state_change+0x58/0xdc
pci_set_full_power_state+0xa8/0x114
pci_set_power_state+0xc4/0x120
qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom]
pci_walk_bus+0x64/0xbc
qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom]
The deadlock can easily be reproduced on machines like the Lenovo ThinkPad
X13s by adding a delay to increase the race window during asynchronous
probe where another thread can take a write lock.
Add a new pci_set_power_state_locked() and associated helper functions that
can be called with the PCI bus semaphore held to avoid taking the read lock
twice.
Link: https://lore.kernel.org/r/ZZu0qx2cmn7IwTyQ@hovoldconsulting.com
Link: https://lore.kernel.org/r/20240130100243.11011-1-johan+linaro@kernel.org
Fixes: f93e71aea6c6 ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"")
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas(a)google.com>
Cc: <stable(a)vger.kernel.org> # 6.7
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 9c2137dae429..826b5016a101 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -386,21 +386,8 @@ void pci_bus_add_devices(const struct pci_bus *bus)
}
EXPORT_SYMBOL(pci_bus_add_devices);
-/** pci_walk_bus - walk devices on/under bus, calling callback.
- * @top bus whose devices should be walked
- * @cb callback to be called for each device found
- * @userdata arbitrary pointer to be passed to callback.
- *
- * Walk the given bus, including any bridged devices
- * on buses under this bus. Call the provided callback
- * on each device found.
- *
- * We check the return of @cb each time. If it returns anything
- * other than 0, we break out.
- *
- */
-void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
- void *userdata)
+static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
+ void *userdata, bool locked)
{
struct pci_dev *dev;
struct pci_bus *bus;
@@ -408,7 +395,8 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
int retval;
bus = top;
- down_read(&pci_bus_sem);
+ if (!locked)
+ down_read(&pci_bus_sem);
next = top->devices.next;
for (;;) {
if (next == &bus->devices) {
@@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
if (retval)
break;
}
- up_read(&pci_bus_sem);
+ if (!locked)
+ up_read(&pci_bus_sem);
+}
+
+/**
+ * pci_walk_bus - walk devices on/under bus, calling callback.
+ * @top: bus whose devices should be walked
+ * @cb: callback to be called for each device found
+ * @userdata: arbitrary pointer to be passed to callback
+ *
+ * Walk the given bus, including any bridged devices
+ * on buses under this bus. Call the provided callback
+ * on each device found.
+ *
+ * We check the return of @cb each time. If it returns anything
+ * other than 0, we break out.
+ */
+void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
+{
+ __pci_walk_bus(top, cb, userdata, false);
}
EXPORT_SYMBOL_GPL(pci_walk_bus);
+void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
+{
+ lockdep_assert_held(&pci_bus_sem);
+
+ __pci_walk_bus(top, cb, userdata, true);
+}
+EXPORT_SYMBOL_GPL(pci_walk_bus_locked);
+
struct pci_bus *pci_bus_get(struct pci_bus *bus)
{
if (bus)
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index 10f2d0bb86be..2ce2a3bd932b 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -972,7 +972,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata)
* Downstream devices need to be in D0 state before enabling PCI PM
* substates.
*/
- pci_set_power_state(pdev, PCI_D0);
+ pci_set_power_state_locked(pdev, PCI_D0);
pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
return 0;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d8f11a078924..9ab9b1008d8b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1354,6 +1354,7 @@ int pci_power_up(struct pci_dev *dev)
/**
* pci_set_full_power_state - Put a PCI device into D0 and update its state
* @dev: PCI device to power up
+ * @locked: whether pci_bus_sem is held
*
* Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
* to confirm the state change, restore its BARs if they might be lost and
@@ -1363,7 +1364,7 @@ int pci_power_up(struct pci_dev *dev)
* to D0, it is more efficient to use pci_power_up() directly instead of this
* function.
*/
-static int pci_set_full_power_state(struct pci_dev *dev)
+static int pci_set_full_power_state(struct pci_dev *dev, bool locked)
{
u16 pmcsr;
int ret;
@@ -1399,7 +1400,7 @@ static int pci_set_full_power_state(struct pci_dev *dev)
}
if (dev->bus->self)
- pcie_aspm_pm_state_change(dev->bus->self);
+ pcie_aspm_pm_state_change(dev->bus->self, locked);
return 0;
}
@@ -1428,10 +1429,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
pci_walk_bus(bus, __pci_dev_set_current_state, &state);
}
+static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked)
+{
+ if (!bus)
+ return;
+
+ if (locked)
+ pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state);
+ else
+ pci_walk_bus(bus, __pci_dev_set_current_state, &state);
+}
+
/**
* pci_set_low_power_state - Put a PCI device into a low-power state.
* @dev: PCI device to handle.
* @state: PCI power state (D1, D2, D3hot) to put the device into.
+ * @locked: whether pci_bus_sem is held
*
* Use the device's PCI_PM_CTRL register to put it into a low-power state.
*
@@ -1442,7 +1455,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
* 0 if device already is in the requested state.
* 0 if device's power state has been successfully changed.
*/
-static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
+static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
{
u16 pmcsr;
@@ -1496,29 +1509,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
pci_power_name(state));
if (dev->bus->self)
- pcie_aspm_pm_state_change(dev->bus->self);
+ pcie_aspm_pm_state_change(dev->bus->self, locked);
return 0;
}
-/**
- * pci_set_power_state - Set the power state of a PCI device
- * @dev: PCI device to handle.
- * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
- *
- * Transition a device to a new power state, using the platform firmware and/or
- * the device's PCI PM registers.
- *
- * RETURN VALUE:
- * -EINVAL if the requested state is invalid.
- * -EIO if device does not support PCI PM or its PM capabilities register has a
- * wrong version, or device doesn't support the requested state.
- * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
- * 0 if device already is in the requested state.
- * 0 if the transition is to D3 but D3 is not supported.
- * 0 if device's power state has been successfully changed.
- */
-int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
{
int error;
@@ -1542,7 +1538,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
return 0;
if (state == PCI_D0)
- return pci_set_full_power_state(dev);
+ return pci_set_full_power_state(dev, locked);
/*
* This device is quirked not to be put into D3, so don't put it in
@@ -1556,16 +1552,16 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
* To put the device in D3cold, put it into D3hot in the native
* way, then put it into D3cold using platform ops.
*/
- error = pci_set_low_power_state(dev, PCI_D3hot);
+ error = pci_set_low_power_state(dev, PCI_D3hot, locked);
if (pci_platform_power_transition(dev, PCI_D3cold))
return error;
/* Powering off a bridge may power off the whole hierarchy */
if (dev->current_state == PCI_D3cold)
- pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
+ __pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked);
} else {
- error = pci_set_low_power_state(dev, state);
+ error = pci_set_low_power_state(dev, state, locked);
if (pci_platform_power_transition(dev, state))
return error;
@@ -1573,8 +1569,38 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
return 0;
}
+
+/**
+ * pci_set_power_state - Set the power state of a PCI device
+ * @dev: PCI device to handle.
+ * @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
+ *
+ * Transition a device to a new power state, using the platform firmware and/or
+ * the device's PCI PM registers.
+ *
+ * RETURN VALUE:
+ * -EINVAL if the requested state is invalid.
+ * -EIO if device does not support PCI PM or its PM capabilities register has a
+ * wrong version, or device doesn't support the requested state.
+ * 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
+ * 0 if device already is in the requested state.
+ * 0 if the transition is to D3 but D3 is not supported.
+ * 0 if device's power state has been successfully changed.
+ */
+int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
+{
+ return __pci_set_power_state(dev, state, false);
+}
EXPORT_SYMBOL(pci_set_power_state);
+int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
+{
+ lockdep_assert_held(&pci_bus_sem);
+
+ return __pci_set_power_state(dev, state, true);
+}
+EXPORT_SYMBOL(pci_set_power_state_locked);
+
#define PCI_EXP_SAVE_REGS 7
static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 2336a8d1edab..e9750b1b19ba 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -571,12 +571,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
#ifdef CONFIG_PCIEASPM
void pcie_aspm_init_link_state(struct pci_dev *pdev);
void pcie_aspm_exit_link_state(struct pci_dev *pdev);
-void pcie_aspm_pm_state_change(struct pci_dev *pdev);
+void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
#else
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
-static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
+static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { }
static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
#endif
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index 5a0066ecc3c5..bc0bd86695ec 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1003,8 +1003,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
up_read(&pci_bus_sem);
}
-/* @pdev: the root port or switch downstream port */
-void pcie_aspm_pm_state_change(struct pci_dev *pdev)
+/*
+ * @pdev: the root port or switch downstream port
+ * @locked: whether pci_bus_sem is held
+ */
+void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
{
struct pcie_link_state *link = pdev->link_state;
@@ -1014,12 +1017,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev)
* Devices changed PM state, we should recheck if latency
* meets all functions' requirement
*/
- down_read(&pci_bus_sem);
+ if (!locked)
+ down_read(&pci_bus_sem);
mutex_lock(&aspm_lock);
pcie_update_aspm_capable(link->root);
pcie_config_aspm_path(link);
mutex_unlock(&aspm_lock);
- up_read(&pci_bus_sem);
+ if (!locked)
+ up_read(&pci_bus_sem);
}
void pcie_aspm_powersave_config_link(struct pci_dev *pdev)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index add9368e6314..7ab0d13672da 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev,
struct pci_saved_state **state);
int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
+int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state);
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
void pci_pme_active(struct pci_dev *dev, bool enable);
@@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
void *userdata);
+void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
+ void *userdata);
int pci_cfg_space_size(struct pci_dev *dev);
unsigned char pci_bus_max_busnr(struct pci_bus *bus);
void pci_setup_bridge(struct pci_bus *bus);
@@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; }
static inline void pci_restore_state(struct pci_dev *dev) { }
static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
{ return 0; }
+static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
+{ return 0; }
static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable)
{ return 0; }
static inline pci_power_t pci_choose_state(struct pci_dev *dev,
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-4.19.y
git checkout FETCH_HEAD
git cherry-pick -x 7c4650ded49e5b88929ecbbb631efb8b0838e811
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021310-prelaunch-earmark-e533@gregkh' --subject-prefix 'PATCH 4.19.y' HEAD^..
Possible dependencies:
7c4650ded49e ("xhci: handle isoc Babble and Buffer Overrun events properly")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7c4650ded49e5b88929ecbbb631efb8b0838e811 Mon Sep 17 00:00:00 2001
From: Michal Pecio <michal.pecio(a)gmail.com>
Date: Thu, 25 Jan 2024 17:27:37 +0200
Subject: [PATCH] xhci: handle isoc Babble and Buffer Overrun events properly
xHCI 4.9 explicitly forbids assuming that the xHC has released its
ownership of a multi-TRB TD when it reports an error on one of the
early TRBs. Yet the driver makes such assumption and releases the TD,
allowing the remaining TRBs to be freed or overwritten by new TDs.
The xHC should also report completion of the final TRB due to its IOC
flag being set by us, regardless of prior errors. This event cannot
be recognized if the TD has already been freed earlier, resulting in
"Transfer event TRB DMA ptr not part of current TD" error message.
Fix this by reusing the logic for processing isoc Transaction Errors.
This also handles hosts which fail to report the final completion.
Fix transfer length reporting on Babble errors. They may be caused by
device malfunction, no guarantee that the buffer has been filled.
Signed-off-by: Michal Pecio <michal.pecio(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.inte…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 41be7d31a36e..f0d8a607ff21 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2394,9 +2394,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
case COMP_BANDWIDTH_OVERRUN_ERROR:
frame->status = -ECOMM;
break;
- case COMP_ISOCH_BUFFER_OVERRUN:
case COMP_BABBLE_DETECTED_ERROR:
+ sum_trbs_for_length = true;
+ fallthrough;
+ case COMP_ISOCH_BUFFER_OVERRUN:
frame->status = -EOVERFLOW;
+ if (ep_trb != td->last_trb)
+ td->error_mid_td = true;
break;
case COMP_INCOMPATIBLE_DEVICE_ERROR:
case COMP_STALL_ERROR:
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 7c4650ded49e5b88929ecbbb631efb8b0838e811
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021309-hypertext-gush-3da8@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
7c4650ded49e ("xhci: handle isoc Babble and Buffer Overrun events properly")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 7c4650ded49e5b88929ecbbb631efb8b0838e811 Mon Sep 17 00:00:00 2001
From: Michal Pecio <michal.pecio(a)gmail.com>
Date: Thu, 25 Jan 2024 17:27:37 +0200
Subject: [PATCH] xhci: handle isoc Babble and Buffer Overrun events properly
xHCI 4.9 explicitly forbids assuming that the xHC has released its
ownership of a multi-TRB TD when it reports an error on one of the
early TRBs. Yet the driver makes such assumption and releases the TD,
allowing the remaining TRBs to be freed or overwritten by new TDs.
The xHC should also report completion of the final TRB due to its IOC
flag being set by us, regardless of prior errors. This event cannot
be recognized if the TD has already been freed earlier, resulting in
"Transfer event TRB DMA ptr not part of current TD" error message.
Fix this by reusing the logic for processing isoc Transaction Errors.
This also handles hosts which fail to report the final completion.
Fix transfer length reporting on Babble errors. They may be caused by
device malfunction, no guarantee that the buffer has been filled.
Signed-off-by: Michal Pecio <michal.pecio(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Link: https://lore.kernel.org/r/20240125152737.2983959-5-mathias.nyman@linux.inte…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 41be7d31a36e..f0d8a607ff21 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2394,9 +2394,13 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
case COMP_BANDWIDTH_OVERRUN_ERROR:
frame->status = -ECOMM;
break;
- case COMP_ISOCH_BUFFER_OVERRUN:
case COMP_BABBLE_DETECTED_ERROR:
+ sum_trbs_for_length = true;
+ fallthrough;
+ case COMP_ISOCH_BUFFER_OVERRUN:
frame->status = -EOVERFLOW;
+ if (ep_trb != td->last_trb)
+ td->error_mid_td = true;
break;
case COMP_INCOMPATIBLE_DEVICE_ERROR:
case COMP_STALL_ERROR:
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 72bd80252feeb3bef8724230ee15d9f7ab541c6e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024021340-playable-subsiding-a8db@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
72bd80252fee ("io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL and buffers")
f9ead18c1058 ("io_uring: split network related opcodes into its own file")
e0da14def1ee ("io_uring: move statx handling to its own file")
a9c210cebe13 ("io_uring: move epoll handler to its own file")
4cf90495281b ("io_uring: add a dummy -EOPNOTSUPP prep handler")
99f15d8d6136 ("io_uring: move uring_cmd handling to its own file")
cd40cae29ef8 ("io_uring: split out open/close operations")
453b329be5ea ("io_uring: separate out file table handling code")
f4c163dd7d4b ("io_uring: split out fadvise/madvise operations")
0d5847274037 ("io_uring: split out fs related sync/fallocate functions")
531113bbd5bf ("io_uring: split out splice related operations")
11aeb71406dd ("io_uring: split out filesystem related operations")
e28683bdfc2f ("io_uring: move nop into its own file")
5e2a18d93fec ("io_uring: move xattr related opcodes to its own file")
97b388d70b53 ("io_uring: handle completions in the core")
de23077eda61 ("io_uring: set completion results upfront")
e27f928ee1cb ("io_uring: add io_uring_types.h")
4d4c9cff4f70 ("io_uring: define a request type cleanup handler")
890968dc0336 ("io_uring: unify struct io_symlink and io_hardlink")
9a3a11f977f9 ("io_uring: convert iouring_cmd to io_cmd_type")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 72bd80252feeb3bef8724230ee15d9f7ab541c6e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe(a)kernel.dk>
Date: Thu, 1 Feb 2024 06:42:36 -0700
Subject: [PATCH] io_uring/net: fix sr->len for IORING_OP_RECV with MSG_WAITALL
and buffers
If we use IORING_OP_RECV with provided buffers and pass in '0' as the
length of the request, the length is retrieved from the selected buffer.
If MSG_WAITALL is also set and we get a short receive, then we may hit
the retry path which decrements sr->len and increments the buffer for
a retry. However, the length is still zero at this point, which means
that sr->len now becomes huge and import_ubuf() will cap it to
MAX_RW_COUNT and subsequently return -EFAULT for the range as a whole.
Fix this by always assigning sr->len once the buffer has been selected.
Cc: stable(a)vger.kernel.org
Fixes: 7ba89d2af17a ("io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly")
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
diff --git a/io_uring/net.c b/io_uring/net.c
index a12ff69e6843..43bc9a5f96f9 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -923,6 +923,7 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
if (!buf)
return -ENOBUFS;
sr->buf = buf;
+ sr->len = len;
}
ret = import_ubuf(ITER_DEST, sr->buf, len, &msg.msg_iter);
Hello,
* update to 5.10.197 included upstream changes from 6.x kernel.
* in particular this:
* https://lore.kernel.org/all/20230917191101.257176910@linuxfoundation.org/
* this isn't an issue with the upstream state in the 6.x kernel as
the problems this change might cause are alleviated by this commit
that altered the way the iucv_if symbol was loaded:
* https://github.com/torvalds/linux/commit/4eb9eda6ba64114d98827e2870e024d5ab…
In the current state the symbol is not loaded correctly and the
feature is not working properly.
This issue hasn't been resolved still and is in a broken state up to
the current 5.10.x version. Is it possible to include this fix in the
next 5.10.x version?
Sincerely,
Leonardo Martinho
Hello Sasha,
kernel v6.1.77 introduces a regression, with a boot failure, on i.MX7,
with commit db30f469ae8b ("ARM: dts: imx7s: Fix nand-controller #size-cells").
The issue is known [1][2], changing `#size-cells = <0>` is formally
correct, but do not play well with the firmware that are deployed on
those embedded devices, leading to a boot failure.
A mitigation was implemented in the Linux kernel,
commit 84549c816dc3 ("mtd: parsers: ofpart: add workaround for #size-cells 0")
that was merged into v6.3, the firmware was also fixed, however existing
device using old firmware will not boot anymore if updating to a newer
kernel.
I would ask you to drop such a patch from any stable patches queue and
not backport it to any older kernel.
To fix v6.1.y I see two options:
- backport 84549c816dc3
- revert db30f469ae8b
What do you prefer? Should I send myself a patch?
[1] https://lore.kernel.org/all/Y4dgBTGNWpM6SQXI@francesco-nb.int.toradex.com/
[2] https://lore.kernel.org/all/20221202071900.1143950-1-francesco@dolcini.it/
Francesco
From: Tatsunosuke Tobita <tatsunosuke.tobita(a)wacom.com>
The xf86-input-wacom driver does not treat '0' as a valid serial
number and will drop any input report which contains an
MSC_SERIAL = 0 event. The kernel driver already takes care to
avoid sending any MSC_SERIAL event if the value of serial[0] == 0
(which is the case for devices that don't actually report a
serial number), but this is not quite sufficient.
Only the lower 32 bits of the serial get reported to userspace,
so if this portion of the serial is zero then there can still
be problems.
This commit allows the driver to report either the lower 32 bits
if they are non-zero or the upper 32 bits otherwise.
Signed-off-by: Jason Gerecke <jason.gerecke(a)wacom.com>
Signed-off-by: Tatsunosuke Tobita <tatsunosuke.tobita(a)wacom.com>
Fixes: f85c9dc678a5 ("HID: wacom: generic: Support tool ID and additional tool types")
CC: stable(a)vger.kernel.org # v4.10
---
drivers/hid/wacom_wac.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index da8a01fedd39..fbe10fbc5769 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2575,7 +2575,14 @@ static void wacom_wac_pen_report(struct hid_device *hdev,
wacom_wac->hid_data.tipswitch);
input_report_key(input, wacom_wac->tool[0], sense);
if (wacom_wac->serial[0]) {
- input_event(input, EV_MSC, MSC_SERIAL, wacom_wac->serial[0]);
+ /*
+ * xf86-input-wacom does not accept a serial number
+ * of '0'. Report the low 32 bits if possible, but
+ * if they are zero, report the upper ones instead.
+ */
+ __u32 serial_lo = wacom_wac->serial[0] & 0xFFFFFFFFu;
+ __u32 serial_hi = wacom_wac->serial[0] >> 32;
+ input_event(input, EV_MSC, MSC_SERIAL, (int)(serial_lo ? serial_lo : serial_hi));
input_report_abs(input, ABS_MISC, sense ? id : 0);
}
--
2.34.1
While refactoring the way the ITSs are probed, the handling of
quirks applicable to ACPI-based platforms was lost. As a result,
systems such as HIP07 lose their GICv4 functionnality, and some
other may even fail to boot, unless they are configured to boot
with DT.
Move the enabling of quirks into its_probe_one(), making it
common to all firmware implementations.
Fixes: 9585a495ac93 ("irqchip/gic-v3-its: Split allocation from initialisation of its_node")
Reviewed-by: Lorenzo Pieralisi <lpieralisi(a)kernel.org>
Reviewed-by: Zenghui Yu <yuzenghui(a)huawei.com>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/irqchip/irq-gic-v3-its.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index fec1b58470df..250b4562f308 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -5091,6 +5091,8 @@ static int __init its_probe_one(struct its_node *its)
u32 ctlr;
int err;
+ its_enable_quirks(its);
+
if (is_v4(its)) {
if (!(its->typer & GITS_TYPER_VMOVP)) {
err = its_compute_its_list_map(its);
@@ -5442,7 +5444,6 @@ static int __init its_of_probe(struct device_node *node)
if (!its)
return -ENOMEM;
- its_enable_quirks(its);
err = its_probe_one(its);
if (err) {
its_node_destroy(its);
--
2.39.2
When updating the affinity of a VPE, we currently skip the VMOVP
command if the two CPUs are part of the same VPE affinity.
But this is wrong, as the doorbell corresponding to this VPE
is still delivered on the 'old' CPU, which screws up the balancing.
Furthermore, offlining that 'old' CPU results in doorbell interrupts
generated for this VPE being discarded.
The harsh reality is that we cannot easily elide VMOVP when
a set_affinity request occurs. It needs to be obeyed, and if
an optimisation is to be made, it is at the point where the affinity
change request is made (such as in KVM).
Drop the VMOVP elision altogether, and only use the vpe_table_mask
to try and stay within the same ITS affinity group if at all possible.
Fixes: dd3f050a216e (irqchip/gic-v4.1: Implement the v4.1 flavour of VMOVP)
Reported-by: Kunkun Jiang <jiangkunkun(a)huawei.com>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/irqchip/irq-gic-v3-its.c | 22 +++++++++++++---------
1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 250b4562f308..53abd4779914 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3826,8 +3826,9 @@ static int its_vpe_set_affinity(struct irq_data *d,
bool force)
{
struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
- int from, cpu = cpumask_first(mask_val);
+ struct cpumask common, *table_mask;
unsigned long flags;
+ int from, cpu;
/*
* Changing affinity is mega expensive, so let's be as lazy as
@@ -3843,19 +3844,22 @@ static int its_vpe_set_affinity(struct irq_data *d,
* taken on any vLPI handling path that evaluates vpe->col_idx.
*/
from = vpe_to_cpuid_lock(vpe, &flags);
- if (from == cpu)
- goto out;
-
- vpe->col_idx = cpu;
+ table_mask = gic_data_rdist_cpu(from)->vpe_table_mask;
/*
- * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD
- * is sharing its VPE table with the current one.
+ * If we are offered another CPU in the same GICv4.1 ITS
+ * affinity, pick this one. Otherwise, any CPU will do.
*/
- if (gic_data_rdist_cpu(cpu)->vpe_table_mask &&
- cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask))
+ if (table_mask && cpumask_and(&common, mask_val, table_mask))
+ cpu = cpumask_test_cpu(from, &common) ? from : cpumask_first(&common);
+ else
+ cpu = cpumask_first(mask_val);
+
+ if (from == cpu)
goto out;
+ vpe->col_idx = cpu;
+
its_send_vmovp(vpe);
its_vpe_db_proxy_move(vpe, from, cpu);
--
2.39.2
Syzkaller reports warning in ext4_set_page_dirty() in 5.10 and 5.15
stable releases. It happens because invalidate_inode_page() frees pages
that are needed for the system. To fix this we need to add additional
checks to the function. page_mapped() checks if a page exists in the
page tables, but this is not enough. The page can be used in other places:
https://elixir.bootlin.com/linux/v6.8-rc1/source/include/linux/page_ref.h#L…
Kernel outputs an error line related to direct I/O:
https://syzkaller.appspot.com/text?tag=CrashLog&x=14ab52dac80000
The problem can be fixed in 5.10 and 5.15 stable releases by the
following patch.
The patch replaces page_mapped() call with check that finds additional
references to the page excluding page cache and filesystem private data.
If additional references exist, the page cannot be freed.
This version does not include the first patch from the first version.
The problem can be fixed without it.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Link: https://syzkaller.appspot.com/bug?extid=02f21431b65c214aa1d6
Matthew Wilcox (Oracle) (1):
mm/truncate: Replace page_mapped() call in invalidate_inode_page()
mm/truncate.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--
2.34.1
shutdown_pirq and startup_pirq are not taking the
irq_mapping_update_lock because they can't due to lock inversion. Both
are called with the irq_desc->lock being taking. The lock order,
however, is first irq_mapping_update_lock and then irq_desc->lock.
This opens multiple races:
- shutdown_pirq can be interrupted by a function that allocates an event
channel:
CPU0 CPU1
shutdown_pirq {
xen_evtchn_close(e)
__startup_pirq {
EVTCHNOP_bind_pirq
-> returns just freed evtchn e
set_evtchn_to_irq(e, irq)
}
xen_irq_info_cleanup() {
set_evtchn_to_irq(e, -1)
}
}
Assume here event channel e refers here to the same event channel
number.
After this race the evtchn_to_irq mapping for e is invalid (-1).
- __startup_pirq races with __unbind_from_irq in a similar way. Because
__startup_pirq doesn't take irq_mapping_update_lock it can grab the
evtchn that __unbind_from_irq is currently freeing and cleaning up. In
this case even though the event channel is allocated, its mapping can
be unset in evtchn_to_irq.
The fix is to first cleanup the mappings and then close the event
channel. In this way, when an event channel gets allocated it's
potential previous evtchn_to_irq mappings are guaranteed to be unset already.
This is also the reverse order of the allocation where first the event
channel is allocated and then the mappings are setup.
On a 5.10 kernel prior to commit 3fcdaf3d7634 ("xen/events: modify internal
[un]bind interfaces"), we hit a BUG like the following during probing of NVMe
devices. The issue is that during nvme_setup_io_queues, pci_free_irq
is called for every device which results in a call to shutdown_pirq.
With many nvme devices it's therefore likely to hit this race during
boot because there will be multiple calls to shutdown_pirq and
startup_pirq are running potentially in parallel.
------------[ cut here ]------------
blkfront: xvda: barrier or flush: disabled; persistent grants: enabled; indirect descriptors: enabled; bounce buffer: enabled
kernel BUG at drivers/xen/events/events_base.c:499!
invalid opcode: 0000 [#1] SMP PTI
CPU: 44 PID: 375 Comm: kworker/u257:23 Not tainted 5.10.201-191.748.amzn2.x86_64 #1
Hardware name: Xen HVM domU, BIOS 4.11.amazon 08/24/2006
Workqueue: nvme-reset-wq nvme_reset_work
RIP: 0010:bind_evtchn_to_cpu+0xdf/0xf0
Code: 5d 41 5e c3 cc cc cc cc 44 89 f7 e8 2b 55 ad ff 49 89 c5 48 85 c0 0f 84 64 ff ff ff 4c 8b 68 30 41 83 fe ff 0f 85 60 ff ff ff <0f> 0b 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00
RSP: 0000:ffffc9000d533b08 EFLAGS: 00010046
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000006
RDX: 0000000000000028 RSI: 00000000ffffffff RDI: 00000000ffffffff
RBP: ffff888107419680 R08: 0000000000000000 R09: ffffffff82d72b00
R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000001ed
R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000002
FS: 0000000000000000(0000) GS:ffff88bc8b500000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 0000000002610001 CR4: 00000000001706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
? show_trace_log_lvl+0x1c1/0x2d9
? show_trace_log_lvl+0x1c1/0x2d9
? set_affinity_irq+0xdc/0x1c0
? __die_body.cold+0x8/0xd
? die+0x2b/0x50
? do_trap+0x90/0x110
? bind_evtchn_to_cpu+0xdf/0xf0
? do_error_trap+0x65/0x80
? bind_evtchn_to_cpu+0xdf/0xf0
? exc_invalid_op+0x4e/0x70
? bind_evtchn_to_cpu+0xdf/0xf0
? asm_exc_invalid_op+0x12/0x20
? bind_evtchn_to_cpu+0xdf/0xf0
? bind_evtchn_to_cpu+0xc5/0xf0
set_affinity_irq+0xdc/0x1c0
irq_do_set_affinity+0x1d7/0x1f0
irq_setup_affinity+0xd6/0x1a0
irq_startup+0x8a/0xf0
__setup_irq+0x639/0x6d0
? nvme_suspend+0x150/0x150
request_threaded_irq+0x10c/0x180
? nvme_suspend+0x150/0x150
pci_request_irq+0xa8/0xf0
? __blk_mq_free_request+0x74/0xa0
queue_request_irq+0x6f/0x80
nvme_create_queue+0x1af/0x200
nvme_create_io_queues+0xbd/0xf0
nvme_setup_io_queues+0x246/0x320
? nvme_irq_check+0x30/0x30
nvme_reset_work+0x1c8/0x400
process_one_work+0x1b0/0x350
worker_thread+0x49/0x310
? process_one_work+0x350/0x350
kthread+0x11b/0x140
? __kthread_bind_mask+0x60/0x60
ret_from_fork+0x22/0x30
Modules linked in:
---[ end trace a11715de1eee1873 ]---
Fixes: d46a78b05c0e ("xen: implement pirq type event channels")
Cc: stable(a)vger.kernel.org
Co-debugged-by: Andrew Panyakin <apanyaki(a)amazon.com>
Signed-off-by: Maximilian Heyne <mheyne(a)amazon.de>
---
drivers/xen/events/events_base.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index b8cfea7812d6..3b9f080109d7 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -923,8 +923,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
do_mask(info, EVT_MASK_REASON_EXPLICIT);
- xen_evtchn_close(evtchn);
xen_irq_info_cleanup(info);
+ xen_evtchn_close(evtchn);
}
static void enable_pirq(struct irq_data *data)
@@ -956,6 +956,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
{
evtchn_port_t evtchn;
+ bool close_evtchn = false;
if (!info) {
xen_irq_free_desc(irq);
@@ -975,7 +976,7 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
struct xenbus_device *dev;
if (!info->is_static)
- xen_evtchn_close(evtchn);
+ close_evtchn = true;
switch (info->type) {
case IRQT_VIRQ:
@@ -995,6 +996,9 @@ static void __unbind_from_irq(struct irq_info *info, unsigned int irq)
}
xen_irq_info_cleanup(info);
+
+ if (close_evtchn)
+ xen_evtchn_close(evtchn);
}
xen_free_irq(info);
--
2.40.1
Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: b0344d6854d25a8b3b901c778b1728885dd99007
Gitweb: https://git.kernel.org/tip/b0344d6854d25a8b3b901c778b1728885dd99007
Author: Doug Berger <opendmb(a)gmail.com>
AuthorDate: Fri, 09 Feb 2024 17:24:49 -08:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Tue, 13 Feb 2024 09:33:31 +01:00
irqchip/irq-brcmstb-l2: Add write memory barrier before exit
It was observed on Broadcom devices that use GIC v3 architecture L1
interrupt controllers as the parent of brcmstb-l2 interrupt controllers
that the deactivation of the parent interrupt could happen before the
brcmstb-l2 deasserted its output. This would lead the GIC to reactivate the
interrupt only to find that no L2 interrupt was pending. The result was a
spurious interrupt invoking handle_bad_irq() with its associated
messaging. While this did not create a functional problem it is a waste of
cycles.
The hazard exists because the memory mapped bus writes to the brcmstb-l2
registers are buffered and the GIC v3 architecture uses a very efficient
system register write to deactivate the interrupt.
Add a write memory barrier prior to invoking chained_irq_exit() to
introduce a dsb(st) on those systems to ensure the system register write
cannot be executed until the memory mapped writes are visible to the
system.
[ florian: Added Fixes tag ]
Fixes: 7f646e92766e ("irqchip: brcmstb-l2: Add Broadcom Set Top Box Level-2 interrupt controller")
Signed-off-by: Doug Berger <opendmb(a)gmail.com>
Signed-off-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Acked-by: Florian Fainelli <florian.fainelli(a)broadcom.com>
Acked-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240210012449.3009125-1-florian.fainelli@broadco…
---
drivers/irqchip/irq-brcmstb-l2.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c
index 5559c94..2b0b317 100644
--- a/drivers/irqchip/irq-brcmstb-l2.c
+++ b/drivers/irqchip/irq-brcmstb-l2.c
@@ -2,7 +2,7 @@
/*
* Generic Broadcom Set Top Box Level 2 Interrupt controller driver
*
- * Copyright (C) 2014-2017 Broadcom
+ * Copyright (C) 2014-2024 Broadcom
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -112,6 +112,9 @@ static void brcmstb_l2_intc_irq_handle(struct irq_desc *desc)
generic_handle_domain_irq(b->domain, irq);
} while (status);
out:
+ /* Don't ack parent before all device writes are done */
+ wmb();
+
chained_irq_exit(chip, desc);
}
[Cc: stable]
On Mon, 12 Feb 2024, Ulrich Hecht wrote:
>
> > On 02/08/2024 11:51 PM CET Finn Thain <fthain(a)linux-m68k.org> wrote:
> > Ulrich, I imagine that you would normally receive fixes via the
> > corresponding -stable trees. If Michael's series went into
> > stable/linux-4.19.y you could cherry-pick from there for your v4.4.y tree
> > and maybe avoid some merge conflicts that way.
>
> That would work for me.
>
OK. Here's the relevant commit. It fixes bd6f56a75bb2 which first appeared
in v2.6.38-rc1. I believe this can be cherry-picked without any conflicts.
50e43a573344 m68k: Update ->thread.esp0 before calling syscall_trace() in ret_from_signal
From: Zhang Rui <rui.zhang(a)intel.com>
[ Upstream commit 34cf8c657cf0365791cdc658ddbca9cc907726ce ]
Currently, coretemp driver supports only 128 cores per package.
This loses some core temperature information on systems that have more
than 128 cores per package.
[ 58.685033] coretemp coretemp.0: Adding Core 128 failed
[ 58.692009] coretemp coretemp.0: Adding Core 129 failed
...
Enlarge the limitation to 512 because there are platforms with more than
256 cores per package.
Signed-off-by: Zhang Rui <rui.zhang(a)intel.com>
Link: https://lore.kernel.org/r/20240202092144.71180-4-rui.zhang@intel.com
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/hwmon/coretemp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 33371f7a4c0f..e08bb28ec427 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -53,7 +53,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
#define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */
#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
-#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
+#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */
#define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
--
2.43.0
From: Zhang Rui <rui.zhang(a)intel.com>
[ Upstream commit 34cf8c657cf0365791cdc658ddbca9cc907726ce ]
Currently, coretemp driver supports only 128 cores per package.
This loses some core temperature information on systems that have more
than 128 cores per package.
[ 58.685033] coretemp coretemp.0: Adding Core 128 failed
[ 58.692009] coretemp coretemp.0: Adding Core 129 failed
...
Enlarge the limitation to 512 because there are platforms with more than
256 cores per package.
Signed-off-by: Zhang Rui <rui.zhang(a)intel.com>
Link: https://lore.kernel.org/r/20240202092144.71180-4-rui.zhang@intel.com
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/hwmon/coretemp.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 0eabad344961..b8d5087da65b 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -40,7 +40,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
#define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */
#define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */
-#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */
+#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */
#define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */
#define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */
#define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
--
2.43.0
From: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
[ Upstream commit 22457c047ed971f2f2e33be593ddfabd9639a409 ]
Unfortunately reparse attribute is used for many purposes (several dozens).
It is not possible here to know is this name symlink or not.
To get exactly the type of name we should to open inode (read mft).
getattr for opened file (fstat) correctly returns symlink.
Signed-off-by: Konstantin Komarov <almaz.alexandrovich(a)paragon-software.com>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/ntfs3/dir.c | 30 +++++++++++++++++++++++++-----
1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c
index d4d9f4ffb6d9..c2fb76bb28f4 100644
--- a/fs/ntfs3/dir.c
+++ b/fs/ntfs3/dir.c
@@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni,
return 0;
}
- /* NTFS: symlinks are "dir + reparse" or "file + reparse" */
- if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT)
- dt_type = DT_LNK;
- else
- dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+ /*
+ * NTFS: symlinks are "dir + reparse" or "file + reparse"
+ * Unfortunately reparse attribute is used for many purposes (several dozens).
+ * It is not possible here to know is this name symlink or not.
+ * To get exactly the type of name we should to open inode (read mft).
+ * getattr for opened file (fstat) correctly returns symlink.
+ */
+ dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG;
+
+ /*
+ * It is not reliable to detect the type of name using duplicated information
+ * stored in parent directory.
+ * The only correct way to get the type of name - read MFT record and find ATTR_STD.
+ * The code below is not good idea.
+ * It does additional locks/reads just to get the type of name.
+ * Should we use additional mount option to enable branch below?
+ */
+ if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) &&
+ ino != ni->mi.rno) {
+ struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL);
+ if (!IS_ERR_OR_NULL(inode)) {
+ dt_type = fs_umode_to_dtype(inode->i_mode);
+ iput(inode);
+ }
+ }
return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type);
}
--
2.43.0
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
While looking at improving the saved_cmdlines cache I found a huge amount
of wasted memory that should be used for the cmdlines.
The tracing data saves pids during the trace. At sched switch, if a trace
occurred, it will save the comm of the task that did the trace. This is
saved in a "cache" that maps pids to comms and exposed to user space via
the /sys/kernel/tracing/saved_cmdlines file. Currently it only caches by
default 128 comms.
The structure that uses this creates an array to store the pids using
PID_MAX_DEFAULT (which is usually set to 32768). This causes the structure
to be of the size of 131104 bytes on 64 bit machines.
In hex: 131104 = 0x20020, and since the kernel allocates generic memory in
powers of two, the kernel would allocate 0x40000 or 262144 bytes to store
this structure. That leaves 131040 bytes of wasted space.
Worse, the structure points to an allocated array to store the comm names,
which is 16 bytes times the amount of names to save (currently 128), which
is 2048 bytes. Instead of allocating a separate array, make the structure
end with a variable length string and use the extra space for that.
This is similar to a recommendation that Linus had made about eventfs_inode names:
https://lore.kernel.org/all/20240130190355.11486-5-torvalds@linux-foundatio…
Instead of allocating a separate string array to hold the saved comms,
have the structure end with: char saved_cmdlines[]; and round up to the
next power of two over sizeof(struct saved_cmdline_buffers) + num_cmdlines * TASK_COMM_LEN
It will use this extra space for the saved_cmdline portion.
Now, instead of saving only 128 comms by default, by using this wasted
space at the end of the structure it can save over 8000 comms and even
saves space by removing the need for allocating the other array.
Cc: stable(a)vger.kernel.org
Fixes: 939c7a4f04fcd ("tracing: Introduce saved_cmdlines_size file")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace.c | 73 +++++++++++++++++++++-----------------------
1 file changed, 34 insertions(+), 39 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a7c6fd934e9..0b3e60b827f7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2320,7 +2320,7 @@ struct saved_cmdlines_buffer {
unsigned *map_cmdline_to_pid;
unsigned cmdline_num;
int cmdline_idx;
- char *saved_cmdlines;
+ char saved_cmdlines[];
};
static struct saved_cmdlines_buffer *savedcmd;
@@ -2334,47 +2334,54 @@ static inline void set_cmdline(int idx, const char *cmdline)
strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
-static int allocate_cmdlines_buffer(unsigned int val,
- struct saved_cmdlines_buffer *s)
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
+{
+ int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
+
+ kfree(s->map_cmdline_to_pid);
+ free_pages((unsigned long)s, order);
+}
+
+static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
+ struct saved_cmdlines_buffer *s;
+ struct page *page;
+ int orig_size, size;
+ int order;
+
+ /* Figure out how much is needed to hold the given number of cmdlines */
+ orig_size = sizeof(*s) + val * TASK_COMM_LEN;
+ order = get_order(orig_size);
+ size = 1 << (order + PAGE_SHIFT);
+ page = alloc_pages(GFP_KERNEL, order);
+ if (!page)
+ return NULL;
+
+ s = page_address(page);
+ memset(s, 0, sizeof(*s));
+
+ /* Round up to actual allocation */
+ val = (size - sizeof(*s)) / TASK_COMM_LEN;
+ s->cmdline_num = val;
+
s->map_cmdline_to_pid = kmalloc_array(val,
sizeof(*s->map_cmdline_to_pid),
GFP_KERNEL);
- if (!s->map_cmdline_to_pid)
- return -ENOMEM;
-
- s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
- if (!s->saved_cmdlines) {
- kfree(s->map_cmdline_to_pid);
- return -ENOMEM;
- }
s->cmdline_idx = 0;
- s->cmdline_num = val;
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
sizeof(s->map_pid_to_cmdline));
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
val * sizeof(*s->map_cmdline_to_pid));
- return 0;
+ return s;
}
static int trace_create_savedcmd(void)
{
- int ret;
-
- savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
- if (!savedcmd)
- return -ENOMEM;
-
- ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
- if (ret < 0) {
- kfree(savedcmd);
- savedcmd = NULL;
- return -ENOMEM;
- }
+ savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
- return 0;
+ return savedcmd ? 0 : -ENOMEM;
}
int is_tracing_stopped(void)
@@ -6056,26 +6063,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
-{
- kfree(s->saved_cmdlines);
- kfree(s->map_cmdline_to_pid);
- kfree(s);
-}
-
static int tracing_resize_saved_cmdlines(unsigned int val)
{
struct saved_cmdlines_buffer *s, *savedcmd_temp;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = allocate_cmdlines_buffer(val);
if (!s)
return -ENOMEM;
- if (allocate_cmdlines_buffer(val, s) < 0) {
- kfree(s);
- return -ENOMEM;
- }
-
preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
savedcmd_temp = savedcmd;
--
2.43.0
The quilt patch titled
Subject: fs/proc/task_mmu: add display flag for VM_MAYOVERLAY
has been removed from the -mm tree. Its filename was
fs-proc-task_mmu-add-display-flag-for-vm_mayoverlay.patch
This patch was dropped because it was withdrawn
------------------------------------------------------
From: Anshuman Khandual <anshuman.khandual(a)arm.com>
Subject: fs/proc/task_mmu: add display flag for VM_MAYOVERLAY
Date: Thu, 8 Feb 2024 14:18:05 +0530
VM_UFFD_MISSING flag is mutually exclussive with VM_MAYOVERLAY flag as
they both use the same bit position i.e 0x00000200 in the vm_flags. Let's
update show_smap_vma_flags() to display the correct flags depending on
CONFIG_MMU.
Link: https://lkml.kernel.org/r/20240208084805.1252337-1-anshuman.khandual@arm.com
Fixes: b6b7a8faf05c ("mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings")
Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/fs/proc/task_mmu.c~fs-proc-task_mmu-add-display-flag-for-vm_mayoverlay
+++ a/fs/proc/task_mmu.c
@@ -681,7 +681,11 @@ static void show_smap_vma_flags(struct s
[ilog2(VM_HUGEPAGE)] = "hg",
[ilog2(VM_NOHUGEPAGE)] = "nh",
[ilog2(VM_MERGEABLE)] = "mg",
+#ifdef CONFIG_MMU
[ilog2(VM_UFFD_MISSING)]= "um",
+#else
+ [ilog2(VM_MAYOVERLAY)] = "ov",
+#endif /* CONFIG_MMU */
[ilog2(VM_UFFD_WP)] = "uw",
#ifdef CONFIG_ARM64_MTE
[ilog2(VM_MTE)] = "mt",
_
Patches currently in -mm which might be from anshuman.khandual(a)arm.com are
mm-memblock-add-memblock_rsrv_noinit-into-flagname-array.patch
mm-cma-dont-treat-bad-input-arguments-for-cma_alloc-as-its-failure.patch
mm-cma-drop-config_cma_debug.patch
mm-cma-make-max_cma_areas-=-config_cma_areas.patch
mm-cma-add-sysfs-file-release_pages_success.patch
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: d794734c9bbfe22f86686dc2909c25f5ffe1a572
Gitweb: https://git.kernel.org/tip/d794734c9bbfe22f86686dc2909c25f5ffe1a572
Author: Steve Wahl <steve.wahl(a)hpe.com>
AuthorDate: Fri, 26 Jan 2024 10:48:41 -06:00
Committer: Dave Hansen <dave.hansen(a)linux.intel.com>
CommitterDate: Mon, 12 Feb 2024 14:53:42 -08:00
x86/mm/ident_map: Use gbpages only where full GB page should be mapped.
When ident_pud_init() uses only gbpages to create identity maps, large
ranges of addresses not actually requested can be included in the
resulting table; a 4K request will map a full GB. On UV systems, this
ends up including regions that will cause hardware to halt the system
if accessed (these are marked "reserved" by BIOS). Even processor
speculation into these regions is enough to trigger the system halt.
Only use gbpages when map creation requests include the full GB page
of space. Fall back to using smaller 2M pages when only portions of a
GB page are included in the request.
No attempt is made to coalesce mapping requests. If a request requires
a map entry at the 2M (pmd) level, subsequent mapping requests within
the same 1G region will also be at the pmd level, even if adjacent or
overlapping such requests could have been combined to map a full
gbpage. Existing usage starts with larger regions and then adds
smaller regions, so this should not have any great consequence.
[ dhansen: fix up comment formatting, simplifty changelog ]
Signed-off-by: Steve Wahl <steve.wahl(a)hpe.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/all/20240126164841.170866-1-steve.wahl%40hpe.com
---
arch/x86/mm/ident_map.c | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d700..f50cc21 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_large(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;
- if (pud_present(*pud))
- continue;
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the begining: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;
- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
The patch titled
Subject: kasan/test: avoid gcc warning for intentional overflow
has been added to the -mm mm-unstable branch. Its filename is
kasan-test-avoid-gcc-warning-for-intentional-overflow.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Arnd Bergmann <arnd(a)arndb.de>
Subject: kasan/test: avoid gcc warning for intentional overflow
Date: Mon, 12 Feb 2024 12:15:52 +0100
The out-of-bounds test allocates an object that is three bytes too short
in order to validate the bounds checking. Starting with gcc-14, this
causes a compile-time warning as gcc has grown smart enough to understand
the sizeof() logic:
mm/kasan/kasan_test.c: In function 'kmalloc_oob_16':
mm/kasan/kasan_test.c:443:14: error: allocation of insufficient size '13' for type 'struct <anonymous>' with size '16' [-Werror=alloc-size]
443 | ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL);
| ^
Hide the actual computation behind a RELOC_HIDE() that ensures
the compiler misses the intentional bug.
Link: https://lkml.kernel.org/r/20240212111609.869266-1-arnd@kernel.org
Fixes: 3f15801cdc23 ("lib: add kasan test module")
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Cc: Alexander Potapenko <glider(a)google.com>
Cc: Andrey Konovalov <andreyknvl(a)gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a(a)gmail.com>
Cc: Arnd Bergmann <arnd(a)arndb.de>
Cc: Dmitry Vyukov <dvyukov(a)google.com>
Cc: Marco Elver <elver(a)google.com>
Cc: Vincenzo Frascino <vincenzo.frascino(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/kasan/kasan_test.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
--- a/mm/kasan/kasan_test.c~kasan-test-avoid-gcc-warning-for-intentional-overflow
+++ a/mm/kasan/kasan_test.c
@@ -440,7 +440,8 @@ static void kmalloc_oob_16(struct kunit
/* This test is specifically crafted for the generic mode. */
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
- ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL);
+ /* RELOC_HIDE to prevent gcc from warning about short alloc */
+ ptr1 = RELOC_HIDE(kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL), 0);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);
_
Patches currently in -mm which might be from arnd(a)arndb.de are
mm-damon-dbgfs-implement-deprecation-notice-file-fix.patch
kasan-test-avoid-gcc-warning-for-intentional-overflow.patch
Limit the WiFi PCIe link speed to Gen2 speed (500 GB/s), which is the
speed that the boot firmware has brought up the link at (and that
Windows uses).
This is specifically needed to avoid a large amount of link errors when
restarting the link during boot (but which are currently not reported).
This may potentially also help with intermittent failures to download
the ath11k firmware during boot which can be seen when there is a
longer delay between restarting the link and loading the WiFi driver
(e.g. when using full disk encryption).
Fixes: 123b30a75623 ("arm64: dts: qcom: sc8280xp-x13s: enable WiFi controller")
Cc: stable(a)vger.kernel.org # 6.2
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
index 511d53d9c5a1..ff4b896b1bbf 100644
--- a/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
+++ b/arch/arm64/boot/dts/qcom/sc8280xp-lenovo-thinkpad-x13s.dts
@@ -863,6 +863,8 @@ &pcie3a_phy {
};
&pcie4 {
+ max-link-speed = <2>;
+
perst-gpios = <&tlmm 141 GPIO_ACTIVE_LOW>;
wake-gpios = <&tlmm 139 GPIO_ACTIVE_LOW>;
--
2.43.0
Originally io_cancel() only supported cancelling USB reads and writes.
If I/O was cancelled successfully, information about the cancelled I/O
operation was copied to the data structure the io_cancel() 'result'
argument points at. Commit 63b05203af57 ("[PATCH] AIO: retry
infrastructure fixes and enhancements") changed the io_cancel() behavior
from reporting status information via the 'result' argument into
reporting status information on the completion ring. Commit 41003a7bcfed
("aio: remove retry-based AIO") accidentally changed the behavior into
not reporting a completion event on the completion ring for cancelled
requests. This is a bug because successful cancellation leads to an iocb
leak in user space. Since this bug was introduced more than ten years
ago and since nobody has complained since then, remove support for I/O
cancellation. Keep support for cancellation of IOCB_CMD_POLL requests.
Calling kiocb_set_cancel_fn() without knowing whether the caller
submitted a struct kiocb or a struct aio_kiocb is unsafe. The
following call trace illustrates that without this patch an
out-of-bounds write happens if I/O is submitted by io_uring (from a
phone with an ARM CPU and kernel 6.1):
WARNING: CPU: 3 PID: 368 at fs/aio.c:598 kiocb_set_cancel_fn+0x9c/0xa8
Call trace:
kiocb_set_cancel_fn+0x9c/0xa8
ffs_epfile_read_iter+0x144/0x1d0
io_read+0x19c/0x498
io_issue_sqe+0x118/0x27c
io_submit_sqes+0x25c/0x5fc
__arm64_sys_io_uring_enter+0x104/0xab0
invoke_syscall+0x58/0x11c
el0_svc_common+0xb4/0xf4
do_el0_svc+0x2c/0xb0
el0_svc+0x2c/0xa4
el0t_64_sync_handler+0x68/0xb4
el0t_64_sync+0x1a4/0x1a8
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Avi Kivity <avi(a)scylladb.com>
Cc: Sandeep Dhavale <dhavale(a)google.com>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Fixes: 63b05203af57 ("[PATCH] AIO: retry infrastructure fixes and enhancements")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
drivers/usb/gadget/function/f_fs.c | 25 -------------------
drivers/usb/gadget/legacy/inode.c | 20 ---------------
fs/aio.c | 39 +++++-------------------------
include/linux/aio.h | 9 -------
4 files changed, 6 insertions(+), 87 deletions(-)
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 6bff6cb93789..59789292f4f7 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1157,25 +1157,6 @@ ffs_epfile_open(struct inode *inode, struct file *file)
return stream_open(inode, file);
}
-static int ffs_aio_cancel(struct kiocb *kiocb)
-{
- struct ffs_io_data *io_data = kiocb->private;
- struct ffs_epfile *epfile = kiocb->ki_filp->private_data;
- unsigned long flags;
- int value;
-
- spin_lock_irqsave(&epfile->ffs->eps_lock, flags);
-
- if (io_data && io_data->ep && io_data->req)
- value = usb_ep_dequeue(io_data->ep, io_data->req);
- else
- value = -EINVAL;
-
- spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags);
-
- return value;
-}
-
static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
struct ffs_io_data io_data, *p = &io_data;
@@ -1198,9 +1179,6 @@ static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from)
kiocb->private = p;
- if (p->aio)
- kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
-
res = ffs_epfile_io(kiocb->ki_filp, p);
if (res == -EIOCBQUEUED)
return res;
@@ -1242,9 +1220,6 @@ static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
kiocb->private = p;
- if (p->aio)
- kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
-
res = ffs_epfile_io(kiocb->ki_filp, p);
if (res == -EIOCBQUEUED)
return res;
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 03179b1880fd..99b7366d77af 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -446,25 +446,6 @@ struct kiocb_priv {
unsigned actual;
};
-static int ep_aio_cancel(struct kiocb *iocb)
-{
- struct kiocb_priv *priv = iocb->private;
- struct ep_data *epdata;
- int value;
-
- local_irq_disable();
- epdata = priv->epdata;
- // spin_lock(&epdata->dev->lock);
- if (likely(epdata && epdata->ep && priv->req))
- value = usb_ep_dequeue (epdata->ep, priv->req);
- else
- value = -EINVAL;
- // spin_unlock(&epdata->dev->lock);
- local_irq_enable();
-
- return value;
-}
-
static void ep_user_copy_worker(struct work_struct *work)
{
struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work);
@@ -537,7 +518,6 @@ static ssize_t ep_aio(struct kiocb *iocb,
iocb->private = priv;
priv->iocb = iocb;
- kiocb_set_cancel_fn(iocb, ep_aio_cancel);
get_ep(epdata);
priv->epdata = epdata;
priv->actual = 0;
diff --git a/fs/aio.c b/fs/aio.c
index bb2ff48991f3..c20946d5fcf3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -203,7 +203,7 @@ struct aio_kiocb {
};
struct kioctx *ki_ctx;
- kiocb_cancel_fn *ki_cancel;
+ int (*ki_cancel)(struct kiocb *);
struct io_event ki_res;
@@ -587,22 +587,6 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
-{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
- struct kioctx *ctx = req->ki_ctx;
- unsigned long flags;
-
- if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
- return;
-
- spin_lock_irqsave(&ctx->ctx_lock, flags);
- list_add_tail(&req->ki_list, &ctx->active_reqs);
- req->ki_cancel = cancel;
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-}
-EXPORT_SYMBOL(kiocb_set_cancel_fn);
-
/*
* free_ioctx() should be RCU delayed to synchronize against the RCU
* protected lookup_ioctx() and also needs process context to call
@@ -2158,13 +2142,11 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
#endif
/* sys_io_cancel:
- * Attempts to cancel an iocb previously passed to io_submit. If
- * the operation is successfully cancelled, the resulting event is
- * copied into the memory pointed to by result without being placed
- * into the completion queue and 0 is returned. May fail with
- * -EFAULT if any of the data structures pointed to are invalid.
- * May fail with -EINVAL if aio_context specified by ctx_id is
- * invalid. May fail with -EAGAIN if the iocb specified was not
+ * Attempts to cancel an IOCB_CMD_POLL iocb previously passed to
+ * io_submit(). If the operation is successfully cancelled 0 is returned.
+ * May fail with -EFAULT if any of the data structures pointed to are
+ * invalid. May fail with -EINVAL if aio_context specified by ctx_id is
+ * invalid. May fail with -EINPROGRESS if the iocb specified was not
* cancelled. Will fail with -ENOSYS if not implemented.
*/
SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
@@ -2196,15 +2178,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
}
spin_unlock_irq(&ctx->ctx_lock);
- if (!ret) {
- /*
- * The result argument is no longer used - the io_event is
- * always delivered via the ring buffer. -EINPROGRESS indicates
- * cancellation is progress:
- */
- ret = -EINPROGRESS;
- }
-
percpu_ref_put(&ctx->users);
return ret;
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 86892a4fe7c8..9aabca4a0eed 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -2,22 +2,13 @@
#ifndef __LINUX__AIO_H
#define __LINUX__AIO_H
-#include <linux/aio_abi.h>
-
-struct kioctx;
-struct kiocb;
struct mm_struct;
-typedef int (kiocb_cancel_fn)(struct kiocb *);
-
/* prototypes */
#ifdef CONFIG_AIO
extern void exit_aio(struct mm_struct *mm);
-void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
#else
static inline void exit_aio(struct mm_struct *mm) { }
-static inline void kiocb_set_cancel_fn(struct kiocb *req,
- kiocb_cancel_fn *cancel) { }
#endif /* CONFIG_AIO */
#endif /* __LINUX__AIO_H */
Patches 1-4 are fixes for issues found by Paolo while working on adding
TCP_NOTSENT_LOWAT support. The latter will need to track more states
under the msk data lock. Since the locking msk locking schema is already
quite complex, do a long awaited clean-up step by moving several
confusing lockless initialization under the relevant locks. Note that it
is unlikely a real race could happen even prior to such patches as the
MPTCP-level state machine implicitly ensures proper serialization of the
write accesses, even lacking explicit lock. But still, simplification is
welcome and this will help for the maintenance. This can be backported
up to v5.6.
Patch 5 is a fix for the userspace PM, not to add new local address
entries if the address is already in the list. This behaviour can be
seen since v5.19.
Patch 6 fixes an issue when Fastopen is used. The issue can happen since
v6.2. A previous fix has already been applied, but not taking care of
all cases according to syzbot.
Patch 7 updates Geliang's email address in the MAINTAINERS file.
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
---
Geliang Tang (2):
mptcp: check addrs list in userspace_pm_get_local_id
MAINTAINERS: update Geliang's email address
Paolo Abeni (5):
mptcp: drop the push_pending field
mptcp: fix rcv space initialization
mptcp: fix more tx path fields initialization
mptcp: corner case locking for rx path fields initialization
mptcp: really cope with fastopen race
.mailmap | 9 +++---
MAINTAINERS | 2 +-
net/mptcp/fastopen.c | 6 ++--
net/mptcp/options.c | 9 +++---
net/mptcp/pm_userspace.c | 13 ++++++++-
net/mptcp/protocol.c | 31 +++++++++++----------
net/mptcp/protocol.h | 16 ++++++-----
net/mptcp/subflow.c | 71 ++++++++++++++++++++++++++++++------------------
8 files changed, 95 insertions(+), 62 deletions(-)
---
base-commit: 335bac1daae3fd9070d0f9f34d7d7ba708729256
change-id: 20240202-upstream-net-20240202-locking-cleanup-misc-5f2ee79d8356
Best regards,
--
Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
IO and WQE buffers are allocated once per HW and can be reused later. If
WQE buffers allocation fails then the whole allocation is marked as failed
but already created IO array internal objects are not freed. hw->io is
freed but not nullified in that specific case - it may become a problem
later as efct_hw_setup_io() is supposed to be reusable for the same HW.
While at it, use kcalloc instead of kmalloc_array/memset-zero combination
and get rid of some needless NULL assignments: nullifying hw->io[i]
elements just before freeing hw->io is not really useful.
Found by Linux Verification Center (linuxtesting.org).
Fixes: 4df84e846624 ("scsi: elx: efct: Driver initialization routines")
Cc: stable(a)vger.kernel.org
Signed-off-by: Fedor Pchelkin <pchelkin(a)ispras.ru>
---
drivers/scsi/elx/efct/efct_hw.c | 14 ++++----------
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/drivers/scsi/elx/efct/efct_hw.c b/drivers/scsi/elx/efct/efct_hw.c
index 5a5525054d71..e5486e6949f9 100644
--- a/drivers/scsi/elx/efct/efct_hw.c
+++ b/drivers/scsi/elx/efct/efct_hw.c
@@ -487,12 +487,10 @@ efct_hw_setup_io(struct efct_hw *hw)
struct efct *efct = hw->os;
if (!hw->io) {
- hw->io = kmalloc_array(hw->config.n_io, sizeof(io), GFP_KERNEL);
+ hw->io = kcalloc(hw->config.n_io, sizeof(io), GFP_KERNEL);
if (!hw->io)
return -ENOMEM;
- memset(hw->io, 0, hw->config.n_io * sizeof(io));
-
for (i = 0; i < hw->config.n_io; i++) {
hw->io[i] = kzalloc(sizeof(*io), GFP_KERNEL);
if (!hw->io[i])
@@ -502,10 +500,8 @@ efct_hw_setup_io(struct efct_hw *hw)
/* Create WQE buffs for IO */
hw->wqe_buffs = kzalloc((hw->config.n_io * hw->sli.wqe_size),
GFP_KERNEL);
- if (!hw->wqe_buffs) {
- kfree(hw->io);
- return -ENOMEM;
- }
+ if (!hw->wqe_buffs)
+ goto error;
} else {
/* re-use existing IOs, including SGLs */
@@ -586,10 +582,8 @@ efct_hw_setup_io(struct efct_hw *hw)
return 0;
error:
- for (i = 0; i < hw->config.n_io && hw->io[i]; i++) {
+ for (i = 0; i < hw->config.n_io && hw->io[i]; i++)
kfree(hw->io[i]);
- hw->io[i] = NULL;
- }
kfree(hw->io);
hw->io = NULL;
--
2.39.2
The driver uses regmap APIs so it should make sure they are available.
Fixes: c75f4bf6800b ("power: supply: Introduce MM8013 fuel gauge driver")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Thomas Weißschuh <linux(a)weissschuh.net>
---
drivers/power/supply/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig
index f21cb05815ec..3e31375491d5 100644
--- a/drivers/power/supply/Kconfig
+++ b/drivers/power/supply/Kconfig
@@ -978,6 +978,7 @@ config CHARGER_QCOM_SMB2
config FUEL_GAUGE_MM8013
tristate "Mitsumi MM8013 fuel gauge driver"
depends on I2C
+ select REGMAP_I2C
help
Say Y here to enable the Mitsumi MM8013 fuel gauge driver.
It enables the monitoring of many battery parameters, including
---
base-commit: 54be6c6c5ae8e0d93a6c4641cb7528eb0b6ba478
change-id: 20240204-mm8013-regmap-d8d18ada07c2
Best regards,
--
Thomas Weißschuh <linux(a)weissschuh.net>
Hi,
please consider applying the following patch to v5.15.y to fix
a build error seen with various test builds (m68k:allmodconfig,
powerpc:allmodconfig, powerpc:ppc32_allmodconfig, and
xtensa:allmodconfig).
b4909252da9b ("drivers: lkdtm: fix clang -Wformat warning")
Thanks,
Guenter
Correction: The subject line in my previous message erroneously stated
"5.10.y" in patch 2/3 and 3/3, instead of the correct "5.15.y." Sending
again after correction.
Here are the three backported patches aimed at addressing a potential
crash and an actual crash.
Patch 1 Fix potential OOB access in receive_encrypted_standard() if
server returned a large shdr->NextCommand in cifs.
Patch 2 fix validate offsets and lengths before dereferencing create
contexts in smb2_parse_contexts().
Patch 3 fix issue in patch 2.
The original patches were authored by Paulo Alcantara <pc(a)manguebit.com>.
Original Patches:
1. eec04ea11969 ("smb: client: fix OOB in receive_encrypted_standard()")
2. af1689a9b770 ("smb: client: fix potential OOBs in smb2_parse_contexts()")
3. 76025cc2285d ("smb: client: fix parsing of SMB3.1.1 POSIX create context")
Please review and consider applying these patches.
https://lore.kernel.org/all/2023121834-semisoft-snarl-49ad@gregkh/
fs/cifs/smb2ops.c | 4 +++-
fs/cifs/smb2pdu.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------
fs/cifs/smb2proto.h | 12 +++++++-----
3 files changed, 66 insertions(+), 43 deletions(-)
This reverts commit 1e35f074399dece73d5df11847d4a0d7a6f49434.
Given that ERROR_RECOVERY calls into PORT_RESET for Hi-Zing
the CC pins, setting CC pins to default state during PORT_RESET
breaks error recovery.
4.5.2.2.2.1 ErrorRecovery State Requirements
The port shall not drive VBUS or VCONN, and shall present a
high-impedance to ground (above zOPEN) on its CC1 and CC2 pins.
Hi-Zing the CC pins is the inteded behavior for PORT_RESET.
CC pins are set to default state after tErrorRecovery in
PORT_RESET_WAIT_OFF.
4.5.2.2.2.2 Exiting From ErrorRecovery State
A Sink shall transition to Unattached.SNK after tErrorRecovery.
A Source shall transition to Unattached.SRC after tErrorRecovery.
Cc: stable(a)vger.kernel.org
Cc: Frank Wang <frank.wang(a)rock-chips.com>
Fixes: 1e35f074399d ("usb: typec: tcpm: fix cc role at port reset")
Signed-off-by: Badhri Jagan Sridharan <badhri(a)google.com>
---
drivers/usb/typec/tcpm/tcpm.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 5945e3a2b0f7..9d410718eaf4 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4876,8 +4876,7 @@ static void run_state_machine(struct tcpm_port *port)
break;
case PORT_RESET:
tcpm_reset_port(port);
- tcpm_set_cc(port, tcpm_default_state(port) == SNK_UNATTACHED ?
- TYPEC_CC_RD : tcpm_rp_cc(port));
+ tcpm_set_cc(port, TYPEC_CC_OPEN);
tcpm_set_state(port, PORT_RESET_WAIT_OFF,
PD_T_ERROR_RECOVERY);
break;
base-commit: 933bb7b878ddd0f8c094db45551a7daddf806e00
--
2.43.0.429.g432eaa2c6b-goog
I'm relatively new here, first time reporting a regression, so apologies in
advance if I'm doing something wrong.
I'm using Arch Linux (linux-mainline kernel) on my chromebook Acer Spin 713-2W
(Voxel), and after upgrading linux-mainline from 6.7-rc4 to 6.7-rc5 the audio
setup isn't working anymore. Firstly I suspected its some changes in the sof-
firmware, yet got no luck in upgrading sof-firmware (2023.09.2 -> 2023.12).
On certain chromebooks, audio setup needs custom ALSA ucm confs [1], but after
contacting the chromebook-linux-audio developer [2], I think it's not a conf
problem, but rather a kernel regression.
After hours of bisecting, the first bad commit 31ed8da (ASoC: SOF: sof-audio:
Modify logic for enabling/disabling topology cores) ensures the regression.
demsg log pasted below:
// before 31ed8da, working
[ 61.572587] sof-audio-pci-intel-tgl 0000:00:1f.3: ipc tx error for
0x60010000 (msg/reply size: 108/20): -22
[ 61.572593] sof-audio-pci-intel-tgl 0000:00:1f.3: HW params ipc failed for
stream 1
[ 61.572594] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_hw_params on 0000:00:1f.3: -22
[ 61.573247] sof-audio-pci-intel-tgl 0000:00:1f.3: ipc tx error for
0x60010000 (msg/reply size: 108/20): -22
[ 61.573250] sof-audio-pci-intel-tgl 0000:00:1f.3: HW params ipc failed for
stream 1
[ 61.573251] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_hw_params on 0000:00:1f.3: -22
[ 61.573888] sof-audio-pci-intel-tgl 0000:00:1f.3: ipc tx error for
0x60010000 (msg/reply size: 108/20): -22
[ 61.573892] sof-audio-pci-intel-tgl 0000:00:1f.3: HW params ipc failed for
stream 1
[ 61.573893] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_hw_params on 0000:00:1f.3: -22
[ 61.574570] sof-audio-pci-intel-tgl 0000:00:1f.3: ipc tx error for
0x60010000 (msg/reply size: 108/20): -22
[ 61.574572] sof-audio-pci-intel-tgl 0000:00:1f.3: HW params ipc failed for
stream 1
[ 61.574573] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_hw_params on 0000:00:1f.3: -22
// after 31ed8da, broken
[ 48.930740] sof-audio-pci-intel-tgl 0000:00:1f.3: ipc tx error for
0x30020000 (msg/reply size: 12/0): -13
[ 48.930762] sof-audio-pci-intel-tgl 0000:00:1f.3: failed to free widget
DMIC0.IN
[ 57.235697] sof-audio-pci-intel-tgl 0000:00:1f.3: ipc tx error for
0x30030000 (msg/reply size: 16/0): -22
[ 57.235701] sof-audio-pci-intel-tgl 0000:00:1f.3: sof_ipc3_route_setup:
route DMIC0.IN -> BUF4.0 failed
[ 57.235703] sof-audio-pci-intel-tgl 0000:00:1f.3:
sof_ipc3_set_up_all_pipelines: route set up failed
[ 57.235704] sof-audio-pci-intel-tgl 0000:00:1f.3: Failed to restore
pipeline after resume -22
[ 57.235706] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.235926] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.235966] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.236006] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.236041] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.236074] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.236107] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.236141] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.236173] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.236205] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
[ 57.236239] sof-audio-pci-intel-tgl 0000:00:1f.3: ASoC: error at
snd_soc_pcm_component_pm_runtime_get on 0000:00:1f.3: -22
Steps to reproduce:
- Run chromebook-linux-audio script [2] (this would install custom sof-rt5682
ucm confs [1] into /usr/share/alsa/ucm2/conf.d/sof-rt5682 and write a line
`options snd-intel-dspcfg dsp_driver=3` to /etc/modprobe.d/snd-sof.conf)
- Reboot, gets audio working on kernels before 31ed8da
- Install latest stable kernel (6.7.3), audio broken
- Install latest git kernel (6.8-rc2), audio broken
- Revert 31ed8da, build upon latest linux-git (6.8-rc2), audio working
Apologies for my busyiness that I should have reported it long before the
regression enters stable and lts, but sadly linux-lts (6.6.13) seems to be
affected too [3].
Please let me know if there's any other thing that I can help debugging.
-- Mole Shang
[1]: https://github.com/WeirdTreeThing/chromebook-ucm-conf/tree/main/sof-rt5682
[2]: https://github.com/WeirdTreeThing/chromebook-linux-audio/issues/70
[3]: https://github.com/WeirdTreeThing/chromebook-linux-audio/issues/
70#issuecomment-1911048309
#regzbot introduced: 31ed8da1c8e5e504710bb36863700e3389f8fc81
On 3/2/23 09:54, Qu Wenruo wrote:
> [BUG]
> During my scrub rework, I did a stupid thing like this:
>
> bio->bi_iter.bi_sector = stripe->logical;
> btrfs_submit_bio(fs_info, bio, stripe->mirror_num);
>
> Above bi_sector assignment is using logical address directly, which
> lacks ">> SECTOR_SHIFT".
>
> This results a read on a range which has no chunk mapping.
>
> This results the following crash:
>
> BTRFS critical (device dm-1): unable to find logical 11274289152 length 65536
> assertion failed: !IS_ERR(em), in fs/btrfs/volumes.c:6387
> ------------[ cut here ]------------
>
> Sure this is all my fault, but this shows a possible problem in real
> world, that some bitflip in file extents/tree block can point to
> unmapped ranges, and trigger above ASSERT(), or if CONFIG_BTRFS_ASSERT
> is not configured, cause invalid pointer.
>
> [PROBLEMS]
> In above call chain, we just don't handle the possible error from
> btrfs_get_chunk_map() inside __btrfs_map_block().
>
> [FIX]
> The fix is pretty straightforward, just replace the ASSERT() with proper
> error handling.
>
> Signed-off-by: Qu Wenruo <wqu(a)suse.com>
> ---
> Changelog:
> v2:
> - Rebased to latest misc-next
> The error path in bio.c is already fixed, thus only need to replace
> the ASSERT() in __btrfs_map_block().
> ---
> fs/btrfs/volumes.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index 4d479ac233a4..93bc45001e68 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -6242,7 +6242,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
> return -EINVAL;
>
> em = btrfs_get_chunk_map(fs_info, logical, *length);
> - ASSERT(!IS_ERR(em));
> + if (IS_ERR(em))
> + return PTR_ERR(em);
>
> map = em->map_lookup;
> data_stripes = nr_data_stripes(map);
This bug affects 6.1.y LTS branch but no backport commit of this fix in
6.1.y branch. Please include it. Thanks.
The patch titled
Subject: mm/memblock: add MEMBLOCK_RSRV_NOINIT into flagname[] array
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memblock-add-memblock_rsrv_noinit-into-flagname-array.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Anshuman Khandual <anshuman.khandual(a)arm.com>
Subject: mm/memblock: add MEMBLOCK_RSRV_NOINIT into flagname[] array
Date: Fri, 9 Feb 2024 08:39:12 +0530
The commit 77e6c43e137c ("memblock: introduce MEMBLOCK_RSRV_NOINIT flag")
skipped adding this newly introduced memblock flag into flagname[] array,
thus preventing a correct memblock flags output for applicable memblock
regions.
Link: https://lkml.kernel.org/r/20240209030912.1382251-1-anshuman.khandual@arm.com
Fixes: 77e6c43e137c ("memblock: introduce MEMBLOCK_RSRV_NOINIT flag")
Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memblock.c | 1 +
1 file changed, 1 insertion(+)
--- a/mm/memblock.c~mm-memblock-add-memblock_rsrv_noinit-into-flagname-array
+++ a/mm/memblock.c
@@ -2249,6 +2249,7 @@ static const char * const flagname[] = {
[ilog2(MEMBLOCK_MIRROR)] = "MIRROR",
[ilog2(MEMBLOCK_NOMAP)] = "NOMAP",
[ilog2(MEMBLOCK_DRIVER_MANAGED)] = "DRV_MNG",
+ [ilog2(MEMBLOCK_RSRV_NOINIT)] = "RSV_NIT",
};
static int memblock_debug_show(struct seq_file *m, void *private)
_
Patches currently in -mm which might be from anshuman.khandual(a)arm.com are
fs-proc-task_mmu-add-display-flag-for-vm_mayoverlay.patch
mm-memblock-add-memblock_rsrv_noinit-into-flagname-array.patch
mm-cma-dont-treat-bad-input-arguments-for-cma_alloc-as-its-failure.patch
mm-cma-drop-config_cma_debug.patch
mm-cma-make-max_cma_areas-=-config_cma_areas.patch
mm-cma-add-sysfs-file-release_pages_success.patch
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.
The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.
Cc: <stable(a)vger.kernel.org> # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-…
Acked-by: Christian König <christian.koenig(a)amd.com>
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam(a)amd.com>
---
drivers/gpu/drm/drm_buddy.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
list_splice_tail(&allocated, blocks);
+
+ if (total_allocated < size) {
+ err = -ENOSPC;
+ goto err_free;
+ }
+
return 0;
err_undo:
--
2.25.1
Hello,
This series contains backports for 6.6 from the 6.7 release. This patchset
has gone through xfs testing and review.
Anthony Iliopoulos (1):
xfs: fix again select in kconfig XFS_ONLINE_SCRUB_STATS
Catherine Hoang (2):
MAINTAINERS: add Catherine as xfs maintainer for 6.6.y
xfs: allow read IO and FICLONE to run concurrently
Cheng Lin (1):
xfs: introduce protection for drop nlink
Christoph Hellwig (4):
xfs: handle nimaps=0 from xfs_bmapi_write in xfs_alloc_file_space
xfs: only remap the written blocks in xfs_reflink_end_cow_extent
xfs: clean up FS_XFLAG_REALTIME handling in xfs_ioctl_setattr_xflags
xfs: respect the stable writes flag on the RT device
Darrick J. Wong (8):
xfs: bump max fsgeom struct version
xfs: hoist freeing of rt data fork extent mappings
xfs: prevent rt growfs when quota is enabled
xfs: rt stubs should return negative errnos when rt disabled
xfs: fix units conversion error in xfs_bmap_del_extent_delay
xfs: make sure maxlen is still congruent with prod when rounding down
xfs: clean up dqblk extraction
xfs: dquot recovery does not validate the recovered dquot
Dave Chinner (1):
xfs: inode recovery does not validate the recovered inode
Leah Rumancik (1):
xfs: up(ic_sema) if flushing data device fails
Long Li (2):
xfs: factor out xfs_defer_pending_abort
xfs: abort intent items when recovery intents fail
Omar Sandoval (1):
xfs: fix internal error from AGFL exhaustion
MAINTAINERS | 1 +
fs/xfs/Kconfig | 2 +-
fs/xfs/libxfs/xfs_alloc.c | 27 ++++++++++++--
fs/xfs/libxfs/xfs_bmap.c | 21 +++--------
fs/xfs/libxfs/xfs_defer.c | 28 +++++++++------
fs/xfs/libxfs/xfs_defer.h | 2 +-
fs/xfs/libxfs/xfs_inode_buf.c | 3 ++
fs/xfs/libxfs/xfs_rtbitmap.c | 33 +++++++++++++++++
fs/xfs/libxfs/xfs_sb.h | 2 +-
fs/xfs/xfs_bmap_util.c | 24 +++++++------
fs/xfs/xfs_dquot.c | 5 +--
fs/xfs/xfs_dquot_item_recover.c | 21 +++++++++--
fs/xfs/xfs_file.c | 63 ++++++++++++++++++++++++++-------
fs/xfs/xfs_inode.c | 24 +++++++++++++
fs/xfs/xfs_inode.h | 17 +++++++++
fs/xfs/xfs_inode_item_recover.c | 14 +++++++-
fs/xfs/xfs_ioctl.c | 30 ++++++++++------
fs/xfs/xfs_iops.c | 7 ++++
fs/xfs/xfs_log.c | 23 ++++++------
fs/xfs/xfs_log_recover.c | 2 +-
fs/xfs/xfs_reflink.c | 5 +++
fs/xfs/xfs_rtalloc.c | 33 +++++++++++++----
fs/xfs/xfs_rtalloc.h | 27 ++++++++------
23 files changed, 312 insertions(+), 102 deletions(-)
--
2.39.3
When we are in a syscall we will only save the FPSIMD subset even though
the task still has access to the full register set, and on context switch
we will only remove TIF_SVE when loading the register state. This means
that the signal handling code should not assume that TIF_SVE means that
the register state is stored in SVE format, it should instead check the
format that was recorded during save.
Fixes: 8c845e273104 ("arm64/sve: Leave SVE enabled on syscall if we don't context switch")
Signed-off-by: Mark Brown <broonie(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
Changes in v2:
- Rebase onto v6.8-rc2.
- Link to v1: https://lore.kernel.org/r/20240119-arm64-sve-signal-regs-v1-1-b9fd61b0289a@…
---
arch/arm64/kernel/fpsimd.c | 2 +-
arch/arm64/kernel/signal.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index a5dc6f764195..25ceaee6b025 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1635,7 +1635,7 @@ void fpsimd_preserve_current_state(void)
void fpsimd_signal_preserve_current_state(void)
{
fpsimd_preserve_current_state();
- if (test_thread_flag(TIF_SVE))
+ if (current->thread.fp_type == FP_STATE_SVE)
sve_to_fpsimd(current);
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 0e8beb3349ea..425b1bc17a3f 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -242,7 +242,7 @@ static int preserve_sve_context(struct sve_context __user *ctx)
vl = task_get_sme_vl(current);
vq = sve_vq_from_vl(vl);
flags |= SVE_SIG_FLAG_SM;
- } else if (test_thread_flag(TIF_SVE)) {
+ } else if (current->thread.fp_type == FP_STATE_SVE) {
vq = sve_vq_from_vl(vl);
}
@@ -878,7 +878,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user,
if (system_supports_sve() || system_supports_sme()) {
unsigned int vq = 0;
- if (add_all || test_thread_flag(TIF_SVE) ||
+ if (add_all || current->thread.fp_type == FP_STATE_SVE ||
thread_sm_enabled(¤t->thread)) {
int vl = max(sve_max_vl(), sme_max_vl());
---
base-commit: 41bccc98fb7931d63d03f326a746ac4d429c1dd3
change-id: 20240118-arm64-sve-signal-regs-5711e0d10425
Best regards,
--
Mark Brown <broonie(a)kernel.org>
Hi Valentin, hi all
[This is about a regression reported in Debian for 6.1.67]
On Tue, Feb 06, 2024 at 01:00:11PM +0100, Valentin Kleibel wrote:
> Package: linux-image-amd64
> Version: 6.1.76+1
> Source: linux
> Source-Version: 6.1.76+1
> Severity: important
> Control: notfound -1 6.6.15-2
>
> Dear Maintainers,
>
> We discovered a bug affecting dlm that prevents any tcp communications by
> dlm when booted with debian kernel 6.1.76-1.
>
> Dlm startup works (corosync-cpgtool shows the dlm:controld group with all
> expected nodes) but as soon as we try to add a lockspace dmesg shows:
> ```
> dlm: Using TCP for communications
> dlm: cannot start dlm midcomms -97
> ```
>
> It seems that commit "dlm: use kernel_connect() and kernel_bind()"
> (e9cdebbe) was merged to 6.1.
>
> Checking the code it seems that the changed function dlm_tcp_listen_bind()
> fails with exit code 97 (EAFNOSUPPORT)
> It is called from
>
> dlm/lockspace.c: threads_start() -> dlm_midcomms_start()
> dlm/midcomms.c: dlm_midcomms_start() -> dlm_lowcomms_start()
> dlm/lowcomms.c: dlm_lowcomms_start() -> dlm_listen_for_all() ->
> dlm_proto_ops->listen_bind() = dlm_tcp_listen_bind()
>
> The error code is returned all the way to threads_start() where the error
> message is emmitted.
>
> Booting with the unsigned kernel from testing (6.6.15-2), which also
> contains this commit, works without issues.
>
> I'm not sure what additional changes are required to get this working or if
> rolling back this change is an option.
>
> We'd be happy to test patches that might fix this issue.
Thanks for your report. So we have a 6.1.76 specific regression for
the backport of e9cdebbe23f1 ("dlm: use kernel_connect() and
kernel_bind()") .
Let's loop in the upstream regression list for tracking and people
involved for the subsystem to see if the issue can be identified. As
it is working for 6.6.15 which includes the commit backport as well it
might be very well that a prerequisite is missing.
# annotate regression with 6.1.y specific commit
#regzbot ^introduced e11dea8f503341507018b60906c4a9e7332f3663
#regzbot link: https://bugs.debian.org/1063338
Any ideas?
Regards,
Salvatore
Few users have observed display corruption when they boot
the machine to KDE Plasma or playing games. We have root
caused the problem that whenever alloc_range() couldn't
find the required memory blocks the function was returning
SUCCESS in some of the corner cases.
The right approach would be if the total allocated size
is less than the required size, the function should
return -ENOSPC.
Cc: <stable(a)vger.kernel.org> # 6.7+
Fixes: 0a1844bf0b53 ("drm/buddy: Improve contiguous memory allocation")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3097
Tested-by: Mario Limonciello <mario.limonciello(a)amd.com>
Link: https://patchwork.kernel.org/project/dri-devel/patch/20240207174456.341121-…
Reviewed-by: Matthew Auld <matthew.auld(a)intel.com>
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam(a)amd.com>
---
drivers/gpu/drm/drm_buddy.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c
index f57e6d74fb0e..c1a99bf4dffd 100644
--- a/drivers/gpu/drm/drm_buddy.c
+++ b/drivers/gpu/drm/drm_buddy.c
@@ -539,6 +539,12 @@ static int __alloc_range(struct drm_buddy *mm,
} while (1);
list_splice_tail(&allocated, blocks);
+
+ if (total_allocated < size) {
+ err = -ENOSPC;
+ goto err_free;
+ }
+
return 0;
err_undo:
--
2.25.1
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
While looking at improving the saved_cmdlines cache I found a huge amount
of wasted memory that should be used for the cmdlines.
The tracing data saves pids during the trace. At sched switch, if a trace
occurred, it will save the comm of the task that did the trace. This is
saved in a "cache" that maps pids to comms and exposed to user space via
the /sys/kernel/tracing/saved_cmdlines file. Currently it only caches by
default 128 comms.
The structure that uses this creates an array to store the pids using
PID_MAX_DEFAULT (which is usually set to 32768). This causes the structure
to be of the size of 131104 bytes on 64 bit machines.
In hex: 131104 = 0x20020, and since the kernel allocates generic memory in
powers of two, the kernel would allocate 0x40000 or 262144 bytes to store
this structure. That leaves 131040 bytes of wasted space.
Worse, the structure points to an allocated array to store the comm names,
which is 16 bytes times the amount of names to save (currently 128), which
is 2048 bytes. Instead of allocating a separate array, make the structure
end with a variable length string and use the extra space for that.
This is similar to a recommendation that Linus had made about eventfs_inode names:
https://lore.kernel.org/all/20240130190355.11486-5-torvalds@linux-foundatio…
Instead of allocating a separate string array to hold the saved comms,
have the structure end with: char saved_cmdlines[]; and round up to the
next power of two over sizeof(struct saved_cmdline_buffers) + num_cmdlines * TASK_COMM_LEN
It will use this extra space for the saved_cmdline portion.
Now, instead of saving only 128 comms by default, by using this wasted
space at the end of the structure it can save over 8000 comms and even
saves space by removing the need for allocating the other array.
Link: https://lore.kernel.org/linux-trace-kernel/20240208105328.7e73f71d@rorschac…
Cc: stable(a)vger.kernel.org
Cc: Masami Hiramatsu <mhiramat(a)kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com>
Cc: Vincent Donnefort <vdonnefort(a)google.com>
Cc: Sven Schnelle <svens(a)linux.ibm.com>
Cc: Mete Durlu <meted(a)linux.ibm.com>
Fixes: 939c7a4f04fcd ("tracing: Introduce saved_cmdlines_size file")
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/trace.c | 73 +++++++++++++++++++++-----------------------
1 file changed, 34 insertions(+), 39 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a7c6fd934e9..ea6d13ff256c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2320,7 +2320,7 @@ struct saved_cmdlines_buffer {
unsigned *map_cmdline_to_pid;
unsigned cmdline_num;
int cmdline_idx;
- char *saved_cmdlines;
+ char saved_cmdlines[];
};
static struct saved_cmdlines_buffer *savedcmd;
@@ -2334,47 +2334,54 @@ static inline void set_cmdline(int idx, const char *cmdline)
strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}
-static int allocate_cmdlines_buffer(unsigned int val,
- struct saved_cmdlines_buffer *s)
+static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
+{
+ int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
+
+ kfree(s->map_cmdline_to_pid);
+ free_pages((unsigned long)s, order);
+}
+
+static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
{
+ struct saved_cmdlines_buffer *s;
+ struct page *page;
+ int orig_size, size;
+ int order;
+
+ /* Figure out how much is needed to hold the given number of cmdlines */
+ orig_size = sizeof(*s) + val * TASK_COMM_LEN;
+ order = get_order(orig_size);
+ size = 1 << (order + PAGE_SHIFT);
+ page = alloc_pages(GFP_KERNEL, order);
+ if (!page)
+ return NULL;
+
+ s = page_address(page);
+ memset(s, 0, sizeof(*s));
+
+ /* Round up to actual allocation */
+ val = (size - sizeof(*s)) / TASK_COMM_LEN;
+ s->cmdline_num = val;
+
s->map_cmdline_to_pid = kmalloc_array(val,
sizeof(*s->map_cmdline_to_pid),
GFP_KERNEL);
- if (!s->map_cmdline_to_pid)
- return -ENOMEM;
-
- s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
- if (!s->saved_cmdlines) {
- kfree(s->map_cmdline_to_pid);
- return -ENOMEM;
- }
s->cmdline_idx = 0;
- s->cmdline_num = val;
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
sizeof(s->map_pid_to_cmdline));
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
val * sizeof(*s->map_cmdline_to_pid));
- return 0;
+ return s;
}
static int trace_create_savedcmd(void)
{
- int ret;
-
- savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
- if (!savedcmd)
- return -ENOMEM;
-
- ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
- if (ret < 0) {
- kfree(savedcmd);
- savedcmd = NULL;
- return -ENOMEM;
- }
+ savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
- return 0;
+ return savedcmd ? 0 : -ENOMEM;
}
int is_tracing_stopped(void)
@@ -6056,26 +6063,14 @@ tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
-static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
-{
- kfree(s->saved_cmdlines);
- kfree(s->map_cmdline_to_pid);
- kfree(s);
-}
-
static int tracing_resize_saved_cmdlines(unsigned int val)
{
struct saved_cmdlines_buffer *s, *savedcmd_temp;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = allocate_cmdlines_buffer(val);
if (!s)
return -ENOMEM;
- if (allocate_cmdlines_buffer(val, s) < 0) {
- kfree(s);
- return -ENOMEM;
- }
-
preempt_disable();
arch_spin_lock(&trace_cmdline_lock);
savedcmd_temp = savedcmd;
--
2.43.0
Currently, drivers have no mechanism to block requests to unbind
devices. However, this can cause resource leaks and leave the device in
an inconsistent state, such that rebinding the device may cause a hang
or otherwise prevent the device from being rebound. So, introduce
the can_remove() callback to allow drivers to indicate if it isn't
appropriate to remove a device at the given time.
Cc: stable(a)vger.kernel.org
Signed-off-by: Hamza Mahfooz <hamza.mahfooz(a)amd.com>
---
drivers/base/bus.c | 4 ++++
include/linux/device/bus.h | 2 ++
2 files changed, 6 insertions(+)
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index daee55c9b2d9..7c259b01ea99 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -239,6 +239,10 @@ static ssize_t unbind_store(struct device_driver *drv, const char *buf,
dev = bus_find_device_by_name(bus, NULL, buf);
if (dev && dev->driver == drv) {
+ if (dev->bus && dev->bus->can_remove &&
+ !dev->bus->can_remove(dev))
+ return -EBUSY;
+
device_driver_detach(dev);
err = count;
}
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 5ef4ec1c36c3..c9d4af0ed3b8 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -46,6 +46,7 @@ struct fwnode_handle;
* be called at late_initcall_sync level. If the device has
* consumers that are never bound to a driver, this function
* will never get called until they do.
+ * @can_remove: Called before attempting to remove a device from this bus.
* @remove: Called when a device removed from this bus.
* @shutdown: Called at shut-down time to quiesce the device.
*
@@ -85,6 +86,7 @@ struct bus_type {
int (*uevent)(const struct device *dev, struct kobj_uevent_env *env);
int (*probe)(struct device *dev);
void (*sync_state)(struct device *dev);
+ bool (*can_remove)(struct device *dev);
void (*remove)(struct device *dev);
void (*shutdown)(struct device *dev);
--
2.43.0
From: "Masami Hiramatsu (Google)" <mhiramat(a)kernel.org>
The commit 60c8971899f3 ("ftrace: Make DIRECT_CALLS work WITH_ARGS
and !WITH_REGS") changed DIRECT_CALLS to use SAVE_ARGS when there
are multiple ftrace_ops at the same function, but since the x86 only
support to jump to direct_call from ftrace_regs_caller, when we set
the function tracer on the same target function on x86, ftrace-direct
does not work as below (this actually works on arm64.)
At first, insmod ftrace-direct.ko to put a direct_call on
'wake_up_process()'.
# insmod kernel/samples/ftrace/ftrace-direct.ko
# less trace
...
<idle>-0 [006] ..s1. 564.686958: my_direct_func: waking up rcu_preempt-17
<idle>-0 [007] ..s1. 564.687836: my_direct_func: waking up kcompactd0-63
<idle>-0 [006] ..s1. 564.690926: my_direct_func: waking up rcu_preempt-17
<idle>-0 [006] ..s1. 564.696872: my_direct_func: waking up rcu_preempt-17
<idle>-0 [007] ..s1. 565.191982: my_direct_func: waking up kcompactd0-63
Setup a function filter to the 'wake_up_process' too, and enable it.
# cd /sys/kernel/tracing/
# echo wake_up_process > set_ftrace_filter
# echo function > current_tracer
# less trace
...
<idle>-0 [006] ..s3. 686.180972: wake_up_process <-call_timer_fn
<idle>-0 [006] ..s3. 686.186919: wake_up_process <-call_timer_fn
<idle>-0 [002] ..s3. 686.264049: wake_up_process <-call_timer_fn
<idle>-0 [002] d.h6. 686.515216: wake_up_process <-kick_pool
<idle>-0 [002] d.h6. 686.691386: wake_up_process <-kick_pool
Then, only function tracer is shown on x86.
But if you enable 'kprobe on ftrace' event (which uses SAVE_REGS flag)
on the same function, it is shown again.
# echo 'p wake_up_process' >> dynamic_events
# echo 1 > events/kprobes/p_wake_up_process_0/enable
# echo > trace
# less trace
...
<idle>-0 [006] ..s2. 2710.345919: p_wake_up_process_0: (wake_up_process+0x4/0x20)
<idle>-0 [006] ..s3. 2710.345923: wake_up_process <-call_timer_fn
<idle>-0 [006] ..s1. 2710.345928: my_direct_func: waking up rcu_preempt-17
<idle>-0 [006] ..s2. 2710.349931: p_wake_up_process_0: (wake_up_process+0x4/0x20)
<idle>-0 [006] ..s3. 2710.349934: wake_up_process <-call_timer_fn
<idle>-0 [006] ..s1. 2710.349937: my_direct_func: waking up rcu_preempt-17
To fix this issue, use SAVE_REGS flag for multiple ftrace_ops flag of
direct_call by default.
Link: https://lore.kernel.org/linux-trace-kernel/170484558617.178953.159051694939…
Fixes: 60c8971899f3 ("ftrace: Make DIRECT_CALLS work WITH_ARGS and !WITH_REGS")
Cc: stable(a)vger.kernel.org
Cc: Florent Revest <revest(a)chromium.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat(a)kernel.org>
Reviewed-by: Mark Rutland <mark.rutland(a)arm.com>
Tested-by: Mark Rutland <mark.rutland(a)arm.com> [arm64]
Acked-by: Jiri Olsa <jolsa(a)kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org>
---
kernel/trace/ftrace.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b01ae7d36021..c060d5b47910 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5325,7 +5325,17 @@ static LIST_HEAD(ftrace_direct_funcs);
static int register_ftrace_function_nolock(struct ftrace_ops *ops);
+/*
+ * If there are multiple ftrace_ops, use SAVE_REGS by default, so that direct
+ * call will be jumped from ftrace_regs_caller. Only if the architecture does
+ * not support ftrace_regs_caller but direct_call, use SAVE_ARGS so that it
+ * jumps from ftrace_caller for multiple ftrace_ops.
+ */
+#ifndef HAVE_DYNAMIC_FTRACE_WITH_REGS
#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_ARGS)
+#else
+#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
+#endif
static int check_direct_multi(struct ftrace_ops *ops)
{
--
2.43.0
From: Chengming Zhou <zhouchengming(a)bytedance.com>
We may encounter duplicate entry in the zswap_store():
1. swap slot that freed to per-cpu swap cache, doesn't invalidate
the zswap entry, then got reused. This has been fixed.
2. !exclusive load mode, swapin folio will leave its zswap entry
on the tree, then swapout again. This has been removed.
3. one folio can be dirtied again after zswap_store(), so need to
zswap_store() again. This should be handled correctly.
So we must invalidate the old duplicate entry before insert the
new one, which actually doesn't have to be done at the beginning
of zswap_store(). And this is a normal situation, we shouldn't
WARN_ON(1) in this case, so delete it. (The WARN_ON(1) seems want
to detect swap entry UAF problem? But not very necessary here.)
The good point is that we don't need to lock tree twice in the
store success path.
Note we still need to invalidate the old duplicate entry in the
store failure path, otherwise the new data in swapfile could be
overwrite by the old data in zswap pool when lru writeback.
We have to do this even when !zswap_enabled since zswap can be
disabled anytime. If the folio store success before, then got
dirtied again but zswap disabled, we won't invalidate the old
duplicate entry in the zswap_store(). So later lru writeback
may overwrite the new data in swapfile.
Fixes: 42c06a0e8ebe ("mm: kill frontswap")
Cc: <stable(a)vger.kernel.org>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Acked-by: Yosry Ahmed <yosryahmed(a)google.com>
Acked-by: Chris Li <chrisl(a)kernel.org>
Signed-off-by: Chengming Zhou <zhouchengming(a)bytedance.com>
---
v4:
- VM_WARN_ON generate no code when !CONFIG_DEBUG_VM, change
to use WARN_ON.
v3:
- Fix a few grammatical problems in comments, per Yosry.
v2:
- Change the duplicate entry invalidation loop to if, since we hold
the lock, we won't find it once we invalidate it, per Yosry.
- Add Fixes tag.
---
mm/zswap.c | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index cd67f7f6b302..62fe307521c9 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1518,18 +1518,8 @@ bool zswap_store(struct folio *folio)
return false;
if (!zswap_enabled)
- return false;
+ goto check_old;
- /*
- * If this is a duplicate, it must be removed before attempting to store
- * it, otherwise, if the store fails the old page won't be removed from
- * the tree, and it might be written back overriding the new data.
- */
- spin_lock(&tree->lock);
- entry = zswap_rb_search(&tree->rbroot, offset);
- if (entry)
- zswap_invalidate_entry(tree, entry);
- spin_unlock(&tree->lock);
objcg = get_obj_cgroup_from_folio(folio);
if (objcg && !obj_cgroup_may_zswap(objcg)) {
memcg = get_mem_cgroup_from_objcg(objcg);
@@ -1608,14 +1598,12 @@ bool zswap_store(struct folio *folio)
/* map */
spin_lock(&tree->lock);
/*
- * A duplicate entry should have been removed at the beginning of this
- * function. Since the swap entry should be pinned, if a duplicate is
- * found again here it means that something went wrong in the swap
- * cache.
+ * The folio may have been dirtied again, invalidate the
+ * possibly stale entry before inserting the new entry.
*/
- while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
- WARN_ON(1);
+ if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
zswap_invalidate_entry(tree, dupentry);
+ WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry));
}
if (entry->length) {
INIT_LIST_HEAD(&entry->lru);
@@ -1638,6 +1626,17 @@ bool zswap_store(struct folio *folio)
reject:
if (objcg)
obj_cgroup_put(objcg);
+check_old:
+ /*
+ * If the zswap store fails or zswap is disabled, we must invalidate the
+ * possibly stale entry which was previously stored at this offset.
+ * Otherwise, writeback could overwrite the new data in the swapfile.
+ */
+ spin_lock(&tree->lock);
+ entry = zswap_rb_search(&tree->rbroot, offset);
+ if (entry)
+ zswap_invalidate_entry(tree, entry);
+ spin_unlock(&tree->lock);
return false;
shrink:
--
2.40.1
The patch titled
Subject: mm/zswap: invalidate duplicate entry when !zswap_enabled
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-zswap-invalidate-duplicate-entry-when-zswap_enabled.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Chengming Zhou <zhouchengming(a)bytedance.com>
Subject: mm/zswap: invalidate duplicate entry when !zswap_enabled
Date: Thu, 8 Feb 2024 02:32:54 +0000
We have to invalidate any duplicate entry even when !zswap_enabled since
zswap can be disabled anytime. If the folio store success before, then
got dirtied again but zswap disabled, we won't invalidate the old
duplicate entry in the zswap_store(). So later lru writeback may
overwrite the new data in swapfile.
Link: https://lkml.kernel.org/r/20240208023254.3873823-1-chengming.zhou@linux.dev
Fixes: 42c06a0e8ebe ("mm: kill frontswap")
Signed-off-by: Chengming Zhou <zhouchengming(a)bytedance.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Nhat Pham <nphamcs(a)gmail.com>
Cc: Yosry Ahmed <yosryahmed(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/zswap.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
--- a/mm/zswap.c~mm-zswap-invalidate-duplicate-entry-when-zswap_enabled
+++ a/mm/zswap.c
@@ -1516,7 +1516,7 @@ bool zswap_store(struct folio *folio)
if (folio_test_large(folio))
return false;
- if (!zswap_enabled || !tree)
+ if (!tree)
return false;
/*
@@ -1531,6 +1531,10 @@ bool zswap_store(struct folio *folio)
zswap_invalidate_entry(tree, dupentry);
}
spin_unlock(&tree->lock);
+
+ if (!zswap_enabled)
+ return false;
+
objcg = get_obj_cgroup_from_folio(folio);
if (objcg && !obj_cgroup_may_zswap(objcg)) {
memcg = get_mem_cgroup_from_objcg(objcg);
_
Patches currently in -mm which might be from zhouchengming(a)bytedance.com are
mm-zswap-invalidate-duplicate-entry-when-zswap_enabled.patch
mm-zswap-make-sure-each-swapfile-always-have-zswap-rb-tree.patch
mm-zswap-split-zswap-rb-tree.patch
mm-zswap-fix-race-between-lru-writeback-and-swapoff.patch
mm-list_lru-remove-list_lru_putback.patch
mm-zswap-add-more-comments-in-shrink_memcg_cb.patch
mm-zswap-invalidate-zswap-entry-when-swap-entry-free.patch
mm-zswap-stop-lru-list-shrinking-when-encounter-warm-region.patch
mm-zswap-remove-duplicate_entry-debug-value.patch
mm-zswap-only-support-zswap_exclusive_loads_enabled.patch
mm-zswap-zswap-entry-doesnt-need-refcount-anymore.patch
The patch titled
Subject: fs/proc/task_mmu: add display flag for VM_MAYOVERLAY
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
fs-proc-task_mmu-add-display-flag-for-vm_mayoverlay.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Anshuman Khandual <anshuman.khandual(a)arm.com>
Subject: fs/proc/task_mmu: add display flag for VM_MAYOVERLAY
Date: Thu, 8 Feb 2024 14:18:05 +0530
VM_UFFD_MISSING flag is mutually exclussive with VM_MAYOVERLAY flag as
they both use the same bit position i.e 0x00000200 in the vm_flags. Let's
update show_smap_vma_flags() to display the correct flags depending on
CONFIG_MMU.
Link: https://lkml.kernel.org/r/20240208084805.1252337-1-anshuman.khandual@arm.com
Fixes: b6b7a8faf05c ("mm/nommu: don't use VM_MAYSHARE for MAP_PRIVATE mappings")
Signed-off-by: Anshuman Khandual <anshuman.khandual(a)arm.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 4 ++++
1 file changed, 4 insertions(+)
--- a/fs/proc/task_mmu.c~fs-proc-task_mmu-add-display-flag-for-vm_mayoverlay
+++ a/fs/proc/task_mmu.c
@@ -681,7 +681,11 @@ static void show_smap_vma_flags(struct s
[ilog2(VM_HUGEPAGE)] = "hg",
[ilog2(VM_NOHUGEPAGE)] = "nh",
[ilog2(VM_MERGEABLE)] = "mg",
+#ifdef CONFIG_MMU
[ilog2(VM_UFFD_MISSING)]= "um",
+#else
+ [ilog2(VM_MAYOVERLAY)] = "ov",
+#endif /* CONFIG_MMU */
[ilog2(VM_UFFD_WP)] = "uw",
#ifdef CONFIG_ARM64_MTE
[ilog2(VM_MTE)] = "mt",
_
Patches currently in -mm which might be from anshuman.khandual(a)arm.com are
fs-proc-task_mmu-add-display-flag-for-vm_mayoverlay.patch
mm-cma-dont-treat-bad-input-arguments-for-cma_alloc-as-its-failure.patch
mm-cma-drop-config_cma_debug.patch
mm-cma-make-max_cma_areas-=-config_cma_areas.patch
mm-cma-add-sysfs-file-release_pages_success.patch
The patch titled
Subject: lib/Kconfig.debug: TEST_IOV_ITER depends on MMU
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
lib-kconfigdebug-test_iov_iter-depends-on-mmu.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Guenter Roeck <linux(a)roeck-us.net>
Subject: lib/Kconfig.debug: TEST_IOV_ITER depends on MMU
Date: Thu, 8 Feb 2024 07:30:10 -0800
Trying to run the iov_iter unit test on a nommu system such as the qemu
kc705-nommu emulation results in a crash.
KTAP version 1
# Subtest: iov_iter
# module: kunit_iov_iter
1..9
BUG: failure at mm/nommu.c:318/vmap()!
Kernel panic - not syncing: BUG!
The test calls vmap() directly, but vmap() is not supported on nommu
systems, causing the crash. TEST_IOV_ITER therefore needs to depend on
MMU.
Link: https://lkml.kernel.org/r/20240208153010.1439753-1-linux@roeck-us.net
Fixes: 2d71340ff1d4 ("iov_iter: Kunit tests for copying to/from an iterator")
Signed-off-by: Guenter Roeck <linux(a)roeck-us.net>
Cc: David Howells <dhowells(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
lib/Kconfig.debug | 1 +
1 file changed, 1 insertion(+)
--- a/lib/Kconfig.debug~lib-kconfigdebug-test_iov_iter-depends-on-mmu
+++ a/lib/Kconfig.debug
@@ -2235,6 +2235,7 @@ config TEST_DIV64
config TEST_IOV_ITER
tristate "Test iov_iter operation" if !KUNIT_ALL_TESTS
depends on KUNIT
+ depends on MMU
default KUNIT_ALL_TESTS
help
Enable this to turn on testing of the operation of the I/O iterator
_
Patches currently in -mm which might be from linux(a)roeck-us.net are
lib-kconfigdebug-test_iov_iter-depends-on-mmu.patch
The quilt patch titled
Subject: xfs: disable large folio support in xfile_create
has been removed from the -mm tree. Its filename was
xfs-disable-large-folio-support-in-xfile_create.patch
This patch was dropped because it is obsolete
------------------------------------------------------
From: Christoph Hellwig <hch(a)lst.de>
Subject: xfs: disable large folio support in xfile_create
Date: Wed, 10 Jan 2024 10:21:09 +0100
The xfarray code will crash if large folios are force enabled using:
echo force > /sys/kernel/mm/transparent_hugepage/shmem_enabled
Fixing this will require a bit of an API change, and prefeably sorting out
the hwpoison story for pages vs folio and where it is placed in the shmem
API. For now use this one liner to disable large folios.
Link: https://lkml.kernel.org/r/20240110092109.1950011-3-hch@lst.de
Fixes: 3934e8ebb7cc ("xfs: create a big array data structure")
Reported-by: Darrick J. Wong <djwong(a)kernel.org>
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Reviewed-by: Darrick J. Wong <djwong(a)kernel.org>
Cc: Chandan Babu R <chandan.babu(a)oracle.com>
Cc: Christian K��nig <christian.koenig(a)amd.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: Dave Airlie <airlied(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Howells <dhowells(a)redhat.com>
Cc: Huang Rui <ray.huang(a)amd.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jani Nikula <jani.nikula(a)linux.intel.com>
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Maxime Ripard <mripard(a)kernel.org>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/xfs/scrub/xfile.c | 5 +++++
1 file changed, 5 insertions(+)
--- a/fs/xfs/scrub/xfile.c~xfs-disable-large-folio-support-in-xfile_create
+++ a/fs/xfs/scrub/xfile.c
@@ -94,6 +94,11 @@ xfile_create(
lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
+ /*
+ * We're not quite ready for large folios yet.
+ */
+ mapping_clear_large_folios(inode->i_mapping);
+
trace_xfile_create(xf);
*xfilep = xf;
_
Patches currently in -mm which might be from hch(a)lst.de are
The quilt patch titled
Subject: mm: add a mapping_clear_large_folios helper
has been removed from the -mm tree. Its filename was
mm-add-a-mapping_clear_large_folios-helper.patch
This patch was dropped because it is obsolete
------------------------------------------------------
From: Christoph Hellwig <hch(a)lst.de>
Subject: mm: add a mapping_clear_large_folios helper
Date: Wed, 10 Jan 2024 10:21:08 +0100
Patch series "disable large folios for shmem file used by xfs xfile".
Darrick reported that the fairly new XFS xfile code blows up when force
enabling large folio for shmem. This series fixes this quickly by
disabling large folios for this particular shmem file for now until it can
be fixed properly, which will be a lot more invasive.
This patch (of 2):
Users of shmem_kernel_file_setup might not be able to deal with large
folios (yet). Give them a way to disable large folio support on their
mapping.
Link: https://lkml.kernel.org/r/20240110092109.1950011-1-hch@lst.de
Link: https://lkml.kernel.org/r/20240110092109.1950011-2-hch@lst.de
Fixes: 3934e8ebb7cc ("xfs: create a big array data structure")
Signed-off-by: Christoph Hellwig <hch(a)lst.de>
Cc: Chandan Babu R <chandan.babu(a)oracle.com>
Cc: Christian K��nig <christian.koenig(a)amd.com>
Cc: Daniel Vetter <daniel(a)ffwll.ch>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Airlie <airlied(a)gmail.com>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: David Howells <dhowells(a)redhat.com>
Cc: Huang Rui <ray.huang(a)amd.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Jani Nikula <jani.nikula(a)linux.intel.com>
Cc: Jarkko Sakkinen <jarkko(a)kernel.org>
Cc: Joonas Lahtinen <joonas.lahtinen(a)linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst(a)linux.intel.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Maxime Ripard <mripard(a)kernel.org>
Cc: Rodrigo Vivi <rodrigo.vivi(a)intel.com>
Cc: Thomas Zimmermann <tzimmermann(a)suse.de>
Cc: Tvrtko Ursulin <tvrtko.ursulin(a)linux.intel.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/pagemap.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
--- a/include/linux/pagemap.h~mm-add-a-mapping_clear_large_folios-helper
+++ a/include/linux/pagemap.h
@@ -360,6 +360,20 @@ static inline void mapping_set_large_fol
__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
}
+/**
+ * mapping_clear_large_folios() - Disable large folio support for a mapping
+ * @mapping: The mapping.
+ *
+ * This can be called to undo the effect of mapping_set_large_folios().
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_clear_large_folios(struct address_space *mapping)
+{
+ __clear_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
+}
+
/*
* Large folio support currently depends on THP. These dependencies are
* being worked on but are not yet fixed.
_
Patches currently in -mm which might be from hch(a)lst.de are
xfs-disable-large-folio-support-in-xfile_create.patch
Hi Greg, Sasha,
Please consider applying commit e08ff622c91a to 6.6.y. It requires the
following chain:
828176d037e2 ("rust: arc: add explicit `drop()` around `Box::from_raw()`")
ae6df65dabc3 ("rust: upgrade to Rust 1.72.1")
c61bcc278b19 ("rust: task: remove redundant explicit link")
a53d8cdd5a0a ("rust: print: use explicit link in documentation")
e08ff622c91a ("rust: upgrade to Rust 1.73.0")
which applies cleanly to 6.6.y. This upgrades the Rust compiler
version from 1.71.1 to 1.73.0 (2 version upgrades + 3 prerequisites
for the upgrades), fixing a couple issues with the Rust compiler
version currently used in 6.6.y. In particular:
- A build error with `CONFIG_RUST_DEBUG_ASSERTIONS` enabled
(`.eh_frame` section unexpected generation). This is solved applying
up to ae6df65dabc3.
- A developer-only Make target error (building `.rsi` single-target
files, i.e. the equivalent to requesting a preprocessed file in C).
This is solved applying all of them.
Thanks!
Cheers,
Miguel
If the directory passed to the '.. kernel-feat::' directive does not
exist or the get_feat.pl script does not find any files to extract
features from, Sphinx will report the following error:
Sphinx parallel build error:
UnboundLocalError: local variable 'fname' referenced before assignment
make[2]: *** [Documentation/Makefile:102: htmldocs] Error 2
This is due to how I changed the script in c48a7c44a1d0 ("docs:
kernel_feat.py: fix potential command injection"). Before that, the
filename passed along to self.nestedParse() in this case was weirdly
just the whole get_feat.pl invocation.
We can fix it by doing what kernel_abi.py does -- just pass
self.arguments[0] as 'fname'.
Fixes: c48a7c44a1d0 ("docs: kernel_feat.py: fix potential command injection")
Cc: Justin Forbes <jforbes(a)fedoraproject.org>
Cc: Salvatore Bonaccorso <carnil(a)debian.org>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: Mauro Carvalho Chehab <mchehab(a)kernel.org>
Cc: stable(a)vger.kernel.org
Signed-off-by: Vegard Nossum <vegard.nossum(a)oracle.com>
---
Documentation/sphinx/kernel_feat.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Documentation/sphinx/kernel_feat.py b/Documentation/sphinx/kernel_feat.py
index b9df61eb4501..03ace5f01b5c 100644
--- a/Documentation/sphinx/kernel_feat.py
+++ b/Documentation/sphinx/kernel_feat.py
@@ -109,7 +109,7 @@ class KernelFeat(Directive):
else:
out_lines += line + "\n"
- nodeList = self.nestedParse(out_lines, fname)
+ nodeList = self.nestedParse(out_lines, self.arguments[0])
return nodeList
def nestedParse(self, lines, fname):
--
2.34.1
Commit d5e01266e7f5 ("leds: trigger: netdev: add additional specific link
speed mode") in the various changes, reworked the way to set the LINKUP
mode in commit cee4bd16c319 ("leds: trigger: netdev: Recheck
NETDEV_LED_MODE_LINKUP on dev rename") and moved it to a generic function.
This changed the logic where, in the previous implementation the dev
from the trigger event was used to check if the carrier was ok, but in
the new implementation with the generic function, the dev in
trigger_data is used instead.
This is problematic and cause a possible kernel panic due to the fact
that the dev in the trigger_data still reference the old one as the
new one (passed from the trigger event) still has to be hold and saved
in the trigger_data struct (done in the NETDEV_REGISTER case).
On calling of get_device_state(), an invalid net_dev is used and this
cause a kernel panic.
To handle this correctly, move the call to get_device_state() after the
new net_dev is correctly set in trigger_data (in the NETDEV_REGISTER
case) and correctly parse the new dev.
Fixes: d5e01266e7f5 ("leds: trigger: netdev: add additional specific link speed mode")
Cc: stable(a)vger.kernel.org
Signed-off-by: Christian Marangi <ansuelsmth(a)gmail.com>
---
drivers/leds/trigger/ledtrig-netdev.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c
index 8e5475819590..df1b1d8468e6 100644
--- a/drivers/leds/trigger/ledtrig-netdev.c
+++ b/drivers/leds/trigger/ledtrig-netdev.c
@@ -504,12 +504,12 @@ static int netdev_trig_notify(struct notifier_block *nb,
trigger_data->duplex = DUPLEX_UNKNOWN;
switch (evt) {
case NETDEV_CHANGENAME:
- get_device_state(trigger_data);
- fallthrough;
case NETDEV_REGISTER:
dev_put(trigger_data->net_dev);
dev_hold(dev);
trigger_data->net_dev = dev;
+ if (evt == NETDEV_CHANGENAME)
+ get_device_state(trigger_data);
break;
case NETDEV_UNREGISTER:
dev_put(trigger_data->net_dev);
--
2.43.0
When we added mount_setattr() I added additional checks compared to the
legacy do_reconfigure_mnt() and do_change_type() helpers used by regular
mount(2). If that mount had a parent then verify that the caller and the
mount namespace the mount is attached to match and if not make sure that
it's an anonymous mount.
The real rootfs falls into neither category. It is neither an anoymous
mount because it is obviously attached to the initial mount namespace
but it also obviously doesn't have a parent mount. So that means legacy
mount(2) allows changing mount properties on the real rootfs but
mount_setattr(2) blocks this. I never thought much about this but of
course someone on this planet of earth changes properties on the real
rootfs as can be seen in [1].
Since util-linux finally switched to the new mount api in 2.39 not so
long ago it also relies on mount_setattr() and that surfaced this issue
when Fedora 39 finally switched to it. Fix this.
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2256843
Reported-by: Karel Zak <kzak(a)redhat.com>
Cc: stable(a)vger.kernel.org # v5.12+
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
---
fs/namespace.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 437f60e96d40..fb0286920bce 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4472,10 +4472,15 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
/*
* If this is an attached mount make sure it's located in the callers
* mount namespace. If it's not don't let the caller interact with it.
- * If this is a detached mount make sure it has an anonymous mount
- * namespace attached to it, i.e. we've created it via OPEN_TREE_CLONE.
+ *
+ * If this mount doesn't have a parent it's most often simply a
+ * detached mount with an anonymous mount namespace. IOW, something
+ * that's simply not attached yet. But there are apparently also users
+ * that do change mount properties on the rootfs itself. That obviously
+ * neither has a parent nor is it a detached mount so we cannot
+ * unconditionally check for detached mounts.
*/
- if (!(mnt_has_parent(mnt) ? check_mnt(mnt) : is_anon_ns(mnt->mnt_ns)))
+ if (mnt_has_parent(mnt) && !check_mnt(mnt))
goto out;
/*
---
base-commit: 2a42e144dd0b62eaf79148394ab057145afbc3c5
change-id: 20240206-vfs-mount-rootfs-70aff2e3956d
Syzkaller reports "memory leak in cpu_map_update_elem" in 5.10 stable release.
The problem has been fixed by the following patches which can be cleanly applied
to the 5.10 branch.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Symptom:
In case of a bad cable connection (e.g. dirty optics) a fast sequence of
network DOWN-UP-DOWN-UP could happen. UP triggers recovery of the qeth
interface. In case of a second DOWN while recovery is still ongoing, it
can happen that the IP@ of a Layer3 qeth interface is lost and will not
be recovered by the second UP.
Problem:
When registration of IP addresses with Layer 3 qeth devices fails, (e.g.
because of bad address format) the respective IP address is deleted from
its hash-table in the driver. If registration fails because of a ENETDOWN
condition, the address should stay in the hashtable, so a subsequent
recovery can restore it.
3caa4af834df ("qeth: keep ip-address after LAN_OFFLINE failure")
fixes this for registration failures during normal operation, but not
during recovery.
Solution:
Keep L3-IP address in case of ENETDOWN in qeth_l3_recover_ip(). For
consistency with qeth_l3_add_ip() we also keep it in case of EADDRINUSE,
i.e. for some reason the card already/still has this address registered.
Fixes: 4a71df50047f ("qeth: new qeth device driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Alexandra Winter <wintera(a)linux.ibm.com>
---
drivers/s390/net/qeth_l3_main.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b92a32b4b114..04c64ce0a1ca 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -255,9 +255,10 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
if (!recover) {
hash_del(&addr->hnode);
kfree(addr);
- continue;
+ } else {
+ /* prepare for recovery */
+ addr->disp_flag = QETH_DISP_ADDR_ADD;
}
- addr->disp_flag = QETH_DISP_ADDR_ADD;
}
mutex_unlock(&card->ip_lock);
@@ -278,9 +279,11 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
rc = qeth_l3_register_addr_entry(card, addr);
- if (!rc) {
+ if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) {
+ /* keep it in the records */
addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
} else {
+ /* bad address */
hash_del(&addr->hnode);
kfree(addr);
}
--
2.40.1
RHEL people reported some errors when compiling rtla and rv with
clang. The command line used to compile the tools is:
$ make HOSTCC=clang CC=clang LLVM_IAS=1
The first problem is two unsupported flags passed to the compiler:
-ffat-lto-objects and -Wno-maybe-uninitialized. They will be
removed if the compile is clang.
Also, the clang linker does not automatically recognize the
-flto=auto option used at compilation time, so it is explicitly
set.
With the compiler working, it starts pointing to some warnings
and errors about uninitialized variables, variable size, and an
unused function. These problems are also fixed.
Daniel Bristot de Oliveira (6):
tools/rtla: Fix Makefile compiler options for clang
tools/rtla: Fix uninitialized bucket/data->bucket_size warning
tools/rtla: Fix clang warning about mount_point var size
tools/rtla: Remove unused sched_getattr() function
tools/rv: Fix Makefile compiler options for clang
tools/rv: Fix curr_reactor uninitialized variable
tools/tracing/rtla/Makefile | 7 ++++++-
tools/tracing/rtla/src/osnoise_hist.c | 3 +--
tools/tracing/rtla/src/timerlat_hist.c | 3 +--
tools/tracing/rtla/src/utils.c | 8 +-------
tools/verification/rv/Makefile | 7 ++++++-
tools/verification/rv/src/in_kernel.c | 2 +-
6 files changed, 16 insertions(+), 14 deletions(-)
--
2.43.0
From: Sinthu Raja <sinthu.raja(a)ti.com>
The below commit introduced a WARN when phy state is not in the states:
PHY_HALTED, PHY_READY and PHY_UP.
commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")
When cpsw_new resumes, there have port in PHY_NOLINK state, so the below
warning comes out. Set mac_managed_pm be true to tell mdio that the phy
resume/suspend is managed by the mac, to fix the following warning:
WARNING: CPU: 0 PID: 965 at drivers/net/phy/phy_device.c:326 mdio_bus_phy_resume+0x140/0x144
CPU: 0 PID: 965 Comm: sh Tainted: G O 6.1.46-g247b2535b2 #1
Hardware name: Generic AM33XX (Flattened Device Tree)
unwind_backtrace from show_stack+0x18/0x1c
show_stack from dump_stack_lvl+0x24/0x2c
dump_stack_lvl from __warn+0x84/0x15c
__warn from warn_slowpath_fmt+0x1a8/0x1c8
warn_slowpath_fmt from mdio_bus_phy_resume+0x140/0x144
mdio_bus_phy_resume from dpm_run_callback+0x3c/0x140
dpm_run_callback from device_resume+0xb8/0x2b8
device_resume from dpm_resume+0x144/0x314
dpm_resume from dpm_resume_end+0x14/0x20
dpm_resume_end from suspend_devices_and_enter+0xd0/0x924
suspend_devices_and_enter from pm_suspend+0x2e0/0x33c
pm_suspend from state_store+0x74/0xd0
state_store from kernfs_fop_write_iter+0x104/0x1ec
kernfs_fop_write_iter from vfs_write+0x1b8/0x358
vfs_write from ksys_write+0x78/0xf8
ksys_write from ret_fast_syscall+0x0/0x54
Exception stack(0xe094dfa8 to 0xe094dff0)
dfa0: 00000004 005c3fb8 00000001 005c3fb8 00000004 00000001
dfc0: 00000004 005c3fb8 b6f6bba0 00000004 00000004 0059edb8 00000000 00000000
dfe0: 00000004 bed918f0 b6f09bd3 b6e89a66
Cc: <stable(a)vger.kernel.org> # v6.0+
Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state")
Signed-off-by: Sinthu Raja <sinthu.raja(a)ti.com>
---
Changes in V3:
- No Change
Changes in V2:
- Add fixes tag.
drivers/net/ethernet/ti/cpsw_new.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 498c50c6d1a7..087dcb67505a 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -773,6 +773,9 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
slave->slave_num);
return;
}
+
+ phy->mac_managed_pm = true;
+
slave->phy = phy;
phy_attached_info(slave->phy);
--
2.36.1
Hello,
I noticed after a recent kernel update that my ARM926 system started
segfaulting on any execve() after calling prctl(PR_SET_MDWE). After
some investigation it appears that ARMv5 is incapable of providing the
appropriate protections for MDWE, since any readable memory is also
implicitly executable.
(Note that I'm not an expert in either ARM arch details or the mm
subsystem, so please bear with me if I've botched something in the
above analysis.)
The prctl_set_mdwe() function already had some special-case logic
added disabling it on PARISC (commit 793838138c15, "prctl: Disable
prctl(PR_SET_MDWE) on parisc"); this patch series (1) generalizes that
check to use an arch_*() function, and (2) adds a corresponding
override for ARM to disable MDWE on pre-ARMv6 CPUs.
With the series applied, prctl(PR_SET_MDWE) is rejected on ARMv5 and
subsequent execve() calls (as well as mmap(PROT_READ|PROT_WRITE)) can
succeed instead of unconditionally failing; on ARMv6 the prctl works
as it did previously.
Since this was effectively a userspace-breaking change in v6.3 (with
newer MDWE-aware userspace on older pre-MDWE kernels the prctl would
simply fail safely) I've CCed -stable for v6.3+, though since the
patches depend on the PARISC one above it will only apply cleanly on
the linux-6.6.y and linux-6.7.y branches, since at least at time of
writing the 6.3 through 6.5 branches don't have that patch backported
(due to further missing dependencies [0]).
Thanks,
Zev
[0] https://lore.kernel.org/all/2023112456-linked-nape-bf19@gregkh/
Zev Weiss (2):
prctl: Generalize PR_SET_MDWE support check to be per-arch
ARM: prctl: Reject PR_SET_MDWE on pre-ARMv6
arch/arm/include/asm/mman.h | 14 ++++++++++++++
arch/parisc/include/asm/mman.h | 14 ++++++++++++++
include/linux/mman.h | 8 ++++++++
kernel/sys.c | 7 +++++--
4 files changed, 41 insertions(+), 2 deletions(-)
create mode 100644 arch/arm/include/asm/mman.h
create mode 100644 arch/parisc/include/asm/mman.h
--
2.43.0
The quilt patch titled
Subject: mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE
has been removed from the -mm tree. Its filename was
hugetlb-pages-should-not-be-reserved-by-shmat-if-shm_noreserve.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Prakash Sangappa <prakash.sangappa(a)oracle.com>
Subject: mm: hugetlb pages should not be reserved by shmat() if SHM_NORESERVE
Date: Tue, 23 Jan 2024 12:04:42 -0800
For shared memory of type SHM_HUGETLB, hugetlb pages are reserved in
shmget() call. If SHM_NORESERVE flags is specified then the hugetlb pages
are not reserved. However when the shared memory is attached with the
shmat() call the hugetlb pages are getting reserved incorrectly for
SHM_HUGETLB shared memory created with SHM_NORESERVE which is a bug.
-------------------------------
Following test shows the issue.
$cat shmhtb.c
int main()
{
int shmflags = 0660 | IPC_CREAT | SHM_HUGETLB | SHM_NORESERVE;
int shmid;
shmid = shmget(SKEY, SHMSZ, shmflags);
if (shmid < 0)
{
printf("shmat: shmget() failed, %d\n", errno);
return 1;
}
printf("After shmget()\n");
system("cat /proc/meminfo | grep -i hugepages_");
shmat(shmid, NULL, 0);
printf("\nAfter shmat()\n");
system("cat /proc/meminfo | grep -i hugepages_");
shmctl(shmid, IPC_RMID, NULL);
return 0;
}
#sysctl -w vm.nr_hugepages=20
#./shmhtb
After shmget()
HugePages_Total: 20
HugePages_Free: 20
HugePages_Rsvd: 0
HugePages_Surp: 0
After shmat()
HugePages_Total: 20
HugePages_Free: 20
HugePages_Rsvd: 5 <--
HugePages_Surp: 0
--------------------------------
Fix is to ensure that hugetlb pages are not reserved for SHM_HUGETLB shared
memory in the shmat() call.
Link: https://lkml.kernel.org/r/1706040282-12388-1-git-send-email-prakash.sangapp…
Signed-off-by: Prakash Sangappa <prakash.sangappa(a)oracle.com>
Acked-by: Muchun Song <muchun.song(a)linux.dev>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/hugetlbfs/inode.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
--- a/fs/hugetlbfs/inode.c~hugetlb-pages-should-not-be-reserved-by-shmat-if-shm_noreserve
+++ a/fs/hugetlbfs/inode.c
@@ -100,6 +100,7 @@ static int hugetlbfs_file_mmap(struct fi
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
+ vm_flags_t vm_flags;
/*
* vma address alignment (but not the pgoff alignment) has
@@ -141,10 +142,20 @@ static int hugetlbfs_file_mmap(struct fi
file_accessed(file);
ret = -ENOMEM;
+
+ vm_flags = vma->vm_flags;
+ /*
+ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
+ * reserving here. Note: only for SHM hugetlbfs file, the inode
+ * flag S_PRIVATE is set.
+ */
+ if (inode->i_flags & S_PRIVATE)
+ vm_flags |= VM_NORESERVE;
+
if (!hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
- vma->vm_flags))
+ vm_flags))
goto out;
ret = 0;
_
Patches currently in -mm which might be from prakash.sangappa(a)oracle.com are
The quilt patch titled
Subject: nilfs2: fix potential bug in end_buffer_async_write
has been removed from the -mm tree. Its filename was
nilfs2-fix-potential-bug-in-end_buffer_async_write.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix potential bug in end_buffer_async_write
Date: Sun, 4 Feb 2024 01:16:45 +0900
According to a syzbot report, end_buffer_async_write(), which handles the
completion of block device writes, may detect abnormal condition of the
buffer async_write flag and cause a BUG_ON failure when using nilfs2.
Nilfs2 itself does not use end_buffer_async_write(). But, the async_write
flag is now used as a marker by commit 7f42ec394156 ("nilfs2: fix issue
with race condition of competition between segments for dirty blocks") as
a means of resolving double list insertion of dirty blocks in
nilfs_lookup_dirty_data_buffers() and nilfs_lookup_node_buffers() and the
resulting crash.
This modification is safe as long as it is used for file data and b-tree
node blocks where the page caches are independent. However, it was
irrelevant and redundant to also introduce async_write for segment summary
and super root blocks that share buffers with the backing device. This
led to the possibility that the BUG_ON check in end_buffer_async_write
would fail as described above, if independent writebacks of the backing
device occurred in parallel.
The use of async_write for segment summary buffers has already been
removed in a previous change.
Fix this issue by removing the manipulation of the async_write flag for
the remaining super root block buffer.
Link: https://lkml.kernel.org/r/20240203161645.4992-1-konishi.ryusuke@gmail.com
Fixes: 7f42ec394156 ("nilfs2: fix issue with race condition of competition between segments for dirty blocks")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+5c04210f7c7f897c1e7f(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/00000000000019a97c05fd42f8c8@google.com
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/segment.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
--- a/fs/nilfs2/segment.c~nilfs2-fix-potential-bug-in-end_buffer_async_write
+++ a/fs/nilfs2/segment.c
@@ -1703,7 +1703,6 @@ static void nilfs_segctor_prepare_write(
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_folio != bd_folio) {
folio_lock(bd_folio);
@@ -1714,6 +1713,7 @@ static void nilfs_segctor_prepare_write(
}
break;
}
+ set_buffer_async_write(bh);
if (bh->b_folio != fs_folio) {
nilfs_begin_folio_io(fs_folio);
fs_folio = bh->b_folio;
@@ -1800,7 +1800,6 @@ static void nilfs_abort_logs(struct list
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
clear_buffer_uptodate(bh);
if (bh->b_folio != bd_folio) {
@@ -1809,6 +1808,7 @@ static void nilfs_abort_logs(struct list
}
break;
}
+ clear_buffer_async_write(bh);
if (bh->b_folio != fs_folio) {
nilfs_end_folio_io(fs_folio, err);
fs_folio = bh->b_folio;
@@ -1896,8 +1896,9 @@ static void nilfs_segctor_complete_write
BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Redirected));
- set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh == segbuf->sb_super_root) {
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
if (bh->b_folio != bd_folio) {
folio_end_writeback(bd_folio);
bd_folio = bh->b_folio;
@@ -1905,6 +1906,7 @@ static void nilfs_segctor_complete_write
update_sr = true;
break;
}
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh->b_folio != fs_folio) {
nilfs_end_folio_io(fs_folio, 0);
fs_folio = bh->b_folio;
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-convert-segment-buffer-to-use-kmap_local.patch
nilfs2-convert-nilfs_copy_buffer-to-use-kmap_local.patch
nilfs2-convert-metadata-file-common-code-to-use-kmap_local.patch
nilfs2-convert-sufile-to-use-kmap_local.patch
nilfs2-convert-persistent-object-allocator-to-use-kmap_local.patch
nilfs2-convert-dat-to-use-kmap_local.patch
nilfs2-move-nilfs_bmap_write-call-out-of-nilfs_write_inode_common.patch
nilfs2-do-not-acquire-rwsem-in-nilfs_bmap_write.patch
nilfs2-convert-ifile-to-use-kmap_local.patch
nilfs2-localize-highmem-mapping-for-checkpoint-creation-within-cpfile.patch
nilfs2-localize-highmem-mapping-for-checkpoint-finalization-within-cpfile.patch
nilfs2-localize-highmem-mapping-for-checkpoint-reading-within-cpfile.patch
nilfs2-remove-nilfs_cpfile_getput_checkpoint.patch
nilfs2-convert-cpfile-to-use-kmap_local.patch
The quilt patch titled
Subject: mm/damon/sysfs-schemes: fix wrong DAMOS tried regions update timeout setup
has been removed from the -mm tree. Its filename was
mm-damon-sysfs-schemes-fix-wrong-damos-tried-regions-update-timeout-setup.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: SeongJae Park <sj(a)kernel.org>
Subject: mm/damon/sysfs-schemes: fix wrong DAMOS tried regions update timeout setup
Date: Fri, 2 Feb 2024 11:19:56 -0800
DAMON sysfs interface's update_schemes_tried_regions command has a timeout
of two apply intervals of the DAMOS scheme. Having zero value DAMOS
scheme apply interval means it will use the aggregation interval as the
value. However, the timeout setup logic is mistakenly using the sampling
interval insted of the aggregartion interval for the case. This could
cause earlier-than-expected timeout of the command. Fix it.
Link: https://lkml.kernel.org/r/20240202191956.88791-1-sj@kernel.org
Fixes: 7d6fa31a2fd7 ("mm/damon/sysfs-schemes: add timeout for update_schemes_tried_regions")
Signed-off-by: SeongJae Park <sj(a)kernel.org>
Cc: <stable(a)vger.kernel.org> # 6.7.x
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/sysfs-schemes.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/damon/sysfs-schemes.c~mm-damon-sysfs-schemes-fix-wrong-damos-tried-regions-update-timeout-setup
+++ a/mm/damon/sysfs-schemes.c
@@ -2194,7 +2194,7 @@ static void damos_tried_regions_init_upd
sysfs_regions->upd_timeout_jiffies = jiffies +
2 * usecs_to_jiffies(scheme->apply_interval_us ?
scheme->apply_interval_us :
- ctx->attrs.sample_interval);
+ ctx->attrs.aggr_interval);
}
}
_
Patches currently in -mm which might be from sj(a)kernel.org are
mm-damon-core-check-apply-interval-in-damon_do_apply_schemes.patch
docs-admin-guide-mm-damon-usage-use-sysfs-interface-for-tracepoints-example.patch
mm-damon-rename-config_damon_dbgfs-to-damon_dbgfs_deprecated.patch
mm-damon-dbgfs-implement-deprecation-notice-file.patch
mm-damon-dbgfs-make-debugfs-interface-deprecation-message-a-macro.patch
docs-admin-guide-mm-damon-usage-document-deprecated-file-of-damon-debugfs-interface.patch
selftets-damon-prepare-for-monitor_on-file-renaming.patch
mm-damon-dbgfs-rename-monitor_on-file-to-monitor_on_deprecated.patch
docs-admin-guide-mm-damon-usage-update-for-monitor_on-renaming.patch
docs-translations-damon-usage-update-for-monitor_on-renaming.patch
mm-damon-sysfs-handle-state-file-inputs-for-every-sampling-interval-if-possible.patch
selftests-damon-_damon_sysfs-support-damos-quota.patch
selftests-damon-_damon_sysfs-support-damos-stats.patch
selftests-damon-_damon_sysfs-support-damos-apply-interval.patch
selftests-damon-add-a-test-for-damos-quota.patch
selftests-damon-add-a-test-for-damos-apply-intervals.patch
selftests-damon-add-a-test-for-a-race-between-target_ids_read-and-dbgfs_before_terminate.patch
selftests-damon-add-a-test-for-the-pid-leak-of-dbgfs_target_ids_write.patch
selftests-damon-_chk_dependency-get-debugfs-mount-point-from-proc-mounts.patch
The quilt patch titled
Subject: nilfs2: fix hang in nilfs_lookup_dirty_data_buffers()
has been removed from the -mm tree. Its filename was
nilfs2-fix-hang-in-nilfs_lookup_dirty_data_buffers.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix hang in nilfs_lookup_dirty_data_buffers()
Date: Wed, 31 Jan 2024 23:56:57 +0900
Syzbot reported a hang issue in migrate_pages_batch() called by mbind()
and nilfs_lookup_dirty_data_buffers() called in the log writer of nilfs2.
While migrate_pages_batch() locks a folio and waits for the writeback to
complete, the log writer thread that should bring the writeback to
completion picks up the folio being written back in
nilfs_lookup_dirty_data_buffers() that it calls for subsequent log
creation and was trying to lock the folio. Thus causing a deadlock.
In the first place, it is unexpected that folios/pages in the middle of
writeback will be updated and become dirty. Nilfs2 adds a checksum to
verify the validity of the log being written and uses it for recovery at
mount, so data changes during writeback are suppressed. Since this is
broken, an unclean shutdown could potentially cause recovery to fail.
Investigation revealed that the root cause is that the wait for writeback
completion in nilfs_page_mkwrite() is conditional, and if the backing
device does not require stable writes, data may be modified without
waiting.
Fix these issues by making nilfs_page_mkwrite() wait for writeback to
finish regardless of the stable write requirement of the backing device.
Link: https://lkml.kernel.org/r/20240131145657.4209-1-konishi.ryusuke@gmail.com
Fixes: 1d1d1a767206 ("mm: only enforce stable page writes if the backing device requires it")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Reported-by: syzbot+ee2ae68da3b22d04cd8d(a)syzkaller.appspotmail.com
Closes: https://lkml.kernel.org/r/00000000000047d819061004ad6c@google.com
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/file.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
--- a/fs/nilfs2/file.c~nilfs2-fix-hang-in-nilfs_lookup_dirty_data_buffers
+++ a/fs/nilfs2/file.c
@@ -107,7 +107,13 @@ static vm_fault_t nilfs_page_mkwrite(str
nilfs_transaction_commit(inode->i_sb);
mapped:
- folio_wait_stable(folio);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ folio_wait_writeback(folio);
out:
sb_end_pagefault(inode->i_sb);
return vmf_fs_error(ret);
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-convert-segment-buffer-to-use-kmap_local.patch
nilfs2-convert-nilfs_copy_buffer-to-use-kmap_local.patch
nilfs2-convert-metadata-file-common-code-to-use-kmap_local.patch
nilfs2-convert-sufile-to-use-kmap_local.patch
nilfs2-convert-persistent-object-allocator-to-use-kmap_local.patch
nilfs2-convert-dat-to-use-kmap_local.patch
nilfs2-move-nilfs_bmap_write-call-out-of-nilfs_write_inode_common.patch
nilfs2-do-not-acquire-rwsem-in-nilfs_bmap_write.patch
nilfs2-convert-ifile-to-use-kmap_local.patch
nilfs2-localize-highmem-mapping-for-checkpoint-creation-within-cpfile.patch
nilfs2-localize-highmem-mapping-for-checkpoint-finalization-within-cpfile.patch
nilfs2-localize-highmem-mapping-for-checkpoint-reading-within-cpfile.patch
nilfs2-remove-nilfs_cpfile_getput_checkpoint.patch
nilfs2-convert-cpfile-to-use-kmap_local.patch
The quilt patch titled
Subject: arch/arm/mm: fix major fault accounting when retrying under per-VMA lock
has been removed from the -mm tree. Its filename was
arch-arm-mm-fix-major-fault-accounting-when-retrying-under-per-vma-lock.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Suren Baghdasaryan <surenb(a)google.com>
Subject: arch/arm/mm: fix major fault accounting when retrying under per-VMA lock
Date: Mon, 22 Jan 2024 22:43:05 -0800
The change [1] missed ARM architecture when fixing major fault accounting
for page fault retry under per-VMA lock.
The user-visible effects is that it restores correct major fault
accounting that was broken after [2] was merged in 6.7 kernel. The
more detailed description is in [3] and this patch simply adds the
same fix to ARM architecture which I missed in [3].
Add missing code to fix ARM architecture fault accounting.
[1] 46e714c729c8 ("arch/mm/fault: fix major fault accounting when retrying under per-VMA lock")
[2] https://lore.kernel.org/all/20231006195318.4087158-6-willy@infradead.org/
[3] https://lore.kernel.org/all/20231226214610.109282-1-surenb@google.com/
Link: https://lkml.kernel.org/r/20240123064305.2829244-1-surenb@google.com
Fixes: 12214eba1992 ("mm: handle read faults under the VMA lock")
Reported-by: Russell King (Oracle) <rmk+kernel(a)armlinux.org.uk>
Signed-off-by: Suren Baghdasaryan <surenb(a)google.com>
Cc: Alexander Gordeev <agordeev(a)linux.ibm.com>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: Dave Hansen <dave.hansen(a)linux.intel.com>
Cc: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michael Ellerman <mpe(a)ellerman.id.au>
Cc: Palmer Dabbelt <palmer(a)dabbelt.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Will Deacon <will(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
arch/arm/mm/fault.c | 2 ++
1 file changed, 2 insertions(+)
--- a/arch/arm/mm/fault.c~arch-arm-mm-fix-major-fault-accounting-when-retrying-under-per-vma-lock
+++ a/arch/arm/mm/fault.c
@@ -298,6 +298,8 @@ do_page_fault(unsigned long addr, unsign
goto done;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
_
Patches currently in -mm which might be from surenb(a)google.com are
userfaultfd-handle-zeropage-moves-by-uffdio_move.patch
The quilt patch titled
Subject: nilfs2: fix data corruption in dsync block recovery for small block sizes
has been removed from the -mm tree. Its filename was
nilfs2-fix-data-corruption-in-dsync-block-recovery-for-small-block-sizes.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Subject: nilfs2: fix data corruption in dsync block recovery for small block sizes
Date: Wed, 24 Jan 2024 21:19:36 +0900
The helper function nilfs_recovery_copy_block() of
nilfs_recovery_dsync_blocks(), which recovers data from logs created by
data sync writes during a mount after an unclean shutdown, incorrectly
calculates the on-page offset when copying repair data to the file's page
cache. In environments where the block size is smaller than the page
size, this flaw can cause data corruption and leak uninitialized memory
bytes during the recovery process.
Fix these issues by correcting this byte offset calculation on the page.
Link: https://lkml.kernel.org/r/20240124121936.10575-1-konishi.ryusuke@gmail.com
Signed-off-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Tested-by: Ryusuke Konishi <konishi.ryusuke(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/nilfs2/recovery.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
--- a/fs/nilfs2/recovery.c~nilfs2-fix-data-corruption-in-dsync-block-recovery-for-small-block-sizes
+++ a/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_rec
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct page *page)
{
struct buffer_head *bh_org;
+ size_t from = pos & ~PAGE_MASK;
void *kaddr;
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(str
return -EIO;
kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+ memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(st
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
if (unlikely(err))
goto failed_page;
_
Patches currently in -mm which might be from konishi.ryusuke(a)gmail.com are
nilfs2-convert-segment-buffer-to-use-kmap_local.patch
nilfs2-convert-nilfs_copy_buffer-to-use-kmap_local.patch
nilfs2-convert-metadata-file-common-code-to-use-kmap_local.patch
nilfs2-convert-sufile-to-use-kmap_local.patch
nilfs2-convert-persistent-object-allocator-to-use-kmap_local.patch
nilfs2-convert-dat-to-use-kmap_local.patch
nilfs2-move-nilfs_bmap_write-call-out-of-nilfs_write_inode_common.patch
nilfs2-do-not-acquire-rwsem-in-nilfs_bmap_write.patch
nilfs2-convert-ifile-to-use-kmap_local.patch
nilfs2-localize-highmem-mapping-for-checkpoint-creation-within-cpfile.patch
nilfs2-localize-highmem-mapping-for-checkpoint-finalization-within-cpfile.patch
nilfs2-localize-highmem-mapping-for-checkpoint-reading-within-cpfile.patch
nilfs2-remove-nilfs_cpfile_getput_checkpoint.patch
nilfs2-convert-cpfile-to-use-kmap_local.patch
The quilt patch titled
Subject: exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock)
has been removed from the -mm tree. Its filename was
exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irqsiglock.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Oleg Nesterov <oleg(a)redhat.com>
Subject: exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock)
Date: Tue, 23 Jan 2024 16:34:00 +0100
After the recent changes nobody use siglock to read the values protected
by stats_lock, we can kill spin_lock_irq(¤t->sighand->siglock) and
update the comment.
With this patch only __exit_signal() and thread_group_start_cputime() take
stats_lock under siglock.
Link: https://lkml.kernel.org/r/20240123153359.GA21866@redhat.com
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Dylan Hatch <dylanbhatch(a)google.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/exit.c | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
--- a/kernel/exit.c~exit-wait_task_zombie-kill-the-no-longer-necessary-spin_lock_irqsiglock
+++ a/kernel/exit.c
@@ -1127,17 +1127,14 @@ static int wait_task_zombie(struct wait_
* and nobody can change them.
*
* psig->stats_lock also protects us from our sub-threads
- * which can reap other children at the same time. Until
- * we change k_getrusage()-like users to rely on this lock
- * we have to take ->siglock as well.
+ * which can reap other children at the same time.
*
* We use thread_group_cputime_adjusted() to get times for
* the thread group, which consolidates times for all threads
* in the group including the group leader.
*/
thread_group_cputime_adjusted(p, &tgutime, &tgstime);
- spin_lock_irq(¤t->sighand->siglock);
- write_seqlock(&psig->stats_lock);
+ write_seqlock_irq(&psig->stats_lock);
psig->cutime += tgutime + sig->cutime;
psig->cstime += tgstime + sig->cstime;
psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1160,8 +1157,7 @@ static int wait_task_zombie(struct wait_
psig->cmaxrss = maxrss;
task_io_accounting_add(&psig->ioac, &p->ioac);
task_io_accounting_add(&psig->ioac, &sig->ioac);
- write_sequnlock(&psig->stats_lock);
- spin_unlock_irq(¤t->sighand->siglock);
+ write_sequnlock_irq(&psig->stats_lock);
}
if (wo->wo_rusage)
_
Patches currently in -mm which might be from oleg(a)redhat.com are
ptrace_attach-shift-sendsigstop-into-ptrace_set_stopped.patch
The quilt patch titled
Subject: fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand()
has been removed from the -mm tree. Its filename was
fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Oleg Nesterov <oleg(a)redhat.com>
Subject: fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand()
Date: Tue, 23 Jan 2024 16:33:55 +0100
Patch series "fs/proc: do_task_stat: use sig->stats_".
do_task_stat() has the same problem as getrusage() had before "getrusage:
use sig->stats_lock rather than lock_task_sighand()": a hard lockup. If
NR_CPUS threads call lock_task_sighand() at the same time and the process
has NR_THREADS, spin_lock_irq will spin with irqs disabled O(NR_CPUS *
NR_THREADS) time.
This patch (of 3):
thread_group_cputime() does its own locking, we can safely shift
thread_group_cputime_adjusted() which does another for_each_thread loop
outside of ->siglock protected section.
Not only this removes for_each_thread() from the critical section with
irqs disabled, this removes another case when stats_lock is taken with
siglock held. We want to remove this dependency, then we can change the
users of stats_lock to not disable irqs.
Link: https://lkml.kernel.org/r/20240123153313.GA21832@redhat.com
Link: https://lkml.kernel.org/r/20240123153355.GA21854@redhat.com
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Signed-off-by: Dylan Hatch <dylanbhatch(a)google.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/array.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
--- a/fs/proc/array.c~fs-proc-do_task_stat-move-thread_group_cputime_adjusted-outside-of-lock_task_sighand
+++ a/fs/proc/array.c
@@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file
sigemptyset(&sigign);
sigemptyset(&sigcatch);
- cutime = cstime = utime = stime = 0;
+ cutime = cstime = 0;
cgtime = gtime = 0;
if (lock_task_sighand(task, &flags)) {
@@ -546,7 +546,6 @@ static int do_task_stat(struct seq_file
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
- thread_group_cputime_adjusted(task, &utime, &stime);
gtime += sig->gtime;
if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
@@ -562,10 +561,13 @@ static int do_task_stat(struct seq_file
if (permitted && (!whole || num_threads < 2))
wchan = !task_is_running(task);
- if (!whole) {
+
+ if (whole) {
+ thread_group_cputime_adjusted(task, &utime, &stime);
+ } else {
+ task_cputime_adjusted(task, &utime, &stime);
min_flt = task->min_flt;
maj_flt = task->maj_flt;
- task_cputime_adjusted(task, &utime, &stime);
gtime = task_gtime(task);
}
_
Patches currently in -mm which might be from oleg(a)redhat.com are
ptrace_attach-shift-sendsigstop-into-ptrace_set_stopped.patch
The quilt patch titled
Subject: getrusage: use sig->stats_lock rather than lock_task_sighand()
has been removed from the -mm tree. Its filename was
getrusage-use-sig-stats_lock-rather-than-lock_task_sighand.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Oleg Nesterov <oleg(a)redhat.com>
Subject: getrusage: use sig->stats_lock rather than lock_task_sighand()
Date: Mon, 22 Jan 2024 16:50:53 +0100
lock_task_sighand() can trigger a hard lockup. If NR_CPUS threads call
getrusage() at the same time and the process has NR_THREADS, spin_lock_irq
will spin with irqs disabled O(NR_CPUS * NR_THREADS) time.
Change getrusage() to use sig->stats_lock, it was specifically designed
for this type of use. This way it runs lockless in the likely case.
TODO:
- Change do_task_stat() to use sig->stats_lock too, then we can
remove spin_lock_irq(siglock) in wait_task_zombie().
- Turn sig->stats_lock into seqcount_rwlock_t, this way the
readers in the slow mode won't exclude each other. See
https://lore.kernel.org/all/20230913154907.GA26210@redhat.com/
- stats_lock has to disable irqs because ->siglock can be taken
in irq context, it would be very nice to change __exit_signal()
to avoid the siglock->stats_lock dependency.
Link: https://lkml.kernel.org/r/20240122155053.GA26214@redhat.com
Signed-off-by: Oleg Nesterov <oleg(a)redhat.com>
Reported-by: Dylan Hatch <dylanbhatch(a)google.com>
Tested-by: Dylan Hatch <dylanbhatch(a)google.com>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/sys.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
--- a/kernel/sys.c~getrusage-use-sig-stats_lock-rather-than-lock_task_sighand
+++ a/kernel/sys.c
@@ -1788,7 +1788,9 @@ void getrusage(struct task_struct *p, in
unsigned long maxrss;
struct mm_struct *mm;
struct signal_struct *sig = p->signal;
+ unsigned int seq = 0;
+retry:
memset(r, 0, sizeof(*r));
utime = stime = 0;
maxrss = 0;
@@ -1800,8 +1802,7 @@ void getrusage(struct task_struct *p, in
goto out_thread;
}
- if (!lock_task_sighand(p, &flags))
- return;
+ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
switch (who) {
case RUSAGE_BOTH:
@@ -1829,14 +1830,23 @@ void getrusage(struct task_struct *p, in
r->ru_oublock += sig->oublock;
if (maxrss < sig->maxrss)
maxrss = sig->maxrss;
+
+ rcu_read_lock();
__for_each_thread(sig, t)
accumulate_thread_rusage(t, r);
+ rcu_read_unlock();
+
break;
default:
BUG();
}
- unlock_task_sighand(p, &flags);
+
+ if (need_seqretry(&sig->stats_lock, seq)) {
+ seq = 1;
+ goto retry;
+ }
+ done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
if (who == RUSAGE_CHILDREN)
goto out_children;
_
Patches currently in -mm which might be from oleg(a)redhat.com are
ptrace_attach-shift-sendsigstop-into-ptrace_set_stopped.patch
From: Baokun Li <libaokun1(a)huawei.com>
[ Upstream commit 4530b3660d396a646aad91a787b6ab37cf604b53 ]
Determine if the group block bitmap is corrupted before using ac_b_ex in
ext4_mb_try_best_found() to avoid allocating blocks from a group with a
corrupted block bitmap in the following concurrency and making the
situation worse.
ext4_mb_regular_allocator
ext4_lock_group(sb, group)
ext4_mb_good_group
// check if the group bbitmap is corrupted
ext4_mb_complex_scan_group
// Scan group gets ac_b_ex but doesn't use it
ext4_unlock_group(sb, group)
ext4_mark_group_bitmap_corrupted(group)
// The block bitmap was corrupted during
// the group unlock gap.
ext4_mb_try_best_found
ext4_lock_group(ac->ac_sb, group)
ext4_mb_use_best_found
mb_mark_used
// Allocating blocks in block bitmap corrupted group
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20240104142040.2835097-7-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/ext4/mballoc.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8875fac9f958..cf034a38e8ba 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1802,6 +1802,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return err;
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
@@ -1809,6 +1812,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
--
2.43.0
From: Baokun Li <libaokun1(a)huawei.com>
[ Upstream commit 4530b3660d396a646aad91a787b6ab37cf604b53 ]
Determine if the group block bitmap is corrupted before using ac_b_ex in
ext4_mb_try_best_found() to avoid allocating blocks from a group with a
corrupted block bitmap in the following concurrency and making the
situation worse.
ext4_mb_regular_allocator
ext4_lock_group(sb, group)
ext4_mb_good_group
// check if the group bbitmap is corrupted
ext4_mb_complex_scan_group
// Scan group gets ac_b_ex but doesn't use it
ext4_unlock_group(sb, group)
ext4_mark_group_bitmap_corrupted(group)
// The block bitmap was corrupted during
// the group unlock gap.
ext4_mb_try_best_found
ext4_lock_group(ac->ac_sb, group)
ext4_mb_use_best_found
mb_mark_used
// Allocating blocks in block bitmap corrupted group
Signed-off-by: Baokun Li <libaokun1(a)huawei.com>
Reviewed-by: Jan Kara <jack(a)suse.cz>
Link: https://lore.kernel.org/r/20240104142040.2835097-7-libaokun1@huawei.com
Signed-off-by: Theodore Ts'o <tytso(a)mit.edu>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
fs/ext4/mballoc.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9e5aa625ab30..c1af95898aa4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1802,6 +1802,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return err;
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
@@ -1809,6 +1812,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
--
2.43.0
In the RISC-V specification, the stimecmp register doesn't have a default
value. To prevent the timer interrupt from being triggered during timer
initialization, clear the timer interrupt by writing stimecmp with a
maximum value.
Fixes: 9f7a8ff6391f ("RISC-V: Prefer sstc extension if available")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Ley Foon Tan <leyfoon.tan(a)starfivetech.com>
---
v2:
Resolved comments from Anup.
- Moved riscv_clock_event_stop() to riscv_timer_starting_cpu().
- Added Fixes tag
---
drivers/clocksource/timer-riscv.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index e66dcbd66566..672669eb7281 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -116,6 +116,9 @@ static int riscv_timer_starting_cpu(unsigned int cpu)
ce->rating = 450;
clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff);
+ /* Clear timer interrupt */
+ riscv_clock_event_stop();
+
enable_percpu_irq(riscv_clock_event_irq,
irq_get_trigger_type(riscv_clock_event_irq));
return 0;
--
2.43.0
Hello,
I am sending this patch for inclusion in the stable tree, as it fixes
a critical stack-out-of-bounds bug in the cifs module related to the
`smb2_set_next_command()` function.
Problem Summary:
A problem was observed in the `statfs` system call for cifs, where it
failed with a "Resource temporarily unavailable" message. Further
investigation with KASAN revealed a stack-out-of-bounds error. The
root cause was a miscalculation of the size of the `smb2_query_info_req`
structure in the `SMB2_query_info_init()` function.
This situation arose due to a dependency on a prior commit
(`eb3e28c1e89b`) that replaced a 1-element array with a flexible
array member in the `smb2_query_info_req` structure. This commit was
not backported to the 5.10.y and 5.15.y stable branch, leading to an
incorrect size calculation after the backport of commit `33eae65c6f49`.
Fix Details:
The patch corrects the size calculation to ensure the correct length
is used when initializing the `smb2_query_info_req` structure. It has
been tested and confirmed to resolve the issue without introducing
any regressions.
Maybe the prior commit eb3e28c1e89b ("smb3: Replace smb2pdu 1-element
arrays with flex-arrays") should be backported to solve this problem
directly. The patch does not seem to conflict.
Best regards,
ZhaoLong Wang
ZhaoLong Wang (1):
cifs: Fix stack-out-of-bounds in smb2_set_next_command()
fs/cifs/smb2pdu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--
2.39.2
From: Chengming Zhou <zhouchengming(a)bytedance.com>
We may encounter duplicate entry in the zswap_store():
1. swap slot that freed to per-cpu swap cache, doesn't invalidate
the zswap entry, then got reused. This has been fixed.
2. !exclusive load mode, swapin folio will leave its zswap entry
on the tree, then swapout again. This has been removed.
3. one folio can be dirtied again after zswap_store(), so need to
zswap_store() again. This should be handled correctly.
So we must invalidate the old duplicate entry before insert the
new one, which actually doesn't have to be done at the beginning
of zswap_store(). And this is a normal situation, we shouldn't
WARN_ON(1) in this case, so delete it. (The WARN_ON(1) seems want
to detect swap entry UAF problem? But not very necessary here.)
The good point is that we don't need to lock tree twice in the
store success path.
Note we still need to invalidate the old duplicate entry in the
store failure path, otherwise the new data in swapfile could be
overwrite by the old data in zswap pool when lru writeback.
We have to do this even when !zswap_enabled since zswap can be
disabled anytime. If the folio store success before, then got
dirtied again but zswap disabled, we won't invalidate the old
duplicate entry in the zswap_store(). So later lru writeback
may overwrite the new data in swapfile.
Fixes: 42c06a0e8ebe ("mm: kill frontswap")
Cc: <stable(a)vger.kernel.org>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Acked-by: Yosry Ahmed <yosryahmed(a)google.com>
Signed-off-by: Chengming Zhou <zhouchengming(a)bytedance.com>
---
v3:
- Fix a few grammatical problems in comments, per Yosry.
v2:
- Change the duplicate entry invalidation loop to if, since we hold
the lock, we won't find it once we invalidate it, per Yosry.
- Add Fixes tag.
---
mm/zswap.c | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index cd67f7f6b302..d9d8947d6761 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1518,18 +1518,8 @@ bool zswap_store(struct folio *folio)
return false;
if (!zswap_enabled)
- return false;
+ goto check_old;
- /*
- * If this is a duplicate, it must be removed before attempting to store
- * it, otherwise, if the store fails the old page won't be removed from
- * the tree, and it might be written back overriding the new data.
- */
- spin_lock(&tree->lock);
- entry = zswap_rb_search(&tree->rbroot, offset);
- if (entry)
- zswap_invalidate_entry(tree, entry);
- spin_unlock(&tree->lock);
objcg = get_obj_cgroup_from_folio(folio);
if (objcg && !obj_cgroup_may_zswap(objcg)) {
memcg = get_mem_cgroup_from_objcg(objcg);
@@ -1608,14 +1598,12 @@ bool zswap_store(struct folio *folio)
/* map */
spin_lock(&tree->lock);
/*
- * A duplicate entry should have been removed at the beginning of this
- * function. Since the swap entry should be pinned, if a duplicate is
- * found again here it means that something went wrong in the swap
- * cache.
+ * The folio may have been dirtied again, invalidate the
+ * possibly stale entry before inserting the new entry.
*/
- while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
- WARN_ON(1);
+ if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
zswap_invalidate_entry(tree, dupentry);
+ VM_WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry));
}
if (entry->length) {
INIT_LIST_HEAD(&entry->lru);
@@ -1638,6 +1626,17 @@ bool zswap_store(struct folio *folio)
reject:
if (objcg)
obj_cgroup_put(objcg);
+check_old:
+ /*
+ * If the zswap store fails or zswap is disabled, we must invalidate the
+ * possibly stale entry which was previously stored at this offset.
+ * Otherwise, writeback could overwrite the new data in the swapfile.
+ */
+ spin_lock(&tree->lock);
+ entry = zswap_rb_search(&tree->rbroot, offset);
+ if (entry)
+ zswap_invalidate_entry(tree, entry);
+ spin_unlock(&tree->lock);
return false;
shrink:
--
2.40.1
There are reports that since version 6.7 update-grub fails to find the
device of the root on systems without initrd and on a single device.
This looks like the device name changed in the output of
/proc/self/mountinfo:
6.5-rc5 working
18 1 0:16 / / rw,noatime - btrfs /dev/sda8 ...
6.7 not working:
17 1 0:15 / / rw,noatime - btrfs /dev/root ...
and "update-grub" shows this error:
/usr/sbin/grub-probe: error: cannot find a device for / (is /dev mounted?)
This looks like it's related to the device name, but grub-probe
recognizes the "/dev/root" path and tries to find the underlying device.
However there's a special case for some filesystems, for btrfs in
particular.
The generic root device detection heuristic is not done and it all
relies on reading the device infos by a btrfs specific ioctl. This ioctl
returns the device name as it was saved at the time of device scan (in
this case it's /dev/root).
The change in 6.7 for temp_fsid to allow several single device
filesystem to exist with the same fsid (and transparently generate a new
UUID at mount time) was to skip caching/registering such devices.
This also skipped mounted device. One step of scanning is to check if
the device name hasn't changed, and if yes then update the cached value.
This broke the grub-probe as it always read the device /dev/root and
couldn't find it in the system. A temporary workaround is to create a
symlink but this does not survive reboot.
The right fix is to allow updating the device path of a mounted
filesystem even if this is a single device one. This does not affect the
temp_fsid feature, the UUID of the mounted filesystem remains the same
and the matching is based on device major:minor which is unique per
mounted filesystem.
As the main part of device scanning and list update is done in
device_list_add() that handles all corner cases and locking, it is
extended to take a parameter that tells it to do everything as before,
except adding a new device entry.
This covers the path when the device (that exists for all mounted
devices) name changes, updating /dev/root to /dev/sdx. Any other single
device with filesystem is skipped.
Note that if a system is booted and initial mount is done on the
/dev/root device, this will be the cached name of the device. Only after
the command "btrfs device rescan" it will change as it triggers the
rename.
The fix was verified by users whose systems were affected.
CC: stable(a)vger.kernel.org # 6.7+
Fixes: bc27d6f0aa0e ("btrfs: scan but don't register device on single device filesystem")
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=218353
Link: https://lore.kernel.org/lkml/CAKLYgeJ1tUuqLcsquwuFqjDXPSJpEiokrWK2gisPKDZLs…
Signed-off-by: David Sterba <dsterba(a)suse.com>
---
fs/btrfs/volumes.c | 30 ++++++++++++++----------------
1 file changed, 14 insertions(+), 16 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 474ab7ed65ea..f2c2f7ca5c3d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -738,6 +738,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
bool same_fsid_diff_dev = false;
bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+ bool can_create_new = *new_device_added;
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) {
btrfs_err(NULL,
@@ -753,6 +754,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
return ERR_PTR(error);
}
+ *new_device_added = false;
fs_devices = find_fsid_by_device(disk_super, path_devt, &same_fsid_diff_dev);
if (!fs_devices) {
@@ -804,6 +806,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
return ERR_PTR(-EBUSY);
}
+ if (!can_create_new) {
+ pr_info(
+ "BTRFS: device fsid %pU devid %llu transid %llu %s skip registration scanned by %s (%d)\n",
+ disk_super->fsid, devid, found_transid, path,
+ current->comm, task_pid_nr(current));
+ mutex_unlock(&fs_devices->device_list_mutex);
+ return NULL;
+ }
+
nofs_flag = memalloc_nofs_save();
device = btrfs_alloc_device(NULL, &devid,
disk_super->dev_item.uuid, path);
@@ -1355,27 +1366,14 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
goto error_bdev_put;
}
- if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 &&
- !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING)) {
- dev_t devt;
-
- ret = lookup_bdev(path, &devt);
- if (ret)
- btrfs_warn(NULL, "lookup bdev failed for path %s: %d",
- path, ret);
- else
- btrfs_free_stale_devices(devt, NULL);
-
- pr_debug("BTRFS: skip registering single non-seed device %s\n", path);
- device = NULL;
- goto free_disk_super;
- }
+ if (mount_arg_dev || btrfs_super_num_devices(disk_super) != 1 ||
+ (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING))
+ new_device_added = true;
device = device_list_add(path, disk_super, &new_device_added);
if (!IS_ERR(device) && new_device_added)
btrfs_free_stale_devices(device->devt, device);
-free_disk_super:
btrfs_release_disk_super(disk_super);
error_bdev_put:
--
2.42.1
From: Chengming Zhou <zhouchengming(a)bytedance.com>
We may encounter duplicate entry in the zswap_store():
1. swap slot that freed to per-cpu swap cache, doesn't invalidate
the zswap entry, then got reused. This has been fixed.
2. !exclusive load mode, swapin folio will leave its zswap entry
on the tree, then swapout again. This has been removed.
3. one folio can be dirtied again after zswap_store(), so need to
zswap_store() again. This should be handled correctly.
So we must invalidate the old duplicate entry before insert the
new one, which actually doesn't have to be done at the beginning
of zswap_store(). And this is a normal situation, we shouldn't
WARN_ON(1) in this case, so delete it. (The WARN_ON(1) seems want
to detect swap entry UAF problem? But not very necessary here.)
The good point is that we don't need to lock tree twice in the
store success path.
Note we still need to invalidate the old duplicate entry in the
store failure path, otherwise the new data in swapfile could be
overwrite by the old data in zswap pool when lru writeback.
We have to do this even when !zswap_enabled since zswap can be
disabled anytime. If the folio store success before, then got
dirtied again but zswap disabled, we won't invalidate the old
duplicate entry in the zswap_store(). So later lru writeback
may overwrite the new data in swapfile.
Fixes: 42c06a0e8ebe ("mm: kill frontswap")
Cc: <stable(a)vger.kernel.org>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Signed-off-by: Chengming Zhou <zhouchengming(a)bytedance.com>
---
v2:
- Change the duplicate entry invalidation loop to if, since we hold
the lock, we won't find it once we invalidate it, per Yosry.
- Add Fixes tag.
---
mm/zswap.c | 33 ++++++++++++++++-----------------
1 file changed, 16 insertions(+), 17 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index cd67f7f6b302..6c1466633274 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1518,18 +1518,8 @@ bool zswap_store(struct folio *folio)
return false;
if (!zswap_enabled)
- return false;
+ goto check_old;
- /*
- * If this is a duplicate, it must be removed before attempting to store
- * it, otherwise, if the store fails the old page won't be removed from
- * the tree, and it might be written back overriding the new data.
- */
- spin_lock(&tree->lock);
- entry = zswap_rb_search(&tree->rbroot, offset);
- if (entry)
- zswap_invalidate_entry(tree, entry);
- spin_unlock(&tree->lock);
objcg = get_obj_cgroup_from_folio(folio);
if (objcg && !obj_cgroup_may_zswap(objcg)) {
memcg = get_mem_cgroup_from_objcg(objcg);
@@ -1608,14 +1598,12 @@ bool zswap_store(struct folio *folio)
/* map */
spin_lock(&tree->lock);
/*
- * A duplicate entry should have been removed at the beginning of this
- * function. Since the swap entry should be pinned, if a duplicate is
- * found again here it means that something went wrong in the swap
- * cache.
+ * The folio could be dirtied again, invalidate the possible old entry
+ * before insert this new entry.
*/
- while (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
- WARN_ON(1);
+ if (zswap_rb_insert(&tree->rbroot, entry, &dupentry) == -EEXIST) {
zswap_invalidate_entry(tree, dupentry);
+ VM_WARN_ON(zswap_rb_insert(&tree->rbroot, entry, &dupentry));
}
if (entry->length) {
INIT_LIST_HEAD(&entry->lru);
@@ -1638,6 +1626,17 @@ bool zswap_store(struct folio *folio)
reject:
if (objcg)
obj_cgroup_put(objcg);
+check_old:
+ /*
+ * If zswap store fail or zswap disabled, we must invalidate possible
+ * old entry which previously stored by this folio. Otherwise, later
+ * writeback could overwrite the new data in swapfile.
+ */
+ spin_lock(&tree->lock);
+ entry = zswap_rb_search(&tree->rbroot, offset);
+ if (entry)
+ zswap_invalidate_entry(tree, entry);
+ spin_unlock(&tree->lock);
return false;
shrink:
--
2.40.1
Dear stable kernel maintainers,
I am writing to request that 3 related patches be merged to various LTS kernels. I'm not sure if it would have
been preferable for me to send 3 separate emails, so please forgive me if I chose wrongly. (This is my first foray
into interacting with the kernel community) :)
The patches are as follows:
1. 0cd3be51733f (HID: apple: Add support for the 2021 Magic Keyboard, 2021-10-08)
2. 346338ef00d3 (HID: apple: Swap the Fn and Left Control keys on Apple keyboards, 2020-05-15)
3. 531cb56972f2 (HID: apple: Add 2021 magic keyboard FN key mapping, 2021-11-08)
These patches have all been merged to mainline, but I believe when they were submitted, backporting may not have been considered. The Apple Magic Keyboard 2021 (Model # A2450) seems to be a popular keyboard, and without these
patches, for users on certain LTS kernels that use this keyboard, the function keys do not behave as expected. e.g. Pressing the brightness down or brightness up key didn't work, and bizarrely pressing the globe/Fn key alone caused the brightness to decrease. None of the top row keys worked as expected.
I checked to see where the patches were missing and figured that it would be good to have those patches in those
kernels.
I would ask that patches 1 & 3 be merged to v4.19, v5.4, v5.10, and v5.15.
I would ask that patch 2 be merged to: v5.4 and v4.19.
For patch 3 to apply cleanly, it needed patch 2 to be present in the tree.
Thanks,
--
Aseda Aboagye
On x86 each cpu_hw_events maintains a table for counter assignment but
it missed to update one for the deleted event in x86_pmu_del(). This
can make perf_clear_dirty_counters() reset used counter if it's called
before event scheduling or enabling. Then it would return out of range
data which doesn't make sense.
The following code can reproduce the problem.
$ cat repro.c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
.disabled = 1,
};
void *worker(void *arg)
{
int cpu = (long)arg;
int fd1 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
int fd2 = syscall(SYS_perf_event_open, &attr, -1, cpu, -1, 0);
void *p;
do {
ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd1, 0);
ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
munmap(p, 4096);
ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
} while (1);
return NULL;
}
int main(void)
{
int i;
int n = sysconf(_SC_NPROCESSORS_ONLN);
pthread_t *th = calloc(n, sizeof(*th));
for (i = 0; i < n; i++)
pthread_create(&th[i], NULL, worker, (void *)(long)i);
for (i = 0; i < n; i++)
pthread_join(th[i], NULL);
free(th);
return 0;
}
And you can see the out of range data using perf stat like this.
Probably it'd be easier to see on a large machine.
$ gcc -o repro repro.c -pthread
$ ./repro &
$ sudo perf stat -A -I 1000 2>&1 | awk '{ if (length($3) > 15) print }'
1.001028462 CPU6 196,719,295,683,763 cycles # 194290.996 GHz (71.54%)
1.001028462 CPU3 396,077,485,787,730 branch-misses # 15804359784.80% of all branches (71.07%)
1.001028462 CPU17 197,608,350,727,877 branch-misses # 14594186554.56% of all branches (71.22%)
2.020064073 CPU4 198,372,472,612,140 cycles # 194681.113 GHz (70.95%)
2.020064073 CPU6 199,419,277,896,696 cycles # 195720.007 GHz (70.57%)
2.020064073 CPU20 198,147,174,025,639 cycles # 194474.654 GHz (71.03%)
2.020064073 CPU20 198,421,240,580,145 stalled-cycles-frontend # 100.14% frontend cycles idle (70.93%)
3.037443155 CPU4 197,382,689,923,416 cycles # 194043.065 GHz (71.30%)
3.037443155 CPU20 196,324,797,879,414 cycles # 193003.773 GHz (71.69%)
3.037443155 CPU5 197,679,956,608,205 stalled-cycles-backend # 1315606428.66% backend cycles idle (71.19%)
3.037443155 CPU5 198,571,860,474,851 instructions # 13215422.58 insn per cycle
It should move the contents in the cpuc->assign as well.
Fixes: 5471eea5d3bf ("perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task")
Cc: Kan Liang <kan.liang(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Namhyung Kim <namhyung(a)kernel.org>
---
arch/x86/events/core.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 09050641ce5d..5b0dd07b1ef1 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
while (++i < cpuc->n_events) {
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+ cpuc->assign[i-1] = cpuc->assign[i];
}
cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
--
2.43.0.472.g3155946c3a-goog
From: Finn Thain <fthain(a)linux-m68k.org>
commit b845b574f86dcb6a70dfa698aa87a237b0878d2a upstream.
On 68030/020, an instruction such as, moveml %a2-%a3/%a5,%sp@- may cause
a stack page fault during instruction execution (i.e. not at an
instruction boundary) and produce a format 0xB exception frame.
In this situation, the value of USP will be unreliable. If a signal is
to be delivered following the exception, this USP value is used to
calculate the location for a signal frame. This can result in a
corrupted user stack.
The corruption was detected in dash (actually in glibc) where it showed
up as an intermittent "stack smashing detected" message and crash
following signal delivery for SIGCHLD.
It was hard to reproduce that failure because delivery of the signal
raced with the page fault and because the kernel places an unpredictable
gap of up to 7 bytes between the USP and the signal frame.
A format 0xB exception frame can be produced by a bus error or an
address error. The 68030 Users Manual says that address errors occur
immediately upon detection during instruction prefetch. The instruction
pipeline allows prefetch to overlap with other instructions, which means
an address error can arise during the execution of a different
instruction. So it seems likely that this patch may help in the address
error case also.
Reported-and-tested-by: Stan Johnson <userm57(a)yahoo.com>
Link: https://lore.kernel.org/all/CAMuHMdW3yD22_ApemzW_6me3adq6A458u1_F0v-1EYwK_6…
Cc: Michael Schmitz <schmitzmic(a)gmail.com>
Cc: Andreas Schwab <schwab(a)linux-m68k.org>
Cc: stable(a)vger.kernel.org
Co-developed-by: Michael Schmitz <schmitzmic(a)gmail.com>
Signed-off-by: Michael Schmitz <schmitzmic(a)gmail.com>
Signed-off-by: Finn Thain <fthain(a)linux-m68k.org>
Reviewed-by: Geert Uytterhoeven <geert(a)linux-m68k.org>
Link: https://lore.kernel.org/r/9e66262a754fcba50208aa424188896cc52a1dd1.16833658…
Signed-off-by: Geert Uytterhoeven <geert(a)linux-m68k.org>
---
arch/m68k/kernel/signal.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 8fb8ee804b3a..de7c1bde62bc 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -808,11 +808,17 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *
}
static inline void __user *
-get_sigframe(struct ksignal *ksig, size_t frame_size)
+get_sigframe(struct ksignal *ksig, struct pt_regs *tregs, size_t frame_size)
{
unsigned long usp = sigsp(rdusp(), ksig);
+ unsigned long gap = 0;
- return (void __user *)((usp - frame_size) & -8UL);
+ if (CPU_IS_020_OR_030 && tregs->format == 0xb) {
+ /* USP is unreliable so use worst-case value */
+ gap = 256;
+ }
+
+ return (void __user *)((usp - gap - frame_size) & -8UL);
}
static int setup_frame(struct ksignal *ksig, sigset_t *set,
@@ -830,7 +836,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set,
return -EFAULT;
}
- frame = get_sigframe(ksig, sizeof(*frame) + fsize);
+ frame = get_sigframe(ksig, tregs, sizeof(*frame) + fsize);
if (fsize)
err |= copy_to_user (frame + 1, regs + 1, fsize);
@@ -903,7 +909,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
return -EFAULT;
}
- frame = get_sigframe(ksig, sizeof(*frame));
+ frame = get_sigframe(ksig, tregs, sizeof(*frame));
if (fsize)
err |= copy_to_user (&frame->uc.uc_extra, regs + 1, fsize);
--
2.17.1
commit 3f90f9ef2dda316d64e420d5d51ba369587ccc55 upstream.
If 020/030 support is enabled, get_io_area() leaves an IO_SIZE gap
between mappings which is added to the vm_struct representing the
mapping. __ioremap() uses the actual requested size (after alignment),
while __iounmap() is passed the size from the vm_struct.
On 020/030, early termination descriptors are used to set up mappings of
extent 'size', which are validated on unmapping. The unmapped gap of
size IO_SIZE defeats the sanity check of the pmd tables, causing
__iounmap() to loop forever on 030.
On 040/060, unmapping of page table entries does not check for a valid
mapping, so the umapping loop always completes there.
Adjust size to be unmapped by the gap that had been added in the
vm_struct prior.
This fixes the hang in atari_platform_init() reported a long time ago,
and a similar one reported by Finn recently (addressed by removing
ioremap() use from the SWIM driver.
Tested on my Falcon in 030 mode - untested but should work the same on
040/060 (the extra page tables cleared there would never have been set
up anyway).
Signed-off-by: Michael Schmitz <schmitzmic(a)gmail.com>
[geert: Minor commit description improvements]
[geert: This was fixed in 2.4.23, but not in 2.5.x]
Signed-off-by: Geert Uytterhoeven <geert(a)linux-m68k.org>
Cc: stable(a)vger.kernel.org
Cc: <cip-dev(a)lists.cip-project.org> # 4.4
---
arch/m68k/mm/kmap.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 6e4955bc542b..fcd52cefee29 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -88,7 +88,8 @@ static inline void free_io_area(void *addr)
for (p = &iolist ; (tmp = *p) ; p = &tmp->next) {
if (tmp->addr == addr) {
*p = tmp->next;
- __iounmap(tmp->addr, tmp->size);
+ /* remove gap added in get_io_area() */
+ __iounmap(tmp->addr, tmp->size - IO_SIZE);
kfree(tmp);
return;
}
--
2.17.1
Syzkaller reports NULL pointer dereference issue at skb_segment()
in 5.10/5.15/6.1 stable releases. The problem has been fixed by
the following patch which can be cleanly applied to 5.10/5.15/6.1 branches.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
The detection of dirty-throttled tasks in blk-wbt has been subtly broken
since its beginning in 2016. Namely if we are doing cgroup writeback and
the throttled task is not in the root cgroup, balance_dirty_pages() will
set dirty_sleep for the non-root bdi_writeback structure. However
blk-wbt checks dirty_sleep only in the root cgroup bdi_writeback
structure. Thus detection of recently throttled tasks is not working in
this case (we noticed this when we switched to cgroup v2 and suddently
writeback was slow).
Since blk-wbt has no easy way to get to proper bdi_writeback and
furthermore its intention has always been to work on the whole device
rather than on individual cgroups, just move the dirty_sleep timestamp
from bdi_writeback to backing_dev_info. That fixes the checking for
recently throttled task and saves memory for everybody as a bonus.
CC: stable(a)vger.kernel.org
Fixes: b57d74aff9ab ("writeback: track if we're sleeping on progress in balance_dirty_pages()")
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/blk-wbt.c | 4 ++--
include/linux/backing-dev-defs.h | 7 +++++--
mm/backing-dev.c | 2 +-
mm/page-writeback.c | 2 +-
4 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 5ba3cd574eac..0c0e270a8265 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -163,9 +163,9 @@ static void wb_timestamp(struct rq_wb *rwb, unsigned long *var)
*/
static bool wb_recent_wait(struct rq_wb *rwb)
{
- struct bdi_writeback *wb = &rwb->rqos.disk->bdi->wb;
+ struct backing_dev_info *bdi = rwb->rqos.disk->bdi;
- return time_before(jiffies, wb->dirty_sleep + HZ);
+ return time_before(jiffies, bdi->last_bdp_sleep + HZ);
}
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index ae12696ec492..ad17739a2e72 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -141,8 +141,6 @@ struct bdi_writeback {
struct delayed_work dwork; /* work item used for writeback */
struct delayed_work bw_dwork; /* work item used for bandwidth estimate */
- unsigned long dirty_sleep; /* last wait */
-
struct list_head bdi_node; /* anchored at bdi->wb_list */
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -179,6 +177,11 @@ struct backing_dev_info {
* any dirty wbs, which is depended upon by bdi_has_dirty().
*/
atomic_long_t tot_write_bandwidth;
+ /*
+ * Jiffies when last process was dirty throttled on this bdi. Used by
+ * blk-wbt.
+ */
+ unsigned long last_bdp_sleep;
struct bdi_writeback wb; /* the root writeback info for this bdi */
struct list_head wb_list; /* list of all wbs */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1e3447bccdb1..e039d05304dd 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -436,7 +436,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
INIT_LIST_HEAD(&wb->work_list);
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
- wb->dirty_sleep = jiffies;
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
@@ -921,6 +920,7 @@ int bdi_init(struct backing_dev_info *bdi)
INIT_LIST_HEAD(&bdi->bdi_list);
INIT_LIST_HEAD(&bdi->wb_list);
init_waitqueue_head(&bdi->wb_waitq);
+ bdi->last_bdp_sleep = jiffies;
return cgwb_bdi_init(bdi);
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index cd4e4ae77c40..cc37fa7f3364 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1921,7 +1921,7 @@ static int balance_dirty_pages(struct bdi_writeback *wb,
break;
}
__set_current_state(TASK_KILLABLE);
- wb->dirty_sleep = now;
+ bdi->last_bdp_sleep = jiffies;
io_schedule_timeout(pause);
current->dirty_paused_when = now + pause;
--
2.35.3
ATTENTION!
Before applying this patch a conflict patch in the queue needs to be removed:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git/tre…
Describe bug:
After mounting a remote cifs resource, it becomes unavailable:
df: /mnt/sambashare: Resource temporarily unavailable
It was tested on the following Linux kernel:
Linux altlinux 5.15.148
The error appeared starting from kernel 5.15.147 after adding the commit [1] "smb: client: fix OOB in SMB2_query_info_init()", in which the buffer length increases by 1 as a result of changes:
.
- iov[0].iov_len = total_len - 1 + input_len;
+ iov[0].iov_len = len;
.
[1] https://patchwork.kernel.org/project/cifs-client/patch/20231213152557.6634-…
Error fixed by backported commit in next patch adapted for the 5.15 kernel:
[PATCH 5.15.y 1/1] smb3: Replace smb2pdu 1-element arrays with flex-arrays
P.S.
I have already backported similar changes for the 5.10.y kernel [2],
but I did not know that there was the same error on 5.15,
since I only deal with kernels 5.10 and 6.1.
Therefore, this patch is to follow the rules of backport to stable branches.
[2] https://lore.kernel.org/all/2024012613-woozy-exhume-7b9d@gregkh/T/
@Stable-Kernel:
You receive this patch series because its first patch fixes a leak in PCI.
@Bjorn:
I decided that it's now actually possible to just embed the docu updates
to the respective patches, instead of a separate patch.
Also dropped the ioport_unmap() for now.
Changes in v6:
- Remove the addition of ioport_unmap() from patch #1, since this is not
really a bug, as explained by the comment above pci_iounmap. (Bjorn)
- Drop the patch unifying the two versions of pci_iounmap(). (Bjorn)
- Make patch #4's style congruent with PCI style.
- Drop (in any case empty) ioport_unmap() again from pci_iounmap()
- Add forgotten updates to Documentation/ when moving files from lib/ to
drivers/pci/
Changes in v5:
- Add forgotten update to MAINTAINERS file.
Changes in v4:
- Apply Arnd's Reviewed-by's
- Add ifdef CONFIG_HAS_IOPORT_MAP guard in drivers/pci/iomap.c (build
error on openrisc)
- Fix typo in patch no.5
Changes in v3:
- Create a separate patch for the leaks in lib/iomap.c. Make it the
series' first patch. (Arnd)
- Turns out the aforementioned bug wasn't just accidentally removing
iounmap() with the ifdef, it was also missing ioport_unmap() to begin
with. Add it.
- Move the ARCH_WANTS_GENERIC_IOMEM_IS_IOPORT-mechanism from
asm-generic/io.h to asm-generic/ioport.h. (Arnd)
- Adjust the implementation of iomem_is_ioport() in asm-generic/io.h so
that it matches exactly what pci_iounmap() previously did in
lib/pci_iomap.c. (Arnd)
- Move the CONFIG_HAS_IOPORT guard in asm-generic/io.h so that
iomem_is_ioport() will always be compiled and just returns false if
there are no ports.
- Add TODOs to several places informing about the generic
iomem_is_ioport() in lib/iomap.c not being generic.
- Add TODO about the followup work to make drivers/pci/iomap.c's
pci_iounmap() actually generic.
Changes in v2:
- Replace patch 4, previously extending the comment about pci_iounmap()
in lib/iomap.c, with a patch that moves pci_iounmap() from that file
to drivers/pci/iomap.c, creating a unified version there. (Arnd)
- Implement iomem_is_ioport() as a new helper in asm-generic/io.h and
lib/iomap.c. (Arnd)
- Move the build rule in drivers/pci/Makefile for iomap.o under the
guard of #if PCI. This had to be done because when just checking for
GENERIC_PCI_IOMAP being defined, the functions don't disappear, which
was the case previously in lib/pci_iomap.c, where the entire file was
made empty if PCI was not set by the guard #ifdef PCI. (Intel's Bots)
- Rephares all patches' commit messages a little bit.
Sooooooooo. I reworked v1.
Please review this carefully, the IO-Ranges are obviously a bit tricky,
as is the build-system / ifdef-ery.
Arnd has suggested that architectures defining a custom inb() need their
own iomem_is_ioport(), as well. I've grepped for inb() and found the
following list of archs that define their own:
- alpha
- arm
- m68k <--
- parisc
- powerpc
- sh
- sparc
- x86 <--
All of those have their own definitons of pci_iounmap(). Therefore, they
don't need our generic version in the first place and, thus, also need
no iomem_is_ioport().
The two exceptions are x86 and m68k. The former uses lib/iomap.c through
CONFIG_GENERIC_IOMAP, as Arnd pointed out in the previous discussion
(thus, CONFIG_GENERIC_IOMAP is not really generic in this regard).
So as I see it, only m68k WOULD need its own custom definition of
iomem_is_ioport(). But as I understand it it doesn't because it uses the
one from asm-generic/pci_iomap.h ??
I wasn't entirely sure how to deal with the address ranges for the
generic implementation in asm-generic/io.h. It's marked with a TODO.
Input appreciated.
I removed the guard around define pci_iounmap in asm-generic/io.h. An
alternative would be to have it be guarded by CONFIG_GENERIC_IOMAP and
CONFIG_GENERIC_PCI_IOMAP, both. Without such a guard, there is no
collision however, because generic pci_iounmap() from
drivers/pci/iomap.c will only get pulled in when
CONFIG_GENERIC_PCI_IOMAP is actually set.
I cross-built this for a variety of architectures, including the usual
suspects (s390, m68k). So far successfully. But let's see what Intel's
robots say :O
P.
Original cover letter:
Hi!
So it seems that since ca. 2007 the PCI code has been scattered a bit.
PCI's devres code, which is only ever used by users of the entire
PCI-subsystem anyways, resides in lib/devres.c and is guarded by an
ifdef PCI, just as the content of lib/pci_iomap.c is.
It, thus, seems reasonable to move all of that.
As I were at it, I moved as much of the devres-specific code from pci.c
to devres.c, too. The only exceptions are four functions that are
currently difficult to move. More information about that can be read
here [1].
I noticed these scattered files while working on (new) PCI-specific
devres functions. If we can get this here merged, I'll soon send another
patch series that addresses some API-inconsistencies and could move the
devres-part of the four remaining functions.
I don't want to do that in this series as this here is only about moving
code, whereas the next series would have to actually change API
behavior.
I successfully (cross-)built this for x86, x86_64, AARCH64 and ARM
(allyesconfig). I booted a kernel with it on x86_64, with a Fedora
desktop environment as payload. The OS came up fine
I hope this is OK. If we can get it in, we'd soon have a very
consistent PCI API again.
Regards,
P.
Philipp Stanner (4):
lib/pci_iomap.c: fix cleanup bug in pci_iounmap()
lib: move pci_iomap.c to drivers/pci/
lib: move pci-specific devres code to drivers/pci/
PCI: Move devres code from pci.c to devres.c
Documentation/driver-api/device-io.rst | 2 +-
Documentation/driver-api/pci/pci.rst | 6 +
MAINTAINERS | 1 -
drivers/pci/Kconfig | 5 +
drivers/pci/Makefile | 3 +-
drivers/pci/devres.c | 450 +++++++++++++++++++++++++
lib/pci_iomap.c => drivers/pci/iomap.c | 5 +-
drivers/pci/pci.c | 249 --------------
drivers/pci/pci.h | 24 ++
lib/Kconfig | 3 -
lib/Makefile | 1 -
lib/devres.c | 208 +-----------
12 files changed, 490 insertions(+), 467 deletions(-)
create mode 100644 drivers/pci/devres.c
rename lib/pci_iomap.c => drivers/pci/iomap.c (99%)
--
2.43.0
From: Jason Gerecke <killertofu(a)gmail.com>
If a input device is opened before hid_hw_start is called, events may
not be received from the hardware. In the case of USB-backed devices,
for example, the hid_hw_start function is responsible for filling in
the URB which is submitted when the input device is opened. If a device
is opened prematurely, polling will never start because the device will
not have been in the correct state to send the URB.
Because the wacom driver registers its input devices before calling
hid_hw_start, there is a window of time where a device can be opened
and end up in an inoperable state. Some ARM-based Chromebooks in particular
reliably trigger this bug.
This commit splits the wacom_register_inputs function into two pieces.
One which is responsible for setting up the allocated inputs (and runs
prior to hid_hw_start so that devices are ready for any input events
they may end up receiving) and another which only registers the devices
(and runs after hid_hw_start to ensure devices can be immediately opened
without issue). Note that the functions to initialize the LEDs and remotes
are also moved after hid_hw_start to maintain their own dependency chains.
Fixes: 7704ac937345 ("HID: wacom: implement generic HID handling for pen generic devices")
Cc: stable(a)vger.kernel.org # v3.18+
Suggested-by: Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
Signed-off-by: Jason Gerecke <jason.gerecke(a)wacom.com>
---
drivers/hid/wacom_sys.c | 63 ++++++++++++++++++++++++++++-------------
1 file changed, 43 insertions(+), 20 deletions(-)
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index b613f11ed9498..2bc45b24075c3 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2087,7 +2087,7 @@ static int wacom_allocate_inputs(struct wacom *wacom)
return 0;
}
-static int wacom_register_inputs(struct wacom *wacom)
+static int wacom_setup_inputs(struct wacom *wacom)
{
struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev;
struct wacom_wac *wacom_wac = &(wacom->wacom_wac);
@@ -2106,10 +2106,6 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(pen_input_dev);
wacom_wac->pen_input = NULL;
pen_input_dev = NULL;
- } else {
- error = input_register_device(pen_input_dev);
- if (error)
- goto fail;
}
error = wacom_setup_touch_input_capabilities(touch_input_dev, wacom_wac);
@@ -2118,10 +2114,6 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(touch_input_dev);
wacom_wac->touch_input = NULL;
touch_input_dev = NULL;
- } else {
- error = input_register_device(touch_input_dev);
- if (error)
- goto fail;
}
error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac);
@@ -2130,7 +2122,34 @@ static int wacom_register_inputs(struct wacom *wacom)
input_free_device(pad_input_dev);
wacom_wac->pad_input = NULL;
pad_input_dev = NULL;
- } else {
+ }
+
+ return 0;
+}
+
+static int wacom_register_inputs(struct wacom *wacom)
+{
+ struct input_dev *pen_input_dev, *touch_input_dev, *pad_input_dev;
+ struct wacom_wac *wacom_wac = &(wacom->wacom_wac);
+ int error = 0;
+
+ pen_input_dev = wacom_wac->pen_input;
+ touch_input_dev = wacom_wac->touch_input;
+ pad_input_dev = wacom_wac->pad_input;
+
+ if (pen_input_dev) {
+ error = input_register_device(pen_input_dev);
+ if (error)
+ goto fail;
+ }
+
+ if (touch_input_dev) {
+ error = input_register_device(touch_input_dev);
+ if (error)
+ goto fail;
+ }
+
+ if (pad_input_dev) {
error = input_register_device(pad_input_dev);
if (error)
goto fail;
@@ -2383,6 +2402,20 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
if (error)
goto fail;
+ error = wacom_setup_inputs(wacom);
+ if (error)
+ goto fail;
+
+ if (features->type == HID_GENERIC)
+ connect_mask |= HID_CONNECT_DRIVER;
+
+ /* Regular HID work starts now */
+ error = hid_hw_start(hdev, connect_mask);
+ if (error) {
+ hid_err(hdev, "hw start failed\n");
+ goto fail;
+ }
+
error = wacom_register_inputs(wacom);
if (error)
goto fail;
@@ -2397,16 +2430,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
goto fail;
}
- if (features->type == HID_GENERIC)
- connect_mask |= HID_CONNECT_DRIVER;
-
- /* Regular HID work starts now */
- error = hid_hw_start(hdev, connect_mask);
- if (error) {
- hid_err(hdev, "hw start failed\n");
- goto fail;
- }
-
if (!wireless) {
/* Note that if query fails it is not a hard failure */
wacom_query_tablet_data(wacom);
--
2.43.0