The original implementation of memcpy_sglist() was broken because it
didn't handle overlapping scatterlists. The current implementation is
broken too: it calls the skcipher_walk functions, which can fail, but it
ignores any errors from those functions.
Fix it by replacing it with a new implementation written from scratch
that always succeeds. The new implementation is also a bit faster, since
it avoids the overhead of skcipher_walk, which includes a lot of
functionality (such as alignmask handling) that's irrelevant here.
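For illustration, a minimal usage sketch (names and sizes made up for
this example; buf is a kernel buffer of at least 96 bytes): two
scatterlists mapping overlapping ranges of the same buffer, which the
new implementation must handle like memmove():

	struct scatterlist src_sg, dst_sg;

	/* Map overlapping 64-byte ranges of the same buffer. */
	sg_init_one(&src_sg, buf, 64);
	sg_init_one(&dst_sg, buf + 32, 64);

	/* Safe despite the 32-byte overlap; acts like memmove(). */
	memcpy_sglist(&dst_sg, &src_sg, 64);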
Reported-by: Colin Ian King <coking(a)nvidia.com>
Closes: https://lore.kernel.org/r/20251114122620.111623-1-coking@nvidia.com
Fixes: 131bdceca1f0 ("crypto: scatterwalk - Add memcpy_sglist")
Fixes: 0f8d42bf128d ("crypto: scatterwalk - Move skcipher walk and use it for memcpy_sglist")
Cc: stable(a)vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
---
crypto/scatterwalk.c | 103 ++++++++++++++++++++++++++++++-----
include/crypto/scatterwalk.h | 52 +++++++++++-------
2 files changed, 121 insertions(+), 34 deletions(-)
diff --git a/crypto/scatterwalk.c b/crypto/scatterwalk.c
index 1d010e2a1b1a..af6c17bfcadb 100644
--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -99,30 +99,107 @@ void memcpy_to_sglist(struct scatterlist *sg, unsigned int start,
scatterwalk_start_at_pos(&walk, sg, start);
memcpy_to_scatterwalk(&walk, buf, nbytes);
}
EXPORT_SYMBOL_GPL(memcpy_to_sglist);
+/**
+ * memcpy_sglist() - Copy data from one scatterlist to another
+ * @dst: The destination scatterlist. Can be NULL if @nbytes == 0.
+ * @src: The source scatterlist. Can be NULL if @nbytes == 0.
+ * @nbytes: Number of bytes to copy
+ *
+ * The scatterlists can overlap. Hence this really acts like memmove(), not
+ * memcpy().
+ *
+ * Context: Any context
+ */
void memcpy_sglist(struct scatterlist *dst, struct scatterlist *src,
unsigned int nbytes)
{
- struct skcipher_walk walk = {};
+ unsigned int src_offset, dst_offset;
- if (unlikely(nbytes == 0)) /* in case sg == NULL */
+ if (unlikely(nbytes == 0)) /* in case src and/or dst is NULL */
return;
- walk.total = nbytes;
-
- scatterwalk_start(&walk.in, src);
- scatterwalk_start(&walk.out, dst);
+ src_offset = src->offset;
+ dst_offset = dst->offset;
+ for (;;) {
+ /* Compute the length to copy this step. */
+ unsigned int len = min3(src->offset + src->length - src_offset,
+ dst->offset + dst->length - dst_offset,
+ nbytes);
+ struct page *src_page = sg_page(src);
+ struct page *dst_page = sg_page(dst);
+ const void *src_virt;
+ void *dst_virt;
+
+ if (IS_ENABLED(CONFIG_HIGHMEM)) {
+ /* HIGHMEM: we may have to actually map the pages. */
+ const unsigned int src_oip = offset_in_page(src_offset);
+ const unsigned int dst_oip = offset_in_page(dst_offset);
+ const unsigned int limit = PAGE_SIZE;
+
+ /* Further limit len to not cross a page boundary. */
+ len = min3(len, limit - src_oip, limit - dst_oip);
+
+ /* Compute the source and destination pages. */
+ src_page += src_offset / PAGE_SIZE;
+ dst_page += dst_offset / PAGE_SIZE;
+
+ if (src_page != dst_page) {
+ /*
+ * Copy between different pages.
+ * No need for memmove(), as the pages differ.
+ */
+ src_virt = kmap_local_page(src_page);
+ dst_virt = kmap_local_page(dst_page);
+ memcpy(dst_virt + dst_oip, src_virt + src_oip,
+ len);
+ flush_dcache_page(dst_page);
+ kunmap_local(dst_virt);
+ kunmap_local(src_virt);
+ } else if (src_oip != dst_oip) {
+ /* Copy between different parts of same page */
+ dst_virt = kmap_local_page(dst_page);
+ memmove(dst_virt + dst_oip, dst_virt + src_oip,
+ len);
+ flush_dcache_page(dst_page);
+ kunmap_local(dst_virt);
+ } /* Exact overlap. No action needed. */
+ } else {
+ /*
+ * !HIGHMEM: no mapping needed. Just work in the linear
+ * buffer of each sg entry.
+ */
+ src_virt = page_address(src_page) + src_offset;
+ dst_virt = page_address(dst_page) + dst_offset;
+ if (src_virt != dst_virt) {
+ memmove(dst_virt, src_virt, len);
+ if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+ __scatterwalk_flush_dcache_pages(
+ dst_page, dst_offset, len);
+ }
+ }
+ nbytes -= len;
+ if (nbytes == 0) /* No more to copy? */
+ break;
- skcipher_walk_first(&walk, true);
- do {
- if (walk.src.virt.addr != walk.dst.virt.addr)
- memcpy(walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes);
- skcipher_walk_done(&walk, 0);
- } while (walk.nbytes);
+ /*
+ * There's more to copy. Advance the offsets by the length
+ * copied this step, and advance the sg entries as needed.
+ */
+ src_offset += len;
+ if (src_offset >= src->offset + src->length) {
+ src = sg_next(src);
+ src_offset = src->offset;
+ }
+ dst_offset += len;
+ if (dst_offset >= dst->offset + dst->length) {
+ dst = sg_next(dst);
+ dst_offset = dst->offset;
+ }
+ }
}
EXPORT_SYMBOL_GPL(memcpy_sglist);
struct scatterlist *scatterwalk_ffwd(struct scatterlist dst[2],
struct scatterlist *src,
diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index 83d14376ff2b..f485454e3955 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -225,10 +225,38 @@ static inline void scatterwalk_done_src(struct scatter_walk *walk,
{
scatterwalk_unmap(walk);
scatterwalk_advance(walk, nbytes);
}
+/*
+ * Flush the dcache of any pages that overlap the region
+ * [offset, offset + nbytes) relative to base_page.
+ *
+ * This should be called only when ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, to ensure
+ * that all relevant code (including the call to sg_page() in the caller, if
+ * applicable) gets fully optimized out when !ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE.
+ */
+static inline void __scatterwalk_flush_dcache_pages(struct page *base_page,
+ unsigned int offset,
+ unsigned int nbytes)
+{
+ unsigned int num_pages;
+
+ base_page += offset / PAGE_SIZE;
+ offset %= PAGE_SIZE;
+
+ /*
+ * This is an overflow-safe version of
+ * num_pages = DIV_ROUND_UP(offset + nbytes, PAGE_SIZE).
+ */
+ num_pages = nbytes / PAGE_SIZE;
+ num_pages += DIV_ROUND_UP(offset + (nbytes % PAGE_SIZE), PAGE_SIZE);
+
+ for (unsigned int i = 0; i < num_pages; i++)
+ flush_dcache_page(base_page + i);
+}
+
/**
* scatterwalk_done_dst() - Finish one step of a walk of destination scatterlist
* @walk: the scatter_walk
* @nbytes: the number of bytes processed this step, less than or equal to the
* number of bytes that scatterwalk_next() returned.
@@ -238,31 +266,13 @@ static inline void scatterwalk_done_src(struct scatter_walk *walk,
*/
static inline void scatterwalk_done_dst(struct scatter_walk *walk,
unsigned int nbytes)
{
scatterwalk_unmap(walk);
- /*
- * Explicitly check ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE instead of just
- * relying on flush_dcache_page() being a no-op when not implemented,
- * since otherwise the BUG_ON in sg_page() does not get optimized out.
- * This also avoids having to consider whether the loop would get
- * reliably optimized out or not.
- */
- if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) {
- struct page *base_page;
- unsigned int offset;
- int start, end, i;
-
- base_page = sg_page(walk->sg);
- offset = walk->offset;
- start = offset >> PAGE_SHIFT;
- end = start + (nbytes >> PAGE_SHIFT);
- end += (offset_in_page(offset) + offset_in_page(nbytes) +
- PAGE_SIZE - 1) >> PAGE_SHIFT;
- for (i = start; i < end; i++)
- flush_dcache_page(base_page + i);
- }
+ if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+ __scatterwalk_flush_dcache_pages(sg_page(walk->sg),
+ walk->offset, nbytes);
scatterwalk_advance(walk, nbytes);
}
void scatterwalk_skip(struct scatter_walk *walk, unsigned int nbytes);
--
2.51.2
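As a sanity check on the overflow-safe page-count computation in
__scatterwalk_flush_dcache_pages() above, a worked example assuming
PAGE_SIZE == 4096, offset == 4000 (after the %= PAGE_SIZE step), and
nbytes == 8392:

	num_pages  = 8392 / 4096                            = 2
	num_pages += DIV_ROUND_UP(4000 + (8392 % 4096), 4096)
	           = DIV_ROUND_UP(4200, 4096)               = 2
	                                              total = 4 pages

which matches the naive DIV_ROUND_UP(4000 + 8392, 4096) = 4, without
risking overflow in offset + nbytes.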
Commit 4dfd4bba8578 ("selftests/mm/uffd: refactor non-composite global
vars into struct") moved some of the operations previously implemented
in uffd_setup_environment() earlier in the main test loop.
The calculation of nr_pages, which involves a division by page_size, now
occurs before checking that default_huge_page_size() returns a non-zero
value. This leads to a division-by-zero error on systems with
!CONFIG_HUGETLB.
Fix this by moving the non-zero page_size check before the nr_pages
calculation, as it was originally implemented.
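For reference, a sketch of the hazard (values illustrative): with
!CONFIG_HUGETLB, default_huge_page_size() returns 0, so the nr_pages
division faults before the old skip check was ever reached:

	gopts.page_size = default_huge_page_size();	/* 0 here */
	gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
			 / gopts.page_size;		/* div-by-zero */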
Cc: stable(a)vger.kernel.org
Fixes: 4dfd4bba8578 ("selftests/mm/uffd: refactor non-composite global vars into struct")
Signed-off-by: Carlos Llamas <cmllamas(a)google.com>
---
tools/testing/selftests/mm/uffd-unit-tests.c | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 9e3be2ee7f1b..f917b4c4c943 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -1758,10 +1758,15 @@ int main(int argc, char *argv[])
uffd_test_ops = mem_type->mem_ops;
uffd_test_case_ops = test->test_case_ops;
- if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
+ if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB)) {
gopts.page_size = default_huge_page_size();
- else
+ if (gopts.page_size == 0) {
+ uffd_test_skip("huge page size is 0, feature missing?");
+ continue;
+ }
+ } else {
gopts.page_size = psize();
+ }
/* Ensure we have at least 2 pages */
gopts.nr_pages = MAX(UFFD_TEST_MEM_SIZE, gopts.page_size * 2)
@@ -1776,12 +1781,6 @@ int main(int argc, char *argv[])
continue;
uffd_test_start("%s on %s", test->name, mem_type->name);
- if ((mem_type->mem_flag == MEM_HUGETLB ||
- mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
- (default_huge_page_size() == 0)) {
- uffd_test_skip("huge page size is 0, feature missing?");
- continue;
- }
if (!uffd_feature_supported(test)) {
uffd_test_skip("feature missing");
continue;
--
2.51.2.1041.gc1ab5b90ca-goog
From: NeilBrown <neil(a)brown.name>
A recent change to clamp_t() in 6.1.y caused fs/nfsd/nfs4state.c to fail
to compile with gcc-9. The code in nfsd4_get_drc_mem() was written with
the assumption that when "max < min",
clamp(val, min, max)
would return max. This assumption is not documented as an API promise
and the change caused a compile failure if it could be statically
determined that "max < min".
The relevant code was no longer present upstream when commit 1519fbc8832b
("minmax.h: use BUILD_BUG_ON_MSG() for the lo < hi test in clamp()")
landed there, so there is no upstream change to nfsd4_get_drc_mem() to
backport.
There is no clear case that the existing code in nfsd4_get_drc_mem()
is functioning incorrectly. The goal of this patch is to permit the clean
application of commit 1519fbc8832b ("minmax.h: use BUILD_BUG_ON_MSG() for
the lo < hi test in clamp()"), and any commits that depend on it, to LTS
kernels without affecting the ability to compile those kernels. This is
done by open-coding the __clamp() macro sans the built-in type checking.
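For clarity, a sketch of the ordering semantics the open-coded version
preserves (nfsd4_get_drc_mem() uses it with min == slotsize and
max == total_avail / scale_factor):

	if (val > max)
		val = max;	/* upper bound wins, even when max < min */
	else if (val < min)
		val = min;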
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220745#c0
Signed-off-by: NeilBrown <neil(a)brown.name>
Stable-dep-of: 1519fbc8832b ("minmax.h: use BUILD_BUG_ON_MSG() for the lo < hi test in clamp()")
Signed-off-by: Chuck Lever <chuck.lever(a)oracle.com>
---
fs/nfsd/nfs4state.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
Changes since Neil's post:
* Editorial changes to the commit message
* Attempt to address David's review comments
* Applied to linux-6.12.y, passed NFSD upstream CI suite
This patch is intended to be applied to linux-6.12.y, and should
apply cleanly to other LTS kernels since nfsd4_get_drc_mem hasn't
changed since v5.4.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7b0fabf8c657..41545933dd18 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1983,8 +1983,10 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn
*/
scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
- avail = clamp_t(unsigned long, avail, slotsize,
- total_avail/scale_factor);
+ if (avail > total_avail / scale_factor)
+ avail = total_avail / scale_factor;
+ else if (avail < slotsize)
+ avail = slotsize;
num = min_t(int, num, avail / slotsize);
num = max_t(int, num, 1);
nfsd_drc_mem_used += num * slotsize;
--
2.51.0
Freezing the request queue from inside sysfs store callbacks may cause a
deadlock in combination with the dm-multipath driver and the
queue_if_no_path option. Additionally, freezing the request queue slows
down system boot on systems where sysfs attributes are set synchronously.
Fix this by removing the blk_mq_freeze_queue() / blk_mq_unfreeze_queue()
calls from the store callbacks that do not strictly need them.
Add the __data_racy annotation to request_queue.rq_timeout to suppress
KCSAN data race reports about the rq_timeout reads.
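A minimal sketch of the pattern (names assumed for illustration): the
store callbacks now publish new values with plain or WRITE_ONCE() stores
while readers may run concurrently, and __data_racy documents that the
race is intentional:

	struct example_queue {
		unsigned int __data_racy rq_timeout;	/* read locklessly */
	};

	static void example_set_timeout(struct example_queue *q,
					unsigned int timeout)
	{
		/* Racy by design; readers see the old or new value. */
		q->rq_timeout = timeout;
	}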
This patch may cause a small delay in applying the new settings.
For all the attributes affected by this patch, I/O will complete
correctly whether the old or the new value of the attribute is used.
This patch affects the following sysfs attributes:
* io_poll_delay
* io_timeout
* nomerges
* read_ahead_kb
* rq_affinity
Here is an example of a deadlock triggered by running test srp/002
if this patch is not applied:
task:multipathd
Call Trace:
<TASK>
__schedule+0x8c1/0x1bf0
schedule+0xdd/0x270
schedule_preempt_disabled+0x1c/0x30
__mutex_lock+0xb89/0x1650
mutex_lock_nested+0x1f/0x30
dm_table_set_restrictions+0x823/0xdf0
__bind+0x166/0x590
dm_swap_table+0x2a7/0x490
do_resume+0x1b1/0x610
dev_suspend+0x55/0x1a0
ctl_ioctl+0x3a5/0x7e0
dm_ctl_ioctl+0x12/0x20
__x64_sys_ioctl+0x127/0x1a0
x64_sys_call+0xe2b/0x17d0
do_syscall_64+0x96/0x3a0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
</TASK>
task:(udev-worker)
Call Trace:
<TASK>
__schedule+0x8c1/0x1bf0
schedule+0xdd/0x270
blk_mq_freeze_queue_wait+0xf2/0x140
blk_mq_freeze_queue_nomemsave+0x23/0x30
queue_ra_store+0x14e/0x290
queue_attr_store+0x23e/0x2c0
sysfs_kf_write+0xde/0x140
kernfs_fop_write_iter+0x3b2/0x630
vfs_write+0x4fd/0x1390
ksys_write+0xfd/0x230
__x64_sys_write+0x76/0xc0
x64_sys_call+0x276/0x17d0
do_syscall_64+0x96/0x3a0
entry_SYSCALL_64_after_hwframe+0x4b/0x53
</TASK>
Cc: Christoph Hellwig <hch(a)lst.de>
Cc: Ming Lei <ming.lei(a)redhat.com>
Cc: Nilay Shroff <nilay(a)linux.ibm.com>
Cc: Martin Wilck <mwilck(a)suse.com>
Cc: Benjamin Marzinski <bmarzins(a)redhat.com>
Cc: stable(a)vger.kernel.org
Fixes: af2814149883 ("block: freeze the queue in queue_attr_store")
Signed-off-by: Bart Van Assche <bvanassche(a)acm.org>
---
block/blk-sysfs.c | 26 ++++++++------------------
include/linux/blkdev.h | 2 +-
2 files changed, 9 insertions(+), 19 deletions(-)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 76c47fe9b8d6..8684c57498cc 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -143,21 +143,22 @@ queue_ra_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long ra_kb;
ssize_t ret;
- unsigned int memflags;
struct request_queue *q = disk->queue;
ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
/*
- * ->ra_pages is protected by ->limits_lock because it is usually
- * calculated from the queue limits by queue_limits_commit_update.
+ * The ->ra_pages change below is protected by ->limits_lock because it
+ * is usually calculated from the queue limits by
+ * queue_limits_commit_update().
+ *
+ * bdi->ra_pages reads are not serialized against bdi->ra_pages writes.
+ * Use WRITE_ONCE() to write bdi->ra_pages once.
*/
mutex_lock(&q->limits_lock);
- memflags = blk_mq_freeze_queue(q);
- disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
+ WRITE_ONCE(disk->bdi->ra_pages, ra_kb >> (PAGE_SHIFT - 10));
mutex_unlock(&q->limits_lock);
- blk_mq_unfreeze_queue(q, memflags);
return ret;
}
@@ -375,21 +376,18 @@ static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned long nm;
- unsigned int memflags;
struct request_queue *q = disk->queue;
ssize_t ret = queue_var_store(&nm, page, count);
if (ret < 0)
return ret;
- memflags = blk_mq_freeze_queue(q);
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
if (nm == 2)
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
else if (nm)
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
- blk_mq_unfreeze_queue(q, memflags);
return ret;
}
@@ -409,7 +407,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
#ifdef CONFIG_SMP
struct request_queue *q = disk->queue;
unsigned long val;
- unsigned int memflags;
ret = queue_var_store(&val, page, count);
if (ret < 0)
@@ -421,7 +418,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
* are accessed individually using atomic test_bit operation. So we
* don't grab any lock while updating these flags.
*/
- memflags = blk_mq_freeze_queue(q);
if (val == 2) {
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, q);
@@ -432,7 +428,6 @@ queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
blk_queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
blk_queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q);
}
- blk_mq_unfreeze_queue(q, memflags);
#endif
return ret;
}
@@ -446,11 +441,9 @@ static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
size_t count)
{
- unsigned int memflags;
ssize_t ret = count;
struct request_queue *q = disk->queue;
- memflags = blk_mq_freeze_queue(q);
if (!(q->limits.features & BLK_FEAT_POLL)) {
ret = -EINVAL;
goto out;
@@ -459,7 +452,6 @@ static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
pr_info_ratelimited("please use driver specific parameters instead.\n");
out:
- blk_mq_unfreeze_queue(q, memflags);
return ret;
}
@@ -472,7 +464,7 @@ static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
size_t count)
{
- unsigned int val, memflags;
+ unsigned int val;
int err;
struct request_queue *q = disk->queue;
@@ -480,9 +472,7 @@ static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
if (err || val == 0)
return -EINVAL;
- memflags = blk_mq_freeze_queue(q);
blk_queue_rq_timeout(q, msecs_to_jiffies(val));
- blk_mq_unfreeze_queue(q, memflags);
return count;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2fff8a80dbd2..cb4ba09959ee 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -495,7 +495,7 @@ struct request_queue {
*/
unsigned long queue_flags;
- unsigned int rq_timeout;
+ unsigned int __data_racy rq_timeout;
unsigned int queue_depth;
The following commit has been merged into the x86/boot branch of tip:
Commit-ID: 1c7ac68c05bc0327d725dd10aa05f5120f868250
Gitweb: https://git.kernel.org/tip/1c7ac68c05bc0327d725dd10aa05f5120f868250
Author: Yazen Ghannam <yazen.ghannam(a)amd.com>
AuthorDate: Tue, 11 Nov 2025 14:53:57
Committer: Borislav Petkov (AMD) <bp(a)alien8.de>
CommitterDate: Fri, 14 Nov 2025 21:00:26 +01:00
x86/acpi/boot: Correct acpi_is_processor_usable() check again
ACPI v6.3 defined a new "Online Capable" MADT LAPIC flag. This bit is
used in conjunction with the "Enabled" MADT LAPIC flag to determine if
a CPU can be enabled/hotplugged by the OS after boot.
Before the new bit was defined, the "Enabled" bit was explicitly
described like this (ACPI v6.0 wording provided):
"If zero, this processor is unusable, and the operating system
support will not attempt to use it"
This means that CPU hotplug (based on MADT) is not possible. Many BIOS
implementations follow this guidance. They may include LAPIC entries in
MADT for unavailable CPUs, but since these entries are marked with
"Enabled=0" it is expected that the OS will completely ignore these
entries.
However, QEMU will do the same (include entries with "Enabled=0") for
the purpose of allowing CPU hotplug within the guest.
Comment from QEMU function pc_madt_cpu_entry():
/* ACPI spec says that LAPIC entry for non present
* CPU may be omitted from MADT or it must be marked
* as disabled. However omitting non present CPU from
* MADT breaks hotplug on linux. So possible CPUs
* should be put in MADT but kept disabled.
*/
Recent Linux topology changes broke the QEMU use case. A follow-up fix
for the QEMU use case then broke bare metal topology enumeration.
Rework the Linux MADT LAPIC flags check to allow the QEMU use case only
for guests and to maintain the ACPI spec behavior for bare metal.
Remove an unnecessary check added to fix a bare metal case introduced by
the QEMU "fix".
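The resulting decision logic can be summarized as follows (a sketch of
the intended outcomes):

	Enabled | FW has Online Capable | Online Capable | Bare metal | Usable
	--------+-----------------------+----------------+------------+-------
	   1    |           -           |        -       |      -     |  yes
	   0    |          yes          |       set      |      -     |  yes
	   0    |          yes          |      clear     |      -     |  no
	   0    |          no           |        -       |     no     |  yes
	   0    |          no           |        -       |     yes    |  no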
[ bp: Change logic as Michal suggested. ]
Fixes: fed8d8773b8e ("x86/acpi/boot: Correct acpi_is_processor_usable() check")
Fixes: f0551af02130 ("x86/topology: Ignore non-present APIC IDs in a present package")
Closes: https://lore.kernel.org/r/20251024204658.3da9bf3f.michal.pecio@gmail.com
Reported-by: Michal Pecio <michal.pecio(a)gmail.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam(a)amd.com>
Tested-by: Michal Pecio <michal.pecio(a)gmail.com>
Tested-by: Ricardo Neri <ricardo.neri-calderon(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/20251111145357.4031846-1-yazen.ghannam@amd.com
---
arch/x86/kernel/acpi/boot.c | 12 ++++++++----
arch/x86/kernel/cpu/topology.c | 15 ---------------
2 files changed, 8 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 9fa321a..d6138b2 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -35,6 +35,7 @@
#include <asm/smp.h>
#include <asm/i8259.h>
#include <asm/setup.h>
+#include <asm/hypervisor.h>
#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
@@ -164,11 +165,14 @@ static bool __init acpi_is_processor_usable(u32 lapic_flags)
if (lapic_flags & ACPI_MADT_ENABLED)
return true;
- if (!acpi_support_online_capable ||
- (lapic_flags & ACPI_MADT_ONLINE_CAPABLE))
- return true;
+ if (acpi_support_online_capable)
+ return lapic_flags & ACPI_MADT_ONLINE_CAPABLE;
- return false;
+ /*
+ * QEMU expects legacy "Enabled=0" LAPIC entries to be counted as usable
+ * in order to support CPU hotplug in guests.
+ */
+ return !hypervisor_is_type(X86_HYPER_NATIVE);
}
static int __init
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index 6073a16..425404e 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,6 @@
#include <xen/xen.h>
#include <asm/apic.h>
-#include <asm/hypervisor.h>
#include <asm/io_apic.h>
#include <asm/mpspec.h>
#include <asm/msr.h>
@@ -240,20 +239,6 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
cpuid_to_apicid[cpu] = apic_id;
topo_set_cpuids(cpu, apic_id, acpi_id);
} else {
- u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);
-
- /*
- * Check for present APICs in the same package when running
- * on bare metal. Allow the bogosity in a guest.
- */
- if (hypervisor_is_type(X86_HYPER_NATIVE) &&
- topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
- pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
- apic_id);
- topo_info.nr_rejected_cpus++;
- return;
- }
-
topo_info.nr_disabled_cpus++;
}
On a Xen dom0 boot, this feature does not behave as expected, and we end
up calculating:
num_roots = 1
num_nodes = 2
roots_per_node = 0
This causes a divide-by-zero in the modulus inside the loop.
This change adds a couple of guards for invalid states where we might
get a divide-by-zero.
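A sketch of the failing computation (loop details assumed from context;
values as observed on Xen dom0):

	num_roots = 1;
	num_nodes = amd_num_nodes();		/* 2 on this system */
	roots_per_node = num_roots / num_nodes;	/* 0 */
	/* ... later, inside the loop ... */
	... % roots_per_node			/* divide-by-zero */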
Signed-off-by: Steven Noonan <steven(a)uplinklabs.net>
Signed-off-by: Ariadne Conill <ariadne(a)ariadne.space>
CC: Yazen Ghannam <yazen.ghannam(a)amd.com>
CC: x86(a)vger.kernel.org
CC: stable(a)vger.kernel.org
---
arch/x86/kernel/amd_node.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c
index 3d0a4768d603c..cdc6ba224d4ad 100644
--- a/arch/x86/kernel/amd_node.c
+++ b/arch/x86/kernel/amd_node.c
@@ -282,6 +282,17 @@ static int __init amd_smn_init(void)
return -ENODEV;
num_nodes = amd_num_nodes();
+
+ if (!num_nodes)
+ return -ENODEV;
+
+	/* Possibly a virtualized environment (e.g. Xen) where we will get
+	 * roots_per_node=0 if the number of roots is fewer than the number
+	 * of nodes.
+	 */
+ if (num_roots < num_nodes)
+ return -ENODEV;
+
amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL);
if (!amd_roots)
return -ENOMEM;
--
2.51.2
Hi.
I've checked the C repro [1] on the 6.1.y branch and found that it still
produces the crash on 6.1.y. I noticed that the syzbot bisection result [2]
is incorrect: the hang was actually fixed by upstream commit b0ad381fa769
("btrfs: fix deadlock with fiemap and extent locking"). I also saw that
CVE-2024-35784 [3][4] is directly related to that syzbot report, so the
syzbot reproducer provides an additional way to check for CVE-2024-35784.
I attempted to fix CVE-2024-35784 in stable 6.1.y (over v6.1.157), and
found that the initial fix commit b0ad381fa769 ("btrfs: fix deadlock with
fiemap and extent locking") introduced regressions [5][6].
IMHO, here is the minimal patch series needed to eliminate CVE-2024-35784 from 6.1.y:
b0ad381fa769 ("btrfs: fix deadlock with fiemap and extent locking") (Initial fix of the CVE-2024-35784)
a1a4a9ca77f1 ("btrfs: fix race between ordered extent completion and fiemap") (Fixes: b0ad381fa769)
978b63f7464a ("btrfs: fix race when detecting delalloc ranges during fiemap") (Fixes: b0ad381fa769)
1cab1375ba6d ("btrfs: reuse cloned extent buffer during fiemap to avoid re-allocations") (Optimization: 978b63f7464a)
53e24158684b ("btrfs: set start on clone before calling copy_extent_buffer_full") (Fixes: 1cab1375ba6d)
Required patches attached.
Only two patches in the series needed minor backport modifications due to
btrfs code differences in v6.1.157. The remaining patches are identical
to upstream.
Regards,
AK
Reported-by: syzbot+f8217aae382555004877(a)syzkaller.appspotmail.com
----
[1] https://syzkaller.appspot.com/text?tag=ReproC&x=12b4c88b280000
[2] https://syzkaller.appspot.com/bug?extid=f8217aae382555004877
[3] https://lore.kernel.org/all/2024051704-CVE-2024-35784-6dec@gregkh/
[4] https://cve.org/CVERecord/?id=CVE-2024-35784
[5] https://lore.kernel.org/linux-btrfs/cover.1709202499.git.fdmanana@suse.com/
[6] https://lore.kernel.org/all/20240304211551.880347593@linuxfoundation.org/
Here is a series adding support for 6 Winbond SPI NOR chips. Describing
these chips explicitly is needed because otherwise the block protection
feature is not available; everything else works fine without dedicated
entries.
In practice I am only adding 6 very similar IDs, but I split the commits
because the amount of metadata needed to show that all the chips have
been tested and work is pretty big.
As the commits simply add an ID, I am Cc'ing stable in the hope of
getting these backported to LTS kernels, as allowed by the stable rules
(see the link below; I hope I am doing this right).
Link: https://elixir.bootlin.com/linux/v6.17.7/source/Documentation/process/stabl…
Thanks,
Miquèl
---
Miquel Raynal (6):
mtd: spi-nor: winbond: Add support for W25Q01NWxxIQ chips
mtd: spi-nor: winbond: Add support for W25Q01NWxxIM chips
mtd: spi-nor: winbond: Add support for W25Q02NWxxIM chips
mtd: spi-nor: winbond: Add support for W25H512NWxxAM chips
mtd: spi-nor: winbond: Add support for W25H01NWxxAM chips
mtd: spi-nor: winbond: Add support for W25H02NWxxAM chips
drivers/mtd/spi-nor/winbond.c | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
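For reference, each patch adds a table entry along these lines (a
sketch; the ID bytes and name here are hypothetical and come from the
respective datasheets, assuming the current SNOR_ID()-based table format
in drivers/mtd/spi-nor/winbond.c):

	{
		.id = SNOR_ID(0xef, 0x80, 0x21),	/* hypothetical */
		.name = "w25q01nwxxim",
	},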
---
base-commit: 479ba7fc704936b74a91ee352fe113d6391d562f
change-id: 20251105-winbond-v6-18-rc1-spi-nor-7f78cb2785d6
Best regards,
--
Miquel Raynal <miquel.raynal(a)bootlin.com>