Generally PASID support requires ACS settings that usually create
single device groups, but there are some niche cases where we can get
multi-device groups and still have working PASID support. The primary
issue is that PCI switches are not required to treat PASID tagged TLPs
specially so appropriate ACS settings are required to route all TLPs to
the host bridge if PASID is going to work properly.
pci_enable_pasid() does check that each device that will use PASID has
the proper ACS settings to achieve this routing.
However, no-PASID devices can be combined with PASID capable devices
within the same topology using non-uniform ACS settings. In this case
the no-PASID devices may not have strict route to host ACS flags and
end up being grouped with the PASID devices.
This configuration fails to allow use of the PASID within the iommu
core code which wrongly checks if the no-PASID device supports PASID.
Fix this by ignoring no-PASID devices during the PASID validation. They
will never issue a PASID TLP anyhow so they can be ignored.
Fixes: c404f55c26fc ("iommu: Validate the PASID in iommu_attach_device_pasid()")
Cc: stable(a)vger.kernel.org
Signed-off-by: Tushar Dave <tdave(a)nvidia.com>
---
drivers/iommu/iommu.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 4f91a740c15f..e01df4c3e709 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3440,7 +3440,13 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
mutex_lock(&group->mutex);
for_each_group_device(group, device) {
- if (pasid >= device->dev->iommu->max_pasids) {
+ /*
+ * Skip PASID validation for devices without PASID support
+ * (max_pasids = 0). These devices cannot issue transactions
+ * with PASID, so they don't affect group's PASID usage.
+ */
+ if ((device->dev->iommu->max_pasids > 0) &&
+ (pasid >= device->dev->iommu->max_pasids)) {
ret = -EINVAL;
goto out_unlock;
}
--
2.34.1
From: Dave Penkler <dpenkler(a)gmail.com>
[ Upstream commit 76d54fd5471b10ee993c217928a39d7351eaff5c ]
In the accel read and write functions the transfer length
was being calculated by an if statement setting it to
the lesser of the remaining bytes to read/write and the
fifo size.
Replace both instances with min() which is clearer and
more compact.
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Julia Lawall <julia.lawall(a)inria.fr>
Closes: https://lore.kernel.org/r/202501182153.qHfL4Fbc-lkp@intel.com/
Signed-off-by: Dave Penkler <dpenkler(a)gmail.com>
Link: https://lore.kernel.org/r/20250120145030.29684-1-dpenkler@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/staging/gpib/agilent_82350b/agilent_82350b.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c
index 3f4f95b7fe34a..0ba592dc98490 100644
--- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c
+++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c
@@ -66,10 +66,7 @@ int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_t lengt
int j;
int count;
- if (num_fifo_bytes - i < agilent_82350b_fifo_size)
- block_size = num_fifo_bytes - i;
- else
- block_size = agilent_82350b_fifo_size;
+ block_size = min(num_fifo_bytes - i, agilent_82350b_fifo_size);
set_transfer_counter(a_priv, block_size);
writeb(ENABLE_TI_TO_SRAM | DIRECTION_GPIB_TO_HOST,
a_priv->gpib_base + SRAM_ACCESS_CONTROL_REG);
@@ -200,10 +197,7 @@ int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size_t leng
for (i = 1; i < fifotransferlength;) {
clear_bit(WRITE_READY_BN, &tms_priv->state);
- if (fifotransferlength - i < agilent_82350b_fifo_size)
- block_size = fifotransferlength - i;
- else
- block_size = agilent_82350b_fifo_size;
+ block_size = min(fifotransferlength - i, agilent_82350b_fifo_size);
set_transfer_counter(a_priv, block_size);
for (j = 0; j < block_size; ++j, ++i) {
// load data into board's sram
--
2.39.5
From: Dave Penkler <dpenkler(a)gmail.com>
[ Upstream commit 76d54fd5471b10ee993c217928a39d7351eaff5c ]
In the accel read and write functions the transfer length
was being calculated by an if statement setting it to
the lesser of the remaining bytes to read/write and the
fifo size.
Replace both instances with min() which is clearer and
more compact.
Reported-by: kernel test robot <lkp(a)intel.com>
Reported-by: Julia Lawall <julia.lawall(a)inria.fr>
Closes: https://lore.kernel.org/r/202501182153.qHfL4Fbc-lkp@intel.com/
Signed-off-by: Dave Penkler <dpenkler(a)gmail.com>
Link: https://lore.kernel.org/r/20250120145030.29684-1-dpenkler@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
---
drivers/staging/gpib/agilent_82350b/agilent_82350b.c | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c
index 8e2334fe5c9b8..533cc956b3f6c 100644
--- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c
+++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c
@@ -69,10 +69,7 @@ int agilent_82350b_accel_read(gpib_board_t *board, uint8_t *buffer, size_t lengt
int j;
int count;
- if (num_fifo_bytes - i < agilent_82350b_fifo_size)
- block_size = num_fifo_bytes - i;
- else
- block_size = agilent_82350b_fifo_size;
+ block_size = min(num_fifo_bytes - i, agilent_82350b_fifo_size);
set_transfer_counter(a_priv, block_size);
writeb(ENABLE_TI_TO_SRAM | DIRECTION_GPIB_TO_HOST,
a_priv->gpib_base + SRAM_ACCESS_CONTROL_REG);
@@ -203,10 +200,7 @@ int agilent_82350b_accel_write(gpib_board_t *board, uint8_t *buffer, size_t leng
for (i = 1; i < fifotransferlength;) {
clear_bit(WRITE_READY_BN, &tms_priv->state);
- if (fifotransferlength - i < agilent_82350b_fifo_size)
- block_size = fifotransferlength - i;
- else
- block_size = agilent_82350b_fifo_size;
+ block_size = min(fifotransferlength - i, agilent_82350b_fifo_size);
set_transfer_counter(a_priv, block_size);
for (j = 0; j < block_size; ++j, ++i) {
// load data into board's sram
--
2.39.5
My Dear,
I am pleased to find your email address via google search when i was
searching for my lost childhood friend which I have not seen for the
past 30 years, i have been searching for him on all social media
platforms to no avail. Actually I was searching for him because of my
health, which started since covid 19, 2020, covid 19 killed every
member of my family, I am the only surviving member.
I Survived the pandemic through the assistance of the CDC, But my life
has never remained the same since ever then. my heart and lungs is
Severely affected and damaged, my life is gradually fading away before
me.
I was searching for my childhood friend to be the beneficiary of my
family estate and funds in the bank. When I saw your name there was
something fascinating about it. Please can you grant me the
opportunity to make you the beneficiary. I shall be awaiting your
response for more details.
Thanks
Mrs Norah Jane
[Why]
Notice few problems under I2C-write-over-Aux with
Write_Status_Update_Request flag set cases:
- I2C-write-over-Aux request with
Write_Status_Update_Request flag set won't get sent
upon the reply of I2C_ACK|AUX_ACK followed by “M”
Value. Now just set the flag but won't send out
- The I2C-over-Aux request command with
Write_Status_Update_Request flag set is incorrect.
Should be "SYNC->COM3:0 (= 0110)|0000-> 0000|0000->
0|7-bit I2C address (the same as the last)-> STOP->".
Address only transaction without length and data.
- Upon I2C_DEFER|AUX_ACK Reply for I2C-read-over-Aux,
soure should repeat the identical I2C-read-over-AUX
transaction with the same LEN value. Not with
Write_Status_Update_Request set.
[How]
Refer to DP v2.1: 2.11.7.1.5.3 & 2.11.7.1.5.4
- Clean aux msg buffer and size when constructing
write status update request.
- Send out Write_Status_Update_Request upon reply of
I2C_ACK|AUX_ACK followed by “M”
- Send Write_Status_Update_Request upon I2C_DEFER|AUX_ACK
reply only when the request is I2C-write-over-Aux.
Fixes: 68ec2a2a2481 ("drm/dp: Use I2C_WRITE_STATUS_UPDATE to drain partial I2C_WRITE requests")
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Wayne Lin <Wayne.Lin(a)amd.com>
---
drivers/gpu/drm/display/drm_dp_helper.c | 27 +++++++++++++++++++++----
1 file changed, 23 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index 6ee51003de3c..28f0708c3b27 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -1631,6 +1631,12 @@ static void drm_dp_i2c_msg_write_status_update(struct drm_dp_aux_msg *msg)
msg->request &= DP_AUX_I2C_MOT;
msg->request |= DP_AUX_I2C_WRITE_STATUS_UPDATE;
}
+
+ /*
+ * Address only transaction
+ */
+ msg->buffer = NULL;
+ msg->size = 0;
}
#define AUX_PRECHARGE_LEN 10 /* 10 to 16 */
@@ -1797,10 +1803,22 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
case DP_AUX_I2C_REPLY_ACK:
/*
* Both native ACK and I2C ACK replies received. We
- * can assume the transfer was successful.
+ * can't assume the transfer was completed. Both I2C
+ * WRITE/READ request may get I2C ack reply with partially
+ * completion. We have to continue to poll for the
+ * completion of the request.
*/
- if (ret != msg->size)
- drm_dp_i2c_msg_write_status_update(msg);
+ if (ret != msg->size) {
+ drm_dbg_kms(aux->drm_dev,
+ "%s: I2C partially ack (result=%d, size=%zu)\n",
+ aux->name, ret, msg->size);
+ if (!(msg->request & DP_AUX_I2C_READ)) {
+ usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
+ drm_dp_i2c_msg_write_status_update(msg);
+ }
+
+ continue;
+ }
return ret;
case DP_AUX_I2C_REPLY_NACK:
@@ -1819,7 +1837,8 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
if (defer_i2c < 7)
defer_i2c++;
usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
- drm_dp_i2c_msg_write_status_update(msg);
+ if (!(msg->request & DP_AUX_I2C_READ))
+ drm_dp_i2c_msg_write_status_update(msg);
continue;
--
2.43.0
GCC 15 changed the default C standard dialect from gnu17 to gnu23,
which should not have impacted the kernel because it explicitly requests
the gnu11 standard in the main Makefile. However, mips/vdso code uses
its own CFLAGS without a '-std=' value, which break with this dialect
change because of the kernel's own definitions of bool, false, and true
conflicting with the C23 reserved keywords.
include/linux/stddef.h:11:9: error: cannot use keyword 'false' as enumeration constant
11 | false = 0,
| ^~~~~
include/linux/stddef.h:11:9: note: 'false' is a keyword with '-std=c23' onwards
include/linux/types.h:35:33: error: 'bool' cannot be defined via 'typedef'
35 | typedef _Bool bool;
| ^~~~
include/linux/types.h:35:33: note: 'bool' is a keyword with '-std=c23' onwards
Add -std as specified in KBUILD_CFLAGS to the decompressor and purgatory
CFLAGS to eliminate these errors and make the C standard version of these
areas match the rest of the kernel.
Signed-off-by: Khem Raj <raj.khem(a)gmail.com>
Cc: stable(a)vger.kernel.org
---
v2: Filter the -std flag from KBUILD_CFLAGS instead of hardcoding
v3: Adjust subject and commit message
arch/mips/vdso/Makefile | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index fb4c493aaffa..69d4593f64fe 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -27,6 +27,7 @@ endif
# offsets.
cflags-vdso := $(ccflags-vdso) \
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
+ $(filter -std=%,$(KBUILD_CFLAGS)) \
-O3 -g -fPIC -fno-strict-aliasing -fno-common -fno-builtin -G 0 \
-mrelax-pic-calls $(call cc-option, -mexplicit-relocs) \
-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
Recent tests with timeouts > INT_MAX produced random error returns
with usbtmc_get_stb. This was caused by assigning the return value
of wait_event_interruptible_timeout to an int which overflowed to
negative values. Also return value on success was the remaining
number of jiffies instead of 0.
These patches fix all the cases where the return of
wait_event_interruptible_timeout was assigned to an int and
the case of the remaining jiffies return in usbtmc_get_stb.
Patch 1: Fixes usbtmc_get_stb
Patch 2: Fixes usbtmc488_ioctl_wait_srq
Patch 3: Fixes usbtmc_generic_read
Dave Penkler (3):
usb: usbtmc: Fix erroneous get_stb ioctl error returns
usb: usbtmc: Fix erroneous wait_srq ioctl return
usb: usbtmc: Fix erroneous generic_read ioctl return
drivers/usb/class/usbtmc.c | 53 ++++++++++++++++++++++----------------
1 file changed, 31 insertions(+), 22 deletions(-)
--
Changes V1 => V2 Add cc to stable line
2.49.0
On April 26, 2025 6:25:09 AM PDT, Sasha Levin <sashal(a)kernel.org> wrote:
>This is a note to let you know that I've just added the patch titled
>
> lib/Kconfig.ubsan: Remove 'default UBSAN' from UBSAN_INTEGER_WRAP
>
>to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
>The filename of the patch is:
> lib-kconfig.ubsan-remove-default-ubsan-from-ubsan_in.patch
>and it can be found in the queue-6.14 subdirectory.
>
>If you, or anyone else, feels it should not be added to the stable tree,
>please let <stable(a)vger.kernel.org> know about it.
Please drop this; it's fixing the other patch that should not be backported. :)
-Kees
--
Kees Cook
On April 26, 2025 6:27:11 AM PDT, Sasha Levin <sashal(a)kernel.org> wrote:
>This is a note to let you know that I've just added the patch titled
>
> lib/Kconfig.ubsan: Remove 'default UBSAN' from UBSAN_INTEGER_WRAP
>
>to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
>The filename of the patch is:
> lib-kconfig.ubsan-remove-default-ubsan-from-ubsan_in.patch
>and it can be found in the queue-6.12 subdirectory.
>
>If you, or anyone else, feels it should not be added to the stable tree,
>please let <stable(a)vger.kernel.org> know about it.
And this too; please drop. :)
-Kees
--
Kees Cook
On April 26, 2025 6:27:07 AM PDT, Sasha Levin <sashal(a)kernel.org> wrote:
>This is a note to let you know that I've just added the patch titled
>
> ubsan/overflow: Rework integer overflow sanitizer option to turn on everything
>
>to the 6.12-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
>The filename of the patch is:
> ubsan-overflow-rework-integer-overflow-sanitizer-opt.patch
>and it can be found in the queue-6.12 subdirectory.
>
>If you, or anyone else, feels it should not be added to the stable tree,
>please let <stable(a)vger.kernel.org> know about it.
Same as the other email; please drop.
-Kees
--
Kees Cook
On April 26, 2025 6:25:06 AM PDT, Sasha Levin <sashal(a)kernel.org> wrote:
>This is a note to let you know that I've just added the patch titled
>
> ubsan/overflow: Rework integer overflow sanitizer option to turn on everything
>
>to the 6.14-stable tree which can be found at:
> http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
>
>The filename of the patch is:
> ubsan-overflow-rework-integer-overflow-sanitizer-opt.patch
>and it can be found in the queue-6.14 subdirectory.
>
>If you, or anyone else, feels it should not be added to the stable tree,
>please let <stable(a)vger.kernel.org> know about it.
Please drop this; it is a config change and should not be backported.
-Kees
--
Kees Cook
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 2ccd42b959aaf490333dbd3b9b102eaf295c036a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025041736-abrasion-yonder-b301@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ccd42b959aaf490333dbd3b9b102eaf295c036a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Wed, 2 Apr 2025 22:36:21 +0200
Subject: [PATCH] s390/virtio_ccw: Don't allocate/assign airqs for non-existing
queues
If we finds a vq without a name in our input array in
virtio_ccw_find_vqs(), we treat it as "non-existing" and set the vq pointer
to NULL; we will not call virtio_ccw_setup_vq() to allocate/setup a vq.
Consequently, we create only a queue if it actually exists (name != NULL)
and assign an incremental queue index to each such existing queue.
However, in virtio_ccw_register_adapter_ind()->get_airq_indicator() we
will not ignore these "non-existing queues", but instead assign an airq
indicator to them.
Besides never releasing them in virtio_ccw_drop_indicators() (because
there is no virtqueue), the bigger issue seems to be that there will be a
disagreement between the device and the Linux guest about the airq
indicator to be used for notifying a queue, because the indicator bit
for adapter I/O interrupt is derived from the queue index.
The virtio spec states under "Setting Up Two-Stage Queue Indicators":
... indicator contains the guest address of an area wherein the
indicators for the devices are contained, starting at bit_nr, one
bit per virtqueue of the device.
And further in "Notification via Adapter I/O Interrupts":
For notifying the driver of virtqueue buffers, the device sets the
bit in the guest-provided indicator area at the corresponding
offset.
For example, QEMU uses in virtio_ccw_notify() the queue index (passed as
"vector") to select the relevant indicator bit. If a queue does not exist,
it does not have a corresponding indicator bit assigned, because it
effectively doesn't have a queue index.
Using a virtio-balloon-ccw device under QEMU with free-page-hinting
disabled ("free-page-hint=off") but free-page-reporting enabled
("free-page-reporting=on") will result in free page reporting
not working as expected: in the virtio_balloon driver, we'll be stuck
forever in virtballoon_free_page_report()->wait_event(), because the
waitqueue will not be woken up as the notification from the device is
lost: it would use the wrong indicator bit.
Free page reporting stops working and we get splats (when configured to
detect hung wqs) like:
INFO: task kworker/1:3:463 blocked for more than 61 seconds.
Not tainted 6.14.0 #4
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:kworker/1:3 [...]
Workqueue: events page_reporting_process
Call Trace:
[<000002f404e6dfb2>] __schedule+0x402/0x1640
[<000002f404e6f22e>] schedule+0x3e/0xe0
[<000002f3846a88fa>] virtballoon_free_page_report+0xaa/0x110 [virtio_balloon]
[<000002f40435c8a4>] page_reporting_process+0x2e4/0x740
[<000002f403fd3ee2>] process_one_work+0x1c2/0x400
[<000002f403fd4b96>] worker_thread+0x296/0x420
[<000002f403fe10b4>] kthread+0x124/0x290
[<000002f403f4e0dc>] __ret_from_fork+0x3c/0x60
[<000002f404e77272>] ret_from_fork+0xa/0x38
There was recently a discussion [1] whether the "holes" should be
treated differently again, effectively assigning also non-existing
queues a queue index: that should also fix the issue, but requires other
workarounds to not break existing setups.
Let's fix it without affecting existing setups for now by properly ignoring
the non-existing queues, so the indicator bits will match the queue
indexes.
[1] https://lore.kernel.org/all/cover.1720611677.git.mst@redhat.com/
Fixes: a229989d975e ("virtio: don't allocate vqs when names[i] = NULL")
Reported-by: Chandra Merla <cmerla(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20250402203621.940090-1-david@redhat.com
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 21fa7ac849e5..4904b831c0a7 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -302,11 +302,17 @@ static struct airq_info *new_airq_info(int index)
static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
u64 *first, void **airq_info)
{
- int i, j;
+ int i, j, queue_idx, highest_queue_idx = -1;
struct airq_info *info;
unsigned long *indicator_addr = NULL;
unsigned long bit, flags;
+ /* Array entries without an actual queue pointer must be ignored. */
+ for (i = 0; i < nvqs; i++) {
+ if (vqs[i])
+ highest_queue_idx++;
+ }
+
for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
mutex_lock(&airq_areas_lock);
if (!airq_areas[i])
@@ -316,7 +322,7 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
if (!info)
return NULL;
write_lock_irqsave(&info->lock, flags);
- bit = airq_iv_alloc(info->aiv, nvqs);
+ bit = airq_iv_alloc(info->aiv, highest_queue_idx + 1);
if (bit == -1UL) {
/* Not enough vacancies. */
write_unlock_irqrestore(&info->lock, flags);
@@ -325,8 +331,10 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
*first = bit;
*airq_info = info;
indicator_addr = info->aiv->vector;
- for (j = 0; j < nvqs; j++) {
- airq_iv_set_ptr(info->aiv, bit + j,
+ for (j = 0, queue_idx = 0; j < nvqs; j++) {
+ if (!vqs[j])
+ continue;
+ airq_iv_set_ptr(info->aiv, bit + queue_idx++,
(unsigned long)vqs[j]);
}
write_unlock_irqrestore(&info->lock, flags);
This patchset backport commit to fix BPF selftests failure in stable
6.12 since commit 972bafed67ca ("bpf, test_run: Fix use-after-free issue
in eth_skb_pkt_type()"), which is backport of upstream commit
6b3d638ca897.
The fix needed is upstream commit c7f2188d68c1 ("selftests/bpf: Adjust
data size to have ETH_HLEN"), which in turn depends on upstream commit
d5fbcf46ee82 "selftests/bpf: make xdp_cpumap_attach keep redirect prog
attached". Latter is part of "selftests/bpf: add coverage for
xdp_features in test_progs"[1], and I opt to backport the series entirely
since it adds coverage. With these patches the xdp_devmap_attach no
longer fails[2].
BPF selftests failure log below for completeness. See [3] for the
raw log.
Error: #566 xdp_devmap_attach
Error: #566/1 xdp_devmap_attach/DEVMAP with programs in entries
test_xdp_with_devmap_helpers:PASS:ip netns add devmap_attach_ns 0 nsec
test_xdp_with_devmap_helpers:PASS:open_netns 0 nsec
test_xdp_with_devmap_helpers:PASS:ip link set dev lo up 0 nsec
test_xdp_with_devmap_helpers:PASS:test_xdp_with_devmap_helpers__open_and_load 0 nsec
test_xdp_with_devmap_helpers:PASS:Generic attach of program with 8-byte devmap 0 nsec
test_xdp_with_devmap_helpers:PASS:bpf_prog_get_info_by_fd 0 nsec
test_xdp_with_devmap_helpers:PASS:Add program to devmap entry 0 nsec
test_xdp_with_devmap_helpers:PASS:Read devmap entry 0 nsec
test_xdp_with_devmap_helpers:PASS:Match program id to devmap entry prog_id 0 nsec
test_xdp_with_devmap_helpers:FAIL:XDP test run unexpected error: -22 (errno 22)
test_xdp_with_devmap_helpers:PASS:XDP program detach 0 nsec
libbpf: Kernel error message: BPF_XDP_DEVMAP programs can not be attached to a device
test_xdp_with_devmap_helpers:PASS:Attach of BPF_XDP_DEVMAP program 0 nsec
test_xdp_with_devmap_helpers:PASS:Add non-BPF_XDP_DEVMAP program to devmap entry 0 nsec
test_xdp_with_devmap_helpers:PASS:Add BPF_XDP program with frags to devmap entry 0 nsec
Error: #566/4 xdp_devmap_attach/DEVMAP with programs in entries on veth
test_xdp_with_devmap_helpers_veth:PASS:ip netns add devmap_attach_ns 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:open_netns 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:ip link add veth_src type veth peer name veth_dst 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:ip link set dev veth_src up 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:ip link set dev veth_dst up 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:val.ifindex 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:ifindex_dst 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:test_xdp_with_devmap_helpers__open_and_load 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:Attach of program with 8-byte devmap 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:bpf_prog_get_info_by_fd 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:Add program to devmap entry 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:Read devmap entry 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:Match program id to devmap entry prog_id 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:Attach of dummy XDP 0 nsec
test_xdp_with_devmap_helpers_veth:FAIL:XDP test run unexpected error: -22 (errno 22)
test_xdp_with_devmap_helpers_veth:PASS:XDP program detach 0 nsec
test_xdp_with_devmap_helpers_veth:PASS:XDP program detach 0 nsec
1: https://lore.kernel.org/all/20241009-convert_xdp_tests-v3-0-51cea913710c@bo…
2: https://github.com/shunghsiyu/libbpf/actions/runs/14569651139/job/408644287…
3: https://github.com/shunghsiyu/libbpf/actions/runs/14562221313/job/408469275…
Alexis Lothoré (eBPF Foundation) (3):
selftests/bpf: fix bpf_map_redirect call for cpu map test
selftests/bpf: make xdp_cpumap_attach keep redirect prog attached
selftests/bpf: check program redirect in xdp_cpumap_attach
Shigeru Yoshida (1):
selftests/bpf: Adjust data size to have ETH_HLEN
.../bpf/prog_tests/xdp_cpumap_attach.c | 44 +++++++++++++++----
.../bpf/prog_tests/xdp_devmap_attach.c | 8 ++--
.../bpf/progs/test_xdp_with_cpumap_helpers.c | 7 ++-
3 files changed, 46 insertions(+), 13 deletions(-)
--
2.49.0
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 2ccd42b959aaf490333dbd3b9b102eaf295c036a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025041737-impart-slacker-8722@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ccd42b959aaf490333dbd3b9b102eaf295c036a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Wed, 2 Apr 2025 22:36:21 +0200
Subject: [PATCH] s390/virtio_ccw: Don't allocate/assign airqs for non-existing
queues
If we finds a vq without a name in our input array in
virtio_ccw_find_vqs(), we treat it as "non-existing" and set the vq pointer
to NULL; we will not call virtio_ccw_setup_vq() to allocate/setup a vq.
Consequently, we create only a queue if it actually exists (name != NULL)
and assign an incremental queue index to each such existing queue.
However, in virtio_ccw_register_adapter_ind()->get_airq_indicator() we
will not ignore these "non-existing queues", but instead assign an airq
indicator to them.
Besides never releasing them in virtio_ccw_drop_indicators() (because
there is no virtqueue), the bigger issue seems to be that there will be a
disagreement between the device and the Linux guest about the airq
indicator to be used for notifying a queue, because the indicator bit
for adapter I/O interrupt is derived from the queue index.
The virtio spec states under "Setting Up Two-Stage Queue Indicators":
... indicator contains the guest address of an area wherein the
indicators for the devices are contained, starting at bit_nr, one
bit per virtqueue of the device.
And further in "Notification via Adapter I/O Interrupts":
For notifying the driver of virtqueue buffers, the device sets the
bit in the guest-provided indicator area at the corresponding
offset.
For example, QEMU uses in virtio_ccw_notify() the queue index (passed as
"vector") to select the relevant indicator bit. If a queue does not exist,
it does not have a corresponding indicator bit assigned, because it
effectively doesn't have a queue index.
Using a virtio-balloon-ccw device under QEMU with free-page-hinting
disabled ("free-page-hint=off") but free-page-reporting enabled
("free-page-reporting=on") will result in free page reporting
not working as expected: in the virtio_balloon driver, we'll be stuck
forever in virtballoon_free_page_report()->wait_event(), because the
waitqueue will not be woken up as the notification from the device is
lost: it would use the wrong indicator bit.
Free page reporting stops working and we get splats (when configured to
detect hung wqs) like:
INFO: task kworker/1:3:463 blocked for more than 61 seconds.
Not tainted 6.14.0 #4
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:kworker/1:3 [...]
Workqueue: events page_reporting_process
Call Trace:
[<000002f404e6dfb2>] __schedule+0x402/0x1640
[<000002f404e6f22e>] schedule+0x3e/0xe0
[<000002f3846a88fa>] virtballoon_free_page_report+0xaa/0x110 [virtio_balloon]
[<000002f40435c8a4>] page_reporting_process+0x2e4/0x740
[<000002f403fd3ee2>] process_one_work+0x1c2/0x400
[<000002f403fd4b96>] worker_thread+0x296/0x420
[<000002f403fe10b4>] kthread+0x124/0x290
[<000002f403f4e0dc>] __ret_from_fork+0x3c/0x60
[<000002f404e77272>] ret_from_fork+0xa/0x38
There was recently a discussion [1] whether the "holes" should be
treated differently again, effectively assigning also non-existing
queues a queue index: that should also fix the issue, but requires other
workarounds to not break existing setups.
Let's fix it without affecting existing setups for now by properly ignoring
the non-existing queues, so the indicator bits will match the queue
indexes.
[1] https://lore.kernel.org/all/cover.1720611677.git.mst@redhat.com/
Fixes: a229989d975e ("virtio: don't allocate vqs when names[i] = NULL")
Reported-by: Chandra Merla <cmerla(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20250402203621.940090-1-david@redhat.com
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 21fa7ac849e5..4904b831c0a7 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -302,11 +302,17 @@ static struct airq_info *new_airq_info(int index)
static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
u64 *first, void **airq_info)
{
- int i, j;
+ int i, j, queue_idx, highest_queue_idx = -1;
struct airq_info *info;
unsigned long *indicator_addr = NULL;
unsigned long bit, flags;
+ /* Array entries without an actual queue pointer must be ignored. */
+ for (i = 0; i < nvqs; i++) {
+ if (vqs[i])
+ highest_queue_idx++;
+ }
+
for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
mutex_lock(&airq_areas_lock);
if (!airq_areas[i])
@@ -316,7 +322,7 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
if (!info)
return NULL;
write_lock_irqsave(&info->lock, flags);
- bit = airq_iv_alloc(info->aiv, nvqs);
+ bit = airq_iv_alloc(info->aiv, highest_queue_idx + 1);
if (bit == -1UL) {
/* Not enough vacancies. */
write_unlock_irqrestore(&info->lock, flags);
@@ -325,8 +331,10 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
*first = bit;
*airq_info = info;
indicator_addr = info->aiv->vector;
- for (j = 0; j < nvqs; j++) {
- airq_iv_set_ptr(info->aiv, bit + j,
+ for (j = 0, queue_idx = 0; j < nvqs; j++) {
+ if (!vqs[j])
+ continue;
+ airq_iv_set_ptr(info->aiv, bit + queue_idx++,
(unsigned long)vqs[j]);
}
write_unlock_irqrestore(&info->lock, flags);
From: Ihor Solodrai <ihor.solodrai(a)linux.dev>
commit 5071a1e606b30c0c11278d3c6620cd6a24724cf6 upstream.
"sockmap_ktls disconnect_after_delete" test has been failing on BPF CI
after recent merges from netdev:
* https://github.com/kernel-patches/bpf/actions/runs/14458537639
* https://github.com/kernel-patches/bpf/actions/runs/14457178732
It happens because disconnect has been disabled for TLS [1], and it
renders the test case invalid.
Removing all the test code creates a conflict between bpf and
bpf-next, so for now only remove the offending assert [2].
The test will be removed later on bpf-next.
[1] https://lore.kernel.org/netdev/20250404180334.3224206-1-kuba@kernel.org/
[2] https://lore.kernel.org/bpf/cfc371285323e1a3f3b006bfcf74e6cf7ad65258@linux.…
Signed-off-by: Ihor Solodrai <ihor.solodrai(a)linux.dev>
Signed-off-by: Andrii Nakryiko <andrii(a)kernel.org>
Reviewed-by: Jiayuan Chen <jiayuan.chen(a)linux.dev>
Link: https://lore.kernel.org/bpf/20250416170246.2438524-1-ihor.solodrai@linux.dev
[ shung-hsi.yu: needed because upstream commit 5071a1e606b3 ("net: tls:
explicitly disallow disconnect") is backported ]
Signed-off-by: Shung-Hsi Yu <shung-hsi.yu(a)suse.com>
---
tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 2d0796314862..0a99fd404f6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
goto close_cli;
err = disconnect(cli);
- ASSERT_OK(err, "disconnect");
close_cli:
close(cli);
--
2.49.0
From: Kan Liang <kan.liang(a)linux.intel.com>
[ Upstream commit 96a720db59ab330c8562b2437153faa45dac705f ]
(The existing patch in queue-5.10 was wrong.
queue-5.10/perf-x86-intel-uncore-fix-the-scale-of-iio-free-running-counters-on-snr.patch
It's supposed to change the array snr_uncore_iio_freerunning_events[]
rather than icx_uncore_iio_freerunning_events[]. Send the patch to
replace the wrong one.
With this fix the https://lore.kernel.org/stable/2025042139-protector-rickety-a72d@gregkh/
can be applied then.)
There was a mistake in the SNR uncore spec. The counter increments for
every 32 bytes of data sent from the IO agent to the SOC, not 4 bytes
which was documented in the spec.
The event list has been updated:
"EventName": "UNC_IIO_BANDWIDTH_IN.PART0_FREERUN",
"BriefDescription": "Free running counter that increments for every 32
bytes of data sent from the IO agent to the SOC",
Update the scale of the IIO bandwidth in free running counters as well.
Fixes: 210cc5f9db7a ("perf/x86/intel/uncore: Add uncore support for Snow Ridge server")
Signed-off-by: Kan Liang <kan.liang(a)linux.intel.com>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra(a)chello.nl>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20250416142426.3933977-1-kan.liang@linux.intel.com
---
arch/x86/events/intel/uncore_snbep.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index ad084a5a1463..dd70a6b7879b 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -4487,28 +4487,28 @@ static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = {
INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
/* Free-Running IIO BANDWIDTH IN Counters */
INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
- INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
+ INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.0517578125e-5"),
INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
{ /* end: all zeroes */ },
};
--
2.38.1
From: Sergey Shtylyov <s.shtylyov(a)omp.ru>
[ Upstream commit cf7385cb26ac4f0ee6c7385960525ad534323252 ]
In of_modalias(), if the buffer happens to be too small even for the 1st
snprintf() call, the len parameter will become negative and str parameter
(if not NULL initially) will point beyond the buffer's end. Add the buffer
overflow check after the 1st snprintf() call and fix such check after the
strlen() call (accounting for the terminating NUL char).
Fixes: bc575064d688 ("of/device: use of_property_for_each_string to parse compatible strings")
Signed-off-by: Sergey Shtylyov <s.shtylyov(a)omp.ru>
Link: https://lore.kernel.org/r/bbfc6be0-c687-62b6-d015-5141b93f313e@omp.ru
Signed-off-by: Rob Herring <robh(a)kernel.org>
Signed-off-by: Uwe Kleine-König <ukleinek(a)debian.org>
---
Hello,
commit cf7385cb26ac4f0ee6c7385960525ad534323252 was already backported to
stable/linux-6.6.y as commit 0b0d5701a8bf02f8fee037e81aacf6746558bfd6.
In 6.1 the function to fix is in a different file and differently named
since v6.1 lacks commits 5c3d15e127eb ("of: Update
of_device_get_modalias()") and bd7a7ed774af ("of: Move of_modalias() to
module.c")
This is the respective backport to 6.1. Looking into that commit was
triggered by https://bugs.debian.org/1103277 and my backport is
identical to this bug's reporter's. Thanks for considering it for the
next 6.1.y update.
Best regards
Uwe
drivers/of/device.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/of/device.c b/drivers/of/device.c
index ce225d2590b5..91d92bfe5735 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -264,14 +264,15 @@ static ssize_t of_device_get_modalias(struct device *dev, char *str, ssize_t len
csize = snprintf(str, len, "of:N%pOFn%c%s", dev->of_node, 'T',
of_node_get_device_type(dev->of_node));
tsize = csize;
+ if (csize >= len)
+ csize = len > 0 ? len - 1 : 0;
len -= csize;
- if (str)
- str += csize;
+ str += csize;
of_property_for_each_string(dev->of_node, "compatible", p, compat) {
csize = strlen(compat) + 1;
tsize += csize;
- if (csize > len)
+ if (csize >= len)
continue;
csize = snprintf(str, len, "C%s", compat);
base-commit: 535ec20c50273d81b2cc7985fed2108dee0e65d7
--
2.47.2
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 2ccd42b959aaf490333dbd3b9b102eaf295c036a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025041733-cosmetics-brigade-9ed7@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ccd42b959aaf490333dbd3b9b102eaf295c036a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Wed, 2 Apr 2025 22:36:21 +0200
Subject: [PATCH] s390/virtio_ccw: Don't allocate/assign airqs for non-existing
queues
If we finds a vq without a name in our input array in
virtio_ccw_find_vqs(), we treat it as "non-existing" and set the vq pointer
to NULL; we will not call virtio_ccw_setup_vq() to allocate/setup a vq.
Consequently, we create only a queue if it actually exists (name != NULL)
and assign an incremental queue index to each such existing queue.
However, in virtio_ccw_register_adapter_ind()->get_airq_indicator() we
will not ignore these "non-existing queues", but instead assign an airq
indicator to them.
Besides never releasing them in virtio_ccw_drop_indicators() (because
there is no virtqueue), the bigger issue seems to be that there will be a
disagreement between the device and the Linux guest about the airq
indicator to be used for notifying a queue, because the indicator bit
for adapter I/O interrupt is derived from the queue index.
The virtio spec states under "Setting Up Two-Stage Queue Indicators":
... indicator contains the guest address of an area wherein the
indicators for the devices are contained, starting at bit_nr, one
bit per virtqueue of the device.
And further in "Notification via Adapter I/O Interrupts":
For notifying the driver of virtqueue buffers, the device sets the
bit in the guest-provided indicator area at the corresponding
offset.
For example, QEMU uses in virtio_ccw_notify() the queue index (passed as
"vector") to select the relevant indicator bit. If a queue does not exist,
it does not have a corresponding indicator bit assigned, because it
effectively doesn't have a queue index.
Using a virtio-balloon-ccw device under QEMU with free-page-hinting
disabled ("free-page-hint=off") but free-page-reporting enabled
("free-page-reporting=on") will result in free page reporting
not working as expected: in the virtio_balloon driver, we'll be stuck
forever in virtballoon_free_page_report()->wait_event(), because the
waitqueue will not be woken up as the notification from the device is
lost: it would use the wrong indicator bit.
Free page reporting stops working and we get splats (when configured to
detect hung wqs) like:
INFO: task kworker/1:3:463 blocked for more than 61 seconds.
Not tainted 6.14.0 #4
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:kworker/1:3 [...]
Workqueue: events page_reporting_process
Call Trace:
[<000002f404e6dfb2>] __schedule+0x402/0x1640
[<000002f404e6f22e>] schedule+0x3e/0xe0
[<000002f3846a88fa>] virtballoon_free_page_report+0xaa/0x110 [virtio_balloon]
[<000002f40435c8a4>] page_reporting_process+0x2e4/0x740
[<000002f403fd3ee2>] process_one_work+0x1c2/0x400
[<000002f403fd4b96>] worker_thread+0x296/0x420
[<000002f403fe10b4>] kthread+0x124/0x290
[<000002f403f4e0dc>] __ret_from_fork+0x3c/0x60
[<000002f404e77272>] ret_from_fork+0xa/0x38
There was recently a discussion [1] whether the "holes" should be
treated differently again, effectively assigning also non-existing
queues a queue index: that should also fix the issue, but requires other
workarounds to not break existing setups.
Let's fix it without affecting existing setups for now by properly ignoring
the non-existing queues, so the indicator bits will match the queue
indexes.
[1] https://lore.kernel.org/all/cover.1720611677.git.mst@redhat.com/
Fixes: a229989d975e ("virtio: don't allocate vqs when names[i] = NULL")
Reported-by: Chandra Merla <cmerla(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20250402203621.940090-1-david@redhat.com
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 21fa7ac849e5..4904b831c0a7 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -302,11 +302,17 @@ static struct airq_info *new_airq_info(int index)
static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
u64 *first, void **airq_info)
{
- int i, j;
+ int i, j, queue_idx, highest_queue_idx = -1;
struct airq_info *info;
unsigned long *indicator_addr = NULL;
unsigned long bit, flags;
+ /* Array entries without an actual queue pointer must be ignored. */
+ for (i = 0; i < nvqs; i++) {
+ if (vqs[i])
+ highest_queue_idx++;
+ }
+
for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
mutex_lock(&airq_areas_lock);
if (!airq_areas[i])
@@ -316,7 +322,7 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
if (!info)
return NULL;
write_lock_irqsave(&info->lock, flags);
- bit = airq_iv_alloc(info->aiv, nvqs);
+ bit = airq_iv_alloc(info->aiv, highest_queue_idx + 1);
if (bit == -1UL) {
/* Not enough vacancies. */
write_unlock_irqrestore(&info->lock, flags);
@@ -325,8 +331,10 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
*first = bit;
*airq_info = info;
indicator_addr = info->aiv->vector;
- for (j = 0; j < nvqs; j++) {
- airq_iv_set_ptr(info->aiv, bit + j,
+ for (j = 0, queue_idx = 0; j < nvqs; j++) {
+ if (!vqs[j])
+ continue;
+ airq_iv_set_ptr(info->aiv, bit + queue_idx++,
(unsigned long)vqs[j]);
}
write_unlock_irqrestore(&info->lock, flags);
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 2ccd42b959aaf490333dbd3b9b102eaf295c036a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025041734-deport-antennae-d763@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ccd42b959aaf490333dbd3b9b102eaf295c036a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Wed, 2 Apr 2025 22:36:21 +0200
Subject: [PATCH] s390/virtio_ccw: Don't allocate/assign airqs for non-existing
queues
If we finds a vq without a name in our input array in
virtio_ccw_find_vqs(), we treat it as "non-existing" and set the vq pointer
to NULL; we will not call virtio_ccw_setup_vq() to allocate/setup a vq.
Consequently, we create only a queue if it actually exists (name != NULL)
and assign an incremental queue index to each such existing queue.
However, in virtio_ccw_register_adapter_ind()->get_airq_indicator() we
will not ignore these "non-existing queues", but instead assign an airq
indicator to them.
Besides never releasing them in virtio_ccw_drop_indicators() (because
there is no virtqueue), the bigger issue seems to be that there will be a
disagreement between the device and the Linux guest about the airq
indicator to be used for notifying a queue, because the indicator bit
for adapter I/O interrupt is derived from the queue index.
The virtio spec states under "Setting Up Two-Stage Queue Indicators":
... indicator contains the guest address of an area wherein the
indicators for the devices are contained, starting at bit_nr, one
bit per virtqueue of the device.
And further in "Notification via Adapter I/O Interrupts":
For notifying the driver of virtqueue buffers, the device sets the
bit in the guest-provided indicator area at the corresponding
offset.
For example, QEMU uses in virtio_ccw_notify() the queue index (passed as
"vector") to select the relevant indicator bit. If a queue does not exist,
it does not have a corresponding indicator bit assigned, because it
effectively doesn't have a queue index.
Using a virtio-balloon-ccw device under QEMU with free-page-hinting
disabled ("free-page-hint=off") but free-page-reporting enabled
("free-page-reporting=on") will result in free page reporting
not working as expected: in the virtio_balloon driver, we'll be stuck
forever in virtballoon_free_page_report()->wait_event(), because the
waitqueue will not be woken up as the notification from the device is
lost: it would use the wrong indicator bit.
Free page reporting stops working and we get splats (when configured to
detect hung wqs) like:
INFO: task kworker/1:3:463 blocked for more than 61 seconds.
Not tainted 6.14.0 #4
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:kworker/1:3 [...]
Workqueue: events page_reporting_process
Call Trace:
[<000002f404e6dfb2>] __schedule+0x402/0x1640
[<000002f404e6f22e>] schedule+0x3e/0xe0
[<000002f3846a88fa>] virtballoon_free_page_report+0xaa/0x110 [virtio_balloon]
[<000002f40435c8a4>] page_reporting_process+0x2e4/0x740
[<000002f403fd3ee2>] process_one_work+0x1c2/0x400
[<000002f403fd4b96>] worker_thread+0x296/0x420
[<000002f403fe10b4>] kthread+0x124/0x290
[<000002f403f4e0dc>] __ret_from_fork+0x3c/0x60
[<000002f404e77272>] ret_from_fork+0xa/0x38
There was recently a discussion [1] whether the "holes" should be
treated differently again, effectively assigning also non-existing
queues a queue index: that should also fix the issue, but requires other
workarounds to not break existing setups.
Let's fix it without affecting existing setups for now by properly ignoring
the non-existing queues, so the indicator bits will match the queue
indexes.
[1] https://lore.kernel.org/all/cover.1720611677.git.mst@redhat.com/
Fixes: a229989d975e ("virtio: don't allocate vqs when names[i] = NULL")
Reported-by: Chandra Merla <cmerla(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20250402203621.940090-1-david@redhat.com
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 21fa7ac849e5..4904b831c0a7 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -302,11 +302,17 @@ static struct airq_info *new_airq_info(int index)
static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
u64 *first, void **airq_info)
{
- int i, j;
+ int i, j, queue_idx, highest_queue_idx = -1;
struct airq_info *info;
unsigned long *indicator_addr = NULL;
unsigned long bit, flags;
+ /* Array entries without an actual queue pointer must be ignored. */
+ for (i = 0; i < nvqs; i++) {
+ if (vqs[i])
+ highest_queue_idx++;
+ }
+
for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
mutex_lock(&airq_areas_lock);
if (!airq_areas[i])
@@ -316,7 +322,7 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
if (!info)
return NULL;
write_lock_irqsave(&info->lock, flags);
- bit = airq_iv_alloc(info->aiv, nvqs);
+ bit = airq_iv_alloc(info->aiv, highest_queue_idx + 1);
if (bit == -1UL) {
/* Not enough vacancies. */
write_unlock_irqrestore(&info->lock, flags);
@@ -325,8 +331,10 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
*first = bit;
*airq_info = info;
indicator_addr = info->aiv->vector;
- for (j = 0; j < nvqs; j++) {
- airq_iv_set_ptr(info->aiv, bit + j,
+ for (j = 0, queue_idx = 0; j < nvqs; j++) {
+ if (!vqs[j])
+ continue;
+ airq_iv_set_ptr(info->aiv, bit + queue_idx++,
(unsigned long)vqs[j]);
}
write_unlock_irqrestore(&info->lock, flags);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 2ccd42b959aaf490333dbd3b9b102eaf295c036a
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025041731-release-charity-8e70@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 2ccd42b959aaf490333dbd3b9b102eaf295c036a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david(a)redhat.com>
Date: Wed, 2 Apr 2025 22:36:21 +0200
Subject: [PATCH] s390/virtio_ccw: Don't allocate/assign airqs for non-existing
queues
If we finds a vq without a name in our input array in
virtio_ccw_find_vqs(), we treat it as "non-existing" and set the vq pointer
to NULL; we will not call virtio_ccw_setup_vq() to allocate/setup a vq.
Consequently, we create only a queue if it actually exists (name != NULL)
and assign an incremental queue index to each such existing queue.
However, in virtio_ccw_register_adapter_ind()->get_airq_indicator() we
will not ignore these "non-existing queues", but instead assign an airq
indicator to them.
Besides never releasing them in virtio_ccw_drop_indicators() (because
there is no virtqueue), the bigger issue seems to be that there will be a
disagreement between the device and the Linux guest about the airq
indicator to be used for notifying a queue, because the indicator bit
for adapter I/O interrupt is derived from the queue index.
The virtio spec states under "Setting Up Two-Stage Queue Indicators":
... indicator contains the guest address of an area wherein the
indicators for the devices are contained, starting at bit_nr, one
bit per virtqueue of the device.
And further in "Notification via Adapter I/O Interrupts":
For notifying the driver of virtqueue buffers, the device sets the
bit in the guest-provided indicator area at the corresponding
offset.
For example, QEMU uses in virtio_ccw_notify() the queue index (passed as
"vector") to select the relevant indicator bit. If a queue does not exist,
it does not have a corresponding indicator bit assigned, because it
effectively doesn't have a queue index.
Using a virtio-balloon-ccw device under QEMU with free-page-hinting
disabled ("free-page-hint=off") but free-page-reporting enabled
("free-page-reporting=on") will result in free page reporting
not working as expected: in the virtio_balloon driver, we'll be stuck
forever in virtballoon_free_page_report()->wait_event(), because the
waitqueue will not be woken up as the notification from the device is
lost: it would use the wrong indicator bit.
Free page reporting stops working and we get splats (when configured to
detect hung wqs) like:
INFO: task kworker/1:3:463 blocked for more than 61 seconds.
Not tainted 6.14.0 #4
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:kworker/1:3 [...]
Workqueue: events page_reporting_process
Call Trace:
[<000002f404e6dfb2>] __schedule+0x402/0x1640
[<000002f404e6f22e>] schedule+0x3e/0xe0
[<000002f3846a88fa>] virtballoon_free_page_report+0xaa/0x110 [virtio_balloon]
[<000002f40435c8a4>] page_reporting_process+0x2e4/0x740
[<000002f403fd3ee2>] process_one_work+0x1c2/0x400
[<000002f403fd4b96>] worker_thread+0x296/0x420
[<000002f403fe10b4>] kthread+0x124/0x290
[<000002f403f4e0dc>] __ret_from_fork+0x3c/0x60
[<000002f404e77272>] ret_from_fork+0xa/0x38
There was recently a discussion [1] whether the "holes" should be
treated differently again, effectively assigning also non-existing
queues a queue index: that should also fix the issue, but requires other
workarounds to not break existing setups.
Let's fix it without affecting existing setups for now by properly ignoring
the non-existing queues, so the indicator bits will match the queue
indexes.
[1] https://lore.kernel.org/all/cover.1720611677.git.mst@redhat.com/
Fixes: a229989d975e ("virtio: don't allocate vqs when names[i] = NULL")
Reported-by: Chandra Merla <cmerla(a)redhat.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Tested-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Thomas Huth <thuth(a)redhat.com>
Reviewed-by: Cornelia Huck <cohuck(a)redhat.com>
Acked-by: Michael S. Tsirkin <mst(a)redhat.com>
Acked-by: Christian Borntraeger <borntraeger(a)linux.ibm.com>
Link: https://lore.kernel.org/r/20250402203621.940090-1-david@redhat.com
Signed-off-by: Heiko Carstens <hca(a)linux.ibm.com>
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 21fa7ac849e5..4904b831c0a7 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -302,11 +302,17 @@ static struct airq_info *new_airq_info(int index)
static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
u64 *first, void **airq_info)
{
- int i, j;
+ int i, j, queue_idx, highest_queue_idx = -1;
struct airq_info *info;
unsigned long *indicator_addr = NULL;
unsigned long bit, flags;
+ /* Array entries without an actual queue pointer must be ignored. */
+ for (i = 0; i < nvqs; i++) {
+ if (vqs[i])
+ highest_queue_idx++;
+ }
+
for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
mutex_lock(&airq_areas_lock);
if (!airq_areas[i])
@@ -316,7 +322,7 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
if (!info)
return NULL;
write_lock_irqsave(&info->lock, flags);
- bit = airq_iv_alloc(info->aiv, nvqs);
+ bit = airq_iv_alloc(info->aiv, highest_queue_idx + 1);
if (bit == -1UL) {
/* Not enough vacancies. */
write_unlock_irqrestore(&info->lock, flags);
@@ -325,8 +331,10 @@ static unsigned long *get_airq_indicator(struct virtqueue *vqs[], int nvqs,
*first = bit;
*airq_info = info;
indicator_addr = info->aiv->vector;
- for (j = 0; j < nvqs; j++) {
- airq_iv_set_ptr(info->aiv, bit + j,
+ for (j = 0, queue_idx = 0; j < nvqs; j++) {
+ if (!vqs[j])
+ continue;
+ airq_iv_set_ptr(info->aiv, bit + queue_idx++,
(unsigned long)vqs[j]);
}
write_unlock_irqrestore(&info->lock, flags);
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 3318dc299b072a0511d6dfd8367f3304fb6d9827
Gitweb: https://git.kernel.org/tip/3318dc299b072a0511d6dfd8367f3304fb6d9827
Author: Suzuki K Poulose <suzuki.poulose(a)arm.com>
AuthorDate: Tue, 22 Apr 2025 17:16:16 +01:00
Committer: Ingo Molnar <mingo(a)kernel.org>
CommitterDate: Sat, 26 Apr 2025 10:17:24 +02:00
irqchip/gic-v2m: Prevent use after free of gicv2m_get_fwnode()
With ACPI in place, gicv2m_get_fwnode() is registered with the pci
subsystem as pci_msi_get_fwnode_cb(), which may get invoked at runtime
during a PCI host bridge probe. But, the call back is wrongly marked as
__init, causing it to be freed, while being registered with the PCI
subsystem and could trigger:
Unable to handle kernel paging request at virtual address ffff8000816c0400
gicv2m_get_fwnode+0x0/0x58 (P)
pci_set_bus_msi_domain+0x74/0x88
pci_register_host_bridge+0x194/0x548
This is easily reproducible on a Juno board with ACPI boot.
Retain the function for later use.
Fixes: 0644b3daca28 ("irqchip/gic-v2m: acpi: Introducing GICv2m ACPI support")
Signed-off-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Reviewed-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
---
drivers/irqchip/irq-gic-v2m.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index c698948..dc98c39 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -421,7 +421,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle,
#ifdef CONFIG_ACPI
static int acpi_num_msi;
-static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
+static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
{
struct v2m_data *data;
Dear,
Send your Ref: FSG2025 / Name / Phone Number / Country to Mr. Andrej
Mahecic on un.grant(a)socialworker.net, +1 888 673 0430 for your £100,000.00
payment.
Sincerely
Mr. C. Gunness
On behalf of the UN.
The patch titled
Subject: mm: vmalloc: support more granular vrealloc() sizing
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-vmalloc-support-more-granular-vrealloc-sizing.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Kees Cook <kees(a)kernel.org>
Subject: mm: vmalloc: support more granular vrealloc() sizing
Date: Fri, 25 Apr 2025 17:11:07 -0700
Introduce struct vm_struct::requested_size so that the requested
(re)allocation size is retained separately from the allocated area size.
This means that KASAN will correctly poison the correct spans of requested
bytes. This also means we can support growing the usable portion of an
allocation that can already be supported by the existing area's existing
allocation.
Link: https://lkml.kernel.org/r/20250426001105.it.679-kees@kernel.org
Fixes: 3ddc2fefe6f3 ("mm: vmalloc: implement vrealloc()")
Signed-off-by: Kees Cook <kees(a)kernel.org>
Reported-by: Erhard Furtner <erhard_f(a)mailbox.org>
Closes: https://lore.kernel.org/all/20250408192503.6149a816@outsider.home/
Cc: Danilo Krummrich <dakr(a)kernel.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: "Uladzislau Rezki (Sony)" <urezki(a)gmail.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/vmalloc.h | 1 +
mm/vmalloc.c | 31 ++++++++++++++++++++++++-------
2 files changed, 25 insertions(+), 7 deletions(-)
--- a/include/linux/vmalloc.h~mm-vmalloc-support-more-granular-vrealloc-sizing
+++ a/include/linux/vmalloc.h
@@ -61,6 +61,7 @@ struct vm_struct {
unsigned int nr_pages;
phys_addr_t phys_addr;
const void *caller;
+ unsigned long requested_size;
};
struct vmap_area {
--- a/mm/vmalloc.c~mm-vmalloc-support-more-granular-vrealloc-sizing
+++ a/mm/vmalloc.c
@@ -1940,7 +1940,7 @@ static inline void setup_vmalloc_vm(stru
{
vm->flags = flags;
vm->addr = (void *)va->va_start;
- vm->size = va_size(va);
+ vm->size = vm->requested_size = va_size(va);
vm->caller = caller;
va->vm = vm;
}
@@ -3133,6 +3133,7 @@ struct vm_struct *__get_vm_area_node(uns
area->flags = flags;
area->caller = caller;
+ area->requested_size = requested_size;
va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area);
if (IS_ERR(va)) {
@@ -4063,6 +4064,8 @@ EXPORT_SYMBOL(vzalloc_node_noprof);
*/
void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
{
+ struct vm_struct *vm = NULL;
+ size_t alloced_size = 0;
size_t old_size = 0;
void *n;
@@ -4072,15 +4075,17 @@ void *vrealloc_noprof(const void *p, siz
}
if (p) {
- struct vm_struct *vm;
-
vm = find_vm_area(p);
if (unlikely(!vm)) {
WARN(1, "Trying to vrealloc() nonexistent vm area (%p)\n", p);
return NULL;
}
- old_size = get_vm_area_size(vm);
+ alloced_size = get_vm_area_size(vm);
+ old_size = vm->requested_size;
+ if (WARN(alloced_size < old_size,
+ "vrealloc() has mismatched area vs requested sizes (%p)\n", p))
+ return NULL;
}
/*
@@ -4088,14 +4093,26 @@ void *vrealloc_noprof(const void *p, siz
* would be a good heuristic for when to shrink the vm_area?
*/
if (size <= old_size) {
- /* Zero out spare memory. */
- if (want_init_on_alloc(flags))
+ /* Zero out "freed" memory. */
+ if (want_init_on_free())
memset((void *)p + size, 0, old_size - size);
+ vm->requested_size = size;
kasan_poison_vmalloc(p + size, old_size - size);
- kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL);
return (void *)p;
}
+ /*
+ * We already have the bytes available in the allocation; use them.
+ */
+ if (size <= alloced_size) {
+ kasan_unpoison_vmalloc(p + old_size, size - old_size,
+ KASAN_VMALLOC_PROT_NORMAL);
+ /* Zero out "alloced" memory. */
+ if (want_init_on_alloc(flags))
+ memset((void *)p + old_size, 0, size - old_size);
+ vm->requested_size = size;
+ }
+
/* TODO: Grow the vm_area, i.e. allocate and map additional pages. */
n = __vmalloc_noprof(size, flags);
if (!n)
_
Patches currently in -mm which might be from kees(a)kernel.org are
mm-vmalloc-support-more-granular-vrealloc-sizing.patch
When dwc3_gadget_soft_disconnect() fails, dwc3_suspend_common() keeps
going with the suspend, resulting in a period where the power domain is
off, but the gadget driver remains connected. Within this time frame,
invoking vbus_event_work() will cause an error as it attempts to access
DWC3 registers for endpoint disabling after the power domain has been
completely shut down.
Abort the suspend sequence when dwc3_gadget_suspend() cannot halt the
controller and proceeds with a soft connect.
Fixes: c8540870af4c ("usb: dwc3: gadget: Improve dwc3_gadget_suspend()
and dwc3_gadget_resume()")
CC: stable(a)vger.kernel.org
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
---
Kernel panic - not syncing: Asynchronous SError Interrupt
Workqueue: events vbus_event_work
Call trace:
dump_backtrace+0xf4/0x118
show_stack+0x18/0x24
dump_stack_lvl+0x60/0x7c
dump_stack+0x18/0x3c
panic+0x16c/0x390
nmi_panic+0xa4/0xa8
arm64_serror_panic+0x6c/0x94
do_serror+0xc4/0xd0
el1h_64_error_handler+0x34/0x48
el1h_64_error+0x68/0x6c
readl+0x4c/0x8c
__dwc3_gadget_ep_disable+0x48/0x230
dwc3_gadget_ep_disable+0x50/0xc0
usb_ep_disable+0x44/0xe4
ffs_func_eps_disable+0x64/0xc8
ffs_func_set_alt+0x74/0x368
ffs_func_disable+0x18/0x28
composite_disconnect+0x90/0xec
configfs_composite_disconnect+0x64/0x88
usb_gadget_disconnect_locked+0xc0/0x168
vbus_event_work+0x3c/0x58
process_one_work+0x1e4/0x43c
worker_thread+0x25c/0x430
kthread+0x104/0x1d4
ret_from_fork+0x10/0x20
---
Changelog:
v2:
- move declarations in separate lines
- add the Fixes tag
drivers/usb/dwc3/core.c | 9 +++++++--
drivers/usb/dwc3/gadget.c | 22 +++++++++-------------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 66a08b527165..1cf1996ae1fb 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -2388,6 +2388,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
u32 reg;
int i;
+ int ret;
if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) {
dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) &
@@ -2406,7 +2407,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
case DWC3_GCTL_PRTCAP_DEVICE:
if (pm_runtime_suspended(dwc->dev))
break;
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret
synchronize_irq(dwc->irq_gadget);
dwc3_core_exit(dwc);
break;
@@ -2441,7 +2444,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
break;
if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 89a4dc8ebf94..316c1589618e 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4776,26 +4776,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
int ret;
ret = dwc3_gadget_soft_disconnect(dwc);
- if (ret)
- goto err;
-
- spin_lock_irqsave(&dwc->lock, flags);
- if (dwc->gadget_driver)
- dwc3_disconnect_gadget(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
-
- return 0;
-
-err:
/*
* Attempt to reset the controller's state. Likely no
* communication can be established until the host
* performs a port reset.
*/
- if (dwc->softconnect)
+ if (ret && dwc->softconnect) {
dwc3_gadget_soft_connect(dwc);
+ return ret;
+ }
- return ret;
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
--
2.49.0.395.g12beb8f557-goog
The patch titled
Subject: tools/testing/selftests: fix guard region test tmpfs assumption
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
tools-testing-selftests-fix-guard-region-test-tmpfs-assumption.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Subject: tools/testing/selftests: fix guard region test tmpfs assumption
Date: Fri, 25 Apr 2025 17:24:36 +0100
The current implementation of the guard region tests assume that /tmp is
mounted as tmpfs, that is shmem.
This isn't always the case, and at least one instance of a spurious test
failure has been reported as a result.
This assumption is unsafe, rushed and silly - and easily remedied by
simply using memfd, so do so.
We also have to fixup the readonly_file test to explicitly only be
applicable to file-backed cases.
Link: https://lkml.kernel.org/r/20250425162436.564002-1-lorenzo.stoakes@oracle.com
Fixes: 272f37d3e99a ("tools/selftests: expand all guard region tests to file-backed")
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reported-by: Ryan Roberts <ryan.roberts(a)arm.com>
Closes: https://lore.kernel.org/linux-mm/a2d2766b-0ab4-437b-951a-8595a7506fe9@arm.c…
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/guard-regions.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
--- a/tools/testing/selftests/mm/guard-regions.c~tools-testing-selftests-fix-guard-region-test-tmpfs-assumption
+++ a/tools/testing/selftests/mm/guard-regions.c
@@ -271,12 +271,16 @@ FIXTURE_SETUP(guard_regions)
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
setup_sighandler();
- if (variant->backing == ANON_BACKED)
+ switch (variant->backing) {
+ case ANON_BACKED:
return;
-
- self->fd = open_file(
- variant->backing == SHMEM_BACKED ? "/tmp/" : "",
- self->path);
+ case LOCAL_FILE_BACKED:
+ self->fd = open_file("", self->path);
+ break;
+ case SHMEM_BACKED:
+ self->fd = memfd_create(self->path, 0);
+ break;
+ }
/* We truncate file to at least 100 pages, tests can modify as needed. */
ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0);
@@ -1696,7 +1700,7 @@ TEST_F(guard_regions, readonly_file)
char *ptr;
int i;
- if (variant->backing == ANON_BACKED)
+ if (variant->backing != LOCAL_FILE_BACKED)
SKIP(return, "Read-only test specific to file-backed");
/* Map shared so we can populate with pattern, populate it, unmap. */
_
Patches currently in -mm which might be from lorenzo.stoakes(a)oracle.com are
maintainers-add-reverse-mapping-section.patch
maintainers-add-core-mm-section.patch
maintainers-add-mm-thp-section.patch
maintainers-add-mm-thp-section-fix.patch
tools-testing-selftests-fix-guard-region-test-tmpfs-assumption.patch
mm-vma-fix-incorrectly-disallowed-anonymous-vma-merges.patch
tools-testing-add-procmap_query-helper-functions-in-mm-self-tests.patch
tools-testing-selftests-assert-that-anon-merge-cases-behave-as-expected.patch
mm-move-mmap-vma-locking-logic-into-specific-files.patch
Previously, commit ed129ec9057f ("KVM: x86: forcibly leave nested mode
on vCPU reset") addressed an issue where a triple fault occurring in
nested mode could lead to use-after-free scenarios. However, the commit
did not handle the analogous situation for System Management Mode (SMM).
This omission results in triggering a WARN when a vCPU reset occurs
while still in SMM mode, due to the check in kvm_vcpu_reset(). This
situation was reprodused using Syzkaller by:
1) Creating a KVM VM and vCPU
2) Sending a KVM_SMI ioctl to explicitly enter SMM
3) Executing invalid instructions causing consecutive exceptions and
eventually a triple fault
The issue manifests as follows:
WARNING: CPU: 0 PID: 25506 at arch/x86/kvm/x86.c:12112
kvm_vcpu_reset+0x1d2/0x1530 arch/x86/kvm/x86.c:12112
Modules linked in:
CPU: 0 PID: 25506 Comm: syz-executor.0 Not tainted
6.1.130-syzkaller-00157-g164fe5dde9b6 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.12.0-1 04/01/2014
RIP: 0010:kvm_vcpu_reset+0x1d2/0x1530 arch/x86/kvm/x86.c:12112
Call Trace:
<TASK>
shutdown_interception+0x66/0xb0 arch/x86/kvm/svm/svm.c:2136
svm_invoke_exit_handler+0x110/0x530 arch/x86/kvm/svm/svm.c:3395
svm_handle_exit+0x424/0x920 arch/x86/kvm/svm/svm.c:3457
vcpu_enter_guest arch/x86/kvm/x86.c:10959 [inline]
vcpu_run+0x2c43/0x5a90 arch/x86/kvm/x86.c:11062
kvm_arch_vcpu_ioctl_run+0x50f/0x1cf0 arch/x86/kvm/x86.c:11283
kvm_vcpu_ioctl+0x570/0xf00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4122
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:870 [inline]
__se_sys_ioctl fs/ioctl.c:856 [inline]
__x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:856
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x35/0x80 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x6e/0xd8
Architecturally, hardware CPUs exit SMM upon receiving a triple
fault as part of a hardware reset. To reflect this behavior and
avoid triggering the WARN, this patch explicitly calls
kvm_smm_changed(vcpu, false) in the SVM-specific shutdown_interception()
handler prior to resetting the vCPU.
The initial version of this patch attempted to address the issue by calling
kvm_smm_changed() inside kvm_vcpu_reset(). However, this approach was not
architecturally accurate, as INIT is blocked during SMM and SMM should not
be exited implicitly during a generic vCPU reset. This version moves the
fix into shutdown_interception() for SVM, where the triple fault is
actually handled, and where exiting SMM explicitly is appropriate.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: ed129ec9057f ("KVM: x86: forcibly leave nested mode on vCPU reset")
Cc: stable(a)vger.kernel.org
Suggested-by: Sean Christopherson <seanjc(a)google.com>
Signed-off-by: Mikhail Lobanov <m.lobanov(a)rosa.ru>
---
v2: Move SMM exit from kvm_vcpu_reset() to SVM's shutdown_interception(),
per suggestion from Sean Christopherson <seanjc(a)google.com>.
v3: -Export kvm_smm_changed() using EXPORT_SYMBOL_GPL.
-Wrap the call to kvm_smm_changed() in svm.c with #ifdef CONFIG_KVM_SMM
to avoid build errors when SMM support is disabled.
arch/x86/kvm/smm.c | 1 +
arch/x86/kvm/svm/svm.c | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 699e551ec93b..9864c057187d 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -131,6 +131,7 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
kvm_mmu_reset_context(vcpu);
}
+EXPORT_SYMBOL_GPL(kvm_smm_changed);
void process_smi(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d5d0c5c3300b..c5470d842aed 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2231,6 +2231,10 @@ static int shutdown_interception(struct kvm_vcpu *vcpu)
*/
if (!sev_es_guest(vcpu->kvm)) {
clear_page(svm->vmcb);
+#ifdef CONFIG_KVM_SMM
+ if (is_smm(vcpu))
+ kvm_smm_changed(vcpu, false);
+#endif
kvm_vcpu_reset(vcpu, true);
}
--
2.47.2
While discussing some userfaultfd relevant issues recently, Andrea noticed
a potential ABI breakage with -EAGAIN on almost all userfaultfd ioctl()s.
Quote from Andrea, explaining how -EAGAIN was processed, and how this
should fix it (taking example of UFFDIO_COPY ioctl):
The "mmap_changing" and "stale pmd" conditions are already reported as
-EAGAIN written in the copy field, this does not change it. This change
removes the subnormal case that left copy.copy uninitialized and required
apps to explicitly set the copy field to get deterministic
behavior (which is a requirement contrary to the documentation in both
the manpage and source code). In turn there's no alteration to backwards
compatibility as result of this change because userland will find the
copy field consistently set to -EAGAIN, and not anymore sometime -EAGAIN
and sometime uninitialized.
Even then the change only can make a difference to non cooperative users
of userfaultfd, so when UFFD_FEATURE_EVENT_* is enabled, which is not
true for the vast majority of apps using userfaultfd or this unintended
uninitialized field may have been noticed sooner.
Meanwhile, since this bug existed for years, it also almost affects all
ioctl()s that was introduced later. Besides UFFDIO_ZEROPAGE, these also
get affected in the same way:
- UFFDIO_CONTINUE
- UFFDIO_POISON
- UFFDIO_MOVE
This patch should have fixed all of them.
Fixes: df2cc96e7701 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races")
Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl")
Fixes: fc71884a5f59 ("mm: userfaultfd: add new UFFDIO_POISON ioctl")
Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI")
Cc: linux-stable <stable(a)vger.kernel.org>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Reported-by: Andrea Arcangeli <aarcange(a)redhat.com>
Suggested-by: Andrea Arcangeli <aarcange(a)redhat.com>
Signed-off-by: Peter Xu <peterx(a)redhat.com>
---
fs/userfaultfd.c | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index d80f94346199..22f4bf956ba1 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1585,8 +1585,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
user_uffdio_copy = (struct uffdio_copy __user *) arg;
ret = -EAGAIN;
- if (atomic_read(&ctx->mmap_changing))
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
+ return -EFAULT;
goto out;
+ }
ret = -EFAULT;
if (copy_from_user(&uffdio_copy, user_uffdio_copy,
@@ -1641,8 +1644,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
ret = -EAGAIN;
- if (atomic_read(&ctx->mmap_changing))
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
+ return -EFAULT;
goto out;
+ }
ret = -EFAULT;
if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage,
@@ -1744,8 +1750,11 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
user_uffdio_continue = (struct uffdio_continue __user *)arg;
ret = -EAGAIN;
- if (atomic_read(&ctx->mmap_changing))
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_continue->mapped)))
+ return -EFAULT;
goto out;
+ }
ret = -EFAULT;
if (copy_from_user(&uffdio_continue, user_uffdio_continue,
@@ -1801,8 +1810,11 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long
user_uffdio_poison = (struct uffdio_poison __user *)arg;
ret = -EAGAIN;
- if (atomic_read(&ctx->mmap_changing))
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_poison->updated)))
+ return -EFAULT;
goto out;
+ }
ret = -EFAULT;
if (copy_from_user(&uffdio_poison, user_uffdio_poison,
@@ -1870,8 +1882,12 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx,
user_uffdio_move = (struct uffdio_move __user *) arg;
- if (atomic_read(&ctx->mmap_changing))
- return -EAGAIN;
+ ret = -EAGAIN;
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_move->move)))
+ return -EFAULT;
+ goto out;
+ }
if (copy_from_user(&uffdio_move, user_uffdio_move,
/* don't copy "move" last field */
--
2.48.1
Device tree bindings state that the clock is optional for UHCI platform
controllers, and some existing device trees don't provide those - such
as those for VIA/WonderMedia devices.
The driver however fails to probe now if no clock is provided, because
devm_clk_get returns an error pointer in such case.
Switch to devm_clk_get_optional instead, so that it could probe again
on those platforms where no clocks are given.
Cc: stable(a)vger.kernel.org
Fixes: 26c502701c52 ("usb: uhci: Add clk support to uhci-platform")
Signed-off-by: Alexey Charkov <alchark(a)gmail.com>
---
drivers/usb/host/uhci-platform.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c
index a7c934404ebc7ed74f64265fafa7830809979ba5..62318291f5664c9ec94f24535c71d962e28354f3 100644
--- a/drivers/usb/host/uhci-platform.c
+++ b/drivers/usb/host/uhci-platform.c
@@ -121,7 +121,7 @@ static int uhci_hcd_platform_probe(struct platform_device *pdev)
}
/* Get and enable clock if any specified */
- uhci->clk = devm_clk_get(&pdev->dev, NULL);
+ uhci->clk = devm_clk_get_optional(&pdev->dev, NULL);
if (IS_ERR(uhci->clk)) {
ret = PTR_ERR(uhci->clk);
goto err_rmr;
---
base-commit: 0af2f6be1b4281385b618cb86ad946eded089ac8
change-id: 20250425-uhci-clock-optional-9a9d09560e17
Best regards,
--
Alexey Charkov <alchark(a)gmail.com>
Resend patch series to fix cc list
There are MMC boot failures seen with V1P8_SIGNAL_ENA on Kingston eMMC
and Microcenter/Patriot SD cards on Sitara K3 boards due to the HS200
initialization sequence involving V1P8_SIGNAL_ENA. Since V1P8_SIGNAL_ENA
is optional for eMMC, do not set V1P8_SIGNAL_ENA by default for eMMC.
For SD cards we shall parse DT for ti,suppress-v1p8-ena property to
determine whether to suppress V1P8_SIGNAL_ENA. Add new ti,suppress-v1p8-ena
to am62x, am62ax, and am62px SoC dtsi files since there is no internal LDO
tied to sdhci1 interface so V1P8_SIGNAL_ENA only affects timing.
This fix was previously merged in the kernel, but was reverted due
to the "heuristics for enabling the quirk"[0]. This issue is adressed
in this patch series by adding optional ti,suppress-v1p8-ena DT property
which determines whether to apply the quirk for SD.
Changes since v2:
- Include patch 3/3
- Reword cover letter
- Reword binding patch description
- Add fixes/cc tags to driver patch
- Reorder patches according to binding patch first
- Resend to fix cc list in original v3 series
Link to v2:
https://lore.kernel.org/linux-mmc/20250417182652.3521104-1-jm@ti.com/
Link to v1:
https://lore.kernel.org/linux-mmc/20250407222702.2199047-1-jm@ti.com/
[0] https://lore.kernel.org/linux-mmc/20250127-am654-mmc-regression-v2-1-9bb39f…
Judith Mendez (3):
dt-bindings: mmc: sdhci-am654: Add ti,suppress-v1p8-ena
mmc: sdhci_am654: Add sdhci_am654_start_signal_voltage_switch
arm64: dts: ti: k3-am62*: add ti,suppress-v1p8-ena
.../devicetree/bindings/mmc/sdhci-am654.yaml | 5 +++
arch/arm64/boot/dts/ti/k3-am62-main.dtsi | 1 +
arch/arm64/boot/dts/ti/k3-am62a-main.dtsi | 1 +
.../dts/ti/k3-am62p-j722s-common-main.dtsi | 1 +
drivers/mmc/host/sdhci_am654.c | 32 +++++++++++++++++++
5 files changed, 40 insertions(+)
base-commit: 1be38f81251f6d276713c259ecf4414f82f22c29
--
2.49.0
From: Sibi Sankar <quic_sibis(a)quicinc.com>
Currently the perf and powercap protocol relies on the protocol domain
attributes, which just ensures that one fastchannel per domain, before
instantiating fastchannels for all possible message-ids. Fix this by
ensuring that each message-id supports fastchannel before initialization.
Logs:
scmi: Failed to get FC for protocol 13 [MSG_ID:6 / RES_ID:0] - ret:-95. Using regular messaging.
scmi: Failed to get FC for protocol 13 [MSG_ID:6 / RES_ID:1] - ret:-95. Using regular messaging.
scmi: Failed to get FC for protocol 13 [MSG_ID:6 / RES_ID:2] - ret:-95. Using regular messaging.
CC: stable(a)vger.kernel.org
Reported-by: Johan Hovold <johan+linaro(a)kernel.org>
Closes: https://lore.kernel.org/lkml/ZoQjAWse2YxwyRJv@hovoldconsulting.com/
Fixes: 6f9ea4dabd2d ("firmware: arm_scmi: Generalize the fast channel support")
Reviewed-by: Johan Hovold <johan+linaro(a)kernel.org>
Tested-by: Johan Hovold <johan+linaro(a)kernel.org>
Signed-off-by: Sibi Sankar <quic_sibis(a)quicinc.com>
[Cristian: Modified the condition checked to establish support or not]
Signed-off-by: Cristian Marussi <cristian.marussi(a)arm.com>
---
RFC -> V1
- picked up a few tags
Since PROTOCOL_MESSAGE_ATTRIBUTES, used to check if message_id is supported,
is a mandatory command, it cannot fail so we must bail-out NOT only if FC was
not supported for that command but also if the query fails as a whole; so the
condition checked for bailing out is modified to:
if (ret || !MSG_SUPPORTS_FASTCHANNEL(attributes)) {
---
drivers/firmware/arm_scmi/driver.c | 76 +++++++++++++++------------
drivers/firmware/arm_scmi/protocols.h | 2 +
2 files changed, 45 insertions(+), 33 deletions(-)
diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c
index 1cf18cc8e63f..0e281fca0a38 100644
--- a/drivers/firmware/arm_scmi/driver.c
+++ b/drivers/firmware/arm_scmi/driver.c
@@ -1738,6 +1738,39 @@ static int scmi_common_get_max_msg_size(const struct scmi_protocol_handle *ph)
return info->desc->max_msg_size;
}
+/**
+ * scmi_protocol_msg_check - Check protocol message attributes
+ *
+ * @ph: A reference to the protocol handle.
+ * @message_id: The ID of the message to check.
+ * @attributes: A parameter to optionally return the retrieved message
+ * attributes, in case of Success.
+ *
+ * An helper to check protocol message attributes for a specific protocol
+ * and message pair.
+ *
+ * Return: 0 on SUCCESS
+ */
+static int scmi_protocol_msg_check(const struct scmi_protocol_handle *ph,
+ u32 message_id, u32 *attributes)
+{
+ int ret;
+ struct scmi_xfer *t;
+
+ ret = xfer_get_init(ph, PROTOCOL_MESSAGE_ATTRIBUTES,
+ sizeof(__le32), 0, &t);
+ if (ret)
+ return ret;
+
+ put_unaligned_le32(message_id, t->tx.buf);
+ ret = do_xfer(ph, t);
+ if (!ret && attributes)
+ *attributes = get_unaligned_le32(t->rx.buf);
+ xfer_put(ph, t);
+
+ return ret;
+}
+
/**
* struct scmi_iterator - Iterator descriptor
* @msg: A reference to the message TX buffer; filled by @prepare_message with
@@ -1879,6 +1912,7 @@ scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph,
int ret;
u32 flags;
u64 phys_addr;
+ u32 attributes;
u8 size;
void __iomem *addr;
struct scmi_xfer *t;
@@ -1887,6 +1921,15 @@ scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph,
struct scmi_msg_resp_desc_fc *resp;
const struct scmi_protocol_instance *pi = ph_to_pi(ph);
+ /* Check if the MSG_ID supports fastchannel */
+ ret = scmi_protocol_msg_check(ph, message_id, &attributes);
+ if (ret || !MSG_SUPPORTS_FASTCHANNEL(attributes)) {
+ dev_dbg(ph->dev,
+ "Skip FC init for 0x%02X/%d domain:%d - ret:%d\n",
+ pi->proto->id, message_id, domain, ret);
+ return;
+ }
+
if (!p_addr) {
ret = -EINVAL;
goto err_out;
@@ -2004,39 +2047,6 @@ static void scmi_common_fastchannel_db_ring(struct scmi_fc_db_info *db)
SCMI_PROTO_FC_RING_DB(64);
}
-/**
- * scmi_protocol_msg_check - Check protocol message attributes
- *
- * @ph: A reference to the protocol handle.
- * @message_id: The ID of the message to check.
- * @attributes: A parameter to optionally return the retrieved message
- * attributes, in case of Success.
- *
- * An helper to check protocol message attributes for a specific protocol
- * and message pair.
- *
- * Return: 0 on SUCCESS
- */
-static int scmi_protocol_msg_check(const struct scmi_protocol_handle *ph,
- u32 message_id, u32 *attributes)
-{
- int ret;
- struct scmi_xfer *t;
-
- ret = xfer_get_init(ph, PROTOCOL_MESSAGE_ATTRIBUTES,
- sizeof(__le32), 0, &t);
- if (ret)
- return ret;
-
- put_unaligned_le32(message_id, t->tx.buf);
- ret = do_xfer(ph, t);
- if (!ret && attributes)
- *attributes = get_unaligned_le32(t->rx.buf);
- xfer_put(ph, t);
-
- return ret;
-}
-
static const struct scmi_proto_helpers_ops helpers_ops = {
.extended_name_get = scmi_common_extended_name_get,
.get_max_msg_size = scmi_common_get_max_msg_size,
diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h
index aaee57cdcd55..d62c4469d1fd 100644
--- a/drivers/firmware/arm_scmi/protocols.h
+++ b/drivers/firmware/arm_scmi/protocols.h
@@ -31,6 +31,8 @@
#define SCMI_PROTOCOL_VENDOR_BASE 0x80
+#define MSG_SUPPORTS_FASTCHANNEL(x) ((x) & BIT(0))
+
enum scmi_common_cmd {
PROTOCOL_VERSION = 0x0,
PROTOCOL_ATTRIBUTES = 0x1,
--
2.47.0
From: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
In the latest kernel versions system crashes were noticed occasionally
during suspend/resume. This occurs because the RZ SSI suspend trigger
(called from snd_soc_suspend()) is executed after rz_ssi_pm_ops->suspend()
and it accesses IP registers. After the rz_ssi_pm_ops->suspend() is
executed the IP clocks are disabled and its reset line is asserted.
Since snd_soc_suspend() is invoked through snd_soc_pm_ops->suspend(),
snd_soc_pm_ops is associated with soc_driver (defined in
sound/soc/soc-core.c), and there is no parent-child relationship between
soc_driver and rz_ssi_driver the power management subsystem does not
enforce a specific suspend/resume order between the RZ SSI platform driver
and soc_driver.
To ensure that the suspend/resume function of rz-ssi is executed after
snd_soc_suspend(), use NOIRQ_SYSTEM_SLEEP_PM_OPS().
Fixes: 1fc778f7c833 ("ASoC: renesas: rz-ssi: Add suspend to RAM support")
Cc: stable(a)vger.kernel.org
Signed-off-by: Claudiu Beznea <claudiu.beznea.uj(a)bp.renesas.com>
---
sound/soc/renesas/rz-ssi.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c
index 3a0af4ca7ab6..0f7458a43901 100644
--- a/sound/soc/renesas/rz-ssi.c
+++ b/sound/soc/renesas/rz-ssi.c
@@ -1244,7 +1244,7 @@ static int rz_ssi_runtime_resume(struct device *dev)
static const struct dev_pm_ops rz_ssi_pm_ops = {
RUNTIME_PM_OPS(rz_ssi_runtime_suspend, rz_ssi_runtime_resume, NULL)
- SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+ NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
};
static struct platform_driver rz_ssi_driver = {
--
2.43.0
Commit c141ecc3cecd ("of: Warn when of_property_read_bool() is used on
non-boolean properties") added a warning when trying to parse a property
with a value (boolean properties are defined as: absent = false, present
without any value = true). This causes a warning from meson-card-utils.
meson-card-utils needs to know about the existence of the
"audio-routing" and/or "audio-widgets" properties in order to properly
parse them. Switch to of_property_present() in order to silence the
following warning messages during boot:
OF: /sound: Read of boolean property 'audio-routing' with a value.
OF: /sound: Read of boolean property 'audio-widgets' with a value.
Fixes: 7864a79f37b5 ("ASoC: meson: add axg sound card support")
Tested-by: Christian Hewitt <christianshewitt(a)gmail.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Martin Blumenstingl <martin.blumenstingl(a)googlemail.com>
---
sound/soc/meson/meson-card-utils.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/soc/meson/meson-card-utils.c b/sound/soc/meson/meson-card-utils.c
index cfc7f6e41ab5..68531183fb60 100644
--- a/sound/soc/meson/meson-card-utils.c
+++ b/sound/soc/meson/meson-card-utils.c
@@ -231,7 +231,7 @@ static int meson_card_parse_of_optional(struct snd_soc_card *card,
const char *p))
{
/* If property is not provided, don't fail ... */
- if (!of_property_read_bool(card->dev->of_node, propname))
+ if (!of_property_present(card->dev->of_node, propname))
return 0;
/* ... but do fail if it is provided and the parsing fails */
--
2.49.0
Typically HDMI to MIPI CSI-2 bridges have a pin to signal image data is
being received. On the host side this is wired to a GPIO for polling or
interrupts. This includes the Lontium HDMI to MIPI CSI-2 bridges
lt6911uxe and lt6911uxc.
The GPIO "hpd" is used already by other HDMI to CSI-2 bridges, use it
here as well.
Signed-off-by: Dongcheng Yan <dongcheng.yan(a)intel.com>
---
drivers/platform/x86/intel/int3472/common.h | 1 +
drivers/platform/x86/intel/int3472/discrete.c | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/drivers/platform/x86/intel/int3472/common.h b/drivers/platform/x86/intel/int3472/common.h
index 145dec66df64..db4cd3720e24 100644
--- a/drivers/platform/x86/intel/int3472/common.h
+++ b/drivers/platform/x86/intel/int3472/common.h
@@ -22,6 +22,7 @@
#define INT3472_GPIO_TYPE_POWER_ENABLE 0x0b
#define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c
#define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d
+#define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13
#define INT3472_PDEV_MAX_NAME_LEN 23
#define INT3472_MAX_SENSOR_GPIOS 3
diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c
index 30ff8f3ea1f5..26215d1b63a2 100644
--- a/drivers/platform/x86/intel/int3472/discrete.c
+++ b/drivers/platform/x86/intel/int3472/discrete.c
@@ -186,6 +186,10 @@ static void int3472_get_con_id_and_polarity(struct acpi_device *adev, u8 *type,
*con_id = "privacy-led";
*gpio_flags = GPIO_ACTIVE_HIGH;
break;
+ case INT3472_GPIO_TYPE_HOTPLUG_DETECT:
+ *con_id = "hpd";
+ *gpio_flags = GPIO_ACTIVE_HIGH;
+ break;
case INT3472_GPIO_TYPE_POWER_ENABLE:
*con_id = "power-enable";
*gpio_flags = GPIO_ACTIVE_HIGH;
@@ -212,6 +216,7 @@ static void int3472_get_con_id_and_polarity(struct acpi_device *adev, u8 *type,
* 0x0b Power enable
* 0x0c Clock enable
* 0x0d Privacy LED
+ * 0x13 Hotplug detect
*
* There are some known platform specific quirks where that does not quite
* hold up; for example where a pin with type 0x01 (Power down) is mapped to
@@ -281,6 +286,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares,
switch (type) {
case INT3472_GPIO_TYPE_RESET:
case INT3472_GPIO_TYPE_POWERDOWN:
+ case INT3472_GPIO_TYPE_HOTPLUG_DETECT:
ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, con_id, gpio_flags);
if (ret)
err_msg = "Failed to map GPIO pin to sensor\n";
base-commit: 01c6df60d5d4ae00cd5c1648818744838bba7763
--
2.34.1
On Tue, Apr 22, 2025 at 07:33:03PM +0530, Hardik Gohil wrote:
> >
> > > I'm sorry, I have no idea what to do here :(
> > >
> please add all the patches 1/3,2/3 and 3/3 to v5.4.y.
Please do not post in html format :(
Anyway, I do not see patches 2/3 or 3/3 at all.
Please resend them all as a full patch series, don't just give links.
thanks,
greg k-h
When v4l2_subdev_init_finalize() fails no changes have been made to
the runtime-pm device state yet, so the probe_error_media_entity_cleanup
rollback path should not touch the runtime-pm device state.
Instead this should be done from the probe_error_v4l2_subdev_cleanup
rollback path. Note the pm_runtime_xxx() calls are put above
the v4l2_subdev_cleanup() call to have the reverse call order of probe().
Fixes: 289c25923ecd ("media: ov2740: Use sub-device active state")
Cc: stable(a)vger.kernel.org
Reviewed-by: Bingbu Cao <bingbu.cao(a)intel.com>
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
Changes in v2:
- Add Fixes: tag
---
drivers/media/i2c/ov2740.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/media/i2c/ov2740.c b/drivers/media/i2c/ov2740.c
index 80d151e8ae29..6cf461e3373c 100644
--- a/drivers/media/i2c/ov2740.c
+++ b/drivers/media/i2c/ov2740.c
@@ -1456,12 +1456,12 @@ static int ov2740_probe(struct i2c_client *client)
return 0;
probe_error_v4l2_subdev_cleanup:
+ pm_runtime_disable(&client->dev);
+ pm_runtime_set_suspended(&client->dev);
v4l2_subdev_cleanup(&ov2740->sd);
probe_error_media_entity_cleanup:
media_entity_cleanup(&ov2740->sd.entity);
- pm_runtime_disable(&client->dev);
- pm_runtime_set_suspended(&client->dev);
probe_error_v4l2_ctrl_handler_free:
v4l2_ctrl_handler_free(ov2740->sd.ctrl_handler);
--
2.49.0
According to the AD9832 datasheet (Table 10, D12 description), setting
the RESET bit forces the phase accumulator to zero, which corresponds to
a full-scale DC output, rather than disabling the output signal.
The correct way to disable the output and enter a low-power state is to
set the AD9832_SLEEP bit (Table 10, D13 description), which powers down
the internal DAC current sources and disables internal clocks.
Fixes: ea707584bac1 ("Staging: IIO: DDS: AD9832 / AD9835 driver")
Cc: stable(a)vger.kernel.org
Signed-off-by: Gabriel Shahrouzi <gshahrouzi(a)gmail.com>
---
v3 -> v4:
- Rebase changes ontop of most recent changes.
v2 -> v3:
v1 -> v2:
---
drivers/staging/iio/frequency/ad9832.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/iio/frequency/ad9832.c b/drivers/staging/iio/frequency/ad9832.c
index 49388da5a684a..2e555084ff98a 100644
--- a/drivers/staging/iio/frequency/ad9832.c
+++ b/drivers/staging/iio/frequency/ad9832.c
@@ -236,7 +236,7 @@ static ssize_t ad9832_write(struct device *dev, struct device_attribute *attr,
if (val)
st->ctrl_src &= ~(AD9832_RESET | AD9832_SLEEP | AD9832_CLR);
else
- st->ctrl_src |= FIELD_PREP(AD9832_RESET, 1);
+ st->ctrl_src |= FIELD_PREP(AD9832_SLEEP, 1);
st->data = cpu_to_be16(FIELD_PREP(AD9832_CMD_MSK, AD9832_CMD_SLEEPRESCLR) |
st->ctrl_src);
--
2.43.0
On 4/24/25 5:55 PM, Jack Vogel wrote:
>
>
>> On Apr 24, 2025, at 16:15, Dave Jiang <dave.jiang(a)intel.com> wrote:
>>
>>
>>
>> On 4/24/25 3:59 PM, Jack Vogel wrote:
>>>
>>>
>>>> On Apr 24, 2025, at 15:40, Dave Jiang <dave.jiang(a)intel.com> wrote:
>>>>
>>>>
>>>>
>>>> On 4/24/25 3:34 PM, Jack Vogel wrote:
>>>>> I am having test issues with this patch, test system is running OL9, basically RHEL 9.5, the kernel boots ok, and the dmesg is clean… but the tests in accel-config dont pass. Specifically the crypto tests, this is due to vfio_pci_core not loading. Right now I’m not sure if any of this is my mistake, but at least it’s something I need to keep looking at.
>>>>>
>>>>> Also, since I saw that issue on the latest I did a backport to our UEK8 kernel which is 6.12.23, and on that kernel it still exhibited these messages on boot:
>>>>>
>>>>> *idxd*0000:6a:01.0: enabling device (0144 -> 0146)
>>>>>
>>>>> [ 21.112733] *idxd*0000:6a:01.0: failed to attach device pasid 1, domain type 4
>>>>>
>>>>> [ 21.120770] *idxd*0000:6a:01.0: No in-kernel DMA with PASID. -95
>>>>>
>>>>>
>>>>> Again, maybe an issue in my backporting… however I’d like to be sure.
>>>>
>>>> Can you verify against latest upstream kernel plus the patch and see if you still see the error?
>>>>
>>>> DJ
>>>
>>> Yes, the kernel was build from the tip this morning. Like I said, it got no messages booting up, all looked fine. But when running the actual test suite in the accel-config tarball specifically the iaa crypt tests, they failed and the dmesg was from vfio_pci_core failed to load with an unknown symbol.
>>
>> I'm not sure what the test consists of (haven't worked on this device for almost 2 years). But usually the device is either bound to the idxd driver or the vfio_pci driver. Not both. And if the idxd driver didn't emit any errors while loading, then the test failure may be something else...
>>
>> Another way to verify is to set CONFIG_IOMMU_DEFAULT_DMA_LAZY vs PASSTHROUGH. If the tests still fail then it's something else.
>>
>> DJ
>
> There isn’t a lot of ways to test this driver, yes DPDK will use it, but apart from that? So, the tests that are part of your (Intel) accel-config package are the only convenient way that I’ve found to do so. It is also convenient, there is a “make check” target in the top Makefile that will invoke both set of DMA tests, and some crypto (IAA) tests. I have been planning to give this to our QA group as a verification suite. Do you have an alternative to this?
This should be the right test package. Let me talk to our QA people and see if there are any issues. We can resolve this off list. If there's any issues that end up pointing to the original bug, we can raise that then.
DJ
>
> Jack
>
>>
>>>
>>> This sounds like the module was wrong, but i would think it was installed with the v6.15 kernel…..
>>>
>>> Jack
>>>
>>>>
>>>>>
>>>>> Cheers,
>>>>>
>>>>> Jack
>>>>>
>>>>>
>>>>>> On Apr 23, 2025, at 20:41, Lu Baolu <baolu.lu(a)linux.intel.com> wrote:
>>>>>>
>>>>>> The idxd driver attaches the default domain to a PASID of the device to
>>>>>> perform kernel DMA using that PASID. The domain is attached to the
>>>>>> device's PASID through iommu_attach_device_pasid(), which checks if the
>>>>>> domain->owner matches the iommu_ops retrieved from the device. If they
>>>>>> do not match, it returns a failure.
>>>>>>
>>>>>> if (ops != domain->owner || pasid == IOMMU_NO_PASID)
>>>>>> return -EINVAL;
>>>>>>
>>>>>> The static identity domain implemented by the intel iommu driver doesn't
>>>>>> specify the domain owner. Therefore, kernel DMA with PASID doesn't work
>>>>>> for the idxd driver if the device translation mode is set to passthrough.
>>>>>>
>>>>>> Generally the owner field of static domains are not set because they are
>>>>>> already part of iommu ops. Add a helper domain_iommu_ops_compatible()
>>>>>> that checks if a domain is compatible with the device's iommu ops. This
>>>>>> helper explicitly allows the static blocked and identity domains associated
>>>>>> with the device's iommu_ops to be considered compatible.
>>>>>>
>>>>>> Fixes: 2031c469f816 ("iommu/vt-d: Add support for static identity domain")
>>>>>> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220031
>>>>>> Cc: stable(a)vger.kernel.org
>>>>>> Suggested-by: Jason Gunthorpe <jgg(a)nvidia.com>
>>>>>> Link: https://lore.kernel.org/linux-iommu/20250422191554.GC1213339@ziepe.ca/
>>>>>> Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
>>>>>> Reviewed-by: Dave Jiang <dave.jiang(a)intel.com>
>>>>>> Reviewed-by: Robin Murphy <robin.murphy(a)arm.com>
>>>>>> ---
>>>>>> drivers/iommu/iommu.c | 21 ++++++++++++++++++---
>>>>>> 1 file changed, 18 insertions(+), 3 deletions(-)
>>>>>>
>>>>>> Change log:
>>>>>> v3:
>>>>>> - Convert all places checking domain->owner to the new helper.
>>>>>> v2: https://lore.kernel.org/linux-iommu/20250423021839.2189204-1-baolu.lu@linux…
>>>>>> - Make the solution generic for all static domains as suggested by
>>>>>> Jason.
>>>>>> v1: https://lore.kernel.org/linux-iommu/20250422075422.2084548-1-baolu.lu@linux…
>>>>>>
>>>>>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>>>>>> index 4f91a740c15f..b26fc3ed9f01 100644
>>>>>> --- a/drivers/iommu/iommu.c
>>>>>> +++ b/drivers/iommu/iommu.c
>>>>>> @@ -2204,6 +2204,19 @@ static void *iommu_make_pasid_array_entry(struct iommu_domain *domain,
>>>>>> return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
>>>>>> }
>>>>>>
>>>>>> +static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
>>>>>> +struct iommu_domain *domain)
>>>>>> +{
>>>>>> +if (domain->owner == ops)
>>>>>> +return true;
>>>>>> +
>>>>>> +/* For static domains, owner isn't set. */
>>>>>> +if (domain == ops->blocked_domain || domain == ops->identity_domain)
>>>>>> +return true;
>>>>>> +
>>>>>> +return false;
>>>>>> +}
>>>>>> +
>>>>>> static int __iommu_attach_group(struct iommu_domain *domain,
>>>>>> struct iommu_group *group)
>>>>>> {
>>>>>> @@ -2214,7 +2227,8 @@ static int __iommu_attach_group(struct iommu_domain *domain,
>>>>>> return -EBUSY;
>>>>>>
>>>>>> dev = iommu_group_first_dev(group);
>>>>>> -if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
>>>>>> +if (!dev_has_iommu(dev) ||
>>>>>> + !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain))
>>>>>> return -EINVAL;
>>>>>>
>>>>>> return __iommu_group_set_domain(group, domain);
>>>>>> @@ -3435,7 +3449,8 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
>>>>>> !ops->blocked_domain->ops->set_dev_pasid)
>>>>>> return -EOPNOTSUPP;
>>>>>>
>>>>>> -if (ops != domain->owner || pasid == IOMMU_NO_PASID)
>>>>>> +if (!domain_iommu_ops_compatible(ops, domain) ||
>>>>>> + pasid == IOMMU_NO_PASID)
>>>>>> return -EINVAL;
>>>>>>
>>>>>> mutex_lock(&group->mutex);
>>>>>> @@ -3511,7 +3526,7 @@ int iommu_replace_device_pasid(struct iommu_domain *domain,
>>>>>> if (!domain->ops->set_dev_pasid)
>>>>>> return -EOPNOTSUPP;
>>>>>>
>>>>>> -if (dev_iommu_ops(dev) != domain->owner ||
>>>>>> +if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) ||
>>>>>> pasid == IOMMU_NO_PASID || !handle)
>>>>>> return -EINVAL;
>>>>>>
>>>>>> --
>>>>>> 2.43.0
>
The patch titled
Subject: mm/userfaultfd: fix uninitialized output field for -EAGAIN race
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-userfaultfd-fix-uninitialized-output-field-for-eagain-race.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/userfaultfd: fix uninitialized output field for -EAGAIN race
Date: Thu, 24 Apr 2025 17:57:28 -0400
While discussing some userfaultfd relevant issues recently, Andrea noticed
a potential ABI breakage with -EAGAIN on almost all userfaultfd ioctl()s.
Quote from Andrea, explaining how -EAGAIN was processed, and how this
should fix it (taking example of UFFDIO_COPY ioctl):
The "mmap_changing" and "stale pmd" conditions are already reported as
-EAGAIN written in the copy field, this does not change it. This change
removes the subnormal case that left copy.copy uninitialized and required
apps to explicitly set the copy field to get deterministic
behavior (which is a requirement contrary to the documentation in both
the manpage and source code). In turn there's no alteration to backwards
compatibility as result of this change because userland will find the
copy field consistently set to -EAGAIN, and not anymore sometime -EAGAIN
and sometime uninitialized.
Even then the change only can make a difference to non cooperative users
of userfaultfd, so when UFFD_FEATURE_EVENT_* is enabled, which is not
true for the vast majority of apps using userfaultfd or this unintended
uninitialized field may have been noticed sooner.
Meanwhile, since this bug existed for years, it also almost affects all
ioctl()s that was introduced later. Besides UFFDIO_ZEROPAGE, these also
get affected in the same way:
- UFFDIO_CONTINUE
- UFFDIO_POISON
- UFFDIO_MOVE
This patch should have fixed all of them.
Link: https://lkml.kernel.org/r/20250424215729.194656-2-peterx@redhat.com
Fixes: df2cc96e7701 ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races")
Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl")
Fixes: fc71884a5f59 ("mm: userfaultfd: add new UFFDIO_POISON ioctl")
Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Reported-by: Andrea Arcangeli <aarcange(a)redhat.com>
Suggested-by: Andrea Arcangeli <aarcange(a)redhat.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/userfaultfd.c | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
--- a/fs/userfaultfd.c~mm-userfaultfd-fix-uninitialized-output-field-for-eagain-race
+++ a/fs/userfaultfd.c
@@ -1585,8 +1585,11 @@ static int userfaultfd_copy(struct userf
user_uffdio_copy = (struct uffdio_copy __user *) arg;
ret = -EAGAIN;
- if (atomic_read(&ctx->mmap_changing))
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
+ return -EFAULT;
goto out;
+ }
ret = -EFAULT;
if (copy_from_user(&uffdio_copy, user_uffdio_copy,
@@ -1641,8 +1644,11 @@ static int userfaultfd_zeropage(struct u
user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
ret = -EAGAIN;
- if (atomic_read(&ctx->mmap_changing))
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
+ return -EFAULT;
goto out;
+ }
ret = -EFAULT;
if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage,
@@ -1744,8 +1750,11 @@ static int userfaultfd_continue(struct u
user_uffdio_continue = (struct uffdio_continue __user *)arg;
ret = -EAGAIN;
- if (atomic_read(&ctx->mmap_changing))
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_continue->mapped)))
+ return -EFAULT;
goto out;
+ }
ret = -EFAULT;
if (copy_from_user(&uffdio_continue, user_uffdio_continue,
@@ -1801,8 +1810,11 @@ static inline int userfaultfd_poison(str
user_uffdio_poison = (struct uffdio_poison __user *)arg;
ret = -EAGAIN;
- if (atomic_read(&ctx->mmap_changing))
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_poison->updated)))
+ return -EFAULT;
goto out;
+ }
ret = -EFAULT;
if (copy_from_user(&uffdio_poison, user_uffdio_poison,
@@ -1870,8 +1882,12 @@ static int userfaultfd_move(struct userf
user_uffdio_move = (struct uffdio_move __user *) arg;
- if (atomic_read(&ctx->mmap_changing))
- return -EAGAIN;
+ ret = -EAGAIN;
+ if (unlikely(atomic_read(&ctx->mmap_changing))) {
+ if (unlikely(put_user(ret, &user_uffdio_move->move)))
+ return -EFAULT;
+ goto out;
+ }
if (copy_from_user(&uffdio_move, user_uffdio_move,
/* don't copy "move" last field */
_
Patches currently in -mm which might be from peterx(a)redhat.com are
mm-userfaultfd-fix-uninitialized-output-field-for-eagain-race.patch
mm-selftests-add-a-test-to-verify-mmap_changing-race-with-eagain.patch
Ricardo reported a KASAN discovered use after free in v6.6-stable.
The syzbot starts a BPF program via xdp_test_run_batch() which assigns
ri->tgt_value via dev_hash_map_redirect() and the return code isn't
XDP_REDIRECT it looks like nonsense. So the output in
bpf_warn_invalid_xdp_action() appears once.
Then the TUN driver runs another BPF program (on the same CPU) which
returns XDP_REDIRECT without setting ri->tgt_value first. It invokes
bpf_trace_printk() to print four characters and obtain the required
return value. This is enough to get xdp_do_redirect() invoked which
then accesses the pointer in tgt_value which might have been already
deallocated.
This problem does not affect upstream because since commit
401cb7dae8130 ("net: Reference bpf_redirect_info via task_struct on PREEMPT_RT.")
the per-CPU variable is referenced via task's task_struct and exists on
the stack during NAPI callback. Therefore it is cleared once before the
first invocation and remains valid within the RCU section of the NAPI
callback.
Instead of performing the huge backport of the commit (plus its fix ups)
here is an alternative version which only resets the variable in
question prior invoking the BPF program.
Acked-by: Toke Høiland-Jørgensen <toke(a)kernel.org>
Reported-by: Ricardo Cañuelo Navarro <rcn(a)igalia.com>
Closes: https://lore.kernel.org/all/20250226-20250204-kasan-slab-use-after-free-rea…
Fixes: 97f91a7cf04ff ("bpf: add bpf_redirect_map helper routine")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
---
I discussed this with Toke, thread starts at
https://lore.kernel.org/all/20250313183911.SPAmGLyw@linutronix.de/
The commit, which this by accident, is part of v6.11-rc1.
I added the commit introducing map redirects as the origin of the
problem which is v4.14-rc1. The code is a bit different there but it
seems to work similar.
Affected kernels would be from v4.14 to v6.10.
include/net/xdp.h | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/include/net/xdp.h b/include/net/xdp.h
index de08c8e0d1348..b39ac83618a55 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -486,7 +486,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
* under local_bh_disable(), which provides the needed RCU protection
* for accessing map entries.
*/
- u32 act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ u32 act;
+
+ if (ri->map_id || ri->map_type) {
+ ri->map_id = 0;
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ }
+ act = __bpf_prog_run(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) {
if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev))
--
2.49.0
On 4/24/25 3:59 PM, Jack Vogel wrote:
>
>
>> On Apr 24, 2025, at 15:40, Dave Jiang <dave.jiang(a)intel.com> wrote:
>>
>>
>>
>> On 4/24/25 3:34 PM, Jack Vogel wrote:
>>> I am having test issues with this patch, test system is running OL9, basically RHEL 9.5, the kernel boots ok, and the dmesg is clean… but the tests in accel-config dont pass. Specifically the crypto tests, this is due to vfio_pci_core not loading. Right now I’m not sure if any of this is my mistake, but at least it’s something I need to keep looking at.
>>>
>>> Also, since I saw that issue on the latest I did a backport to our UEK8 kernel which is 6.12.23, and on that kernel it still exhibited these messages on boot:
>>>
>>> *idxd*0000:6a:01.0: enabling device (0144 -> 0146)
>>>
>>> [ 21.112733] *idxd*0000:6a:01.0: failed to attach device pasid 1, domain type 4
>>>
>>> [ 21.120770] *idxd*0000:6a:01.0: No in-kernel DMA with PASID. -95
>>>
>>>
>>> Again, maybe an issue in my backporting… however I’d like to be sure.
>>
>> Can you verify against latest upstream kernel plus the patch and see if you still see the error?
>>
>> DJ
>
> Yes, the kernel was build from the tip this morning. Like I said, it got no messages booting up, all looked fine. But when running the actual test suite in the accel-config tarball specifically the iaa crypt tests, they failed and the dmesg was from vfio_pci_core failed to load with an unknown symbol.
I'm not sure what the test consists of (haven't worked on this device for almost 2 years). But usually the device is either bound to the idxd driver or the vfio_pci driver. Not both. And if the idxd driver didn't emit any errors while loading, then the test failure may be something else...
Another way to verify is to set CONFIG_IOMMU_DEFAULT_DMA_LAZY vs PASSTHROUGH. If the tests still fail then it's something else.
DJ
>
> This sounds like the module was wrong, but i would think it was installed with the v6.15 kernel…..
>
> Jack
>
>>
>>>
>>> Cheers,
>>>
>>> Jack
>>>
>>>
>>>> On Apr 23, 2025, at 20:41, Lu Baolu <baolu.lu(a)linux.intel.com> wrote:
>>>>
>>>> The idxd driver attaches the default domain to a PASID of the device to
>>>> perform kernel DMA using that PASID. The domain is attached to the
>>>> device's PASID through iommu_attach_device_pasid(), which checks if the
>>>> domain->owner matches the iommu_ops retrieved from the device. If they
>>>> do not match, it returns a failure.
>>>>
>>>> if (ops != domain->owner || pasid == IOMMU_NO_PASID)
>>>> return -EINVAL;
>>>>
>>>> The static identity domain implemented by the intel iommu driver doesn't
>>>> specify the domain owner. Therefore, kernel DMA with PASID doesn't work
>>>> for the idxd driver if the device translation mode is set to passthrough.
>>>>
>>>> Generally the owner field of static domains are not set because they are
>>>> already part of iommu ops. Add a helper domain_iommu_ops_compatible()
>>>> that checks if a domain is compatible with the device's iommu ops. This
>>>> helper explicitly allows the static blocked and identity domains associated
>>>> with the device's iommu_ops to be considered compatible.
>>>>
>>>> Fixes: 2031c469f816 ("iommu/vt-d: Add support for static identity domain")
>>>> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220031
>>>> Cc: stable(a)vger.kernel.org
>>>> Suggested-by: Jason Gunthorpe <jgg(a)nvidia.com>
>>>> Link: https://lore.kernel.org/linux-iommu/20250422191554.GC1213339@ziepe.ca/
>>>> Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
>>>> Reviewed-by: Dave Jiang <dave.jiang(a)intel.com>
>>>> Reviewed-by: Robin Murphy <robin.murphy(a)arm.com>
>>>> ---
>>>> drivers/iommu/iommu.c | 21 ++++++++++++++++++---
>>>> 1 file changed, 18 insertions(+), 3 deletions(-)
>>>>
>>>> Change log:
>>>> v3:
>>>> - Convert all places checking domain->owner to the new helper.
>>>> v2: https://lore.kernel.org/linux-iommu/20250423021839.2189204-1-baolu.lu@linux…
>>>> - Make the solution generic for all static domains as suggested by
>>>> Jason.
>>>> v1: https://lore.kernel.org/linux-iommu/20250422075422.2084548-1-baolu.lu@linux…
>>>>
>>>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>>>> index 4f91a740c15f..b26fc3ed9f01 100644
>>>> --- a/drivers/iommu/iommu.c
>>>> +++ b/drivers/iommu/iommu.c
>>>> @@ -2204,6 +2204,19 @@ static void *iommu_make_pasid_array_entry(struct iommu_domain *domain,
>>>> return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
>>>> }
>>>>
>>>> +static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
>>>> +struct iommu_domain *domain)
>>>> +{
>>>> +if (domain->owner == ops)
>>>> +return true;
>>>> +
>>>> +/* For static domains, owner isn't set. */
>>>> +if (domain == ops->blocked_domain || domain == ops->identity_domain)
>>>> +return true;
>>>> +
>>>> +return false;
>>>> +}
>>>> +
>>>> static int __iommu_attach_group(struct iommu_domain *domain,
>>>> struct iommu_group *group)
>>>> {
>>>> @@ -2214,7 +2227,8 @@ static int __iommu_attach_group(struct iommu_domain *domain,
>>>> return -EBUSY;
>>>>
>>>> dev = iommu_group_first_dev(group);
>>>> -if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
>>>> +if (!dev_has_iommu(dev) ||
>>>> + !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain))
>>>> return -EINVAL;
>>>>
>>>> return __iommu_group_set_domain(group, domain);
>>>> @@ -3435,7 +3449,8 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
>>>> !ops->blocked_domain->ops->set_dev_pasid)
>>>> return -EOPNOTSUPP;
>>>>
>>>> -if (ops != domain->owner || pasid == IOMMU_NO_PASID)
>>>> +if (!domain_iommu_ops_compatible(ops, domain) ||
>>>> + pasid == IOMMU_NO_PASID)
>>>> return -EINVAL;
>>>>
>>>> mutex_lock(&group->mutex);
>>>> @@ -3511,7 +3526,7 @@ int iommu_replace_device_pasid(struct iommu_domain *domain,
>>>> if (!domain->ops->set_dev_pasid)
>>>> return -EOPNOTSUPP;
>>>>
>>>> -if (dev_iommu_ops(dev) != domain->owner ||
>>>> +if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) ||
>>>> pasid == IOMMU_NO_PASID || !handle)
>>>> return -EINVAL;
>>>>
>>>> --
>>>> 2.43.0
>
On 4/24/25 3:34 PM, Jack Vogel wrote:
> I am having test issues with this patch, test system is running OL9, basically RHEL 9.5, the kernel boots ok, and the dmesg is clean… but the tests in accel-config dont pass. Specifically the crypto tests, this is due to vfio_pci_core not loading. Right now I’m not sure if any of this is my mistake, but at least it’s something I need to keep looking at.
>
> Also, since I saw that issue on the latest I did a backport to our UEK8 kernel which is 6.12.23, and on that kernel it still exhibited these messages on boot:
>
> *idxd*0000:6a:01.0: enabling device (0144 -> 0146)
>
> [ 21.112733] *idxd*0000:6a:01.0: failed to attach device pasid 1, domain type 4
>
> [ 21.120770] *idxd*0000:6a:01.0: No in-kernel DMA with PASID. -95
>
>
> Again, maybe an issue in my backporting… however I’d like to be sure.
Can you verify against latest upstream kernel plus the patch and see if you still see the error?
DJ
>
> Cheers,
>
> Jack
>
>
>> On Apr 23, 2025, at 20:41, Lu Baolu <baolu.lu(a)linux.intel.com> wrote:
>>
>> The idxd driver attaches the default domain to a PASID of the device to
>> perform kernel DMA using that PASID. The domain is attached to the
>> device's PASID through iommu_attach_device_pasid(), which checks if the
>> domain->owner matches the iommu_ops retrieved from the device. If they
>> do not match, it returns a failure.
>>
>> if (ops != domain->owner || pasid == IOMMU_NO_PASID)
>> return -EINVAL;
>>
>> The static identity domain implemented by the intel iommu driver doesn't
>> specify the domain owner. Therefore, kernel DMA with PASID doesn't work
>> for the idxd driver if the device translation mode is set to passthrough.
>>
>> Generally the owner field of static domains are not set because they are
>> already part of iommu ops. Add a helper domain_iommu_ops_compatible()
>> that checks if a domain is compatible with the device's iommu ops. This
>> helper explicitly allows the static blocked and identity domains associated
>> with the device's iommu_ops to be considered compatible.
>>
>> Fixes: 2031c469f816 ("iommu/vt-d: Add support for static identity domain")
>> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220031
>> Cc: stable(a)vger.kernel.org
>> Suggested-by: Jason Gunthorpe <jgg(a)nvidia.com>
>> Link: https://lore.kernel.org/linux-iommu/20250422191554.GC1213339@ziepe.ca/
>> Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
>> Reviewed-by: Dave Jiang <dave.jiang(a)intel.com>
>> Reviewed-by: Robin Murphy <robin.murphy(a)arm.com>
>> ---
>> drivers/iommu/iommu.c | 21 ++++++++++++++++++---
>> 1 file changed, 18 insertions(+), 3 deletions(-)
>>
>> Change log:
>> v3:
>> - Convert all places checking domain->owner to the new helper.
>> v2: https://lore.kernel.org/linux-iommu/20250423021839.2189204-1-baolu.lu@linux…
>> - Make the solution generic for all static domains as suggested by
>> Jason.
>> v1: https://lore.kernel.org/linux-iommu/20250422075422.2084548-1-baolu.lu@linux…
>>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 4f91a740c15f..b26fc3ed9f01 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -2204,6 +2204,19 @@ static void *iommu_make_pasid_array_entry(struct iommu_domain *domain,
>> return xa_tag_pointer(domain, IOMMU_PASID_ARRAY_DOMAIN);
>> }
>>
>> +static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
>> +struct iommu_domain *domain)
>> +{
>> +if (domain->owner == ops)
>> +return true;
>> +
>> +/* For static domains, owner isn't set. */
>> +if (domain == ops->blocked_domain || domain == ops->identity_domain)
>> +return true;
>> +
>> +return false;
>> +}
>> +
>> static int __iommu_attach_group(struct iommu_domain *domain,
>> struct iommu_group *group)
>> {
>> @@ -2214,7 +2227,8 @@ static int __iommu_attach_group(struct iommu_domain *domain,
>> return -EBUSY;
>>
>> dev = iommu_group_first_dev(group);
>> -if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner)
>> +if (!dev_has_iommu(dev) ||
>> + !domain_iommu_ops_compatible(dev_iommu_ops(dev), domain))
>> return -EINVAL;
>>
>> return __iommu_group_set_domain(group, domain);
>> @@ -3435,7 +3449,8 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
>> !ops->blocked_domain->ops->set_dev_pasid)
>> return -EOPNOTSUPP;
>>
>> -if (ops != domain->owner || pasid == IOMMU_NO_PASID)
>> +if (!domain_iommu_ops_compatible(ops, domain) ||
>> + pasid == IOMMU_NO_PASID)
>> return -EINVAL;
>>
>> mutex_lock(&group->mutex);
>> @@ -3511,7 +3526,7 @@ int iommu_replace_device_pasid(struct iommu_domain *domain,
>> if (!domain->ops->set_dev_pasid)
>> return -EOPNOTSUPP;
>>
>> -if (dev_iommu_ops(dev) != domain->owner ||
>> +if (!domain_iommu_ops_compatible(dev_iommu_ops(dev), domain) ||
>> pasid == IOMMU_NO_PASID || !handle)
>> return -EINVAL;
>>
>> --
>> 2.43.0
>>
>
The quilt patch titled
Subject: smaps: fix crash in smaps_hugetlb_range for non-present hugetlb entries
has been removed from the -mm tree. Its filename was
smaps-fix-crash-in-smaps_hugetlb_range-for-non-present-hugetlb-entries.patch
This patch was dropped because an alternative patch was or shall be merged
------------------------------------------------------
From: Ming Wang <wangming01(a)loongson.cn>
Subject: smaps: fix crash in smaps_hugetlb_range for non-present hugetlb entries
Date: Wed, 23 Apr 2025 09:03:59 +0800
When reading /proc/pid/smaps for a process that has mapped a hugetlbfs
file with MAP_PRIVATE, the kernel might crash inside
pfn_swap_entry_to_page. This occurs on LoongArch under specific
conditions.
The root cause involves several steps:
1. When the hugetlbfs file is mapped (MAP_PRIVATE), the initial PMD
(or relevant level) entry is often populated by the kernel during
mmap() with a non-present entry pointing to the architecture's
invalid_pte_table On the affected LoongArch system, this address was
observed to be 0x90000000031e4000.
2. The smaps walker (walk_hugetlb_range -> smaps_hugetlb_range) reads
this entry.
3. The generic is_swap_pte() macro checks `!pte_present() &&
!pte_none()`. The entry (invalid_pte_table address) is not present.
Crucially, the generic pte_none() check (`!(pte_val(pte) &
~_PAGE_GLOBAL)`) returns false because the invalid_pte_table address is
non-zero. Therefore, is_swap_pte() incorrectly returns true.
4. The code enters the `else if (is_swap_pte(...))` block.
5. Inside this block, it checks `is_pfn_swap_entry()`. Due to a bit
pattern coincidence in the invalid_pte_table address on LoongArch, the
embedded generic `is_migration_entry()` check happens to return true
(misinterpreting parts of the address as a migration type).
6. This leads to a call to pfn_swap_entry_to_page() with the bogus
swap entry derived from the invalid table address.
7. pfn_swap_entry_to_page() extracts a meaningless PFN, finds an
unrelated struct page, checks its lock status (unlocked), and hits the
`BUG_ON(is_migration_entry(entry) && !PageLocked(p))` assertion.
The original code's intent in the `else if` block seems aimed at handling
potential migration entries, as indicated by the inner
`is_pfn_swap_entry()` check. The issue arises because the outer
`is_swap_pte()` check incorrectly includes the invalid table pointer case
on LoongArch.
This patch fixes the issue by changing the condition in
smaps_hugetlb_range() from the broad `is_swap_pte()` to the specific
`is_hugetlb_entry_migration()`.
The `is_hugetlb_entry_migration()` helper function correctly handles this
by first checking `huge_pte_none()`. Architectures like LoongArch can
provide an override for `huge_pte_none()` that specifically recognizes the
`invalid_pte_table` address as a "none" state for HugeTLB entries. This
ensures `is_hugetlb_entry_migration()` returns false for the invalid
entry, preventing the code from entering the faulty block.
This change makes the code reflect the likely original intent (handling
migration) more accurately and leverages architecture-specific helpers
(`huge_pte_none`) to correctly interpret special PTE/PMD values in the
HugeTLB context, fixing the crash on LoongArch without altering the
generic is_swap_pte() behavior.
Link: https://lkml.kernel.org/r/20250423010359.2030576-1-wangming01@loongson.cn
Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps")
Co-developed-by: Hongchen Zhang <zhanghongchen(a)loongson.cn>
Signed-off-by: Hongchen Zhang <zhanghongchen(a)loongson.cn>
Signed-off-by: Ming Wang <wangming01(a)loongson.cn>
Cc: Andrii Nakryiko <andrii(a)kernel.org>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Huacai Chen <chenhuacai(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Joern Engel <joern(a)logfs.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.cz>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/proc/task_mmu.c~smaps-fix-crash-in-smaps_hugetlb_range-for-non-present-hugetlb-entries
+++ a/fs/proc/task_mmu.c
@@ -1027,7 +1027,7 @@ static int smaps_hugetlb_range(pte_t *pt
if (pte_present(ptent)) {
folio = page_folio(pte_page(ptent));
present = true;
- } else if (is_swap_pte(ptent)) {
+ } else if (is_hugetlb_entry_migration(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (is_pfn_swap_entry(swpent))
_
Patches currently in -mm which might be from wangming01(a)loongson.cn are
When running machines with 64k page size and a 16k nodesize we started
seeing tree log corruption in production. This turned out to be because
we were not writing out dirty blocks sometimes, so this in fact affects
all metadata writes.
When writing out a subpage EB we scan the subpage bitmap for a dirty
range. If the range isn't dirty we do
bit_start++;
to move onto the next bit. The problem is the bitmap is based on the
number of sectors that an EB has. So in this case, we have a 64k
pagesize, 16k nodesize, but a 4k sectorsize. This means our bitmap is 4
bits for every node. With a 64k page size we end up with 4 nodes per
page.
To make this easier this is how everything looks
[0 16k 32k 48k ] logical address
[0 4 8 12 ] radix tree offset
[ 64k page ] folio
[ 16k eb ][ 16k eb ][ 16k eb ][ 16k eb ] extent buffers
[ | | | | | | | | | | | | | | | | ] bitmap
Now we use all of our addressing based on fs_info->sectorsize_bits, so
as you can see the above our 16k eb->start turns into radix entry 4.
When we find a dirty range for our eb, we correctly do bit_start +=
sectors_per_node, because if we start at bit 0, the next bit for the
next eb is 4, to correspond to eb->start 16k.
However if our range is clean, we will do bit_start++, which will now
put us offset from our radix tree entries.
In our case, assume that the first time we check the bitmap the block is
not dirty, we increment bit_start so now it == 1, and then we loop
around and check again. This time it is dirty, and we go to find that
start using the following equation
start = folio_start + bit_start * fs_info->sectorsize;
so in the case above, eb->start 0 is now dirty, and we calculate start
as
0 + 1 * fs_info->sectorsize = 4096
4096 >> 12 = 1
Now we're looking up the radix tree for 1, and we won't find an eb.
What's worse is now we're using bit_start == 1, so we do bit_start +=
sectors_per_node, which is now 5. If that eb is dirty we will run into
the same thing, we will look at an offset that is not populated in the
radix tree, and now we're skipping the writeout of dirty extent buffers.
The best fix for this is to not use sectorsize_bits to address nodes,
but that's a larger change. Since this is a fs corruption problem fix
it simply by always using sectors_per_node to increment the start bit.
cc: stable(a)vger.kernel.org
Fixes: c4aec299fa8f ("btrfs: introduce submit_eb_subpage() to submit a subpage metadata page")
Reviewed-by: Boris Burkov <boris(a)bur.io>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
---
- Further testing indicated that the page tagging theoretical race isn't getting
hit in practice, so we're going to limit the "hotfix" to this specific patch,
and then send subsequent patches to address the other issues we're hitting. My
simplify metadata writebback patches are the more wholistic fix.
fs/btrfs/extent_io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5f08615b334f..6cfd286b8bbc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2034,7 +2034,7 @@ static int submit_eb_subpage(struct folio *folio, struct writeback_control *wbc)
subpage->bitmaps)) {
spin_unlock_irqrestore(&subpage->lock, flags);
spin_unlock(&folio->mapping->i_private_lock);
- bit_start++;
+ bit_start += sectors_per_node;
continue;
}
--
2.48.1
From: Ashish Kalra <ashish.kalra(a)amd.com>
When the shared pages are being made private during kdump preparation
there are additional checks to handle shared GHCB pages.
These additional checks include handling the case of GHCB page being
contained within a 2MB page.
There is a bug in this additional check for GHCB page contained
within a 2MB page which causes any shared page just below the
per-cpu GHCB getting skipped from being transitioned back to private
before kdump preparation which subsequently causes a 0x404 #VC
exception when this shared page is accessed later while dumping guest
memory during vmcore generation via kdump.
Correct the detection and handling of GHCB pages contained within
a 2MB page.
Cc: stable(a)vger.kernel.org
Fixes: 3074152e56c9 ("x86/sev: Convert shared memory back to private on kexec")
Signed-off-by: Ashish Kalra <ashish.kalra(a)amd.com>
---
arch/x86/coco/sev/core.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c
index 2c27d4b3985c..16d874f4dcd3 100644
--- a/arch/x86/coco/sev/core.c
+++ b/arch/x86/coco/sev/core.c
@@ -926,7 +926,13 @@ static void unshare_all_memory(void)
data = per_cpu(runtime_data, cpu);
ghcb = (unsigned long)&data->ghcb_page;
- if (addr <= ghcb && ghcb <= addr + size) {
+ /* Handle the case of 2MB page containing the GHCB page */
+ if (level == PG_LEVEL_4K && addr == ghcb) {
+ skipped_addr = true;
+ break;
+ }
+ if (level > PG_LEVEL_4K && addr <= ghcb &&
+ ghcb < addr + size) {
skipped_addr = true;
break;
}
@@ -1106,6 +1112,9 @@ void snp_kexec_finish(void)
ghcb = &data->ghcb_page;
pte = lookup_address((unsigned long)ghcb, &level);
size = page_level_size(level);
+ /* Handle the case of 2MB page containing the GHCB page */
+ if (level > PG_LEVEL_4K)
+ ghcb = (struct ghcb *)((unsigned long)ghcb & PMD_MASK);
set_pte_enc(pte, level, (void *)ghcb);
snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
}
--
2.34.1
When memory allocation profiling is disabled at runtime or due to an
error, shutdown_mem_profiling() is called: slab->obj_exts which
previously allocated remains.
It won't be cleared by unaccount_slab() because of
mem_alloc_profiling_enabled() not true. It's incorrect, slab->obj_exts
should always be cleaned up in unaccount_slab() to avoid following error:
[...]BUG: Bad page state in process...
..
[...]page dumped because: page still charged to cgroup
Cc: stable(a)vger.kernel.org
Fixes: 21c690a349ba ("mm: introduce slabobj_ext to support slab object extensions")
Signed-off-by: Zhenhua Huang <quic_zhenhuah(a)quicinc.com>
Acked-by: David Rientjes <rientjes(a)google.com>
Acked-by: Harry Yoo <harry.yoo(a)oracle.com>
Tested-by: Harry Yoo <harry.yoo(a)oracle.com>
---
mm/slub.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 566eb8b8282d..a98ce1426076 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2028,8 +2028,8 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
return 0;
}
-/* Should be called only if mem_alloc_profiling_enabled() */
-static noinline void free_slab_obj_exts(struct slab *slab)
+/* Free only if slab_obj_exts(slab) */
+static inline void free_slab_obj_exts(struct slab *slab)
{
struct slabobj_ext *obj_exts;
@@ -2601,8 +2601,12 @@ static __always_inline void account_slab(struct slab *slab, int order,
static __always_inline void unaccount_slab(struct slab *slab, int order,
struct kmem_cache *s)
{
- if (memcg_kmem_online() || need_slab_obj_ext())
- free_slab_obj_exts(slab);
+ /*
+ * The slab object extensions should now be freed regardless of
+ * whether mem_alloc_profiling_enabled() or not because profiling
+ * might have been disabled after slab->obj_exts got allocated.
+ */
+ free_slab_obj_exts(slab);
mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
-(PAGE_SIZE << order));
--
2.34.1
Hi
As per subject, can you please apply commit 8983dc1b66c0 ("ALSA:
hda/realtek: Fix built-in mic on another ASUS VivoBook model") to
v6.1.y?
The commit fixes 3b4309546b48 ("ALSA: hda: Fix headset detection
failure due to unstable sort"), which is in 6.14-rc1 *but* it got
backported to other stable series as well: 6.1.129, 6.6.78, 6.12.14
and 6.13.3.
While 8983dc1b66c0 got then backported down to 6.12.23, 6.13.11 and
and 6.14.2 it was not backported further down, the reason is likely
the commit does not apply cleanly due to context changes in the struct
hda_quirk alc269_fixup_tbl (as some entries are missing in older
series).
For context see as well:
https://lore.kernel.org/linux-sound/Z95s5T6OXFPjRnKf@eldamar.lanhttps://lore.kernel.org/linux-sound/Z_aq9kkdswrGZRUQ@eldamar.lan/https://bugs.debian.org/1100928
Can you please apply it down for 6.1.y?
Attached is a manual backport of the change in case needed.
Regards,
Salvatore
From 336110525d8a24cd8bbc4cfe61c2aaf6aee511d4 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai(a)suse.de>
Date: Wed, 2 Apr 2025 09:42:07 +0200
Subject: [PATCH] ALSA: hda/realtek: Fix built-in mic on another ASUS VivoBook
model
[ Upstream commit 8983dc1b66c0e1928a263b8af0bb06f6cb9229c4 ]
There is another VivoBook model which built-in mic got broken recently
by the fix of the pin sort. Apply the correct quirk
ALC256_FIXUP_ASUS_MIC_NO_PRESENCE to this model for addressing the
regression, too.
Fixes: 3b4309546b48 ("ALSA: hda: Fix headset detection failure due to unstable sort")
Closes: https://lore.kernel.org/Z95s5T6OXFPjRnKf@eldamar.lan
Link: https://patch.msgid.link/20250402074208.7347-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
[Salvatore Bonaccorso: Update for context change due to missing other
quirk entries in the struct snd_pci_quirk alc269_fixup_tbl]
Signed-off-by: Salvatore Bonaccorso <carnil(a)debian.org>
---
sound/pci/hda/patch_realtek.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 93e8990c23bc..61b48f2418bf 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -10071,6 +10071,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1c23, "Asus X55U", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
+ SND_PCI_QUIRK(0x1043, 0x1c80, "ASUS VivoBook TP401", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
--
2.49.0
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x cdc2e1d9d929d7f7009b3a5edca52388a2b0891f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025042120-backward-waged-41cf@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cdc2e1d9d929d7f7009b3a5edca52388a2b0891f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Mon, 14 Apr 2025 15:00:59 -0700
Subject: [PATCH] lib/Kconfig.ubsan: Remove 'default UBSAN' from
UBSAN_INTEGER_WRAP
CONFIG_UBSAN_INTEGER_WRAP is 'default UBSAN', which is problematic for a
couple of reasons.
The first is that this sanitizer is under active development on the
compiler side to come up with a solution that is maintainable on the
compiler side and usable on the kernel side. As a result of this, there
are many warnings when the sanitizer is enabled that have no clear path
to resolution yet but users may see them and report them in the meantime.
The second is that this option was renamed from
CONFIG_UBSAN_SIGNED_WRAP, meaning that if a configuration has
CONFIG_UBSAN=y but CONFIG_UBSAN_SIGNED_WRAP=n and it is upgraded via
olddefconfig (common in non-interactive scenarios such as CI),
CONFIG_UBSAN_INTEGER_WRAP will be silently enabled again.
Remove 'default UBSAN' from CONFIG_UBSAN_INTEGER_WRAP until it is ready
for regular usage and testing from a broader community than the folks
actively working on the feature.
Cc: stable(a)vger.kernel.org
Fixes: 557f8c582a9b ("ubsan: Reintroduce signed overflow sanitizer")
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Link: https://lore.kernel.org/r/20250414-drop-default-ubsan-integer-wrap-v1-1-392…
Signed-off-by: Kees Cook <kees(a)kernel.org>
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 4216b3a4ff21..f6ea0c5b5da3 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -118,7 +118,6 @@ config UBSAN_UNREACHABLE
config UBSAN_INTEGER_WRAP
bool "Perform checking for integer arithmetic wrap-around"
- default UBSAN
depends on !COMPILE_TEST
depends on $(cc-option,-fsanitize-undefined-ignore-overflow-pattern=all)
depends on $(cc-option,-fsanitize=signed-integer-overflow)
The patch below does not apply to the 6.14-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.14.y
git checkout FETCH_HEAD
git cherry-pick -x cdc2e1d9d929d7f7009b3a5edca52388a2b0891f
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025042119-imbecile-greeter-0ce1@gregkh' --subject-prefix 'PATCH 6.14.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From cdc2e1d9d929d7f7009b3a5edca52388a2b0891f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan(a)kernel.org>
Date: Mon, 14 Apr 2025 15:00:59 -0700
Subject: [PATCH] lib/Kconfig.ubsan: Remove 'default UBSAN' from
UBSAN_INTEGER_WRAP
CONFIG_UBSAN_INTEGER_WRAP is 'default UBSAN', which is problematic for a
couple of reasons.
The first is that this sanitizer is under active development on the
compiler side to come up with a solution that is maintainable on the
compiler side and usable on the kernel side. As a result of this, there
are many warnings when the sanitizer is enabled that have no clear path
to resolution yet but users may see them and report them in the meantime.
The second is that this option was renamed from
CONFIG_UBSAN_SIGNED_WRAP, meaning that if a configuration has
CONFIG_UBSAN=y but CONFIG_UBSAN_SIGNED_WRAP=n and it is upgraded via
olddefconfig (common in non-interactive scenarios such as CI),
CONFIG_UBSAN_INTEGER_WRAP will be silently enabled again.
Remove 'default UBSAN' from CONFIG_UBSAN_INTEGER_WRAP until it is ready
for regular usage and testing from a broader community than the folks
actively working on the feature.
Cc: stable(a)vger.kernel.org
Fixes: 557f8c582a9b ("ubsan: Reintroduce signed overflow sanitizer")
Signed-off-by: Nathan Chancellor <nathan(a)kernel.org>
Link: https://lore.kernel.org/r/20250414-drop-default-ubsan-integer-wrap-v1-1-392…
Signed-off-by: Kees Cook <kees(a)kernel.org>
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 4216b3a4ff21..f6ea0c5b5da3 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -118,7 +118,6 @@ config UBSAN_UNREACHABLE
config UBSAN_INTEGER_WRAP
bool "Perform checking for integer arithmetic wrap-around"
- default UBSAN
depends on !COMPILE_TEST
depends on $(cc-option,-fsanitize-undefined-ignore-overflow-pattern=all)
depends on $(cc-option,-fsanitize=signed-integer-overflow)
The following commit has been merged into the irq/urgent branch of tip:
Commit-ID: 637cf959dac97d5b7b5ce5e6cd91dd3a2c2fc324
Gitweb: https://git.kernel.org/tip/637cf959dac97d5b7b5ce5e6cd91dd3a2c2fc324
Author: Suzuki K Poulose <suzuki.poulose(a)arm.com>
AuthorDate: Tue, 22 Apr 2025 17:16:16 +01:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Thu, 24 Apr 2025 14:47:52 +02:00
irqchip/gic-v2m: Prevent use after free of gicv2m_get_fwnode()
With ACPI in place, gicv2m_get_fwnode() is registered with the pci
subsystem as pci_msi_get_fwnode_cb(), which may get invoked at runtime
during a PCI host bridge probe. But, the call back is wrongly marked as
__init, causing it to be freed, while being registered with the PCI
subsystem and could trigger:
Unable to handle kernel paging request at virtual address ffff8000816c0400
gicv2m_get_fwnode+0x0/0x58 (P)
pci_set_bus_msi_domain+0x74/0x88
pci_register_host_bridge+0x194/0x548
This is easily reproducible on a Juno board with ACPI boot.
Retain the function for later use.
Fixes: 0644b3daca28 ("irqchip/gic-v2m: acpi: Introducing GICv2m ACPI support")
Signed-off-by: Suzuki K Poulose <suzuki.poulose(a)arm.com>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org
Link: https://lkml.kernel.org/all/20250422161616.1584405-1-suzuki.poulose@arm.com
Link: https://lkml.kernel.org/r/68053cf43bb54_7205294cc@dwillia2-xfh.jf.intel.com…
---
drivers/irqchip/irq-gic-v2m.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index c698948..dc98c39 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -421,7 +421,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle,
#ifdef CONFIG_ACPI
static int acpi_num_msi;
-static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
+static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
{
struct v2m_data *data;
The patch titled
Subject: smaps: fix crash in smaps_hugetlb_range for non-present hugetlb entries
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
smaps-fix-crash-in-smaps_hugetlb_range-for-non-present-hugetlb-entries.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Ming Wang <wangming01(a)loongson.cn>
Subject: smaps: fix crash in smaps_hugetlb_range for non-present hugetlb entries
Date: Wed, 23 Apr 2025 09:03:59 +0800
When reading /proc/pid/smaps for a process that has mapped a hugetlbfs
file with MAP_PRIVATE, the kernel might crash inside
pfn_swap_entry_to_page. This occurs on LoongArch under specific
conditions.
The root cause involves several steps:
1. When the hugetlbfs file is mapped (MAP_PRIVATE), the initial PMD
(or relevant level) entry is often populated by the kernel during
mmap() with a non-present entry pointing to the architecture's
invalid_pte_table On the affected LoongArch system, this address was
observed to be 0x90000000031e4000.
2. The smaps walker (walk_hugetlb_range -> smaps_hugetlb_range) reads
this entry.
3. The generic is_swap_pte() macro checks `!pte_present() &&
!pte_none()`. The entry (invalid_pte_table address) is not present.
Crucially, the generic pte_none() check (`!(pte_val(pte) &
~_PAGE_GLOBAL)`) returns false because the invalid_pte_table address is
non-zero. Therefore, is_swap_pte() incorrectly returns true.
4. The code enters the `else if (is_swap_pte(...))` block.
5. Inside this block, it checks `is_pfn_swap_entry()`. Due to a bit
pattern coincidence in the invalid_pte_table address on LoongArch, the
embedded generic `is_migration_entry()` check happens to return true
(misinterpreting parts of the address as a migration type).
6. This leads to a call to pfn_swap_entry_to_page() with the bogus
swap entry derived from the invalid table address.
7. pfn_swap_entry_to_page() extracts a meaningless PFN, finds an
unrelated struct page, checks its lock status (unlocked), and hits the
`BUG_ON(is_migration_entry(entry) && !PageLocked(p))` assertion.
The original code's intent in the `else if` block seems aimed at handling
potential migration entries, as indicated by the inner
`is_pfn_swap_entry()` check. The issue arises because the outer
`is_swap_pte()` check incorrectly includes the invalid table pointer case
on LoongArch.
This patch fixes the issue by changing the condition in
smaps_hugetlb_range() from the broad `is_swap_pte()` to the specific
`is_hugetlb_entry_migration()`.
The `is_hugetlb_entry_migration()` helper function correctly handles this
by first checking `huge_pte_none()`. Architectures like LoongArch can
provide an override for `huge_pte_none()` that specifically recognizes the
`invalid_pte_table` address as a "none" state for HugeTLB entries. This
ensures `is_hugetlb_entry_migration()` returns false for the invalid
entry, preventing the code from entering the faulty block.
This change makes the code reflect the likely original intent (handling
migration) more accurately and leverages architecture-specific helpers
(`huge_pte_none`) to correctly interpret special PTE/PMD values in the
HugeTLB context, fixing the crash on LoongArch without altering the
generic is_swap_pte() behavior.
Link: https://lkml.kernel.org/r/20250423010359.2030576-1-wangming01@loongson.cn
Fixes: 25ee01a2fca0 ("mm: hugetlb: proc: add hugetlb-related fields to /proc/PID/smaps")
Co-developed-by: Hongchen Zhang <zhanghongchen(a)loongson.cn>
Signed-off-by: Hongchen Zhang <zhanghongchen(a)loongson.cn>
Signed-off-by: Ming Wang <wangming01(a)loongson.cn>
Cc: Andrii Nakryiko <andrii(a)kernel.org>
Cc: Christophe Leroy <christophe.leroy(a)csgroup.eu>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Huacai Chen <chenhuacai(a)kernel.org>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Joern Engel <joern(a)logfs.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.cz>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Oscar Salvador <osalvador(a)suse.de>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/task_mmu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/fs/proc/task_mmu.c~smaps-fix-crash-in-smaps_hugetlb_range-for-non-present-hugetlb-entries
+++ a/fs/proc/task_mmu.c
@@ -1027,7 +1027,7 @@ static int smaps_hugetlb_range(pte_t *pt
if (pte_present(ptent)) {
folio = page_folio(pte_page(ptent));
present = true;
- } else if (is_swap_pte(ptent)) {
+ } else if (is_hugetlb_entry_migration(ptent)) {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (is_pfn_swap_entry(swpent))
_
Patches currently in -mm which might be from wangming01(a)loongson.cn are
smaps-fix-crash-in-smaps_hugetlb_range-for-non-present-hugetlb-entries.patch
From: Yu Kuai <yukuai3(a)huawei.com>
Hi, Greg
This is the manual adaptation version for 6.1, for 6.6/6.12 commit
8542870237c3 ("md: fix mddev uaf while iterating all_mddevs list") can
be applied cleanly, can you queue them as well?
Thanks!
Yu Kuai (2):
md: factor out a helper from mddev_put()
md: fix mddev uaf while iterating all_mddevs list
drivers/md/md.c | 50 +++++++++++++++++++++++++++++--------------------
1 file changed, 30 insertions(+), 20 deletions(-)
--
2.39.2
From: Mario Limonciello <mario.limonciello(a)amd.com>
On systems that only have an SRA sensor connected to SFH the sensor
doesn't get enabled due to a bad optimization condition of breaking
the sensor walk loop.
This optimization is unnecessary in the first place because if there
is only one device then the loop only runs once. Drop the condition
and explicitly mark sensor as enabled.
Reported-by: Yijun Shen <Yijun.Shen(a)dell.com>
Fixes: d1c444b47100d ("HID: amd_sfh: Add support to export device operating states")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mario Limonciello <mario.limonciello(a)amd.com>
---
drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
index 25f0ebfcbd5f5..c1bdf1e0d44af 100644
--- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
+++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_init.c
@@ -134,9 +134,6 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
for (i = 0; i < cl_data->num_hid_devices; i++) {
cl_data->sensor_sts[i] = SENSOR_DISABLED;
- if (cl_data->num_hid_devices == 1 && cl_data->sensor_idx[0] == SRA_IDX)
- break;
-
if (cl_data->sensor_idx[i] == SRA_IDX) {
info.sensor_idx = cl_data->sensor_idx[i];
writel(0, privdata->mmio + amd_get_p2c_val(privdata, 0));
@@ -145,8 +142,10 @@ static int amd_sfh1_1_hid_client_init(struct amd_mp2_dev *privdata)
(privdata, cl_data->sensor_idx[i], ENABLE_SENSOR);
cl_data->sensor_sts[i] = (status == 0) ? SENSOR_ENABLED : SENSOR_DISABLED;
- if (cl_data->sensor_sts[i] == SENSOR_ENABLED)
+ if (cl_data->sensor_sts[i] == SENSOR_ENABLED) {
+ cl_data->is_any_sensor_enabled = true;
privdata->dev_en.is_sra_present = true;
+ }
continue;
}
--
2.43.0
During wacom_parse_and_register() the code calls wacom_devm_kfifo_alloc
to allocate a fifo. During this operation it passes kfifo_alloc a
fifo_size of 0. Kfifo attempts to round the size passed to it to the
next power of 2 via roundup_pow_of_two (queue-type data structures
do this to maintain efficiency of operations).
However during this phase a problem arises when the roundup_pow_of_two()
function utilises a shift exponent of fls_long(n-1), where n is the
fifo_size. Since n is 0 in this case and n is also an unsigned long,
doing n-1 causes unsigned integer wrap-around to occur making the
fifo_size 4294967295. So the code effectively does fls_long(4294967295)
which results in 64. Returning back to roundup_pow_of_two(), the code
utilises a shift exponent of 64. When a shift exponent of 64 is used
on a 64-bit type such as 1UL it results in a shift-out-of-bounds.
The root cause of the issue seems to stem from insufficient validation
of wacom_compute_pktlen(), since in this case the fifo_size comes
from wacom_wac->features.pktlen. During wacom_parse_and_register()
the wacom_compute_pktlen() function sets the pktlen as 0.
To fix this, we should handle cases where wacom_compute_pktlen()
results in 0.
Reported-by: syzbot <syzbot+d5204cbbdd921f1f7cad(a)syzkaller.appspotmail.com>
Closes: https://syzkaller.appspot.com/bug?extid=d5204cbbdd921f1f7cad
Fixes: 5e013ad20689 ("HID: wacom: Remove static WACOM_PKGLEN_MAX limit")
Tested-by: Qasim Ijaz <qasdev00(a)gmail.com>
Reviewed-by: Jason Gerecke <jason.gerecke(a)wacom.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Qasim Ijaz <qasdev00(a)gmail.com>
---
v2:
- Added Fixes tag as suggested by Jason Gerecke
drivers/hid/wacom_sys.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 97393a3083ca..9b2f3dbca467 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2361,6 +2361,8 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
unsigned int connect_mask = HID_CONNECT_HIDRAW;
features->pktlen = wacom_compute_pktlen(hdev);
+ if (!features->pktlen)
+ return -ENODEV;
if (!devres_open_group(&hdev->dev, wacom, GFP_KERNEL))
return -ENOMEM;
--
2.39.5
From: Peter Zijlstra <peterz(a)infradead.org>
[ Upstream commit 517e6a301f34613bff24a8e35b5455884f2d83d8 ]
Per syzbot it is possible for perf_pending_task() to run after the
event is free()'d. There are two related but distinct cases:
- the task_work was already queued before destroying the event;
- destroying the event itself queues the task_work.
The first cannot be solved using task_work_cancel() since
perf_release() itself might be called from a task_work (____fput),
which means the current->task_works list is already empty and
task_work_cancel() won't be able to find the perf_pending_task()
entry.
The simplest alternative is extending the perf_event lifetime to cover
the task_work.
The second is just silly, queueing a task_work while you know the
event is going away makes no sense and is easily avoided by
re-arranging how the event is marked STATE_DEAD and ensuring it goes
through STATE_OFF on the way down.
Reported-by: syzbot+9228d6098455bb209ec8(a)syzkaller.appspotmail.com
Signed-off-by: Peter Zijlstra (Intel) <peterz(a)infradead.org>
Tested-by: Marco Elver <elver(a)google.com>
[ Discard the changes in event_sched_out() due to 5.10 don't have the
commit: 97ba62b27867 ("perf: Add support for SIGTRAP on perf events")
and commit: ca6c21327c6a ("perf: Fix missing SIGTRAPs") ]
Signed-off-by: Xiangyu Chen <xiangyu.chen(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Verified the build test.
---
kernel/events/core.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f19d6ab039e..798c839a00b3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2419,6 +2419,7 @@ group_sched_out(struct perf_event *group_event,
}
#define DETACH_GROUP 0x01UL
+#define DETACH_DEAD 0x04UL
/*
* Cross CPU call to remove a performance event
@@ -2439,10 +2440,18 @@ __perf_remove_from_context(struct perf_event *event,
update_cgrp_time_from_cpuctx(cpuctx, false);
}
+ /*
+ * Ensure event_sched_out() switches to OFF, at the very least
+ * this avoids raising perf_pending_task() at this time.
+ */
+ if (flags & DETACH_DEAD)
+ event->pending_disable = 1;
event_sched_out(event, cpuctx, ctx);
if (flags & DETACH_GROUP)
perf_group_detach(event);
list_del_event(event, ctx);
+ if (flags & DETACH_DEAD)
+ event->state = PERF_EVENT_STATE_DEAD;
if (!ctx->nr_events && ctx->is_active) {
if (ctx == &cpuctx->ctx)
@@ -5111,9 +5120,7 @@ int perf_event_release_kernel(struct perf_event *event)
ctx = perf_event_ctx_lock(event);
WARN_ON_ONCE(ctx->parent_ctx);
- perf_remove_from_context(event, DETACH_GROUP);
- raw_spin_lock_irq(&ctx->lock);
/*
* Mark this event as STATE_DEAD, there is no external reference to it
* anymore.
@@ -5125,8 +5132,7 @@ int perf_event_release_kernel(struct perf_event *event)
* Thus this guarantees that we will in fact observe and kill _ALL_
* child events.
*/
- event->state = PERF_EVENT_STATE_DEAD;
- raw_spin_unlock_irq(&ctx->lock);
+ perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD);
perf_event_ctx_unlock(event, ctx);
@@ -6533,6 +6539,8 @@ static void perf_pending_event(struct irq_work *entry)
if (rctx >= 0)
perf_swevent_put_recursion_context(rctx);
+
+ put_event(event);
}
/*
--
2.34.1
[Why]
There is no handling for I2C-read-over-AUX when receive reply of
I2C_ACK|AUX_ACK followed by the total number of data bytes Fewer
than LEN + 1
[How]
Refer to DP v2.1: 2.11.7.1.6.3 & 2.11.7.1.6.4, repeat the identical
I2C-read-over-AUX transaction with the updated LEN value equal to
the original LEN value minus the total number of data bytes received
so far.
Fixes: 68ec2a2a2481 ("drm/dp: Use I2C_WRITE_STATUS_UPDATE to drain partial I2C_WRITE requests")
Cc: Ville Syrjälä <ville.syrjala(a)linux.intel.com>
Cc: Jani Nikula <jani.nikula(a)intel.com>
Cc: Mario Limonciello <mario.limonciello(a)amd.com>
Cc: Harry Wentland <harry.wentland(a)amd.com>
Cc: stable(a)vger.kernel.org
Signed-off-by: Wayne Lin <Wayne.Lin(a)amd.com>
---
drivers/gpu/drm/display/drm_dp_helper.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c
index 28f0708c3b27..938214a980a9 100644
--- a/drivers/gpu/drm/display/drm_dp_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_helper.c
@@ -1812,10 +1812,11 @@ static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
drm_dbg_kms(aux->drm_dev,
"%s: I2C partially ack (result=%d, size=%zu)\n",
aux->name, ret, msg->size);
- if (!(msg->request & DP_AUX_I2C_READ)) {
- usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
+ usleep_range(AUX_RETRY_INTERVAL, AUX_RETRY_INTERVAL + 100);
+ if (msg->request & DP_AUX_I2C_READ)
+ msg->size -= ret;
+ else
drm_dp_i2c_msg_write_status_update(msg);
- }
continue;
}
--
2.43.0
The idxd driver attaches the default domain to a PASID of the device to
perform kernel DMA using that PASID. The domain is attached to the
device's PASID through iommu_attach_device_pasid(), which checks if the
domain->owner matches the iommu_ops retrieved from the device. If they
do not match, it returns a failure.
if (ops != domain->owner || pasid == IOMMU_NO_PASID)
return -EINVAL;
The static identity domain implemented by the intel iommu driver doesn't
specify the domain owner. Therefore, kernel DMA with PASID doesn't work
for the idxd driver if the device translation mode is set to passthrough.
Generally the owner field of static domains are not set because they are
already part of iommu ops. Add a helper domain_iommu_ops_compatible()
that checks if a domain is compatible with the device's iommu ops. This
helper explicitly allows the static blocked and identity domains associated
with the device's iommu_ops to be considered compatible.
Fixes: 2031c469f816 ("iommu/vt-d: Add support for static identity domain")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220031
Cc: stable(a)vger.kernel.org
Suggested-by: Jason Gunthorpe <jgg(a)nvidia.com>
Link: https://lore.kernel.org/linux-iommu/20250422191554.GC1213339@ziepe.ca/
Signed-off-by: Lu Baolu <baolu.lu(a)linux.intel.com>
---
drivers/iommu/iommu.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
Change log:
-v2:
- Make the solution generic for all static domains as suggested by
Jason.
-v1: https://lore.kernel.org/linux-iommu/20250422075422.2084548-1-baolu.lu@linux…
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 4f91a740c15f..abda40ec377a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3402,6 +3402,19 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
iommu_remove_dev_pasid(device->dev, pasid, domain);
}
+static bool domain_iommu_ops_compatible(const struct iommu_ops *ops,
+ struct iommu_domain *domain)
+{
+ if (domain->owner == ops)
+ return true;
+
+ /* For static domains, owner isn't set. */
+ if (domain == ops->blocked_domain || domain == ops->identity_domain)
+ return true;
+
+ return false;
+}
+
/*
* iommu_attach_device_pasid() - Attach a domain to pasid of device
* @domain: the iommu domain.
@@ -3435,7 +3448,8 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
!ops->blocked_domain->ops->set_dev_pasid)
return -EOPNOTSUPP;
- if (ops != domain->owner || pasid == IOMMU_NO_PASID)
+ if (!domain_iommu_ops_compatible(ops, domain) ||
+ pasid == IOMMU_NO_PASID)
return -EINVAL;
mutex_lock(&group->mutex);
--
2.43.0
The patch titled
Subject: selftests/mm: compaction_test: support platform with huge mount of memory
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
selftests-mm-compaction_test-support-platform-with-huge-mount-of-memory.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Feng Tang <feng.tang(a)linux.alibaba.com>
Subject: selftests/mm: compaction_test: support platform with huge mount of memory
Date: Wed, 23 Apr 2025 18:36:45 +0800
When running mm selftest to verify mm patches, 'compaction_test' case
failed on an x86 server with 1TB memory. And the root cause is that it
has too much free memory than what the test supports.
The test case tries to allocate 100000 huge pages, which is about 200 GB
for that x86 server, and when it succeeds, it expects it's large than 1/3
of 80% of the free memory in system. This logic only works for platform
with 750 GB ( 200 / (1/3) / 80% ) or less free memory, and may raise false
alarm for others.
Fix it by changing the fixed page number to self-adjustable number
according to the real number of free memory.
Link: https://lkml.kernel.org/r/20250423103645.2758-1-feng.tang@linux.alibaba.com
Fixes: bd67d5c15cc19 ("Test compaction of mlocked memory")
Signed-off-by: Feng Tang <feng.tang(a)linux.alibaba.com>
Acked-by: Dev Jain <dev.jain(a)arm.com>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Shuah Khan <shuah(a)kernel.org>
Cc: Sri Jayaramappa <sjayaram(a)akamai.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
tools/testing/selftests/mm/compaction_test.c | 19 ++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
--- a/tools/testing/selftests/mm/compaction_test.c~selftests-mm-compaction_test-support-platform-with-huge-mount-of-memory
+++ a/tools/testing/selftests/mm/compaction_test.c
@@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_f
int compaction_index = 0;
char nr_hugepages[20] = {0};
char init_nr_hugepages[24] = {0};
+ char target_nr_hugepages[24] = {0};
+ int slen;
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
@@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_f
goto out;
}
- /* Request a large number of huge pages. The Kernel will allocate
- as much as it can */
- if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
- ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
+ /*
+ * Request huge pages for about half of the free memory. The Kernel
+ * will allocate as much as it can, and we expect it will get at least 1/3
+ */
+ nr_hugepages_ul = mem_free / hugepage_size / 2;
+ snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
+ "%lu", nr_hugepages_ul);
+
+ slen = strlen(target_nr_hugepages);
+ if (write(fd, target_nr_hugepages, slen) != slen) {
+ ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
+ nr_hugepages_ul, strerror(errno));
goto close_fd;
}
_
Patches currently in -mm which might be from feng.tang(a)linux.alibaba.com are
selftests-mm-compaction_test-support-platform-with-huge-mount-of-memory.patch
From: Long Li <longli(a)microsoft.com>
There are use cases that interrupt and monitor pages are mapped to
user-mode through UIO, they need to be system page aligned. Some Hyper-V
allocation APIs introduced earlier broke those requirements.
Fix those APIs by always allocating Hyper-V page at system page boundaries.
Cc: stable(a)vger.kernel.org
Fixes: ca48739e59df ("Drivers: hv: vmbus: Move Hyper-V page allocator to arch neutral code")
Signed-off-by: Long Li <longli(a)microsoft.com>
---
drivers/hv/hv_common.c | 29 +++++++----------------------
1 file changed, 7 insertions(+), 22 deletions(-)
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index a7d7494feaca..f426aaa9b8f9 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -106,41 +106,26 @@ void __init hv_common_free(void)
}
/*
- * Functions for allocating and freeing memory with size and
- * alignment HV_HYP_PAGE_SIZE. These functions are needed because
- * the guest page size may not be the same as the Hyper-V page
- * size. We depend upon kmalloc() aligning power-of-two size
- * allocations to the allocation size boundary, so that the
- * allocated memory appears to Hyper-V as a page of the size
- * it expects.
+ * A Hyper-V page can be used by UIO for mapping to user-space, it should
+ * always be allocated on system page boundaries.
*/
-
void *hv_alloc_hyperv_page(void)
{
- BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
-
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- return (void *)__get_free_page(GFP_KERNEL);
- else
- return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
+ BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
+ return (void *)__get_free_page(GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
void *hv_alloc_hyperv_zeroed_page(void)
{
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- else
- return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
+ BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
+ return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
void hv_free_hyperv_page(void *addr)
{
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- free_page((unsigned long)addr);
- else
- kfree(addr);
+ free_page((unsigned long)addr);
}
EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
--
2.34.1
Syzkaller detected a use-after-free issue in ext4_insert_dentry that was
caused by out-of-bounds access due to incorrect splitting in do_split.
BUG: KASAN: use-after-free in ext4_insert_dentry+0x36a/0x6d0 fs/ext4/namei.c:2109
Write of size 251 at addr ffff888074572f14 by task syz-executor335/5847
CPU: 0 UID: 0 PID: 5847 Comm: syz-executor335 Not tainted 6.12.0-rc6-syzkaller-00318-ga9cda7c0ffed #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
print_address_description mm/kasan/report.c:377 [inline]
print_report+0x169/0x550 mm/kasan/report.c:488
kasan_report+0x143/0x180 mm/kasan/report.c:601
kasan_check_range+0x282/0x290 mm/kasan/generic.c:189
__asan_memcpy+0x40/0x70 mm/kasan/shadow.c:106
ext4_insert_dentry+0x36a/0x6d0 fs/ext4/namei.c:2109
add_dirent_to_buf+0x3d9/0x750 fs/ext4/namei.c:2154
make_indexed_dir+0xf98/0x1600 fs/ext4/namei.c:2351
ext4_add_entry+0x222a/0x25d0 fs/ext4/namei.c:2455
ext4_add_nondir+0x8d/0x290 fs/ext4/namei.c:2796
ext4_symlink+0x920/0xb50 fs/ext4/namei.c:3431
vfs_symlink+0x137/0x2e0 fs/namei.c:4615
do_symlinkat+0x222/0x3a0 fs/namei.c:4641
__do_sys_symlink fs/namei.c:4662 [inline]
__se_sys_symlink fs/namei.c:4660 [inline]
__x64_sys_symlink+0x7a/0x90 fs/namei.c:4660
do_syscall_x64 arch/x86/entry/common.c:52 [inline]
do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
entry_SYSCALL_64_after_hwframe+0x77/0x7f
</TASK>
The following loop is located right above 'if' statement.
for (i = count-1; i >= 0; i--) {
/* is more than half of this entry in 2nd half of the block? */
if (size + map[i].size/2 > blocksize/2)
break;
size += map[i].size;
move++;
}
'i' in this case could go down to -1, in which case sum of active entries
wouldn't exceed half the block size, but previous behaviour would also do
split in half if sum would exceed at the very last block, which in case of
having too many long name files in a single block could lead to
out-of-bounds access and following use-after-free.
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Cc: stable(a)vger.kernel.org
Fixes: 5872331b3d91 ("ext4: fix potential negative array index in do_split()")
Signed-off-by: Artem Sadovnikov <a.sadovnikov(a)ispras.ru>
---
fs/ext4/namei.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cb5cb33b1d91..e9712e64ec8f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1971,7 +1971,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
* split it in half by count; each resulting block will have at least
* half the space free.
*/
- if (i > 0)
+ if (i >= 0)
split = count - move;
else
split = count/2;
--
2.43.0
Getting / Setting the frame interval using the V4L2 subdev pad ops
get_frame_interval/set_frame_interval causes a deadlock, as the
subdev state is locked in the [1] but also in the driver itself.
In [2] it's described that the caller is responsible to acquire and
release the lock in this case. Therefore, acquiring the lock in the
driver is wrong.
Remove the lock acquisitions/releases from mt9m114_ifp_get_frame_interval()
and mt9m114_ifp_set_frame_interval().
[1] drivers/media/v4l2-core/v4l2-subdev.c - line 1129
[2] Documentation/driver-api/media/v4l2-subdev.rst
Fixes: 24d756e914fc ("media: i2c: Add driver for onsemi MT9M114 camera sensor")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mathis Foerst <mathis.foerst(a)mt.com>
---
drivers/media/i2c/mt9m114.c | 8 --------
1 file changed, 8 deletions(-)
diff --git a/drivers/media/i2c/mt9m114.c b/drivers/media/i2c/mt9m114.c
index 65b9124e464f..79c97ab19be9 100644
--- a/drivers/media/i2c/mt9m114.c
+++ b/drivers/media/i2c/mt9m114.c
@@ -1644,13 +1644,9 @@ static int mt9m114_ifp_get_frame_interval(struct v4l2_subdev *sd,
if (interval->which != V4L2_SUBDEV_FORMAT_ACTIVE)
return -EINVAL;
- mutex_lock(sensor->ifp.hdl.lock);
-
ival->numerator = 1;
ival->denominator = sensor->ifp.frame_rate;
- mutex_unlock(sensor->ifp.hdl.lock);
-
return 0;
}
@@ -1669,8 +1665,6 @@ static int mt9m114_ifp_set_frame_interval(struct v4l2_subdev *sd,
if (interval->which != V4L2_SUBDEV_FORMAT_ACTIVE)
return -EINVAL;
- mutex_lock(sensor->ifp.hdl.lock);
-
if (ival->numerator != 0 && ival->denominator != 0)
sensor->ifp.frame_rate = min_t(unsigned int,
ival->denominator / ival->numerator,
@@ -1684,8 +1678,6 @@ static int mt9m114_ifp_set_frame_interval(struct v4l2_subdev *sd,
if (sensor->streaming)
ret = mt9m114_set_frame_rate(sensor);
- mutex_unlock(sensor->ifp.hdl.lock);
-
return ret;
}
--
2.34.1
From: James Smart <jsmart2021(a)gmail.com>
[ Upstream commit 577a942df3de2666f6947bdd3a5c9e8d30073424 ]
If lpfc_issue_els_flogi() fails and returns non-zero status, the node
reference count is decremented to trigger the release of the nodelist
structure. However, if there is a prior registration or dev-loss-evt work
pending, the node may be released prematurely. When dev-loss-evt
completes, the released node is referenced causing a use-after-free null
pointer dereference.
Similarly, when processing non-zero ELS PLOGI completion status in
lpfc_cmpl_els_plogi(), the ndlp flags are checked for a transport
registration before triggering node removal. If dev-loss-evt work is
pending, the node may be released prematurely and a subsequent call to
lpfc_dev_loss_tmo_handler() results in a use after free ndlp dereference.
Add test for pending dev-loss before decrementing the node reference count
for FLOGI, PLOGI, PRLI, and ADISC handling.
Link: https://lore.kernel.org/r/20220412222008.126521-9-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee(a)broadcom.com>
Signed-off-by: Justin Tee <justin.tee(a)broadcom.com>
Signed-off-by: James Smart <jsmart2021(a)gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
[Minor context change fixed.]
Signed-off-by: Bin Lan <bin.lan.cn(a)windriver.com>
Signed-off-by: He Zhe <zhe.he(a)windriver.com>
---
Build test passed.
---
drivers/scsi/lpfc/lpfc_els.c | 51 +++++++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 16 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 5f44a0763f37..134d56bd00da 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -1517,10 +1517,13 @@ lpfc_initial_flogi(struct lpfc_vport *vport)
}
if (lpfc_issue_els_flogi(vport, ndlp, 0)) {
- /* This decrement of reference count to node shall kick off
- * the release of the node.
+ /* A node reference should be retained while registered with a
+ * transport or dev-loss-evt work is pending.
+ * Otherwise, decrement node reference to trigger release.
*/
- lpfc_nlp_put(ndlp);
+ if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD)) &&
+ !(ndlp->nlp_flag & NLP_IN_DEV_LOSS))
+ lpfc_nlp_put(ndlp);
return 0;
}
return 1;
@@ -1563,10 +1566,13 @@ lpfc_initial_fdisc(struct lpfc_vport *vport)
}
if (lpfc_issue_els_fdisc(vport, ndlp, 0)) {
- /* decrement node reference count to trigger the release of
- * the node.
+ /* A node reference should be retained while registered with a
+ * transport or dev-loss-evt work is pending.
+ * Otherwise, decrement node reference to trigger release.
*/
- lpfc_nlp_put(ndlp);
+ if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD)) &&
+ !(ndlp->nlp_flag & NLP_IN_DEV_LOSS))
+ lpfc_nlp_put(ndlp);
return 0;
}
return 1;
@@ -1967,6 +1973,7 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
struct lpfc_dmabuf *prsp;
int disc;
struct serv_parm *sp = NULL;
+ bool release_node = false;
/* we pass cmdiocb to state machine which needs rspiocb as well */
cmdiocb->context_un.rsp_iocb = rspiocb;
@@ -2047,19 +2054,21 @@ lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
spin_unlock_irq(&ndlp->lock);
goto out;
}
- spin_unlock_irq(&ndlp->lock);
/* No PLOGI collision and the node is not registered with the
* scsi or nvme transport. It is no longer an active node. Just
* start the device remove process.
*/
if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) {
- spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
- spin_unlock_irq(&ndlp->lock);
+ if (!(ndlp->nlp_flag & NLP_IN_DEV_LOSS))
+ release_node = true;
+ }
+ spin_unlock_irq(&ndlp->lock);
+
+ if (release_node)
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_DEVICE_RM);
- }
} else {
/* Good status, call state machine */
prsp = list_entry(((struct lpfc_dmabuf *)
@@ -2269,6 +2278,7 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
struct lpfc_nodelist *ndlp;
char *mode;
u32 loglevel;
+ bool release_node = false;
/* we pass cmdiocb to state machine which needs rspiocb as well */
cmdiocb->context_un.rsp_iocb = rspiocb;
@@ -2335,14 +2345,18 @@ lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
* it is no longer an active node. Otherwise devloss
* handles the final cleanup.
*/
+ spin_lock_irq(&ndlp->lock);
if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD)) &&
!ndlp->fc4_prli_sent) {
- spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
- spin_unlock_irq(&ndlp->lock);
+ if (!(ndlp->nlp_flag & NLP_IN_DEV_LOSS))
+ release_node = true;
+ }
+ spin_unlock_irq(&ndlp->lock);
+
+ if (release_node)
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_DEVICE_RM);
- }
} else {
/* Good status, call state machine. However, if another
* PRLI is outstanding, don't call the state machine
@@ -2713,6 +2727,7 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
IOCB_t *irsp;
struct lpfc_nodelist *ndlp;
int disc;
+ bool release_node = false;
/* we pass cmdiocb to state machine which needs rspiocb as well */
cmdiocb->context_un.rsp_iocb = rspiocb;
@@ -2771,13 +2786,17 @@ lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
* transport, it is no longer an active node. Otherwise
* devloss handles the final cleanup.
*/
+ spin_lock_irq(&ndlp->lock);
if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) {
- spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
- spin_unlock_irq(&ndlp->lock);
+ if (!(ndlp->nlp_flag & NLP_IN_DEV_LOSS))
+ release_node = true;
+ }
+ spin_unlock_irq(&ndlp->lock);
+
+ if (release_node)
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_DEVICE_RM);
- }
} else
/* Good status, call state machine */
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
--
2.34.1