For some odd reason 5.10 kernel series doesn't compile with a newer
toolchain since 2025-02-09:
2025-02-09T17:32:07.7991299Z GEN .version
2025-02-09T17:32:07.8270062Z CHK include/generated/compile.h
2025-02-09T17:32:07.8540777Z LD vmlinux.o
2025-02-09T17:32:11.7210899Z MODPOST vmlinux.symvers
2025-02-09T17:32:12.0869599Z MODINFO modules.builtin.modinfo
2025-02-09T17:32:12.1403022Z GEN modules.builtin
2025-02-09T17:32:12.1475659Z LD .tmp_vmlinux.btf
2025-02-09T17:32:19.6117204Z BTF .btf.vmlinux.bin.o
2025-02-09T17:32:31.2916650Z LD .tmp_vmlinux.kallsyms1
2025-02-09T17:32:34.8731104Z KSYMS .tmp_vmlinux.kallsyms1.S
2025-02-09T17:32:35.4910608Z AS .tmp_vmlinux.kallsyms1.o
2025-02-09T17:32:35.9662538Z LD .tmp_vmlinux.kallsyms2
2025-02-09T17:32:39.2595984Z KSYMS .tmp_vmlinux.kallsyms2.S
2025-02-09T17:32:39.8802028Z AS .tmp_vmlinux.kallsyms2.o
2025-02-09T17:32:40.3659440Z LD vmlinux
2025-02-09T17:32:48.0031558Z BTFIDS vmlinux
2025-02-09T17:32:48.0143553Z FAILED unresolved symbol filp_close
2025-02-09T17:32:48.5019928Z make: *** [Makefile:1207: vmlinux] Error 255
2025-02-09T17:32:48.5061241Z ==> ERROR: A failure occurred in build().
5.10.234 built fine couple of days ago with the older one. There were
slight changes made. 5.4 and 5.15 still compile.
Wonder what might be missing here ...
--
Best, Philip
Users of the Lenovo ThinkPad X13s have reported that Wi-Fi sometimes
breaks and the log fills up with errors like:
ath11k_pci 0006:01:00.0: HTC Rx: insufficient length, got 1484, expected 1492
ath11k_pci 0006:01:00.0: HTC Rx: insufficient length, got 1460, expected 1484
which based on a quick look at the ath11k driver seemed to indicate some
kind of ring-buffer corruption.
Miaoqing Pan tracked it down to the host seeing the updated destination
ring head pointer before the updated descriptor, and the error handling
for that in turn leaves the ring buffer in an inconsistent state.
While this has not yet been observed with ath12k, the ring-buffer
implementation is very similar to the ath11k one and it suffers from the
same bugs.
Add the missing memory barrier to make sure that the descriptor is read
after the head pointer to address the root cause of the corruption while
fixing up the error handling in case there are ever any (ordering) bugs
on the device side.
Note that the READ_ONCE() are only needed to avoid compiler mischief in
case the ring-buffer helpers are ever inlined.
Tested-on: WCN7850 hw2.0 WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3
Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices")
Cc: stable(a)vger.kernel.org # 6.3
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218623
Link: https://lore.kernel.org/20250310010217.3845141-3-quic_miaoqing@quicinc.com
Cc: Miaoqing Pan <quic_miaoqing(a)quicinc.com>
Signed-off-by: Johan Hovold <johan+linaro(a)kernel.org>
---
drivers/net/wireless/ath/ath12k/ce.c | 11 +++++------
drivers/net/wireless/ath/ath12k/hal.c | 4 ++--
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireless/ath/ath12k/ce.c b/drivers/net/wireless/ath/ath12k/ce.c
index be0d669d31fc..740586fe49d1 100644
--- a/drivers/net/wireless/ath/ath12k/ce.c
+++ b/drivers/net/wireless/ath/ath12k/ce.c
@@ -343,11 +343,10 @@ static int ath12k_ce_completed_recv_next(struct ath12k_ce_pipe *pipe,
goto err;
}
+ /* Make sure descriptor is read after the head pointer. */
+ dma_rmb();
+
*nbytes = ath12k_hal_ce_dst_status_get_length(desc);
- if (*nbytes == 0) {
- ret = -EIO;
- goto err;
- }
*skb = pipe->dest_ring->skb[sw_index];
pipe->dest_ring->skb[sw_index] = NULL;
@@ -380,8 +379,8 @@ static void ath12k_ce_recv_process_cb(struct ath12k_ce_pipe *pipe)
dma_unmap_single(ab->dev, ATH12K_SKB_RXCB(skb)->paddr,
max_nbytes, DMA_FROM_DEVICE);
- if (unlikely(max_nbytes < nbytes)) {
- ath12k_warn(ab, "rxed more than expected (nbytes %d, max %d)",
+ if (unlikely(max_nbytes < nbytes || nbytes == 0)) {
+ ath12k_warn(ab, "unexpected rx length (nbytes %d, max %d)",
nbytes, max_nbytes);
dev_kfree_skb_any(skb);
continue;
diff --git a/drivers/net/wireless/ath/ath12k/hal.c b/drivers/net/wireless/ath/ath12k/hal.c
index cd59ff8e6c7b..91d5126ca149 100644
--- a/drivers/net/wireless/ath/ath12k/hal.c
+++ b/drivers/net/wireless/ath/ath12k/hal.c
@@ -1962,7 +1962,7 @@ u32 ath12k_hal_ce_dst_status_get_length(struct hal_ce_srng_dst_status_desc *desc
{
u32 len;
- len = le32_get_bits(desc->flags, HAL_CE_DST_STATUS_DESC_FLAGS_LEN);
+ len = le32_get_bits(READ_ONCE(desc->flags), HAL_CE_DST_STATUS_DESC_FLAGS_LEN);
desc->flags &= ~cpu_to_le32(HAL_CE_DST_STATUS_DESC_FLAGS_LEN);
return len;
@@ -2132,7 +2132,7 @@ void ath12k_hal_srng_access_begin(struct ath12k_base *ab, struct hal_srng *srng)
srng->u.src_ring.cached_tp =
*(volatile u32 *)srng->u.src_ring.tp_addr;
else
- srng->u.dst_ring.cached_hp = *srng->u.dst_ring.hp_addr;
+ srng->u.dst_ring.cached_hp = READ_ONCE(*srng->u.dst_ring.hp_addr);
}
/* Update cached ring head/tail pointers to HW. ath12k_hal_srng_access_begin()
--
2.48.1
When dwc3_gadget_soft_disconnect() fails, dwc3_suspend_common() keeps
going with the suspend, resulting in a period where the power domain is
off, but the gadget driver remains connected. Within this time frame,
invoking vbus_event_work() will cause an error as it attempts to access
DWC3 registers for endpoint disabling after the power domain has been
completely shut down.
Abort the suspend sequence when dwc3_gadget_suspend() cannot halt the
controller and proceeds with a soft connect.
Fixes: 9f8a67b65a49 ("usb: dwc3: gadget: fix gadget suspend/resume")
CC: stable(a)vger.kernel.org
Signed-off-by: Kuen-Han Tsai <khtsai(a)google.com>
---
Kernel panic - not syncing: Asynchronous SError Interrupt
Workqueue: events vbus_event_work
Call trace:
dump_backtrace+0xf4/0x118
show_stack+0x18/0x24
dump_stack_lvl+0x60/0x7c
dump_stack+0x18/0x3c
panic+0x16c/0x390
nmi_panic+0xa4/0xa8
arm64_serror_panic+0x6c/0x94
do_serror+0xc4/0xd0
el1h_64_error_handler+0x34/0x48
el1h_64_error+0x68/0x6c
readl+0x4c/0x8c
__dwc3_gadget_ep_disable+0x48/0x230
dwc3_gadget_ep_disable+0x50/0xc0
usb_ep_disable+0x44/0xe4
ffs_func_eps_disable+0x64/0xc8
ffs_func_set_alt+0x74/0x368
ffs_func_disable+0x18/0x28
composite_disconnect+0x90/0xec
configfs_composite_disconnect+0x64/0x88
usb_gadget_disconnect_locked+0xc0/0x168
vbus_event_work+0x3c/0x58
process_one_work+0x1e4/0x43c
worker_thread+0x25c/0x430
kthread+0x104/0x1d4
ret_from_fork+0x10/0x20
---
Changelog:
v4:
- correct the mistake where semicolon was forgotten
- return -EAGAIN upon dwc3_gadget_suspend() failure
v3:
- change the Fixes tag
v2:
- move declarations in separate lines
- add the Fixes tag
---
drivers/usb/dwc3/core.c | 9 +++++++--
drivers/usb/dwc3/gadget.c | 22 +++++++++-------------
2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 66a08b527165..f36bc933c55b 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -2388,6 +2388,7 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
{
u32 reg;
int i;
+ int ret;
if (!pm_runtime_suspended(dwc->dev) && !PMSG_IS_AUTO(msg)) {
dwc->susphy_state = (dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)) &
@@ -2406,7 +2407,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
case DWC3_GCTL_PRTCAP_DEVICE:
if (pm_runtime_suspended(dwc->dev))
break;
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
dwc3_core_exit(dwc);
break;
@@ -2441,7 +2444,9 @@ static int dwc3_suspend_common(struct dwc3 *dwc, pm_message_t msg)
break;
if (dwc->current_otg_role == DWC3_OTG_ROLE_DEVICE) {
- dwc3_gadget_suspend(dwc);
+ ret = dwc3_gadget_suspend(dwc);
+ if (ret)
+ return ret;
synchronize_irq(dwc->irq_gadget);
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 89a4dc8ebf94..630fd5f0ce97 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4776,26 +4776,22 @@ int dwc3_gadget_suspend(struct dwc3 *dwc)
int ret;
ret = dwc3_gadget_soft_disconnect(dwc);
- if (ret)
- goto err;
-
- spin_lock_irqsave(&dwc->lock, flags);
- if (dwc->gadget_driver)
- dwc3_disconnect_gadget(dwc);
- spin_unlock_irqrestore(&dwc->lock, flags);
-
- return 0;
-
-err:
/*
* Attempt to reset the controller's state. Likely no
* communication can be established until the host
* performs a port reset.
*/
- if (dwc->softconnect)
+ if (ret && dwc->softconnect) {
dwc3_gadget_soft_connect(dwc);
+ return -EAGAIN;
+ }
- return ret;
+ spin_lock_irqsave(&dwc->lock, flags);
+ if (dwc->gadget_driver)
+ dwc3_disconnect_gadget(dwc);
+ spin_unlock_irqrestore(&dwc->lock, flags);
+
+ return 0;
}
int dwc3_gadget_resume(struct dwc3 *dwc)
--
2.49.0.604.gff1f9ca942-goog
This series introduces a new metadata format for UVC cameras and adds a
couple of improvements to the UVC metadata handling.
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
---
Changes in v5:
- Fix codestyle and kerneldoc warnings reported by media-ci
- Link to v4: https://lore.kernel.org/r/20250403-uvc-meta-v4-0-877aa6475975@chromium.org
Changes in v4:
- Rename format to V4L2_META_FMT_UVC_MSXU_1_5 (Thanks Mauro)
- Flag the new format with a quirk.
- Autodetect MSXU devices.
- Link to v3: https://lore.kernel.org/linux-media/20250313-uvc-metadata-v3-0-c467af869c60…
Changes in v3:
- Fix doc syntax errors.
- Link to v2: https://lore.kernel.org/r/20250306-uvc-metadata-v2-0-7e939857cad5@chromium.…
Changes in v2:
- Add metadata invalid fix
- Move doc note to a separate patch
- Introuce V4L2_META_FMT_UVC_CUSTOM (thanks HdG!).
- Link to v1: https://lore.kernel.org/r/20250226-uvc-metadata-v1-1-6cd6fe5ec2cb@chromium.…
---
Ricardo Ribalda (4):
media: uvcvideo: Do not mark valid metadata as invalid
media: Documentation: Add note about UVCH length field
media: uvcvideo: Introduce V4L2_META_FMT_UVC_MSXU_1_5
media: uvcvideo: Auto-set UVC_QUIRK_MSXU_META
.../userspace-api/media/v4l/meta-formats.rst | 1 +
.../media/v4l/metafmt-uvc-msxu-1-5.rst | 23 +++++
.../userspace-api/media/v4l/metafmt-uvc.rst | 4 +-
MAINTAINERS | 1 +
drivers/media/usb/uvc/uvc_metadata.c | 97 ++++++++++++++++++++--
drivers/media/usb/uvc/uvc_video.c | 12 +--
drivers/media/usb/uvc/uvcvideo.h | 1 +
drivers/media/v4l2-core/v4l2-ioctl.c | 1 +
include/linux/usb/uvc.h | 3 +
include/uapi/linux/videodev2.h | 1 +
10 files changed, 131 insertions(+), 13 deletions(-)
---
base-commit: 4e82c87058f45e79eeaa4d5bcc3b38dd3dce7209
change-id: 20250403-uvc-meta-e556773d12ae
Best regards,
--
Ricardo Ribalda <ribalda(a)chromium.org>
Hi,
We (Ateme, a video encoding company) may have found an unwanted behavior in the scheduler since 5.10 (commit 16b0a7a1a0af), then 5.16 (commit c5b0a7eefc70), then 5.19 (commit not found yet), then maybe some other commits from 5.19 to 6.12, with a consequence of IPC decrease. Problem still appears on lasts 6.12, 6.13 and 6.14
We have reverted both 16b0a7a1a0af and c5b0a7eefc70 commits that reduce our performances (see fair.patch attached, applicable on 6.12.17). Performances increase but still doesnt reach our reference on 5.4.152.
Instead of trying to find every single commits from 5.18 to 6.12 that could decrease our performance, I chosed to bench 5.4.152 versus 6.12.17 with and without fair.patch.
The problem appeared clear : a lot of CPU migrations go out of CCX, then L3 miss, then IPC decrease.
Context of our bench: video decoder which work at a regulated speed, 1 process, 21 main threads, everyone of them creates 10 threads, 8 of them have a fine granularity, meaning they go to sleep quite often, giving the scheduler a lot of opportunities to act).
Hardware is an AMD Epyc 7702P, 128 cores, grouped by shared LLC 4 cores +4 hyperthreaded cores. NUMA topology is set by the BIOS to 1 node per socket.
Every pthread are created with default attributes.
I use AMDuProf (-C -A system -a -m ipc,l1,l2,l3,memory) for CPU Utilization (%), CPU effective freq, IPC, L2 access (pti), L2 miss (pti), L3 miss (absolute) and Mem (GB/s, and perf (stat -d -d -d -a) for Context switches, CPU migrations and Real time (s).
We noted that upgrade 5.4.152 to 6.12.17 without any special preempt configuration :
Two fold increase in CPU migration
30% memory bandwidth increase
20% L3 cache misses increase
10% IPC decrease
With the attached fair.patch applied to 6.12.17 (reminder : this patch reverts one commit appeared in 5.10 and another in 5.16) we managed to reduce CPU migrations and increase IPC but not as much as we had on 5.4.152. Our goal is to keep kernel "clean" without any patch (we don't want to apply and maintain fair.patch) then for the rest of my email we will consider stock kernel 6.12.17.
I've reduced the "sub threads count" to stays below 128 threads. Then still 21 main threads and instead of 10 worker per main thread I set 5 workers (4 of them with fine granularity) giving 105 pthreads -> everything goes fine in 6.12.17, no extra CPU migration, no extra memory bandwidth...
But as soon as we increase worker threads count (10 instead of 5) the problem appears.
We know our decoder may have too many threads but that's out of our scope, it has been designed like that some years ago and moving from "lot of small threads to few of big thread" is for now not possible.
We have a work around : we group threads using pthread affinities. Every main thread (and by inheritance of affinities every worker threads) on a single CCX so we reduce the L3 miss for them, then decrease memory bandwidth, then finally increasing IPC.
With that solution, we go above our original performances, for both kernels, and they perform at the same level. However, it is impractical to productize as such.
I've tried many kernel build configurations (CONFIG_PREMPT_*, CONFIG_SCHEDULER_*, tuning of fair.c:sysctl_sched_migration_cost) on 6.12.17, 6.12.21 (longterm), 6.13.9 (mainline), and 6.14.0 Nothing changes.
Q: Is there anyway to tune the kernel so we can get our performance back without using the pthread affinities work around ?
Feel free to ask an archive containing binaries and payload.
I first posted on https://bugzilla.kernel.org/show_bug.cgi?id=220000 but one told me the best way to get answers where these mailing lists
Regards,
Jean-Baptiste Roquefere, Ateme
Attached bench.tar.gz :
* bench/fair.patch
* bench/bench.ods with 2 sheets :
- regulated : decoder speed is regulated to keep real time constant
- no regul : decoder speed is not regulated and uses from 1 to 76 main threads with 10 worker per main thread
* bench/regulated.csv : bench.ods:regulated exported in csv format
* bench/not-regulated : bench.ods:no regul exported in csv format
The xHC resources allocated for USB devices are not released in correct
order after resuming in case when while suspend device was reconnected.
This issue has been detected during the fallowing scenario:
- connect hub HS to root port
- connect LS/FS device to hub port
- wait for enumeration to finish
- force host to suspend
- reconnect hub attached to root port
- wake host
For this scenario during enumeration of USB LS/FS device the Cadence xHC
reports completion error code for xHC commands because the xHC resources
used for devices has not been properly released.
XHCI specification doesn't mention that device can be reset in any order
so, we should not treat this issue as Cadence xHC controller bug.
Similar as during disconnecting in this case the device resources should
be cleared starting form the last usb device in tree toward the root hub.
To fix this issue usbcore driver should call hcd->driver->reset_device
for all USB devices connected to hub which was reconnected while
suspending.
Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
cc: <stable(a)vger.kernel.org>
Signed-off-by: Pawel Laszczak <pawell(a)cadence.com>
---
Changelog:
v3:
- Changed patch title
- Corrected typo
- Moved hub_hc_release_resources above mutex_lock(hcd->address0_mutex)
v2:
- Replaced disconnection procedure with releasing only the xHC resources
drivers/usb/core/hub.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index a76bb50b6202..dcba4281ea48 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -6065,6 +6065,36 @@ void usb_hub_cleanup(void)
usb_deregister(&hub_driver);
} /* usb_hub_cleanup() */
+/**
+ * hub_hc_release_resources - clear resources used by host controller
+ * @udev: pointer to device being released
+ *
+ * Context: task context, might sleep
+ *
+ * Function releases the host controller resources in correct order before
+ * making any operation on resuming usb device. The host controller resources
+ * allocated for devices in tree should be released starting from the last
+ * usb device in tree toward the root hub. This function is used only during
+ * resuming device when usb device require reinitialization – that is, when
+ * flag udev->reset_resume is set.
+ *
+ * This call is synchronous, and may not be used in an interrupt context.
+ */
+static void hub_hc_release_resources(struct usb_device *udev)
+{
+ struct usb_hub *hub = usb_hub_to_struct_hub(udev);
+ struct usb_hcd *hcd = bus_to_hcd(udev->bus);
+ int i;
+
+ /* Release up resources for all children before this device */
+ for (i = 0; i < udev->maxchild; i++)
+ if (hub->ports[i]->child)
+ hub_hc_release_resources(hub->ports[i]->child);
+
+ if (hcd->driver->reset_device)
+ hcd->driver->reset_device(hcd, udev);
+}
+
/**
* usb_reset_and_verify_device - perform a USB port reset to reinitialize a device
* @udev: device to reset (not in SUSPENDED or NOTATTACHED state)
@@ -6129,6 +6159,9 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
bos = udev->bos;
udev->bos = NULL;
+ if (udev->reset_resume)
+ hub_hc_release_resources(udev);
+
mutex_lock(hcd->address0_mutex);
for (i = 0; i < PORT_INIT_TRIES; ++i) {
--
2.43.0