Hi, this is your Linux kernel regression tracker.
On 03.04.22 13:22, Michele Ballabio wrote:
>
> I think I hit a regression in 5.16-stable.
> It is difficult to reproduce, and I'm not sure if it's
> still present in 5.17 and/or if it's caused by flaky hardware.
>
> The machine is a Ryzen 5 1600 with AMD graphics (RX 560).
>
> Kernels 5.16.10 do not have the following regression, 5.16.11-16
5.16.11-16 sounds like this is a distro kernel that might or might not
be patched. Or is 11-16 just meant as a range. Could you clarify?
> do. My machine would freeze completely about once a week, no oops in
> the logs, sysrq won't work either. I managed to log only the
> following (and only once) with netconsole, while running kernel 5.16.16.
> I could not reproduce the problem since.
Hmmm. Of course ideally all regressions get fixed, but that beeing said:
5.16 will likely be EOL in round about two weeks anway and getting to
the root of this problem might take some time and effort. That's why I'm
not sure myself what's the best way forward here. Maybe testing 5.17 to
see if the problem still shows up would be good; bisection would help,
but I guess that will be hard here. But I guess there is one thing that
could help: could you maybe decode the panic you have as described in
this document:
https://www.kernel.org/doc/html/latest/admin-guide/reporting-issues.html
Ciao, Thorsten
> ----------
> 4,1490,11865947234,-;ieee80211 phy0: rt2x00usb_watchdog_tx_dma: Warning - TX queue 2 DMA timed out, invoke forced reset
> SUBSYSTEM=ieee80211
> DEVICE=+ieee80211:phy0
> 4,1491,11872348272,-;ieee80211 phy0: rt2x00usb_watchdog_tx_dma: Warning - TX queue 2 DMA timed out, invoke forced reset
> SUBSYSTEM=ieee80211
> DEVICE=+ieee80211:phy0
> 0,1493,12767657117,-;traps: PANIC: double fault, error_code: 0x0
> 4,1494,12767657121,-;double fault: 0000 [#1] PREEMPT SMP NOPTI
> 4,1495,12767657123,-;CPU: 4 PID: 16786 Comm: MediaPD~der #12 Not tainted 5.16.16 #1
> 4,1496,12767657126,-;Hardware name: System manufacturer System Product Name/PRIME B350-PLUS, BIOS 4011 04/19/2018
> 4,1497,12767657127,-;RIP: 0010:entry_SYSCALL_64+0x3/0x29
> 4,1498,12767657133,-;Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc 0f 01 f8 <65> 48 89 24 25 14 60 00 00 eb 12 0f 20 dc 0f 1f 44 00 00 48 81 e4
> 4,1499,12767657134,-;RSP: 0018:00007f2a8bcbd438 EFLAGS: 00010002
> 4,1500,12767657136,-;RAX: 00000000000000ca RBX: 000000000000005d RCX: 00007f2aa45e8aab
> 4,1501,12767657138,-;RDX: 0000000000000002 RSI: 0000000000000080 RDI: 00007f2aa4400018
> 4,1502,12767657139,-;RBP: 00007f2aa4400018 R08: 0000000000000000 R09: 00007f2a8ed00000
> 4,1503,12767657140,-;R10: 0000000000000000 R11: 0000000000000282 R12: 00000000000000a8
> 4,1504,12767657141,-;R13: 0000000000000003 R14: 0000000000000030 R15: 00007f2aa4400000
> 4,1505,12767657142,-;FS: 00007f2a8bcbe640(0000) GS:ffff8b110ed00000(0000) knlGS:0000000000000000
> 4,1506,12767657143,-;CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> 4,1507,12767657144,-;CR2: 00007f2a8bcbd428 CR3: 00000002953f2000 CR4: 00000000003506e0
> 4,1508,12767657146,-;Call Trace:
> 4,1509,12767657146,-,ncfrag=0/986;Modules linked in: nfnetlink_queue nfnetlink_log nfnetlink bluetooth ecdh_generic ecc netconsole uas usb_storage snd_seq_dummy snd_hrtimer snd_seq snd_seq_device iptable_filter xt_tcpudp ip_tables
> x_tables hwmon_vid 8021q garp mrp stp llc ipv6 fuse rt73usb rt2x00usb rt2x00lib mac80211 hid_logitech cfg80211 joydev hid_generic usbhid hid amdgpu intel_rapl_msr iommu_v2 intel_rapl_common gpu_sched eeepc_wmi asus_wmi drm_ttm_helper
> ttm platform_profile battery drm_kms_helper sparse_keymap edac_mce_amd rfkill drm kvm_amd snd_hda_codec_realtek video snd_hda_codec_generic ledtrig_audio kvm snd_hda_codec_hdmi snd_hda_intel agpgart snd_intel_dspcfg snd_intel_sdw_acpi
> wmi_bmof snd_hda_codec evdev i2c_algo_bit snd_hda_core fb_sys_fops syscopyarea sysfillrect sysimgblt snd_hwdep mfd_core snd_pcm r8169 irqbypass snd_timer realtek snd xhci_pci xhci_pci_renesas xhci_hcd mdio_devres crct10dif_pclmul
> crc32_pclmul i2c_piix4 soundcore ccp libphy ghash_clmulni_intel i2c_co4,1509,12767657146,-,ncfrag=966/986;re rapl k10temp wmi
> 4,1510,12767657189,c; acpi_cpufreq gpio_amdpt button gpio_generic loop [last unloaded: netconsole]
> 4,1511,12767657207,-;------------[ cut here ]------------
> 4,1512,12767657207,-;WARNING: CPU: 4 PID: 16786 at kernel/softirq.c:362 __local_bh_enable_ip+0x43/0x70
> 4,1513,12767657212,-,ncfrag=0/986;Modules linked in: nfnetlink_queue nfnetlink_log nfnetlink bluetooth ecdh_generic ecc netconsole uas usb_storage snd_seq_dummy snd_hrtimer snd_seq snd_seq_device iptable_filter xt_tcpudp ip_tables
> x_tables hwmon_vid 8021q garp mrp stp llc ipv6 fuse rt73usb rt2x00usb rt2x00lib mac80211 hid_logitech cfg80211 joydev hid_generic usbhid hid amdgpu intel_rapl_msr iommu_v2 intel_rapl_common gpu_sched eeepc_wmi asus_wmi drm_ttm_helper
> ttm platform_profile battery drm_kms_helper sparse_keymap edac_mce_amd rfkill drm kvm_amd snd_hda_codec_realtek video snd_hda_codec_generic ledtrig_audio kvm snd_hda_codec_hdmi snd_hda_intel agpgart snd_intel_dspcfg snd_intel_sdw_acpi
> wmi_bmof snd_hda_codec evdev i2c_algo_bit snd_hda_core fb_sys_fops syscopyarea sysfillrect sysimgblt snd_hwdep mfd_core snd_pcm r8169 irqbypass snd_timer realtek snd xhci_pci xhci_pci_renesas xhci_hcd mdio_devres crct10dif_pclmul
> crc32_pclmul i2c_piix4 soundcore ccp libphy ghash_clmulni_intel i2c_co4,1513,12767657212,-,ncfrag=966/986;re rapl k10temp wmi
> 4,1514,12767657248,c; acpi_cpufreq gpio_amdpt button gpio_generic loop [last unloaded: netconsole]
> 4,1515,12767657252,-;CPU: 4 PID: 16786 Comm: MediaPD~der #12 Not tainted 5.16.16 #1
> 4,1516,12767657254,-;Hardware name: System manufacturer System Product Name/PRIME B350-PLUS, BIOS 4011 04/19/2018
> 4,1517,12767657255,-;RIP: 0010:__local_bh_enable_ip+0x43/0x70
> 4,1518,12767657257,-;Code: 01 35 61 1d f3 7d 65 8b 05 5a 1d f3 7d a9 00 ff ff 00 74 1a bf 01 00 00 00 e8 99 b5 02 00 65 8b 05 42 1d f3 7d 85 c0 74 25 c3 <0f> 0b eb cc 48 c7 c7 d9 53 42 83 e8 4d ec a6 00 65 66 8b 05 25 19
> 4,1519,12767657259,-;RSP: 0018:fffffe00000f69a0 EFLAGS: 00010006
> 4,1520,12767657260,-;RAX: 0000000080110203 RBX: ffff8b0e05bd2000 RCX: ffff8b0e05bd2000
> 4,1521,12767657261,-;RDX: ffff8b0e0ac28000 RSI: 0000000000000201 RDI: ffffffffc12f12c3
> 4,1522,12767657262,-;RBP: ffff8b0e0c977a30 R08: fffffe00000f69e8 R09: ffff8b0e0d085000
> 4,1523,12767657263,-;R10: ffff8b0e03234300 R11: 0000000000000fff R12: ffff8b0e0d0850d0
> 4,1524,12767657264,-;R13: fffffe00000f69e8 R14: ffff8b0e0ddfc980 R15: ffff8b0e0d085a58
> 4,1525,12767657265,-;FS: 00007f2a8bcbe640(0000) GS:ffff8b110ed00000(0000) knlGS:0000000000000000
> 4,1526,12767657266,-;CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> ----------
>
>
The following commit has been merged into the x86/urgent branch of tip:
Commit-ID: d39268ad24c0fd0665d0c5cf55a7c1a0ebf94766
Gitweb: https://git.kernel.org/tip/d39268ad24c0fd0665d0c5cf55a7c1a0ebf94766
Author: Dave Hansen <dave.hansen(a)linux.intel.com>
AuthorDate: Fri, 18 Mar 2022 06:52:59 -07:00
Committer: Borislav Petkov <bp(a)suse.de>
CommitterDate: Mon, 04 Apr 2022 19:41:36 +02:00
x86/mm/tlb: Revert retpoline avoidance approach
0day reported a regression on a microbenchmark which is intended to
stress the TLB flushing path:
https://lore.kernel.org/all/20220317090415.GE735@xsang-OptiPlex-9020/
It pointed at a commit from Nadav which intended to remove retpoline
overhead in the TLB flushing path by taking the 'cond'-ition in
on_each_cpu_cond_mask(), pre-calculating it, and incorporating it into
'cpumask'. That allowed the code to use a bunch of earlier direct
calls instead of later indirect calls that need a retpoline.
But, in practice, threads can go idle (and into lazy TLB mode where
they don't need to flush their TLB) between the early and late calls.
It works in this direction and not in the other because TLB-flushing
threads tend to hold mmap_lock for write. Contention on that lock
causes threads to _go_ idle right in this early/late window.
There was not any performance data in the original commit specific
to the retpoline overhead. I did a few tests on a system with
retpolines:
https://lore.kernel.org/all/dd8be93c-ded6-b962-50d4-96b1c3afb2b7@intel.com/
which showed a possible small win. But, that small win pales in
comparison with the bigger loss induced on non-retpoline systems.
Revert the patch that removed the retpolines. This was not a
clean revert, but it was self-contained enough not to be too painful.
Fixes: 6035152d8eeb ("x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy()")
Reported-by: kernel test robot <oliver.sang(a)intel.com>
Signed-off-by: Dave Hansen <dave.hansen(a)linux.intel.com>
Signed-off-by: Borislav Petkov <bp(a)suse.de>
Acked-by: Nadav Amit <namit(a)vmware.com>
Cc: <stable(a)vger.kernel.org>
Link: https://lkml.kernel.org/r/164874672286.389.7021457716635788197.tip-bot2@tip…
---
arch/x86/mm/tlb.c | 37 +++++--------------------------------
1 file changed, 5 insertions(+), 32 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6eb4d91..d400b6d 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -855,13 +855,11 @@ done:
nr_invalidate);
}
-static bool tlb_is_not_lazy(int cpu)
+static bool tlb_is_not_lazy(int cpu, void *data)
{
return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
}
-static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
-
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared);
@@ -890,36 +888,11 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
* up on the new contents of what used to be page tables, while
* doing a speculative memory access.
*/
- if (info->freed_tables) {
+ if (info->freed_tables)
on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
- } else {
- /*
- * Although we could have used on_each_cpu_cond_mask(),
- * open-coding it has performance advantages, as it eliminates
- * the need for indirect calls or retpolines. In addition, it
- * allows to use a designated cpumask for evaluating the
- * condition, instead of allocating one.
- *
- * This code works under the assumption that there are no nested
- * TLB flushes, an assumption that is already made in
- * flush_tlb_mm_range().
- *
- * cond_cpumask is logically a stack-local variable, but it is
- * more efficient to have it off the stack and not to allocate
- * it on demand. Preemption is disabled and this code is
- * non-reentrant.
- */
- struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask);
- int cpu;
-
- cpumask_clear(cond_cpumask);
-
- for_each_cpu(cpu, cpumask) {
- if (tlb_is_not_lazy(cpu))
- __cpumask_set_cpu(cpu, cond_cpumask);
- }
- on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true);
- }
+ else
+ on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
+ (void *)info, 1, cpumask);
}
void flush_tlb_multi(const struct cpumask *cpumask,