The patch titled
Subject: mm,swapops: update check in is_pfn_swap_entry for hwpoison entries
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mmswapops-update-check-in-is_pfn_swap_entry-for-hwpoison-entries.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Oscar Salvador <osalvador(a)suse.de>
Subject: mm,swapops: update check in is_pfn_swap_entry for hwpoison entries
Date: Sun, 7 Apr 2024 15:05:37 +0200
Tony reported that the Machine check recovery was broken in v6.9-rc1, as
he was hitting a VM_BUG_ON when injecting uncorrectable memory errors to
DRAM.
After some more digging and debugging on his side, he realized that this
went back to v6.1, with the introduction of 'commit 0d206b5d2e0d
("mm/swap: add swp_offset_pfn() to fetch PFN from swap entry")'. That
commit, among other things, introduced swp_offset_pfn(), replacing
hwpoison_entry_to_pfn() in its favour.
The patch also introduced a VM_BUG_ON() check for is_pfn_swap_entry(), but
is_pfn_swap_entry() never got updated to cover hwpoison entries, which
means that we would hit the VM_BUG_ON whenever we would call
swp_offset_pfn() for such entries on environments with CONFIG_DEBUG_VM
set. Fix this by updating the check to cover hwpoison entries as well,
and update the comment while we are it.
Link: https://lkml.kernel.org/r/20240407130537.16977-1-osalvador@suse.de
Fixes: 0d206b5d2e0d ("mm/swap: add swp_offset_pfn() to fetch PFN from swap entry")
Signed-off-by: Oscar Salvador <osalvador(a)suse.de>
Reported-by: Tony Luck <tony.luck(a)intel.com>
Closes: https://lore.kernel.org/all/Zg8kLSl2yAlA3o5D@agluck-desk3/
Tested-by: Tony Luck <tony.luck(a)intel.com>
Reviewed-by: Peter Xu <peterx(a)redhat.com>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: <stable(a)vger.kernel.org> [6.1.x]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/swapops.h | 65 +++++++++++++++++++-------------------
1 file changed, 33 insertions(+), 32 deletions(-)
--- a/include/linux/swapops.h~mmswapops-update-check-in-is_pfn_swap_entry-for-hwpoison-entries
+++ a/include/linux/swapops.h
@@ -390,6 +390,35 @@ static inline bool is_migration_entry_di
}
#endif /* CONFIG_MIGRATION */
+#ifdef CONFIG_MEMORY_FAILURE
+
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+ return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+ return swp_type(entry) == SWP_HWPOISON;
+}
+
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+ return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+ return 0;
+}
+#endif
+
typedef unsigned long pte_marker;
#define PTE_MARKER_UFFD_WP BIT(0)
@@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_ent
/*
* A pfn swap entry is a special type of swap entry that always has a pfn stored
- * in the swap offset. They are used to represent unaddressable device memory
- * and to restrict access to a page undergoing migration.
+ * in the swap offset. They can either be used to represent unaddressable device
+ * memory, to restrict access to a page undergoing migration or to represent a
+ * pfn which has been hwpoisoned and unmapped.
*/
static inline bool is_pfn_swap_entry(swp_entry_t entry)
{
@@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp
BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
return is_migration_entry(entry) || is_device_private_entry(entry) ||
- is_device_exclusive_entry(entry);
+ is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
}
struct page_vma_mapped_walk;
@@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry
}
#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * Support for hardware poisoned pages
- */
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- BUG_ON(!PageLocked(page));
- return swp_entry(SWP_HWPOISON, page_to_pfn(page));
-}
-
-static inline int is_hwpoison_entry(swp_entry_t entry)
-{
- return swp_type(entry) == SWP_HWPOISON;
-}
-
-#else
-
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
- return swp_entry(0, 0);
-}
-
-static inline int is_hwpoison_entry(swp_entry_t swp)
-{
- return 0;
-}
-#endif
-
static inline int non_swap_entry(swp_entry_t entry)
{
return swp_type(entry) >= MAX_SWAPFILES;
_
Patches currently in -mm which might be from osalvador(a)suse.de are
mmpage_owner-update-metadata-for-tail-pages.patch
mmpage_owner-fix-refcount-imbalance.patch
mmpage_owner-fix-accounting-of-pages-when-migrating.patch
mmpage_owner-fix-printing-of-stack-records.patch
mmswapops-update-check-in-is_pfn_swap_entry-for-hwpoison-entries.patch
The patch titled
Subject: mm/memory-failure: fix deadlock when hugetlb_optimize_vmemmap is enabled
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-memory-failure-fix-deadlock-when-hugetlb_optimize_vmemmap-is-enabled.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Miaohe Lin <linmiaohe(a)huawei.com>
Subject: mm/memory-failure: fix deadlock when hugetlb_optimize_vmemmap is enabled
Date: Sun, 7 Apr 2024 16:54:56 +0800
When I did hard offline test with hugetlb pages, below deadlock occurs:
======================================================
WARNING: possible circular locking dependency detected
6.8.0-11409-gf6cef5f8c37f #1 Not tainted
------------------------------------------------------
bash/46904 is trying to acquire lock:
ffffffffabe68910 (cpu_hotplug_lock){++++}-{0:0}, at: static_key_slow_dec+0x16/0x60
but task is already holding lock:
ffffffffabf92ea8 (pcp_batch_high_lock){+.+.}-{3:3}, at: zone_pcp_disable+0x16/0x40
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (pcp_batch_high_lock){+.+.}-{3:3}:
__mutex_lock+0x6c/0x770
page_alloc_cpu_online+0x3c/0x70
cpuhp_invoke_callback+0x397/0x5f0
__cpuhp_invoke_callback_range+0x71/0xe0
_cpu_up+0xeb/0x210
cpu_up+0x91/0xe0
cpuhp_bringup_mask+0x49/0xb0
bringup_nonboot_cpus+0xb7/0xe0
smp_init+0x25/0xa0
kernel_init_freeable+0x15f/0x3e0
kernel_init+0x15/0x1b0
ret_from_fork+0x2f/0x50
ret_from_fork_asm+0x1a/0x30
-> #0 (cpu_hotplug_lock){++++}-{0:0}:
__lock_acquire+0x1298/0x1cd0
lock_acquire+0xc0/0x2b0
cpus_read_lock+0x2a/0xc0
static_key_slow_dec+0x16/0x60
__hugetlb_vmemmap_restore_folio+0x1b9/0x200
dissolve_free_huge_page+0x211/0x260
__page_handle_poison+0x45/0xc0
memory_failure+0x65e/0xc70
hard_offline_page_store+0x55/0xa0
kernfs_fop_write_iter+0x12c/0x1d0
vfs_write+0x387/0x550
ksys_write+0x64/0xe0
do_syscall_64+0xca/0x1e0
entry_SYSCALL_64_after_hwframe+0x6d/0x75
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(pcp_batch_high_lock);
lock(cpu_hotplug_lock);
lock(pcp_batch_high_lock);
rlock(cpu_hotplug_lock);
*** DEADLOCK ***
5 locks held by bash/46904:
#0: ffff98f6c3bb23f0 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x64/0xe0
#1: ffff98f6c328e488 (&of->mutex){+.+.}-{3:3}, at: kernfs_fop_write_iter+0xf8/0x1d0
#2: ffff98ef83b31890 (kn->active#113){.+.+}-{0:0}, at: kernfs_fop_write_iter+0x100/0x1d0
#3: ffffffffabf9db48 (mf_mutex){+.+.}-{3:3}, at: memory_failure+0x44/0xc70
#4: ffffffffabf92ea8 (pcp_batch_high_lock){+.+.}-{3:3}, at: zone_pcp_disable+0x16/0x40
stack backtrace:
CPU: 10 PID: 46904 Comm: bash Kdump: loaded Not tainted 6.8.0-11409-gf6cef5f8c37f #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
Call Trace:
<TASK>
dump_stack_lvl+0x68/0xa0
check_noncircular+0x129/0x140
__lock_acquire+0x1298/0x1cd0
lock_acquire+0xc0/0x2b0
cpus_read_lock+0x2a/0xc0
static_key_slow_dec+0x16/0x60
__hugetlb_vmemmap_restore_folio+0x1b9/0x200
dissolve_free_huge_page+0x211/0x260
__page_handle_poison+0x45/0xc0
memory_failure+0x65e/0xc70
hard_offline_page_store+0x55/0xa0
kernfs_fop_write_iter+0x12c/0x1d0
vfs_write+0x387/0x550
ksys_write+0x64/0xe0
do_syscall_64+0xca/0x1e0
entry_SYSCALL_64_after_hwframe+0x6d/0x75
RIP: 0033:0x7fc862314887
Code: 10 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
RSP: 002b:00007fff19311268 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007fc862314887
RDX: 000000000000000c RSI: 000056405645fe10 RDI: 0000000000000001
RBP: 000056405645fe10 R08: 00007fc8623d1460 R09: 000000007fffffff
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000000000c
R13: 00007fc86241b780 R14: 00007fc862417600 R15: 00007fc862416a00
In short, below scene breaks the lock dependency chain:
memory_failure
__page_handle_poison
zone_pcp_disable -- lock(pcp_batch_high_lock)
dissolve_free_huge_page
__hugetlb_vmemmap_restore_folio
static_key_slow_dec
cpus_read_lock -- rlock(cpu_hotplug_lock)
Fix this by calling drain_all_pages() instead.
Link: https://lkml.kernel.org/r/20240407085456.2798193-1-linmiaohe@huawei.com
Fixes: 510d25c92ec4a ("mm/hwpoison: disable pcp for page_handle_poison()")
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
Cc: Naoya Horiguchi <naoya.horiguchi(a)nec.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/memory-failure.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
--- a/mm/memory-failure.c~mm-memory-failure-fix-deadlock-when-hugetlb_optimize_vmemmap-is-enabled
+++ a/mm/memory-failure.c
@@ -154,11 +154,17 @@ static int __page_handle_poison(struct p
{
int ret;
- zone_pcp_disable(page_zone(page));
+ /*
+ * zone_pcp_disable() can't be used here. It will hold pcp_batch_high_lock and
+ * dissolve_free_huge_page() might hold cpu_hotplug_lock via static_key_slow_dec()
+ * when hugetlb vmemmap optimization is enabled. This will break current lock
+ * dependency chain and leads to deadlock.
+ */
ret = dissolve_free_huge_page(page);
- if (!ret)
+ if (!ret) {
+ drain_all_pages(page_zone(page));
ret = take_page_off_buddy(page);
- zone_pcp_enable(page_zone(page));
+ }
return ret;
}
_
Patches currently in -mm which might be from linmiaohe(a)huawei.com are
mm-memory-failure-fix-deadlock-when-hugetlb_optimize_vmemmap-is-enabled.patch
The patch titled
Subject: mm/userfaultfd: Allow hugetlb change protection upon poison entry
has been added to the -mm mm-hotfixes-unstable branch. Its filename is
mm-userfaultfd-allow-hugetlb-change-protection-upon-poison-entry.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patche…
This patch will later appear in the mm-hotfixes-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Peter Xu <peterx(a)redhat.com>
Subject: mm/userfaultfd: Allow hugetlb change protection upon poison entry
Date: Fri, 5 Apr 2024 19:19:20 -0400
After UFFDIO_POISON, there can be two kinds of hugetlb pte markers, either
the POISON one or UFFD_WP one.
Allow change protection to run on a poisoned marker just like !hugetlb
cases, ignoring the marker irrelevant of the permission.
Here the two bits are mutual exclusive. For example, when install a
poisoned entry it must not be UFFD_WP already (by checking pte_none()
before such install). And it also means if UFFD_WP is set there must have
no POISON bit set. It makes sense because UFFD_WP is a bit to reflect
permission, and permissions do not apply if the pte is poisoned and
destined to sigbus.
So here we simply check uffd_wp bit set first, do nothing otherwise.
Attach the Fixes to UFFDIO_POISON work, as before that it should not be
possible to have poison entry for hugetlb (e.g., hugetlb doesn't do swap,
so no chance of swapin errors).
Link: https://lkml.kernel.org/r/20240405231920.1772199-1-peterx@redhat.com
Link: https://lore.kernel.org/r/000000000000920d5e0615602dd1@google.com
Reported-by: syzbot+b07c8ac8eee3d4d8440f(a)syzkaller.appspotmail.com
Fixes: fc71884a5f59 ("mm: userfaultfd: add new UFFDIO_POISON ioctl")
Signed-off-by: Peter Xu <peterx(a)redhat.com>
Cc: Axel Rasmussen <axelrasmussen(a)google.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [6.6+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/hugetlb.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
--- a/mm/hugetlb.c~mm-userfaultfd-allow-hugetlb-change-protection-upon-poison-entry
+++ a/mm/hugetlb.c
@@ -7044,9 +7044,13 @@ long hugetlb_change_protection(struct vm
if (!pte_same(pte, newpte))
set_huge_pte_at(mm, address, ptep, newpte, psize);
} else if (unlikely(is_pte_marker(pte))) {
- /* No other markers apply for now. */
- WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
- if (uffd_wp_resolve)
+ /*
+ * Do nothing on a poison marker; page is
+ * corrupted, permissons do not apply. Here
+ * pte_marker_uffd_wp()==true implies !poison
+ * because they're mutual exclusive.
+ */
+ if (pte_marker_uffd_wp(pte) && uffd_wp_resolve)
/* Safe to modify directly (non-present->none). */
huge_pte_clear(mm, address, ptep, psize);
} else if (!huge_pte_none(pte)) {
_
Patches currently in -mm which might be from peterx(a)redhat.com are
mm-userfaultfd-allow-hugetlb-change-protection-upon-poison-entry.patch
mm-hmm-process-pud-swap-entry-without-pud_huge.patch
mm-gup-cache-p4d-in-follow_p4d_mask.patch
mm-gup-check-p4d-presence-before-going-on.patch
mm-x86-change-pxd_huge-behavior-to-exclude-swap-entries.patch
mm-sparc-change-pxd_huge-behavior-to-exclude-swap-entries.patch
mm-arm-use-macros-to-define-pmd-pud-helpers.patch
mm-arm-redefine-pmd_huge-with-pmd_leaf.patch
mm-arm64-merge-pxd_huge-and-pxd_leaf-definitions.patch
mm-powerpc-redefine-pxd_huge-with-pxd_leaf.patch
mm-gup-merge-pxd-huge-mapping-checks.patch
mm-treewide-replace-pxd_huge-with-pxd_leaf.patch
mm-treewide-remove-pxd_huge.patch
mm-arm-remove-pmd_thp_or_huge.patch
mm-document-pxd_leaf-api.patch
selftests-mm-run_vmtestssh-fix-hugetlb-mem-size-calculation.patch
selftests-mm-run_vmtestssh-fix-hugetlb-mem-size-calculation-fix.patch
mm-kconfig-config_pgtable_has_huge_leaves.patch
mm-hugetlb-declare-hugetlbfs_pagecache_present-non-static.patch
mm-make-hpage_pxd_-macros-even-if-thp.patch
mm-introduce-vma_pgtable_walk_beginend.patch
mm-arch-provide-pud_pfn-fallback.patch
mm-arch-provide-pud_pfn-fallback-fix.patch
mm-gup-drop-folio_fast_pin_allowed-in-hugepd-processing.patch
mm-gup-refactor-record_subpages-to-find-1st-small-page.patch
mm-gup-handle-hugetlb-for-no_page_table.patch
mm-gup-cache-pudp-in-follow_pud_mask.patch
mm-gup-handle-huge-pud-for-follow_pud_mask.patch
mm-gup-handle-huge-pmd-for-follow_pmd_mask.patch
mm-gup-handle-huge-pmd-for-follow_pmd_mask-fix.patch
mm-gup-handle-hugepd-for-follow_page.patch
mm-gup-handle-hugetlb-in-the-generic-follow_page_mask-code.patch
mm-allow-anon-exclusive-check-over-hugetlb-tail-pages.patch
Stop printing the TT memory decryption status info each time tt is created
and instead print it just once.
Reduces the spam in the system logs when running guests with SEV enabled.
Signed-off-by: Zack Rusin <zack.rusin(a)broadcom.com>
Fixes: 71ce046327cf ("drm/ttm: Make sure the mapped tt pages are decrypted when needed")
Cc: Thomas Hellström <thomas.hellstrom(a)linux.intel.com>
Cc: Christian König <christian.koenig(a)amd.com>
Cc: dri-devel(a)lists.freedesktop.org
Cc: linux-kernel(a)vger.kernel.org
Cc: <stable(a)vger.kernel.org> # v5.14+
---
drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 578a7c37f00b..d776e3f87064 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -92,7 +92,7 @@ int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc)
*/
if (bdev->pool.use_dma_alloc && cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) {
page_flags |= TTM_TT_FLAG_DECRYPTED;
- drm_info(ddev, "TT memory decryption enabled.");
+ drm_info_once(ddev, "TT memory decryption enabled.");
}
bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags);
--
2.40.1
Hi,
Starting from kernel version 6.7.4 I hit the following problem: when I
connect two or more USB-to-ethernet adapters to the computer they get
assigned the same MAC address. Furthermore, the address is not the one
specified in any of the device labels but is selected seemingly at
random on boot.
This becomes a blocking issue when trying to use SYSTEMD.LINK(5) to
match the interfaces.
6.7.3 is OK, 6.7.4 introduces this behavior in the following upstream commit:
d2689b6a86b9 net: usb: ax88179_178a: avoid two consecutive device resets
Reverting this commit in 6.7.4 fixes the issue.
The behavior is also present in LTS 6.6.23. The commit has been
backported in 6.6.16.
Example system log when connecting two adapters. Both interfaces are
assigned address 02:a5:ab:80:e6:94.
kernel: usb 2-5.4: new SuperSpeed USB device number 3 using xhci_hcd
kernel: usb 2-5.4: New USB device found, idVendor=2001, idProduct=4a00, bcdDevice= 1.00
kernel: usb 2-5.4: New USB device strings: Mfr=1, Product=2, SerialNumber=3
kernel: usb 2-5.4: Product: D-Link DUB-1312
kernel: usb 2-5.4: Manufacturer: D-Link Elec. Corp.
kernel: usb 2-5.4: SerialNumber: 00000000001D4D
mtp-probe[2431]: checking bus 2, device 3: "/sys/devices/pci0000:00/0000:00:14.0/usb2/2-5/2-5.4"
mtp-probe[2431]: bus: 2, device: 3 was not an MTP device
kernel: ax88179_178a 2-5.4:1.0 eth0: register 'ax88179_178a' at usb-0000:00:14.0-5.4, D-Link DUB-1312 USB 3.0 to Gigabit Ethernet Adapter, 02:a5:ab:80:e6:94
kernel: usbcore: registered new interface driver ax88179_178a
mtp-probe[2436]: checking bus 2, device 3: "/sys/devices/pci0000:00/0000:00:14.0/usb2/2-5/2-5.4"
mtp-probe[2436]: bus: 2, device: 3 was not an MTP device
kernel: ax88179_178a 2-5.4:1.0 enp0s20f0u5u4: renamed from eth0
systemd-networkd[469]: eth0: Interface name change detected, renamed to enp0s20f0u5u4.
kernel: usb 1-5.3: new high-speed USB device number 8 using xhci_hcd
kernel: usb 1-5.3: New USB device found, idVendor=0b95, idProduct=1790, bcdDevice= 1.00
kernel: usb 1-5.3: New USB device strings: Mfr=1, Product=2, SerialNumber=3
kernel: usb 1-5.3: Product: AX88179
kernel: usb 1-5.3: Manufacturer: ASIX Elec. Corp.
kernel: usb 1-5.3: SerialNumber: 0000249B2BAEC8
kernel: ax88179_178a 1-5.3:1.0 eth0: register 'ax88179_178a' at usb-0000:00:14.0-5.3, ASIX AX88179 USB 3.0 Gigabit Ethernet, 02:a5:ab:80:e6:94
mtp-probe[2440]: checking bus 1, device 8: "/sys/devices/pci0000:00/0000:00:14.0/usb1/1-5/1-5.3"
mtp-probe[2440]: bus: 1, device: 8 was not an MTP device
kernel: ax88179_178a 1-5.3:1.0 enp0s20f0u5u3: renamed from eth0
systemd-networkd[469]: eth0: Interface name change detected, renamed to enp0s20f0u5u3.
mtp-probe[2444]: checking bus 1, device 8: "/sys/devices/pci0000:00/0000:00:14.0/usb1/1-5/1-5.3"
mtp-probe[2444]: bus: 1, device: 8 was not an MTP device
The following commit has been merged into the timers/urgent branch of tip:
Commit-ID: c1d11fc2c8320871b40730991071dd0a0b405bc8
Gitweb: https://git.kernel.org/tip/c1d11fc2c8320871b40730991071dd0a0b405bc8
Author: Arnd Bergmann <arnd(a)arndb.de>
AuthorDate: Mon, 08 Apr 2024 09:46:01 +02:00
Committer: Thomas Gleixner <tglx(a)linutronix.de>
CommitterDate: Mon, 08 Apr 2024 16:34:18 +02:00
irqflags: Explicitly ignore lockdep_hrtimer_exit() argument
When building with 'make W=1' but CONFIG_TRACE_IRQFLAGS=n, the
unused argument to lockdep_hrtimer_exit() causes a warning:
kernel/time/hrtimer.c:1655:14: error: variable 'expires_in_hardirq' set but not used [-Werror=unused-but-set-variable]
This is intentional behavior, so add a cast to void to shut up the warning.
Fixes: 73d20564e0dc ("hrtimer: Don't dereference the hrtimer pointer after the callback")
Reported-by: kernel test robot <lkp(a)intel.com>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
Signed-off-by: Thomas Gleixner <tglx(a)linutronix.de>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy(a)linutronix.de>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20240408074609.3170807-1-arnd@kernel.org
Closes: https://lore.kernel.org/oe-kbuild-all/202311191229.55QXHVc6-lkp@intel.com/
---
include/linux/irqflags.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 147feeb..3f003d5 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -114,7 +114,7 @@ do { \
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define lockdep_hrtimer_enter(__hrtimer) false
-# define lockdep_hrtimer_exit(__context) do { } while (0)
+# define lockdep_hrtimer_exit(__context) do { (void)(__context); } while (0)
# define lockdep_posixtimer_enter() do { } while (0)
# define lockdep_posixtimer_exit() do { } while (0)
# define lockdep_irq_work_enter(__work) do { } while (0)
From: Benjamin Berg <benjamin.berg(a)intel.com>
[ Upstream commit b8b80770b26c4591f20f1cde3328e5f1489c4488 ]
struct sta_info may be removed without holding sta_mtx if it has not
yet been inserted. To support this, only assert that the lock is held
for links other than the deflink.
This fixes lockdep issues that may be triggered in error cases.
Signed-off-by: Benjamin Berg <benjamin.berg(a)intel.com>
Signed-off-by: Gregory Greenman <gregory.greenman(a)intel.com>
Link: https://lore.kernel.org/r/20230619161906.cdd81377dea0.If5a6734b4b85608a2275…
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
Signed-off-by: Alexander Ofitserov <oficerovas(a)altlinux.org>
Cc: stable(a)vger.kernel.org
---
net/mac80211/sta_info.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index bd56015b29258..edec857edbd25 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -357,8 +357,9 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id,
struct sta_link_alloc *alloc = NULL;
struct link_sta_info *link_sta;
- link_sta = rcu_dereference_protected(sta->link[link_id],
- lockdep_is_held(&sta->local->sta_mtx));
+ link_sta = rcu_access_pointer(sta->link[link_id]);
+ if (link_sta != &sta->deflink)
+ lockdep_assert_held(&sta->local->sta_mtx);
if (WARN_ON(!link_sta))
return;
--
2.42.1
When done from a virtual machine, instructions that touch APIC memory
must be emulated. By convention, MMIO access are typically performed via
io.h helpers such as 'readl()' or 'writeq()' to simplify instruction
emulation/decoding (ex: in KVM hosts and SEV guests) [0].
Currently, native_apic_mem_read() does not follow this convention,
allowing the compiler to emit instructions other than the MOV
instruction generated by readl(). In particular, when compiled with
clang and run as a SEV-ES or SEV-SNP guest, the compiler would emit a
TESTL instruction which is not supported by the SEV-ES emulator, causing
a boot failure in that environment. It is likely the same problem would
happen in a TDX guest as that uses the same instruction emulator as
SEV-ES.
To make sure all emulators can emulate APIC memory reads via MOV, use
the readl() function in native_apic_mem_read(). It is expected that any
emulator would support MOV in any addressing mode it is the most generic
and is what is ususally emitted currently.
The TESTL instruction is emitted when native_apic_mem_read() is inlined
into apic_mem_wait_icr_idle(). The emulator comes from insn_decode_mmio
in arch/x86/lib/insn-eval.c. It's not worth it to extend
insn_decode_mmio to support more instructions since, in theory, the
compiler could choose to output nearly any instruction for such reads
which would bloat the emulator beyond reason.
[0] https://lore.kernel.org/all/20220405232939.73860-12-kirill.shutemov@linux.i…
Signed-off-by: Adam Dunlap <acdunlap(a)google.com>
Tested-by: Kevin Loughlin <kevinloughlin(a)google.com>
Reviewed-by: Thomas Gleixner <tglx(a)linutronix.de>
Cc: stable(a)vger.kernel.org
---
An alterative to this approach would be to use inline assembly instead
of the readl() helper, as that is what native_apic_mem_write() does. I
consider using readl() to be cleaner since it is documented to be a simple
wrapper and inline assembly is less readable. native_apic_mem_write()
cannot be trivially updated to use writel since it appears to use custom
asm to workaround for a processor-specific bug.
Patch changelog:
V1 -> V2: Replaced asm with readl function which does the same thing
V2 -> V3: Updated commit message to show more motivation and
justification
V3 -> V4: Fixed nits in commit message
Link to v2 discussion: https://lore.kernel.org/all/20220908170456.3177635-1-acdunlap@google.com/
arch/x86/include/asm/apic.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 9d159b771dc8..dddd3fc195ef 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -13,6 +13,7 @@
#include <asm/mpspec.h>
#include <asm/msr.h>
#include <asm/hardirq.h>
+#include <asm/io.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
@@ -96,7 +97,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
static inline u32 native_apic_mem_read(u32 reg)
{
- return *((volatile u32 *)(APIC_BASE + reg));
+ return readl((void __iomem *)(APIC_BASE + reg));
}
static inline void native_apic_mem_eoi(void)
--
2.43.0.594.gd9cf4e227d-goog
From: Ma Wupeng <mawupeng1(a)huawei.com>
[ Upstream commit d155df53f31068c3340733d586eb9b3ddfd70fc5 ]
Syzbot reports a warning in untrack_pfn(). Digging into the root we found
that this is due to memory allocation failure in pmd_alloc_one. And this
failure is produced due to failslab.
In copy_page_range(), memory alloaction for pmd failed. During the error
handling process in copy_page_range(), mmput() is called to remove all
vmas. While untrack_pfn this empty pfn, warning happens.
Here's a simplified flow:
dup_mm
dup_mmap
copy_page_range
copy_p4d_range
copy_pud_range
copy_pmd_range
pmd_alloc
__pmd_alloc
pmd_alloc_one
page = alloc_pages(gfp, 0);
if (!page)
return NULL;
mmput
exit_mmap
unmap_vmas
unmap_single_vma
untrack_pfn
follow_phys
WARN_ON_ONCE(1);
Since this vma is not generate successfully, we can clear flag VM_PAT. In
this case, untrack_pfn() will not be called while cleaning this vma.
Function untrack_pfn_moved() has also been renamed to fit the new logic.
Link: https://lkml.kernel.org/r/20230217025615.1595558-1-mawupeng1@huawei.com
Signed-off-by: Ma Wupeng <mawupeng1(a)huawei.com>
Reported-by: <syzbot+5f488e922d047d8f00cc(a)syzkaller.appspotmail.com>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Alexander Ofitserov <oficerovas(a)altlinux.org>
Cc: stable(a)vger.kernel.org
---
arch/x86/mm/pat/memtype.c | 12 ++++++++----
include/linux/pgtable.h | 7 ++++---
mm/memory.c | 1 +
mm/mremap.c | 2 +-
4 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index d5ef64ddd35e9..fd819f112a7a7 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -1108,11 +1108,15 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
}
/*
- * untrack_pfn_moved is called, while mremapping a pfnmap for a new region,
- * with the old vma after its pfnmap page table has been removed. The new
- * vma has a new pfnmap to the same pfn & cache type with VM_PAT set.
+ * untrack_pfn_clear is called if the following situation fits:
+ *
+ * 1) while mremapping a pfnmap for a new region, with the old vma after
+ * its pfnmap page table has been removed. The new vma has a new pfnmap
+ * to the same pfn & cache type with VM_PAT set.
+ * 2) while duplicating vm area, the new vma fails to copy the pgtable from
+ * old vma.
*/
-void untrack_pfn_moved(struct vm_area_struct *vma)
+void untrack_pfn_clear(struct vm_area_struct *vma)
{
vma->vm_flags &= ~VM_PAT;
}
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index d468efcf48f45..734d5e707fe6d 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1121,9 +1121,10 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
}
/*
- * untrack_pfn_moved is called while mremapping a pfnmap for a new region.
+ * untrack_pfn_clear is called while mremapping a pfnmap for a new region
+ * or fails to copy pgtable during duplicate vm area.
*/
-static inline void untrack_pfn_moved(struct vm_area_struct *vma)
+static inline void untrack_pfn_clear(struct vm_area_struct *vma)
{
}
#else
@@ -1135,7 +1136,7 @@ extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
extern int track_pfn_copy(struct vm_area_struct *vma);
extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size);
-extern void untrack_pfn_moved(struct vm_area_struct *vma);
+extern void untrack_pfn_clear(struct vm_area_struct *vma);
#endif
#ifdef CONFIG_MMU
diff --git a/mm/memory.c b/mm/memory.c
index 8d71a82462dd5..95db1df5fd03a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1289,6 +1289,7 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma)
continue;
if (unlikely(copy_p4d_range(dst_vma, src_vma, dst_pgd, src_pgd,
addr, next))) {
+ untrack_pfn_clear(dst_vma);
ret = -ENOMEM;
break;
}
diff --git a/mm/mremap.c b/mm/mremap.c
index 3a3cf4cc2c632..9457a1e06b5ae 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -668,7 +668,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
/* Tell pfnmap has moved from this vma */
if (unlikely(vma->vm_flags & VM_PFNMAP))
- untrack_pfn_moved(vma);
+ untrack_pfn_clear(vma);
if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
/* We always clear VM_LOCKED[ONFAULT] on the old vma */
--
2.42.1