When handling a numa page fault, task_numa_fault() should be called by a
process that restores the page table of the faulted folio to avoid
duplicated stats counting. Commit b99a342d4f11 ("NUMA balancing: reduce
TLB flush via delaying mapping on hint page fault") restructured
do_numa_page() and do_huge_pmd_numa_page() and did not avoid the
task_numa_fault() call in the second page table check after a numa
migration failure. Fix it by making all !pte_same()/!pmd_same() paths
return immediately.
This issue can cause task_numa_fault() to be called more often than
necessary and lead to unexpected numa balancing results (it is hard to
tell whether the issue will have a positive or negative performance impact
due to duplicated numa fault counting).
Reported-by: "Huang, Ying" <ying.huang(a)intel.com>
Closes: https://lore.kernel.org/linux-mm/87zfqfw0yw.fsf@yhuang6-desk2.ccr.corp.inte…
Fixes: b99a342d4f11 ("NUMA balancing: reduce TLB flush via delaying mapping on hint page fault")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
---
mm/huge_memory.c | 5 +++--
mm/memory.c | 5 +++--
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0024266dea0a..a3c018f2b554 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1734,10 +1734,11 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
goto out_map;
}
-out:
+count_fault:
if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags);
+out:
return 0;
out_map:
@@ -1749,7 +1750,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
spin_unlock(vmf->ptl);
- goto out;
+ goto count_fault;
}
/*
diff --git a/mm/memory.c b/mm/memory.c
index 67496dc5064f..503d493263df 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5536,9 +5536,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
goto out_map;
}
-out:
+count_fault:
if (nid != NUMA_NO_NODE)
task_numa_fault(last_cpupid, nid, nr_pages, flags);
+out:
return 0;
out_map:
/*
@@ -5552,7 +5553,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
writable);
pte_unmap_unlock(vmf->pte, vmf->ptl);
- goto out;
+ goto count_fault;
}
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
--
2.43.0
Dear Sir :
Nice day!
This is Lily from METAL & HARNESSES factory ,we make Machinery Manufacturing hardware, agriculture Machinery hardware ,Machinery and Agricultural Implements ,Plows harnesses,Growers hardware ,Miscellaneous,Harrows hardware ,Planers hardware ,Shearers,Seeders hardware ,Parts Machinery hardware,Agricultural Implements,Parts agriculture, tools,D rings , O rings ,screws ,bolts ,buts ,bits ,spurs,,Clamps,joints,rings ,blinds ,snaps ,buckles,bars ,Harnessesas,brass snaps ,brass gear , stainless steel hardware,steel harness ,brass gears ,brass products ,iron metal hardware ,Fasteners,safety buckles,brass buckles, brass rings required for our global clients .
We are manufactory, we are the source, our price is very competitive ,you will get the best price , We have profuse designs with series quality grade, and expressly.
Our factory always produce customer designs and drawing , if you have any products looking please let me know
we could surely make for you
Sincerely hope could work with you !
Best regards
Lily
Rename tdx_parse_tdinfo() to tdx_setup() and move setting NOTIFY_ENABLES
there.
The function will be extended to adjust TD configuration.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
Reviewed-by: Kai Huang <kai.huang(a)intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/coco/tdx/tdx.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 64717a96a936..08ce488b54d0 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -193,7 +193,7 @@ static void __noreturn tdx_panic(const char *msg)
__tdx_hypercall(&args);
}
-static void tdx_parse_tdinfo(u64 *cc_mask)
+static void tdx_setup(u64 *cc_mask)
{
struct tdx_module_args args = {};
unsigned int gpa_width;
@@ -218,6 +218,9 @@ static void tdx_parse_tdinfo(u64 *cc_mask)
gpa_width = args.rcx & GENMASK(5, 0);
*cc_mask = BIT_ULL(gpa_width - 1);
+ /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
+ tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
+
/*
* The kernel can not handle #VE's when accessing normal kernel
* memory. Ensure that no #VE will be delivered for accesses to
@@ -964,11 +967,11 @@ void __init tdx_early_init(void)
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
cc_vendor = CC_VENDOR_INTEL;
- tdx_parse_tdinfo(&cc_mask);
- cc_set_mask(cc_mask);
- /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
- tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
+ /* Configure the TD */
+ tdx_setup(&cc_mask);
+
+ cc_set_mask(cc_mask);
/*
* All bits above GPA width are reserved and kernel treats shared bit
--
2.43.0
The TDG_VM_WR TDCALL is used to ask the TDX module to change some
TD-specific VM configuration. There is currently only one user in the
kernel of this TDCALL leaf. More will be added shortly.
Refactor to make way for more users of TDG_VM_WR who will need to modify
other TD configuration values.
Add a wrapper for the TDG_VM_RD TDCALL that requests TD-specific
metadata from the TDX module. There are currently no users for
TDG_VM_RD. Mark it as __maybe_unused until the first user appears.
This is preparation for enumeration and enabling optional TD features.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov(a)linux.intel.com>
Reviewed-by: Kai Huang <kai.huang(a)intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy(a)linux.intel.com>
Cc: stable(a)vger.kernel.org
---
arch/x86/coco/tdx/tdx.c | 32 ++++++++++++++++++++++++++-----
arch/x86/include/asm/shared/tdx.h | 1 +
2 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 078e2bac2553..64717a96a936 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -77,6 +77,32 @@ static inline void tdcall(u64 fn, struct tdx_module_args *args)
panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}
+/* Read TD-scoped metadata */
+static inline u64 __maybe_unused tdg_vm_rd(u64 field, u64 *value)
+{
+ struct tdx_module_args args = {
+ .rdx = field,
+ };
+ u64 ret;
+
+ ret = __tdcall_ret(TDG_VM_RD, &args);
+ *value = args.r8;
+
+ return ret;
+}
+
+/* Write TD-scoped metadata */
+static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask)
+{
+ struct tdx_module_args args = {
+ .rdx = field,
+ .r8 = value,
+ .r9 = mask,
+ };
+
+ return __tdcall(TDG_VM_WR, &args);
+}
+
/**
* tdx_mcall_get_report0() - Wrapper to get TDREPORT0 (a.k.a. TDREPORT
* subtype 0) using TDG.MR.REPORT TDCALL.
@@ -924,10 +950,6 @@ static void tdx_kexec_finish(void)
void __init tdx_early_init(void)
{
- struct tdx_module_args args = {
- .rdx = TDCS_NOTIFY_ENABLES,
- .r9 = -1ULL,
- };
u64 cc_mask;
u32 eax, sig[3];
@@ -946,7 +968,7 @@ void __init tdx_early_init(void)
cc_set_mask(cc_mask);
/* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */
- tdcall(TDG_VM_WR, &args);
+ tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
/*
* All bits above GPA width are reserved and kernel treats shared bit
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index fdfd41511b02..7e12cfa28bec 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -16,6 +16,7 @@
#define TDG_VP_VEINFO_GET 3
#define TDG_MR_REPORT 4
#define TDG_MEM_PAGE_ACCEPT 6
+#define TDG_VM_RD 7
#define TDG_VM_WR 8
/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
--
2.43.0
From: Marc Zyngier <maz(a)kernel.org>
If xhci_mem_init() fails, it calls into xhci_mem_cleanup() to mop
up the damage. If it fails early enough, before xhci->interrupters
is allocated but after xhci->max_interrupters has been set, which
happens in most (all?) cases, things get uglier, as xhci_mem_cleanup()
unconditionally dereferences xhci->interrupters. With prejudice.
Gate the interrupt freeing loop with a check on xhci->interrupters
being non-NULL.
Found while debugging a DMA allocation issue that led the XHCI driver
down this exact path.
Fixes: c99b38c41234 ("xhci: add support to allocate several interrupters")
Cc: Mathias Nyman <mathias.nyman(a)linux.intel.com>
Cc: Wesley Cheng <quic_wcheng(a)quicinc.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Cc: stable(a)vger.kernel.org # 6.8+
Signed-off-by: Mathias Nyman <mathias.nyman(a)linux.intel.com>
---
drivers/usb/host/xhci-mem.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index d7654f475daf..937ce5fd5809 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1872,7 +1872,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
cancel_delayed_work_sync(&xhci->cmd_timer);
- for (i = 0; i < xhci->max_interrupters; i++) {
+ for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) {
if (xhci->interrupters[i]) {
xhci_remove_interrupter(xhci, xhci->interrupters[i]);
xhci_free_interrupter(xhci, xhci->interrupters[i]);
--
2.25.1
From: Mitchell Levy <levymitchell0(a)gmail.com>
When computing which xfeatures are available, make sure that LBR is only
present if LBR is supported both in general and by XSAVES.
There are two distinct CPU features related to the use of XSAVES as it
applies to LBR: whether LBR is itself supported (strictly speaking, I'm
not sure that this is necessary to check though it's certainly a good
sanity check), and whether XSAVES supports LBR (see sections 13.2 and
13.5.12 of the Intel 64 and IA-32 Architectures Software Developer's
Manual, Volume 1). Currently, the LBR subsystem correctly checks both
(see intel_pmu_arch_lbr_init), however the xstate initialization
subsystem does not.
When calculating what value to place in the IA32_XSS MSR,
xfeatures_mask_independent only checks whether LBR support is present,
not whether XSAVES supports LBR. If XSAVES does not support LBR, this
write causes #GP, leaving the state of IA32_XSS unchanged (i.e., set to
zero, as it is not written with other values, and its default value is
zero out of RESET per section 13.3 of the arch manual).
Then, the next time XRSTORS is used to restore supervisor state, it will
fail with #GP (because the RFBM has zero for all supervisor features,
which does not match the XCOMP_BV field). In particular,
XFEATURE_MASK_FPSTATE includes supervisor features, so setting up the FPU
will cause a #GP. This results in a call to fpu_reset_from_exception_fixup,
which by the same process results in another #GP. Eventually this causes
the kernel to run out of stack space and #DF.
Fixes: d72c87018d00 ("x86/fpu/xstate: Move remaining xfeature helpers to core")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mitchell Levy <levymitchell0(a)gmail.com>
---
arch/x86/kernel/fpu/xstate.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index 2ee0b9c53dcc..574d2c2ea227 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -61,7 +61,8 @@ static inline u64 xfeatures_mask_supervisor(void)
static inline u64 xfeatures_mask_independent(void)
{
- if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR))
+ if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR) ||
+ (fpu_kernel_cfg.max_features & XFEATURE_MASK_LBR) != XFEATURE_MASK_LBR)
return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
return XFEATURE_MASK_INDEPENDENT;
---
base-commit: de9c2c66ad8e787abec7c9d7eff4f8c3cdd28aed
change-id: 20240807-xsave-lbr-fix-02d52f641653
Best regards,
--
Mitchell Levy <levymitchell0(a)gmail.com>
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 4dde0d72ccec500c60c798e036b852e013d6e124
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2024080723-untangled-rogue-da43@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
4dde0d72ccec ("mptcp: mib: count MPJ with backup flag")
b3ea6b272d79 ("mptcp: consolidate initial ack seq generation")
f3589be0c420 ("mptcp: never shrink offered window")
74c7dfbee3e1 ("mptcp: consolidate in_opt sub-options fields in a bitmask")
a086aebae0eb ("mptcp: better binary layout for mptcp_options_received")
8d548ea1dd15 ("mptcp: do not set unconditionally csum_reqd on incoming opt")
eb7f33654dc1 ("mptcp: add the mibs for MP_FAIL")
478d770008b0 ("mptcp: send out MP_FAIL when data checksum fails")
5580d41b758a ("mptcp: MP_FAIL suboption receiving")
f444fea7896d ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4dde0d72ccec500c60c798e036b852e013d6e124 Mon Sep 17 00:00:00 2001
From: "Matthieu Baerts (NGI0)" <matttbe(a)kernel.org>
Date: Sat, 27 Jul 2024 12:01:26 +0200
Subject: [PATCH] mptcp: mib: count MPJ with backup flag
Without such counters, it is difficult to easily debug issues with MPJ
not having the backup flags on production servers.
This is not strictly a fix, but it makes it easier to validate the
following patches without having to take packet traces, to query ongoing
connections with Netlink with admin permissions, or to guess by looking
at the behaviour of the packet scheduler. Also, the modification is
self-contained, isolated, and well controlled, and the increments are done
just after other ones that have been there from the beginning. It then
looks safe, and helpful, to backport this.
Fixes: 4596a2c1b7f5 ("mptcp: allow creating non-backup subflows")
Cc: stable(a)vger.kernel.org
Reviewed-by: Mat Martineau <martineau(a)kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe(a)kernel.org>
Signed-off-by: Paolo Abeni <pabeni(a)redhat.com>
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index c30405e76833..7884217f33eb 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -19,7 +19,9 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX),
+ SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX),
SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX),
+ SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX),
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 2704afd0dfe4..66aa67f49d03 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -14,7 +14,9 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */
+ MPTCP_MIB_JOINSYNBACKUPRX, /* Received a SYN + MP_JOIN + backup flag */
MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */
+ MPTCP_MIB_JOINSYNACKBACKUPRX, /* Received a SYN/ACK + MP_JOIN + backup flag */
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a3778aee4e77..be406197b1c4 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -168,6 +168,9 @@ static int subflow_check_req(struct request_sock *req,
return 0;
} else if (opt_mp_join) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
+
+ if (mp_opt.backup)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX);
}
if (opt_mp_capable && listener->request_mptcp) {
@@ -577,6 +580,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
+ if (subflow->backup)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX);
+
if (subflow_use_different_dport(msk, sk)) {
pr_debug("synack inet_dport=%d %d",
ntohs(inet_sk(sk)->inet_dport),