The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable@vger.kernel.org>' --in-reply-to '2024012648-backwater-colt-7290@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
3c12466b6b7b ("erofs: fix lz4 inplace decompression")
123ec246ebe3 ("erofs: get rid of the remaining kmap_atomic()")
ab749badf9f4 ("erofs: support unaligned data decompression")
10e5f6e482e1 ("erofs: introduce z_erofs_fixup_insize")
d67aee76d418 ("erofs: tidy up z_erofs_lz4_decompress")
7e508f2ca8bb ("erofs: rename lz4_0pading to zero_padding")
eaa9172ad988 ("erofs: get rid of ->lru usage")
622ceaddb764 ("erofs: lzma compression support")
966edfb0a3dc ("erofs: rename some generic methods in decompressor")
386292919c25 ("erofs: introduce readmore decompression strategy")
8f89926290c4 ("erofs: get compression algorithms directly on mapping")
dfeab2e95a75 ("erofs: add multiple device support")
e62424651f43 ("erofs: decouple basic mount options from fs_context")
5b6e7e120e71 ("erofs: remove the fast path of per-CPU buffer decompression")
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de Mon Sep 17 00:00:00 2001
From: Gao Xiang <xiang@kernel.org>
Date: Wed, 6 Dec 2023 12:55:34 +0800
Subject: [PATCH] erofs: fix lz4 inplace decompression
Currently, EROFS can map another compressed buffer for inplace
decompression; this is used to handle cases where some pages of the
compressed data are not actually in-place I/O.
However, like most simple LZ77 algorithms, LZ4 expects the compressed
data to be arranged at the end of the decompressed buffer, and it
explicitly uses memmove() to handle overlapping:
__________________________________________________________
|_ direction of decompression --> ____ |_ compressed data _|
Although EROFS arranges compressed data like this, it typically maps two
individual virtual buffers so the relative order is uncertain.
Previously, this was rarely observed since LZ4 only uses memmove() for
short overlapped literals, and the x86/arm64 memmove implementations
happened to cover it up, so the issue was not visible there. Juhyung
reported that EROFS data corruption can be found on a new Intel x86
processor. After some analysis, it seems that recent x86 processors
with the new FSRM feature expose this issue with "rep movsb".
Let's strictly use the decompressed buffer for lz4 inplace
decompression for now. Later, as a useful improvement, we could try
to tie these two buffers together in the correct order.
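In other words, in-place decompression is now attempted only when the
input pages are exactly the tail of the output page array. A minimal
sketch of that check outside the kernel (illustrative only; pages are
modeled as plain pointers, and the names are not the kernel API):
#include <stdbool.h>
#include <stddef.h>
/* True iff "in" is exactly the last inpages entries of "out", i.e. the
 * compressed data already sits at the end of the decompressed buffer
 * and may safely be decompressed in place. */
static bool in_is_tail_of_out(void **out, size_t outpages,
                              void **in, size_t inpages)
{
        size_t i;
        if (inpages > outpages)
                return false;
        for (i = 0; i < inpages; i++)
                if (out[outpages - inpages + i] != in[i])
                        return false;
        return true;
}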
Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com>
Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR…
Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace")
Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend")
Cc: stable <stable@vger.kernel.org> # 5.4+
Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.…
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 021be5feb1bc..e0d609c3958f 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -121,11 +121,11 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
}
static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
- void *inpage, unsigned int *inputmargin, int *maptype,
- bool may_inplace)
+ void *inpage, void *out, unsigned int *inputmargin,
+ int *maptype, bool may_inplace)
{
struct z_erofs_decompress_req *rq = ctx->rq;
- unsigned int omargin, total, i, j;
+ unsigned int omargin, total, i;
struct page **in;
void *src, *tmp;
@@ -135,12 +135,13 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;
- for (i = 0; i < ctx->inpages; ++i) {
- DBG_BUGON(rq->in[i] == NULL);
- for (j = 0; j < ctx->outpages - ctx->inpages + i; ++j)
- if (rq->out[j] == rq->in[i])
- goto docopy;
- }
+ for (i = 0; i < ctx->inpages; ++i)
+ if (rq->out[ctx->outpages - ctx->inpages + i] !=
+ rq->in[i])
+ goto docopy;
+ kunmap_local(inpage);
+ *maptype = 3;
+ return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT);
}
if (ctx->inpages <= 1) {
@@ -148,7 +149,6 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
return inpage;
}
kunmap_local(inpage);
- might_sleep();
src = erofs_vm_map_ram(rq->in, ctx->inpages);
if (!src)
return ERR_PTR(-ENOMEM);
@@ -204,12 +204,12 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
}
static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
- u8 *out)
+ u8 *dst)
{
struct z_erofs_decompress_req *rq = ctx->rq;
bool support_0padding = false, may_inplace = false;
unsigned int inputmargin;
- u8 *headpage, *src;
+ u8 *out, *headpage, *src;
int ret, maptype;
DBG_BUGON(*rq->in == NULL);
@@ -230,11 +230,12 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
}
inputmargin = rq->pageofs_in;
- src = z_erofs_lz4_handle_overlap(ctx, headpage, &inputmargin,
+ src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin,
&maptype, may_inplace);
if (IS_ERR(src))
return PTR_ERR(src);
+ out = dst + rq->pageofs_out;
/* legacy format could compress extra data in a pcluster. */
if (rq->partial_decoding || !support_0padding)
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
@@ -265,7 +266,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
vm_unmap_ram(src, ctx->inpages);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
- } else {
+ } else if (maptype != 3) {
DBG_BUGON(1);
return -EFAULT;
}
@@ -308,7 +309,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
}
dstmap_out:
- ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
+ ret = z_erofs_lz4_decompress_mem(&ctx, dst);
if (!dst_maptype)
kunmap_local(dst);
else if (dst_maptype == 2)
Hi stable team - please don't take patches for fs/bcachefs/ except from
myself; I'll be doing backports and sending pull requests after stuff
has been tested by my CI.
Thanks, and let me know if there are any other workflow things I should
know about.
-Kent
I'm new here, first time reporting a regression, apologies in advance if
I'm doing something wrong or if this was already reported (I found some
CIFS issues but not exactly this one).
I'm using x86-64 Arch Linux and LTS kernel (6.1.71 as I write this) and
I noticed a regression that I could reproduce in other boxes with other
architectures as well (aarch64 with 6.1.70).
# mount.cifs //server/share /mnt
# mount
//server/share on /mnt type cifs (rw,relatime,vers=3.1.1...)
# cd /mnt
# df .
df: .: Resource temporarily unavailable
# ls -al
ls: .: Resource temporarily unavailable
ls: file1: Resource temporarily unavailable
ls: file2: Resource temporarily unavailable
[...then ls shows the listing...]
If I use strace with df, the problem is:
statfs(".", 0x.....) = -1 EAGAIN (Resource temporarily unavailable)
And with ls:
listxattr(".", 0x..., 152): -1 EAGAIN (Resource temporarily unavailable)
listxattr("file1", ..., 152): -1 EAGAIN (same as above)
...
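For what it's worth, a minimal C reproducer for the statfs() failure
could look like this (a sketch; the /mnt path matches the mount above):
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/vfs.h>
int main(void)
{
        struct statfs st;
        /* On affected kernels (6.1.70 in the 6.1.y series), this
         * fails with EAGAIN. */
        if (statfs("/mnt", &st) < 0) {
                printf("statfs: %s\n", strerror(errno));
                return 1;
        }
        printf("f_blocks: %llu\n", (unsigned long long)st.f_blocks);
        return 0;
}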
Initially I thought the problem was with the Samba server and/or the
client mount flags, but I've spent a day trying a *lot* of different
combinations and nothing worked. This happens with any share that I try,
and I've tried mounting shares from multiple Linux boxes running
different Samba and kernel versions.
Then I tried changing kernel versions on my client box. I booted the
latest 6.6.9 and the problem simply disappeared. My Debian server with
6.5.11 also doesn't have it. I then started a VM and tried a "bisection"
of 6.1.x versions, which pointed to 6.1.70 as the kernel where this
started to happen; 6.1.69 and older look fine.
I hope that this is enough information to reproduce this issue. I will
be glad to provide more info if necessary.
// Leonardo.
From: "Guilherme G. Piccoli" <gpiccoli(a)igalia.com>
[ Upstream commit e585a37e5061f6d5060517aed1ca4ccb2e56a34c ]
While running a Van Gogh device (Steam Deck), the following message
was noticed in the kernel log:
pci 0000:04:00.3: PCI class overridden (0x0c03fe -> 0x0c03fe) so dwc3 driver can claim this instead of xhci
Effectively this means the quirk executed but changed nothing, since the
class of this device was already the proper one (likely adjusted by newer
firmware versions).
Check and perform the override only if necessary.
Link: https://lore.kernel.org/r/20231120160531.361552-1-gpiccoli@igalia.com
Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Vicki Pfau <vi@endrift.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/pci/quirks.c | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 8765544bac35..75b297c15cf5 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -607,10 +607,13 @@ static void quirk_amd_dwc_class(struct pci_dev *pdev)
{
u32 class = pdev->class;
- /* Use "USB Device (not host controller)" class */
- pdev->class = PCI_CLASS_SERIAL_USB_DEVICE;
- pci_info(pdev, "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n",
- class, pdev->class);
+ if (class != PCI_CLASS_SERIAL_USB_DEVICE) {
+ /* Use "USB Device (not host controller)" class */
+ pdev->class = PCI_CLASS_SERIAL_USB_DEVICE;
+ pci_info(pdev,
+ "PCI class overridden (%#08x -> %#08x) so dwc3 driver can claim this instead of xhci\n",
+ class, pdev->class);
+ }
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB,
quirk_amd_dwc_class);
--
2.43.0
Hi all,
Eric reported that builds of LLVM with [1] (close to tip of tree) have
CONFIG_AS_HAS_OPTION_ARCH=n because the test for expected failure on
invalid input has started succeeding.
This Kconfig test was added because '.option arch' only causes an
assembler warning when it is unsupported, rather than a hard error,
which is what users of as-instr expect when something is unsupported.
This can be resolved by turning assembler warnings into errors with
'-Wa,--fatal-warnings' like we do with the compiler with '-Werror',
which is what the first patch does. The second patch removes the invalid
test, as the valid test is good enough with fatal warnings.
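For reference, the first patch presumably boils down to adding the flag
to the as-instr invocation, roughly along these lines (a sketch of
scripts/Kconfig.include, not the exact hunk; note the comma has to be
escaped inside the $(success,...) macro):
as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -Wa$(comma)--fatal-warnings -c -x assembler-with-cpp -o /dev/null -)
With fatal warnings, an unsupported '.option arch' makes the probe fail
outright, so the separate expected-failure test becomes unnecessary.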
I have diffed several configurations for the different architectures
that use as-instr and I have found no issues.
I think this could go in through either the kbuild or RISC-V tree with
sufficient acks but I will let them fight over who takes it :)
[1]: https://github.com/llvm/llvm-project/commit/3ac9fe69f70a2b3541266daedbaaa7d…
---
Nathan Chancellor (2):
kbuild: Add -Wa,--fatal-warnings to as-instr invocation
RISC-V: Drop invalid test from CONFIG_AS_HAS_OPTION_ARCH
arch/riscv/Kconfig | 1 -
scripts/Kconfig.include | 2 +-
scripts/Makefile.compiler | 2 +-
3 files changed, 2 insertions(+), 3 deletions(-)
---
base-commit: 6613476e225e090cc9aad49be7fa504e290dd33d
change-id: 20240124-fix-riscv-option-arch-llvm-18-3cbe7b09a216
Best regards,
--
Nathan Chancellor <nathan@kernel.org>
The current logic is probably fine but is a bit convoluted. Plus, we
don't want partial pages to be part of the sequential operation, just in
case the core optimizes the page read into a subpage read (which would
break the sequence). This can only happen on the first and last pages,
so if the start offset or the end offset is not aligned with a page
boundary, it is better to avoid those pages entirely to prevent any risk.
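As a worked example (assuming writesize = 4096): a 20000-byte read
starting at column 100 of page P touches pages P through P+4; page P is
entered mid-page and page P+4 is left mid-page (the read ends at offset
(100 + 20000) % 4096 = 3716), so only the fully covered pages P+1
through P+3 are eligible for the sequential read window.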
Cc: stable@vger.kernel.org
Fixes: 003fe4b9545b ("mtd: rawnand: Support for sequential cache reads")
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
drivers/mtd/nand/raw/nand_base.c | 26 +++++++++++++++++---------
1 file changed, 17 insertions(+), 9 deletions(-)
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 139fdf3e58c0..bbdcfbe643f3 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3460,21 +3460,29 @@ static void rawnand_enable_cont_reads(struct nand_chip *chip, unsigned int page,
u32 readlen, int col)
{
struct mtd_info *mtd = nand_to_mtd(chip);
+ unsigned int end_page, end_col;
+
+ chip->cont_read.ongoing = false;
if (!chip->controller->supported_op.cont_read)
return;
- if ((col && col + readlen < (3 * mtd->writesize)) ||
- (!col && readlen < (2 * mtd->writesize))) {
- chip->cont_read.ongoing = false;
- return;
- }
+ end_page = DIV_ROUND_UP(col + readlen, mtd->writesize);
+ end_col = (col + readlen) % mtd->writesize;
- chip->cont_read.ongoing = true;
- chip->cont_read.first_page = page;
if (col)
- chip->cont_read.first_page++;
- chip->cont_read.last_page = page + ((readlen >> chip->page_shift) & chip->pagemask);
+ page++;
+
+ if (end_col && end_page)
+ end_page--;
+
+ if (page + 1 > end_page)
+ return;
+
+ chip->cont_read.first_page = page;
+ chip->cont_read.last_page = end_page;
+ chip->cont_read.ongoing = true;
+
rawnand_cap_cont_reads(chip);
}
--
2.34.1
From: "Borislav Petkov (AMD)" <bp(a)alien8.de>
commit 04c3024560d3a14acd18d0a51a1d0a89d29b7eb5 upstream.
AMD does not have the requirement for a synchronization barrier when
accessing a certain group of MSRs. Do not incur that unnecessary
penalty there.
There will be a CPUID bit which explicitly states that an MFENCE is not
needed. Once that bit is added to the APM, this will be extended with
it.
While at it, move to processor.h to avoid include hell. Untangling that
file properly is a matter for another day.
Some notes on the performance aspect of why this is relevant, courtesy
of Kishon VijayAbraham <Kishon.VijayAbraham@amd.com>:
On an AMD Zen4 system with 96 cores, a modified ipi-bench[1] on a VM
shows that the x2AVIC IPI rate is 3% to 4% lower than the AVIC IPI
rate. The ipi-bench is modified so that the IPIs are sent between two
vCPUs in the same CCX. This also requires pinning the vCPUs to physical
cores to prevent any scheduling latencies. This simulates the use case
of pinning vCPUs to the threads of a single CCX to avoid interrupt IPI
latency.
In order to avoid run-to-run variance (for both x2AVIC and AVIC), the
following configuration steps were taken:
1) Disable Power States in the BIOS (to prevent the system from going
to a lower power state)
2) Run the system at a fixed frequency of 2500MHz (to prevent the
system from increasing the frequency under load)
With the above configuration:
*) Performance measured using ipi-bench for AVIC:
Average Latency: 1124.98ns [Time to send IPI from one vCPU to another vCPU]
Cumulative throughput: 42.6759M/s [Total number of IPIs sent in a second from
48 vCPUs simultaneously]
*) Performance measured using ipi-bench for x2AVIC:
Average Latency: 1172.42ns [Time to send IPI from one vCPU to another vCPU]
Cumulative throughput: 40.9432M/s [Total number of IPIs sent in a second from
48 vCPUs simultaneously]
From the above, x2AVIC latency is ~4% higher than AVIC. However, the
expectation is for x2AVIC performance to be better than or equivalent
to AVIC. Upon analyzing the perf captures, it was observed that
significant time is spent in weak_wrmsr_fence() invoked by
x2apic_send_IPI().
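For context, the hot path looks roughly like this (a sketch based on
arch/x86/kernel/apic/x2apic_phys.c; details may vary across kernel
versions):
static void x2apic_send_IPI(int cpu, int vector)
{
        u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
        /* x2APIC MSRs need the special fence before the MSR write: */
        weak_wrmsr_fence();
        __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
}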
With the fix to skip weak_wrmsr_fence():
*) Performance measured using ipi-bench for x2AVIC:
Average Latency: 1117.44ns [Time to send IPI from one vCPU to another vCPU]
Cumulative throughput: 42.9608M/s [Total number of IPIs sent in a second from
48 vCPUs simultaneously]
Comparing the performance of x2AVIC with and without the fix, it can be
seen that the performance improves by ~4%.
Performance captured using an unmodified ipi-bench with the 'mesh-ipi'
option, with and without weak_wrmsr_fence(), on a Zen4 system also
showed a significant performance improvement without weak_wrmsr_fence().
The 'mesh-ipi' option ignores CCX or CCD boundaries and just picks
random vCPUs.
Average throughput (10 iterations) with weak_wrmsr_fence(),
Cumulative throughput: 4933374 IPI/s
Average throughput (10 iterations) without weak_wrmsr_fence(),
Cumulative throughput: 6355156 IPI/s
[1] https://github.com/bytedance/kvm-utils/tree/master/microbenchmark/ipi-bench
Cc: stable@vger.kernel.org # 6.6+
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230622095212.20940-1-bp@alien8.de
Signed-off-by: Kishon Vijay Abraham I <kvijayab@amd.com>
---
Kindly merge this patch to stable releases (v6.6+) as it's a perf
optimization.
[It does not apply as-is on earlier releases and has to be reworked]
arch/x86/include/asm/barrier.h | 18 ------------------
arch/x86/include/asm/cpufeatures.h | 2 +-
arch/x86/include/asm/processor.h | 18 ++++++++++++++++++
arch/x86/kernel/cpu/amd.c | 3 +++
arch/x86/kernel/cpu/common.c | 7 +++++++
arch/x86/kernel/cpu/hygon.c | 3 +++
6 files changed, 32 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 35389b2af88e..0216f63a366b 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -81,22 +81,4 @@ do { \
#include <asm-generic/barrier.h>
-/*
- * Make previous memory operations globally visible before
- * a WRMSR.
- *
- * MFENCE makes writes visible, but only affects load/store
- * instructions. WRMSR is unfortunately not a load/store
- * instruction and is unaffected by MFENCE. The LFENCE ensures
- * that the WRMSR is not reordered.
- *
- * Most WRMSRs are full serializing instructions themselves and
- * do not require this barrier. This is only required for the
- * IA32_TSC_DEADLINE and X2APIC MSRs.
- */
-static inline void weak_wrmsr_fence(void)
-{
- asm volatile("mfence; lfence" : : : "memory");
-}
-
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 58cb9495e40f..0091f1008314 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -308,10 +308,10 @@
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-
#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index a3669a7774ed..191f1d8f0506 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -734,4 +734,22 @@ bool arch_is_platform_page(u64 paddr);
extern bool gds_ucode_mitigated(void);
+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+ alternative("mfence; lfence", "", ALT_NOT(X86_FEATURE_APIC_MSRS_FENCE));
+}
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 6e4f23f314ac..bb3efc825bf4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1157,6 +1157,9 @@ static void init_amd(struct cpuinfo_x86 *c)
if (!cpu_has(c, X86_FEATURE_HYPERVISOR) &&
cpu_has_amd_erratum(c, amd_erratum_1485))
msr_set_bit(MSR_ZEN4_BP_CFG, MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT);
+
+ /* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
+ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4e5ffc8b0e46..d98d023ae497 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1858,6 +1858,13 @@ static void identify_cpu(struct cpuinfo_x86 *c)
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif
+
+ /*
+ * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and
+ * Hygon will clear it in ->c_init() below.
+ */
+ set_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
+
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index a7b3ef4c4de9..6e738759779e 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -348,6 +348,9 @@ static void init_hygon(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
check_null_seg_clears_base(c);
+
+ /* Hygon CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
+ clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
}
static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)
--
2.34.1