----- On Jan 8, 2022, at 11:43 AM, Andy Lutomirski luto@kernel.org wrote:
The old sync_core_before_usermode() comments suggested that a non-icache-syncing return-to-usermode instruction is x86-specific and that all other architectures automatically notice cross-modified code on return to userspace.
This is misleading. The incantation needed to modify code from one CPU and execute it on another CPU is highly architecture dependent. On x86, according to the SDM, one must modify the code, issue SFENCE if the modification was WC or nontemporal, and then issue a "serializing instruction" on the CPU that will execute the code. membarrier() can do the latter.
On arm, arm64 and powerpc, one must flush the icache and then flush the pipeline on the target CPU, although the CPU manuals don't necessarily use this language.
So let's drop any pretense that we can have a generic way to define or implement membarrier's SYNC_CORE operation and instead require all architectures to define the helper and supply their own documentation as to how to use it. This means x86, arm64, and powerpc for now. Let's also rename the function from sync_core_before_usermode() to membarrier_sync_core_before_usermode() because the precise flushing details may very well be specific to membarrier, and even the concept of "sync_core" in the kernel is mostly an x86-ism.
(It may well be the case that, on real x86 processors, synchronizing the icache (which requires no action at all) and "flushing the pipeline" is sufficient, but trying to use this language would be confusing at best. LFENCE does something awfully like "flushing the pipeline", but the SDM does not permit LFENCE as an alternative to a "serializing instruction" for this purpose.)
A few comments below:
[...]
+# On powerpc, a program can use MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE +# similarly to arm64. It would be nice if the powerpc maintainers could +# add a more clear explanantion.
Any thoughts from ppc maintainers ?
[...]
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index e9da3dc71254..b47cd22b2eb1 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -17,7 +17,7 @@ #include <linux/kprobes.h> #include <linux/mmu_context.h> #include <linux/bsearch.h> -#include <linux/sync_core.h> +#include <asm/sync_core.h>
All this churn wrt move from linux/sync_core.h to asm/sync_core.h should probably be moved to a separate cleanup patch.
#include <asm/text-patching.h> #include <asm/alternative.h> #include <asm/sections.h> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 193204aee880..a2529e09f620 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -41,12 +41,12 @@ #include <linux/irq_work.h> #include <linux/export.h> #include <linux/set_memory.h> -#include <linux/sync_core.h> #include <linux/task_work.h> #include <linux/hardirq.h>
#include <asm/intel-family.h> #include <asm/processor.h> +#include <asm/sync_core.h> #include <asm/traps.h> #include <asm/tlbflush.h> #include <asm/mce.h>
[...]
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index d7ef61e602ed..462c667bd6c4 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -20,8 +20,8 @@ #include <linux/io.h> #include <linux/uaccess.h> #include <linux/security.h> -#include <linux/sync_core.h> #include <linux/prefetch.h> +#include <asm/sync_core.h> #include "gru.h" #include "grutables.h" #include "grulib.h" diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index 1d75d5e540bc..c8cba1c1b00f 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -16,7 +16,7 @@ #define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) #define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) #else -#include <linux/sync_core.h> +#include <asm/sync_core.h> #include <asm/tsc.h> #define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) #define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index 0ea923fe6371..ce03ff3f7c3a 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -16,10 +16,10 @@ #include <linux/miscdevice.h> #include <linux/proc_fs.h> #include <linux/interrupt.h> -#include <linux/sync_core.h> #include <linux/uaccess.h> #include <linux/delay.h> #include <linux/export.h> +#include <asm/sync_core.h> #include <asm/io_apic.h> #include "gru.h" #include "grulib.h" diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index e8919995d8dd..e107f292fc42 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -7,7 +7,6 @@ #include <linux/sched.h> #include <linux/mm_types.h> #include <linux/gfp.h> -#include <linux/sync_core.h>
/*
- Routines for handling mm_structs
diff --git a/include/linux/sync_core.h b/include/linux/sync_core.h deleted file mode 100644 index 013da4b8b327..000000000000 --- a/include/linux/sync_core.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_SYNC_CORE_H -#define _LINUX_SYNC_CORE_H
-#ifdef CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE -#include <asm/sync_core.h> -#else -/*
- This is a dummy sync_core_before_usermode() implementation that can be used
- on all architectures which return to user-space through core serializing
- instructions.
- If your architecture returns to user-space through non-core-serializing
- instructions, you need to write your own functions.
- */
-static inline void sync_core_before_usermode(void) -{ -} -#endif
-#endif /* _LINUX_SYNC_CORE_H */
Thanks,
Mathieu