Linux remembers cpu_cachinfo::num_leaves per CPU, but x86 initializes all CPUs from the same global "num_cache_leaves".
This is erroneous on systems like Meteor Lake, which has different num_leaves per CPU. Delete the global "num_cache_leaves" and initialize num_leaves accurately on each CPU.
Cc: Andreas Herrmann aherrmann@suse.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Chen Yu yu.c.chen@intel.com Cc: Len Brown len.brown@intel.com Cc: Radu Rendec rrendec@redhat.com Cc: Pierre Gondois Pierre.Gondois@arm.com Cc: Pu Wen puwen@hygon.cn Cc: "Rafael J. Wysocki" rafael.j.wysocki@intel.com Cc: Sudeep Holla sudeep.holla@arm.com Cc: Srinivas Pandruvada srinivas.pandruvada@linux.intel.com Cc: Will Deacon will@kernel.org Cc: Zhang Rui rui.zhang@intel.com Cc: linux-arm-kernel@lists.infradead.org Cc: stable@vger.kernel.org Reviewed-by: Len Brown len.brown@intel.com Signed-off-by: Ricardo Neri ricardo.neri-calderon@linux.intel.com --- After this change, all CPUs will traverse CPUID leaf 0x4 when booted for the first time. On systems with symmetric cache topologies this is useless work.
Creating a list of processor models that have asymmetric cache topologies was considered. The burden of maintaining such list would outweigh the performance benefit of skipping this extra step. --- Changes since v2: * None
Changes since v1: * Do not make num_cache_leaves a per-CPU variable. Instead, reuse the existing per-CPU ci_cpu_cacheinfo variable. (Dave Hansen) --- arch/x86/kernel/cpu/cacheinfo.c | 45 ++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 20 deletions(-)
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 8f86eacf69f7..b4334c529231 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -178,7 +178,16 @@ struct _cpuid4_info_regs { struct amd_northbridge *nb; };
-static unsigned short num_cache_leaves; +static inline unsigned int get_num_cache_leaves(unsigned int cpu) +{ + return get_cpu_cacheinfo(cpu)->num_leaves; +} + +static inline void +set_num_cache_leaves(unsigned int nr_leaves, unsigned int cpu) +{ + get_cpu_cacheinfo(cpu)->num_leaves = nr_leaves; +}
/* AMD doesn't have CPUID4. Emulate it here to report the same information to the user. This makes some assumptions about the machine: @@ -718,19 +727,21 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu) void init_amd_cacheinfo(struct cpuinfo_x86 *c) {
+ unsigned int cpu = c->cpu_index; + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - num_cache_leaves = find_num_cache_leaves(c); + set_num_cache_leaves(find_num_cache_leaves(c), cpu); } else if (c->extended_cpuid_level >= 0x80000006) { if (cpuid_edx(0x80000006) & 0xf000) - num_cache_leaves = 4; + set_num_cache_leaves(4, cpu); else - num_cache_leaves = 3; + set_num_cache_leaves(3, cpu); } }
void init_hygon_cacheinfo(struct cpuinfo_x86 *c) { - num_cache_leaves = find_num_cache_leaves(c); + set_num_cache_leaves(find_num_cache_leaves(c), c->cpu_index); }
void init_intel_cacheinfo(struct cpuinfo_x86 *c) @@ -740,24 +751,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; -#ifdef CONFIG_SMP unsigned int cpu = c->cpu_index; -#endif
if (c->cpuid_level > 3) { - static int is_initialized; - - if (is_initialized == 0) { - /* Init num_cache_leaves from boot CPU */ - num_cache_leaves = find_num_cache_leaves(c); - is_initialized++; - } + /* + * There should be at least one leaf. A non-zero value means + * that the number of leaves has been initialized. + */ + if (!get_num_cache_leaves(cpu)) + set_num_cache_leaves(find_num_cache_leaves(c), cpu);
/* * Whenever possible use cpuid(4), deterministic cache * parameters cpuid leaf to find the cache details */ - for (i = 0; i < num_cache_leaves; i++) { + for (i = 0; i < get_num_cache_leaves(cpu); i++) { struct _cpuid4_info_regs this_leaf = {}; int retval;
@@ -793,14 +801,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for * trace cache */ - if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { + if ((!get_num_cache_leaves(cpu) || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int j, n; unsigned int regs[4]; unsigned char *dp = (unsigned char *)regs; int only_trace = 0;
- if (num_cache_leaves != 0 && c->x86 == 15) + if (get_num_cache_leaves(cpu) && c->x86 == 15) only_trace = 1;
/* Number of times to iterate */ @@ -1002,12 +1010,9 @@ int init_cache_level(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- if (!num_cache_leaves) - return -ENOENT; if (!this_cpu_ci) return -EINVAL; this_cpu_ci->num_levels = 3; - this_cpu_ci->num_leaves = num_cache_leaves; return 0; }