Make num_cache_leaves a per-CPU variable. Otherwise,
populate_cache_leaves() fails on systems with an asymmetric number of
subleaves in CPUID leaf 0x4. Intel Meteor Lake is an example of such a
system.
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: Chen Yu <yu.c.chen@intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
After this change, all CPUs will traverse CPUID leaf 0x4 when booted for
the first time. On systems with symmetric cache topologies this is
useless work.
Creating a list of processor models that have asymmetric cache topologies
was considered. The burden of maintaining such a list would outweigh the
performance benefit of skipping this extra step.
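For reference, the leaf count comes from iterating the subleaves of
CPUID leaf 0x4 until the cache type field (EAX bits 4:0) reads 0. Below
is a minimal user-space sketch of that enumeration (an illustration of
the CPUID protocol only, not the kernel's find_num_cache_leaves()
itself; on hybrid systems the result depends on which CPU the thread
runs on, so pin the thread to compare cores):

#include <stdio.h>
#include <cpuid.h>

/* Walk the subleaves of CPUID leaf 0x4 on the calling CPU.
 * EAX bits 4:0 hold the cache type; 0 means "no more caches". */
static unsigned short count_cache_leaves(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned short i = 0;

	while (__get_cpuid_count(0x4, i, &eax, &ebx, &ecx, &edx) &&
	       (eax & 0x1f))
		i++;

	return i;
}

int main(void)
{
	printf("cache leaves: %u\n", count_cache_leaves());
	return 0;
}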
---
arch/x86/kernel/cpu/cacheinfo.c | 48 ++++++++++++++++++++-------------
1 file changed, 29 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 4063e8991211..6ad51657c853 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -176,7 +176,18 @@ struct _cpuid4_info_regs {
struct amd_northbridge *nb;
};
-static unsigned short num_cache_leaves;
+static DEFINE_PER_CPU(unsigned short, num_cache_leaves);
+
+static inline unsigned short get_num_cache_leaves(unsigned int cpu)
+{
+ return per_cpu(num_cache_leaves, cpu);
+}
+
+static inline void
+set_num_cache_leaves(unsigned short nr_leaves, unsigned int cpu)
+{
+ per_cpu(num_cache_leaves, cpu) = nr_leaves;
+}
/* AMD doesn't have CPUID4. Emulate it here to report the same
information to the user. This makes some assumptions about the machine:
@@ -716,19 +727,21 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
+ unsigned int cpu = c->cpu_index;
+
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
- num_cache_leaves = find_num_cache_leaves(c);
+ set_num_cache_leaves(find_num_cache_leaves(c), cpu);
} else if (c->extended_cpuid_level >= 0x80000006) {
if (cpuid_edx(0x80000006) & 0xf000)
- num_cache_leaves = 4;
+ set_num_cache_leaves(4, cpu);
else
- num_cache_leaves = 3;
+ set_num_cache_leaves(3, cpu);
}
}
void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
- num_cache_leaves = find_num_cache_leaves(c);
+ set_num_cache_leaves(find_num_cache_leaves(c), c->cpu_index);
}
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
@@ -738,24 +751,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_SMP
unsigned int cpu = c->cpu_index;
-#endif
if (c->cpuid_level > 3) {
- static int is_initialized;
-
- if (is_initialized == 0) {
- /* Init num_cache_leaves from boot CPU */
- num_cache_leaves = find_num_cache_leaves(c);
- is_initialized++;
- }
+ /*
+ * There should be at least one leaf. A non-zero value means
+ * that the number of leaves has been initialized.
+ */
+ if (!get_num_cache_leaves(cpu))
+ set_num_cache_leaves(find_num_cache_leaves(c), cpu);
/*
* Whenever possible use cpuid(4), deterministic cache
* parameters cpuid leaf to find the cache details
*/
- for (i = 0; i < num_cache_leaves; i++) {
+ for (i = 0; i < get_num_cache_leaves(cpu); i++) {
struct _cpuid4_info_regs this_leaf = {};
int retval;
@@ -791,14 +801,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
* Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
* trace cache
*/
- if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+ if ((!get_num_cache_leaves(cpu) || c->x86 == 15) && c->cpuid_level > 1) {
/* supports eax=2 call */
int j, n;
unsigned int regs[4];
unsigned char *dp = (unsigned char *)regs;
int only_trace = 0;
- if (num_cache_leaves != 0 && c->x86 == 15)
+ if (get_num_cache_leaves(cpu) && c->x86 == 15)
only_trace = 1;
/* Number of times to iterate */
@@ -1000,12 +1010,12 @@ int init_cache_level(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- if (!num_cache_leaves)
+ if (!get_num_cache_leaves(cpu))
return -ENOENT;
if (!this_cpu_ci)
return -EINVAL;
this_cpu_ci->num_levels = 3;
- this_cpu_ci->num_leaves = num_cache_leaves;
+ this_cpu_ci->num_leaves = get_num_cache_leaves(cpu);
return 0;
}
--
2.25.1
From: Sasha Finkelstein <fnkl.kernel@gmail.com>
Fix an incorrect loop exit condition in the code that replaces '/'
characters in the board name: the loop bound compared the index against
the byte value at that index rather than against the string length.
There might also be a memory corruption issue here, but it is unlikely
to be a real problem.
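A user-space reduction of the bug (the board name below is a
hypothetical value, padded so that the byte-value bound trips before
the '/' is reached):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char board_type[64];
	size_t len, i;

	/* 48 'a' bytes, then "-/end": at i == 48 the old condition
	 * compares 48 < '-' (45) and exits before reaching the '/'. */
	memset(board_type, 'a', 48);
	strcpy(board_type + 48, "-/end");
	len = strlen(board_type) + 1;

	for (i = 0; i < board_type[i]; i++)	/* buggy bound */
		if (board_type[i] == '/')
			board_type[i] = '-';
	printf("buggy: %s\n", board_type);	/* '/' survives */

	for (i = 0; i < len; i++)		/* fixed bound */
		if (board_type[i] == '/')
			board_type[i] = '-';
	printf("fixed: %s\n", board_type);	/* '/' replaced */

	return 0;
}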
Cc: <stable@vger.kernel.org>
Signed-off-by: Sasha Finkelstein <fnkl.kernel@gmail.com>
---
drivers/bluetooth/btbcm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 3006e2a0f37e..43e98a598bd9 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -511,7 +511,7 @@ static const char *btbcm_get_board_name(struct device *dev)
len = strlen(tmp) + 1;
board_type = devm_kzalloc(dev, len, GFP_KERNEL);
strscpy(board_type, tmp, len);
- for (i = 0; i < board_type[i]; i++) {
+ for (i = 0; i < len; i++) {
if (board_type[i] == '/')
board_type[i] = '-';
}
---
base-commit: fe15c26ee26efa11741a7b632e9f23b01aca4cc6
change-id: 20230224-btbcm-wtf-ff32fed3e930
Best regards,
--
Sasha Finkelstein <fnkl.kernel@gmail.com>
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
A while ago, the trace event code had the following sequence:
rcu_read_lock_sched_notrace();
rcu_dereference_sched(...);
rcu_read_unlock_sched_notrace();
If the tracepoint was enabled, that sequence could flag RCU issues when
called from the wrong place, but the warning was only triggered if
lockdep was enabled. If the tracepoint was never enabled while lockdep
was active, the bug would not be caught. To handle this, the above
sequence was run whenever lockdep was enabled, regardless of whether
the tracepoint was enabled or not (although the always-enabled code
really didn't do anything useful, it could still trigger the warning).
But a lot has changed since that lockdep code was added. One is that
the sequence no longer triggers any warning. Another is that an enabled
tracepoint no longer executes that sequence at all.
The main check we care about today is whether RCU is "watching" or not.
So if lockdep is enabled, always check rcu_is_watching(), which will
trigger a warning if it returns false (tracepoints require RCU to be
watching).
Note that the old sequence added a bit of overhead when lockdep was
enabled, and with the latest kernel updates it would slow the system
down enough to trigger kernel "stalled" warnings.
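Conceptually, the per-call-site check now reduces to something like
the following (a simplified sketch of what the __DECLARE_TRACE()
expansion does; trace_foo_rcu_check() is a made-up name for
illustration, and the tracepoint condition is omitted):

#include <linux/kconfig.h>
#include <linux/rcupdate.h>
#include <linux/bug.h>

static inline void trace_foo_rcu_check(void)
{
	if (IS_ENABLED(CONFIG_LOCKDEP))
		/* Tracepoints require RCU to be watching; warn at the
		 * call site even while the tracepoint is disabled. */
		WARN_ON_ONCE(!rcu_is_watching());
}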
Link: http://lore.kernel.org/lkml/20140806181801.GA4605@redhat.com
Link: http://lore.kernel.org/lkml/20140807175204.C257CAC5@viggo.jf.intel.com
Link: https://lore.kernel.org/lkml/20230307184645.521db5c9@gandalf.local.home/
Link: https://lore.kernel.org/linux-trace-kernel/20230310172856.77406446@gandalf.…
Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Fixes: e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
include/linux/tracepoint.h | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index fa1004fcf810..2083f2d2f05b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -231,12 +231,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
* not add unwanted padding between the beginning of the section and the
* structure. Force alignment to the same alignment as the section start.
*
- * When lockdep is enabled, we make sure to always do the RCU portions of
- * the tracepoint code, regardless of whether tracing is on. However,
- * don't check if the condition is false, due to interaction with idle
- * instrumentation. This lets us find RCU issues triggered with tracepoints
- * even when this tracepoint is off. This code has no purpose other than
- * poking RCU a bit.
+ * When lockdep is enabled, we make sure to always test if RCU is
+ * "watching" regardless if the tracepoint is enabled or not. Tracepoints
+ * require RCU to be active, and it should always warn at the tracepoint
+ * site if it is not watching, as it will need to be active when the
+ * tracepoint is enabled.
*/
#define __DECLARE_TRACE(name, proto, args, cond, data_proto) \
extern int __traceiter_##name(data_proto); \
@@ -249,9 +248,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
TP_ARGS(args), \
TP_CONDITION(cond), 0); \
if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \
- rcu_read_lock_sched_notrace(); \
- rcu_dereference_sched(__tracepoint_##name.funcs);\
- rcu_read_unlock_sched_notrace(); \
+ WARN_ON_ONCE(!rcu_is_watching()); \
} \
} \
__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \
--
2.39.1
From: Chen Zhongjin <chenzhongjin@huawei.com>
KASAN reported the following problem:
BUG: KASAN: use-after-free in lookup_rec
Read of size 8 at addr ffff000199270ff0 by task modprobe
CPU: 2 Comm: modprobe
Call trace:
kasan_report
__asan_load8
lookup_rec
ftrace_location
arch_check_ftrace_location
check_kprobe_address_safe
register_kprobe
When checking pg->records[pg->index - 1].ip, lookup_rec() can encounter
a pg that was newly added to ftrace_pages_start in
ftrace_process_locs(). Before the first pg->index++, index is 0, so
accessing pg->records[-1].ip reads out of bounds and causes this
problem.
Don't check the ip when pg->index is 0.
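A user-space reduction of the pattern (the struct and addresses are
hypothetical stand-ins for ftrace's page records, with the
MCOUNT_INSN_SIZE adjustment dropped for brevity):

#include <stdio.h>

struct pg_like {
	unsigned long records[4];	/* record ips */
	int index;			/* number of valid records */
};

static int pg_may_contain(struct pg_like *pg, unsigned long start,
			  unsigned long end)
{
	/* Test index first: with index == 0 there are no valid
	 * records, and records[index - 1] would read out of bounds. */
	if (pg->index == 0 ||
	    end < pg->records[0] ||
	    start >= pg->records[pg->index - 1])
		return 0;	/* skip this page */
	return 1;
}

int main(void)
{
	struct pg_like empty = { .index = 0 };	/* freshly added page */

	printf("match possible: %d\n",
	       pg_may_contain(&empty, 0x1000, 0x1004));
	return 0;
}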
Link: https://lore.kernel.org/linux-trace-kernel/20230309080230.36064-1-chenzhong…
Cc: stable@vger.kernel.org
Fixes: 9644302e3315 ("ftrace: Speed up search by skipping pages by address")
Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/trace/ftrace.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 750aa3f08b25..a47f7d93e32d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1537,7 +1537,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
key.flags = end; /* overload flags, as it is unsigned long */
for (pg = ftrace_pages_start; pg; pg = pg->next) {
- if (end < pg->records[0].ip ||
+ if (pg->index == 0 ||
+ end < pg->records[0].ip ||
start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
continue;
rec = bsearch(&key, pg->records, pg->index,
--
2.39.1
From: "Steven Rostedt (Google)" <rostedt(a)goodmis.org>
The function hist_field_name() cannot handle being passed a NULL field
parameter. It should never be NULL, but due to a previous bug, NULL was
passed to the function and the kernel crashed due to a NULL dereference.
Mark Rutland reported this to me on IRC.
The bug was fixed, but to prevent future bugs from crashing the kernel,
check the field and add a WARN_ON_ONCE() if it is NULL, returning the
default empty name instead of dereferencing it.
Link: https://lkml.kernel.org/r/20230302020810.762384440@goodmis.org
Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Fixes: c6afad49d127f ("tracing: Add hist trigger 'sym' and 'sym-offset' modifiers")
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
kernel/trace/trace_events_hist.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 6e8ab726a7b5..486cca3c2b75 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1331,6 +1331,9 @@ static const char *hist_field_name(struct hist_field *field,
{
const char *field_name = "";
+ if (WARN_ON_ONCE(!field))
+ return field_name;
+
if (level > 1)
return field_name;
--
2.39.1