From: Steven Rostedt (Google) rostedt@goodmis.org
commit 795301d3c28996219d555023ac6863401b6076bc upstream.
When an enum is used in the visible parts of a trace event that is exported to user space, the user space applications like perf and trace-cmd do not have a way to know what the value of the enum is. To solve this, at boot up (or module load) the printk formats are modified to replace the enum with their numeric value in the string output.
Array fields of the event are defined by [<nr-elements>] in the type portion of the format file so that the user space parsers can correctly parse the array into the appropriate size chunks. But in some trace events, an enum is used in defining the size of the array, which once again breaks the parsing of user space tooling.
This was solved the same way as the print formats were, but it modified the type strings of the trace event. This caused crashes in some architectures because, as supposed to the print string, is a const string value. This was not detected on x86, as it appears that const strings are still writable (at least in boot up), but other architectures this is not the case, and writing to a const string will cause a kernel fault.
To fix this, use kstrdup() to copy the type before modifying it. If the trace event is for the core kernel there's no need to free it because the string will be in use for the life of the machine being on line. For modules, create a link list to store all the strings being allocated for modules and when the module is removed, free them.
Link: https://lore.kernel.org/all/yt9dr1706b4i.fsf@linux.ibm.com/ Link: https://lkml.kernel.org/r/20220318153432.3984b871@gandalf.local.home
Tested-by: Marc Zyngier maz@kernel.org Tested-by: Sven Schnelle svens@linux.ibm.com Reported-by: Sven Schnelle svens@linux.ibm.com Fixes: b3bc8547d3be ("tracing: Have TRACE_DEFINE_ENUM affect trace event types as well") Signed-off-by: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- kernel/trace/trace_events.c | 62 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-)
--- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -40,6 +40,14 @@ static LIST_HEAD(ftrace_generic_fields); static LIST_HEAD(ftrace_common_fields); static bool eventdir_initialized;
+static LIST_HEAD(module_strings); + +struct module_string { + struct list_head next; + struct module *module; + char *str; +}; + #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
static struct kmem_cache *field_cachep; @@ -2637,14 +2645,40 @@ static void update_event_printk(struct t } }
+static void add_str_to_module(struct module *module, char *str) +{ + struct module_string *modstr; + + modstr = kmalloc(sizeof(*modstr), GFP_KERNEL); + + /* + * If we failed to allocate memory here, then we'll just + * let the str memory leak when the module is removed. + * If this fails to allocate, there's worse problems than + * a leaked string on module removal. + */ + if (WARN_ON_ONCE(!modstr)) + return; + + modstr->module = module; + modstr->str = str; + + list_add(&modstr->next, &module_strings); +} + static void update_event_fields(struct trace_event_call *call, struct trace_eval_map *map) { struct ftrace_event_field *field; struct list_head *head; char *ptr; + char *str; int len = strlen(map->eval_string);
+ /* Dynamic events should never have field maps */ + if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC)) + return; + head = trace_get_fields(call); list_for_each_entry(field, head, link) { ptr = strchr(field->type, '['); @@ -2658,9 +2692,26 @@ static void update_event_fields(struct t if (strncmp(map->eval_string, ptr, len) != 0) continue;
+ str = kstrdup(field->type, GFP_KERNEL); + if (WARN_ON_ONCE(!str)) + return; + ptr = str + (ptr - field->type); ptr = eval_replace(ptr, map, len); /* enum/sizeof string smaller than value */ - WARN_ON_ONCE(!ptr); + if (WARN_ON_ONCE(!ptr)) { + kfree(str); + continue; + } + + /* + * If the event is part of a module, then we need to free the string + * when the module is removed. Otherwise, it will stay allocated + * until a reboot. + */ + if (call->module) + add_str_to_module(call->module, str); + + field->type = str; } }
@@ -2883,6 +2934,7 @@ static void trace_module_add_events(stru static void trace_module_remove_events(struct module *mod) { struct trace_event_call *call, *p; + struct module_string *modstr, *m;
down_write(&trace_event_sem); list_for_each_entry_safe(call, p, &ftrace_events, list) { @@ -2891,6 +2943,14 @@ static void trace_module_remove_events(s if (call->module == mod) __trace_remove_event_call(call); } + /* Check for any strings allocade for this module */ + list_for_each_entry_safe(modstr, m, &module_strings, next) { + if (modstr->module != mod) + continue; + list_del(&modstr->next); + kfree(modstr->str); + kfree(modstr); + } up_write(&trace_event_sem);
/*