Add array type support for probe events. This allows user to get arraied types from memory address. The array type syntax is
TYPE[N]
Where TYPE is one of types (u8/16/32/64,s8/16/32/64, x8/16/32/64, symbol, string) and N is a fixed value less than 64.
The string array type is a bit different from other types. For other base types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is same as +0(%di):x32.) But string[1] is not equal to string. The string type itself represents "char array", but string array type represents "char * array". So, for example, +0(%di):string[1] is equal to +0(+0(%di)):string.
Signed-off-by: Masami Hiramatsu mhiramat@kernel.org --- Changes in v6: - Fix to add escaping backslash to a brace for syntax in README file. Changes in v4: - Fix to use calculated size correctly for field definition. (Thank you Namhyung!) Changes in v2: - Add array description in README file - Fix to init s3 code out of loop. - Fix to proceed code when the last code is OP_ARRAY. - Add string array type and bitfield array type. --- Documentation/trace/kprobetrace.rst | 11 +++ kernel/trace/trace.c | 3 + kernel/trace/trace_probe.c | 130 +++++++++++++++++++++++++++-------- kernel/trace/trace_probe.h | 14 ++++ kernel/trace/trace_probe_tmpl.h | 63 +++++++++++++++-- 5 files changed, 181 insertions(+), 40 deletions(-)
diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 30eb37391064..b9e9198ea95d 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -66,9 +66,20 @@ respectively. 'x' prefix implies it is unsigned. Traced arguments are shown in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32' or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and x86-64 uses x64). +These value types can be an array. To record array data, you can add '[N]' +(where N is a fixed number, less than 64) to the base type. +E.g. 'x16[4]' means an array of x16 (2bytes hex) with 4 elements. +Note that the array can be applied to memory type fetchargs, you can not +apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is +wrong, but '+8($stack):x8[8]' is OK.) String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. +The string array type is a bit different from other types. For other base +types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is same +as +0(%di):x32.) But string[1] is not equal to string. The string type itself +represents "char array", but string array type represents "char * array". +So, for example, +0(%di):string[1] is equal to +0(+0(%di)):string. Bitfield is another special type, which takes 3 parameters, bit-width, bit- offset, and container-size (usually 32). The syntax is::
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7add9bfdff76..f34d026689c7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4626,7 +4626,8 @@ static const char readme_msg[] = "\t $stack<index>, $stack, $retval, $comm\n" #endif "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n" - "\t b<bit-width>@<bit-offset>/<container-size>\n" + "\t b<bit-width>@<bit-offset>/<container-size>,\n" + "\t <type>\[<array-size>\]\n" #endif " events/\t\t- Directory containing all trace event subsystems:\n" " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 9458800f394a..0e185050e492 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -363,9 +363,9 @@ static int __parse_bitfield_probe_arg(const char *bf, int traceprobe_parse_probe_arg(char *arg, ssize_t *size, struct probe_arg *parg, unsigned int flags) { - struct fetch_insn *code, *tmp = NULL; - const char *t; - int ret; + struct fetch_insn *code, *scode, *tmp = NULL; + char *t, *t2; + int ret, len;
if (strlen(arg) > MAX_ARGSTR_LEN) { pr_info("Argument is too long.: %s\n", arg); @@ -376,24 +376,42 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, pr_info("Failed to allocate memory for command '%s'.\n", arg); return -ENOMEM; } - t = strchr(parg->comm, ':'); + t = strchr(arg, ':'); if (t) { - arg[t - parg->comm] = '\0'; - t++; + *t = '\0'; + t2 = strchr(++t, '['); + if (t2) { + *t2 = '\0'; + parg->count = simple_strtoul(t2 + 1, &t2, 0); + if (strcmp(t2, "]") || parg->count == 0) + return -EINVAL; + if (parg->count > MAX_ARRAY_LEN) + return -E2BIG; + } } /* * The default type of $comm should be "string", and it can't be * dereferenced. */ if (!t && strcmp(arg, "$comm") == 0) - t = "string"; - parg->type = find_fetch_type(t); + parg->type = find_fetch_type("string"); + else + parg->type = find_fetch_type(t); if (!parg->type) { pr_info("Unsupported type: %s\n", t); return -EINVAL; } parg->offset = *size; - *size += parg->type->size; + *size += parg->type->size * (parg->count ?: 1); + + if (parg->count) { + len = strlen(parg->type->fmttype) + 6; + parg->fmt = kmalloc(len, GFP_KERNEL); + if (!parg->fmt) + return -ENOMEM; + snprintf(parg->fmt, len, "%s[%d]", parg->type->fmttype, + parg->count); + }
code = tmp = kzalloc(sizeof(*code) * FETCH_INSN_MAX, GFP_KERNEL); if (!code) @@ -413,10 +431,20 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, ret = -EINVAL; goto fail; } - /* Since IMM or COMM must be the 1st insn, this is safe */ - if (code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) + if (code->op != FETCH_OP_DEREF || parg->count) { + /* + * IMM and COMM is pointing actual address, those must + * be kept, and if parg->count != 0, this is an array + * of string pointers instead of string address itself. + */ code++; + if (code->op != FETCH_OP_NOP) { + ret = -E2BIG; + goto fail; + } + } code->op = FETCH_OP_ST_STRING; /* In DEREF case, replace it */ + code->size = parg->type->size; parg->dynamic = true; } else if (code->op == FETCH_OP_DEREF) { code->op = FETCH_OP_ST_MEM; @@ -430,12 +458,29 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size, code->op = FETCH_OP_ST_RAW; code->size = parg->type->size; } + scode = code; /* Modify operation */ if (t != NULL) { ret = __parse_bitfield_probe_arg(t, parg->type, &code); if (ret) goto fail; } + /* Loop(Array) operation */ + if (parg->count) { + if (scode->op != FETCH_OP_ST_MEM && + scode->op != FETCH_OP_ST_STRING) { + pr_info("array only accepts memory or address\n"); + ret = -EINVAL; + goto fail; + } + code++; + if (code->op != FETCH_OP_NOP) { + ret = -E2BIG; + goto fail; + } + code->op = FETCH_OP_LP_ARRAY; + code->param = parg->count; + } code++; code->op = FETCH_OP_END;
@@ -474,14 +519,17 @@ void traceprobe_free_probe_arg(struct probe_arg *arg) kfree(arg->code); kfree(arg->name); kfree(arg->comm); + kfree(arg->fmt); }
+/* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? len - pos : 0) static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, bool is_return) { - int i; + struct probe_arg *parg; + int i, j; int pos = 0; - const char *fmt, *arg;
if (!is_return) { @@ -492,33 +540,49 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len, arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; }
- /* When len=0, we just calculate the needed length */ -#define LEN_OR_ZERO (len ? len - pos : 0) - pos += snprintf(buf + pos, LEN_OR_ZERO, ""%s", fmt);
for (i = 0; i < tp->nr_args; i++) { - pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", - tp->args[i].name, tp->args[i].type->fmt); + parg = tp->args + i; + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=", parg->name); + if (parg->count) { + pos += snprintf(buf + pos, LEN_OR_ZERO, "{%s", + parg->type->fmt); + for (j = 1; j < parg->count; j++) + pos += snprintf(buf + pos, LEN_OR_ZERO, ",%s", + parg->type->fmt); + pos += snprintf(buf + pos, LEN_OR_ZERO, "}"); + } else + pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", + parg->type->fmt); }
pos += snprintf(buf + pos, LEN_OR_ZERO, "", %s", arg);
for (i = 0; i < tp->nr_args; i++) { - if (strcmp(tp->args[i].type->name, "string") == 0) + parg = tp->args + i; + if (parg->count) { + if (strcmp(parg->type->name, "string") == 0) + fmt = ", __get_str(%s[%d])"; + else + fmt = ", REC->%s[%d]"; + for (j = 0; j < parg->count; j++) + pos += snprintf(buf + pos, LEN_OR_ZERO, + fmt, parg->name, j); + } else { + if (strcmp(parg->type->name, "string") == 0) + fmt = ", __get_str(%s)"; + else + fmt = ", REC->%s"; pos += snprintf(buf + pos, LEN_OR_ZERO, - ", __get_str(%s)", - tp->args[i].name); - else - pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", - tp->args[i].name); + fmt, parg->name); + } }
-#undef LEN_OR_ZERO - /* return the length of print_fmt */ return pos; } +#undef LEN_OR_ZERO
int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return) { @@ -546,11 +610,15 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call, /* Set argument names as fields */ for (i = 0; i < tp->nr_args; i++) { struct probe_arg *parg = &tp->args[i]; - - ret = trace_define_field(event_call, parg->type->fmttype, - parg->name, - offset + parg->offset, - parg->type->size, + const char *fmt = parg->type->fmttype; + int size = parg->type->size; + + if (parg->fmt) + fmt = parg->fmt; + if (parg->count) + size *= parg->count; + ret = trace_define_field(event_call, fmt, parg->name, + offset + parg->offset, size, parg->type->is_signed, FILTER_OTHER); if (ret) diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index ff91faf70887..d256a19ee6d1 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -43,6 +43,7 @@
#define MAX_TRACE_ARGS 128 #define MAX_ARGSTR_LEN 63 +#define MAX_ARRAY_LEN 64 #define MAX_STRING_SIZE PATH_MAX
/* Reserved field names */ @@ -78,6 +79,14 @@ static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) return (u8 *)ent + get_loc_offs(*dl); }
+static nokprobe_inline u32 update_data_loc(u32 loc, int consumed) +{ + u32 maxlen = get_loc_len(loc); + u32 offset = get_loc_offs(loc); + + return make_data_loc(maxlen - consumed, offset + consumed); +} + /* Printing function type */ typedef int (*print_type_func_t)(struct trace_seq *, void *, void *);
@@ -100,6 +109,8 @@ enum fetch_op { FETCH_OP_ST_STRING, /* String: .offset, .size */ // Stage 4 (modify) op FETCH_OP_MOD_BF, /* Bitfield: .basesize, .lshift, .rshift */ + // Stage 5 (loop) op + FETCH_OP_LP_ARRAY, /* Array: .param = loop count */ FETCH_OP_END, };
@@ -189,6 +200,7 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(symbol); _ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, atype)
#define ASSIGN_FETCH_TYPE_END {} +#define MAX_ARRAY_LEN 64
#ifdef CONFIG_KPROBE_EVENTS bool trace_kprobe_on_func_entry(struct trace_event_call *call); @@ -209,8 +221,10 @@ struct probe_arg { struct fetch_insn *code; bool dynamic;/* Dynamic array (string) is used */ unsigned int offset; /* Offset from argument entry */ + unsigned int count; /* Array count */ const char *name; /* Name of this argument */ const char *comm; /* Command of this argument */ + char *fmt; /* Format string if needed */ const struct fetch_type *type; /* Type of this argument */ };
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index b4075f3e3a29..5c56afc17cf8 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -67,10 +67,15 @@ static nokprobe_inline int process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, void *dest, void *base) { - int ret = 0; + struct fetch_insn *s3 = NULL; + int total = 0, ret = 0, i = 0; + u32 loc = 0; + unsigned long lval = val;
+stage2: /* 2nd stage: dereference memory if needed */ while (code->op == FETCH_OP_DEREF) { + lval = val; ret = probe_mem_read(&val, (void *)val + code->offset, sizeof(val)); if (ret) @@ -78,11 +83,15 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, code++; }
+ s3 = code; +stage3: /* 3rd stage: store value to buffer */ if (unlikely(!dest)) { - if (code->op == FETCH_OP_ST_STRING) - return fetch_store_strlen(val + code->offset); - else + if (code->op == FETCH_OP_ST_STRING) { + ret += fetch_store_strlen(val + code->offset); + code++; + goto array; + } else return -EILSEQ; }
@@ -94,6 +103,7 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, probe_mem_read(dest, (void *)val + code->offset, code->size); break; case FETCH_OP_ST_STRING: + loc = *(u32 *)dest; ret = fetch_store_string(val + code->offset, dest, base); break; default: @@ -107,6 +117,29 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, code++; }
+array: + /* the last stage: Loop on array */ + if (code->op == FETCH_OP_LP_ARRAY) { + total += ret; + if (++i < code->param) { + code = s3; + if (s3->op != FETCH_OP_ST_STRING) { + dest += s3->size; + val += s3->size; + goto stage3; + } + code--; + val = lval + sizeof(char *); + if (dest) { + dest += sizeof(u32); + *(u32 *)dest = update_data_loc(loc, ret); + } + goto stage2; + } + code++; + ret = total; + } + return code->op == FETCH_OP_END ? ret : -EILSEQ; }
@@ -158,12 +191,26 @@ static inline int print_probe_args(struct trace_seq *s, struct probe_arg *args, int nr_args, u8 *data, void *field) { - int i; + void *p; + int i, j;
for (i = 0; i < nr_args; i++) { - trace_seq_printf(s, " %s=", args[i].name); - if (!args[i].type->print(s, data + args[i].offset, field)) - return -ENOMEM; + struct probe_arg *a = args + i; + + trace_seq_printf(s, " %s=", a->name); + if (likely(!a->count)) { + if (!a->type->print(s, data + a->offset, field)) + return -ENOMEM; + continue; + } + trace_seq_putc(s, '{'); + p = data + a->offset; + for (j = 0; j < a->count; j++) { + if (!a->type->print(s, p, field)) + return -ENOMEM; + trace_seq_putc(s, j == a->count - 1 ? '}' : ','); + p += a->type->size; + } } return 0; }
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html