From: Steven Rostedt rostedt@goodmis.org
Pingfan reported that the following causes a fault:
echo "filename ~ "cpu"" > events/syscalls/sys_enter_openat/filter echo 1 > events/syscalls/sys_enter_at/enable
The reason is that trace event filter treats the user space pointer defined by "filename" as a normal pointer to compare against the "cpu" string. If the string is not loaded into memory yet, it will trigger a fault in kernel space:
kvm-03-guest16 login: [72198.026181] BUG: unable to handle page fault for address: 00007fffaae8ef60 #PF: supervisor read access in kernel mode #PF: error_code(0x0001) - permissions violation PGD 80000001008b7067 P4D 80000001008b7067 PUD 2393f1067 PMD 2393ec067 PTE 8000000108f47867 Oops: 0001 [#1] PREEMPT SMP PTI CPU: 1 PID: 1 Comm: systemd Kdump: loaded Not tainted 5.14.0-32.el9.x86_64 #1 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:strlen+0x0/0x20 Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 RSP: 0018:ffffb5b900013e48 EFLAGS: 00010246 RAX: 0000000000000018 RBX: ffff8fc1c49ede00 RCX: 0000000000000000 RDX: 0000000000000020 RSI: ffff8fc1c02d601c RDI: 00007fffaae8ef60 RBP: 00007fffaae8ef60 R08: 0005034f4ddb8ea4 R09: 0000000000000000 R10: ffff8fc1c02d601c R11: 0000000000000000 R12: ffff8fc1c8a6e380 R13: 0000000000000000 R14: ffff8fc1c02d6010 R15: ffff8fc1c00453c0 FS: 00007fa86123db40(0000) GS:ffff8fc2ffd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffaae8ef60 CR3: 0000000102880001 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: filter_pred_pchar+0x18/0x40 filter_match_preds+0x31/0x70 ftrace_syscall_enter+0x27a/0x2c0 syscall_trace_enter.constprop.0+0x1aa/0x1d0 do_syscall_64+0x16/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fa861d88664
To be even more robust, test both kernel and user space strings. If the string fails to read, then simply have the filter fail.
Link: https://lore.kernel.org/all/20220107044951.22080-1-kernelfans@gmail.com/
Cc: stable@vger.kernel.org Reported-by: Pingfan Liu kernelfans@gmail.com Fixes: 87a342f5db69d ("tracing/filters: Support filtering for char * strings") Signed-off-by: Steven Rostedt rostedt@goodmis.org --- kernel/trace/trace_events_filter.c | 79 +++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 996920ed1812..cf0fa9a785c7 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -5,6 +5,7 @@ * Copyright (C) 2009 Tom Zanussi tzanussi@gmail.com */
+#include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/mutex.h> @@ -654,12 +655,50 @@ DEFINE_EQUALITY_PRED(32); DEFINE_EQUALITY_PRED(16); DEFINE_EQUALITY_PRED(8);
+/* user space strings temp buffer */ +#define USTRING_BUF_SIZE 512 + +struct ustring_buffer { + char buffer[USTRING_BUF_SIZE]; +}; + +static __percpu struct ustring_buffer *ustring_per_cpu; + +static __always_inline char *test_string(char *str) +{ + struct ustring_buffer *ubuf; + char __user *ustr; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + if (likely((unsigned long)str >= TASK_SIZE)) { + /* For safety, do not trust the string pointer */ + if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) + return NULL; + } else { + /* user space address? */ + ustr = str; + if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) + return NULL; + } + return kstr; +} + /* Filter predicate for fixed sized arrays of characters */ static int filter_pred_string(struct filter_pred *pred, void *event) { char *addr = (char *)(event + pred->offset); int cmp, match;
+ addr = test_string(addr); + if (!addr) + return 0; + cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
match = cmp ^ pred->not; @@ -671,10 +710,16 @@ static int filter_pred_string(struct filter_pred *pred, void *event) static int filter_pred_pchar(struct filter_pred *pred, void *event) { char **addr = (char **)(event + pred->offset); + char *str; int cmp, match; - int len = strlen(*addr) + 1; /* including tailing '\0' */ + int len; + + str = test_string(*addr); + if (!str) + return 0;
- cmp = pred->regex.match(*addr, &pred->regex, len); + len = strlen(str) + 1; /* including tailing '\0' */ + cmp = pred->regex.match(str, &pred->regex, len);
match = cmp ^ pred->not;
@@ -784,6 +829,10 @@ static int filter_pred_none(struct filter_pred *pred, void *event)
static int regex_match_full(char *str, struct regex *r, int len) { + str = test_string(str); + if (!str) + return 0; + /* len of zero means str is dynamic and ends with '\0' */ if (!len) return strcmp(str, r->pattern) == 0; @@ -793,6 +842,10 @@ static int regex_match_full(char *str, struct regex *r, int len)
static int regex_match_front(char *str, struct regex *r, int len) { + str = test_string(str); + if (!str) + return 0; + if (len && len < r->len) return 0;
@@ -801,6 +854,10 @@ static int regex_match_front(char *str, struct regex *r, int len)
static int regex_match_middle(char *str, struct regex *r, int len) { + str = test_string(str); + if (!str) + return 0; + if (!len) return strstr(str, r->pattern) != NULL;
@@ -811,6 +868,10 @@ static int regex_match_end(char *str, struct regex *r, int len) { int strlen = len - 1;
+ str = test_string(str); + if (!str) + return 0; + if (strlen >= r->len && memcmp(str + strlen - r->len, r->pattern, r->len) == 0) return 1; @@ -819,6 +880,10 @@ static int regex_match_end(char *str, struct regex *r, int len)
static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused) { + str = test_string(str); + if (!str) + return 0; + if (glob_match(r->pattern, str)) return 1; return 0; @@ -1335,6 +1400,13 @@ static int parse_pred(const char *str, void *data, strncpy(pred->regex.pattern, str + s, len); pred->regex.pattern[len] = 0;
+ if (!ustring_per_cpu) { + /* Once allocated, keep it around for good */ + ustring_per_cpu = alloc_percpu(struct ustring_buffer); + if (!ustring_per_cpu) + goto err_mem; + } + filter_build_regex(pred);
if (field->filter_type == FILTER_COMM) { @@ -1415,6 +1487,9 @@ static int parse_pred(const char *str, void *data, err_free: kfree(pred); return -EINVAL; +err_mem: + kfree(pred); + return -ENOMEM; }
enum {
Hi Steven,
I love your patch! Perhaps something to improve:
[auto build test WARNING on rostedt-trace/for-next] [also build test WARNING on linux/master linus/master hnaz-mm/master v5.16-rc8 next-20220107] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Steven-Rostedt/tracing-Fix-filterin... base: https://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git for-next config: openrisc-randconfig-s031-20220107 (https://download.01.org/0day-ci/archive/20220109/202201090343.IKIQqdk4-lkp@i...) compiler: or1k-linux-gcc (GCC) 11.2.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # apt-get install sparse # sparse version: v0.6.4-dirty # https://github.com/0day-ci/linux/commit/19b8c1c0fee0d7bff07ed0d5862a29ac2bb4... git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Steven-Rostedt/tracing-Fix-filtering-on-string-pointers/20220108-070047 git checkout 19b8c1c0fee0d7bff07ed0d5862a29ac2bb4adc9 # save the config file to linux build tree mkdir build_dir COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=openrisc SHELL=/bin/bash kernel/trace/
If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot lkp@intel.com
sparse warnings: (new ones prefixed by >>)
kernel/trace/trace_events_filter.c:685:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char [noderef] __user *ustr @@ got char *str @@
kernel/trace/trace_events_filter.c:685:22: sparse: expected char [noderef] __user *ustr kernel/trace/trace_events_filter.c:685:22: sparse: got char *str
kernel/trace/trace_events_filter.c:685:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char [noderef] __user *ustr @@ got char *str @@
kernel/trace/trace_events_filter.c:685:22: sparse: expected char [noderef] __user *ustr kernel/trace/trace_events_filter.c:685:22: sparse: got char *str
kernel/trace/trace_events_filter.c:685:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char [noderef] __user *ustr @@ got char *str @@
kernel/trace/trace_events_filter.c:685:22: sparse: expected char [noderef] __user *ustr kernel/trace/trace_events_filter.c:685:22: sparse: got char *str
kernel/trace/trace_events_filter.c:685:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char [noderef] __user *ustr @@ got char *str @@
kernel/trace/trace_events_filter.c:685:22: sparse: expected char [noderef] __user *ustr kernel/trace/trace_events_filter.c:685:22: sparse: got char *str
kernel/trace/trace_events_filter.c:685:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char [noderef] __user *ustr @@ got char *str @@
kernel/trace/trace_events_filter.c:685:22: sparse: expected char [noderef] __user *ustr kernel/trace/trace_events_filter.c:685:22: sparse: got char *str
kernel/trace/trace_events_filter.c:685:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char [noderef] __user *ustr @@ got char *str @@
kernel/trace/trace_events_filter.c:685:22: sparse: expected char [noderef] __user *ustr kernel/trace/trace_events_filter.c:685:22: sparse: got char *str
kernel/trace/trace_events_filter.c:685:22: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected char [noderef] __user *ustr @@ got char *str @@
kernel/trace/trace_events_filter.c:685:22: sparse: expected char [noderef] __user *ustr kernel/trace/trace_events_filter.c:685:22: sparse: got char *str kernel/trace/trace_events_filter.c:1066:20: sparse: sparse: incorrect type in return expression (different address spaces) @@ expected struct event_filter * @@ got struct event_filter [noderef] __rcu *filter @@ kernel/trace/trace_events_filter.c:1066:20: sparse: expected struct event_filter * kernel/trace/trace_events_filter.c:1066:20: sparse: got struct event_filter [noderef] __rcu *filter kernel/trace/trace_events_filter.c:1136:34: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct event_filter *filter @@ got struct event_filter [noderef] __rcu *filter @@ kernel/trace/trace_events_filter.c:1136:34: sparse: expected struct event_filter *filter kernel/trace/trace_events_filter.c:1136:34: sparse: got struct event_filter [noderef] __rcu *filter kernel/trace/trace_events_filter.c:1153:27: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct event_filter *filter @@ got struct event_filter [noderef] __rcu *filter @@ kernel/trace/trace_events_filter.c:1153:27: sparse: expected struct event_filter *filter kernel/trace/trace_events_filter.c:1153:27: sparse: got struct event_filter [noderef] __rcu *filter kernel/trace/trace_events_filter.c:1066:20: sparse: sparse: incorrect type in return expression (different address spaces) @@ expected struct event_filter * @@ got struct event_filter [noderef] __rcu *filter @@ kernel/trace/trace_events_filter.c:1066:20: sparse: expected struct event_filter * kernel/trace/trace_events_filter.c:1066:20: sparse: got struct event_filter [noderef] __rcu *filter kernel/trace/trace_events_filter.c:1066:20: sparse: sparse: incorrect type in return expression (different address spaces) @@ expected struct event_filter * @@ got struct event_filter [noderef] __rcu *filter @@ kernel/trace/trace_events_filter.c:1066:20: sparse: expected struct event_filter * kernel/trace/trace_events_filter.c:1066:20: sparse: got struct event_filter [noderef] __rcu *filter kernel/trace/trace_events_filter.c:1066:20: sparse: sparse: incorrect type in return expression (different address spaces) @@ expected struct event_filter * @@ got struct event_filter [noderef] __rcu *filter @@ kernel/trace/trace_events_filter.c:1066:20: sparse: expected struct event_filter * kernel/trace/trace_events_filter.c:1066:20: sparse: got struct event_filter [noderef] __rcu *filter
vim +685 kernel/trace/trace_events_filter.c
666 667 static __always_inline char *test_string(char *str) 668 { 669 struct ustring_buffer *ubuf; 670 char __user *ustr; 671 char *kstr; 672 673 if (!ustring_per_cpu) 674 return NULL; 675 676 ubuf = this_cpu_ptr(ustring_per_cpu); 677 kstr = ubuf->buffer; 678 679 if (likely((unsigned long)str >= TASK_SIZE)) { 680 /* For safety, do not trust the string pointer */ 681 if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) 682 return NULL; 683 } else { 684 /* user space address? */
685 ustr = str;
686 if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) 687 return NULL; 688 } 689 return kstr; 690 } 691
--- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Steven,
This patch passed my test. But I have some concern, please see comment inline. On Fri, Jan 07, 2022 at 05:56:57PM -0500, Steven Rostedt wrote:
From: Steven Rostedt rostedt@goodmis.org
Pingfan reported that the following causes a fault:
echo "filename ~ "cpu"" > events/syscalls/sys_enter_openat/filter echo 1 > events/syscalls/sys_enter_at/enable
The reason is that trace event filter treats the user space pointer defined by "filename" as a normal pointer to compare against the "cpu" string. If the string is not loaded into memory yet, it will trigger a fault in kernel space:
kvm-03-guest16 login: [72198.026181] BUG: unable to handle page fault for address: 00007fffaae8ef60 #PF: supervisor read access in kernel mode #PF: error_code(0x0001) - permissions violation PGD 80000001008b7067 P4D 80000001008b7067 PUD 2393f1067 PMD 2393ec067 PTE 8000000108f47867 Oops: 0001 [#1] PREEMPT SMP PTI CPU: 1 PID: 1 Comm: systemd Kdump: loaded Not tainted 5.14.0-32.el9.x86_64 #1 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:strlen+0x0/0x20 Code: 48 89 f9 74 09 48 83 c1 01 80 39 00 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee c3 0f 1f 80 00 00 00 00 <80> 3f 00 74 10 48 89 f8 48 83 c0 01 80 38 00 75 f7 48 29 f8 c3 31 RSP: 0018:ffffb5b900013e48 EFLAGS: 00010246 RAX: 0000000000000018 RBX: ffff8fc1c49ede00 RCX: 0000000000000000 RDX: 0000000000000020 RSI: ffff8fc1c02d601c RDI: 00007fffaae8ef60 RBP: 00007fffaae8ef60 R08: 0005034f4ddb8ea4 R09: 0000000000000000 R10: ffff8fc1c02d601c R11: 0000000000000000 R12: ffff8fc1c8a6e380 R13: 0000000000000000 R14: ffff8fc1c02d6010 R15: ffff8fc1c00453c0 FS: 00007fa86123db40(0000) GS:ffff8fc2ffd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fffaae8ef60 CR3: 0000000102880001 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: filter_pred_pchar+0x18/0x40 filter_match_preds+0x31/0x70 ftrace_syscall_enter+0x27a/0x2c0 syscall_trace_enter.constprop.0+0x1aa/0x1d0 do_syscall_64+0x16/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fa861d88664
To be even more robust, test both kernel and user space strings. If the string fails to read, then simply have the filter fail.
Link: https://lore.kernel.org/all/20220107044951.22080-1-kernelfans@gmail.com/
Cc: stable@vger.kernel.org Reported-by: Pingfan Liu kernelfans@gmail.com Fixes: 87a342f5db69d ("tracing/filters: Support filtering for char * strings") Signed-off-by: Steven Rostedt rostedt@goodmis.org
kernel/trace/trace_events_filter.c | 79 +++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 996920ed1812..cf0fa9a785c7 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -5,6 +5,7 @@
- Copyright (C) 2009 Tom Zanussi tzanussi@gmail.com
*/ +#include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/mutex.h> @@ -654,12 +655,50 @@ DEFINE_EQUALITY_PRED(32); DEFINE_EQUALITY_PRED(16); DEFINE_EQUALITY_PRED(8); +/* user space strings temp buffer */ +#define USTRING_BUF_SIZE 512
Should it be PATH_MAX(4096) in case of matching against a file path?
+struct ustring_buffer {
- char buffer[USTRING_BUF_SIZE];
+};
+static __percpu struct ustring_buffer *ustring_per_cpu;
+static __always_inline char *test_string(char *str) +{
- struct ustring_buffer *ubuf;
- char __user *ustr;
- char *kstr;
- if (!ustring_per_cpu)
return NULL;
- ubuf = this_cpu_ptr(ustring_per_cpu);
- kstr = ubuf->buffer;
- if (likely((unsigned long)str >= TASK_SIZE)) {
/* For safety, do not trust the string pointer */
if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
Since no other trace_event_class except event_class_syscall_enter tries to uaccess, so the unreliable source only comes from event_class_syscall_enter.
In that case, the access to kernel address is forbidden. So here just return -EACCES ?
return NULL;
- } else {
/* user space address? */
ustr = str;
if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
return NULL;
- }
- return kstr;
+}
/* Filter predicate for fixed sized arrays of characters */ static int filter_pred_string(struct filter_pred *pred, void *event) { char *addr = (char *)(event + pred->offset); int cmp, match;
- addr = test_string(addr);
Among all of trace_event_class, only event_class_syscall_enter exposed to this fault (uprobe does not uaccess). So I think the strncpy_*() can be avoided based on class, which improves performance.
- if (!addr)
return 0;
- cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
match = cmp ^ pred->not; @@ -671,10 +710,16 @@ static int filter_pred_string(struct filter_pred *pred, void *event) static int filter_pred_pchar(struct filter_pred *pred, void *event) { char **addr = (char **)(event + pred->offset);
- char *str; int cmp, match;
- int len = strlen(*addr) + 1; /* including tailing '\0' */
- int len;
- str = test_string(*addr);
- if (!str)
return 0;
- cmp = pred->regex.match(*addr, &pred->regex, len);
- len = strlen(str) + 1; /* including tailing '\0' */
- cmp = pred->regex.match(str, &pred->regex, len);
match = cmp ^ pred->not; @@ -784,6 +829,10 @@ static int filter_pred_none(struct filter_pred *pred, void *event) static int regex_match_full(char *str, struct regex *r, int len) {
- str = test_string(str);
Since all regex_match_*() are called in filter_pred_*(), which have already protected codes from page fault. So no need to double check.
Thanks,
Pingfan
- if (!str)
return 0;
- /* len of zero means str is dynamic and ends with '\0' */ if (!len) return strcmp(str, r->pattern) == 0;
@@ -793,6 +842,10 @@ static int regex_match_full(char *str, struct regex *r, int len) static int regex_match_front(char *str, struct regex *r, int len) {
- str = test_string(str);
- if (!str)
return 0;
- if (len && len < r->len) return 0;
@@ -801,6 +854,10 @@ static int regex_match_front(char *str, struct regex *r, int len) static int regex_match_middle(char *str, struct regex *r, int len) {
- str = test_string(str);
- if (!str)
return 0;
- if (!len) return strstr(str, r->pattern) != NULL;
@@ -811,6 +868,10 @@ static int regex_match_end(char *str, struct regex *r, int len) { int strlen = len - 1;
- str = test_string(str);
- if (!str)
return 0;
- if (strlen >= r->len && memcmp(str + strlen - r->len, r->pattern, r->len) == 0) return 1;
@@ -819,6 +880,10 @@ static int regex_match_end(char *str, struct regex *r, int len) static int regex_match_glob(char *str, struct regex *r, int len __maybe_unused) {
- str = test_string(str);
- if (!str)
return 0;
- if (glob_match(r->pattern, str)) return 1; return 0;
@@ -1335,6 +1400,13 @@ static int parse_pred(const char *str, void *data, strncpy(pred->regex.pattern, str + s, len); pred->regex.pattern[len] = 0;
if (!ustring_per_cpu) {
/* Once allocated, keep it around for good */
ustring_per_cpu = alloc_percpu(struct ustring_buffer);
if (!ustring_per_cpu)
goto err_mem;
}
- filter_build_regex(pred);
if (field->filter_type == FILTER_COMM) { @@ -1415,6 +1487,9 @@ static int parse_pred(const char *str, void *data, err_free: kfree(pred); return -EINVAL; +err_mem:
- kfree(pred);
- return -ENOMEM;
} enum { -- 2.33.0
On Mon, 10 Jan 2022 11:15:20 +0800 Pingfan Liu kernelfans@gmail.com wrote:
Hi Steven,
This patch passed my test. But I have some concern, please see comment inline.
Thanks, can I add a "Tested-by:" from you?
(when I have my final version)
index 996920ed1812..cf0fa9a785c7 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -5,6 +5,7 @@
- Copyright (C) 2009 Tom Zanussi tzanussi@gmail.com
*/ +#include <linux/uaccess.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/mutex.h> @@ -654,12 +655,50 @@ DEFINE_EQUALITY_PRED(32); DEFINE_EQUALITY_PRED(16); DEFINE_EQUALITY_PRED(8); +/* user space strings temp buffer */ +#define USTRING_BUF_SIZE 512
Should it be PATH_MAX(4096) in case of matching against a file path?
I went back and forth with this size in my head, and since I currently do not free it, and it is 4 * nr_cpus in size, I went with the smallest number I felt was OK.
We can increase it in the future, and even expose the size to user space.
+struct ustring_buffer {
- char buffer[USTRING_BUF_SIZE];
+};
+static __percpu struct ustring_buffer *ustring_per_cpu;
+static __always_inline char *test_string(char *str) +{
- struct ustring_buffer *ubuf;
- char __user *ustr;
- char *kstr;
- if (!ustring_per_cpu)
return NULL;
- ubuf = this_cpu_ptr(ustring_per_cpu);
- kstr = ubuf->buffer;
- if (likely((unsigned long)str >= TASK_SIZE)) {
/* For safety, do not trust the string pointer */
if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
Since no other trace_event_class except event_class_syscall_enter tries to uaccess, so the unreliable source only comes from event_class_syscall_enter.
In that case, the access to kernel address is forbidden. So here just return -EACCES ?
I changed the way all pointers work. Any event that access a string pointer (there are a few), and we filter on it, then I want it to go through this path.
And returning -EACCES is useless this is done in the filtering logic and that error will not be exposed to anyone. The best we can do is to fail the filter.
return NULL;
- } else {
/* user space address? */
ustr = str;
if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
return NULL;
- }
- return kstr;
+}
/* Filter predicate for fixed sized arrays of characters */ static int filter_pred_string(struct filter_pred *pred, void *event) { char *addr = (char *)(event + pred->offset); int cmp, match;
- addr = test_string(addr);
Among all of trace_event_class, only event_class_syscall_enter exposed to this fault (uprobe does not uaccess). So I think the strncpy_*() can be avoided based on class, which improves performance.
The thing is, tracing should never cause a fault in the system. If a pointer is bad and you filter on it, it should not cause a crash. Your patch showed me we have this issues with kernel pointers too. And since we have no safe "strncmp" the best we can do is a safe "strncpy" and then compare it.
You actually exposed more issues than the one you were trying to solve, and this patch now addresses those issues.
Yes, it will impact performance, but robustness always trumps performance.
- if (!addr)
return 0;
- cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
match = cmp ^ pred->not; @@ -671,10 +710,16 @@ static int filter_pred_string(struct filter_pred *pred, void *event) static int filter_pred_pchar(struct filter_pred *pred, void *event) { char **addr = (char **)(event + pred->offset);
- char *str; int cmp, match;
- int len = strlen(*addr) + 1; /* including tailing '\0' */
- int len;
- str = test_string(*addr);
- if (!str)
return 0;
- cmp = pred->regex.match(*addr, &pred->regex, len);
- len = strlen(str) + 1; /* including tailing '\0' */
- cmp = pred->regex.match(str, &pred->regex, len);
match = cmp ^ pred->not; @@ -784,6 +829,10 @@ static int filter_pred_none(struct filter_pred *pred, void *event) static int regex_match_full(char *str, struct regex *r, int len) {
- str = test_string(str);
Since all regex_match_*() are called in filter_pred_*(), which have already protected codes from page fault. So no need to double check.
Ah, you're right. I only need to add this to filter_pred_pchar() and not the regex.
Thanks, I'll send a v2.
-- Steve
Hi Steve,
Steven Rostedt rostedt@goodmis.org writes:
From: Steven Rostedt rostedt@goodmis.org
Pingfan reported that the following causes a fault:
echo "filename ~ "cpu"" > events/syscalls/sys_enter_openat/filter echo 1 > events/syscalls/sys_enter_at/enable
[..]
+static __always_inline char *test_string(char *str) +{
- struct ustring_buffer *ubuf;
- char __user *ustr;
- char *kstr;
- if (!ustring_per_cpu)
return NULL;
- ubuf = this_cpu_ptr(ustring_per_cpu);
- kstr = ubuf->buffer;
- if (likely((unsigned long)str >= TASK_SIZE)) {
I think that would not work on architectures where addresses for kernel and user space could overlap, i.e. with different address spaces?
/* For safety, do not trust the string pointer */
if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE))
return NULL;
- } else {
/* user space address? */
ustr = str;
if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE))
return NULL;
- }
- return kstr;
+}
linux-stable-mirror@lists.linaro.org