It was reported that some perf event setups can make fork fail on
ARM64. This happens with a group of mixed hw and sw events. The ARM
PMU code checks that all the events in a group belong to the same PMU,
except for software events. But the event_caps of inherited events
were not set, so those events were no longer identified as software
events. Therefore the check failed in a child process.
A simple reproducer is:
$ perf stat -e '{cycles,cs,instructions}' perf bench sched messaging
# Running 'sched/messaging' benchmark:
perf: fork(): Invalid argument
The perf stat was fine but the perf bench failed in fork(). Let's
inherit the event caps from the parent.
Cc: Will Deacon <will(a)kernel.org>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Namhyung Kim <namhyung(a)kernel.org>
---
kernel/events/core.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fc18664f49b0..e28f63ae625b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11560,6 +11560,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->state = PERF_EVENT_STATE_INACTIVE;
+ if (parent_event)
+ event->event_caps = parent_event->event_caps;
+
if (event->attr.sigtrap)
atomic_set(&event->event_limit, 1);
--
2.35.1.265.g69c8d7142f-goog
The patch titled
Subject: selftests/exec: add non-regular to TEST_GEN_PROGS
has been added to the -mm tree. Its filename is
selftests-exec-add-non-regular-to-test_gen_progs.patch
This patch should soon appear at
https://ozlabs.org/~akpm/mmots/broken-out/selftests-exec-add-non-regular-to-test_gen_progs.patch
and later at
https://ozlabs.org/~akpm/mmotm/broken-out/selftests-exec-add-non-regular-to-test_gen_progs.patch
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Subject: selftests/exec: add non-regular to TEST_GEN_PROGS
The non-regular test needs to be compiled and then copied to the output
directory. Remove it from TEST_PROGS and add it to TEST_GEN_PROGS. This
removes the error thrown by rsync when the non-regular object isn't found:
rsync: [sender] link_stat "/linux/tools/testing/selftests/exec/non-regular" failed: No such file or directory (2)
rsync error: some files/attrs were not transferred (see previous errors) (code 23) at main.c(1333) [sender=3.2.3]
Link: https://lkml.kernel.org/r/20220210171323.1304501-1-usama.anjum@collabora.com
Fixes: 0f71241a8e32 ("selftests/exec: add file type errno tests")
Signed-off-by: Muhammad Usama Anjum <usama.anjum(a)collabora.com>
Reported-by: "kernelci.org bot" <bot(a)kernelci.org>
Reviewed-by: Shuah Khan <skhan(a)linuxfoundation.org>
Reviewed-by: Kees Cook <keescook(a)chromium.org>
Cc: Eric Biederman <ebiederm(a)xmission.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
--- a/tools/testing/selftests/exec/Makefile~selftests-exec-add-non-regular-to-test_gen_progs
+++ a/tools/testing/selftests/exec/Makefile
@@ -3,8 +3,8 @@ CFLAGS = -Wall
CFLAGS += -Wno-nonnull
CFLAGS += -D_GNU_SOURCE
-TEST_PROGS := binfmt_script non-regular
-TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216
+TEST_PROGS := binfmt_script
+TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular
TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
_
Patches currently in -mm which might be from usama.anjum(a)collabora.com are
selftests-exec-add-non-regular-to-test_gen_progs.patch
selftests-set-the-build-variable-to-absolute-path.patch
selftests-add-and-export-a-kernel-uapi-headers-path.patch
selftests-correct-the-headers-install-path.patch
selftests-futex-add-the-uapi-headers-include-variable.patch
selftests-kvm-add-the-uapi-headers-include-variable.patch
selftests-landlock-add-the-uapi-headers-include-variable.patch
selftests-net-add-the-uapi-headers-include-variable.patch
selftests-mptcp-add-the-uapi-headers-include-variable.patch
selftests-vm-add-the-uapi-headers-include-variable.patch
selftests-vm-remove-dependecy-from-internal-kernel-macros.patch
From: Daniel Borkmann <daniel(a)iogearbox.net>
commit 08389d888287c3823f80b0216766b71e17f0aba5 upstream.
Add a kconfig knob which allows for unprivileged bpf to be disabled by default.
If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to a value of 2.
This still allows a transition of 2 -> {0,1} through an admin. Similarly,
this also still keeps 1 -> {1} behavior intact, so that once set to permanently
disabled, it cannot be undone aside from a reboot.
We've also added extra2 with max of 2 for the procfs handler, so that an admin
still has a chance to toggle between 0 <-> 2.
Either way, as an additional alternative, applications can make use of CAP_BPF
that we added a while ago.
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.162076…
[fllinden(a)amazon.com: backported to 4.14]
Signed-off-by: Frank van der Linden <fllinden(a)amazon.com>
---
Documentation/sysctl/kernel.txt | 21 +++++++++++++++++++++
init/Kconfig | 10 ++++++++++
kernel/bpf/syscall.c | 3 ++-
kernel/sysctl.c | 29 +++++++++++++++++++++++++----
4 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 694968c7523c..3c8f5bfdf6da 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -91,6 +91,7 @@ show up in /proc/sys/kernel:
- sysctl_writes_strict
- tainted
- threads-max
+- unprivileged_bpf_disabled
- unknown_nmi_panic
- watchdog
- watchdog_thresh
@@ -999,6 +1000,26 @@ available RAM pages threads-max is reduced accordingly.
==============================================================
+unprivileged_bpf_disabled:
+
+Writing 1 to this entry will disable unprivileged calls to bpf();
+once disabled, calling bpf() without CAP_SYS_ADMIN will return
+-EPERM. Once set to 1, this can't be cleared from the running kernel
+anymore.
+
+Writing 2 to this entry will also disable unprivileged calls to bpf(),
+however, an admin can still change this setting later on, if needed, by
+writing 0 or 1 to this entry.
+
+If BPF_UNPRIV_DEFAULT_OFF is enabled in the kernel config, then this
+entry will default to 2 instead of 0.
+
+ 0 - Unprivileged calls to bpf() are enabled
+ 1 - Unprivileged calls to bpf() are disabled without recovery
+ 2 - Unprivileged calls to bpf() are disabled
+
+==============================================================
+
unknown_nmi_panic:
The value in this file affects behavior of handling NMI. When the
diff --git a/init/Kconfig b/init/Kconfig
index be58f0449c68..c87858c434cc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1378,6 +1378,16 @@ config ADVISE_SYSCALLS
applications use these syscalls, you can disable this option to save
space.
+config BPF_UNPRIV_DEFAULT_OFF
+ bool "Disable unprivileged BPF by default"
+ depends on BPF_SYSCALL
+ help
+ Disables unprivileged BPF by default by setting the corresponding
+ /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+ still reenable it by setting it to 0 later on, or permanently
+ disable it by setting it to 1 (from which no other transition to
+ 0 is possible anymore).
+
config USERFAULTFD
bool "Enable userfaultfd() system call"
select ANON_INODES
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 21073682061d..59d44f1ad958 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -37,7 +37,8 @@ static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
-int sysctl_unprivileged_bpf_disabled __read_mostly;
+int sysctl_unprivileged_bpf_disabled __read_mostly =
+ IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 74fc3a9d1923..c9a3e61c88f8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -242,6 +242,28 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write,
#endif
+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, unpriv_enable = *(int *)table->data;
+ bool locked_state = unpriv_enable == 1;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &unpriv_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (locked_state && unpriv_enable != 1)
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
+ return ret;
+}
+#endif
+
static struct ctl_table kern_table[];
static struct ctl_table vm_table[];
static struct ctl_table fs_table[];
@@ -1201,10 +1223,9 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
- /* only handle a transition from default "0" to "1" */
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &one,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = &zero,
+ .extra2 = &two,
},
#endif
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
--
2.32.0
From: Daniel Borkmann <daniel(a)iogearbox.net>
commit 08389d888287c3823f80b0216766b71e17f0aba5 upstream.
Add a kconfig knob which allows for unprivileged bpf to be disabled by default.
If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to a value of 2.
This still allows a transition of 2 -> {0,1} through an admin. Similarly,
this also still keeps 1 -> {1} behavior intact, so that once set to permanently
disabled, it cannot be undone aside from a reboot.
We've also added extra2 with max of 2 for the procfs handler, so that an admin
still has a chance to toggle between 0 <-> 2.
Either way, as an additional alternative, applications can make use of CAP_BPF
that we added a while ago.
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.162076…
[fllinden(a)amazon.com: backported to 4.19]
Signed-off-by: Frank van der Linden <fllinden(a)amazon.com>
---
Documentation/sysctl/kernel.txt | 21 +++++++++++++++++++++
init/Kconfig | 10 ++++++++++
kernel/bpf/syscall.c | 3 ++-
kernel/sysctl.c | 29 +++++++++++++++++++++++++----
4 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 37a679501ddc..8bd3b0153959 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -94,6 +94,7 @@ show up in /proc/sys/kernel:
- sysctl_writes_strict
- tainted
- threads-max
+- unprivileged_bpf_disabled
- unknown_nmi_panic
- watchdog
- watchdog_thresh
@@ -1041,6 +1042,26 @@ available RAM pages threads-max is reduced accordingly.
==============================================================
+unprivileged_bpf_disabled:
+
+Writing 1 to this entry will disable unprivileged calls to bpf();
+once disabled, calling bpf() without CAP_SYS_ADMIN will return
+-EPERM. Once set to 1, this can't be cleared from the running kernel
+anymore.
+
+Writing 2 to this entry will also disable unprivileged calls to bpf(),
+however, an admin can still change this setting later on, if needed, by
+writing 0 or 1 to this entry.
+
+If BPF_UNPRIV_DEFAULT_OFF is enabled in the kernel config, then this
+entry will default to 2 instead of 0.
+
+ 0 - Unprivileged calls to bpf() are enabled
+ 1 - Unprivileged calls to bpf() are disabled without recovery
+ 2 - Unprivileged calls to bpf() are disabled
+
+==============================================================
+
unknown_nmi_panic:
The value in this file affects behavior of handling NMI. When the
diff --git a/init/Kconfig b/init/Kconfig
index b56a125b5a76..0fe4f60c974d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1474,6 +1474,16 @@ config BPF_JIT_ALWAYS_ON
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter
+config BPF_UNPRIV_DEFAULT_OFF
+ bool "Disable unprivileged BPF by default"
+ depends on BPF_SYSCALL
+ help
+ Disables unprivileged BPF by default by setting the corresponding
+ /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+ still reenable it by setting it to 0 later on, or permanently
+ disable it by setting it to 1 (from which no other transition to
+ 0 is possible anymore).
+
config USERFAULTFD
bool "Enable userfaultfd() system call"
select ANON_INODES
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 353a8d672302..e940c1f65938 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -48,7 +48,8 @@ static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
-int sysctl_unprivileged_bpf_disabled __read_mostly;
+int sysctl_unprivileged_bpf_disabled __read_mostly =
+ IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a5d75bc38eea..03af4a493aff 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -250,6 +250,28 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write,
#endif
+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, unpriv_enable = *(int *)table->data;
+ bool locked_state = unpriv_enable == 1;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &unpriv_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (locked_state && unpriv_enable != 1)
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
+ return ret;
+}
+#endif
+
static struct ctl_table kern_table[];
static struct ctl_table vm_table[];
static struct ctl_table fs_table[];
@@ -1220,10 +1242,9 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
- /* only handle a transition from default "0" to "1" */
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &one,
- .extra2 = &one,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = &zero,
+ .extra2 = &two,
},
#endif
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
--
2.32.0