The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id, to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ca0cb9a60f6d86d4b2139c6f393a78f39edcd7cb Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer(a)rivosinc.com>
Date: Fri, 4 Feb 2022 13:14:08 -0800
Subject: [PATCH] riscv/mm: Add XIP_FIXUP for riscv_pfn_base
The missing XIP_FIXUP for riscv_pfn_base manifests as a crash early in boot on VexRiscv.
Signed-off-by: Myrtle Shah <gatecat(a)ds0.me>
[Palmer: split commit]
Fixes: 44c922572952 ("RISC-V: enable XIP")
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index eecfacac2cc5..c27294128e18 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -232,6 +232,7 @@ static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAG
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
+#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
The patch below does not apply to the 5.16-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id, to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From ca0cb9a60f6d86d4b2139c6f393a78f39edcd7cb Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer(a)rivosinc.com>
Date: Fri, 4 Feb 2022 13:14:08 -0800
Subject: [PATCH] riscv/mm: Add XIP_FIXUP for riscv_pfn_base
The missing XIP_FIXUP for riscv_pfn_base manifests as a crash early in boot on VexRiscv.
Signed-off-by: Myrtle Shah <gatecat(a)ds0.me>
[Palmer: split commit]
Fixes: 44c922572952 ("RISC-V: enable XIP")
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index eecfacac2cc5..c27294128e18 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -232,6 +232,7 @@ static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAG
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
+#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id, to <stable@vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 4b1c70aa8ed8249608bb991380cb8ff423edf49e Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer(a)rivosinc.com>
Date: Fri, 4 Feb 2022 13:13:37 -0800
Subject: [PATCH] riscv/mm: Add XIP_FIXUP for phys_ram_base
The missing XIP_FIXUP for phys_ram_base manifests as a crash early in boot on VexRiscv.
Signed-off-by: Myrtle Shah <gatecat(a)ds0.me>
[Palmer: split commit]
Fixes: 6d7f91d914bc ("riscv: Get rid of CONFIG_PHYS_RAM_BASE in kernel physical address conversion")
Cc: stable(a)vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer(a)rivosinc.com>
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index cf4d018b7d66..eecfacac2cc5 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -522,6 +522,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
}
#ifdef CONFIG_XIP_KERNEL
+#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base))
extern char _xiprom[], _exiprom[], __data_loc;
/* called from head.S with MMU off */
From: Daniel Borkmann <daniel(a)iogearbox.net>
commit 08389d888287c3823f80b0216766b71e17f0aba5 upstream.
Add a kconfig knob which allows for unprivileged bpf to be disabled by default.
If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to value of 2.
This still allows a transition of 2 -> {0,1} through an admin. Similarly,
this also still keeps 1 -> {1} behavior intact, so that once set to permanently
disabled, it cannot be undone aside from a reboot.
We've also added extra2 with max of 2 for the procfs handler, so that an admin
still has a chance to toggle between 0 <-> 2.
Either way, as an additional alternative, applications can make use of CAP_BPF
that we added a while ago.
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast(a)kernel.org>
Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.162076…
[fllinden(a)amazon.com: backported to 5.4]
Signed-off-by: Frank van der Linden <fllinden(a)amazon.com>
---
Documentation/admin-guide/sysctl/kernel.rst | 21 +++++++++++++++
init/Kconfig | 10 +++++++
kernel/bpf/syscall.c | 3 ++-
kernel/sysctl.c | 29 ++++++++++++++++++---
4 files changed, 58 insertions(+), 5 deletions(-)
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 032c7cd3cede..7b477fa19534 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1125,6 +1125,27 @@ NMI switch that most IA32 servers have fires unknown NMI up, for
example. If a system hangs up, try pressing the NMI switch.
+unprivileged_bpf_disabled:
+==========================
+
+Writing 1 to this entry will disable unprivileged calls to ``bpf()``;
+once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return
+``-EPERM``. Once set to 1, this can't be cleared from the running kernel
+anymore.
+
+Writing 2 to this entry will also disable unprivileged calls to ``bpf()``,
+however, an admin can still change this setting later on, if needed, by
+writing 0 or 1 to this entry.
+
+If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this
+entry will default to 2 instead of 0.
+
+= =============================================================
+0 Unprivileged calls to ``bpf()`` are enabled
+1 Unprivileged calls to ``bpf()`` are disabled without recovery
+2 Unprivileged calls to ``bpf()`` are disabled
+= =============================================================
+
watchdog:
=========
diff --git a/init/Kconfig b/init/Kconfig
index bafe9708cf15..264ab19c11ca 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1609,6 +1609,16 @@ config BPF_JIT_ALWAYS_ON
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter
+config BPF_UNPRIV_DEFAULT_OFF
+ bool "Disable unprivileged BPF by default"
+ depends on BPF_SYSCALL
+ help
+ Disables unprivileged BPF by default by setting the corresponding
+ /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can
+ still reenable it by setting it to 0 later on, or permanently
+ disable it by setting it to 1 (from which no other transition to
+ 0 is possible anymore).
+
config USERFAULTFD
bool "Enable userfaultfd() system call"
depends on MMU
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bf03d04a9e2f..9ebdcdaa5f16 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -39,7 +39,8 @@ static DEFINE_SPINLOCK(prog_idr_lock);
static DEFINE_IDR(map_idr);
static DEFINE_SPINLOCK(map_idr_lock);
-int sysctl_unprivileged_bpf_disabled __read_mostly;
+int sysctl_unprivileged_bpf_disabled __read_mostly =
+ IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index eae6a078619f..8494d5a706bb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -250,6 +250,28 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write,
#endif
+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int ret, unpriv_enable = *(int *)table->data;
+ bool locked_state = unpriv_enable == 1;
+ struct ctl_table tmp = *table;
+
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ tmp.data = &unpriv_enable;
+ ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+ if (write && !ret) {
+ if (locked_state && unpriv_enable != 1)
+ return -EPERM;
+ *(int *)table->data = unpriv_enable;
+ }
+ return ret;
+}
+#endif
+
static struct ctl_table kern_table[];
static struct ctl_table vm_table[];
static struct ctl_table fs_table[];
@@ -1255,10 +1277,9 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
- /* only handle a transition from default "0" to "1" */
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ONE,
- .extra2 = SYSCTL_ONE,
+ .proc_handler = bpf_unpriv_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &two,
},
{
.procname = "bpf_stats_enabled",
--
2.32.0