Memory protection keys enables an application to protect its address space from inadvertent access by its own code.
This feature is now enabled on powerpc architecture and integrated in 4.16-rc1. The patches move the selftests to arch neutral directory and enhance their test coverage.
Test ---- Verified for correctness on powerpc and on x86.
History: ------- version 14: (1) incorporated another round of comments from Dave Hansen.
version 13: (1) Incorporated comments for Dave Hansen. (2) Added one more test for correct pkey-0 behavior.
version 12: (1) fixed the offset of pkey field in the siginfo structure for x86_64 and powerpc. And tries to use the actual field if the headers have it defined.
version 11: (1) fixed a deadlock in the ptrace testcase.
version 10 and prior: (1) moved the testcase to arch neutral directory (2) split the changes into incremental patches.
Ram Pai (20): selftests/x86: Move protecton key selftest to arch neutral directory selftests/vm: rename all references to pkru to a generic name selftests/vm: move generic definitions to header file selftests/vm: typecast the pkey register selftests/vm: generic function to handle shadow key register selftests/vm: fix the wrong assert in pkey_disable_set() selftests/vm: fixed bugs in pkey_disable_clear() selftests/vm: fix alloc_random_pkey() to make it really random selftests/vm: introduce two arch independent abstraction selftests/vm: pkey register should match shadow pkey selftests/vm: generic cleanup selftests/vm: Introduce generic abstractions selftests/vm: powerpc implementation to check support for pkey selftests/vm: fix an assertion in test_pkey_alloc_exhaust() selftests/vm: associate key on a mapped page and detect access violation selftests/vm: associate key on a mapped page and detect write violation selftests/vm: detect write violation on a mapped access-denied-key page selftests/vm: testcases must restore pkey-permissions selftests/vm: sub-page allocator selftests/vm: test correct behavior of pkey-0
Thiago Jung Bauermann (2): selftests/vm: move arch-specific definitions to arch-specific header selftests/vm: Make gcc check arguments of sigsafe_printf()
tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/pkey-helpers.h | 214 ++++ tools/testing/selftests/vm/pkey-powerpc.h | 128 ++ tools/testing/selftests/vm/pkey-x86.h | 184 +++ tools/testing/selftests/vm/protection_keys.c | 1593 +++++++++++++++++++++++++ tools/testing/selftests/x86/.gitignore | 1 - tools/testing/selftests/x86/pkey-helpers.h | 219 ---- tools/testing/selftests/x86/protection_keys.c | 1485 ----------------------- 9 files changed, 2121 insertions(+), 1705 deletions(-) create mode 100644 tools/testing/selftests/vm/pkey-helpers.h create mode 100644 tools/testing/selftests/vm/pkey-powerpc.h create mode 100644 tools/testing/selftests/vm/pkey-x86.h create mode 100644 tools/testing/selftests/vm/protection_keys.c delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h delete mode 100644 tools/testing/selftests/x86/protection_keys.c
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com Acked-by: Ingo Molnar mingo@kernel.org --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/pkey-helpers.h | 219 ++++ tools/testing/selftests/vm/protection_keys.c | 1485 +++++++++++++++++++++++++ tools/testing/selftests/x86/.gitignore | 1 - tools/testing/selftests/x86/pkey-helpers.h | 219 ---- tools/testing/selftests/x86/protection_keys.c | 1485 ------------------------- 7 files changed, 1706 insertions(+), 1705 deletions(-) create mode 100644 tools/testing/selftests/vm/pkey-helpers.h create mode 100644 tools/testing/selftests/vm/protection_keys.c delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h delete mode 100644 tools/testing/selftests/x86/protection_keys.c
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 342c7bc..0214fbf 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -12,3 +12,4 @@ mlock-random-test virtual_address_range gup_benchmark va_128TBswitch +protection_keys diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index fdefa22..9788a58 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -20,6 +20,7 @@ TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range +TEST_GEN_FILES += protection_keys
TEST_PROGS := run_vmtests
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h new file mode 100644 index 0000000..254e543 --- /dev/null +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PKEYS_HELPER_H +#define _PKEYS_HELPER_H +#define _GNU_SOURCE +#include <string.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> + +#define NR_PKEYS 16 +#define PKRU_BITS_PER_PKEY 2 + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 0 +#endif +#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 +extern int dprint_in_signal; +extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; +static inline void sigsafe_printf(const char *format, ...) +{ + va_list ap; + + if (!dprint_in_signal) { + va_start(ap, format); + vprintf(format, ap); + va_end(ap); + } else { + int ret; + /* + * No printf() functions are signal-safe. + * They deadlock easily. Write the format + * string to get some output, even if + * incomplete. + */ + ret = write(1, format, strlen(format)); + if (ret < 0) + exit(1); + } +} +#define dprintf_level(level, args...) do { \ + if (level <= DEBUG_LEVEL) \ + sigsafe_printf(args); \ +} while (0) +#define dprintf0(args...) dprintf_level(0, args) +#define dprintf1(args...) dprintf_level(1, args) +#define dprintf2(args...) dprintf_level(2, args) +#define dprintf3(args...) dprintf_level(3, args) +#define dprintf4(args...) dprintf_level(4, args) + +extern unsigned int shadow_pkru; +static inline unsigned int __rdpkru(void) +{ + unsigned int eax, edx; + unsigned int ecx = 0; + unsigned int pkru; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkru = eax; + return pkru; +} + +static inline unsigned int _rdpkru(int line) +{ + unsigned int pkru = __rdpkru(); + + dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", + line, pkru, shadow_pkru); + assert(pkru == shadow_pkru); + + return pkru; +} + +#define rdpkru() _rdpkru(__LINE__) + +static inline void __wrpkru(unsigned int pkru) +{ + unsigned int eax = pkru; + unsigned int ecx = 0; + unsigned int edx = 0; + + dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); + assert(pkru == __rdpkru()); +} + +static inline void wrpkru(unsigned int pkru) +{ + dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + /* will do the shadow check for us: */ + rdpkru(); + __wrpkru(pkru); + shadow_pkru = pkru; + dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); +} + +/* + * These are technically racy. since something could + * change PKRU between the read and the write. + */ +static inline void __pkey_access_allow(int pkey, int do_allow) +{ + unsigned int pkru = rdpkru(); + int bit = pkey * 2; + + if (do_allow) + pkru &= (1<<bit); + else + pkru |= (1<<bit); + + dprintf4("pkru now: %08x\n", rdpkru()); + wrpkru(pkru); +} + +static inline void __pkey_write_allow(int pkey, int do_allow_write) +{ + long pkru = rdpkru(); + int bit = pkey * 2 + 1; + + if (do_allow_write) + pkru &= (1<<bit); + else + pkru |= (1<<bit); + + wrpkru(pkru); + dprintf4("pkru now: %08x\n", rdpkru()); +} + +#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ +#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ +#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ +#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ + +#define PAGE_SIZE 4096 +#define MB (1<<20) + +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ +#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ + +static inline int cpu_has_pku(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + eax = 0x7; + ecx = 0x0; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (!(ecx & X86_FEATURE_PKU)) { + dprintf2("cpu does not have PKU\n"); + return 0; + } + if (!(ecx & X86_FEATURE_OSPKE)) { + dprintf2("cpu does not have OSPKE\n"); + return 0; + } + return 1; +} + +#define XSTATE_PKRU_BIT (9) +#define XSTATE_PKRU 0x200 + +int pkru_xstate_offset(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + int xstate_offset; + int xstate_size; + unsigned long XSTATE_CPUID = 0xd; + int leaf; + + /* assume that XSTATE_PKRU is set in XCR0 */ + leaf = XSTATE_PKRU_BIT; + { + eax = XSTATE_CPUID; + ecx = leaf; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (leaf == XSTATE_PKRU_BIT) { + xstate_offset = ebx; + xstate_size = eax; + } + } + + if (xstate_size == 0) { + printf("could not find size/offset of PKRU in xsave state\n"); + return 0; + } + + return xstate_offset; +} + +#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c new file mode 100644 index 0000000..460b4bd --- /dev/null +++ b/tools/testing/selftests/vm/protection_keys.c @@ -0,0 +1,1485 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) + * + * There are examples in here of: + * * how to set protection keys on memory + * * how to set/clear bits in PKRU (the rights register) + * * how to handle SEGV_PKRU signals and extract pkey-relevant + * information from the siginfo + * + * Things to add: + * make sure KSM and KSM COW breaking works + * prefault pages in at malloc, or not + * protect MPX bounds tables with protection keys? + * make sure VMA splitting/merging is working correctly + * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys + * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel + * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks + * + * Compile like this: + * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + */ +#define _GNU_SOURCE +#include <errno.h> +#include <linux/futex.h> +#include <sys/time.h> +#include <sys/syscall.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/ptrace.h> +#include <setjmp.h> + +#include "pkey-helpers.h" + +int iteration_nr = 1; +int test_nr; + +unsigned int shadow_pkru; + +#define HPAGE_SIZE (1UL<<21) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) +#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) + +int dprint_in_signal; +char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +extern void abort_hooks(void); +#define pkey_assert(condition) do { \ + if (!(condition)) { \ + dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ + __FILE__, __LINE__, \ + test_nr, iteration_nr); \ + dprintf0("errno at assert: %d", errno); \ + abort_hooks(); \ + exit(__LINE__); \ + } \ +} while (0) + +void cat_into_file(char *str, char *file) +{ + int fd = open(file, O_RDWR); + int ret; + + dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); + /* + * these need to be raw because they are called under + * pkey_assert() + */ + if (fd < 0) { + fprintf(stderr, "error opening '%s'\n", str); + perror("error: "); + exit(__LINE__); + } + + ret = write(fd, str, strlen(str)); + if (ret != strlen(str)) { + perror("write to file failed"); + fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); + exit(__LINE__); + } + close(fd); +} + +#if CONTROL_TRACING > 0 +static int warned_tracing; +int tracing_root_ok(void) +{ + if (geteuid() != 0) { + if (!warned_tracing) + fprintf(stderr, "WARNING: not run as root, " + "can not do tracing control\n"); + warned_tracing = 1; + return 0; + } + return 1; +} +#endif + +void tracing_on(void) +{ +#if CONTROL_TRACING > 0 +#define TRACEDIR "/sys/kernel/debug/tracing" + char pidstr[32]; + + if (!tracing_root_ok()) + return; + + sprintf(pidstr, "%d", getpid()); + cat_into_file("0", TRACEDIR "/tracing_on"); + cat_into_file("\n", TRACEDIR "/trace"); + if (1) { + cat_into_file("function_graph", TRACEDIR "/current_tracer"); + cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); + } else { + cat_into_file("nop", TRACEDIR "/current_tracer"); + } + cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); + cat_into_file("1", TRACEDIR "/tracing_on"); + dprintf1("enabled tracing\n"); +#endif +} + +void tracing_off(void) +{ +#if CONTROL_TRACING > 0 + if (!tracing_root_ok()) + return; + cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); +#endif +} + +void abort_hooks(void) +{ + fprintf(stderr, "running %s()...\n", __func__); + tracing_off(); +#ifdef SLEEP_ON_ABORT + sleep(SLEEP_ON_ABORT); +#endif +} + +static inline void __page_o_noops(void) +{ + /* 8-bytes of instruction * 512 bytes = 1 page */ + asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); +} + +/* + * This attempts to have roughly a page of instructions followed by a few + * instructions that do a write, and another page of instructions. That + * way, we are pretty sure that the write is in the second page of + * instructions and has at least a page of padding behind it. + * + * *That* lets us be sure to madvise() away the write instruction, which + * will then fault, which makes sure that the fault code handles + * execute-only memory properly. + */ +__attribute__((__aligned__(PAGE_SIZE))) +void lots_o_noops_around_write(int *write_to_me) +{ + dprintf3("running %s()\n", __func__); + __page_o_noops(); + /* Assume this happens in the second page of instructions: */ + *write_to_me = __LINE__; + /* pad out by another page: */ + __page_o_noops(); + dprintf3("%s() done\n", __func__); +} + +/* Define some kernel-like types */ +#define u8 uint8_t +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#ifdef __i386__ + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 380 +#endif + +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 +#endif + +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 + +#else + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 329 +#endif + +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 +#endif + +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 + +#endif + +void dump_mem(void *dumpme, int len_bytes) +{ + char *c = (void *)dumpme; + int i; + + for (i = 0; i < len_bytes; i += sizeof(u64)) { + u64 *ptr = (u64 *)(c + i); + dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); + } +} + +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + +#ifndef SEGV_PKUERR +# define SEGV_PKUERR 4 +#endif + +static char *si_code_str(int si_code) +{ + if (si_code == SEGV_MAPERR) + return "SEGV_MAPERR"; + if (si_code == SEGV_ACCERR) + return "SEGV_ACCERR"; + if (si_code == SEGV_BNDERR) + return "SEGV_BNDERR"; + if (si_code == SEGV_PKUERR) + return "SEGV_PKUERR"; + return "UNKNOWN"; +} + +int pkru_faults; +int last_si_pkey = -1; +void signal_handler(int signum, siginfo_t *si, void *vucontext) +{ + ucontext_t *uctxt = vucontext; + int trapno; + unsigned long ip; + char *fpregs; + u32 *pkru_ptr; + u64 siginfo_pkey; + u32 *si_pkey_ptr; + int pkru_offset; + fpregset_t fpregset; + + dprint_in_signal = 1; + dprintf1(">>>>===============SIGSEGV============================\n"); + dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, + __rdpkru(), shadow_pkru); + + trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; + ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + fpregset = uctxt->uc_mcontext.fpregs; + fpregs = (void *)fpregset; + + dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, + trapno, ip, si_code_str(si->si_code), si->si_code); +#ifdef __i386__ + /* + * 32-bit has some extra padding so that userspace can tell whether + * the XSTATE header is present in addition to the "legacy" FPU + * state. We just assume that it is here. + */ + fpregs += 0x70; +#endif + pkru_offset = pkru_xstate_offset(); + pkru_ptr = (void *)(&fpregs[pkru_offset]); + + dprintf1("siginfo: %p\n", si); + dprintf1(" fpregs: %p\n", fpregs); + /* + * If we got a PKRU fault, we *HAVE* to have at least one bit set in + * here. + */ + dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); + if (DEBUG_LEVEL > 4) + dump_mem(pkru_ptr - 128, 256); + pkey_assert(*pkru_ptr); + + if ((si->si_code == SEGV_MAPERR) || + (si->si_code == SEGV_ACCERR) || + (si->si_code == SEGV_BNDERR)) { + printf("non-PK si_code, exiting...\n"); + exit(4); + } + + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem((u8 *)si_pkey_ptr - 8, 24); + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; + + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); + /* need __rdpkru() version so we do not do shadow_pkru checking */ + dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); + dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); + *(u64 *)pkru_ptr = 0x00000000; + dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); + pkru_faults++; + dprintf1("<<<<==================================================\n"); + dprint_in_signal = 0; +} + +int wait_all_children(void) +{ + int status; + return waitpid(-1, &status, 0); +} + +void sig_chld(int x) +{ + dprint_in_signal = 1; + dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); + dprint_in_signal = 0; +} + +void setup_sigsegv_handler(void) +{ + int r, rs; + struct sigaction newact; + struct sigaction oldact; + + /* #PF is mapped to sigsegv */ + int signum = SIGSEGV; + + newact.sa_handler = 0; + newact.sa_sigaction = signal_handler; + + /*sigset_t - signals to block while in the handler */ + /* get the old signal mask. */ + rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); + pkey_assert(rs == 0); + + /* call sa_sigaction, not sa_handler*/ + newact.sa_flags = SA_SIGINFO; + + newact.sa_restorer = 0; /* void(*)(), obsolete */ + r = sigaction(signum, &newact, &oldact); + r = sigaction(SIGALRM, &newact, &oldact); + pkey_assert(r == 0); +} + +void setup_handlers(void) +{ + signal(SIGCHLD, &sig_chld); + setup_sigsegv_handler(); +} + +pid_t fork_lazy_child(void) +{ + pid_t forkret; + + forkret = fork(); + pkey_assert(forkret >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); + + if (!forkret) { + /* in the child */ + while (1) { + dprintf1("child sleeping...\n"); + sleep(30); + } + } + return forkret; +} + +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif + +static u32 hw_pkey_get(int pkey, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u32 pkru = __rdpkru(); + u32 shifted_pkru; + u32 masked_pkru; + + dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", + __func__, pkey, flags, 0, 0); + dprintf2("%s() raw pkru: %x\n", __func__, pkru); + + shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); + dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); + masked_pkru = shifted_pkru & mask; + dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); + /* + * shift down the relevant bits to the lowest two, then + * mask off all the other high bits. + */ + return masked_pkru; +} + +static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u32 old_pkru = __rdpkru(); + u32 new_pkru; + + /* make sure that 'rights' only contains the bits we expect: */ + assert(!(rights & ~mask)); + + /* copy old pkru */ + new_pkru = old_pkru; + /* mask out bits from pkey in old value: */ + new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); + /* OR in new bits for pkey: */ + new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); + + __wrpkru(new_pkru); + + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", + __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); + return 0; +} + +void pkey_disable_set(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights; + u32 orig_pkru = rdpkru(); + + dprintf1("START->%s(%d, 0x%x)\n", __func__, + pkey, flags); + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + pkey_rights = hw_pkey_get(pkey, syscall_flags); + + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); + assert(!ret); + /*pkru and flags have the same format */ + shadow_pkru |= flags << (pkey * 2); + dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); + + pkey_assert(ret >= 0); + + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + if (flags) + pkey_assert(rdpkru() > orig_pkru); + dprintf1("END<---%s(%d, 0x%x)\n", __func__, + pkey, flags); +} + +void pkey_disable_clear(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights = hw_pkey_get(pkey, syscall_flags); + u32 orig_pkru = rdpkru(); + + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = hw_pkey_set(pkey, pkey_rights, 0); + /* pkru and flags have the same format */ + shadow_pkru &= ~(flags << (pkey * 2)); + pkey_assert(ret >= 0); + + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + if (flags) + assert(rdpkru() > orig_pkru); +} + +void pkey_write_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); +} +void pkey_write_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_WRITE); +} +void pkey_access_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); +} +void pkey_access_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); +} + +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, + ptr, size, orig_prot, pkey); + + errno = 0; + sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); + if (errno) { + dprintf2("SYS_mprotect_key sret: %d\n", sret); + dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); + dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); + if (DEBUG_LEVEL >= 2) + perror("SYS_mprotect_pkey"); + } + return sret; +} + +int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", + __func__, flags, init_val, ret, errno); + return ret; +} + +int alloc_pkey(void) +{ + int ret; + unsigned long init_val = 0x0; + + dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", + __LINE__, __rdpkru(), shadow_pkru); + ret = sys_pkey_alloc(0, init_val); + /* + * pkey_alloc() sets PKRU, so we need to reflect it in + * shadow_pkru: + */ + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + if (ret) { + /* clear both the bits: */ + shadow_pkru &= ~(0x3 << (ret * 2)); + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + /* + * move the new state in from init_val + * (remember, we cheated and init_val == pkru format) + */ + shadow_pkru |= (init_val << (ret * 2)); + } + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); + /* for shadow checking: */ + rdpkru(); + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +int sys_pkey_free(unsigned long pkey) +{ + int ret = syscall(SYS_pkey_free, pkey); + dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); + return ret; +} + +/* + * I had a bug where pkey bits could be set by mprotect() but + * not cleared. This ensures we get lots of random bit sets + * and clears on the vma and pte pkey bits. + */ +int alloc_random_pkey(void) +{ + int max_nr_pkey_allocs; + int ret; + int i; + int alloced_pkeys[NR_PKEYS]; + int nr_alloced = 0; + int random_index; + memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); + + /* allocate every possible key and make a note of which ones we got */ + max_nr_pkey_allocs = NR_PKEYS; + max_nr_pkey_allocs = 1; + for (i = 0; i < max_nr_pkey_allocs; i++) { + int new_pkey = alloc_pkey(); + if (new_pkey < 0) + break; + alloced_pkeys[nr_alloced++] = new_pkey; + } + + pkey_assert(nr_alloced > 0); + /* select a random one out of the allocated ones */ + random_index = rand() % nr_alloced; + ret = alloced_pkeys[random_index]; + /* now zero it out so we don't free it next */ + alloced_pkeys[random_index] = 0; + + /* go through the allocated ones that we did not want and free them */ + for (i = 0; i < nr_alloced; i++) { + int free_ret; + if (!alloced_pkeys[i]) + continue; + free_ret = sys_pkey_free(alloced_pkeys[i]); + pkey_assert(!free_ret); + } + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int nr_iterations = random() % 100; + int ret; + + while (0) { + int rpkey = alloc_random_pkey(); + ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); + dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", + ptr, size, orig_prot, pkey, ret); + if (nr_iterations-- < 0) + break; + + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + sys_pkey_free(rpkey); + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + } + pkey_assert(pkey < NR_PKEYS); + + ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); + dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", + ptr, size, orig_prot, pkey, ret); + pkey_assert(!ret); + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +struct pkey_malloc_record { + void *ptr; + long size; + int prot; +}; +struct pkey_malloc_record *pkey_malloc_records; +struct pkey_malloc_record *pkey_last_malloc_record; +long nr_pkey_malloc_records; +void record_pkey_malloc(void *ptr, long size, int prot) +{ + long i; + struct pkey_malloc_record *rec = NULL; + + for (i = 0; i < nr_pkey_malloc_records; i++) { + rec = &pkey_malloc_records[i]; + /* find a free record */ + if (rec) + break; + } + if (!rec) { + /* every record is full */ + size_t old_nr_records = nr_pkey_malloc_records; + size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); + size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); + dprintf2("new_nr_records: %zd\n", new_nr_records); + dprintf2("new_size: %zd\n", new_size); + pkey_malloc_records = realloc(pkey_malloc_records, new_size); + pkey_assert(pkey_malloc_records != NULL); + rec = &pkey_malloc_records[nr_pkey_malloc_records]; + /* + * realloc() does not initialize memory, so zero it from + * the first new record all the way to the end. + */ + for (i = 0; i < new_nr_records - old_nr_records; i++) + memset(rec + i, 0, sizeof(*rec)); + } + dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", + (int)(rec - pkey_malloc_records), rec, ptr, size); + rec->ptr = ptr; + rec->size = size; + rec->prot = prot; + pkey_last_malloc_record = rec; + nr_pkey_malloc_records++; +} + +void free_pkey_malloc(void *ptr) +{ + long i; + int ret; + dprintf3("%s(%p)\n", __func__, ptr); + for (i = 0; i < nr_pkey_malloc_records; i++) { + struct pkey_malloc_record *rec = &pkey_malloc_records[i]; + dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", + ptr, i, rec, rec->ptr, rec->size); + if ((ptr < rec->ptr) || + (ptr >= rec->ptr + rec->size)) + continue; + + dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", + ptr, i, rec, rec->ptr, rec->size); + nr_pkey_malloc_records--; + ret = munmap(rec->ptr, rec->size); + dprintf3("munmap ret: %d\n", ret); + pkey_assert(!ret); + dprintf3("clearing rec->ptr, rec: %p\n", rec); + rec->ptr = NULL; + dprintf3("done clearing rec->ptr, rec: %p\n", rec); + return; + } + pkey_assert(false); +} + + +void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) +{ + void *ptr; + int ret; + + rdpkru(); + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); + pkey_assert(!ret); + record_pkey_malloc(ptr, size, prot); + rdpkru(); + + dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); + return ptr; +} + +void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) +{ + int ret; + void *ptr; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + /* + * Guarantee we can fit at least one huge page in the resulting + * allocation by allocating space for 2: + */ + size = ALIGN_UP(size, HPAGE_SIZE * 2); + ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + record_pkey_malloc(ptr, size, prot); + mprotect_pkey(ptr, size, prot, pkey); + + dprintf1("unaligned ptr: %p\n", ptr); + ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); + dprintf1(" aligned ptr: %p\n", ptr); + ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); + dprintf1("MADV_HUGEPAGE ret: %d\n", ret); + ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); + dprintf1("MADV_WILLNEED ret: %d\n", ret); + memset(ptr, 0, HPAGE_SIZE); + + dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); + return ptr; +} + +int hugetlb_setup_ok; +#define GET_NR_HUGE_PAGES 10 +void setup_hugetlbfs(void) +{ + int err; + int fd; + char buf[] = "123"; + + if (geteuid() != 0) { + fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); + return; + } + + cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); + + /* + * Now go make sure that we got the pages and that they + * are 2M pages. Someone might have made 1G the default. + */ + fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); + if (fd < 0) { + perror("opening sysfs 2M hugetlb config"); + return; + } + + /* -1 to guarantee leaving the trailing \0 */ + err = read(fd, buf, sizeof(buf)-1); + close(fd); + if (err <= 0) { + perror("reading sysfs 2M hugetlb config"); + return; + } + + if (atoi(buf) != GET_NR_HUGE_PAGES) { + fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", + buf, GET_NR_HUGE_PAGES); + return; + } + + hugetlb_setup_ok = 1; +} + +void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) +{ + void *ptr; + int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; + + if (!hugetlb_setup_ok) + return PTR_ERR_ENOTSUP; + + dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); + size = ALIGN_UP(size, HPAGE_SIZE * 2); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); + pkey_assert(ptr != (void *)-1); + mprotect_pkey(ptr, size, prot, pkey); + + record_pkey_malloc(ptr, size, prot); + + dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); + return ptr; +} + +void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) +{ + void *ptr; + int fd; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + fd = open("/dax/foo", O_RDWR); + pkey_assert(fd >= 0); + + ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); + pkey_assert(ptr != (void *)-1); + + mprotect_pkey(ptr, size, prot, pkey); + + record_pkey_malloc(ptr, size, prot); + + dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); + close(fd); + return ptr; +} + +void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { + + malloc_pkey_with_mprotect, + malloc_pkey_anon_huge, + malloc_pkey_hugetlb +/* can not do direct with the pkey_mprotect() API: + malloc_pkey_mmap_direct, + malloc_pkey_mmap_dax, +*/ +}; + +void *malloc_pkey(long size, int prot, u16 pkey) +{ + void *ret; + static int malloc_type; + int nr_malloc_types = ARRAY_SIZE(pkey_malloc); + + pkey_assert(pkey < NR_PKEYS); + + while (1) { + pkey_assert(malloc_type < nr_malloc_types); + + ret = pkey_malloc[malloc_type](size, prot, pkey); + pkey_assert(ret != (void *)-1); + + malloc_type++; + if (malloc_type >= nr_malloc_types) + malloc_type = (random()%nr_malloc_types); + + /* try again if the malloc_type we tried is unsupported */ + if (ret == PTR_ERR_ENOTSUP) + continue; + + break; + } + + dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, + size, prot, pkey, ret); + return ret; +} + +int last_pkru_faults; +#define UNKNOWN_PKEY -2 +void expected_pk_fault(int pkey) +{ + dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", + __func__, last_pkru_faults, pkru_faults); + dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); + pkey_assert(last_pkru_faults + 1 == pkru_faults); + + /* + * For exec-only memory, we do not know the pkey in + * advance, so skip this check. + */ + if (pkey != UNKNOWN_PKEY) + pkey_assert(last_si_pkey == pkey); + + /* + * The signal handler shold have cleared out PKRU to let the + * test program continue. We now have to restore it. + */ + if (__rdpkru() != 0) + pkey_assert(0); + + __wrpkru(shadow_pkru); + dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", + __func__, shadow_pkru); + last_pkru_faults = pkru_faults; + last_si_pkey = -1; +} + +#define do_not_expect_pk_fault(msg) do { \ + if (last_pkru_faults != pkru_faults) \ + dprintf0("unexpected PK fault: %s\n", msg); \ + pkey_assert(last_pkru_faults == pkru_faults); \ +} while (0) + +int test_fds[10] = { -1 }; +int nr_test_fds; +void __save_test_fd(int fd) +{ + pkey_assert(fd >= 0); + pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); + test_fds[nr_test_fds] = fd; + nr_test_fds++; +} + +int get_test_read_fd(void) +{ + int test_fd = open("/etc/passwd", O_RDONLY); + __save_test_fd(test_fd); + return test_fd; +} + +void close_test_fds(void) +{ + int i; + + for (i = 0; i < nr_test_fds; i++) { + if (test_fds[i] < 0) + continue; + close(test_fds[i]); + test_fds[i] = -1; + } + nr_test_fds = 0; +} + +#define barrier() __asm__ __volatile__("": : :"memory") +__attribute__((noinline)) int read_ptr(int *ptr) +{ + /* + * Keep GCC from optimizing this away somehow + */ + barrier(); + return *ptr; +} + +void test_read_of_write_disabled_region(int *ptr, u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling write access to PKEY[1], doing read\n"); + pkey_write_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + dprintf1("\n"); +} +void test_read_of_access_disabled_region(int *ptr, u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); + rdpkru(); + pkey_access_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + expected_pk_fault(pkey); +} +void test_write_of_write_disabled_region(int *ptr, u16 pkey) +{ + dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); + pkey_write_deny(pkey); + *ptr = __LINE__; + expected_pk_fault(pkey); +} +void test_write_of_access_disabled_region(int *ptr, u16 pkey) +{ + dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); + pkey_access_deny(pkey); + *ptr = __LINE__; + expected_pk_fault(pkey); +} +void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) +{ + int ret; + int test_fd = get_test_read_fd(); + + dprintf1("disabling access to PKEY[%02d], " + "having kernel read() to buffer\n", pkey); + pkey_access_deny(pkey); + ret = read(test_fd, ptr, 1); + dprintf1("read ret: %d\n", ret); + pkey_assert(ret); +} +void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) +{ + int ret; + int test_fd = get_test_read_fd(); + + pkey_write_deny(pkey); + ret = read(test_fd, ptr, 100); + dprintf1("read ret: %d\n", ret); + if (ret < 0 && (DEBUG_LEVEL > 0)) + perror("verbose read result (OK for this to be bad)"); + pkey_assert(ret); +} + +void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) +{ + int pipe_ret, vmsplice_ret; + struct iovec iov; + int pipe_fds[2]; + + pipe_ret = pipe(pipe_fds); + + pkey_assert(pipe_ret == 0); + dprintf1("disabling access to PKEY[%02d], " + "having kernel vmsplice from buffer\n", pkey); + pkey_access_deny(pkey); + iov.iov_base = ptr; + iov.iov_len = PAGE_SIZE; + vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); + dprintf1("vmsplice() ret: %d\n", vmsplice_ret); + pkey_assert(vmsplice_ret == -1); + + close(pipe_fds[0]); + close(pipe_fds[1]); +} + +void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) +{ + int ignored = 0xdada; + int futex_ret; + int some_int = __LINE__; + + dprintf1("disabling write to PKEY[%02d], " + "doing futex gunk in buffer\n", pkey); + *ptr = some_int; + pkey_write_deny(pkey); + futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, + &ignored, ignored); + if (DEBUG_LEVEL > 0) + perror("futex"); + dprintf1("futex() ret: %d\n", futex_ret); +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) +{ + int err; + int i; + + /* Note: 0 is the default pkey, so don't mess with it */ + for (i = 1; i < NR_PKEYS; i++) { + if (pkey == i) + continue; + + dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); + err = sys_pkey_free(i); + pkey_assert(err); + + err = sys_pkey_free(i); + pkey_assert(err); + + err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); + pkey_assert(err); + } +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) +{ + int err; + int bad_pkey = NR_PKEYS+99; + + /* pass a known-invalid pkey in: */ + err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); + pkey_assert(err); +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_alloc_exhaust(int *ptr, u16 pkey) +{ + int err; + int allocated_pkeys[NR_PKEYS] = {0}; + int nr_allocated_pkeys = 0; + int i; + + for (i = 0; i < NR_PKEYS*2; i++) { + int new_pkey; + dprintf1("%s() alloc loop: %d\n", __func__, i); + new_pkey = alloc_pkey(); + dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, err, __rdpkru(), shadow_pkru); + rdpkru(); /* for shadow checking */ + dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); + if ((new_pkey == -1) && (errno == ENOSPC)) { + dprintf2("%s() failed to allocate pkey after %d tries\n", + __func__, nr_allocated_pkeys); + break; + } + pkey_assert(nr_allocated_pkeys < NR_PKEYS); + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + } + + dprintf3("%s()::%d\n", __func__, __LINE__); + + /* + * ensure it did not reach the end of the loop without + * failure: + */ + pkey_assert(i < NR_PKEYS*2); + + /* + * There are 16 pkeys supported in hardware. Three are + * allocated by the time we get here: + * 1. The default key (0) + * 2. One possibly consumed by an execute-only mapping. + * 3. One allocated by the test code and passed in via + * 'pkey' to this function. + * Ensure that we can allocate at least another 13 (16-3). + */ + pkey_assert(i >= NR_PKEYS-3); + + for (i = 0; i < nr_allocated_pkeys; i++) { + err = sys_pkey_free(allocated_pkeys[i]); + pkey_assert(!err); + rdpkru(); /* for shadow checking */ + } +} + +/* + * pkey 0 is special. It is allocated by default, so you do not + * have to call pkey_alloc() to use it first. Make sure that it + * is usable. + */ +void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +{ + long size; + int prot; + + assert(pkey_last_malloc_record); + size = pkey_last_malloc_record->size; + /* + * This is a bit of a hack. But mprotect() requires + * huge-page-aligned sizes when operating on hugetlbfs. + * So, make sure that we use something that's a multiple + * of a huge page when we can. + */ + if (size >= HPAGE_SIZE) + size = HPAGE_SIZE; + prot = pkey_last_malloc_record->prot; + + /* Use pkey 0 */ + mprotect_pkey(ptr, size, prot, 0); + + /* Make sure that we can set it back to the original pkey. */ + mprotect_pkey(ptr, size, prot, pkey); +} + +void test_ptrace_of_child(int *ptr, u16 pkey) +{ + __attribute__((__unused__)) int peek_result; + pid_t child_pid; + void *ignored = 0; + long ret; + int status; + /* + * This is the "control" for our little expermient. Make sure + * we can always access it when ptracing. + */ + int *plain_ptr_unaligned = malloc(HPAGE_SIZE); + int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); + + /* + * Fork a child which is an exact copy of this process, of course. + * That means we can do all of our tests via ptrace() and then plain + * memory access and ensure they work differently. + */ + child_pid = fork_lazy_child(); + dprintf1("[%d] child pid: %d\n", getpid(), child_pid); + + ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); + if (ret) + perror("attach"); + dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); + pkey_assert(ret != -1); + ret = waitpid(child_pid, &status, WUNTRACED); + if ((ret != child_pid) || !(WIFSTOPPED(status))) { + fprintf(stderr, "weird waitpid result %ld stat %x\n", + ret, status); + pkey_assert(0); + } + dprintf2("waitpid ret: %ld\n", ret); + dprintf2("waitpid status: %d\n", status); + + pkey_access_deny(pkey); + pkey_write_deny(pkey); + + /* Write access, untested for now: + ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); + pkey_assert(ret != -1); + dprintf1("poke at %p: %ld\n", peek_at, ret); + */ + + /* + * Try to access the pkey-protected "ptr" via ptrace: + */ + ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); + /* expect it to work, without an error: */ + pkey_assert(ret != -1); + /* Now access from the current task, and expect an exception: */ + peek_result = read_ptr(ptr); + expected_pk_fault(pkey); + + /* + * Try to access the NON-pkey-protected "plain_ptr" via ptrace: + */ + ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); + /* expect it to work, without an error: */ + pkey_assert(ret != -1); + /* Now access from the current task, and expect NO exception: */ + peek_result = read_ptr(plain_ptr); + do_not_expect_pk_fault("read plain pointer after ptrace"); + + ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); + pkey_assert(ret != -1); + + ret = kill(child_pid, SIGKILL); + pkey_assert(ret != -1); + + wait(&status); + + free(plain_ptr_unaligned); +} + +void *get_pointer_to_instructions(void) +{ + void *p1; + + p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); + dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); + /* lots_o_noops_around_write should be page-aligned already */ + assert(p1 == &lots_o_noops_around_write); + + /* Point 'p1' at the *second* page of the function: */ + p1 += PAGE_SIZE; + + /* + * Try to ensure we fault this in on next touch to ensure + * we get an instruction fault as opposed to a data one + */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + + return p1; +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = get_pointer_to_instructions(); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); + pkey_assert(!ret); + pkey_access_deny(pkey); + + dprintf2("pkru: %x\n", rdpkru()); + + /* + * Make sure this is an *instruction* fault + */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(pkey); +} + +void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + dprintf1("%s() start\n", __func__); + + p1 = get_pointer_to_instructions(); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + /* Use a *normal* mprotect(), not mprotect_pkey(): */ + ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); + pkey_assert(!ret); + + dprintf2("pkru: %x\n", rdpkru()); + + /* Make sure this is an *instruction* fault */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(UNKNOWN_PKEY); + + /* + * Put the memory back to non-PROT_EXEC. Should clear the + * exec-only pkey off the VMA and allow it to be readable + * again. Go to PROT_NONE first to check for a kernel bug + * that did not clear the pkey when doing PROT_NONE. + */ + ret = mprotect(p1, PAGE_SIZE, PROT_NONE); + pkey_assert(!ret); + + ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); + pkey_assert(!ret); + ptr_contents = read_ptr(p1); + do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); +} + +void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) +{ + int size = PAGE_SIZE; + int sret; + + if (cpu_has_pku()) { + dprintf1("SKIP: %s: no CPU support\n", __func__); + return; + } + + sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); + pkey_assert(sret < 0); +} + +void (*pkey_tests[])(int *ptr, u16 pkey) = { + test_read_of_write_disabled_region, + test_read_of_access_disabled_region, + test_write_of_write_disabled_region, + test_write_of_access_disabled_region, + test_kernel_write_of_access_disabled_region, + test_kernel_write_of_write_disabled_region, + test_kernel_gup_of_access_disabled_region, + test_kernel_gup_write_to_write_disabled_region, + test_executing_on_unreadable_memory, + test_implicit_mprotect_exec_only_memory, + test_mprotect_with_pkey_0, + test_ptrace_of_child, + test_pkey_syscalls_on_non_allocated_pkey, + test_pkey_syscalls_bad_args, + test_pkey_alloc_exhaust, +}; + +void run_tests_once(void) +{ + int *ptr; + int prot = PROT_READ|PROT_WRITE; + + for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { + int pkey; + int orig_pkru_faults = pkru_faults; + + dprintf1("======================\n"); + dprintf1("test %d preparing...\n", test_nr); + + tracing_on(); + pkey = alloc_random_pkey(); + dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); + ptr = malloc_pkey(PAGE_SIZE, prot, pkey); + dprintf1("test %d starting...\n", test_nr); + pkey_tests[test_nr](ptr, pkey); + dprintf1("freeing test memory: %p\n", ptr); + free_pkey_malloc(ptr); + sys_pkey_free(pkey); + + dprintf1("pkru_faults: %d\n", pkru_faults); + dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); + + tracing_off(); + close_test_fds(); + + printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); + dprintf1("======================\n\n"); + } + iteration_nr++; +} + +void pkey_setup_shadow(void) +{ + shadow_pkru = __rdpkru(); +} + +int main(void) +{ + int nr_iterations = 22; + + setup_handlers(); + + printf("has pku: %d\n", cpu_has_pku()); + + if (!cpu_has_pku()) { + int size = PAGE_SIZE; + int *ptr; + + printf("running PKEY tests for unsupported CPU/OS\n"); + + ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + assert(ptr != (void *)-1); + test_mprotect_pkey_on_unsupported_cpu(ptr, 1); + exit(0); + } + + pkey_setup_shadow(); + printf("startup pkru: %x\n", rdpkru()); + setup_hugetlbfs(); + + while (nr_iterations-- > 0) + run_tests_once(); + + printf("done (all tests OK)\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore index 7757f73..eb30ffd 100644 --- a/tools/testing/selftests/x86/.gitignore +++ b/tools/testing/selftests/x86/.gitignore @@ -11,5 +11,4 @@ ldt_gdt iopl mpx-mini-test ioperm -protection_keys test_vdso diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h deleted file mode 100644 index 254e543..0000000 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ /dev/null @@ -1,219 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PKEYS_HELPER_H -#define _PKEYS_HELPER_H -#define _GNU_SOURCE -#include <string.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> - -#define NR_PKEYS 16 -#define PKRU_BITS_PER_PKEY 2 - -#ifndef DEBUG_LEVEL -#define DEBUG_LEVEL 0 -#endif -#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 -extern int dprint_in_signal; -extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -static inline void sigsafe_printf(const char *format, ...) -{ - va_list ap; - - if (!dprint_in_signal) { - va_start(ap, format); - vprintf(format, ap); - va_end(ap); - } else { - int ret; - /* - * No printf() functions are signal-safe. - * They deadlock easily. Write the format - * string to get some output, even if - * incomplete. - */ - ret = write(1, format, strlen(format)); - if (ret < 0) - exit(1); - } -} -#define dprintf_level(level, args...) do { \ - if (level <= DEBUG_LEVEL) \ - sigsafe_printf(args); \ -} while (0) -#define dprintf0(args...) dprintf_level(0, args) -#define dprintf1(args...) dprintf_level(1, args) -#define dprintf2(args...) dprintf_level(2, args) -#define dprintf3(args...) dprintf_level(3, args) -#define dprintf4(args...) dprintf_level(4, args) - -extern unsigned int shadow_pkru; -static inline unsigned int __rdpkru(void) -{ - unsigned int eax, edx; - unsigned int ecx = 0; - unsigned int pkru; - - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (eax), "=d" (edx) - : "c" (ecx)); - pkru = eax; - return pkru; -} - -static inline unsigned int _rdpkru(int line) -{ - unsigned int pkru = __rdpkru(); - - dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", - line, pkru, shadow_pkru); - assert(pkru == shadow_pkru); - - return pkru; -} - -#define rdpkru() _rdpkru(__LINE__) - -static inline void __wrpkru(unsigned int pkru) -{ - unsigned int eax = pkru; - unsigned int ecx = 0; - unsigned int edx = 0; - - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkru == __rdpkru()); -} - -static inline void wrpkru(unsigned int pkru) -{ - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - /* will do the shadow check for us: */ - rdpkru(); - __wrpkru(pkru); - shadow_pkru = pkru; - dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); -} - -/* - * These are technically racy. since something could - * change PKRU between the read and the write. - */ -static inline void __pkey_access_allow(int pkey, int do_allow) -{ - unsigned int pkru = rdpkru(); - int bit = pkey * 2; - - if (do_allow) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - dprintf4("pkru now: %08x\n", rdpkru()); - wrpkru(pkru); -} - -static inline void __pkey_write_allow(int pkey, int do_allow_write) -{ - long pkru = rdpkru(); - int bit = pkey * 2 + 1; - - if (do_allow_write) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - wrpkru(pkru); - dprintf4("pkru now: %08x\n", rdpkru()); -} - -#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ -#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ -#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ -#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ - -#define PAGE_SIZE 4096 -#define MB (1<<20) - -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ -#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ - -static inline int cpu_has_pku(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - - eax = 0x7; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (!(ecx & X86_FEATURE_PKU)) { - dprintf2("cpu does not have PKU\n"); - return 0; - } - if (!(ecx & X86_FEATURE_OSPKE)) { - dprintf2("cpu does not have OSPKE\n"); - return 0; - } - return 1; -} - -#define XSTATE_PKRU_BIT (9) -#define XSTATE_PKRU 0x200 - -int pkru_xstate_offset(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - int xstate_offset; - int xstate_size; - unsigned long XSTATE_CPUID = 0xd; - int leaf; - - /* assume that XSTATE_PKRU is set in XCR0 */ - leaf = XSTATE_PKRU_BIT; - { - eax = XSTATE_CPUID; - ecx = leaf; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (leaf == XSTATE_PKRU_BIT) { - xstate_offset = ebx; - xstate_size = eax; - } - } - - if (xstate_size == 0) { - printf("could not find size/offset of PKRU in xsave state\n"); - return 0; - } - - return xstate_offset; -} - -#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c deleted file mode 100644 index 460b4bd..0000000 --- a/tools/testing/selftests/x86/protection_keys.c +++ /dev/null @@ -1,1485 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) - * - * There are examples in here of: - * * how to set protection keys on memory - * * how to set/clear bits in PKRU (the rights register) - * * how to handle SEGV_PKRU signals and extract pkey-relevant - * information from the siginfo - * - * Things to add: - * make sure KSM and KSM COW breaking works - * prefault pages in at malloc, or not - * protect MPX bounds tables with protection keys? - * make sure VMA splitting/merging is working correctly - * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys - * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel - * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks - * - * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - */ -#define _GNU_SOURCE -#include <errno.h> -#include <linux/futex.h> -#include <sys/time.h> -#include <sys/syscall.h> -#include <string.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/ptrace.h> -#include <setjmp.h> - -#include "pkey-helpers.h" - -int iteration_nr = 1; -int test_nr; - -unsigned int shadow_pkru; - -#define HPAGE_SIZE (1UL<<21) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) -#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) -#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) - -int dprint_in_signal; -char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; - -extern void abort_hooks(void); -#define pkey_assert(condition) do { \ - if (!(condition)) { \ - dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ - __FILE__, __LINE__, \ - test_nr, iteration_nr); \ - dprintf0("errno at assert: %d", errno); \ - abort_hooks(); \ - exit(__LINE__); \ - } \ -} while (0) - -void cat_into_file(char *str, char *file) -{ - int fd = open(file, O_RDWR); - int ret; - - dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); - /* - * these need to be raw because they are called under - * pkey_assert() - */ - if (fd < 0) { - fprintf(stderr, "error opening '%s'\n", str); - perror("error: "); - exit(__LINE__); - } - - ret = write(fd, str, strlen(str)); - if (ret != strlen(str)) { - perror("write to file failed"); - fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - exit(__LINE__); - } - close(fd); -} - -#if CONTROL_TRACING > 0 -static int warned_tracing; -int tracing_root_ok(void) -{ - if (geteuid() != 0) { - if (!warned_tracing) - fprintf(stderr, "WARNING: not run as root, " - "can not do tracing control\n"); - warned_tracing = 1; - return 0; - } - return 1; -} -#endif - -void tracing_on(void) -{ -#if CONTROL_TRACING > 0 -#define TRACEDIR "/sys/kernel/debug/tracing" - char pidstr[32]; - - if (!tracing_root_ok()) - return; - - sprintf(pidstr, "%d", getpid()); - cat_into_file("0", TRACEDIR "/tracing_on"); - cat_into_file("\n", TRACEDIR "/trace"); - if (1) { - cat_into_file("function_graph", TRACEDIR "/current_tracer"); - cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); - } else { - cat_into_file("nop", TRACEDIR "/current_tracer"); - } - cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); - cat_into_file("1", TRACEDIR "/tracing_on"); - dprintf1("enabled tracing\n"); -#endif -} - -void tracing_off(void) -{ -#if CONTROL_TRACING > 0 - if (!tracing_root_ok()) - return; - cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); -#endif -} - -void abort_hooks(void) -{ - fprintf(stderr, "running %s()...\n", __func__); - tracing_off(); -#ifdef SLEEP_ON_ABORT - sleep(SLEEP_ON_ABORT); -#endif -} - -static inline void __page_o_noops(void) -{ - /* 8-bytes of instruction * 512 bytes = 1 page */ - asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); -} - -/* - * This attempts to have roughly a page of instructions followed by a few - * instructions that do a write, and another page of instructions. That - * way, we are pretty sure that the write is in the second page of - * instructions and has at least a page of padding behind it. - * - * *That* lets us be sure to madvise() away the write instruction, which - * will then fault, which makes sure that the fault code handles - * execute-only memory properly. - */ -__attribute__((__aligned__(PAGE_SIZE))) -void lots_o_noops_around_write(int *write_to_me) -{ - dprintf3("running %s()\n", __func__); - __page_o_noops(); - /* Assume this happens in the second page of instructions: */ - *write_to_me = __LINE__; - /* pad out by another page: */ - __page_o_noops(); - dprintf3("%s() done\n", __func__); -} - -/* Define some kernel-like types */ -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - -#ifdef __i386__ - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif - -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 - -#else - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif - -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 - -#endif - -void dump_mem(void *dumpme, int len_bytes) -{ - char *c = (void *)dumpme; - int i; - - for (i = 0; i < len_bytes; i += sizeof(u64)) { - u64 *ptr = (u64 *)(c + i); - dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); - } -} - -/* Failed address bound checks: */ -#ifndef SEGV_BNDERR -# define SEGV_BNDERR 3 -#endif - -#ifndef SEGV_PKUERR -# define SEGV_PKUERR 4 -#endif - -static char *si_code_str(int si_code) -{ - if (si_code == SEGV_MAPERR) - return "SEGV_MAPERR"; - if (si_code == SEGV_ACCERR) - return "SEGV_ACCERR"; - if (si_code == SEGV_BNDERR) - return "SEGV_BNDERR"; - if (si_code == SEGV_PKUERR) - return "SEGV_PKUERR"; - return "UNKNOWN"; -} - -int pkru_faults; -int last_si_pkey = -1; -void signal_handler(int signum, siginfo_t *si, void *vucontext) -{ - ucontext_t *uctxt = vucontext; - int trapno; - unsigned long ip; - char *fpregs; - u32 *pkru_ptr; - u64 siginfo_pkey; - u32 *si_pkey_ptr; - int pkru_offset; - fpregset_t fpregset; - - dprint_in_signal = 1; - dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, - __rdpkru(), shadow_pkru); - - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - fpregset = uctxt->uc_mcontext.fpregs; - fpregs = (void *)fpregset; - - dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, - trapno, ip, si_code_str(si->si_code), si->si_code); -#ifdef __i386__ - /* - * 32-bit has some extra padding so that userspace can tell whether - * the XSTATE header is present in addition to the "legacy" FPU - * state. We just assume that it is here. - */ - fpregs += 0x70; -#endif - pkru_offset = pkru_xstate_offset(); - pkru_ptr = (void *)(&fpregs[pkru_offset]); - - dprintf1("siginfo: %p\n", si); - dprintf1(" fpregs: %p\n", fpregs); - /* - * If we got a PKRU fault, we *HAVE* to have at least one bit set in - * here. - */ - dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); - if (DEBUG_LEVEL > 4) - dump_mem(pkru_ptr - 128, 256); - pkey_assert(*pkru_ptr); - - if ((si->si_code == SEGV_MAPERR) || - (si->si_code == SEGV_ACCERR) || - (si->si_code == SEGV_BNDERR)) { - printf("non-PK si_code, exiting...\n"); - exit(4); - } - - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem((u8 *)si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - - dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); - /* need __rdpkru() version so we do not do shadow_pkru checking */ - dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkru_ptr = 0x00000000; - dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); - pkru_faults++; - dprintf1("<<<<==================================================\n"); - dprint_in_signal = 0; -} - -int wait_all_children(void) -{ - int status; - return waitpid(-1, &status, 0); -} - -void sig_chld(int x) -{ - dprint_in_signal = 1; - dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); - dprint_in_signal = 0; -} - -void setup_sigsegv_handler(void) -{ - int r, rs; - struct sigaction newact; - struct sigaction oldact; - - /* #PF is mapped to sigsegv */ - int signum = SIGSEGV; - - newact.sa_handler = 0; - newact.sa_sigaction = signal_handler; - - /*sigset_t - signals to block while in the handler */ - /* get the old signal mask. */ - rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); - pkey_assert(rs == 0); - - /* call sa_sigaction, not sa_handler*/ - newact.sa_flags = SA_SIGINFO; - - newact.sa_restorer = 0; /* void(*)(), obsolete */ - r = sigaction(signum, &newact, &oldact); - r = sigaction(SIGALRM, &newact, &oldact); - pkey_assert(r == 0); -} - -void setup_handlers(void) -{ - signal(SIGCHLD, &sig_chld); - setup_sigsegv_handler(); -} - -pid_t fork_lazy_child(void) -{ - pid_t forkret; - - forkret = fork(); - pkey_assert(forkret >= 0); - dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); - - if (!forkret) { - /* in the child */ - while (1) { - dprintf1("child sleeping...\n"); - sleep(30); - } - } - return forkret; -} - -#ifndef PKEY_DISABLE_ACCESS -# define PKEY_DISABLE_ACCESS 0x1 -#endif - -#ifndef PKEY_DISABLE_WRITE -# define PKEY_DISABLE_WRITE 0x2 -#endif - -static u32 hw_pkey_get(int pkey, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkru = __rdpkru(); - u32 shifted_pkru; - u32 masked_pkru; - - dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", - __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkru: %x\n", __func__, pkru); - - shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); - masked_pkru = shifted_pkru & mask; - dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); - /* - * shift down the relevant bits to the lowest two, then - * mask off all the other high bits. - */ - return masked_pkru; -} - -static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkru = __rdpkru(); - u32 new_pkru; - - /* make sure that 'rights' only contains the bits we expect: */ - assert(!(rights & ~mask)); - - /* copy old pkru */ - new_pkru = old_pkru; - /* mask out bits from pkey in old value: */ - new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); - /* OR in new bits for pkey: */ - new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); - - __wrpkru(new_pkru); - - dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", - __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); - return 0; -} - -void pkey_disable_set(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights; - u32 orig_pkru = rdpkru(); - - dprintf1("START->%s(%d, 0x%x)\n", __func__, - pkey, flags); - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); - assert(!ret); - /*pkru and flags have the same format */ - shadow_pkru |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); - - pkey_assert(ret >= 0); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - pkey_assert(rdpkru() > orig_pkru); - dprintf1("END<---%s(%d, 0x%x)\n", __func__, - pkey, flags); -} - -void pkey_disable_clear(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u32 orig_pkru = rdpkru(); - - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = hw_pkey_set(pkey, pkey_rights, 0); - /* pkru and flags have the same format */ - shadow_pkru &= ~(flags << (pkey * 2)); - pkey_assert(ret >= 0); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - assert(rdpkru() > orig_pkru); -} - -void pkey_write_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); -} -void pkey_write_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_WRITE); -} -void pkey_access_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); -} -void pkey_access_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); -} - -int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int sret; - - dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, - ptr, size, orig_prot, pkey); - - errno = 0; - sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); - if (errno) { - dprintf2("SYS_mprotect_key sret: %d\n", sret); - dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); - dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); - if (DEBUG_LEVEL >= 2) - perror("SYS_mprotect_pkey"); - } - return sret; -} - -int sys_pkey_alloc(unsigned long flags, unsigned long init_val) -{ - int ret = syscall(SYS_pkey_alloc, flags, init_val); - dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", - __func__, flags, init_val, ret, errno); - return ret; -} - -int alloc_pkey(void) -{ - int ret; - unsigned long init_val = 0x0; - - dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", - __LINE__, __rdpkru(), shadow_pkru); - ret = sys_pkey_alloc(0, init_val); - /* - * pkey_alloc() sets PKRU, so we need to reflect it in - * shadow_pkru: - */ - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - if (ret) { - /* clear both the bits: */ - shadow_pkru &= ~(0x3 << (ret * 2)); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - /* - * move the new state in from init_val - * (remember, we cheated and init_val == pkru format) - */ - shadow_pkru |= (init_val << (ret * 2)); - } - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); - /* for shadow checking: */ - rdpkru(); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -int sys_pkey_free(unsigned long pkey) -{ - int ret = syscall(SYS_pkey_free, pkey); - dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); - return ret; -} - -/* - * I had a bug where pkey bits could be set by mprotect() but - * not cleared. This ensures we get lots of random bit sets - * and clears on the vma and pte pkey bits. - */ -int alloc_random_pkey(void) -{ - int max_nr_pkey_allocs; - int ret; - int i; - int alloced_pkeys[NR_PKEYS]; - int nr_alloced = 0; - int random_index; - memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); - - /* allocate every possible key and make a note of which ones we got */ - max_nr_pkey_allocs = NR_PKEYS; - max_nr_pkey_allocs = 1; - for (i = 0; i < max_nr_pkey_allocs; i++) { - int new_pkey = alloc_pkey(); - if (new_pkey < 0) - break; - alloced_pkeys[nr_alloced++] = new_pkey; - } - - pkey_assert(nr_alloced > 0); - /* select a random one out of the allocated ones */ - random_index = rand() % nr_alloced; - ret = alloced_pkeys[random_index]; - /* now zero it out so we don't free it next */ - alloced_pkeys[random_index] = 0; - - /* go through the allocated ones that we did not want and free them */ - for (i = 0; i < nr_alloced; i++) { - int free_ret; - if (!alloced_pkeys[i]) - continue; - free_ret = sys_pkey_free(alloced_pkeys[i]); - pkey_assert(!free_ret); - } - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int nr_iterations = random() % 100; - int ret; - - while (0) { - int rpkey = alloc_random_pkey(); - ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); - dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", - ptr, size, orig_prot, pkey, ret); - if (nr_iterations-- < 0) - break; - - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - } - pkey_assert(pkey < NR_PKEYS); - - ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); - dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", - ptr, size, orig_prot, pkey, ret); - pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -struct pkey_malloc_record { - void *ptr; - long size; - int prot; -}; -struct pkey_malloc_record *pkey_malloc_records; -struct pkey_malloc_record *pkey_last_malloc_record; -long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size, int prot) -{ - long i; - struct pkey_malloc_record *rec = NULL; - - for (i = 0; i < nr_pkey_malloc_records; i++) { - rec = &pkey_malloc_records[i]; - /* find a free record */ - if (rec) - break; - } - if (!rec) { - /* every record is full */ - size_t old_nr_records = nr_pkey_malloc_records; - size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); - size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); - dprintf2("new_nr_records: %zd\n", new_nr_records); - dprintf2("new_size: %zd\n", new_size); - pkey_malloc_records = realloc(pkey_malloc_records, new_size); - pkey_assert(pkey_malloc_records != NULL); - rec = &pkey_malloc_records[nr_pkey_malloc_records]; - /* - * realloc() does not initialize memory, so zero it from - * the first new record all the way to the end. - */ - for (i = 0; i < new_nr_records - old_nr_records; i++) - memset(rec + i, 0, sizeof(*rec)); - } - dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", - (int)(rec - pkey_malloc_records), rec, ptr, size); - rec->ptr = ptr; - rec->size = size; - rec->prot = prot; - pkey_last_malloc_record = rec; - nr_pkey_malloc_records++; -} - -void free_pkey_malloc(void *ptr) -{ - long i; - int ret; - dprintf3("%s(%p)\n", __func__, ptr); - for (i = 0; i < nr_pkey_malloc_records; i++) { - struct pkey_malloc_record *rec = &pkey_malloc_records[i]; - dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", - ptr, i, rec, rec->ptr, rec->size); - if ((ptr < rec->ptr) || - (ptr >= rec->ptr + rec->size)) - continue; - - dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", - ptr, i, rec, rec->ptr, rec->size); - nr_pkey_malloc_records--; - ret = munmap(rec->ptr, rec->size); - dprintf3("munmap ret: %d\n", ret); - pkey_assert(!ret); - dprintf3("clearing rec->ptr, rec: %p\n", rec); - rec->ptr = NULL; - dprintf3("done clearing rec->ptr, rec: %p\n", rec); - return; - } - pkey_assert(false); -} - - -void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) -{ - void *ptr; - int ret; - - rdpkru(); - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - pkey_assert(ptr != (void *)-1); - ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); - pkey_assert(!ret); - record_pkey_malloc(ptr, size, prot); - rdpkru(); - - dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); - return ptr; -} - -void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) -{ - int ret; - void *ptr; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - /* - * Guarantee we can fit at least one huge page in the resulting - * allocation by allocating space for 2: - */ - size = ALIGN_UP(size, HPAGE_SIZE * 2); - ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size, prot); - mprotect_pkey(ptr, size, prot, pkey); - - dprintf1("unaligned ptr: %p\n", ptr); - ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); - dprintf1(" aligned ptr: %p\n", ptr); - ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); - dprintf1("MADV_HUGEPAGE ret: %d\n", ret); - ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); - dprintf1("MADV_WILLNEED ret: %d\n", ret); - memset(ptr, 0, HPAGE_SIZE); - - dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); - return ptr; -} - -int hugetlb_setup_ok; -#define GET_NR_HUGE_PAGES 10 -void setup_hugetlbfs(void) -{ - int err; - int fd; - char buf[] = "123"; - - if (geteuid() != 0) { - fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); - return; - } - - cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); - - /* - * Now go make sure that we got the pages and that they - * are 2M pages. Someone might have made 1G the default. - */ - fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); - if (fd < 0) { - perror("opening sysfs 2M hugetlb config"); - return; - } - - /* -1 to guarantee leaving the trailing \0 */ - err = read(fd, buf, sizeof(buf)-1); - close(fd); - if (err <= 0) { - perror("reading sysfs 2M hugetlb config"); - return; - } - - if (atoi(buf) != GET_NR_HUGE_PAGES) { - fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", - buf, GET_NR_HUGE_PAGES); - return; - } - - hugetlb_setup_ok = 1; -} - -void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) -{ - void *ptr; - int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; - - if (!hugetlb_setup_ok) - return PTR_ERR_ENOTSUP; - - dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); - size = ALIGN_UP(size, HPAGE_SIZE * 2); - pkey_assert(pkey < NR_PKEYS); - ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); - pkey_assert(ptr != (void *)-1); - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size, prot); - - dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); - return ptr; -} - -void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) -{ - void *ptr; - int fd; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - fd = open("/dax/foo", O_RDWR); - pkey_assert(fd >= 0); - - ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); - pkey_assert(ptr != (void *)-1); - - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size, prot); - - dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); - close(fd); - return ptr; -} - -void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { - - malloc_pkey_with_mprotect, - malloc_pkey_anon_huge, - malloc_pkey_hugetlb -/* can not do direct with the pkey_mprotect() API: - malloc_pkey_mmap_direct, - malloc_pkey_mmap_dax, -*/ -}; - -void *malloc_pkey(long size, int prot, u16 pkey) -{ - void *ret; - static int malloc_type; - int nr_malloc_types = ARRAY_SIZE(pkey_malloc); - - pkey_assert(pkey < NR_PKEYS); - - while (1) { - pkey_assert(malloc_type < nr_malloc_types); - - ret = pkey_malloc[malloc_type](size, prot, pkey); - pkey_assert(ret != (void *)-1); - - malloc_type++; - if (malloc_type >= nr_malloc_types) - malloc_type = (random()%nr_malloc_types); - - /* try again if the malloc_type we tried is unsupported */ - if (ret == PTR_ERR_ENOTSUP) - continue; - - break; - } - - dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, - size, prot, pkey, ret); - return ret; -} - -int last_pkru_faults; -#define UNKNOWN_PKEY -2 -void expected_pk_fault(int pkey) -{ - dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", - __func__, last_pkru_faults, pkru_faults); - dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); - pkey_assert(last_pkru_faults + 1 == pkru_faults); - - /* - * For exec-only memory, we do not know the pkey in - * advance, so skip this check. - */ - if (pkey != UNKNOWN_PKEY) - pkey_assert(last_si_pkey == pkey); - - /* - * The signal handler shold have cleared out PKRU to let the - * test program continue. We now have to restore it. - */ - if (__rdpkru() != 0) - pkey_assert(0); - - __wrpkru(shadow_pkru); - dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", - __func__, shadow_pkru); - last_pkru_faults = pkru_faults; - last_si_pkey = -1; -} - -#define do_not_expect_pk_fault(msg) do { \ - if (last_pkru_faults != pkru_faults) \ - dprintf0("unexpected PK fault: %s\n", msg); \ - pkey_assert(last_pkru_faults == pkru_faults); \ -} while (0) - -int test_fds[10] = { -1 }; -int nr_test_fds; -void __save_test_fd(int fd) -{ - pkey_assert(fd >= 0); - pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); - test_fds[nr_test_fds] = fd; - nr_test_fds++; -} - -int get_test_read_fd(void) -{ - int test_fd = open("/etc/passwd", O_RDONLY); - __save_test_fd(test_fd); - return test_fd; -} - -void close_test_fds(void) -{ - int i; - - for (i = 0; i < nr_test_fds; i++) { - if (test_fds[i] < 0) - continue; - close(test_fds[i]); - test_fds[i] = -1; - } - nr_test_fds = 0; -} - -#define barrier() __asm__ __volatile__("": : :"memory") -__attribute__((noinline)) int read_ptr(int *ptr) -{ - /* - * Keep GCC from optimizing this away somehow - */ - barrier(); - return *ptr; -} - -void test_read_of_write_disabled_region(int *ptr, u16 pkey) -{ - int ptr_contents; - - dprintf1("disabling write access to PKEY[1], doing read\n"); - pkey_write_deny(pkey); - ptr_contents = read_ptr(ptr); - dprintf1("*ptr: %d\n", ptr_contents); - dprintf1("\n"); -} -void test_read_of_access_disabled_region(int *ptr, u16 pkey) -{ - int ptr_contents; - - dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); - rdpkru(); - pkey_access_deny(pkey); - ptr_contents = read_ptr(ptr); - dprintf1("*ptr: %d\n", ptr_contents); - expected_pk_fault(pkey); -} -void test_write_of_write_disabled_region(int *ptr, u16 pkey) -{ - dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); - pkey_write_deny(pkey); - *ptr = __LINE__; - expected_pk_fault(pkey); -} -void test_write_of_access_disabled_region(int *ptr, u16 pkey) -{ - dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); - pkey_access_deny(pkey); - *ptr = __LINE__; - expected_pk_fault(pkey); -} -void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) -{ - int ret; - int test_fd = get_test_read_fd(); - - dprintf1("disabling access to PKEY[%02d], " - "having kernel read() to buffer\n", pkey); - pkey_access_deny(pkey); - ret = read(test_fd, ptr, 1); - dprintf1("read ret: %d\n", ret); - pkey_assert(ret); -} -void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) -{ - int ret; - int test_fd = get_test_read_fd(); - - pkey_write_deny(pkey); - ret = read(test_fd, ptr, 100); - dprintf1("read ret: %d\n", ret); - if (ret < 0 && (DEBUG_LEVEL > 0)) - perror("verbose read result (OK for this to be bad)"); - pkey_assert(ret); -} - -void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) -{ - int pipe_ret, vmsplice_ret; - struct iovec iov; - int pipe_fds[2]; - - pipe_ret = pipe(pipe_fds); - - pkey_assert(pipe_ret == 0); - dprintf1("disabling access to PKEY[%02d], " - "having kernel vmsplice from buffer\n", pkey); - pkey_access_deny(pkey); - iov.iov_base = ptr; - iov.iov_len = PAGE_SIZE; - vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); - dprintf1("vmsplice() ret: %d\n", vmsplice_ret); - pkey_assert(vmsplice_ret == -1); - - close(pipe_fds[0]); - close(pipe_fds[1]); -} - -void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) -{ - int ignored = 0xdada; - int futex_ret; - int some_int = __LINE__; - - dprintf1("disabling write to PKEY[%02d], " - "doing futex gunk in buffer\n", pkey); - *ptr = some_int; - pkey_write_deny(pkey); - futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, - &ignored, ignored); - if (DEBUG_LEVEL > 0) - perror("futex"); - dprintf1("futex() ret: %d\n", futex_ret); -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) -{ - int err; - int i; - - /* Note: 0 is the default pkey, so don't mess with it */ - for (i = 1; i < NR_PKEYS; i++) { - if (pkey == i) - continue; - - dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); - err = sys_pkey_free(i); - pkey_assert(err); - - err = sys_pkey_free(i); - pkey_assert(err); - - err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); - pkey_assert(err); - } -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) -{ - int err; - int bad_pkey = NR_PKEYS+99; - - /* pass a known-invalid pkey in: */ - err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); - pkey_assert(err); -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_alloc_exhaust(int *ptr, u16 pkey) -{ - int err; - int allocated_pkeys[NR_PKEYS] = {0}; - int nr_allocated_pkeys = 0; - int i; - - for (i = 0; i < NR_PKEYS*2; i++) { - int new_pkey; - dprintf1("%s() alloc loop: %d\n", __func__, i); - new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, err, __rdpkru(), shadow_pkru); - rdpkru(); /* for shadow checking */ - dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); - if ((new_pkey == -1) && (errno == ENOSPC)) { - dprintf2("%s() failed to allocate pkey after %d tries\n", - __func__, nr_allocated_pkeys); - break; - } - pkey_assert(nr_allocated_pkeys < NR_PKEYS); - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; - } - - dprintf3("%s()::%d\n", __func__, __LINE__); - - /* - * ensure it did not reach the end of the loop without - * failure: - */ - pkey_assert(i < NR_PKEYS*2); - - /* - * There are 16 pkeys supported in hardware. Three are - * allocated by the time we get here: - * 1. The default key (0) - * 2. One possibly consumed by an execute-only mapping. - * 3. One allocated by the test code and passed in via - * 'pkey' to this function. - * Ensure that we can allocate at least another 13 (16-3). - */ - pkey_assert(i >= NR_PKEYS-3); - - for (i = 0; i < nr_allocated_pkeys; i++) { - err = sys_pkey_free(allocated_pkeys[i]); - pkey_assert(!err); - rdpkru(); /* for shadow checking */ - } -} - -/* - * pkey 0 is special. It is allocated by default, so you do not - * have to call pkey_alloc() to use it first. Make sure that it - * is usable. - */ -void test_mprotect_with_pkey_0(int *ptr, u16 pkey) -{ - long size; - int prot; - - assert(pkey_last_malloc_record); - size = pkey_last_malloc_record->size; - /* - * This is a bit of a hack. But mprotect() requires - * huge-page-aligned sizes when operating on hugetlbfs. - * So, make sure that we use something that's a multiple - * of a huge page when we can. - */ - if (size >= HPAGE_SIZE) - size = HPAGE_SIZE; - prot = pkey_last_malloc_record->prot; - - /* Use pkey 0 */ - mprotect_pkey(ptr, size, prot, 0); - - /* Make sure that we can set it back to the original pkey. */ - mprotect_pkey(ptr, size, prot, pkey); -} - -void test_ptrace_of_child(int *ptr, u16 pkey) -{ - __attribute__((__unused__)) int peek_result; - pid_t child_pid; - void *ignored = 0; - long ret; - int status; - /* - * This is the "control" for our little expermient. Make sure - * we can always access it when ptracing. - */ - int *plain_ptr_unaligned = malloc(HPAGE_SIZE); - int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); - - /* - * Fork a child which is an exact copy of this process, of course. - * That means we can do all of our tests via ptrace() and then plain - * memory access and ensure they work differently. - */ - child_pid = fork_lazy_child(); - dprintf1("[%d] child pid: %d\n", getpid(), child_pid); - - ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); - if (ret) - perror("attach"); - dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); - pkey_assert(ret != -1); - ret = waitpid(child_pid, &status, WUNTRACED); - if ((ret != child_pid) || !(WIFSTOPPED(status))) { - fprintf(stderr, "weird waitpid result %ld stat %x\n", - ret, status); - pkey_assert(0); - } - dprintf2("waitpid ret: %ld\n", ret); - dprintf2("waitpid status: %d\n", status); - - pkey_access_deny(pkey); - pkey_write_deny(pkey); - - /* Write access, untested for now: - ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); - pkey_assert(ret != -1); - dprintf1("poke at %p: %ld\n", peek_at, ret); - */ - - /* - * Try to access the pkey-protected "ptr" via ptrace: - */ - ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); - /* expect it to work, without an error: */ - pkey_assert(ret != -1); - /* Now access from the current task, and expect an exception: */ - peek_result = read_ptr(ptr); - expected_pk_fault(pkey); - - /* - * Try to access the NON-pkey-protected "plain_ptr" via ptrace: - */ - ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); - /* expect it to work, without an error: */ - pkey_assert(ret != -1); - /* Now access from the current task, and expect NO exception: */ - peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault("read plain pointer after ptrace"); - - ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); - pkey_assert(ret != -1); - - ret = kill(child_pid, SIGKILL); - pkey_assert(ret != -1); - - wait(&status); - - free(plain_ptr_unaligned); -} - -void *get_pointer_to_instructions(void) -{ - void *p1; - - p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); - dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); - /* lots_o_noops_around_write should be page-aligned already */ - assert(p1 == &lots_o_noops_around_write); - - /* Point 'p1' at the *second* page of the function: */ - p1 += PAGE_SIZE; - - /* - * Try to ensure we fault this in on next touch to ensure - * we get an instruction fault as opposed to a data one - */ - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - - return p1; -} - -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) -{ - void *p1; - int scratch; - int ptr_contents; - int ret; - - p1 = get_pointer_to_instructions(); - lots_o_noops_around_write(&scratch); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - - ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); - pkey_assert(!ret); - pkey_access_deny(pkey); - - dprintf2("pkru: %x\n", rdpkru()); - - /* - * Make sure this is an *instruction* fault - */ - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault("executing on PROT_EXEC memory"); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(pkey); -} - -void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) -{ - void *p1; - int scratch; - int ptr_contents; - int ret; - - dprintf1("%s() start\n", __func__); - - p1 = get_pointer_to_instructions(); - lots_o_noops_around_write(&scratch); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - - /* Use a *normal* mprotect(), not mprotect_pkey(): */ - ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); - pkey_assert(!ret); - - dprintf2("pkru: %x\n", rdpkru()); - - /* Make sure this is an *instruction* fault */ - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault("executing on PROT_EXEC memory"); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(UNKNOWN_PKEY); - - /* - * Put the memory back to non-PROT_EXEC. Should clear the - * exec-only pkey off the VMA and allow it to be readable - * again. Go to PROT_NONE first to check for a kernel bug - * that did not clear the pkey when doing PROT_NONE. - */ - ret = mprotect(p1, PAGE_SIZE, PROT_NONE); - pkey_assert(!ret); - - ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); - pkey_assert(!ret); - ptr_contents = read_ptr(p1); - do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); -} - -void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) -{ - int size = PAGE_SIZE; - int sret; - - if (cpu_has_pku()) { - dprintf1("SKIP: %s: no CPU support\n", __func__); - return; - } - - sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); - pkey_assert(sret < 0); -} - -void (*pkey_tests[])(int *ptr, u16 pkey) = { - test_read_of_write_disabled_region, - test_read_of_access_disabled_region, - test_write_of_write_disabled_region, - test_write_of_access_disabled_region, - test_kernel_write_of_access_disabled_region, - test_kernel_write_of_write_disabled_region, - test_kernel_gup_of_access_disabled_region, - test_kernel_gup_write_to_write_disabled_region, - test_executing_on_unreadable_memory, - test_implicit_mprotect_exec_only_memory, - test_mprotect_with_pkey_0, - test_ptrace_of_child, - test_pkey_syscalls_on_non_allocated_pkey, - test_pkey_syscalls_bad_args, - test_pkey_alloc_exhaust, -}; - -void run_tests_once(void) -{ - int *ptr; - int prot = PROT_READ|PROT_WRITE; - - for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { - int pkey; - int orig_pkru_faults = pkru_faults; - - dprintf1("======================\n"); - dprintf1("test %d preparing...\n", test_nr); - - tracing_on(); - pkey = alloc_random_pkey(); - dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); - ptr = malloc_pkey(PAGE_SIZE, prot, pkey); - dprintf1("test %d starting...\n", test_nr); - pkey_tests[test_nr](ptr, pkey); - dprintf1("freeing test memory: %p\n", ptr); - free_pkey_malloc(ptr); - sys_pkey_free(pkey); - - dprintf1("pkru_faults: %d\n", pkru_faults); - dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); - - tracing_off(); - close_test_fds(); - - printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); - dprintf1("======================\n\n"); - } - iteration_nr++; -} - -void pkey_setup_shadow(void) -{ - shadow_pkru = __rdpkru(); -} - -int main(void) -{ - int nr_iterations = 22; - - setup_handlers(); - - printf("has pku: %d\n", cpu_has_pku()); - - if (!cpu_has_pku()) { - int size = PAGE_SIZE; - int *ptr; - - printf("running PKEY tests for unsupported CPU/OS\n"); - - ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - assert(ptr != (void *)-1); - test_mprotect_pkey_on_unsupported_cpu(ptr, 1); - exit(0); - } - - pkey_setup_shadow(); - printf("startup pkru: %x\n", rdpkru()); - setup_hugetlbfs(); - - while (nr_iterations-- > 0) - run_tests_once(); - - printf("done (all tests OK)\n"); - return 0; -}
Acked-by: Dave Hansen dave.hansen@intel.com
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
some pkru references are named to pkey_reg and some prku references are renamed to pkey
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com Reviewed-by: Dave Hansen dave.hansen@intel.com --- tools/testing/selftests/vm/pkey-helpers.h | 85 +++++----- tools/testing/selftests/vm/protection_keys.c | 238 ++++++++++++++------------ 2 files changed, 169 insertions(+), 154 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 254e543..d5779be 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -14,7 +14,7 @@ #include <sys/mman.h>
#define NR_PKEYS 16 -#define PKRU_BITS_PER_PKEY 2 +#define PKEY_BITS_PER_PKEY 2
#ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 @@ -53,85 +53,88 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf3(args...) dprintf_level(3, args) #define dprintf4(args...) dprintf_level(4, args)
-extern unsigned int shadow_pkru; -static inline unsigned int __rdpkru(void) +extern unsigned int shadow_pkey_reg; +static inline unsigned int __read_pkey_reg(void) { unsigned int eax, edx; unsigned int ecx = 0; - unsigned int pkru; + unsigned int pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (eax), "=d" (edx) : "c" (ecx)); - pkru = eax; - return pkru; + pkey_reg = eax; + return pkey_reg; }
-static inline unsigned int _rdpkru(int line) +static inline unsigned int _read_pkey_reg(int line) { - unsigned int pkru = __rdpkru(); + unsigned int pkey_reg = __read_pkey_reg();
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", - line, pkru, shadow_pkru); - assert(pkru == shadow_pkru); + dprintf4("read_pkey_reg(line=%d) pkey_reg: %x shadow: %x\n", + line, pkey_reg, shadow_pkey_reg); + assert(pkey_reg == shadow_pkey_reg);
- return pkru; + return pkey_reg; }
-#define rdpkru() _rdpkru(__LINE__) +#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void __wrpkru(unsigned int pkru) +static inline void __write_pkey_reg(unsigned int pkey_reg) { - unsigned int eax = pkru; + unsigned int eax = pkey_reg; unsigned int ecx = 0; unsigned int edx = 0;
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + dprintf4("%s() changing %08x to %08x\n", __func__, + __read_pkey_reg(), pkey_reg); asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkru == __rdpkru()); + assert(pkey_reg == __read_pkey_reg()); }
-static inline void wrpkru(unsigned int pkru) +static inline void write_pkey_reg(unsigned int pkey_reg) { - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + dprintf4("%s() changing %08x to %08x\n", __func__, + __read_pkey_reg(), pkey_reg); /* will do the shadow check for us: */ - rdpkru(); - __wrpkru(pkru); - shadow_pkru = pkru; - dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); + read_pkey_reg(); + __write_pkey_reg(pkey_reg); + shadow_pkey_reg = pkey_reg; + dprintf4("%s(%08x) pkey_reg: %08x\n", __func__, + pkey_reg, __read_pkey_reg()); }
/* * These are technically racy. since something could - * change PKRU between the read and the write. + * change PKEY register between the read and the write. */ static inline void __pkey_access_allow(int pkey, int do_allow) { - unsigned int pkru = rdpkru(); + unsigned int pkey_reg = read_pkey_reg(); int bit = pkey * 2;
if (do_allow) - pkru &= (1<<bit); + pkey_reg &= (1<<bit); else - pkru |= (1<<bit); + pkey_reg |= (1<<bit);
- dprintf4("pkru now: %08x\n", rdpkru()); - wrpkru(pkru); + dprintf4("pkey_reg now: %08x\n", read_pkey_reg()); + write_pkey_reg(pkey_reg); }
static inline void __pkey_write_allow(int pkey, int do_allow_write) { - long pkru = rdpkru(); + long pkey_reg = read_pkey_reg(); int bit = pkey * 2 + 1;
if (do_allow_write) - pkru &= (1<<bit); + pkey_reg &= (1<<bit); else - pkru |= (1<<bit); + pkey_reg |= (1<<bit);
- wrpkru(pkru); - dprintf4("pkru now: %08x\n", rdpkru()); + write_pkey_reg(pkey_reg); + dprintf4("pkey_reg now: %08x\n", read_pkey_reg()); }
#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ @@ -181,10 +184,10 @@ static inline int cpu_has_pku(void) return 1; }
-#define XSTATE_PKRU_BIT (9) -#define XSTATE_PKRU 0x200 +#define XSTATE_PKEY_BIT (9) +#define XSTATE_PKEY 0x200
-int pkru_xstate_offset(void) +int pkey_reg_xstate_offset(void) { unsigned int eax; unsigned int ebx; @@ -195,21 +198,21 @@ int pkru_xstate_offset(void) unsigned long XSTATE_CPUID = 0xd; int leaf;
- /* assume that XSTATE_PKRU is set in XCR0 */ - leaf = XSTATE_PKRU_BIT; + /* assume that XSTATE_PKEY is set in XCR0 */ + leaf = XSTATE_PKEY_BIT; { eax = XSTATE_CPUID; ecx = leaf; __cpuid(&eax, &ebx, &ecx, &edx);
- if (leaf == XSTATE_PKRU_BIT) { + if (leaf == XSTATE_PKEY_BIT) { xstate_offset = ebx; xstate_size = eax; } }
if (xstate_size == 0) { - printf("could not find size/offset of PKRU in xsave state\n"); + printf("could not find size/offset of PKEY in xsave state\n"); return 0; }
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 460b4bd..9f373cc 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) + * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt) * * There are examples in here of: * * how to set protection keys on memory - * * how to set/clear bits in PKRU (the rights register) - * * how to handle SEGV_PKRU signals and extract pkey-relevant + * * how to set/clear bits in pkey registers (the rights register) + * * how to handle SEGV_PKUERR signals and extract pkey-relevant * information from the siginfo * * Things to add: @@ -48,7 +48,7 @@ int iteration_nr = 1; int test_nr;
-unsigned int shadow_pkru; +unsigned int shadow_pkey_reg;
#define HPAGE_SIZE (1UL<<21) #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) @@ -255,7 +255,7 @@ void dump_mem(void *dumpme, int len_bytes) return "UNKNOWN"; }
-int pkru_faults; +int pkey_faults; int last_si_pkey = -1; void signal_handler(int signum, siginfo_t *si, void *vucontext) { @@ -263,16 +263,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) int trapno; unsigned long ip; char *fpregs; - u32 *pkru_ptr; + u32 *pkey_reg_ptr; u64 siginfo_pkey; u32 *si_pkey_ptr; - int pkru_offset; + int pkey_reg_offset; fpregset_t fpregset;
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, - __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__, + __read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; @@ -289,19 +289,19 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) */ fpregs += 0x70; #endif - pkru_offset = pkru_xstate_offset(); - pkru_ptr = (void *)(&fpregs[pkru_offset]); + pkey_reg_offset = pkey_reg_xstate_offset(); + pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
dprintf1("siginfo: %p\n", si); dprintf1(" fpregs: %p\n", fpregs); /* - * If we got a PKRU fault, we *HAVE* to have at least one bit set in + * If we got a PKEY fault, we *HAVE* to have at least one bit set in * here. */ - dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); + dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4) - dump_mem(pkru_ptr - 128, 256); - pkey_assert(*pkru_ptr); + dump_mem(pkey_reg_ptr - 128, 256); + pkey_assert(*pkey_reg_ptr);
if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -317,13 +317,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) pkey_assert(siginfo_pkey < NR_PKEYS); last_si_pkey = siginfo_pkey;
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); - /* need __rdpkru() version so we do not do shadow_pkru checking */ - dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); + dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + dprintf1("signal pkey_reg from pkey_reg: %08x\n", __read_pkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkru_ptr = 0x00000000; + *(u64 *)pkey_reg_ptr = 0x00000000; dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); - pkru_faults++; + pkey_faults++; dprintf1("<<<<==================================================\n"); dprint_in_signal = 0; } @@ -402,45 +405,47 @@ pid_t fork_lazy_child(void) static u32 hw_pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkru = __rdpkru(); - u32 shifted_pkru; - u32 masked_pkru; + u32 pkey_reg = __read_pkey_reg(); + u32 shifted_pkey_reg; + u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkru: %x\n", __func__, pkru); + dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); - masked_pkru = shifted_pkru & mask; - dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); + shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY)); + dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg); + masked_pkey_reg = shifted_pkey_reg & mask; + dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg); /* * shift down the relevant bits to the lowest two, then * mask off all the other high bits. */ - return masked_pkru; + return masked_pkey_reg; }
static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkru = __rdpkru(); - u32 new_pkru; + u32 old_pkey_reg = __read_pkey_reg(); + u32 new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */ assert(!(rights & ~mask));
- /* copy old pkru */ - new_pkru = old_pkru; + /* copy old pkey_reg */ + new_pkey_reg = old_pkey_reg; /* mask out bits from pkey in old value: */ - new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); + new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY)); /* OR in new bits for pkey: */ - new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); + new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY));
- __wrpkru(new_pkru); + __write_pkey_reg(new_pkey_reg);
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", - __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" + " pkey_reg now: %x old_pkey_reg: %x\n", + __func__, pkey, rights, flags, 0, __read_pkey_reg(), + old_pkey_reg); return 0; }
@@ -449,7 +454,7 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u32 orig_pkru = rdpkru(); + u32 orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -465,9 +470,9 @@ void pkey_disable_set(int pkey, int flags)
ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); - /*pkru and flags have the same format */ - shadow_pkru |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); + /*pkey_reg and flags have the same format */ + shadow_pkey_reg |= flags << (pkey * 2); + dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -475,9 +480,9 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg()); if (flags) - pkey_assert(rdpkru() > orig_pkru); + pkey_assert(read_pkey_reg() > orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); } @@ -487,7 +492,7 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u32 orig_pkru = rdpkru(); + u32 orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -498,17 +503,16 @@ void pkey_disable_clear(int pkey, int flags) pkey_rights |= flags;
ret = hw_pkey_set(pkey, pkey_rights, 0); - /* pkru and flags have the same format */ - shadow_pkru &= ~(flags << (pkey * 2)); + shadow_pkey_reg &= ~(flags << (pkey * 2)); pkey_assert(ret >= 0);
pkey_rights = hw_pkey_get(pkey, syscall_flags); dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg()); if (flags) - assert(rdpkru() > orig_pkru); + assert(read_pkey_reg() > orig_pkey_reg); }
void pkey_write_allow(int pkey) @@ -561,33 +565,38 @@ int alloc_pkey(void) int ret; unsigned long init_val = 0x0;
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", - __LINE__, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, + __LINE__, __read_pkey_reg(), shadow_pkey_reg); ret = sys_pkey_alloc(0, init_val); /* - * pkey_alloc() sets PKRU, so we need to reflect it in - * shadow_pkru: + * pkey_alloc() sets PKEY register, so we need to reflect it in + * shadow_pkey_reg: */ - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); if (ret) { /* clear both the bits: */ - shadow_pkru &= ~(0x3 << (ret * 2)); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + shadow_pkey_reg &= ~(0x3 << (ret * 2)); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, + __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); /* * move the new state in from init_val - * (remember, we cheated and init_val == pkru format) + * (remember, we cheated and init_val == pkey_reg format) */ - shadow_pkru |= (init_val << (ret * 2)); + shadow_pkey_reg |= (init_val << (ret * 2)); } - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); + dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); /* for shadow checking: */ - rdpkru(); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + read_pkey_reg(); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); return ret; }
@@ -638,8 +647,8 @@ int alloc_random_pkey(void) free_ret = sys_pkey_free(alloced_pkeys[i]); pkey_assert(!free_ret); } - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); return ret; }
@@ -657,11 +666,13 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, if (nr_iterations-- < 0) break;
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __read_pkey_reg(), + shadow_pkey_reg); } pkey_assert(pkey < NR_PKEYS);
@@ -669,8 +680,8 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", ptr, size, orig_prot, pkey, ret); pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); return ret; }
@@ -752,7 +763,7 @@ void free_pkey_malloc(void *ptr) void *ptr; int ret;
- rdpkru(); + read_pkey_reg(); dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, size, prot, pkey); pkey_assert(pkey < NR_PKEYS); @@ -761,7 +772,7 @@ void free_pkey_malloc(void *ptr) ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); record_pkey_malloc(ptr, size, prot); - rdpkru(); + read_pkey_reg();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); return ptr; @@ -924,15 +935,15 @@ void setup_hugetlbfs(void) return ret; }
-int last_pkru_faults; +int last_pkey_faults; #define UNKNOWN_PKEY -2 -void expected_pk_fault(int pkey) +void expected_pkey_fault(int pkey) { - dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", - __func__, last_pkru_faults, pkru_faults); + dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n", + __func__, last_pkey_faults, pkey_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); - pkey_assert(last_pkru_faults + 1 == pkru_faults);
+ pkey_assert(last_pkey_faults + 1 == pkey_faults); /* * For exec-only memory, we do not know the pkey in * advance, so skip this check. @@ -941,23 +952,23 @@ void expected_pk_fault(int pkey) pkey_assert(last_si_pkey == pkey);
/* - * The signal handler shold have cleared out PKRU to let the + * The signal handler shold have cleared out PKEY register to let the * test program continue. We now have to restore it. */ - if (__rdpkru() != 0) + if (__read_pkey_reg() != 0) pkey_assert(0);
- __wrpkru(shadow_pkru); - dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", - __func__, shadow_pkru); - last_pkru_faults = pkru_faults; + __write_pkey_reg(shadow_pkey_reg); + dprintf1("%s() set pkey_reg=%x to restore state after signal " + "nuked it\n", __func__, shadow_pkey_reg); + last_pkey_faults = pkey_faults; last_si_pkey = -1; }
-#define do_not_expect_pk_fault(msg) do { \ - if (last_pkru_faults != pkru_faults) \ - dprintf0("unexpected PK fault: %s\n", msg); \ - pkey_assert(last_pkru_faults == pkru_faults); \ +#define do_not_expect_pkey_fault(msg) do { \ + if (last_pkey_faults != pkey_faults) \ + dprintf0("unexpected PKey fault: %s\n", msg); \ + pkey_assert(last_pkey_faults == pkey_faults); \ } while (0)
int test_fds[10] = { -1 }; @@ -1015,25 +1026,25 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) int ptr_contents;
dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); - rdpkru(); + read_pkey_reg(); pkey_access_deny(pkey); ptr_contents = read_ptr(ptr); dprintf1("*ptr: %d\n", ptr_contents); - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); pkey_write_deny(pkey); *ptr = __LINE__; - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_write_of_access_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); pkey_access_deny(pkey); *ptr = __LINE__; - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { @@ -1145,9 +1156,10 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, err, __rdpkru(), shadow_pkru); - rdpkru(); /* for shadow checking */ + dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, err, __read_pkey_reg(), + shadow_pkey_reg); + read_pkey_reg(); /* for shadow checking */ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { dprintf2("%s() failed to allocate pkey after %d tries\n", @@ -1180,7 +1192,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); pkey_assert(!err); - rdpkru(); /* for shadow checking */ + read_pkey_reg(); /* for shadow checking */ } }
@@ -1266,7 +1278,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect an exception: */ peek_result = read_ptr(ptr); - expected_pk_fault(pkey); + expected_pkey_fault(pkey);
/* * Try to access the NON-pkey-protected "plain_ptr" via ptrace: @@ -1276,7 +1288,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault("read plain pointer after ptrace"); + do_not_expect_pkey_fault("read plain pointer after ptrace");
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1326,17 +1338,17 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) pkey_assert(!ret); pkey_access_deny(pkey);
- dprintf2("pkru: %x\n", rdpkru()); + dprintf2("pkey_reg: %x\n", read_pkey_reg());
/* * Make sure this is an *instruction* fault */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault("executing on PROT_EXEC memory"); + do_not_expect_pkey_fault("executing on PROT_EXEC memory"); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(pkey); + expected_pkey_fault(pkey); }
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) @@ -1357,15 +1369,15 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); pkey_assert(!ret);
- dprintf2("pkru: %x\n", rdpkru()); + dprintf2("pkru: %x\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault("executing on PROT_EXEC memory"); + do_not_expect_pkey_fault("executing on PROT_EXEC memory"); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(UNKNOWN_PKEY); + expected_pkey_fault(UNKNOWN_PKEY);
/* * Put the memory back to non-PROT_EXEC. Should clear the @@ -1379,7 +1391,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); pkey_assert(!ret); ptr_contents = read_ptr(p1); - do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); + do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); }
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) @@ -1421,7 +1433,7 @@ void run_tests_once(void)
for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { int pkey; - int orig_pkru_faults = pkru_faults; + int orig_pkey_faults = pkey_faults;
dprintf1("======================\n"); dprintf1("test %d preparing...\n", test_nr); @@ -1436,8 +1448,8 @@ void run_tests_once(void) free_pkey_malloc(ptr); sys_pkey_free(pkey);
- dprintf1("pkru_faults: %d\n", pkru_faults); - dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); + dprintf1("pkey_faults: %d\n", pkey_faults); + dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
tracing_off(); close_test_fds(); @@ -1450,7 +1462,7 @@ void run_tests_once(void)
void pkey_setup_shadow(void) { - shadow_pkru = __rdpkru(); + shadow_pkey_reg = __read_pkey_reg(); }
int main(void) @@ -1474,7 +1486,7 @@ int main(void) }
pkey_setup_shadow(); - printf("startup pkru: %x\n", rdpkru()); + printf("startup pkey_reg: %x\n", read_pkey_reg()); setup_hugetlbfs();
while (nr_iterations-- > 0)
Moved all the generic definition and helper functions to the header file.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 35 ++++++++++++++++++++++--- tools/testing/selftests/vm/protection_keys.c | 27 -------------------- 2 files changed, 30 insertions(+), 32 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index d5779be..6ad1bd5 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -13,6 +13,14 @@ #include <ucontext.h> #include <sys/mman.h>
+/* Define some kernel-like types */ +#define u8 uint8_t +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) + #define NR_PKEYS 16 #define PKEY_BITS_PER_PKEY 2
@@ -53,6 +61,18 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf3(args...) dprintf_level(3, args) #define dprintf4(args...) dprintf_level(4, args)
+extern void abort_hooks(void); +#define pkey_assert(condition) do { \ + if (!(condition)) { \ + dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ + __FILE__, __LINE__, \ + test_nr, iteration_nr); \ + dprintf0("errno at assert: %d", errno); \ + abort_hooks(); \ + exit(__LINE__); \ + } \ +} while (0) + extern unsigned int shadow_pkey_reg; static inline unsigned int __read_pkey_reg(void) { @@ -137,11 +157,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) dprintf4("pkey_reg now: %08x\n", read_pkey_reg()); }
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ -#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ -#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ -#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ - #define PAGE_SIZE 4096 #define MB (1<<20)
@@ -219,4 +234,14 @@ int pkey_reg_xstate_offset(void) return xstate_offset; }
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) \ + ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) +#define ALIGN_PTR_DOWN(p, ptr_align_to) \ + ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + #endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 9f373cc..cad52dc 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -51,31 +51,10 @@ unsigned int shadow_pkey_reg;
#define HPAGE_SIZE (1UL<<21) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) -#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) -#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-extern void abort_hooks(void); -#define pkey_assert(condition) do { \ - if (!(condition)) { \ - dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ - __FILE__, __LINE__, \ - test_nr, iteration_nr); \ - dprintf0("errno at assert: %d", errno); \ - abort_hooks(); \ - exit(__LINE__); \ - } \ -} while (0) - void cat_into_file(char *str, char *file) { int fd = open(file, O_RDWR); @@ -186,12 +165,6 @@ void lots_o_noops_around_write(int *write_to_me) dprintf3("%s() done\n", __func__); }
-/* Define some kernel-like types */ -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - #ifdef __i386__
#ifndef SYS_mprotect_key
Acked-by: Dave Hansen dave.hansen@intel.com -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Thiago Jung Bauermann bauerman@linux.ibm.com
In preparation for multi-arch support, move definitions which have arch-specific values to x86-specific header.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 111 +----------------- tools/testing/selftests/vm/pkey-x86.h | 156 ++++++++++++++++++++++++++ tools/testing/selftests/vm/protection_keys.c | 47 -------- 3 files changed, 162 insertions(+), 152 deletions(-) create mode 100644 tools/testing/selftests/vm/pkey-x86.h
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 6ad1bd5..3ed2f02 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -21,9 +21,6 @@
#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-#define NR_PKEYS 16 -#define PKEY_BITS_PER_PKEY 2 - #ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 #endif @@ -73,19 +70,13 @@ static inline void sigsafe_printf(const char *format, ...) } \ } while (0)
+#if defined(__i386__) || defined(__x86_64__) /* arch */ +#include "pkey-x86.h" +#else /* arch */ +#error Architecture not supported +#endif /* arch */ + extern unsigned int shadow_pkey_reg; -static inline unsigned int __read_pkey_reg(void) -{ - unsigned int eax, edx; - unsigned int ecx = 0; - unsigned int pkey_reg; - - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (eax), "=d" (edx) - : "c" (ecx)); - pkey_reg = eax; - return pkey_reg; -}
static inline unsigned int _read_pkey_reg(int line) { @@ -100,19 +91,6 @@ static inline unsigned int _read_pkey_reg(int line)
#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void __write_pkey_reg(unsigned int pkey_reg) -{ - unsigned int eax = pkey_reg; - unsigned int ecx = 0; - unsigned int edx = 0; - - dprintf4("%s() changing %08x to %08x\n", __func__, - __read_pkey_reg(), pkey_reg); - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkey_reg == __read_pkey_reg()); -} - static inline void write_pkey_reg(unsigned int pkey_reg) { dprintf4("%s() changing %08x to %08x\n", __func__, @@ -157,83 +135,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) dprintf4("pkey_reg now: %08x\n", read_pkey_reg()); }
-#define PAGE_SIZE 4096 -#define MB (1<<20) - -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ -#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ - -static inline int cpu_has_pku(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - - eax = 0x7; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (!(ecx & X86_FEATURE_PKU)) { - dprintf2("cpu does not have PKU\n"); - return 0; - } - if (!(ecx & X86_FEATURE_OSPKE)) { - dprintf2("cpu does not have OSPKE\n"); - return 0; - } - return 1; -} - -#define XSTATE_PKEY_BIT (9) -#define XSTATE_PKEY 0x200 - -int pkey_reg_xstate_offset(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - int xstate_offset; - int xstate_size; - unsigned long XSTATE_CPUID = 0xd; - int leaf; - - /* assume that XSTATE_PKEY is set in XCR0 */ - leaf = XSTATE_PKEY_BIT; - { - eax = XSTATE_CPUID; - ecx = leaf; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (leaf == XSTATE_PKEY_BIT) { - xstate_offset = ebx; - xstate_size = eax; - } - } - - if (xstate_size == 0) { - printf("could not find size/offset of PKEY in xsave state\n"); - return 0; - } - - return xstate_offset; -} - #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h new file mode 100644 index 0000000..2f04ade --- /dev/null +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _PKEYS_X86_H +#define _PKEYS_X86_H + +#ifdef __i386__ + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 380 +#endif + +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 +#endif + +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 + +#else + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 329 +#endif + +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 +#endif + +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 + +#endif + +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif + +#define NR_PKEYS 16 +#define PKEY_BITS_PER_PKEY 2 +#define HPAGE_SIZE (1UL<<21) +#define PAGE_SIZE 4096 +#define MB (1<<20) + +static inline void __page_o_noops(void) +{ + /* 8-bytes of instruction * 512 bytes = 1 page */ + asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); +} + +static inline unsigned int __read_pkey_reg(void) +{ + unsigned int eax, edx; + unsigned int ecx = 0; + unsigned int pkey_reg; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkey_reg = eax; + return pkey_reg; +} + +static inline void __write_pkey_reg(unsigned int pkey_reg) +{ + unsigned int eax = pkey_reg; + unsigned int ecx = 0; + unsigned int edx = 0; + + dprintf4("%s() changing %08x to %08x\n", __func__, + __read_pkey_reg(), pkey_reg); + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); + assert(pkey_reg == __read_pkey_reg()); +} + +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ +#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ + +static inline int cpu_has_pku(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + eax = 0x7; + ecx = 0x0; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (!(ecx & X86_FEATURE_PKU)) { + dprintf2("cpu does not have PKU\n"); + return 0; + } + if (!(ecx & X86_FEATURE_OSPKE)) { + dprintf2("cpu does not have OSPKE\n"); + return 0; + } + return 1; +} + +#define XSTATE_PKEY_BIT (9) +#define XSTATE_PKEY 0x200 + +int pkey_reg_xstate_offset(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + int xstate_offset; + int xstate_size; + unsigned long XSTATE_CPUID = 0xd; + int leaf; + + /* assume that XSTATE_PKEY is set in XCR0 */ + leaf = XSTATE_PKEY_BIT; + { + eax = XSTATE_CPUID; + ecx = leaf; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (leaf == XSTATE_PKEY_BIT) { + xstate_offset = ebx; + xstate_size = eax; + } + } + + if (xstate_size == 0) { + printf("could not find size/offset of PKEY in xsave state\n"); + return 0; + } + + return xstate_offset; +} + +#endif /* _PKEYS_X86_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index cad52dc..99e4e1e 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -49,9 +49,6 @@ int test_nr;
unsigned int shadow_pkey_reg; - -#define HPAGE_SIZE (1UL<<21) - int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
@@ -137,12 +134,6 @@ void abort_hooks(void) #endif }
-static inline void __page_o_noops(void) -{ - /* 8-bytes of instruction * 512 bytes = 1 page */ - asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); -} - /* * This attempts to have roughly a page of instructions followed by a few * instructions that do a write, and another page of instructions. That @@ -165,36 +156,6 @@ void lots_o_noops_around_write(int *write_to_me) dprintf3("%s() done\n", __func__); }
-#ifdef __i386__ - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif - -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 - -#else - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif - -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 - -#endif - void dump_mem(void *dumpme, int len_bytes) { char *c = (void *)dumpme; @@ -367,14 +328,6 @@ pid_t fork_lazy_child(void) return forkret; }
-#ifndef PKEY_DISABLE_ACCESS -# define PKEY_DISABLE_ACCESS 0x1 -#endif - -#ifndef PKEY_DISABLE_WRITE -# define PKEY_DISABLE_WRITE 0x2 -#endif - static u32 hw_pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
On 07/17/2018 06:49 AM, Ram Pai wrote:
In preparation for multi-arch support, move definitions which have arch-specific values to x86-specific header.
Acked-by: Dave Hansen dave.hansen@intel.com -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Thiago Jung Bauermann bauerman@linux.ibm.com
This will help us ensure we print pkey_reg_t values correctly in different architectures.
Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 4 ++++ 1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 3ed2f02..7f18a82 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -27,6 +27,10 @@ #define DPRINT_IN_SIGNAL_BUF_SIZE 4096 extern int dprint_in_signal; extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +#ifdef __GNUC__ +__attribute__((format(printf, 1, 2))) +#endif static inline void sigsafe_printf(const char *format, ...) { va_list ap;
This is in preparation to accomadate a differing size register across architectures.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 23 ++++--- tools/testing/selftests/vm/pkey-x86.h | 16 +++-- tools/testing/selftests/vm/protection_keys.c | 87 +++++++++++++++---------- 3 files changed, 73 insertions(+), 53 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 7f18a82..2a1a024 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -80,13 +80,14 @@ static inline void sigsafe_printf(const char *format, ...) #error Architecture not supported #endif /* arch */
-extern unsigned int shadow_pkey_reg; +extern pkey_reg_t shadow_pkey_reg;
-static inline unsigned int _read_pkey_reg(int line) +static inline pkey_reg_t _read_pkey_reg(int line) { - unsigned int pkey_reg = __read_pkey_reg(); + pkey_reg_t pkey_reg = __read_pkey_reg();
- dprintf4("read_pkey_reg(line=%d) pkey_reg: %x shadow: %x\n", + dprintf4("read_pkey_reg(line=%d) pkey_reg: "PKEY_REG_FMT + " shadow: "PKEY_REG_FMT"\n", line, pkey_reg, shadow_pkey_reg); assert(pkey_reg == shadow_pkey_reg);
@@ -95,15 +96,15 @@ static inline unsigned int _read_pkey_reg(int line)
#define read_pkey_reg() _read_pkey_reg(__LINE__)
-static inline void write_pkey_reg(unsigned int pkey_reg) +static inline void write_pkey_reg(pkey_reg_t pkey_reg) { - dprintf4("%s() changing %08x to %08x\n", __func__, + dprintf4("%s() changing "PKEY_REG_FMT" to "PKEY_REG_FMT"\n", __func__, __read_pkey_reg(), pkey_reg); /* will do the shadow check for us: */ read_pkey_reg(); __write_pkey_reg(pkey_reg); shadow_pkey_reg = pkey_reg; - dprintf4("%s(%08x) pkey_reg: %08x\n", __func__, + dprintf4("%s("PKEY_REG_FMT") pkey_reg: "PKEY_REG_FMT"\n", __func__, pkey_reg, __read_pkey_reg()); }
@@ -113,7 +114,7 @@ static inline void write_pkey_reg(unsigned int pkey_reg) */ static inline void __pkey_access_allow(int pkey, int do_allow) { - unsigned int pkey_reg = read_pkey_reg(); + pkey_reg_t pkey_reg = read_pkey_reg(); int bit = pkey * 2;
if (do_allow) @@ -121,13 +122,13 @@ static inline void __pkey_access_allow(int pkey, int do_allow) else pkey_reg |= (1<<bit);
- dprintf4("pkey_reg now: %08x\n", read_pkey_reg()); + dprintf4("pkey_reg now: "PKEY_REG_FMT"\n", read_pkey_reg()); write_pkey_reg(pkey_reg); }
static inline void __pkey_write_allow(int pkey, int do_allow_write) { - long pkey_reg = read_pkey_reg(); + pkey_reg_t pkey_reg = read_pkey_reg(); int bit = pkey * 2 + 1;
if (do_allow_write) @@ -136,7 +137,7 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) pkey_reg |= (1<<bit);
write_pkey_reg(pkey_reg); - dprintf4("pkey_reg now: %08x\n", read_pkey_reg()); + dprintf4("pkey_reg now: "PKEY_REG_FMT"\n", read_pkey_reg()); }
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index 2f04ade..5f40901 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -46,6 +46,8 @@ #define HPAGE_SIZE (1UL<<21) #define PAGE_SIZE 4096 #define MB (1<<20) +#define pkey_reg_t u32 +#define PKEY_REG_FMT "%016x"
static inline void __page_o_noops(void) { @@ -53,11 +55,11 @@ static inline void __page_o_noops(void) asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); }
-static inline unsigned int __read_pkey_reg(void) +static inline pkey_reg_t __read_pkey_reg(void) { unsigned int eax, edx; unsigned int ecx = 0; - unsigned int pkey_reg; + pkey_reg_t pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (eax), "=d" (edx) @@ -66,13 +68,13 @@ static inline unsigned int __read_pkey_reg(void) return pkey_reg; }
-static inline void __write_pkey_reg(unsigned int pkey_reg) +static inline void __write_pkey_reg(pkey_reg_t pkey_reg) { - unsigned int eax = pkey_reg; - unsigned int ecx = 0; - unsigned int edx = 0; + pkey_reg_t eax = pkey_reg; + pkey_reg_t ecx = 0; + pkey_reg_t edx = 0;
- dprintf4("%s() changing %08x to %08x\n", __func__, + dprintf4("%s() changing "PKEY_REG_FMT" to "PKEY_REG_FMT"\n", __func__, __read_pkey_reg(), pkey_reg); asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (eax), "c" (ecx), "d" (edx)); diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 99e4e1e..d95fe20 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -48,7 +48,7 @@ int iteration_nr = 1; int test_nr;
-unsigned int shadow_pkey_reg; +pkey_reg_t shadow_pkey_reg; int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
@@ -163,7 +163,7 @@ void dump_mem(void *dumpme, int len_bytes)
for (i = 0; i < len_bytes; i += sizeof(u64)) { u64 *ptr = (u64 *)(c + i); - dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); + dprintf1("dump[%03d][@%p]: %016lx\n", i, ptr, *ptr); } }
@@ -197,7 +197,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) int trapno; unsigned long ip; char *fpregs; - u32 *pkey_reg_ptr; + pkey_reg_t *pkey_reg_ptr; u64 siginfo_pkey; u32 *si_pkey_ptr; int pkey_reg_offset; @@ -205,7 +205,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__, + dprintf1("%s()::%d, pkey_reg: "PKEY_REG_FMT" shadow: "PKEY_REG_FMT"\n", + __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; @@ -213,8 +214,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) fpregset = uctxt->uc_mcontext.fpregs; fpregs = (void *)fpregset;
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, - trapno, ip, si_code_str(si->si_code), si->si_code); + dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", + __func__, trapno, ip, si_code_str(si->si_code), + si->si_code); #ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether @@ -251,12 +253,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) pkey_assert(siginfo_pkey < NR_PKEYS); last_si_pkey = siginfo_pkey;
- dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); + dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr); /* * need __read_pkey_reg() version so we do not do shadow_pkey_reg * checking */ - dprintf1("signal pkey_reg from pkey_reg: %08x\n", __read_pkey_reg()); + dprintf1("signal pkey_reg from pkey_reg: "PKEY_REG_FMT"\n", + __read_pkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkey_reg_ptr = 0x00000000; dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); @@ -331,16 +334,17 @@ pid_t fork_lazy_child(void) static u32 hw_pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkey_reg = __read_pkey_reg(); - u32 shifted_pkey_reg; + pkey_reg_t pkey_reg = __read_pkey_reg(); + pkey_reg_t shifted_pkey_reg; u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg); + dprintf2("%s() raw pkey_reg: "PKEY_REG_FMT"\n", __func__, pkey_reg);
shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg); + dprintf2("%s() shifted_pkey_reg: "PKEY_REG_FMT"\n", __func__, + shifted_pkey_reg); masked_pkey_reg = shifted_pkey_reg & mask; dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg); /* @@ -353,8 +357,8 @@ static u32 hw_pkey_get(int pkey, unsigned long flags) static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkey_reg = __read_pkey_reg(); - u32 new_pkey_reg; + pkey_reg_t old_pkey_reg = __read_pkey_reg(); + pkey_reg_t new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */ assert(!(rights & ~mask)); @@ -369,7 +373,7 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) __write_pkey_reg(new_pkey_reg);
dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" - " pkey_reg now: %x old_pkey_reg: %x\n", + " pkey_reg now: "PKEY_REG_FMT" old_pkey_reg: "PKEY_REG_FMT"\n", __func__, pkey, rights, flags, 0, __read_pkey_reg(), old_pkey_reg); return 0; @@ -380,7 +384,7 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u32 orig_pkey_reg = read_pkey_reg(); + pkey_reg_t orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -390,6 +394,7 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0);
pkey_rights |= flags; @@ -398,7 +403,8 @@ void pkey_disable_set(int pkey, int flags) assert(!ret); /*pkey_reg and flags have the same format */ shadow_pkey_reg |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg); + dprintf1("%s(%d) shadow: 0x"PKEY_REG_FMT"\n", + __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -406,7 +412,8 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg()); + dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", + __func__, pkey, read_pkey_reg()); if (flags) pkey_assert(read_pkey_reg() > orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, @@ -418,7 +425,7 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u32 orig_pkey_reg = read_pkey_reg(); + pkey_reg_t orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -436,7 +443,8 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, read_pkey_reg()); + dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__, + pkey, read_pkey_reg()); if (flags) assert(read_pkey_reg() > orig_pkey_reg); } @@ -491,20 +499,22 @@ int alloc_pkey(void) int ret; unsigned long init_val = 0x0;
- dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, - __LINE__, __read_pkey_reg(), shadow_pkey_reg); + dprintf1("%s()::%d, pkey_reg: "PKEY_REG_FMT" shadow: "PKEY_REG_FMT"\n", + __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); ret = sys_pkey_alloc(0, init_val); /* * pkey_alloc() sets PKEY register, so we need to reflect it in * shadow_pkey_reg: */ - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); if (ret) { /* clear both the bits: */ shadow_pkey_reg &= ~(0x3 << (ret * 2)); - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); @@ -514,13 +524,15 @@ int alloc_pkey(void) */ shadow_pkey_reg |= (init_val << (ret * 2)); } - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); /* for shadow checking: */ read_pkey_reg(); - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); return ret; @@ -573,7 +585,8 @@ int alloc_random_pkey(void) free_ret = sys_pkey_free(alloced_pkeys[i]); pkey_assert(!free_ret); } - dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, + dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); return ret; } @@ -592,11 +605,13 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, if (nr_iterations-- < 0) break;
- dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); } @@ -606,7 +621,8 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", ptr, size, orig_prot, pkey, ret); pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, + dprintf1("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg); return ret; } @@ -885,7 +901,7 @@ void expected_pkey_fault(int pkey) pkey_assert(0);
__write_pkey_reg(shadow_pkey_reg); - dprintf1("%s() set pkey_reg=%x to restore state after signal " + dprintf1("%s() set pkey_reg="PKEY_REG_FMT" to restore state after signal " "nuked it\n", __func__, shadow_pkey_reg); last_pkey_faults = pkey_faults; last_si_pkey = -1; @@ -1082,7 +1098,8 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, err: %d pkey_reg: 0x"PKEY_REG_FMT + " shadow: 0x"PKEY_REG_FMT"\n", __func__, __LINE__, err, __read_pkey_reg(), shadow_pkey_reg); read_pkey_reg(); /* for shadow checking */ @@ -1264,7 +1281,7 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) pkey_assert(!ret); pkey_access_deny(pkey);
- dprintf2("pkey_reg: %x\n", read_pkey_reg()); + dprintf2("pkey_reg: "PKEY_REG_FMT"\n", read_pkey_reg());
/* * Make sure this is an *instruction* fault @@ -1295,7 +1312,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); pkey_assert(!ret);
- dprintf2("pkru: %x\n", read_pkey_reg()); + dprintf2("pkey_reg: "PKEY_REG_FMT"\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); @@ -1412,7 +1429,7 @@ int main(void) }
pkey_setup_shadow(); - printf("startup pkey_reg: %x\n", read_pkey_reg()); + printf("startup pkey_reg: 0x"PKEY_REG_FMT"\n", read_pkey_reg()); setup_hugetlbfs();
while (nr_iterations-- > 0)
On 07/17/2018 06:49 AM, Ram Pai wrote:
This is in preparation to accomadate a differing size register across architectures.
This is pretty fugly, and reading it again, I wonder whether we should have just made it 64-bit, at least in all the printk's. Or even
prink("pk reg: %*llx\n", PKEY_FMT_LEN, pkey_reg);
But, I don't _really_ care in the end.
Acked-by: Dave Hansen dave.hansen@intel.com -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
helper functions to handler shadow pkey register
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 29 ++++++++++++++++++++++ tools/testing/selftests/vm/pkey-x86.h | 5 ++++ tools/testing/selftests/vm/protection_keys.c | 34 ++++++++++++++++--------- 3 files changed, 56 insertions(+), 12 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 2a1a024..ada0146 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -80,6 +80,35 @@ static inline void sigsafe_printf(const char *format, ...) #error Architecture not supported #endif /* arch */
+static inline pkey_reg_t clear_pkey_flags(int pkey, pkey_reg_t flags) +{ + u32 shift = pkey_bit_position(pkey); + + return ~(flags << shift); +} + +/* + * Takes pkey flags and puts them at the right bit position for the given key so + * that the result can be ORed into the register. + */ +static inline pkey_reg_t left_shift_bits(int pkey, pkey_reg_t bits) +{ + u32 shift = pkey_bit_position(pkey); + + return (bits << shift); +} + +/* + * Takes pkey register values and puts the flags for the given pkey at the least + * significant bits of the returned value. + */ +static inline pkey_reg_t right_shift_bits(int pkey, pkey_reg_t bits) +{ + u32 shift = pkey_bit_position(pkey); + + return (bits >> shift); +} + extern pkey_reg_t shadow_pkey_reg;
static inline pkey_reg_t _read_pkey_reg(int line) diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index 5f40901..2b3780d 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -49,6 +49,11 @@ #define pkey_reg_t u32 #define PKEY_REG_FMT "%016x"
+static inline u32 pkey_bit_position(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + static inline void __page_o_noops(void) { /* 8-bytes of instruction * 512 bytes = 1 page */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index d95fe20..fb7dd32 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -342,7 +342,7 @@ static u32 hw_pkey_get(int pkey, unsigned long flags) __func__, pkey, flags, 0, 0); dprintf2("%s() raw pkey_reg: "PKEY_REG_FMT"\n", __func__, pkey_reg);
- shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY)); + shifted_pkey_reg = right_shift_bits(pkey, pkey_reg); dprintf2("%s() shifted_pkey_reg: "PKEY_REG_FMT"\n", __func__, shifted_pkey_reg); masked_pkey_reg = shifted_pkey_reg & mask; @@ -366,9 +366,9 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) /* copy old pkey_reg */ new_pkey_reg = old_pkey_reg; /* mask out bits from pkey in old value: */ - new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY)); + new_pkey_reg &= clear_pkey_flags(pkey, mask); /* OR in new bits for pkey: */ - new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY)); + new_pkey_reg |= left_shift_bits(pkey, rights);
__write_pkey_reg(new_pkey_reg);
@@ -402,7 +402,7 @@ void pkey_disable_set(int pkey, int flags) ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); /*pkey_reg and flags have the same format */ - shadow_pkey_reg |= flags << (pkey * 2); + shadow_pkey_reg |= left_shift_bits(pkey, flags); dprintf1("%s(%d) shadow: 0x"PKEY_REG_FMT"\n", __func__, pkey, shadow_pkey_reg);
@@ -436,7 +436,7 @@ void pkey_disable_clear(int pkey, int flags) pkey_rights |= flags;
ret = hw_pkey_set(pkey, pkey_rights, 0); - shadow_pkey_reg &= ~(flags << (pkey * 2)); + shadow_pkey_reg &= clear_pkey_flags(pkey, flags); pkey_assert(ret >= 0);
pkey_rights = hw_pkey_get(pkey, syscall_flags); @@ -494,6 +494,21 @@ int sys_pkey_alloc(unsigned long flags, unsigned long init_val) return ret; }
+void pkey_setup_shadow(void) +{ + shadow_pkey_reg = __read_pkey_reg(); +} + +void pkey_reset_shadow(u32 key) +{ + shadow_pkey_reg &= clear_pkey_flags(key, 0x3); +} + +void pkey_set_shadow(u32 key, u64 init_val) +{ + shadow_pkey_reg |= left_shift_bits(key, init_val); +} + int alloc_pkey(void) { int ret; @@ -512,7 +527,7 @@ int alloc_pkey(void) shadow_pkey_reg); if (ret) { /* clear both the bits: */ - shadow_pkey_reg &= ~(0x3 << (ret * 2)); + pkey_reset_shadow(ret); dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT " shadow: 0x"PKEY_REG_FMT"\n", __func__, @@ -522,7 +537,7 @@ int alloc_pkey(void) * move the new state in from init_val * (remember, we cheated and init_val == pkey_reg format) */ - shadow_pkey_reg |= (init_val << (ret * 2)); + pkey_set_shadow(ret, init_val); } dprintf4("%s()::%d, ret: %d pkey_reg: 0x"PKEY_REG_FMT " shadow: 0x"PKEY_REG_FMT"\n", @@ -1403,11 +1418,6 @@ void run_tests_once(void) iteration_nr++; }
-void pkey_setup_shadow(void) -{ - shadow_pkey_reg = __read_pkey_reg(); -} - int main(void) { int nr_iterations = 22;
On 07/17/2018 06:49 AM, Ram Pai wrote:
- shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY));
- shifted_pkey_reg = right_shift_bits(pkey, pkey_reg); dprintf2("%s() shifted_pkey_reg: "PKEY_REG_FMT"\n", __func__, shifted_pkey_reg); masked_pkey_reg = shifted_pkey_reg & mask;
I'm not a fan of how this looks. This is almost certainly going to get the argument order mixed up at some point.
Why do we need this? The description doesn't say. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
If the flag is 0, no bits will be set. Hence we cant expect the resulting bitmap to have a higher value than what it was earlier.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index fb7dd32..2dd94c3 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -415,7 +415,7 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__, pkey, read_pkey_reg()); if (flags) - pkey_assert(read_pkey_reg() > orig_pkey_reg); + pkey_assert(read_pkey_reg() >= orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); }
On 07/17/2018 06:49 AM, Ram Pai wrote:
If the flag is 0, no bits will be set. Hence we cant expect the resulting bitmap to have a higher value than what it was earlier.
...
--- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -415,7 +415,7 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__, pkey, read_pkey_reg()); if (flags)
pkey_assert(read_pkey_reg() > orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags);pkey_assert(read_pkey_reg() >= orig_pkey_reg);
}
I know these are just selftests, but this change makes zero sense without the context from how powerpc works. It's also totally non-obvious from the patch itself what is going on, even though I specifically called this out in a previous review.
Please add a comment here that either specifically calls out powerpc or talks about "an architecture that does this ..."
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
instead of clearing the bits, pkey_disable_clear() was setting the bits. Fixed it.
Also fixed a wrong assertion in that function. When bits are cleared, the resulting bit value will be less than the original.
This hasn't been a problem so far because this code isn't currently used.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2dd94c3..8fa4f74 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -433,7 +433,7 @@ void pkey_disable_clear(int pkey, int flags) pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0);
- pkey_rights |= flags; + pkey_rights &= ~flags;
ret = hw_pkey_set(pkey, pkey_rights, 0); shadow_pkey_reg &= clear_pkey_flags(pkey, flags); @@ -446,7 +446,7 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__, pkey, read_pkey_reg()); if (flags) - assert(read_pkey_reg() > orig_pkey_reg); + assert(read_pkey_reg() <= orig_pkey_reg); }
void pkey_write_allow(int pkey)
On 07/17/2018 06:49 AM, Ram Pai wrote:
instead of clearing the bits, pkey_disable_clear() was setting the bits. Fixed it.
Again, I know these are just selftests, but I'm seeing a real lack of attention to detail with these. Please at least go to the trouble of writing actual changelogs with full sentences and actual capitalization. I think it's the least you can do if I'm going to spend time reviewing these.
I'll go one better. Can you just use this as a changelog?
pkey_disable_clear() does not work. Instead of clearing bits, it sets them, probably because it originated as a copy-and-paste of pkey_disable_set().
This has been dead code up to now because the only callers are pkey_access/write_allow() and _those_ are currently unused.
Also fixed a wrong assertion in that function. When bits are cleared, the resulting bit value will be less than the original.
This hasn't been a problem so far because this code isn't currently used.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com
tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2dd94c3..8fa4f74 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -433,7 +433,7 @@ void pkey_disable_clear(int pkey, int flags) pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0);
- pkey_rights |= flags;
pkey_rights &= ~flags;
ret = hw_pkey_set(pkey, pkey_rights, 0); shadow_pkey_reg &= clear_pkey_flags(pkey, flags);
@@ -446,7 +446,7 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x"PKEY_REG_FMT"\n", __func__, pkey, read_pkey_reg());
If you add a better changelog:
Acked-by: Dave Hansen dave.hansen@intel.com
if (flags)
assert(read_pkey_reg() > orig_pkey_reg);
assert(read_pkey_reg() <= orig_pkey_reg);
}
This really is an orthogonal change that would have been better placed with the other patch that did the exact same thing. But I'd be OK with it here, as long as it has an actual comment. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
alloc_random_pkey() was allocating the same pkey every time. Not all pkeys were geting tested. fixed it.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 6 +++++- 1 files changed, 5 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 8fa4f74..2565b4c 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -24,6 +24,7 @@ #define _GNU_SOURCE #include <errno.h> #include <linux/futex.h> +#include <time.h> #include <sys/time.h> #include <sys/syscall.h> #include <string.h> @@ -573,13 +574,15 @@ int alloc_random_pkey(void) int alloced_pkeys[NR_PKEYS]; int nr_alloced = 0; int random_index; + memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); + srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */ max_nr_pkey_allocs = NR_PKEYS; - max_nr_pkey_allocs = 1; for (i = 0; i < max_nr_pkey_allocs; i++) { int new_pkey = alloc_pkey(); + if (new_pkey < 0) break; alloced_pkeys[nr_alloced++] = new_pkey; @@ -595,6 +598,7 @@ int alloc_random_pkey(void) /* go through the allocated ones that we did not want and free them */ for (i = 0; i < nr_alloced; i++) { int free_ret; + if (!alloced_pkeys[i]) continue; free_ret = sys_pkey_free(alloced_pkeys[i]);
On 07/17/2018 06:49 AM, Ram Pai wrote:
alloc_random_pkey() was allocating the same pkey every time. Not all pkeys were geting tested. fixed it.
This fixes a real issue but also unnecessarily munges whitespace. If you rev these again, please fix the munging. Otherwise:
Acked-by: Dave Hansen dave.hansen@intel.com -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
open_hugepage_file() <- opens the huge page file get_start_key() <-- provides the first non-reserved key.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com Reviewed-by: Dave Hansen dave.hansen@intel.com --- tools/testing/selftests/vm/pkey-helpers.h | 10 ++++++++++ tools/testing/selftests/vm/pkey-x86.h | 1 + tools/testing/selftests/vm/protection_keys.c | 6 +++--- 3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index ada0146..52a1152 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -179,4 +179,14 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) #define __stringify_1(x...) #x #define __stringify(x...) __stringify_1(x)
+static inline int open_hugepage_file(int flag) +{ + return open(HUGEPAGE_FILE, flag); +} + +static inline int get_start_key(void) +{ + return 1; +} + #endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index 2b3780d..d5fa299 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -48,6 +48,7 @@ #define MB (1<<20) #define pkey_reg_t u32 #define PKEY_REG_FMT "%016x" +#define HUGEPAGE_FILE "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages"
static inline u32 pkey_bit_position(int pkey) { diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2565b4c..2e448e0 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -788,7 +788,7 @@ void setup_hugetlbfs(void) * Now go make sure that we got the pages and that they * are 2M pages. Someone might have made 1G the default. */ - fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); + fd = open_hugepage_file(O_RDONLY); if (fd < 0) { perror("opening sysfs 2M hugetlb config"); return; @@ -1075,10 +1075,10 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err; - int i; + int i = get_start_key();
/* Note: 0 is the default pkey, so don't mess with it */ - for (i = 1; i < NR_PKEYS; i++) { + for (; i < NR_PKEYS; i++) { if (pkey == i) continue;
On 07/17/2018 06:49 AM, Ram Pai wrote:
open_hugepage_file() <- opens the huge page file
Folks, a sentence here would be nice:
Different architectures have different huge page sizes and thus have different sysfs filees to manipulate when allocating huge pages.
get_start_key() <-- provides the first non-reserved key.
Does powerpc not start on key 0? Why do you need this?
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com Reviewed-by: Dave Hansen dave.hansen@intel.com
tools/testing/selftests/vm/pkey-helpers.h | 10 ++++++++++ tools/testing/selftests/vm/pkey-x86.h | 1 + tools/testing/selftests/vm/protection_keys.c | 6 +++--- 3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index ada0146..52a1152 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -179,4 +179,14 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) #define __stringify_1(x...) #x #define __stringify(x...) __stringify_1(x) +static inline int open_hugepage_file(int flag) +{
- return open(HUGEPAGE_FILE, flag);
+}
open_nr_hugepages_file() if you revise this, please
+static inline int get_start_key(void) +{
- return 1;
+}
get_first_user_pkey(), please.
#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index 2b3780d..d5fa299 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -48,6 +48,7 @@ #define MB (1<<20) #define pkey_reg_t u32 #define PKEY_REG_FMT "%016x" +#define HUGEPAGE_FILE "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" static inline u32 pkey_bit_position(int pkey) { diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2565b4c..2e448e0 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -788,7 +788,7 @@ void setup_hugetlbfs(void) * Now go make sure that we got the pages and that they * are 2M pages. Someone might have made 1G the default. */
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
- fd = open_hugepage_file(O_RDONLY); if (fd < 0) { perror("opening sysfs 2M hugetlb config"); return;
This is fine, and obviously necessary.
@@ -1075,10 +1075,10 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err;
- int i;
- int i = get_start_key();
/* Note: 0 is the default pkey, so don't mess with it */
- for (i = 1; i < NR_PKEYS; i++) {
- for (; i < NR_PKEYS; i++) { if (pkey == i) continue;
Grumble, grumble, you moved the code away from the comment connected to it.
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
expected_pkey_fault() is comparing the contents of pkey register with 0. This may not be true all the time. There could be bits set by default by the architecture which can never be changed. Hence compare the value against shadow pkey register, which is supposed to track the bits accurately all throughout
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2e448e0..f50cce8 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -913,10 +913,10 @@ void expected_pkey_fault(int pkey) pkey_assert(last_si_pkey == pkey);
/* - * The signal handler shold have cleared out PKEY register to let the + * The signal handler should have cleared out pkey-register to let the * test program continue. We now have to restore it. */ - if (__read_pkey_reg() != 0) + if (__read_pkey_reg() != shadow_pkey_reg) pkey_assert(0);
__write_pkey_reg(shadow_pkey_reg);
On 07/17/2018 06:49 AM, Ram Pai wrote:
expected_pkey_fault() is comparing the contents of pkey register with 0. This may not be true all the time. There could be bits set by default by the architecture which can never be changed. Hence compare the value against shadow pkey register, which is supposed to track the bits accurately all throughout
This is getting dangerously close to full sentences that actually describe the patch. You forgot a period, but much this is a substantial improvement over earlier parts of the series. Thanks for writing this, seriously.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com
tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2e448e0..f50cce8 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -913,10 +913,10 @@ void expected_pkey_fault(int pkey) pkey_assert(last_si_pkey == pkey); /*
* The signal handler shold have cleared out PKEY register to let the
* The signal handler should have cleared out pkey-register to let the
*/
- test program continue. We now have to restore it.
... while I appreciate the spelling corrections, and I would totally ack a patch that fixed them in one fell swoop, could we please segregate the random spelling corrections from code fixes unless you touch those lines otherwise?
- if (__read_pkey_reg() != 0)
- if (__read_pkey_reg() != shadow_pkey_reg) pkey_assert(0);
__write_pkey_reg(shadow_pkey_reg);
I know this is a one-line change, but I don't fully understand it.
On x86, if we take a pkey fault, we clear PKRU entirely (via the on-stack XSAVE state that is restored at sigreturn) which allows the faulting instruction to resume and execute normally. That's what this check is looking for: Did the signal handler clear PKRU?
Now, you're saying that powerpc might not clear it. That makes sense.
While PKRU's state here is obvious, it isn't patently obvious to me what shadow_pkey_reg's state is. In fact, looking at it, I don't see the signal handler manipulating the shadow. So, how can this patch work? -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
cleanup the code to satisfy coding styles.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 64 +++++++++++++++++-------- 1 files changed, 43 insertions(+), 21 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index f50cce8..304f74f 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -4,7 +4,7 @@ * * There are examples in here of: * * how to set protection keys on memory - * * how to set/clear bits in pkey registers (the rights register) + * * how to set/clear bits in Protection Key registers (the rights register) * * how to handle SEGV_PKUERR signals and extract pkey-relevant * information from the siginfo * @@ -13,13 +13,18 @@ * prefault pages in at malloc, or not * protect MPX bounds tables with protection keys? * make sure VMA splitting/merging is working correctly - * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys - * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel - * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks + * OOMs can destroy mm->mmap (see exit_mmap()), + * so make sure it is immune to pkeys + * look for pkey "leaks" where it is still set on a VMA + * but "freed" back to the kernel + * do a plain mprotect() to a mprotect_pkey() area and make + * sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -o protection_keys -O2 -g -std=gnu99 + * -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 + * -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #include <errno.h> @@ -263,10 +268,12 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) __read_pkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkey_reg_ptr = 0x00000000; - dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); + dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction " + "to continue\n"); pkey_faults++; dprintf1("<<<<==================================================\n"); dprint_in_signal = 0; + return; }
int wait_all_children(void) @@ -384,7 +391,7 @@ void pkey_disable_set(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights; + u32 pkey_rights; pkey_reg_t orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, @@ -487,9 +494,10 @@ int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, return sret; }
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +int sys_pkey_alloc(unsigned long flags, u64 init_val) { int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", __func__, flags, init_val, ret, errno); return ret; @@ -513,7 +521,7 @@ void pkey_set_shadow(u32 key, u64 init_val) int alloc_pkey(void) { int ret; - unsigned long init_val = 0x0; + u64 init_val = 0x0;
dprintf1("%s()::%d, pkey_reg: "PKEY_REG_FMT" shadow: "PKEY_REG_FMT"\n", __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); @@ -669,7 +677,9 @@ void record_pkey_malloc(void *ptr, long size, int prot) /* every record is full */ size_t old_nr_records = nr_pkey_malloc_records; size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); - size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); + size_t new_size = new_nr_records * + sizeof(struct pkey_malloc_record); + dprintf2("new_nr_records: %zd\n", new_nr_records); dprintf2("new_size: %zd\n", new_size); pkey_malloc_records = realloc(pkey_malloc_records, new_size); @@ -695,9 +705,11 @@ void free_pkey_malloc(void *ptr) { long i; int ret; + dprintf3("%s(%p)\n", __func__, ptr); for (i = 0; i < nr_pkey_malloc_records; i++) { struct pkey_malloc_record *rec = &pkey_malloc_records[i]; + dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", ptr, i, rec, rec->ptr, rec->size); if ((ptr < rec->ptr) || @@ -778,11 +790,13 @@ void setup_hugetlbfs(void) char buf[] = "123";
if (geteuid() != 0) { - fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); + fprintf(stderr, + "WARNING: not run as root, can not do hugetlb test\n"); return; }
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); + cat_into_file(__stringify(GET_NR_HUGE_PAGES), + "/proc/sys/vm/nr_hugepages");
/* * Now go make sure that we got the pages and that they @@ -803,7 +817,8 @@ void setup_hugetlbfs(void) }
if (atoi(buf) != GET_NR_HUGE_PAGES) { - fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", + fprintf(stderr, "could not confirm 2M pages, got:" + " '%s' expected %d\n", buf, GET_NR_HUGE_PAGES); return; } @@ -945,6 +960,7 @@ void __save_test_fd(int fd) int get_test_read_fd(void) { int test_fd = open("/etc/passwd", O_RDONLY); + __save_test_fd(test_fd); return test_fd; } @@ -986,7 +1002,8 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) { int ptr_contents;
- dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", + pkey, ptr); read_pkey_reg(); pkey_access_deny(pkey); ptr_contents = read_ptr(ptr); @@ -1108,13 +1125,14 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) /* Assumes that all pkeys other than 'pkey' are unallocated */ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { - int err; + int err = 0; int allocated_pkeys[NR_PKEYS] = {0}; int nr_allocated_pkeys = 0; int i;
for (i = 0; i < NR_PKEYS*2; i++) { int new_pkey; + dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); dprintf4("%s()::%d, err: %d pkey_reg: 0x"PKEY_REG_FMT @@ -1122,9 +1140,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) __func__, __LINE__, err, __read_pkey_reg(), shadow_pkey_reg); read_pkey_reg(); /* for shadow checking */ - dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); + dprintf2("%s() errno: %d ENOSPC: %d\n", + __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { - dprintf2("%s() failed to allocate pkey after %d tries\n", + dprintf2("%s() failed to allocate pkey " + "after %d tries\n", __func__, nr_allocated_pkeys); break; } @@ -1416,7 +1436,8 @@ void run_tests_once(void) tracing_off(); close_test_fds();
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); + printf("test %2d PASSED (iteration %d)\n", + test_nr, iteration_nr); dprintf1("======================\n\n"); } iteration_nr++; @@ -1428,7 +1449,7 @@ int main(void)
setup_handlers();
- printf("has pku: %d\n", cpu_has_pku()); + printf("has pkey: %d\n", cpu_has_pku());
if (!cpu_has_pku()) { int size = PAGE_SIZE; @@ -1436,7 +1457,8 @@ int main(void)
printf("running PKEY tests for unsupported CPU/OS\n");
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, size, PROT_NONE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); assert(ptr != (void *)-1); test_mprotect_pkey_on_unsupported_cpu(ptr, 1); exit(0);
On 07/17/2018 06:49 AM, Ram Pai wrote:
cleanup the code to satisfy coding styles.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com
tools/testing/selftests/vm/protection_keys.c | 64 +++++++++++++++++-------- 1 files changed, 43 insertions(+), 21 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index f50cce8..304f74f 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -4,7 +4,7 @@
- There are examples in here of:
- how to set protection keys on memory
- how to set/clear bits in pkey registers (the rights register)
- how to set/clear bits in Protection Key registers (the rights register)
Huh? Which coding style says that we can't say "pkey"?
- how to handle SEGV_PKUERR signals and extract pkey-relevant
- information from the siginfo
@@ -13,13 +13,18 @@
- prefault pages in at malloc, or not
- protect MPX bounds tables with protection keys?
- make sure VMA splitting/merging is working correctly
- OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
- look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
- do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
- OOMs can destroy mm->mmap (see exit_mmap()),
so make sure it is immune to pkeys
- look for pkey "leaks" where it is still set on a VMA
but "freed" back to the kernel
- do a plain mprotect() to a mprotect_pkey() area and make
sure the pkey sticks
This makes it work substantially worse. That's not acceptable, even if you did move it under 80 columns.
- Compile like this:
- gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- gcc -o protection_keys -O2 -g -std=gnu99
-pthread -Wall protection_keys.c -lrt -ldl -lm
- gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99
*/
-pthread -Wall protection_keys.c -lrt -ldl -lm
Why was this on one line? Because it was easier to copy and paste. Please leave it on one line, CodingStyle be damned.
#define _GNU_SOURCE #include <errno.h> @@ -263,10 +268,12 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) __read_pkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkey_reg_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction "
"to continue\n");
It's actually totally OK to let printk strings go over 80 columns.
pkey_faults++; dprintf1("<<<<==================================================\n"); dprint_in_signal = 0;
- return;
}
Now we're just being silly.
int wait_all_children(void) @@ -384,7 +391,7 @@ void pkey_disable_set(int pkey, int flags) { unsigned long syscall_flags = 0; int ret;
- int pkey_rights;
- u32 pkey_rights;
This is not CodingStyle. Shouldn't this be the pkey_reg_t that you introduced earlier in the series?
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +int sys_pkey_alloc(unsigned long flags, u64 init_val) {
Um, this is actually a 'unsigned long' in the ABI.
Can you go back through this and actually make sure that these are real coding style cleanups? -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Introduce generic abstractions and provide architecture specific implementation for the abstractions.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 16 ++++- tools/testing/selftests/vm/pkey-powerpc.h | 93 ++++++++++++++++++++++++++ tools/testing/selftests/vm/pkey-x86.h | 15 ++++ tools/testing/selftests/vm/protection_keys.c | 52 ++++++++------ 4 files changed, 153 insertions(+), 23 deletions(-) create mode 100644 tools/testing/selftests/vm/pkey-powerpc.h
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 52a1152..321bbbd 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -74,8 +74,13 @@ static inline void sigsafe_printf(const char *format, ...) } \ } while (0)
+__attribute__((noinline)) int read_ptr(int *ptr); +void expected_pkey_fault(int pkey); + #if defined(__i386__) || defined(__x86_64__) /* arch */ #include "pkey-x86.h" +#elif defined(__powerpc64__) /* arch */ +#include "pkey-powerpc.h" #else /* arch */ #error Architecture not supported #endif /* arch */ @@ -186,7 +191,16 @@ static inline int open_hugepage_file(int flag)
static inline int get_start_key(void) { - return 1; + return 0; +} + +static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si) +{ +#ifdef si_pkey + return &si->si_pkey; +#else + return (u32 *)(((u8 *)si) + si_pkey_offset); +#endif }
#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h new file mode 100644 index 0000000..b0d1abe --- /dev/null +++ b/tools/testing/selftests/vm/pkey-powerpc.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _PKEYS_POWERPC_H +#define _PKEYS_POWERPC_H + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 386 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 384 +# define SYS_pkey_free 385 +#endif +#define REG_IP_IDX PT_NIP +#define REG_TRAPNO PT_TRAP +#define gregs gp_regs +#define fpregs fp_regs +#define si_pkey_offset 0x20 + +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x3 /* disable read and write */ +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif + +#define NR_PKEYS 32 +#define NR_RESERVED_PKEYS_4K 27 /* pkey-0, pkey-1, exec-only-pkey + and 24 other keys that cannot be + represented in the PTE */ +#define NR_RESERVED_PKEYS_64K 3 /* pkey-0, pkey-1 and exec-only-pkey */ +#define PKEY_BITS_PER_PKEY 2 +#define HPAGE_SIZE (1UL << 24) +#define PAGE_SIZE (1UL << 16) +#define pkey_reg_t u64 +#define PKEY_REG_FMT "%016lx" +#define HUGEPAGE_FILE "/sys/kernel/mm/hugepages/hugepages-16384kB/nr_hugepages" + +static inline u32 pkey_bit_position(int pkey) +{ + return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY; +} + +static inline pkey_reg_t __read_pkey_reg(void) +{ + pkey_reg_t pkey_reg; + + asm volatile("mfspr %0, 0xd" : "=r" (pkey_reg)); + + return pkey_reg; +} + +static inline void __write_pkey_reg(pkey_reg_t pkey_reg) +{ + pkey_reg_t eax = pkey_reg; + + dprintf4("%s() changing "PKEY_REG_FMT" to "PKEY_REG_FMT"\n", + __func__, __read_pkey_reg(), pkey_reg); + + asm volatile("mtspr 0xd, %0" : : "r" ((unsigned long)(eax)) : "memory"); + + dprintf4("%s() pkey register after changing "PKEY_REG_FMT" to " + PKEY_REG_FMT"\n", __func__, __read_pkey_reg(), + pkey_reg); +} + +static inline int cpu_has_pku(void) +{ + return 1; +} + +static inline int arch_reserved_keys(void) +{ + if (sysconf(_SC_PAGESIZE) == 4096) + return NR_RESERVED_PKEYS_4K; + else + return NR_RESERVED_PKEYS_64K; +} + +void expect_fault_on_read_execonly_key(void *p1, int pkey) +{ + /* powerpc does not allow userspace to change permissions of exec-only + * keys since those keys are not allocated by userspace. The signal + * handler wont be able to reset the permissions, which means the code + * will infinitely continue to segfault here. + */ + return; +} + +/* 8-bytes of instruction * 16384bytes = 1 page */ +#define __page_o_noops() asm(".rept 16384 ; nop; .endr") + +#endif /* _PKEYS_POWERPC_H */ diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index d5fa299..f5d0ff2 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -42,6 +42,7 @@ #endif
#define NR_PKEYS 16 +#define NR_RESERVED_PKEYS 2 /* pkey-0 and exec-only-pkey */ #define PKEY_BITS_PER_PKEY 2 #define HPAGE_SIZE (1UL<<21) #define PAGE_SIZE 4096 @@ -161,4 +162,18 @@ int pkey_reg_xstate_offset(void) return xstate_offset; }
+static inline int arch_reserved_keys(void) +{ + return NR_RESERVED_PKEYS; +} + +void expect_fault_on_read_execonly_key(void *p1, int pkey) +{ + int ptr_contents; + + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pkey_fault(pkey); +} + #endif /* _PKEYS_X86_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 304f74f..18e1bb7 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -197,17 +197,18 @@ void dump_mem(void *dumpme, int len_bytes)
int pkey_faults; int last_si_pkey = -1; +void pkey_access_allow(int pkey); void signal_handler(int signum, siginfo_t *si, void *vucontext) { ucontext_t *uctxt = vucontext; int trapno; unsigned long ip; char *fpregs; +#if defined(__i386__) || defined(__x86_64__) /* arch */ pkey_reg_t *pkey_reg_ptr; - u64 siginfo_pkey; +#endif /* defined(__i386__) || defined(__x86_64__) */ + u32 siginfo_pkey; u32 *si_pkey_ptr; - int pkey_reg_offset; - fpregset_t fpregset;
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); @@ -217,12 +218,14 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - fpregset = uctxt->uc_mcontext.fpregs; - fpregs = (void *)fpregset; + fpregs = (char *) uctxt->uc_mcontext.fpregs;
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", __func__, trapno, ip, si_code_str(si->si_code), si->si_code); + +#if defined(__i386__) || defined(__x86_64__) /* arch */ + #ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether @@ -230,20 +233,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) * state. We just assume that it is here. */ fpregs += 0x70; -#endif - pkey_reg_offset = pkey_reg_xstate_offset(); - pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]); +#endif /* __i386__ */
- dprintf1("siginfo: %p\n", si); - dprintf1(" fpregs: %p\n", fpregs); + pkey_reg_ptr = (void *)(&fpregs[pkey_reg_xstate_offset()]); /* - * If we got a PKEY fault, we *HAVE* to have at least one bit set in + * If we got a key fault, we *HAVE* to have at least one bit set in * here. */ dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4) dump_mem(pkey_reg_ptr - 128, 256); pkey_assert(*pkey_reg_ptr); +#endif /* defined(__i386__) || defined(__x86_64__) */ + + dprintf1("siginfo: %p\n", si); + dprintf1(" fpregs: %p\n", fpregs);
if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -252,22 +256,28 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); }
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + si_pkey_ptr = siginfo_get_pkey_ptr(si); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem((u8 *)si_pkey_ptr - 8, 24); + dump_mem(si_pkey_ptr - 8, 24); siginfo_pkey = *si_pkey_ptr; pkey_assert(siginfo_pkey < NR_PKEYS); last_si_pkey = siginfo_pkey;
- dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr); /* * need __read_pkey_reg() version so we do not do shadow_pkey_reg * checking */ dprintf1("signal pkey_reg from pkey_reg: "PKEY_REG_FMT"\n", __read_pkey_reg()); - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkey_reg_ptr = 0x00000000; +#if defined(__i386__) || defined(__x86_64__) /* arch */ + dprintf1("signal pkey_reg from xsave: "PKEY_REG_FMT"\n", *pkey_reg_ptr); + *(u64 *)pkey_reg_ptr &= clear_pkey_flags(siginfo_pkey, + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE); +#elif __powerpc64__ + pkey_access_allow(siginfo_pkey); +#endif + shadow_pkey_reg &= clear_pkey_flags(siginfo_pkey, + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE); dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction " "to continue\n"); pkey_faults++; @@ -1328,9 +1338,8 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); do_not_expect_pkey_fault("executing on PROT_EXEC memory"); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pkey_fault(pkey); + + expect_fault_on_read_execonly_key(p1, pkey); }
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) @@ -1357,9 +1366,8 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); do_not_expect_pkey_fault("executing on PROT_EXEC memory"); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pkey_fault(UNKNOWN_PKEY); + + expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY);
/* * Put the memory back to non-PROT_EXEC. Should clear the
On 07/17/2018 06:49 AM, Ram Pai wrote:
Introduce generic abstractions and provide architecture specific implementation for the abstractions.
I really wanted to see these two things separated: 1. introduce abstractions 2. introduce ppc implementation
But, I guess most of it is done except for the siginfo stuff.
#if defined(__i386__) || defined(__x86_64__) /* arch */ #include "pkey-x86.h" +#elif defined(__powerpc64__) /* arch */ +#include "pkey-powerpc.h" #else /* arch */ #error Architecture not supported #endif /* arch */ @@ -186,7 +191,16 @@ static inline int open_hugepage_file(int flag) static inline int get_start_key(void) {
- return 1;
- return 0;
+}
How does this not now break x86?
#endif /* _PKEYS_X86_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 304f74f..18e1bb7 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -197,17 +197,18 @@ void dump_mem(void *dumpme, int len_bytes) int pkey_faults; int last_si_pkey = -1; +void pkey_access_allow(int pkey);
Please just move the function.
void signal_handler(int signum, siginfo_t *si, void *vucontext) { ucontext_t *uctxt = vucontext; int trapno; unsigned long ip; char *fpregs; +#if defined(__i386__) || defined(__x86_64__) /* arch */ pkey_reg_t *pkey_reg_ptr;
- u64 siginfo_pkey;
+#endif /* defined(__i386__) || defined(__x86_64__) */
- u32 siginfo_pkey; u32 *si_pkey_ptr;
- int pkey_reg_offset;
- fpregset_t fpregset;
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); @@ -217,12 +218,14 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
- fpregset = uctxt->uc_mcontext.fpregs;
- fpregs = (void *)fpregset;
- fpregs = (char *) uctxt->uc_mcontext.fpregs;
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", __func__, trapno, ip, si_code_str(si->si_code), si->si_code);
+#if defined(__i386__) || defined(__x86_64__) /* arch */
#ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether @@ -230,20 +233,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) * state. We just assume that it is here. */ fpregs += 0x70; -#endif
- pkey_reg_offset = pkey_reg_xstate_offset();
- pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
+#endif /* __i386__ */
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
- pkey_reg_ptr = (void *)(&fpregs[pkey_reg_xstate_offset()]);
There are unnecessary parenthesis here.
Also, why are you bothering to mess with this? This is inside the x86 #ifdef, right?
/*
* If we got a PKEY fault, we *HAVE* to have at least one bit set in
* If we got a key fault, we *HAVE* to have at least one bit set in
*/ dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4) dump_mem(pkey_reg_ptr - 128, 256); pkey_assert(*pkey_reg_ptr);
- here.
+#endif /* defined(__i386__) || defined(__x86_64__) */
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -252,22 +256,28 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); }
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
- si_pkey_ptr = siginfo_get_pkey_ptr(si); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
- dump_mem((u8 *)si_pkey_ptr - 8, 24);
- dump_mem(si_pkey_ptr - 8, 24);
You removed the cast here, why? That changes the pointer math.
Can we merge this as-is. No, I do not think we can. If it were _just_ the #ifdefs, we could let it pass, but there are a bunch of rough spots, not just the #ifdefs.
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
pkey subsystem is supported if the hardware and kernel has support. We determine that by checking if allocation of a key succeeds or not.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 2 ++ tools/testing/selftests/vm/pkey-powerpc.h | 14 ++++++++++++-- tools/testing/selftests/vm/pkey-x86.h | 8 ++++---- tools/testing/selftests/vm/protection_keys.c | 9 +++++---- 4 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 321bbbd..288ccff 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -76,6 +76,8 @@ static inline void sigsafe_printf(const char *format, ...)
__attribute__((noinline)) int read_ptr(int *ptr); void expected_pkey_fault(int pkey); +int sys_pkey_alloc(unsigned long flags, u64 init_val); +int sys_pkey_free(unsigned long pkey);
#if defined(__i386__) || defined(__x86_64__) /* arch */ #include "pkey-x86.h" diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h index b0d1abe..b649e85 100644 --- a/tools/testing/selftests/vm/pkey-powerpc.h +++ b/tools/testing/selftests/vm/pkey-powerpc.h @@ -64,9 +64,19 @@ static inline void __write_pkey_reg(pkey_reg_t pkey_reg) pkey_reg); }
-static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) { - return 1; + /* + * No simple way to determine this. + * Lets try allocating a key and see if it succeeds. + */ + int ret = sys_pkey_alloc(0, 0); + + if (ret > 0) { + sys_pkey_free(ret); + return true; + } + return false; }
static inline int arch_reserved_keys(void) diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index f5d0ff2..887acf2 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -105,7 +105,7 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, #define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) { unsigned int eax; unsigned int ebx; @@ -118,13 +118,13 @@ static inline int cpu_has_pku(void)
if (!(ecx & X86_FEATURE_PKU)) { dprintf2("cpu does not have PKU\n"); - return 0; + return false; } if (!(ecx & X86_FEATURE_OSPKE)) { dprintf2("cpu does not have OSPKE\n"); - return 0; + return false; } - return 1; + return true; }
#define XSTATE_PKEY_BIT (9) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 18e1bb7..d27fa5e 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1389,8 +1389,8 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) int size = PAGE_SIZE; int sret;
- if (cpu_has_pku()) { - dprintf1("SKIP: %s: no CPU support\n", __func__); + if (is_pkey_supported()) { + dprintf1("SKIP: %s: no CPU/kernel support\n", __func__); return; }
@@ -1454,12 +1454,13 @@ void run_tests_once(void) int main(void) { int nr_iterations = 22; + int pkey_supported = is_pkey_supported();
setup_handlers();
- printf("has pkey: %d\n", cpu_has_pku()); + printf("has pkey: %s\n", pkey_supported ? "Yes" : "No");
- if (!cpu_has_pku()) { + if (!pkey_supported) { int size = PAGE_SIZE; int *ptr;
On 07/17/2018 06:49 AM, Ram Pai wrote:
-static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) {
- return 1;
- /*
* No simple way to determine this.
* Lets try allocating a key and see if it succeeds.
*/
- int ret = sys_pkey_alloc(0, 0);
- if (ret > 0) {
sys_pkey_free(ret);
return true;
- }
- return false;
}
This actually works on x86 too.
static inline int arch_reserved_keys(void) diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index f5d0ff2..887acf2 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -105,7 +105,7 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, #define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ -static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) { unsigned int eax; unsigned int ebx; @@ -118,13 +118,13 @@ static inline int cpu_has_pku(void) if (!(ecx & X86_FEATURE_PKU)) { dprintf2("cpu does not have PKU\n");
return 0;
} if (!(ecx & X86_FEATURE_OSPKE)) { dprintf2("cpu does not have OSPKE\n");return false;
return 0;
}return false;
- return 1;
- return true;
}
#define XSTATE_PKEY_BIT (9) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 18e1bb7..d27fa5e 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1389,8 +1389,8 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) int size = PAGE_SIZE; int sret;
- if (cpu_has_pku()) {
dprintf1("SKIP: %s: no CPU support\n", __func__);
- if (is_pkey_supported()) {
return; }dprintf1("SKIP: %s: no CPU/kernel support\n", __func__);
I actually wanted a kernel-independent check, based entirely on CPUID. That's specifically why I said "no CPU support".
If you want to do this, please do:
/* powerpc has no enumeration, just assume it has support: */ static inline bool cpu_has_cpu(void) { return true; };
if (cpu_has_pku()) { dprintf1("SKIP: %s: no CPU support\n", __func__); return }
if (kernel_pkey_supported()) { dprintf1("SKIP: %s: no kernel support\n", __func__); return; }
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
The maximum number of keys that can be allocated has to take into consideration, that some keys are reserved by the architecture for specific purpose. Hence cannot be allocated.
Fix the assertion in test_pkey_alloc_exhaust()
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 12 ++++-------- 1 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index d27fa5e..67d841e 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1171,15 +1171,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2);
/* - * There are 16 pkeys supported in hardware. Three are - * allocated by the time we get here: - * 1. The default key (0) - * 2. One possibly consumed by an execute-only mapping. - * 3. One allocated by the test code and passed in via - * 'pkey' to this function. - * Ensure that we can allocate at least another 13 (16-3). + * There are NR_PKEYS pkeys supported in hardware. arch_reserved_keys() + * are reserved. And one key is allocated by the test code and passed + * in via 'pkey' to this function. */ - pkey_assert(i >= NR_PKEYS-3); + pkey_assert(i >= (NR_PKEYS-arch_reserved_keys()-1));
for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]);
On 07/17/2018 06:49 AM, Ram Pai wrote:
The maximum number of keys that can be allocated has to take into consideration, that some keys are reserved by the architecture for specific purpose. Hence cannot be allocated.
Back to incomplete sentences, I see. :)
How about:
Some pkeys which are valid to the hardware are not available for application use. Those can not be allocated.
test_pkey_alloc_exhaust() tries to account for these but ___FILL_IN_WHAT_IT_DID_WRONG____. We fix this by ___FILL_IN_WAY_IT_WAS_FIXED____.
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index d27fa5e..67d841e 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1171,15 +1171,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2); /*
* There are 16 pkeys supported in hardware. Three are
* allocated by the time we get here:
* 1. The default key (0)
* 2. One possibly consumed by an execute-only mapping.
* 3. One allocated by the test code and passed in via
* 'pkey' to this function.
* Ensure that we can allocate at least another 13 (16-3).
* There are NR_PKEYS pkeys supported in hardware. arch_reserved_keys()
* are reserved. And one key is allocated by the test code and passed
*/* in via 'pkey' to this function.
- pkey_assert(i >= NR_PKEYS-3);
- pkey_assert(i >= (NR_PKEYS-arch_reserved_keys()-1));
for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]);
You also killed my nice, shiny, new comment. You made an attempt to make up for it two patches ago, but it pales in comparison to mine. The fact that I wrote it only a few short week ago makes me very attached to it, kinda like a new puppy. I don't want to throw it to the wolves quite yet. So, please preserve as much of it as possible, even if it has to live in the x86 header.
For bonus points, axe this comment in the same patch that you create the x86 header comment for easier review. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
detect access-violation on a page to which access-disabled key is associated much after the page is mapped.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Acked-by: Dave Hansen dave.hansen@intel.com --- tools/testing/selftests/vm/protection_keys.c | 19 +++++++++++++++++++ 1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 67d841e..d41b2dc 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1020,6 +1020,24 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) dprintf1("*ptr: %d\n", ptr_contents); expected_pkey_fault(pkey); } + +void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", + pkey, ptr); + ptr_contents = read_ptr(ptr); + dprintf1("reading ptr before disabling the read : %d\n", + ptr_contents); + read_pkey_reg(); + pkey_access_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + expected_pkey_fault(pkey); +} + void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); @@ -1397,6 +1415,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) void (*pkey_tests[])(int *ptr, u16 pkey) = { test_read_of_write_disabled_region, test_read_of_access_disabled_region, + test_read_of_access_disabled_region_with_page_already_mapped, test_write_of_write_disabled_region, test_write_of_access_disabled_region, test_kernel_write_of_access_disabled_region,
detect write-violation on a page to which write-disabled key is associated much after the page is mapped.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Acked-by: Dave Hansen dave.hansen@intel.com --- tools/testing/selftests/vm/protection_keys.c | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index d41b2dc..59f6f33 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1038,6 +1038,17 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); }
+void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + *ptr = __LINE__; + dprintf1("disabling write access; after accessing the page, " + "to PKEY[%02d], doing write\n", pkey); + pkey_write_deny(pkey); + *ptr = __LINE__; + expected_pkey_fault(pkey); +} + void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); @@ -1417,6 +1428,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) test_read_of_access_disabled_region, test_read_of_access_disabled_region_with_page_already_mapped, test_write_of_write_disabled_region, + test_write_of_write_disabled_region_with_page_already_mapped, test_write_of_access_disabled_region, test_kernel_write_of_access_disabled_region, test_kernel_write_of_write_disabled_region,
detect write-violation on a page to which access-disabled key is associated much after the page is mapped.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Acked-by: Dave Hansen dave.hansen@intel.com --- tools/testing/selftests/vm/protection_keys.c | 13 +++++++++++++ 1 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 59f6f33..8a6afdd 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1063,6 +1063,18 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey) *ptr = __LINE__; expected_pkey_fault(pkey); } + +void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + *ptr = __LINE__; + dprintf1("disabling access; after accessing the page, " + " to PKEY[%02d], doing write\n", pkey); + pkey_access_deny(pkey); + *ptr = __LINE__; + expected_pkey_fault(pkey); +} + void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { int ret; @@ -1430,6 +1442,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) test_write_of_write_disabled_region, test_write_of_write_disabled_region_with_page_already_mapped, test_write_of_access_disabled_region, + test_write_of_access_disabled_region_with_page_already_mapped, test_kernel_write_of_access_disabled_region, test_kernel_write_of_write_disabled_region, test_kernel_gup_of_access_disabled_region,
Generally the signal handler restores the state of the pkey register before returning. However there are times when the read/write operation can legitamely fail without invoking the signal handler. Eg: A sys_read() operaton to a write-protected page should be disallowed. In such a case the state of the pkey register is not restored to its original state. Test cases may not remember to restoring the key register state. During cleanup generically restore the key permissions.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 8a6afdd..ea3cf04 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1476,8 +1476,13 @@ void run_tests_once(void) pkey_tests[test_nr](ptr, pkey); dprintf1("freeing test memory: %p\n", ptr); free_pkey_malloc(ptr); + + /* restore the permission on the key after use */ + pkey_access_allow(pkey); + pkey_write_allow(pkey); sys_pkey_free(pkey);
+ dprintf1("pkey_faults: %d\n", pkey_faults); dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
On 07/17/2018 06:49 AM, Ram Pai wrote:
Generally the signal handler restores the state of the pkey register before returning. However there are times when the read/write operation can legitamely fail without invoking the signal handler. Eg: A sys_read() operaton to a write-protected page should be disallowed. In such a case the state of the pkey register is not restored to its original state. Test cases may not remember to restoring the key register state. During cleanup generically restore the key permissions.
This would, indeed be a good thing to do for a well-behaved application.
But, for selftests, why does it matter what state we leave the key in? Doesn't the test itself need to establish permissions? Don't we *do* that at pkey_alloc() anyway?
What problem does this solve?
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 8a6afdd..ea3cf04 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1476,8 +1476,13 @@ void run_tests_once(void) pkey_tests[test_nr](ptr, pkey); dprintf1("freeing test memory: %p\n", ptr); free_pkey_malloc(ptr);
/* restore the permission on the key after use */
pkey_access_allow(pkey);
sys_pkey_free(pkey);pkey_write_allow(pkey);
- dprintf1("pkey_faults: %d\n", pkey_faults); dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
introduce a new allocator that allocates 4k hardware-pages to back 64k linux-page. This allocator is only applicable on powerpc.
cc: Dave Hansen dave.hansen@intel.com cc: Florian Weimer fweimer@redhat.com Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 6 ++++++ tools/testing/selftests/vm/pkey-powerpc.h | 25 +++++++++++++++++++++++++ tools/testing/selftests/vm/pkey-x86.h | 5 +++++ tools/testing/selftests/vm/protection_keys.c | 1 + 4 files changed, 37 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 288ccff..a00eee6 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -28,6 +28,9 @@ extern int dprint_in_signal; extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+extern int test_nr; +extern int iteration_nr; + #ifdef __GNUC__ __attribute__((format(printf, 1, 2))) #endif @@ -78,6 +81,9 @@ static inline void sigsafe_printf(const char *format, ...) void expected_pkey_fault(int pkey); int sys_pkey_alloc(unsigned long flags, u64 init_val); int sys_pkey_free(unsigned long pkey); +int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey); +void record_pkey_malloc(void *ptr, long size, int prot);
#if defined(__i386__) || defined(__x86_64__) /* arch */ #include "pkey-x86.h" diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h index b649e85..ab60f74 100644 --- a/tools/testing/selftests/vm/pkey-powerpc.h +++ b/tools/testing/selftests/vm/pkey-powerpc.h @@ -100,4 +100,29 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) /* 8-bytes of instruction * 16384bytes = 1 page */ #define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ + void *ptr; + int ret; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + + ret = syscall(__NR_subpage_prot, ptr, size, NULL); + if (ret) { + perror("subpage_perm"); + return PTR_ERR_ENOTSUP; + } + + ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); + pkey_assert(!ret); + record_pkey_malloc(ptr, size, prot); + + dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); + return ptr; +} + #endif /* _PKEYS_POWERPC_H */ diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index 887acf2..a7e4648 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -176,4 +176,9 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) expected_pkey_fault(pkey); }
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ + return PTR_ERR_ENOTSUP; +} + #endif /* _PKEYS_X86_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index ea3cf04..569faf1 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -883,6 +883,7 @@ void setup_hugetlbfs(void) void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
malloc_pkey_with_mprotect, + malloc_pkey_with_mprotect_subpage, malloc_pkey_anon_huge, malloc_pkey_hugetlb /* can not do direct with the pkey_mprotect() API:
Ensure pkey-0 is allocated on start. Ensure pkey-0 can be attached dynamically in various modes, without failures. Ensure pkey-0 can be freed and allocated.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 66 +++++++++++++++++++++++++- 1 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 569faf1..156b449 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -999,6 +999,67 @@ void close_test_fds(void) return *ptr; }
+void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) +{ + int i, err; + int max_nr_pkey_allocs; + int alloced_pkeys[NR_PKEYS]; + int nr_alloced = 0; + int newpkey; + long size; + + assert(pkey_last_malloc_record); + size = pkey_last_malloc_record->size; + /* + * This is a bit of a hack. But mprotect() requires + * huge-page-aligned sizes when operating on hugetlbfs. + * So, make sure that we use something that's a multiple + * of a huge page when we can. + */ + if (size >= HPAGE_SIZE) + size = HPAGE_SIZE; + + + /* allocate every possible key and make sure key-0 never got allocated */ + max_nr_pkey_allocs = NR_PKEYS; + for (i = 0; i < max_nr_pkey_allocs; i++) { + int new_pkey = alloc_pkey(); + assert(new_pkey != 0); + + if (new_pkey < 0) + break; + alloced_pkeys[nr_alloced++] = new_pkey; + } + /* free all the allocated keys */ + for (i = 0; i < nr_alloced; i++) { + int free_ret; + + if (!alloced_pkeys[i]) + continue; + free_ret = sys_pkey_free(alloced_pkeys[i]); + pkey_assert(!free_ret); + } + + /* attach key-0 in various modes */ + err = sys_mprotect_pkey(ptr, size, PROT_READ, 0); + pkey_assert(!err); + err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0); + pkey_assert(!err); + err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0); + pkey_assert(!err); + err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0); + pkey_assert(!err); + err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0); + pkey_assert(!err); + + /* free key-0 */ + err = sys_pkey_free(0); + pkey_assert(!err); + + newpkey = sys_pkey_alloc(0, 0x0); + assert(newpkey == 0); +} + void test_read_of_write_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1144,10 +1205,10 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err; - int i = get_start_key(); + int i;
/* Note: 0 is the default pkey, so don't mess with it */ - for (; i < NR_PKEYS; i++) { + for (i=1; i < NR_PKEYS; i++) { if (pkey == i) continue;
@@ -1455,6 +1516,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) test_pkey_syscalls_on_non_allocated_pkey, test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, + test_pkey_alloc_free_attach_pkey0, };
void run_tests_once(void)
On 07/17/2018 06:49 AM, Ram Pai wrote:
Ensure pkey-0 is allocated on start. Ensure pkey-0 can be attached dynamically in various modes, without failures. Ensure pkey-0 can be freed and allocated.
Signed-off-by: Ram Pai linuxram@us.ibm.com
tools/testing/selftests/vm/protection_keys.c | 66 +++++++++++++++++++++++++- 1 files changed, 64 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 569faf1..156b449 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -999,6 +999,67 @@ void close_test_fds(void) return *ptr; } +void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) +{
- int i, err;
- int max_nr_pkey_allocs;
- int alloced_pkeys[NR_PKEYS];
- int nr_alloced = 0;
- int newpkey;
- long size;
- assert(pkey_last_malloc_record);
- size = pkey_last_malloc_record->size;
- /*
* This is a bit of a hack. But mprotect() requires
* huge-page-aligned sizes when operating on hugetlbfs.
* So, make sure that we use something that's a multiple
* of a huge page when we can.
*/
- if (size >= HPAGE_SIZE)
size = HPAGE_SIZE;
- /* allocate every possible key and make sure key-0 never got allocated */
- max_nr_pkey_allocs = NR_PKEYS;
- for (i = 0; i < max_nr_pkey_allocs; i++) {
int new_pkey = alloc_pkey();
assert(new_pkey != 0);
Missed these earlier. This needs to be pkey_assert(). We don't want these tests to ever _actually_ crash.
- /* attach key-0 in various modes */
- err = sys_mprotect_pkey(ptr, size, PROT_READ, 0);
- pkey_assert(!err);
- err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0);
- pkey_assert(!err);
- err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0);
- pkey_assert(!err);
- err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0);
- pkey_assert(!err);
- err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0);
- pkey_assert(!err);
This is all fine.
- /* free key-0 */
- err = sys_pkey_free(0);
- pkey_assert(!err);
This part is called out as undefined behavior in the manpage:
An application should not call pkey_free() on any protection key which has been assigned to an address range by pkey_mprotect(2) and which is still in use. The behavior in this case is undefined and may result in an error.
I don't think we should be testing for undefined behavior.
- newpkey = sys_pkey_alloc(0, 0x0);
- assert(newpkey == 0);
+}
void test_read_of_write_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1144,10 +1205,10 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err;
- int i = get_start_key();
- int i;
/* Note: 0 is the default pkey, so don't mess with it */
- for (; i < NR_PKEYS; i++) {
- for (i=1; i < NR_PKEYS; i++) { if (pkey == i) continue;
This seems to be randomly reverting earlier changes. -- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
linux-kselftest-mirror@lists.linaro.org