 
            Memory protection keys enable applications to protect its address space from inadvertent access from itself.
This feature is now enabled on powerpc architecture. The patches move the selftests to arch neutral directory and enhances them. Verified for correctness on powerpc and on x86 architectures(using EC2 ubuntu VM instances).
Ram Pai (21): selftests/x86: Move protecton key selftest to arch neutral directory selftests/vm: rename all references to pkru to a generic name selftests/vm: move generic definitions to header file selftests/vm: typecast the pkey register selftests/vm: generic function to handle shadow key register selftests/vm: fix the wrong assert in pkey_disable_set() selftests/vm: fixed bugs in pkey_disable_clear() selftests/vm: clear the bits in shadow reg when a pkey is freed. selftests/vm: fix alloc_random_pkey() to make it really random selftests/vm: introduce two arch independent abstraction selftests/vm: pkey register should match shadow pkey selftests/vm: generic cleanup selftests/vm: powerpc implementation for generic abstraction selftests/vm: clear the bits in shadow reg when a pkey is freed. selftests/vm: powerpc implementation to check support for pkey selftests/vm: fix an assertion in test_pkey_alloc_exhaust() selftests/vm: associate key on a mapped page and detect access violation selftests/vm: associate key on a mapped page and detect write violation selftests/vm: detect write violation on a mapped access-denied-key page selftests/vm: testcases must restore pkey-permissions selftests/vm: sub-page allocator
Thiago Jung Bauermann (3): selftests/vm: Fix deadlock in protection_keys.c selftests/powerpc: Add ptrace tests for Protection Key register selftests/powerpc: Add core file test for Protection Key register
tools/testing/selftests/powerpc/include/reg.h | 1 + tools/testing/selftests/powerpc/ptrace/Makefile | 5 +- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 438 ++++++ .../testing/selftests/powerpc/ptrace/ptrace-pkey.c | 443 ++++++ tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/pkey-helpers.h | 419 ++++++ tools/testing/selftests/vm/protection_keys.c | 1471 ++++++++++++++++++++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/pkey-helpers.h | 223 --- tools/testing/selftests/x86/protection_keys.c | 1407 ------------------- 10 files changed, 2778 insertions(+), 1632 deletions(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/core-pkey.c create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c create mode 100644 tools/testing/selftests/vm/pkey-helpers.h create mode 100644 tools/testing/selftests/vm/protection_keys.c delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h delete mode 100644 tools/testing/selftests/x86/protection_keys.c
-- To unsubscribe from this list: send the line "unsubscribe linux-kselftest" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
 
            Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/pkey-helpers.h | 223 ++++ tools/testing/selftests/vm/protection_keys.c | 1407 +++++++++++++++++++++++++ tools/testing/selftests/x86/Makefile | 2 +- tools/testing/selftests/x86/pkey-helpers.h | 223 ---- tools/testing/selftests/x86/protection_keys.c | 1407 ------------------------- 6 files changed, 1632 insertions(+), 1631 deletions(-) create mode 100644 tools/testing/selftests/vm/pkey-helpers.h create mode 100644 tools/testing/selftests/vm/protection_keys.c delete mode 100644 tools/testing/selftests/x86/pkey-helpers.h delete mode 100644 tools/testing/selftests/x86/protection_keys.c
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 7f45806..d9c1d42 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -19,6 +19,7 @@ TEST_GEN_FILES += userfaultfd TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += gup_benchmark +TEST_GEN_FILES += protection_keys
TEST_PROGS := run_vmtests
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h new file mode 100644 index 0000000..b3cb767 --- /dev/null +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PKEYS_HELPER_H +#define _PKEYS_HELPER_H +#define _GNU_SOURCE +#include <string.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> + +#define NR_PKEYS 16 +#define PKRU_BITS_PER_PKEY 2 + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 0 +#endif +#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 +extern int dprint_in_signal; +extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; +static inline void sigsafe_printf(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + if (!dprint_in_signal) { + vprintf(format, ap); + } else { + int ret; + int len = vsnprintf(dprint_in_signal_buffer, + DPRINT_IN_SIGNAL_BUF_SIZE, + format, ap); + /* + * len is amount that would have been printed, + * but actual write is truncated at BUF_SIZE. + */ + if (len > DPRINT_IN_SIGNAL_BUF_SIZE) + len = DPRINT_IN_SIGNAL_BUF_SIZE; + ret = write(1, dprint_in_signal_buffer, len); + if (ret < 0) + abort(); + } + va_end(ap); +} +#define dprintf_level(level, args...) do { \ + if (level <= DEBUG_LEVEL) \ + sigsafe_printf(args); \ + fflush(NULL); \ +} while (0) +#define dprintf0(args...) dprintf_level(0, args) +#define dprintf1(args...) dprintf_level(1, args) +#define dprintf2(args...) dprintf_level(2, args) +#define dprintf3(args...) dprintf_level(3, args) +#define dprintf4(args...) dprintf_level(4, args) + +extern unsigned int shadow_pkru; +static inline unsigned int __rdpkru(void) +{ + unsigned int eax, edx; + unsigned int ecx = 0; + unsigned int pkru; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkru = eax; + return pkru; +} + +static inline unsigned int _rdpkru(int line) +{ + unsigned int pkru = __rdpkru(); + + dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", + line, pkru, shadow_pkru); + assert(pkru == shadow_pkru); + + return pkru; +} + +#define rdpkru() _rdpkru(__LINE__) + +static inline void __wrpkru(unsigned int pkru) +{ + unsigned int eax = pkru; + unsigned int ecx = 0; + unsigned int edx = 0; + + dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); + assert(pkru == __rdpkru()); +} + +static inline void wrpkru(unsigned int pkru) +{ + dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + /* will do the shadow check for us: */ + rdpkru(); + __wrpkru(pkru); + shadow_pkru = pkru; + dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); +} + +/* + * These are technically racy. since something could + * change PKRU between the read and the write. + */ +static inline void __pkey_access_allow(int pkey, int do_allow) +{ + unsigned int pkru = rdpkru(); + int bit = pkey * 2; + + if (do_allow) + pkru &= (1<<bit); + else + pkru |= (1<<bit); + + dprintf4("pkru now: %08x\n", rdpkru()); + wrpkru(pkru); +} + +static inline void __pkey_write_allow(int pkey, int do_allow_write) +{ + long pkru = rdpkru(); + int bit = pkey * 2 + 1; + + if (do_allow_write) + pkru &= (1<<bit); + else + pkru |= (1<<bit); + + wrpkru(pkru); + dprintf4("pkru now: %08x\n", rdpkru()); +} + +#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ +#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ +#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ +#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ + +#define PAGE_SIZE 4096 +#define MB (1<<20) + +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ +#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ + +static inline int cpu_has_pku(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + eax = 0x7; + ecx = 0x0; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (!(ecx & X86_FEATURE_PKU)) { + dprintf2("cpu does not have PKU\n"); + return 0; + } + if (!(ecx & X86_FEATURE_OSPKE)) { + dprintf2("cpu does not have OSPKE\n"); + return 0; + } + return 1; +} + +#define XSTATE_PKRU_BIT (9) +#define XSTATE_PKRU 0x200 + +int pkru_xstate_offset(void) +{ + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + int xstate_offset; + int xstate_size; + unsigned long XSTATE_CPUID = 0xd; + int leaf; + + /* assume that XSTATE_PKRU is set in XCR0 */ + leaf = XSTATE_PKRU_BIT; + { + eax = XSTATE_CPUID; + ecx = leaf; + __cpuid(&eax, &ebx, &ecx, &edx); + + if (leaf == XSTATE_PKRU_BIT) { + xstate_offset = ebx; + xstate_size = eax; + } + } + + if (xstate_size == 0) { + printf("could not find size/offset of PKRU in xsave state\n"); + return 0; + } + + return xstate_offset; +} + +#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c new file mode 100644 index 0000000..bc1b073 --- /dev/null +++ b/tools/testing/selftests/vm/protection_keys.c @@ -0,0 +1,1407 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) + * + * There are examples in here of: + * * how to set protection keys on memory + * * how to set/clear bits in PKRU (the rights register) + * * how to handle SEGV_PKRU signals and extract pkey-relevant + * information from the siginfo + * + * Things to add: + * make sure KSM and KSM COW breaking works + * prefault pages in at malloc, or not + * protect MPX bounds tables with protection keys? + * make sure VMA splitting/merging is working correctly + * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys + * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel + * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks + * + * Compile like this: + * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + */ +#define _GNU_SOURCE +#include <errno.h> +#include <linux/futex.h> +#include <sys/time.h> +#include <sys/syscall.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/ptrace.h> +#include <setjmp.h> + +#include "pkey-helpers.h" + +int iteration_nr = 1; +int test_nr; + +unsigned int shadow_pkru; + +#define HPAGE_SIZE (1UL<<21) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) +#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) + +int dprint_in_signal; +char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +extern void abort_hooks(void); +#define pkey_assert(condition) do { \ + if (!(condition)) { \ + dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ + __FILE__, __LINE__, \ + test_nr, iteration_nr); \ + dprintf0("errno at assert: %d", errno); \ + abort_hooks(); \ + assert(condition); \ + } \ +} while (0) +#define raw_assert(cond) assert(cond) + +void cat_into_file(char *str, char *file) +{ + int fd = open(file, O_RDWR); + int ret; + + dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); + /* + * these need to be raw because they are called under + * pkey_assert() + */ + raw_assert(fd >= 0); + ret = write(fd, str, strlen(str)); + if (ret != strlen(str)) { + perror("write to file failed"); + fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); + raw_assert(0); + } + close(fd); +} + +#if CONTROL_TRACING > 0 +static int warned_tracing; +int tracing_root_ok(void) +{ + if (geteuid() != 0) { + if (!warned_tracing) + fprintf(stderr, "WARNING: not run as root, " + "can not do tracing control\n"); + warned_tracing = 1; + return 0; + } + return 1; +} +#endif + +void tracing_on(void) +{ +#if CONTROL_TRACING > 0 +#define TRACEDIR "/sys/kernel/debug/tracing" + char pidstr[32]; + + if (!tracing_root_ok()) + return; + + sprintf(pidstr, "%d", getpid()); + cat_into_file("0", TRACEDIR "/tracing_on"); + cat_into_file("\n", TRACEDIR "/trace"); + if (1) { + cat_into_file("function_graph", TRACEDIR "/current_tracer"); + cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); + } else { + cat_into_file("nop", TRACEDIR "/current_tracer"); + } + cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); + cat_into_file("1", TRACEDIR "/tracing_on"); + dprintf1("enabled tracing\n"); +#endif +} + +void tracing_off(void) +{ +#if CONTROL_TRACING > 0 + if (!tracing_root_ok()) + return; + cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); +#endif +} + +void abort_hooks(void) +{ + fprintf(stderr, "running %s()...\n", __func__); + tracing_off(); +#ifdef SLEEP_ON_ABORT + sleep(SLEEP_ON_ABORT); +#endif +} + +static inline void __page_o_noops(void) +{ + /* 8-bytes of instruction * 512 bytes = 1 page */ + asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); +} + +/* + * This attempts to have roughly a page of instructions followed by a few + * instructions that do a write, and another page of instructions. That + * way, we are pretty sure that the write is in the second page of + * instructions and has at least a page of padding behind it. + * + * *That* lets us be sure to madvise() away the write instruction, which + * will then fault, which makes sure that the fault code handles + * execute-only memory properly. + */ +__attribute__((__aligned__(PAGE_SIZE))) +void lots_o_noops_around_write(int *write_to_me) +{ + dprintf3("running %s()\n", __func__); + __page_o_noops(); + /* Assume this happens in the second page of instructions: */ + *write_to_me = __LINE__; + /* pad out by another page: */ + __page_o_noops(); + dprintf3("%s() done\n", __func__); +} + +/* Define some kernel-like types */ +#define u8 uint8_t +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#ifdef __i386__ + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 380 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 +#endif +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 + +#else + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 329 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 +#endif +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 + +#endif + +void dump_mem(void *dumpme, int len_bytes) +{ + char *c = (void *)dumpme; + int i; + + for (i = 0; i < len_bytes; i += sizeof(u64)) { + u64 *ptr = (u64 *)(c + i); + dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); + } +} + +#define SEGV_BNDERR 3 /* failed address bound checks */ +#define SEGV_PKUERR 4 + +static char *si_code_str(int si_code) +{ + if (si_code == SEGV_MAPERR) + return "SEGV_MAPERR"; + if (si_code == SEGV_ACCERR) + return "SEGV_ACCERR"; + if (si_code == SEGV_BNDERR) + return "SEGV_BNDERR"; + if (si_code == SEGV_PKUERR) + return "SEGV_PKUERR"; + return "UNKNOWN"; +} + +int pkru_faults; +int last_si_pkey = -1; +void signal_handler(int signum, siginfo_t *si, void *vucontext) +{ + ucontext_t *uctxt = vucontext; + int trapno; + unsigned long ip; + char *fpregs; + u32 *pkru_ptr; + u64 siginfo_pkey; + u32 *si_pkey_ptr; + int pkru_offset; + fpregset_t fpregset; + + dprint_in_signal = 1; + dprintf1(">>>>===============SIGSEGV============================\n"); + dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, + __rdpkru(), shadow_pkru); + + trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; + ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + fpregset = uctxt->uc_mcontext.fpregs; + fpregs = (void *)fpregset; + + dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, + trapno, ip, si_code_str(si->si_code), si->si_code); +#ifdef __i386__ + /* + * 32-bit has some extra padding so that userspace can tell whether + * the XSTATE header is present in addition to the "legacy" FPU + * state. We just assume that it is here. + */ + fpregs += 0x70; +#endif + pkru_offset = pkru_xstate_offset(); + pkru_ptr = (void *)(&fpregs[pkru_offset]); + + dprintf1("siginfo: %p\n", si); + dprintf1(" fpregs: %p\n", fpregs); + /* + * If we got a PKRU fault, we *HAVE* to have at least one bit set in + * here. + */ + dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); + if (DEBUG_LEVEL > 4) + dump_mem(pkru_ptr - 128, 256); + pkey_assert(*pkru_ptr); + + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem(si_pkey_ptr - 8, 24); + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; + + if ((si->si_code == SEGV_MAPERR) || + (si->si_code == SEGV_ACCERR) || + (si->si_code == SEGV_BNDERR)) { + printf("non-PK si_code, exiting...\n"); + exit(4); + } + + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); + /* need __rdpkru() version so we do not do shadow_pkru checking */ + dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); + dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); + *(u64 *)pkru_ptr = 0x00000000; + dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); + pkru_faults++; + dprintf1("<<<<==================================================\n"); + return; + if (trapno == 14) { + fprintf(stderr, + "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", + trapno, ip); + fprintf(stderr, "si_addr %p\n", si->si_addr); + fprintf(stderr, "REG_ERR: %lx\n", + (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + exit(1); + } else { + fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); + fprintf(stderr, "si_addr %p\n", si->si_addr); + fprintf(stderr, "REG_ERR: %lx\n", + (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + exit(2); + } + dprint_in_signal = 0; +} + +int wait_all_children(void) +{ + int status; + return waitpid(-1, &status, 0); +} + +void sig_chld(int x) +{ + dprint_in_signal = 1; + dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); + dprint_in_signal = 0; +} + +void setup_sigsegv_handler(void) +{ + int r, rs; + struct sigaction newact; + struct sigaction oldact; + + /* #PF is mapped to sigsegv */ + int signum = SIGSEGV; + + newact.sa_handler = 0; + newact.sa_sigaction = signal_handler; + + /*sigset_t - signals to block while in the handler */ + /* get the old signal mask. */ + rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); + pkey_assert(rs == 0); + + /* call sa_sigaction, not sa_handler*/ + newact.sa_flags = SA_SIGINFO; + + newact.sa_restorer = 0; /* void(*)(), obsolete */ + r = sigaction(signum, &newact, &oldact); + r = sigaction(SIGALRM, &newact, &oldact); + pkey_assert(r == 0); +} + +void setup_handlers(void) +{ + signal(SIGCHLD, &sig_chld); + setup_sigsegv_handler(); +} + +pid_t fork_lazy_child(void) +{ + pid_t forkret; + + forkret = fork(); + pkey_assert(forkret >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); + + if (!forkret) { + /* in the child */ + while (1) { + dprintf1("child sleeping...\n"); + sleep(30); + } + } + return forkret; +} + +void davecmp(void *_a, void *_b, int len) +{ + int i; + unsigned long *a = _a; + unsigned long *b = _b; + + for (i = 0; i < len / sizeof(*a); i++) { + if (a[i] == b[i]) + continue; + + dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); + } +} + +void dumpit(char *f) +{ + int fd = open(f, O_RDONLY); + char buf[100]; + int nr_read; + + dprintf2("maps fd: %d\n", fd); + do { + nr_read = read(fd, &buf[0], sizeof(buf)); + write(1, buf, nr_read); + } while (nr_read > 0); + close(fd); +} + +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 + +u32 pkey_get(int pkey, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u32 pkru = __rdpkru(); + u32 shifted_pkru; + u32 masked_pkru; + + dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", + __func__, pkey, flags, 0, 0); + dprintf2("%s() raw pkru: %x\n", __func__, pkru); + + shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); + dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); + masked_pkru = shifted_pkru & mask; + dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); + /* + * shift down the relevant bits to the lowest two, then + * mask off all the other high bits. + */ + return masked_pkru; +} + +int pkey_set(int pkey, unsigned long rights, unsigned long flags) +{ + u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); + u32 old_pkru = __rdpkru(); + u32 new_pkru; + + /* make sure that 'rights' only contains the bits we expect: */ + assert(!(rights & ~mask)); + + /* copy old pkru */ + new_pkru = old_pkru; + /* mask out bits from pkey in old value: */ + new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); + /* OR in new bits for pkey: */ + new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); + + __wrpkru(new_pkru); + + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", + __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); + return 0; +} + +void pkey_disable_set(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights; + u32 orig_pkru = rdpkru(); + + dprintf1("START->%s(%d, 0x%x)\n", __func__, + pkey, flags); + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + pkey_rights = pkey_get(pkey, syscall_flags); + + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = pkey_set(pkey, pkey_rights, syscall_flags); + assert(!ret); + /*pkru and flags have the same format */ + shadow_pkru |= flags << (pkey * 2); + dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); + + pkey_assert(ret >= 0); + + pkey_rights = pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + if (flags) + pkey_assert(rdpkru() > orig_pkru); + dprintf1("END<---%s(%d, 0x%x)\n", __func__, + pkey, flags); +} + +void pkey_disable_clear(int pkey, int flags) +{ + unsigned long syscall_flags = 0; + int ret; + int pkey_rights = pkey_get(pkey, syscall_flags); + u32 orig_pkru = rdpkru(); + + pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); + + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + pkey_assert(pkey_rights >= 0); + + pkey_rights |= flags; + + ret = pkey_set(pkey, pkey_rights, 0); + /* pkru and flags have the same format */ + shadow_pkru &= ~(flags << (pkey * 2)); + pkey_assert(ret >= 0); + + pkey_rights = pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey, pkey, pkey_rights); + + dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + if (flags) + assert(rdpkru() > orig_pkru); +} + +void pkey_write_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); +} +void pkey_write_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_WRITE); +} +void pkey_access_allow(int pkey) +{ + pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); +} +void pkey_access_deny(int pkey) +{ + pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); +} + +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, + ptr, size, orig_prot, pkey); + + errno = 0; + sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); + if (errno) { + dprintf2("SYS_mprotect_key sret: %d\n", sret); + dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); + dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); + if (DEBUG_LEVEL >= 2) + perror("SYS_mprotect_pkey"); + } + return sret; +} + +int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", + __func__, flags, init_val, ret, errno); + return ret; +} + +int alloc_pkey(void) +{ + int ret; + unsigned long init_val = 0x0; + + dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", + __LINE__, __rdpkru(), shadow_pkru); + ret = sys_pkey_alloc(0, init_val); + /* + * pkey_alloc() sets PKRU, so we need to reflect it in + * shadow_pkru: + */ + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + if (ret) { + /* clear both the bits: */ + shadow_pkru &= ~(0x3 << (ret * 2)); + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + /* + * move the new state in from init_val + * (remember, we cheated and init_val == pkru format) + */ + shadow_pkru |= (init_val << (ret * 2)); + } + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); + /* for shadow checking: */ + rdpkru(); + dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +int sys_pkey_free(unsigned long pkey) +{ + int ret = syscall(SYS_pkey_free, pkey); + dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); + return ret; +} + +/* + * I had a bug where pkey bits could be set by mprotect() but + * not cleared. This ensures we get lots of random bit sets + * and clears on the vma and pte pkey bits. + */ +int alloc_random_pkey(void) +{ + int max_nr_pkey_allocs; + int ret; + int i; + int alloced_pkeys[NR_PKEYS]; + int nr_alloced = 0; + int random_index; + memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); + + /* allocate every possible key and make a note of which ones we got */ + max_nr_pkey_allocs = NR_PKEYS; + max_nr_pkey_allocs = 1; + for (i = 0; i < max_nr_pkey_allocs; i++) { + int new_pkey = alloc_pkey(); + if (new_pkey < 0) + break; + alloced_pkeys[nr_alloced++] = new_pkey; + } + + pkey_assert(nr_alloced > 0); + /* select a random one out of the allocated ones */ + random_index = rand() % nr_alloced; + ret = alloced_pkeys[random_index]; + /* now zero it out so we don't free it next */ + alloced_pkeys[random_index] = 0; + + /* go through the allocated ones that we did not want and free them */ + for (i = 0; i < nr_alloced; i++) { + int free_ret; + if (!alloced_pkeys[i]) + continue; + free_ret = sys_pkey_free(alloced_pkeys[i]); + pkey_assert(!free_ret); + } + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int nr_iterations = random() % 100; + int ret; + + while (0) { + int rpkey = alloc_random_pkey(); + ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); + dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", + ptr, size, orig_prot, pkey, ret); + if (nr_iterations-- < 0) + break; + + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + sys_pkey_free(rpkey); + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + } + pkey_assert(pkey < NR_PKEYS); + + ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); + dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", + ptr, size, orig_prot, pkey, ret); + pkey_assert(!ret); + dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkru(), shadow_pkru); + return ret; +} + +struct pkey_malloc_record { + void *ptr; + long size; +}; +struct pkey_malloc_record *pkey_malloc_records; +long nr_pkey_malloc_records; +void record_pkey_malloc(void *ptr, long size) +{ + long i; + struct pkey_malloc_record *rec = NULL; + + for (i = 0; i < nr_pkey_malloc_records; i++) { + rec = &pkey_malloc_records[i]; + /* find a free record */ + if (rec) + break; + } + if (!rec) { + /* every record is full */ + size_t old_nr_records = nr_pkey_malloc_records; + size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); + size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); + dprintf2("new_nr_records: %zd\n", new_nr_records); + dprintf2("new_size: %zd\n", new_size); + pkey_malloc_records = realloc(pkey_malloc_records, new_size); + pkey_assert(pkey_malloc_records != NULL); + rec = &pkey_malloc_records[nr_pkey_malloc_records]; + /* + * realloc() does not initialize memory, so zero it from + * the first new record all the way to the end. + */ + for (i = 0; i < new_nr_records - old_nr_records; i++) + memset(rec + i, 0, sizeof(*rec)); + } + dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", + (int)(rec - pkey_malloc_records), rec, ptr, size); + rec->ptr = ptr; + rec->size = size; + nr_pkey_malloc_records++; +} + +void free_pkey_malloc(void *ptr) +{ + long i; + int ret; + dprintf3("%s(%p)\n", __func__, ptr); + for (i = 0; i < nr_pkey_malloc_records; i++) { + struct pkey_malloc_record *rec = &pkey_malloc_records[i]; + dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", + ptr, i, rec, rec->ptr, rec->size); + if ((ptr < rec->ptr) || + (ptr >= rec->ptr + rec->size)) + continue; + + dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", + ptr, i, rec, rec->ptr, rec->size); + nr_pkey_malloc_records--; + ret = munmap(rec->ptr, rec->size); + dprintf3("munmap ret: %d\n", ret); + pkey_assert(!ret); + dprintf3("clearing rec->ptr, rec: %p\n", rec); + rec->ptr = NULL; + dprintf3("done clearing rec->ptr, rec: %p\n", rec); + return; + } + pkey_assert(false); +} + + +void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) +{ + void *ptr; + int ret; + + rdpkru(); + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); + pkey_assert(!ret); + record_pkey_malloc(ptr, size); + rdpkru(); + + dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); + return ptr; +} + +void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) +{ + int ret; + void *ptr; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + /* + * Guarantee we can fit at least one huge page in the resulting + * allocation by allocating space for 2: + */ + size = ALIGN_UP(size, HPAGE_SIZE * 2); + ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + record_pkey_malloc(ptr, size); + mprotect_pkey(ptr, size, prot, pkey); + + dprintf1("unaligned ptr: %p\n", ptr); + ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); + dprintf1(" aligned ptr: %p\n", ptr); + ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); + dprintf1("MADV_HUGEPAGE ret: %d\n", ret); + ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); + dprintf1("MADV_WILLNEED ret: %d\n", ret); + memset(ptr, 0, HPAGE_SIZE); + + dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); + return ptr; +} + +int hugetlb_setup_ok; +#define GET_NR_HUGE_PAGES 10 +void setup_hugetlbfs(void) +{ + int err; + int fd; + char buf[] = "123"; + + if (geteuid() != 0) { + fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); + return; + } + + cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); + + /* + * Now go make sure that we got the pages and that they + * are 2M pages. Someone might have made 1G the default. + */ + fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); + if (fd < 0) { + perror("opening sysfs 2M hugetlb config"); + return; + } + + /* -1 to guarantee leaving the trailing \0 */ + err = read(fd, buf, sizeof(buf)-1); + close(fd); + if (err <= 0) { + perror("reading sysfs 2M hugetlb config"); + return; + } + + if (atoi(buf) != GET_NR_HUGE_PAGES) { + fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", + buf, GET_NR_HUGE_PAGES); + return; + } + + hugetlb_setup_ok = 1; +} + +void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) +{ + void *ptr; + int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; + + if (!hugetlb_setup_ok) + return PTR_ERR_ENOTSUP; + + dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); + size = ALIGN_UP(size, HPAGE_SIZE * 2); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); + pkey_assert(ptr != (void *)-1); + mprotect_pkey(ptr, size, prot, pkey); + + record_pkey_malloc(ptr, size); + + dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); + return ptr; +} + +void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) +{ + void *ptr; + int fd; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + fd = open("/dax/foo", O_RDWR); + pkey_assert(fd >= 0); + + ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); + pkey_assert(ptr != (void *)-1); + + mprotect_pkey(ptr, size, prot, pkey); + + record_pkey_malloc(ptr, size); + + dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); + close(fd); + return ptr; +} + +void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { + + malloc_pkey_with_mprotect, + malloc_pkey_anon_huge, + malloc_pkey_hugetlb +/* can not do direct with the pkey_mprotect() API: + malloc_pkey_mmap_direct, + malloc_pkey_mmap_dax, +*/ +}; + +void *malloc_pkey(long size, int prot, u16 pkey) +{ + void *ret; + static int malloc_type; + int nr_malloc_types = ARRAY_SIZE(pkey_malloc); + + pkey_assert(pkey < NR_PKEYS); + + while (1) { + pkey_assert(malloc_type < nr_malloc_types); + + ret = pkey_malloc[malloc_type](size, prot, pkey); + pkey_assert(ret != (void *)-1); + + malloc_type++; + if (malloc_type >= nr_malloc_types) + malloc_type = (random()%nr_malloc_types); + + /* try again if the malloc_type we tried is unsupported */ + if (ret == PTR_ERR_ENOTSUP) + continue; + + break; + } + + dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, + size, prot, pkey, ret); + return ret; +} + +int last_pkru_faults; +void expected_pk_fault(int pkey) +{ + dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", + __func__, last_pkru_faults, pkru_faults); + dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); + pkey_assert(last_pkru_faults + 1 == pkru_faults); + pkey_assert(last_si_pkey == pkey); + /* + * The signal handler shold have cleared out PKRU to let the + * test program continue. We now have to restore it. + */ + if (__rdpkru() != 0) + pkey_assert(0); + + __wrpkru(shadow_pkru); + dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", + __func__, shadow_pkru); + last_pkru_faults = pkru_faults; + last_si_pkey = -1; +} + +void do_not_expect_pk_fault(void) +{ + pkey_assert(last_pkru_faults == pkru_faults); +} + +int test_fds[10] = { -1 }; +int nr_test_fds; +void __save_test_fd(int fd) +{ + pkey_assert(fd >= 0); + pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); + test_fds[nr_test_fds] = fd; + nr_test_fds++; +} + +int get_test_read_fd(void) +{ + int test_fd = open("/etc/passwd", O_RDONLY); + __save_test_fd(test_fd); + return test_fd; +} + +void close_test_fds(void) +{ + int i; + + for (i = 0; i < nr_test_fds; i++) { + if (test_fds[i] < 0) + continue; + close(test_fds[i]); + test_fds[i] = -1; + } + nr_test_fds = 0; +} + +#define barrier() __asm__ __volatile__("": : :"memory") +__attribute__((noinline)) int read_ptr(int *ptr) +{ + /* + * Keep GCC from optimizing this away somehow + */ + barrier(); + return *ptr; +} + +void test_read_of_write_disabled_region(int *ptr, u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling write access to PKEY[1], doing read\n"); + pkey_write_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + dprintf1("\n"); +} +void test_read_of_access_disabled_region(int *ptr, u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); + rdpkru(); + pkey_access_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + expected_pk_fault(pkey); +} +void test_write_of_write_disabled_region(int *ptr, u16 pkey) +{ + dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); + pkey_write_deny(pkey); + *ptr = __LINE__; + expected_pk_fault(pkey); +} +void test_write_of_access_disabled_region(int *ptr, u16 pkey) +{ + dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); + pkey_access_deny(pkey); + *ptr = __LINE__; + expected_pk_fault(pkey); +} +void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) +{ + int ret; + int test_fd = get_test_read_fd(); + + dprintf1("disabling access to PKEY[%02d], " + "having kernel read() to buffer\n", pkey); + pkey_access_deny(pkey); + ret = read(test_fd, ptr, 1); + dprintf1("read ret: %d\n", ret); + pkey_assert(ret); +} +void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) +{ + int ret; + int test_fd = get_test_read_fd(); + + pkey_write_deny(pkey); + ret = read(test_fd, ptr, 100); + dprintf1("read ret: %d\n", ret); + if (ret < 0 && (DEBUG_LEVEL > 0)) + perror("verbose read result (OK for this to be bad)"); + pkey_assert(ret); +} + +void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) +{ + int pipe_ret, vmsplice_ret; + struct iovec iov; + int pipe_fds[2]; + + pipe_ret = pipe(pipe_fds); + + pkey_assert(pipe_ret == 0); + dprintf1("disabling access to PKEY[%02d], " + "having kernel vmsplice from buffer\n", pkey); + pkey_access_deny(pkey); + iov.iov_base = ptr; + iov.iov_len = PAGE_SIZE; + vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); + dprintf1("vmsplice() ret: %d\n", vmsplice_ret); + pkey_assert(vmsplice_ret == -1); + + close(pipe_fds[0]); + close(pipe_fds[1]); +} + +void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) +{ + int ignored = 0xdada; + int futex_ret; + int some_int = __LINE__; + + dprintf1("disabling write to PKEY[%02d], " + "doing futex gunk in buffer\n", pkey); + *ptr = some_int; + pkey_write_deny(pkey); + futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, + &ignored, ignored); + if (DEBUG_LEVEL > 0) + perror("futex"); + dprintf1("futex() ret: %d\n", futex_ret); +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) +{ + int err; + int i; + + /* Note: 0 is the default pkey, so don't mess with it */ + for (i = 1; i < NR_PKEYS; i++) { + if (pkey == i) + continue; + + dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); + err = sys_pkey_free(i); + pkey_assert(err); + + err = sys_pkey_free(i); + pkey_assert(err); + + err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); + pkey_assert(err); + } +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) +{ + int err; + int bad_pkey = NR_PKEYS+99; + + /* pass a known-invalid pkey in: */ + err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); + pkey_assert(err); +} + +/* Assumes that all pkeys other than 'pkey' are unallocated */ +void test_pkey_alloc_exhaust(int *ptr, u16 pkey) +{ + int err; + int allocated_pkeys[NR_PKEYS] = {0}; + int nr_allocated_pkeys = 0; + int i; + + for (i = 0; i < NR_PKEYS*2; i++) { + int new_pkey; + dprintf1("%s() alloc loop: %d\n", __func__, i); + new_pkey = alloc_pkey(); + dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, + __LINE__, err, __rdpkru(), shadow_pkru); + rdpkru(); /* for shadow checking */ + dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); + if ((new_pkey == -1) && (errno == ENOSPC)) { + dprintf2("%s() failed to allocate pkey after %d tries\n", + __func__, nr_allocated_pkeys); + break; + } + pkey_assert(nr_allocated_pkeys < NR_PKEYS); + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + } + + dprintf3("%s()::%d\n", __func__, __LINE__); + + /* + * ensure it did not reach the end of the loop without + * failure: + */ + pkey_assert(i < NR_PKEYS*2); + + /* + * There are 16 pkeys supported in hardware. One is taken + * up for the default (0) and another can be taken up by + * an execute-only mapping. Ensure that we can allocate + * at least 14 (16-2). + */ + pkey_assert(i >= NR_PKEYS-2); + + for (i = 0; i < nr_allocated_pkeys; i++) { + err = sys_pkey_free(allocated_pkeys[i]); + pkey_assert(!err); + rdpkru(); /* for shadow checking */ + } +} + +void test_ptrace_of_child(int *ptr, u16 pkey) +{ + __attribute__((__unused__)) int peek_result; + pid_t child_pid; + void *ignored = 0; + long ret; + int status; + /* + * This is the "control" for our little expermient. Make sure + * we can always access it when ptracing. + */ + int *plain_ptr_unaligned = malloc(HPAGE_SIZE); + int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); + + /* + * Fork a child which is an exact copy of this process, of course. + * That means we can do all of our tests via ptrace() and then plain + * memory access and ensure they work differently. + */ + child_pid = fork_lazy_child(); + dprintf1("[%d] child pid: %d\n", getpid(), child_pid); + + ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); + if (ret) + perror("attach"); + dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); + pkey_assert(ret != -1); + ret = waitpid(child_pid, &status, WUNTRACED); + if ((ret != child_pid) || !(WIFSTOPPED(status))) { + fprintf(stderr, "weird waitpid result %ld stat %x\n", + ret, status); + pkey_assert(0); + } + dprintf2("waitpid ret: %ld\n", ret); + dprintf2("waitpid status: %d\n", status); + + pkey_access_deny(pkey); + pkey_write_deny(pkey); + + /* Write access, untested for now: + ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); + pkey_assert(ret != -1); + dprintf1("poke at %p: %ld\n", peek_at, ret); + */ + + /* + * Try to access the pkey-protected "ptr" via ptrace: + */ + ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); + /* expect it to work, without an error: */ + pkey_assert(ret != -1); + /* Now access from the current task, and expect an exception: */ + peek_result = read_ptr(ptr); + expected_pk_fault(pkey); + + /* + * Try to access the NON-pkey-protected "plain_ptr" via ptrace: + */ + ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); + /* expect it to work, without an error: */ + pkey_assert(ret != -1); + /* Now access from the current task, and expect NO exception: */ + peek_result = read_ptr(plain_ptr); + do_not_expect_pk_fault(); + + ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); + pkey_assert(ret != -1); + + ret = kill(child_pid, SIGKILL); + pkey_assert(ret != -1); + + wait(&status); + + free(plain_ptr_unaligned); +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); + dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); + /* lots_o_noops_around_write should be page-aligned already */ + assert(p1 == &lots_o_noops_around_write); + + /* Point 'p1' at the *second* page of the function: */ + p1 += PAGE_SIZE; + + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); + pkey_assert(!ret); + pkey_access_deny(pkey); + + dprintf2("pkru: %x\n", rdpkru()); + + /* + * Make sure this is an *instruction* fault + */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault(); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(pkey); +} + +void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) +{ + int size = PAGE_SIZE; + int sret; + + if (cpu_has_pku()) { + dprintf1("SKIP: %s: no CPU support\n", __func__); + return; + } + + sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); + pkey_assert(sret < 0); +} + +void (*pkey_tests[])(int *ptr, u16 pkey) = { + test_read_of_write_disabled_region, + test_read_of_access_disabled_region, + test_write_of_write_disabled_region, + test_write_of_access_disabled_region, + test_kernel_write_of_access_disabled_region, + test_kernel_write_of_write_disabled_region, + test_kernel_gup_of_access_disabled_region, + test_kernel_gup_write_to_write_disabled_region, + test_executing_on_unreadable_memory, + test_ptrace_of_child, + test_pkey_syscalls_on_non_allocated_pkey, + test_pkey_syscalls_bad_args, + test_pkey_alloc_exhaust, +}; + +void run_tests_once(void) +{ + int *ptr; + int prot = PROT_READ|PROT_WRITE; + + for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { + int pkey; + int orig_pkru_faults = pkru_faults; + + dprintf1("======================\n"); + dprintf1("test %d preparing...\n", test_nr); + + tracing_on(); + pkey = alloc_random_pkey(); + dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); + ptr = malloc_pkey(PAGE_SIZE, prot, pkey); + dprintf1("test %d starting...\n", test_nr); + pkey_tests[test_nr](ptr, pkey); + dprintf1("freeing test memory: %p\n", ptr); + free_pkey_malloc(ptr); + sys_pkey_free(pkey); + + dprintf1("pkru_faults: %d\n", pkru_faults); + dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); + + tracing_off(); + close_test_fds(); + + printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); + dprintf1("======================\n\n"); + } + iteration_nr++; +} + +void pkey_setup_shadow(void) +{ + shadow_pkru = __rdpkru(); +} + +int main(void) +{ + int nr_iterations = 22; + + setup_handlers(); + + printf("has pku: %d\n", cpu_has_pku()); + + if (!cpu_has_pku()) { + int size = PAGE_SIZE; + int *ptr; + + printf("running PKEY tests for unsupported CPU/OS\n"); + + ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + assert(ptr != (void *)-1); + test_mprotect_pkey_on_unsupported_cpu(ptr, 1); + exit(0); + } + + pkey_setup_shadow(); + printf("startup pkru: %x\n", rdpkru()); + setup_hugetlbfs(); + + while (nr_iterations-- > 0) + run_tests_once(); + + printf("done (all tests OK)\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5d4f10a..e1711fb 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -7,7 +7,7 @@ include ../lib.mk
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ - protection_keys test_vdso test_vsyscall + test_vdso test_vsyscall TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h deleted file mode 100644 index b3cb767..0000000 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ /dev/null @@ -1,223 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PKEYS_HELPER_H -#define _PKEYS_HELPER_H -#define _GNU_SOURCE -#include <string.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> - -#define NR_PKEYS 16 -#define PKRU_BITS_PER_PKEY 2 - -#ifndef DEBUG_LEVEL -#define DEBUG_LEVEL 0 -#endif -#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 -extern int dprint_in_signal; -extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -static inline void sigsafe_printf(const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - if (!dprint_in_signal) { - vprintf(format, ap); - } else { - int ret; - int len = vsnprintf(dprint_in_signal_buffer, - DPRINT_IN_SIGNAL_BUF_SIZE, - format, ap); - /* - * len is amount that would have been printed, - * but actual write is truncated at BUF_SIZE. - */ - if (len > DPRINT_IN_SIGNAL_BUF_SIZE) - len = DPRINT_IN_SIGNAL_BUF_SIZE; - ret = write(1, dprint_in_signal_buffer, len); - if (ret < 0) - abort(); - } - va_end(ap); -} -#define dprintf_level(level, args...) do { \ - if (level <= DEBUG_LEVEL) \ - sigsafe_printf(args); \ - fflush(NULL); \ -} while (0) -#define dprintf0(args...) dprintf_level(0, args) -#define dprintf1(args...) dprintf_level(1, args) -#define dprintf2(args...) dprintf_level(2, args) -#define dprintf3(args...) dprintf_level(3, args) -#define dprintf4(args...) dprintf_level(4, args) - -extern unsigned int shadow_pkru; -static inline unsigned int __rdpkru(void) -{ - unsigned int eax, edx; - unsigned int ecx = 0; - unsigned int pkru; - - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (eax), "=d" (edx) - : "c" (ecx)); - pkru = eax; - return pkru; -} - -static inline unsigned int _rdpkru(int line) -{ - unsigned int pkru = __rdpkru(); - - dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", - line, pkru, shadow_pkru); - assert(pkru == shadow_pkru); - - return pkru; -} - -#define rdpkru() _rdpkru(__LINE__) - -static inline void __wrpkru(unsigned int pkru) -{ - unsigned int eax = pkru; - unsigned int ecx = 0; - unsigned int edx = 0; - - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkru == __rdpkru()); -} - -static inline void wrpkru(unsigned int pkru) -{ - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - /* will do the shadow check for us: */ - rdpkru(); - __wrpkru(pkru); - shadow_pkru = pkru; - dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); -} - -/* - * These are technically racy. since something could - * change PKRU between the read and the write. - */ -static inline void __pkey_access_allow(int pkey, int do_allow) -{ - unsigned int pkru = rdpkru(); - int bit = pkey * 2; - - if (do_allow) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - dprintf4("pkru now: %08x\n", rdpkru()); - wrpkru(pkru); -} - -static inline void __pkey_write_allow(int pkey, int do_allow_write) -{ - long pkru = rdpkru(); - int bit = pkey * 2 + 1; - - if (do_allow_write) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - wrpkru(pkru); - dprintf4("pkru now: %08x\n", rdpkru()); -} - -#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ -#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ -#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ -#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ - -#define PAGE_SIZE 4096 -#define MB (1<<20) - -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ -#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ - -static inline int cpu_has_pku(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - - eax = 0x7; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (!(ecx & X86_FEATURE_PKU)) { - dprintf2("cpu does not have PKU\n"); - return 0; - } - if (!(ecx & X86_FEATURE_OSPKE)) { - dprintf2("cpu does not have OSPKE\n"); - return 0; - } - return 1; -} - -#define XSTATE_PKRU_BIT (9) -#define XSTATE_PKRU 0x200 - -int pkru_xstate_offset(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - int xstate_offset; - int xstate_size; - unsigned long XSTATE_CPUID = 0xd; - int leaf; - - /* assume that XSTATE_PKRU is set in XCR0 */ - leaf = XSTATE_PKRU_BIT; - { - eax = XSTATE_CPUID; - ecx = leaf; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (leaf == XSTATE_PKRU_BIT) { - xstate_offset = ebx; - xstate_size = eax; - } - } - - if (xstate_size == 0) { - printf("could not find size/offset of PKRU in xsave state\n"); - return 0; - } - - return xstate_offset; -} - -#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c deleted file mode 100644 index bc1b073..0000000 --- a/tools/testing/selftests/x86/protection_keys.c +++ /dev/null @@ -1,1407 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) - * - * There are examples in here of: - * * how to set protection keys on memory - * * how to set/clear bits in PKRU (the rights register) - * * how to handle SEGV_PKRU signals and extract pkey-relevant - * information from the siginfo - * - * Things to add: - * make sure KSM and KSM COW breaking works - * prefault pages in at malloc, or not - * protect MPX bounds tables with protection keys? - * make sure VMA splitting/merging is working correctly - * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys - * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel - * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks - * - * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - */ -#define _GNU_SOURCE -#include <errno.h> -#include <linux/futex.h> -#include <sys/time.h> -#include <sys/syscall.h> -#include <string.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/ptrace.h> -#include <setjmp.h> - -#include "pkey-helpers.h" - -int iteration_nr = 1; -int test_nr; - -unsigned int shadow_pkru; - -#define HPAGE_SIZE (1UL<<21) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) -#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) -#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) - -int dprint_in_signal; -char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; - -extern void abort_hooks(void); -#define pkey_assert(condition) do { \ - if (!(condition)) { \ - dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ - __FILE__, __LINE__, \ - test_nr, iteration_nr); \ - dprintf0("errno at assert: %d", errno); \ - abort_hooks(); \ - assert(condition); \ - } \ -} while (0) -#define raw_assert(cond) assert(cond) - -void cat_into_file(char *str, char *file) -{ - int fd = open(file, O_RDWR); - int ret; - - dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); - /* - * these need to be raw because they are called under - * pkey_assert() - */ - raw_assert(fd >= 0); - ret = write(fd, str, strlen(str)); - if (ret != strlen(str)) { - perror("write to file failed"); - fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - raw_assert(0); - } - close(fd); -} - -#if CONTROL_TRACING > 0 -static int warned_tracing; -int tracing_root_ok(void) -{ - if (geteuid() != 0) { - if (!warned_tracing) - fprintf(stderr, "WARNING: not run as root, " - "can not do tracing control\n"); - warned_tracing = 1; - return 0; - } - return 1; -} -#endif - -void tracing_on(void) -{ -#if CONTROL_TRACING > 0 -#define TRACEDIR "/sys/kernel/debug/tracing" - char pidstr[32]; - - if (!tracing_root_ok()) - return; - - sprintf(pidstr, "%d", getpid()); - cat_into_file("0", TRACEDIR "/tracing_on"); - cat_into_file("\n", TRACEDIR "/trace"); - if (1) { - cat_into_file("function_graph", TRACEDIR "/current_tracer"); - cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); - } else { - cat_into_file("nop", TRACEDIR "/current_tracer"); - } - cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); - cat_into_file("1", TRACEDIR "/tracing_on"); - dprintf1("enabled tracing\n"); -#endif -} - -void tracing_off(void) -{ -#if CONTROL_TRACING > 0 - if (!tracing_root_ok()) - return; - cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); -#endif -} - -void abort_hooks(void) -{ - fprintf(stderr, "running %s()...\n", __func__); - tracing_off(); -#ifdef SLEEP_ON_ABORT - sleep(SLEEP_ON_ABORT); -#endif -} - -static inline void __page_o_noops(void) -{ - /* 8-bytes of instruction * 512 bytes = 1 page */ - asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); -} - -/* - * This attempts to have roughly a page of instructions followed by a few - * instructions that do a write, and another page of instructions. That - * way, we are pretty sure that the write is in the second page of - * instructions and has at least a page of padding behind it. - * - * *That* lets us be sure to madvise() away the write instruction, which - * will then fault, which makes sure that the fault code handles - * execute-only memory properly. - */ -__attribute__((__aligned__(PAGE_SIZE))) -void lots_o_noops_around_write(int *write_to_me) -{ - dprintf3("running %s()\n", __func__); - __page_o_noops(); - /* Assume this happens in the second page of instructions: */ - *write_to_me = __LINE__; - /* pad out by another page: */ - __page_o_noops(); - dprintf3("%s() done\n", __func__); -} - -/* Define some kernel-like types */ -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - -#ifdef __i386__ - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 - -#else - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 - -#endif - -void dump_mem(void *dumpme, int len_bytes) -{ - char *c = (void *)dumpme; - int i; - - for (i = 0; i < len_bytes; i += sizeof(u64)) { - u64 *ptr = (u64 *)(c + i); - dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); - } -} - -#define SEGV_BNDERR 3 /* failed address bound checks */ -#define SEGV_PKUERR 4 - -static char *si_code_str(int si_code) -{ - if (si_code == SEGV_MAPERR) - return "SEGV_MAPERR"; - if (si_code == SEGV_ACCERR) - return "SEGV_ACCERR"; - if (si_code == SEGV_BNDERR) - return "SEGV_BNDERR"; - if (si_code == SEGV_PKUERR) - return "SEGV_PKUERR"; - return "UNKNOWN"; -} - -int pkru_faults; -int last_si_pkey = -1; -void signal_handler(int signum, siginfo_t *si, void *vucontext) -{ - ucontext_t *uctxt = vucontext; - int trapno; - unsigned long ip; - char *fpregs; - u32 *pkru_ptr; - u64 siginfo_pkey; - u32 *si_pkey_ptr; - int pkru_offset; - fpregset_t fpregset; - - dprint_in_signal = 1; - dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, - __rdpkru(), shadow_pkru); - - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - fpregset = uctxt->uc_mcontext.fpregs; - fpregs = (void *)fpregset; - - dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, - trapno, ip, si_code_str(si->si_code), si->si_code); -#ifdef __i386__ - /* - * 32-bit has some extra padding so that userspace can tell whether - * the XSTATE header is present in addition to the "legacy" FPU - * state. We just assume that it is here. - */ - fpregs += 0x70; -#endif - pkru_offset = pkru_xstate_offset(); - pkru_ptr = (void *)(&fpregs[pkru_offset]); - - dprintf1("siginfo: %p\n", si); - dprintf1(" fpregs: %p\n", fpregs); - /* - * If we got a PKRU fault, we *HAVE* to have at least one bit set in - * here. - */ - dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); - if (DEBUG_LEVEL > 4) - dump_mem(pkru_ptr - 128, 256); - pkey_assert(*pkru_ptr); - - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem(si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - - if ((si->si_code == SEGV_MAPERR) || - (si->si_code == SEGV_ACCERR) || - (si->si_code == SEGV_BNDERR)) { - printf("non-PK si_code, exiting...\n"); - exit(4); - } - - dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); - /* need __rdpkru() version so we do not do shadow_pkru checking */ - dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkru_ptr = 0x00000000; - dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); - pkru_faults++; - dprintf1("<<<<==================================================\n"); - return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } - dprint_in_signal = 0; -} - -int wait_all_children(void) -{ - int status; - return waitpid(-1, &status, 0); -} - -void sig_chld(int x) -{ - dprint_in_signal = 1; - dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); - dprint_in_signal = 0; -} - -void setup_sigsegv_handler(void) -{ - int r, rs; - struct sigaction newact; - struct sigaction oldact; - - /* #PF is mapped to sigsegv */ - int signum = SIGSEGV; - - newact.sa_handler = 0; - newact.sa_sigaction = signal_handler; - - /*sigset_t - signals to block while in the handler */ - /* get the old signal mask. */ - rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); - pkey_assert(rs == 0); - - /* call sa_sigaction, not sa_handler*/ - newact.sa_flags = SA_SIGINFO; - - newact.sa_restorer = 0; /* void(*)(), obsolete */ - r = sigaction(signum, &newact, &oldact); - r = sigaction(SIGALRM, &newact, &oldact); - pkey_assert(r == 0); -} - -void setup_handlers(void) -{ - signal(SIGCHLD, &sig_chld); - setup_sigsegv_handler(); -} - -pid_t fork_lazy_child(void) -{ - pid_t forkret; - - forkret = fork(); - pkey_assert(forkret >= 0); - dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); - - if (!forkret) { - /* in the child */ - while (1) { - dprintf1("child sleeping...\n"); - sleep(30); - } - } - return forkret; -} - -void davecmp(void *_a, void *_b, int len) -{ - int i; - unsigned long *a = _a; - unsigned long *b = _b; - - for (i = 0; i < len / sizeof(*a); i++) { - if (a[i] == b[i]) - continue; - - dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]); - } -} - -void dumpit(char *f) -{ - int fd = open(f, O_RDONLY); - char buf[100]; - int nr_read; - - dprintf2("maps fd: %d\n", fd); - do { - nr_read = read(fd, &buf[0], sizeof(buf)); - write(1, buf, nr_read); - } while (nr_read > 0); - close(fd); -} - -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 - -u32 pkey_get(int pkey, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkru = __rdpkru(); - u32 shifted_pkru; - u32 masked_pkru; - - dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", - __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkru: %x\n", __func__, pkru); - - shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); - masked_pkru = shifted_pkru & mask; - dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); - /* - * shift down the relevant bits to the lowest two, then - * mask off all the other high bits. - */ - return masked_pkru; -} - -int pkey_set(int pkey, unsigned long rights, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkru = __rdpkru(); - u32 new_pkru; - - /* make sure that 'rights' only contains the bits we expect: */ - assert(!(rights & ~mask)); - - /* copy old pkru */ - new_pkru = old_pkru; - /* mask out bits from pkey in old value: */ - new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); - /* OR in new bits for pkey: */ - new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); - - __wrpkru(new_pkru); - - dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", - __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); - return 0; -} - -void pkey_disable_set(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights; - u32 orig_pkru = rdpkru(); - - dprintf1("START->%s(%d, 0x%x)\n", __func__, - pkey, flags); - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - pkey_rights = pkey_get(pkey, syscall_flags); - - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = pkey_set(pkey, pkey_rights, syscall_flags); - assert(!ret); - /*pkru and flags have the same format */ - shadow_pkru |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); - - pkey_assert(ret >= 0); - - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - pkey_assert(rdpkru() > orig_pkru); - dprintf1("END<---%s(%d, 0x%x)\n", __func__, - pkey, flags); -} - -void pkey_disable_clear(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); - u32 orig_pkru = rdpkru(); - - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = pkey_set(pkey, pkey_rights, 0); - /* pkru and flags have the same format */ - shadow_pkru &= ~(flags << (pkey * 2)); - pkey_assert(ret >= 0); - - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - assert(rdpkru() > orig_pkru); -} - -void pkey_write_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); -} -void pkey_write_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_WRITE); -} -void pkey_access_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); -} -void pkey_access_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); -} - -int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int sret; - - dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, - ptr, size, orig_prot, pkey); - - errno = 0; - sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); - if (errno) { - dprintf2("SYS_mprotect_key sret: %d\n", sret); - dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); - dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); - if (DEBUG_LEVEL >= 2) - perror("SYS_mprotect_pkey"); - } - return sret; -} - -int sys_pkey_alloc(unsigned long flags, unsigned long init_val) -{ - int ret = syscall(SYS_pkey_alloc, flags, init_val); - dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", - __func__, flags, init_val, ret, errno); - return ret; -} - -int alloc_pkey(void) -{ - int ret; - unsigned long init_val = 0x0; - - dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", - __LINE__, __rdpkru(), shadow_pkru); - ret = sys_pkey_alloc(0, init_val); - /* - * pkey_alloc() sets PKRU, so we need to reflect it in - * shadow_pkru: - */ - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - if (ret) { - /* clear both the bits: */ - shadow_pkru &= ~(0x3 << (ret * 2)); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - /* - * move the new state in from init_val - * (remember, we cheated and init_val == pkru format) - */ - shadow_pkru |= (init_val << (ret * 2)); - } - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); - /* for shadow checking: */ - rdpkru(); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -int sys_pkey_free(unsigned long pkey) -{ - int ret = syscall(SYS_pkey_free, pkey); - dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); - return ret; -} - -/* - * I had a bug where pkey bits could be set by mprotect() but - * not cleared. This ensures we get lots of random bit sets - * and clears on the vma and pte pkey bits. - */ -int alloc_random_pkey(void) -{ - int max_nr_pkey_allocs; - int ret; - int i; - int alloced_pkeys[NR_PKEYS]; - int nr_alloced = 0; - int random_index; - memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); - - /* allocate every possible key and make a note of which ones we got */ - max_nr_pkey_allocs = NR_PKEYS; - max_nr_pkey_allocs = 1; - for (i = 0; i < max_nr_pkey_allocs; i++) { - int new_pkey = alloc_pkey(); - if (new_pkey < 0) - break; - alloced_pkeys[nr_alloced++] = new_pkey; - } - - pkey_assert(nr_alloced > 0); - /* select a random one out of the allocated ones */ - random_index = rand() % nr_alloced; - ret = alloced_pkeys[random_index]; - /* now zero it out so we don't free it next */ - alloced_pkeys[random_index] = 0; - - /* go through the allocated ones that we did not want and free them */ - for (i = 0; i < nr_alloced; i++) { - int free_ret; - if (!alloced_pkeys[i]) - continue; - free_ret = sys_pkey_free(alloced_pkeys[i]); - pkey_assert(!free_ret); - } - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int nr_iterations = random() % 100; - int ret; - - while (0) { - int rpkey = alloc_random_pkey(); - ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); - dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", - ptr, size, orig_prot, pkey, ret); - if (nr_iterations-- < 0) - break; - - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - } - pkey_assert(pkey < NR_PKEYS); - - ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); - dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", - ptr, size, orig_prot, pkey, ret); - pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -struct pkey_malloc_record { - void *ptr; - long size; -}; -struct pkey_malloc_record *pkey_malloc_records; -long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size) -{ - long i; - struct pkey_malloc_record *rec = NULL; - - for (i = 0; i < nr_pkey_malloc_records; i++) { - rec = &pkey_malloc_records[i]; - /* find a free record */ - if (rec) - break; - } - if (!rec) { - /* every record is full */ - size_t old_nr_records = nr_pkey_malloc_records; - size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); - size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); - dprintf2("new_nr_records: %zd\n", new_nr_records); - dprintf2("new_size: %zd\n", new_size); - pkey_malloc_records = realloc(pkey_malloc_records, new_size); - pkey_assert(pkey_malloc_records != NULL); - rec = &pkey_malloc_records[nr_pkey_malloc_records]; - /* - * realloc() does not initialize memory, so zero it from - * the first new record all the way to the end. - */ - for (i = 0; i < new_nr_records - old_nr_records; i++) - memset(rec + i, 0, sizeof(*rec)); - } - dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", - (int)(rec - pkey_malloc_records), rec, ptr, size); - rec->ptr = ptr; - rec->size = size; - nr_pkey_malloc_records++; -} - -void free_pkey_malloc(void *ptr) -{ - long i; - int ret; - dprintf3("%s(%p)\n", __func__, ptr); - for (i = 0; i < nr_pkey_malloc_records; i++) { - struct pkey_malloc_record *rec = &pkey_malloc_records[i]; - dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", - ptr, i, rec, rec->ptr, rec->size); - if ((ptr < rec->ptr) || - (ptr >= rec->ptr + rec->size)) - continue; - - dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", - ptr, i, rec, rec->ptr, rec->size); - nr_pkey_malloc_records--; - ret = munmap(rec->ptr, rec->size); - dprintf3("munmap ret: %d\n", ret); - pkey_assert(!ret); - dprintf3("clearing rec->ptr, rec: %p\n", rec); - rec->ptr = NULL; - dprintf3("done clearing rec->ptr, rec: %p\n", rec); - return; - } - pkey_assert(false); -} - - -void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) -{ - void *ptr; - int ret; - - rdpkru(); - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - pkey_assert(ptr != (void *)-1); - ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); - pkey_assert(!ret); - record_pkey_malloc(ptr, size); - rdpkru(); - - dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); - return ptr; -} - -void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) -{ - int ret; - void *ptr; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - /* - * Guarantee we can fit at least one huge page in the resulting - * allocation by allocating space for 2: - */ - size = ALIGN_UP(size, HPAGE_SIZE * 2); - ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size); - mprotect_pkey(ptr, size, prot, pkey); - - dprintf1("unaligned ptr: %p\n", ptr); - ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); - dprintf1(" aligned ptr: %p\n", ptr); - ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); - dprintf1("MADV_HUGEPAGE ret: %d\n", ret); - ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); - dprintf1("MADV_WILLNEED ret: %d\n", ret); - memset(ptr, 0, HPAGE_SIZE); - - dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); - return ptr; -} - -int hugetlb_setup_ok; -#define GET_NR_HUGE_PAGES 10 -void setup_hugetlbfs(void) -{ - int err; - int fd; - char buf[] = "123"; - - if (geteuid() != 0) { - fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); - return; - } - - cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); - - /* - * Now go make sure that we got the pages and that they - * are 2M pages. Someone might have made 1G the default. - */ - fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); - if (fd < 0) { - perror("opening sysfs 2M hugetlb config"); - return; - } - - /* -1 to guarantee leaving the trailing \0 */ - err = read(fd, buf, sizeof(buf)-1); - close(fd); - if (err <= 0) { - perror("reading sysfs 2M hugetlb config"); - return; - } - - if (atoi(buf) != GET_NR_HUGE_PAGES) { - fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", - buf, GET_NR_HUGE_PAGES); - return; - } - - hugetlb_setup_ok = 1; -} - -void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) -{ - void *ptr; - int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; - - if (!hugetlb_setup_ok) - return PTR_ERR_ENOTSUP; - - dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); - size = ALIGN_UP(size, HPAGE_SIZE * 2); - pkey_assert(pkey < NR_PKEYS); - ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); - pkey_assert(ptr != (void *)-1); - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size); - - dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); - return ptr; -} - -void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) -{ - void *ptr; - int fd; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - fd = open("/dax/foo", O_RDWR); - pkey_assert(fd >= 0); - - ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); - pkey_assert(ptr != (void *)-1); - - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size); - - dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); - close(fd); - return ptr; -} - -void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { - - malloc_pkey_with_mprotect, - malloc_pkey_anon_huge, - malloc_pkey_hugetlb -/* can not do direct with the pkey_mprotect() API: - malloc_pkey_mmap_direct, - malloc_pkey_mmap_dax, -*/ -}; - -void *malloc_pkey(long size, int prot, u16 pkey) -{ - void *ret; - static int malloc_type; - int nr_malloc_types = ARRAY_SIZE(pkey_malloc); - - pkey_assert(pkey < NR_PKEYS); - - while (1) { - pkey_assert(malloc_type < nr_malloc_types); - - ret = pkey_malloc[malloc_type](size, prot, pkey); - pkey_assert(ret != (void *)-1); - - malloc_type++; - if (malloc_type >= nr_malloc_types) - malloc_type = (random()%nr_malloc_types); - - /* try again if the malloc_type we tried is unsupported */ - if (ret == PTR_ERR_ENOTSUP) - continue; - - break; - } - - dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, - size, prot, pkey, ret); - return ret; -} - -int last_pkru_faults; -void expected_pk_fault(int pkey) -{ - dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", - __func__, last_pkru_faults, pkru_faults); - dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); - pkey_assert(last_pkru_faults + 1 == pkru_faults); - pkey_assert(last_si_pkey == pkey); - /* - * The signal handler shold have cleared out PKRU to let the - * test program continue. We now have to restore it. - */ - if (__rdpkru() != 0) - pkey_assert(0); - - __wrpkru(shadow_pkru); - dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", - __func__, shadow_pkru); - last_pkru_faults = pkru_faults; - last_si_pkey = -1; -} - -void do_not_expect_pk_fault(void) -{ - pkey_assert(last_pkru_faults == pkru_faults); -} - -int test_fds[10] = { -1 }; -int nr_test_fds; -void __save_test_fd(int fd) -{ - pkey_assert(fd >= 0); - pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); - test_fds[nr_test_fds] = fd; - nr_test_fds++; -} - -int get_test_read_fd(void) -{ - int test_fd = open("/etc/passwd", O_RDONLY); - __save_test_fd(test_fd); - return test_fd; -} - -void close_test_fds(void) -{ - int i; - - for (i = 0; i < nr_test_fds; i++) { - if (test_fds[i] < 0) - continue; - close(test_fds[i]); - test_fds[i] = -1; - } - nr_test_fds = 0; -} - -#define barrier() __asm__ __volatile__("": : :"memory") -__attribute__((noinline)) int read_ptr(int *ptr) -{ - /* - * Keep GCC from optimizing this away somehow - */ - barrier(); - return *ptr; -} - -void test_read_of_write_disabled_region(int *ptr, u16 pkey) -{ - int ptr_contents; - - dprintf1("disabling write access to PKEY[1], doing read\n"); - pkey_write_deny(pkey); - ptr_contents = read_ptr(ptr); - dprintf1("*ptr: %d\n", ptr_contents); - dprintf1("\n"); -} -void test_read_of_access_disabled_region(int *ptr, u16 pkey) -{ - int ptr_contents; - - dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); - rdpkru(); - pkey_access_deny(pkey); - ptr_contents = read_ptr(ptr); - dprintf1("*ptr: %d\n", ptr_contents); - expected_pk_fault(pkey); -} -void test_write_of_write_disabled_region(int *ptr, u16 pkey) -{ - dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); - pkey_write_deny(pkey); - *ptr = __LINE__; - expected_pk_fault(pkey); -} -void test_write_of_access_disabled_region(int *ptr, u16 pkey) -{ - dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); - pkey_access_deny(pkey); - *ptr = __LINE__; - expected_pk_fault(pkey); -} -void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) -{ - int ret; - int test_fd = get_test_read_fd(); - - dprintf1("disabling access to PKEY[%02d], " - "having kernel read() to buffer\n", pkey); - pkey_access_deny(pkey); - ret = read(test_fd, ptr, 1); - dprintf1("read ret: %d\n", ret); - pkey_assert(ret); -} -void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) -{ - int ret; - int test_fd = get_test_read_fd(); - - pkey_write_deny(pkey); - ret = read(test_fd, ptr, 100); - dprintf1("read ret: %d\n", ret); - if (ret < 0 && (DEBUG_LEVEL > 0)) - perror("verbose read result (OK for this to be bad)"); - pkey_assert(ret); -} - -void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) -{ - int pipe_ret, vmsplice_ret; - struct iovec iov; - int pipe_fds[2]; - - pipe_ret = pipe(pipe_fds); - - pkey_assert(pipe_ret == 0); - dprintf1("disabling access to PKEY[%02d], " - "having kernel vmsplice from buffer\n", pkey); - pkey_access_deny(pkey); - iov.iov_base = ptr; - iov.iov_len = PAGE_SIZE; - vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); - dprintf1("vmsplice() ret: %d\n", vmsplice_ret); - pkey_assert(vmsplice_ret == -1); - - close(pipe_fds[0]); - close(pipe_fds[1]); -} - -void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) -{ - int ignored = 0xdada; - int futex_ret; - int some_int = __LINE__; - - dprintf1("disabling write to PKEY[%02d], " - "doing futex gunk in buffer\n", pkey); - *ptr = some_int; - pkey_write_deny(pkey); - futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, - &ignored, ignored); - if (DEBUG_LEVEL > 0) - perror("futex"); - dprintf1("futex() ret: %d\n", futex_ret); -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) -{ - int err; - int i; - - /* Note: 0 is the default pkey, so don't mess with it */ - for (i = 1; i < NR_PKEYS; i++) { - if (pkey == i) - continue; - - dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); - err = sys_pkey_free(i); - pkey_assert(err); - - err = sys_pkey_free(i); - pkey_assert(err); - - err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); - pkey_assert(err); - } -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) -{ - int err; - int bad_pkey = NR_PKEYS+99; - - /* pass a known-invalid pkey in: */ - err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); - pkey_assert(err); -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_alloc_exhaust(int *ptr, u16 pkey) -{ - int err; - int allocated_pkeys[NR_PKEYS] = {0}; - int nr_allocated_pkeys = 0; - int i; - - for (i = 0; i < NR_PKEYS*2; i++) { - int new_pkey; - dprintf1("%s() alloc loop: %d\n", __func__, i); - new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, err, __rdpkru(), shadow_pkru); - rdpkru(); /* for shadow checking */ - dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); - if ((new_pkey == -1) && (errno == ENOSPC)) { - dprintf2("%s() failed to allocate pkey after %d tries\n", - __func__, nr_allocated_pkeys); - break; - } - pkey_assert(nr_allocated_pkeys < NR_PKEYS); - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; - } - - dprintf3("%s()::%d\n", __func__, __LINE__); - - /* - * ensure it did not reach the end of the loop without - * failure: - */ - pkey_assert(i < NR_PKEYS*2); - - /* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). - */ - pkey_assert(i >= NR_PKEYS-2); - - for (i = 0; i < nr_allocated_pkeys; i++) { - err = sys_pkey_free(allocated_pkeys[i]); - pkey_assert(!err); - rdpkru(); /* for shadow checking */ - } -} - -void test_ptrace_of_child(int *ptr, u16 pkey) -{ - __attribute__((__unused__)) int peek_result; - pid_t child_pid; - void *ignored = 0; - long ret; - int status; - /* - * This is the "control" for our little expermient. Make sure - * we can always access it when ptracing. - */ - int *plain_ptr_unaligned = malloc(HPAGE_SIZE); - int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); - - /* - * Fork a child which is an exact copy of this process, of course. - * That means we can do all of our tests via ptrace() and then plain - * memory access and ensure they work differently. - */ - child_pid = fork_lazy_child(); - dprintf1("[%d] child pid: %d\n", getpid(), child_pid); - - ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); - if (ret) - perror("attach"); - dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); - pkey_assert(ret != -1); - ret = waitpid(child_pid, &status, WUNTRACED); - if ((ret != child_pid) || !(WIFSTOPPED(status))) { - fprintf(stderr, "weird waitpid result %ld stat %x\n", - ret, status); - pkey_assert(0); - } - dprintf2("waitpid ret: %ld\n", ret); - dprintf2("waitpid status: %d\n", status); - - pkey_access_deny(pkey); - pkey_write_deny(pkey); - - /* Write access, untested for now: - ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); - pkey_assert(ret != -1); - dprintf1("poke at %p: %ld\n", peek_at, ret); - */ - - /* - * Try to access the pkey-protected "ptr" via ptrace: - */ - ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); - /* expect it to work, without an error: */ - pkey_assert(ret != -1); - /* Now access from the current task, and expect an exception: */ - peek_result = read_ptr(ptr); - expected_pk_fault(pkey); - - /* - * Try to access the NON-pkey-protected "plain_ptr" via ptrace: - */ - ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); - /* expect it to work, without an error: */ - pkey_assert(ret != -1); - /* Now access from the current task, and expect NO exception: */ - peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); - - ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); - pkey_assert(ret != -1); - - ret = kill(child_pid, SIGKILL); - pkey_assert(ret != -1); - - wait(&status); - - free(plain_ptr_unaligned); -} - -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) -{ - void *p1; - int scratch; - int ptr_contents; - int ret; - - p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); - dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); - /* lots_o_noops_around_write should be page-aligned already */ - assert(p1 == &lots_o_noops_around_write); - - /* Point 'p1' at the *second* page of the function: */ - p1 += PAGE_SIZE; - - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - lots_o_noops_around_write(&scratch); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - - ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); - pkey_assert(!ret); - pkey_access_deny(pkey); - - dprintf2("pkru: %x\n", rdpkru()); - - /* - * Make sure this is an *instruction* fault - */ - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(pkey); -} - -void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) -{ - int size = PAGE_SIZE; - int sret; - - if (cpu_has_pku()) { - dprintf1("SKIP: %s: no CPU support\n", __func__); - return; - } - - sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); - pkey_assert(sret < 0); -} - -void (*pkey_tests[])(int *ptr, u16 pkey) = { - test_read_of_write_disabled_region, - test_read_of_access_disabled_region, - test_write_of_write_disabled_region, - test_write_of_access_disabled_region, - test_kernel_write_of_access_disabled_region, - test_kernel_write_of_write_disabled_region, - test_kernel_gup_of_access_disabled_region, - test_kernel_gup_write_to_write_disabled_region, - test_executing_on_unreadable_memory, - test_ptrace_of_child, - test_pkey_syscalls_on_non_allocated_pkey, - test_pkey_syscalls_bad_args, - test_pkey_alloc_exhaust, -}; - -void run_tests_once(void) -{ - int *ptr; - int prot = PROT_READ|PROT_WRITE; - - for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { - int pkey; - int orig_pkru_faults = pkru_faults; - - dprintf1("======================\n"); - dprintf1("test %d preparing...\n", test_nr); - - tracing_on(); - pkey = alloc_random_pkey(); - dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); - ptr = malloc_pkey(PAGE_SIZE, prot, pkey); - dprintf1("test %d starting...\n", test_nr); - pkey_tests[test_nr](ptr, pkey); - dprintf1("freeing test memory: %p\n", ptr); - free_pkey_malloc(ptr); - sys_pkey_free(pkey); - - dprintf1("pkru_faults: %d\n", pkru_faults); - dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); - - tracing_off(); - close_test_fds(); - - printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); - dprintf1("======================\n\n"); - } - iteration_nr++; -} - -void pkey_setup_shadow(void) -{ - shadow_pkru = __rdpkru(); -} - -int main(void) -{ - int nr_iterations = 22; - - setup_handlers(); - - printf("has pku: %d\n", cpu_has_pku()); - - if (!cpu_has_pku()) { - int size = PAGE_SIZE; - int *ptr; - - printf("running PKEY tests for unsupported CPU/OS\n"); - - ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - assert(ptr != (void *)-1); - test_mprotect_pkey_on_unsupported_cpu(ptr, 1); - exit(0); - } - - pkey_setup_shadow(); - printf("startup pkru: %x\n", rdpkru()); - setup_hugetlbfs(); - - while (nr_iterations-- > 0) - run_tests_once(); - - printf("done (all tests OK)\n"); - return 0; -}
 
            some pkru references are named to pkey_reg and some prku references are renamed to pkey
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 85 +++++----- tools/testing/selftests/vm/protection_keys.c | 227 ++++++++++++++------------ 2 files changed, 164 insertions(+), 148 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index b3cb767..a568166 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -14,7 +14,7 @@ #include <sys/mman.h>
#define NR_PKEYS 16 -#define PKRU_BITS_PER_PKEY 2 +#define PKEY_BITS_PER_PKEY 2
#ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 @@ -57,85 +57,88 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf3(args...) dprintf_level(3, args) #define dprintf4(args...) dprintf_level(4, args)
-extern unsigned int shadow_pkru; -static inline unsigned int __rdpkru(void) +extern unsigned int shadow_pkey_reg; +static inline unsigned int __rdpkey_reg(void) { unsigned int eax, edx; unsigned int ecx = 0; - unsigned int pkru; + unsigned int pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (eax), "=d" (edx) : "c" (ecx)); - pkru = eax; - return pkru; + pkey_reg = eax; + return pkey_reg; }
-static inline unsigned int _rdpkru(int line) +static inline unsigned int _rdpkey_reg(int line) { - unsigned int pkru = __rdpkru(); + unsigned int pkey_reg = __rdpkey_reg();
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", - line, pkru, shadow_pkru); - assert(pkru == shadow_pkru); + dprintf4("rdpkey_reg(line=%d) pkey_reg: %x shadow: %x\n", + line, pkey_reg, shadow_pkey_reg); + assert(pkey_reg == shadow_pkey_reg);
- return pkru; + return pkey_reg; }
-#define rdpkru() _rdpkru(__LINE__) +#define rdpkey_reg() _rdpkey_reg(__LINE__)
-static inline void __wrpkru(unsigned int pkru) +static inline void __wrpkey_reg(unsigned int pkey_reg) { - unsigned int eax = pkru; + unsigned int eax = pkey_reg; unsigned int ecx = 0; unsigned int edx = 0;
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + dprintf4("%s() changing %08x to %08x\n", __func__, + __rdpkey_reg(), pkey_reg); asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkru == __rdpkru()); + assert(pkey_reg == __rdpkey_reg()); }
-static inline void wrpkru(unsigned int pkru) +static inline void wrpkey_reg(unsigned int pkey_reg) { - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); + dprintf4("%s() changing %08x to %08x\n", __func__, + __rdpkey_reg(), pkey_reg); /* will do the shadow check for us: */ - rdpkru(); - __wrpkru(pkru); - shadow_pkru = pkru; - dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); + rdpkey_reg(); + __wrpkey_reg(pkey_reg); + shadow_pkey_reg = pkey_reg; + dprintf4("%s(%08x) pkey_reg: %08x\n", __func__, + pkey_reg, __rdpkey_reg()); }
/* * These are technically racy. since something could - * change PKRU between the read and the write. + * change PKEY register between the read and the write. */ static inline void __pkey_access_allow(int pkey, int do_allow) { - unsigned int pkru = rdpkru(); + unsigned int pkey_reg = rdpkey_reg(); int bit = pkey * 2;
if (do_allow) - pkru &= (1<<bit); + pkey_reg &= (1<<bit); else - pkru |= (1<<bit); + pkey_reg |= (1<<bit);
- dprintf4("pkru now: %08x\n", rdpkru()); - wrpkru(pkru); + dprintf4("pkey_reg now: %08x\n", rdpkey_reg()); + wrpkey_reg(pkey_reg); }
static inline void __pkey_write_allow(int pkey, int do_allow_write) { - long pkru = rdpkru(); + long pkey_reg = rdpkey_reg(); int bit = pkey * 2 + 1;
if (do_allow_write) - pkru &= (1<<bit); + pkey_reg &= (1<<bit); else - pkru |= (1<<bit); + pkey_reg |= (1<<bit);
- wrpkru(pkru); - dprintf4("pkru now: %08x\n", rdpkru()); + wrpkey_reg(pkey_reg); + dprintf4("pkey_reg now: %08x\n", rdpkey_reg()); }
#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ @@ -185,10 +188,10 @@ static inline int cpu_has_pku(void) return 1; }
-#define XSTATE_PKRU_BIT (9) -#define XSTATE_PKRU 0x200 +#define XSTATE_PKEY_BIT (9) +#define XSTATE_PKEY 0x200
-int pkru_xstate_offset(void) +int pkey_reg_xstate_offset(void) { unsigned int eax; unsigned int ebx; @@ -199,21 +202,21 @@ int pkru_xstate_offset(void) unsigned long XSTATE_CPUID = 0xd; int leaf;
- /* assume that XSTATE_PKRU is set in XCR0 */ - leaf = XSTATE_PKRU_BIT; + /* assume that XSTATE_PKEY is set in XCR0 */ + leaf = XSTATE_PKEY_BIT; { eax = XSTATE_CPUID; ecx = leaf; __cpuid(&eax, &ebx, &ecx, &edx);
- if (leaf == XSTATE_PKRU_BIT) { + if (leaf == XSTATE_PKEY_BIT) { xstate_offset = ebx; xstate_size = eax; } }
if (xstate_size == 0) { - printf("could not find size/offset of PKRU in xsave state\n"); + printf("could not find size/offset of PKEY in xsave state\n"); return 0; }
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index bc1b073..4aebf12 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt) + * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt) * * There are examples in here of: * * how to set protection keys on memory - * * how to set/clear bits in PKRU (the rights register) - * * how to handle SEGV_PKRU signals and extract pkey-relevant + * * how to set/clear bits in pkey registers (the rights register) + * * how to handle SEGV_PKUERR signals and extract pkey-relevant * information from the siginfo * * Things to add: @@ -48,7 +48,7 @@ int iteration_nr = 1; int test_nr;
-unsigned int shadow_pkru; +unsigned int shadow_pkey_reg;
#define HPAGE_SIZE (1UL<<21) #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) @@ -241,7 +241,7 @@ void dump_mem(void *dumpme, int len_bytes) return "UNKNOWN"; }
-int pkru_faults; +int pkey_faults; int last_si_pkey = -1; void signal_handler(int signum, siginfo_t *si, void *vucontext) { @@ -249,16 +249,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) int trapno; unsigned long ip; char *fpregs; - u32 *pkru_ptr; + u32 *pkey_reg_ptr; u64 siginfo_pkey; u32 *si_pkey_ptr; - int pkru_offset; + int pkey_reg_offset; fpregset_t fpregset;
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, - __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__, + __rdpkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; @@ -275,19 +275,19 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) */ fpregs += 0x70; #endif - pkru_offset = pkru_xstate_offset(); - pkru_ptr = (void *)(&fpregs[pkru_offset]); + pkey_reg_offset = pkey_reg_xstate_offset(); + pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
dprintf1("siginfo: %p\n", si); dprintf1(" fpregs: %p\n", fpregs); /* - * If we got a PKRU fault, we *HAVE* to have at least one bit set in + * If we got a PKEY fault, we *HAVE* to have at least one bit set in * here. */ - dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); + dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4) - dump_mem(pkru_ptr - 128, 256); - pkey_assert(*pkru_ptr); + dump_mem(pkey_reg_ptr - 128, 256); + pkey_assert(*pkey_reg_ptr);
si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); @@ -303,13 +303,16 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); }
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); - /* need __rdpkru() version so we do not do shadow_pkru checking */ - dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); + dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); + /* + * need __rdpkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + dprintf1("signal pkey_reg from pkey_reg: %08x\n", __rdpkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkru_ptr = 0x00000000; + *(u64 *)pkey_reg_ptr = 0x00000000; dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); - pkru_faults++; + pkey_faults++; dprintf1("<<<<==================================================\n"); return; if (trapno == 14) { @@ -427,45 +430,47 @@ void dumpit(char *f) u32 pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkru = __rdpkru(); - u32 shifted_pkru; - u32 masked_pkru; + u32 pkey_reg = __rdpkey_reg(); + u32 shifted_pkey_reg; + u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkru: %x\n", __func__, pkru); + dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); - masked_pkru = shifted_pkru & mask; - dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); + shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY)); + dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg); + masked_pkey_reg = shifted_pkey_reg & mask; + dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg); /* * shift down the relevant bits to the lowest two, then * mask off all the other high bits. */ - return masked_pkru; + return masked_pkey_reg; }
int pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkru = __rdpkru(); - u32 new_pkru; + u32 old_pkey_reg = __rdpkey_reg(); + u32 new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */ assert(!(rights & ~mask));
- /* copy old pkru */ - new_pkru = old_pkru; + /* copy old pkey_reg */ + new_pkey_reg = old_pkey_reg; /* mask out bits from pkey in old value: */ - new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); + new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY)); /* OR in new bits for pkey: */ - new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); + new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY));
- __wrpkru(new_pkru); + __wrpkey_reg(new_pkey_reg);
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", - __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" + " pkey_reg now: %x old_pkey_reg: %x\n", + __func__, pkey, rights, flags, 0, __rdpkey_reg(), + old_pkey_reg); return 0; }
@@ -474,7 +479,7 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u32 orig_pkru = rdpkru(); + u32 orig_pkey_reg = rdpkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -490,9 +495,9 @@ void pkey_disable_set(int pkey, int flags)
ret = pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); - /*pkru and flags have the same format */ - shadow_pkru |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); + /*pkey_reg and flags have the same format */ + shadow_pkey_reg |= flags << (pkey * 2); + dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -500,9 +505,9 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, rdpkey_reg()); if (flags) - pkey_assert(rdpkru() > orig_pkru); + pkey_assert(rdpkey_reg() > orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); } @@ -512,7 +517,7 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = pkey_get(pkey, syscall_flags); - u32 orig_pkru = rdpkru(); + u32 orig_pkey_reg = rdpkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -523,17 +528,17 @@ void pkey_disable_clear(int pkey, int flags) pkey_rights |= flags;
ret = pkey_set(pkey, pkey_rights, 0); - /* pkru and flags have the same format */ - shadow_pkru &= ~(flags << (pkey * 2)); + /* pkey_reg and flags have the same format */ + shadow_pkey_reg &= ~(flags << (pkey * 2)); pkey_assert(ret >= 0);
pkey_rights = pkey_get(pkey, syscall_flags); dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); + dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, rdpkey_reg()); if (flags) - assert(rdpkru() > orig_pkru); + assert(rdpkey_reg() > orig_pkey_reg); }
void pkey_write_allow(int pkey) @@ -586,33 +591,38 @@ int alloc_pkey(void) int ret; unsigned long init_val = 0x0;
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", - __LINE__, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, + __LINE__, __rdpkey_reg(), shadow_pkey_reg); ret = sys_pkey_alloc(0, init_val); /* - * pkey_alloc() sets PKRU, so we need to reflect it in - * shadow_pkru: + * pkey_alloc() sets PKEY register, so we need to reflect it in + * shadow_pkey_reg: */ - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); if (ret) { /* clear both the bits: */ - shadow_pkru &= ~(0x3 << (ret * 2)); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + shadow_pkey_reg &= ~(0x3 << (ret * 2)); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, + __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); /* * move the new state in from init_val - * (remember, we cheated and init_val == pkru format) + * (remember, we cheated and init_val == pkey_reg format) */ - shadow_pkru |= (init_val << (ret * 2)); + shadow_pkey_reg |= (init_val << (ret * 2)); } - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); + dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); /* for shadow checking: */ - rdpkru(); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); + rdpkey_reg(); + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); return ret; }
@@ -663,8 +673,8 @@ int alloc_random_pkey(void) free_ret = sys_pkey_free(alloced_pkeys[i]); pkey_assert(!free_ret); } - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); return ret; }
@@ -682,11 +692,13 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, if (nr_iterations-- < 0) break;
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, ret, __rdpkey_reg(), + shadow_pkey_reg); } pkey_assert(pkey < NR_PKEYS);
@@ -694,8 +706,8 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", ptr, size, orig_prot, pkey, ret); pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, + __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); return ret; }
@@ -773,7 +785,7 @@ void free_pkey_malloc(void *ptr) void *ptr; int ret;
- rdpkru(); + rdpkey_reg(); dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, size, prot, pkey); pkey_assert(pkey < NR_PKEYS); @@ -782,7 +794,7 @@ void free_pkey_malloc(void *ptr) ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); record_pkey_malloc(ptr, size); - rdpkru(); + rdpkey_reg();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); return ptr; @@ -945,31 +957,31 @@ void setup_hugetlbfs(void) return ret; }
-int last_pkru_faults; -void expected_pk_fault(int pkey) +int last_pkey_faults; +void expected_pkey_fault(int pkey) { - dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", - __func__, last_pkru_faults, pkru_faults); + dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n", + __func__, last_pkey_faults, pkey_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); - pkey_assert(last_pkru_faults + 1 == pkru_faults); + pkey_assert(last_pkey_faults + 1 == pkey_faults); pkey_assert(last_si_pkey == pkey); /* - * The signal handler shold have cleared out PKRU to let the + * The signal handler shold have cleared out PKEY register to let the * test program continue. We now have to restore it. */ - if (__rdpkru() != 0) + if (__rdpkey_reg() != 0) pkey_assert(0);
- __wrpkru(shadow_pkru); - dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", - __func__, shadow_pkru); - last_pkru_faults = pkru_faults; + __wrpkey_reg(shadow_pkey_reg); + dprintf1("%s() set pkey_reg=%x to restore state after signal " + "nuked it\n", __func__, shadow_pkey_reg); + last_pkey_faults = pkey_faults; last_si_pkey = -1; }
-void do_not_expect_pk_fault(void) +void do_not_expect_pkey_fault(void) { - pkey_assert(last_pkru_faults == pkru_faults); + pkey_assert(last_pkey_faults == pkey_faults); }
int test_fds[10] = { -1 }; @@ -1027,25 +1039,25 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) int ptr_contents;
dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); - rdpkru(); + rdpkey_reg(); pkey_access_deny(pkey); ptr_contents = read_ptr(ptr); dprintf1("*ptr: %d\n", ptr_contents); - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); pkey_write_deny(pkey); *ptr = __LINE__; - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_write_of_access_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); pkey_access_deny(pkey); *ptr = __LINE__; - expected_pk_fault(pkey); + expected_pkey_fault(pkey); } void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { @@ -1157,9 +1169,10 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, err, __rdpkru(), shadow_pkru); - rdpkru(); /* for shadow checking */ + dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n", + __func__, __LINE__, err, __rdpkey_reg(), + shadow_pkey_reg); + rdpkey_reg(); /* for shadow checking */ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { dprintf2("%s() failed to allocate pkey after %d tries\n", @@ -1189,7 +1202,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); pkey_assert(!err); - rdpkru(); /* for shadow checking */ + rdpkey_reg(); /* for shadow checking */ } }
@@ -1246,7 +1259,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect an exception: */ peek_result = read_ptr(ptr); - expected_pk_fault(pkey); + expected_pkey_fault(pkey);
/* * Try to access the NON-pkey-protected "plain_ptr" via ptrace: @@ -1256,7 +1269,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); + do_not_expect_pkey_fault();
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1293,17 +1306,17 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) pkey_assert(!ret); pkey_access_deny(pkey);
- dprintf2("pkru: %x\n", rdpkru()); + dprintf2("pkey_reg: %x\n", rdpkey_reg());
/* * Make sure this is an *instruction* fault */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); + do_not_expect_pkey_fault(); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(pkey); + expected_pkey_fault(pkey); }
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) @@ -1343,7 +1356,7 @@ void run_tests_once(void)
for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { int pkey; - int orig_pkru_faults = pkru_faults; + int orig_pkey_faults = pkey_faults;
dprintf1("======================\n"); dprintf1("test %d preparing...\n", test_nr); @@ -1358,8 +1371,8 @@ void run_tests_once(void) free_pkey_malloc(ptr); sys_pkey_free(pkey);
- dprintf1("pkru_faults: %d\n", pkru_faults); - dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); + dprintf1("pkey_faults: %d\n", pkey_faults); + dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
tracing_off(); close_test_fds(); @@ -1372,7 +1385,7 @@ void run_tests_once(void)
void pkey_setup_shadow(void) { - shadow_pkru = __rdpkru(); + shadow_pkey_reg = __rdpkey_reg(); }
int main(void) @@ -1396,7 +1409,7 @@ int main(void) }
pkey_setup_shadow(); - printf("startup pkru: %x\n", rdpkru()); + printf("startup pkey_reg: %x\n", rdpkey_reg()); setup_hugetlbfs();
while (nr_iterations-- > 0)
 
            Moved all the generic definition and helper functions to the header file
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 62 ++++++++++++++++++++++-- tools/testing/selftests/vm/protection_keys.c | 66 -------------------------- 2 files changed, 57 insertions(+), 71 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index a568166..c1bc761 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -13,8 +13,31 @@ #include <ucontext.h> #include <sys/mman.h>
+/* Define some kernel-like types */ +#define u8 uint8_t +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +#ifdef __i386__ +#define SYS_mprotect_key 380 +#define SYS_pkey_alloc 381 +#define SYS_pkey_free 382 +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 +#else +#define SYS_mprotect_key 329 +#define SYS_pkey_alloc 330 +#define SYS_pkey_free 331 +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 +#endif + #define NR_PKEYS 16 #define PKEY_BITS_PER_PKEY 2 +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define HPAGE_SIZE (1UL<<21)
#ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 @@ -141,11 +164,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) dprintf4("pkey_reg now: %08x\n", rdpkey_reg()); }
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ -#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ -#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ -#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ - #define PAGE_SIZE 4096 #define MB (1<<20)
@@ -223,4 +241,38 @@ int pkey_reg_xstate_offset(void) return xstate_offset; }
+static inline void __page_o_noops(void) +{ + /* 8-bytes of instruction * 512 bytes = 1 page */ + asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); +} + #endif /* _PKEYS_HELPER_H */ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) \ + ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) +#define ALIGN_PTR_DOWN(p, ptr_align_to) \ + ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) + +int dprint_in_signal; +char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; + +extern void abort_hooks(void); +#define pkey_assert(condition) do { \ + if (!(condition)) { \ + dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ + __FILE__, __LINE__, \ + test_nr, iteration_nr); \ + dprintf0("errno at assert: %d", errno); \ + abort_hooks(); \ + assert(condition); \ + } \ +} while (0) +#define raw_assert(cond) assert(cond) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 4aebf12..91bade4 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -49,34 +49,9 @@ int test_nr;
unsigned int shadow_pkey_reg; - -#define HPAGE_SIZE (1UL<<21) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) -#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) -#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) - int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-extern void abort_hooks(void); -#define pkey_assert(condition) do { \ - if (!(condition)) { \ - dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ - __FILE__, __LINE__, \ - test_nr, iteration_nr); \ - dprintf0("errno at assert: %d", errno); \ - abort_hooks(); \ - assert(condition); \ - } \ -} while (0) -#define raw_assert(cond) assert(cond) - void cat_into_file(char *str, char *file) { int fd = open(file, O_RDWR); @@ -154,12 +129,6 @@ void abort_hooks(void) #endif }
-static inline void __page_o_noops(void) -{ - /* 8-bytes of instruction * 512 bytes = 1 page */ - asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); -} - /* * This attempts to have roughly a page of instructions followed by a few * instructions that do a write, and another page of instructions. That @@ -182,38 +151,6 @@ void lots_o_noops_around_write(int *write_to_me) dprintf3("%s() done\n", __func__); }
-/* Define some kernel-like types */ -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - -#ifdef __i386__ - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 - -#else - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 - -#endif - void dump_mem(void *dumpme, int len_bytes) { char *c = (void *)dumpme; @@ -424,9 +361,6 @@ void dumpit(char *f) close(fd); }
-#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 - u32 pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
 
            This is in preparation to accomadate a differing size register across architectures.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 27 +++++----- tools/testing/selftests/vm/protection_keys.c | 69 ++++++++++++++------------ 2 files changed, 51 insertions(+), 45 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index c1bc761..b6c2133 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -18,6 +18,7 @@ #define u16 uint16_t #define u32 uint32_t #define u64 uint64_t +#define pkey_reg_t u32
#ifdef __i386__ #define SYS_mprotect_key 380 @@ -80,12 +81,12 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf3(args...) dprintf_level(3, args) #define dprintf4(args...) dprintf_level(4, args)
-extern unsigned int shadow_pkey_reg; -static inline unsigned int __rdpkey_reg(void) +extern pkey_reg_t shadow_pkey_reg; +static inline pkey_reg_t __rdpkey_reg(void) { unsigned int eax, edx; unsigned int ecx = 0; - unsigned int pkey_reg; + pkey_reg_t pkey_reg;
asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (eax), "=d" (edx) @@ -94,11 +95,11 @@ static inline unsigned int __rdpkey_reg(void) return pkey_reg; }
-static inline unsigned int _rdpkey_reg(int line) +static inline pkey_reg_t _rdpkey_reg(int line) { - unsigned int pkey_reg = __rdpkey_reg(); + pkey_reg_t pkey_reg = __rdpkey_reg();
- dprintf4("rdpkey_reg(line=%d) pkey_reg: %x shadow: %x\n", + dprintf4("rdpkey_reg(line=%d) pkey_reg: %016lx shadow: %016lx\n", line, pkey_reg, shadow_pkey_reg); assert(pkey_reg == shadow_pkey_reg);
@@ -107,11 +108,11 @@ static inline unsigned int _rdpkey_reg(int line)
#define rdpkey_reg() _rdpkey_reg(__LINE__)
-static inline void __wrpkey_reg(unsigned int pkey_reg) +static inline void __wrpkey_reg(pkey_reg_t pkey_reg) { - unsigned int eax = pkey_reg; - unsigned int ecx = 0; - unsigned int edx = 0; + pkey_reg_t eax = pkey_reg; + pkey_reg_t ecx = 0; + pkey_reg_t edx = 0;
dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkey_reg(), pkey_reg); @@ -120,7 +121,7 @@ static inline void __wrpkey_reg(unsigned int pkey_reg) assert(pkey_reg == __rdpkey_reg()); }
-static inline void wrpkey_reg(unsigned int pkey_reg) +static inline void wrpkey_reg(pkey_reg_t pkey_reg) { dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkey_reg(), pkey_reg); @@ -138,7 +139,7 @@ static inline void wrpkey_reg(unsigned int pkey_reg) */ static inline void __pkey_access_allow(int pkey, int do_allow) { - unsigned int pkey_reg = rdpkey_reg(); + pkey_reg_t pkey_reg = rdpkey_reg(); int bit = pkey * 2;
if (do_allow) @@ -152,7 +153,7 @@ static inline void __pkey_access_allow(int pkey, int do_allow)
static inline void __pkey_write_allow(int pkey, int do_allow_write) { - long pkey_reg = rdpkey_reg(); + pkey_reg_t pkey_reg = rdpkey_reg(); int bit = pkey * 2 + 1;
if (do_allow_write) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 91bade4..3ef2569 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -48,7 +48,7 @@ int iteration_nr = 1; int test_nr;
-unsigned int shadow_pkey_reg; +pkey_reg_t shadow_pkey_reg; int dprint_in_signal; char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
@@ -158,7 +158,7 @@ void dump_mem(void *dumpme, int len_bytes)
for (i = 0; i < len_bytes; i += sizeof(u64)) { u64 *ptr = (u64 *)(c + i); - dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); + dprintf1("dump[%03d][@%p]: %016lx\n", i, ptr, *ptr); } }
@@ -186,7 +186,7 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) int trapno; unsigned long ip; char *fpregs; - u32 *pkey_reg_ptr; + pkey_reg_t *pkey_reg_ptr; u64 siginfo_pkey; u32 *si_pkey_ptr; int pkey_reg_offset; @@ -194,7 +194,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, __LINE__, + dprintf1("%s()::%d, pkey_reg: 0x%016lx shadow: %016lx\n", + __func__, __LINE__, __rdpkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; @@ -202,8 +203,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) fpregset = uctxt->uc_mcontext.fpregs; fpregs = (void *)fpregset;
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, - trapno, ip, si_code_str(si->si_code), si->si_code); + dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", + __func__, trapno, ip, si_code_str(si->si_code), + si->si_code); #ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether @@ -240,12 +242,12 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); }
- dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr); + dprintf1("signal pkey_reg from xsave: %016lx\n", *pkey_reg_ptr); /* * need __rdpkey_reg() version so we do not do shadow_pkey_reg * checking */ - dprintf1("signal pkey_reg from pkey_reg: %08x\n", __rdpkey_reg()); + dprintf1("signal pkey_reg from pkey_reg: %016lx\n", __rdpkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkey_reg_ptr = 0x00000000; dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); @@ -364,8 +366,8 @@ void dumpit(char *f) u32 pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkey_reg = __rdpkey_reg(); - u32 shifted_pkey_reg; + pkey_reg_t pkey_reg = __rdpkey_reg(); + pkey_reg_t shifted_pkey_reg; u32 masked_pkey_reg;
dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", @@ -386,8 +388,8 @@ u32 pkey_get(int pkey, unsigned long flags) int pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkey_reg = __rdpkey_reg(); - u32 new_pkey_reg; + pkey_reg_t old_pkey_reg = __rdpkey_reg(); + pkey_reg_t new_pkey_reg;
/* make sure that 'rights' only contains the bits we expect: */ assert(!(rights & ~mask)); @@ -401,10 +403,10 @@ int pkey_set(int pkey, unsigned long rights, unsigned long flags)
__wrpkey_reg(new_pkey_reg);
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x" - " pkey_reg now: %x old_pkey_reg: %x\n", - __func__, pkey, rights, flags, 0, __rdpkey_reg(), - old_pkey_reg); + dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x " + "pkey_reg now: %016lx old_pkey_reg: %016lx\n", + __func__, pkey, rights, flags, + 0, __rdpkey_reg(), old_pkey_reg); return 0; }
@@ -413,7 +415,7 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u32 orig_pkey_reg = rdpkey_reg(); + pkey_reg_t orig_pkey_reg = rdpkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -421,8 +423,6 @@ void pkey_disable_set(int pkey, int flags)
pkey_rights = pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0);
pkey_rights |= flags; @@ -431,7 +431,8 @@ void pkey_disable_set(int pkey, int flags) assert(!ret); /*pkey_reg and flags have the same format */ shadow_pkey_reg |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkey_reg); + dprintf1("%s(%d) shadow: 0x%016lx\n", + __func__, pkey, shadow_pkey_reg);
pkey_assert(ret >= 0);
@@ -439,7 +440,8 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, rdpkey_reg()); + dprintf1("%s(%d) pkey_reg: 0x%lx\n", + __func__, pkey, rdpkey_reg()); if (flags) pkey_assert(rdpkey_reg() > orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, @@ -451,7 +453,7 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = pkey_get(pkey, syscall_flags); - u32 orig_pkey_reg = rdpkey_reg(); + pkey_reg_t orig_pkey_reg = rdpkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -470,7 +472,8 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights);
- dprintf1("%s(%d) pkey_reg: 0x%x\n", __func__, pkey, rdpkey_reg()); + dprintf1("%s(%d) pkey_reg: 0x%016lx\n", __func__, + pkey, rdpkey_reg()); if (flags) assert(rdpkey_reg() > orig_pkey_reg); } @@ -525,20 +528,21 @@ int alloc_pkey(void) int ret; unsigned long init_val = 0x0;
- dprintf1("%s()::%d, pkey_reg: 0x%x shadow: %x\n", __func__, + dprintf1("%s()::%d, pkey_reg: 0x%016lx shadow: %016lx\n", __func__, __LINE__, __rdpkey_reg(), shadow_pkey_reg); ret = sys_pkey_alloc(0, init_val); /* * pkey_alloc() sets PKEY register, so we need to reflect it in * shadow_pkey_reg: */ - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); if (ret) { /* clear both the bits: */ shadow_pkey_reg &= ~(0x3 << (ret * 2)); - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx " + "shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); @@ -548,13 +552,13 @@ int alloc_pkey(void) */ shadow_pkey_reg |= (init_val << (ret * 2)); } - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno); /* for shadow checking: */ rdpkey_reg(); - dprintf4("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", + dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); return ret; @@ -1103,9 +1107,10 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkey_reg: 0x%x shadow: 0x%x\n", - __func__, __LINE__, err, __rdpkey_reg(), - shadow_pkey_reg); + dprintf4("%s()::%d, err: %d pkey_reg: 0x%016lx " + "shadow: 0x%016lx\n", + __func__, __LINE__, err, __rdpkey_reg(), + shadow_pkey_reg); rdpkey_reg(); /* for shadow checking */ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { @@ -1343,7 +1348,7 @@ int main(void) }
pkey_setup_shadow(); - printf("startup pkey_reg: %x\n", rdpkey_reg()); + printf("startup pkey_reg: 0x%016lx\n", (u64)rdpkey_reg()); setup_hugetlbfs();
while (nr_iterations-- > 0)
 
            helper functions to handler shadow pkey register
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 27 ++++++++++++++++++++ tools/testing/selftests/vm/protection_keys.c | 34 ++++++++++++++++--------- 2 files changed, 49 insertions(+), 12 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index b6c2133..7c979ad 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -44,6 +44,33 @@ #define DEBUG_LEVEL 0 #endif #define DPRINT_IN_SIGNAL_BUF_SIZE 4096 + +static inline u32 pkey_to_shift(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +static inline pkey_reg_t reset_bits(int pkey, pkey_reg_t bits) +{ + u32 shift = pkey_to_shift(pkey); + + return ~(bits << shift); +} + +static inline pkey_reg_t left_shift_bits(int pkey, pkey_reg_t bits) +{ + u32 shift = pkey_to_shift(pkey); + + return (bits << shift); +} + +static inline pkey_reg_t right_shift_bits(int pkey, pkey_reg_t bits) +{ + u32 shift = pkey_to_shift(pkey); + + return (bits >> shift); +} + extern int dprint_in_signal; extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; static inline void sigsafe_printf(const char *format, ...) diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 3ef2569..83216c5 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -374,7 +374,7 @@ u32 pkey_get(int pkey, unsigned long flags) __func__, pkey, flags, 0, 0); dprintf2("%s() raw pkey_reg: %x\n", __func__, pkey_reg);
- shifted_pkey_reg = (pkey_reg >> (pkey * PKEY_BITS_PER_PKEY)); + shifted_pkey_reg = right_shift_bits(pkey, pkey_reg); dprintf2("%s() shifted_pkey_reg: %x\n", __func__, shifted_pkey_reg); masked_pkey_reg = shifted_pkey_reg & mask; dprintf2("%s() masked pkey_reg: %x\n", __func__, masked_pkey_reg); @@ -397,9 +397,9 @@ int pkey_set(int pkey, unsigned long rights, unsigned long flags) /* copy old pkey_reg */ new_pkey_reg = old_pkey_reg; /* mask out bits from pkey in old value: */ - new_pkey_reg &= ~(mask << (pkey * PKEY_BITS_PER_PKEY)); + new_pkey_reg &= reset_bits(pkey, mask); /* OR in new bits for pkey: */ - new_pkey_reg |= (rights << (pkey * PKEY_BITS_PER_PKEY)); + new_pkey_reg |= left_shift_bits(pkey, rights);
__wrpkey_reg(new_pkey_reg);
@@ -430,7 +430,7 @@ void pkey_disable_set(int pkey, int flags) ret = pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); /*pkey_reg and flags have the same format */ - shadow_pkey_reg |= flags << (pkey * 2); + shadow_pkey_reg |= left_shift_bits(pkey, flags); dprintf1("%s(%d) shadow: 0x%016lx\n", __func__, pkey, shadow_pkey_reg);
@@ -465,7 +465,7 @@ void pkey_disable_clear(int pkey, int flags)
ret = pkey_set(pkey, pkey_rights, 0); /* pkey_reg and flags have the same format */ - shadow_pkey_reg &= ~(flags << (pkey * 2)); + shadow_pkey_reg &= reset_bits(pkey, flags); pkey_assert(ret >= 0);
pkey_rights = pkey_get(pkey, syscall_flags); @@ -523,6 +523,21 @@ int sys_pkey_alloc(unsigned long flags, unsigned long init_val) return ret; }
+void pkey_setup_shadow(void) +{ + shadow_pkey_reg = __rdpkey_reg(); +} + +void pkey_reset_shadow(u32 key) +{ + shadow_pkey_reg &= reset_bits(key, 0x3); +} + +void pkey_set_shadow(u32 key, u64 init_val) +{ + shadow_pkey_reg |= left_shift_bits(key, init_val); +} + int alloc_pkey(void) { int ret; @@ -540,7 +555,7 @@ int alloc_pkey(void) shadow_pkey_reg); if (ret) { /* clear both the bits: */ - shadow_pkey_reg &= ~(0x3 << (ret * 2)); + pkey_reset_shadow(ret); dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx " "shadow: 0x%016lx\n", __func__, @@ -550,7 +565,7 @@ int alloc_pkey(void) * move the new state in from init_val * (remember, we cheated and init_val == pkey_reg format) */ - shadow_pkey_reg |= (init_val << (ret * 2)); + pkey_set_shadow(ret, init_val); } dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016lx shadow: 0x%016lx\n", __func__, __LINE__, ret, __rdpkey_reg(), @@ -1322,11 +1337,6 @@ void run_tests_once(void) iteration_nr++; }
-void pkey_setup_shadow(void) -{ - shadow_pkey_reg = __rdpkey_reg(); -} - int main(void) { int nr_iterations = 22;
 
            If the flag is 0, no bits will be set. Hence we cant expect the resulting bitmap to have a higher value than what it was earlier.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 83216c5..0109388 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -443,7 +443,7 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%lx\n", __func__, pkey, rdpkey_reg()); if (flags) - pkey_assert(rdpkey_reg() > orig_pkey_reg); + pkey_assert(rdpkey_reg() >= orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); }
 
            instead of clearing the bits, pkey_disable_clear() was setting the bits. Fixed it.
Also fixed a wrong assertion in that function. When bits are cleared, the resulting bit value will be less than the original.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 0109388..ca54a95 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -461,7 +461,7 @@ void pkey_disable_clear(int pkey, int flags) pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0);
- pkey_rights |= flags; + pkey_rights &= ~flags;
ret = pkey_set(pkey, pkey_rights, 0); /* pkey_reg and flags have the same format */ @@ -475,7 +475,7 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016lx\n", __func__, pkey, rdpkey_reg()); if (flags) - assert(rdpkey_reg() > orig_pkey_reg); + assert(rdpkey_reg() < orig_pkey_reg); }
void pkey_write_allow(int pkey)
 
            When a key is freed, the key is no more effective. Clear the bits corresponding to the pkey in the shadow register. Otherwise it will carry some spurious bits which can trigger false-positive asserts.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index ca54a95..aaf9f09 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -582,6 +582,9 @@ int alloc_pkey(void) int sys_pkey_free(unsigned long pkey) { int ret = syscall(SYS_pkey_free, pkey); + + if (!ret) + shadow_pkey_reg &= reset_bits(pkey, PKEY_DISABLE_ACCESS); dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); return ret; }
 
            alloc_random_pkey() was allocating the same pkey every time. Not all pkeys were geting tested. fixed it.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 10 +++++++--- 1 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index aaf9f09..2e4b636 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -24,6 +24,7 @@ #define _GNU_SOURCE #include <errno.h> #include <linux/futex.h> +#include <time.h> #include <sys/time.h> #include <sys/syscall.h> #include <string.h> @@ -602,13 +603,15 @@ int alloc_random_pkey(void) int alloced_pkeys[NR_PKEYS]; int nr_alloced = 0; int random_index; + memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); + srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */ max_nr_pkey_allocs = NR_PKEYS; - max_nr_pkey_allocs = 1; for (i = 0; i < max_nr_pkey_allocs; i++) { int new_pkey = alloc_pkey(); + if (new_pkey < 0) break; alloced_pkeys[nr_alloced++] = new_pkey; @@ -624,13 +627,14 @@ int alloc_random_pkey(void) /* go through the allocated ones that we did not want and free them */ for (i = 0; i < nr_alloced; i++) { int free_ret; + if (!alloced_pkeys[i]) continue; free_ret = sys_pkey_free(alloced_pkeys[i]); pkey_assert(!free_ret); } - dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); + dprintf1("%s()::%d, ret: %d pkey_reg: 0x%x shadow: 0x%016lx\n", + __func__, __LINE__, ret, __rdpkey_reg(), shadow_pkey_reg); return ret; }
 
            open_hugepage_file() <- opens the huge page file get_start_key() <-- provides the first non-reserved key.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 11 +++++++++++ tools/testing/selftests/vm/protection_keys.c | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 7c979ad..c8f5739 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -304,3 +304,14 @@ static inline void __page_o_noops(void) } \ } while (0) #define raw_assert(cond) assert(cond) + +static inline int open_hugepage_file(int flag) +{ + return open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", + O_RDONLY); +} + +static inline int get_start_key(void) +{ + return 1; +} diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 2e4b636..254b66d 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -809,7 +809,7 @@ void setup_hugetlbfs(void) * Now go make sure that we got the pages and that they * are 2M pages. Someone might have made 1G the default. */ - fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); + fd = open_hugepage_file(O_RDONLY); if (fd < 0) { perror("opening sysfs 2M hugetlb config"); return; @@ -1087,10 +1087,10 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err; - int i; + int i = get_start_key();
/* Note: 0 is the default pkey, so don't mess with it */ - for (i = 1; i < NR_PKEYS; i++) { + for (; i < NR_PKEYS; i++) { if (pkey == i) continue;
 
            expected_pkey_fault() is comparing the contents of pkey register with 0. This may not be true all the time. There could be bits set by default by the architecture which can never be changed. Hence compare the value against shadow pkey register, which is supposed to track the bits accurately all throughout
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 254b66d..6054093 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -926,10 +926,10 @@ void expected_pkey_fault(int pkey) pkey_assert(last_pkey_faults + 1 == pkey_faults); pkey_assert(last_si_pkey == pkey); /* - * The signal handler shold have cleared out PKEY register to let the + * The signal handler shold have cleared out pkey-register to let the * test program continue. We now have to restore it. */ - if (__rdpkey_reg() != 0) + if (__rdpkey_reg() != shadow_pkey_reg) pkey_assert(0);
__wrpkey_reg(shadow_pkey_reg);
 
            cleanup the code to satisfy coding styles.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 81 ++++++++++++++------------ 1 files changed, 43 insertions(+), 38 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 6054093..6fdd8f5 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -4,7 +4,7 @@ * * There are examples in here of: * * how to set protection keys on memory - * * how to set/clear bits in pkey registers (the rights register) + * * how to set/clear bits in Protection Key registers (the rights register) * * how to handle SEGV_PKUERR signals and extract pkey-relevant * information from the siginfo * @@ -13,13 +13,18 @@ * prefault pages in at malloc, or not * protect MPX bounds tables with protection keys? * make sure VMA splitting/merging is working correctly - * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys - * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel - * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks + * OOMs can destroy mm->mmap (see exit_mmap()), + * so make sure it is immune to pkeys + * look for pkey "leaks" where it is still set on a VMA + * but "freed" back to the kernel + * do a plain mprotect() to a mprotect_pkey() area and make + * sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -o protection_keys -O2 -g -std=gnu99 + * -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 + * -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #include <errno.h> @@ -251,26 +256,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("signal pkey_reg from pkey_reg: %016lx\n", __rdpkey_reg()); dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); *(u64 *)pkey_reg_ptr = 0x00000000; - dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); + dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction " + "to continue\n"); pkey_faults++; dprintf1("<<<<==================================================\n"); return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } - dprint_in_signal = 0; }
int wait_all_children(void) @@ -415,7 +405,7 @@ void pkey_disable_set(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights; + u32 pkey_rights; pkey_reg_t orig_pkey_reg = rdpkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__, @@ -453,7 +443,7 @@ void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); + u32 pkey_rights = pkey_get(pkey, syscall_flags); pkey_reg_t orig_pkey_reg = rdpkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); @@ -516,9 +506,10 @@ int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, return sret; }
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +int sys_pkey_alloc(unsigned long flags, u64 init_val) { int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", __func__, flags, init_val, ret, errno); return ret; @@ -542,7 +533,7 @@ void pkey_set_shadow(u32 key, u64 init_val) int alloc_pkey(void) { int ret; - unsigned long init_val = 0x0; + u64 init_val = 0x0;
dprintf1("%s()::%d, pkey_reg: 0x%016lx shadow: %016lx\n", __func__, __LINE__, __rdpkey_reg(), shadow_pkey_reg); @@ -692,7 +683,9 @@ void record_pkey_malloc(void *ptr, long size) /* every record is full */ size_t old_nr_records = nr_pkey_malloc_records; size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); - size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); + size_t new_size = new_nr_records * + sizeof(struct pkey_malloc_record); + dprintf2("new_nr_records: %zd\n", new_nr_records); dprintf2("new_size: %zd\n", new_size); pkey_malloc_records = realloc(pkey_malloc_records, new_size); @@ -716,9 +709,11 @@ void free_pkey_malloc(void *ptr) { long i; int ret; + dprintf3("%s(%p)\n", __func__, ptr); for (i = 0; i < nr_pkey_malloc_records; i++) { struct pkey_malloc_record *rec = &pkey_malloc_records[i]; + dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", ptr, i, rec, rec->ptr, rec->size); if ((ptr < rec->ptr) || @@ -799,11 +794,13 @@ void setup_hugetlbfs(void) char buf[] = "123";
if (geteuid() != 0) { - fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); + fprintf(stderr, + "WARNING: not run as root, can not do hugetlb test\n"); return; }
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); + cat_into_file(__stringify(GET_NR_HUGE_PAGES), + "/proc/sys/vm/nr_hugepages");
/* * Now go make sure that we got the pages and that they @@ -824,7 +821,8 @@ void setup_hugetlbfs(void) }
if (atoi(buf) != GET_NR_HUGE_PAGES) { - fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", + fprintf(stderr, "could not confirm 2M pages, got:" + " '%s' expected %d\n", buf, GET_NR_HUGE_PAGES); return; } @@ -957,6 +955,7 @@ void __save_test_fd(int fd) int get_test_read_fd(void) { int test_fd = open("/etc/passwd", O_RDONLY); + __save_test_fd(test_fd); return test_fd; } @@ -998,7 +997,8 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) { int ptr_contents;
- dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", + pkey, ptr); rdpkey_reg(); pkey_access_deny(pkey); ptr_contents = read_ptr(ptr); @@ -1120,13 +1120,14 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) /* Assumes that all pkeys other than 'pkey' are unallocated */ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { - int err; + int err = 0; int allocated_pkeys[NR_PKEYS] = {0}; int nr_allocated_pkeys = 0; int i;
for (i = 0; i < NR_PKEYS*2; i++) { int new_pkey; + dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); dprintf4("%s()::%d, err: %d pkey_reg: 0x%016lx " @@ -1134,9 +1135,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) __func__, __LINE__, err, __rdpkey_reg(), shadow_pkey_reg); rdpkey_reg(); /* for shadow checking */ - dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); + dprintf2("%s() errno: %d ENOSPC: %d\n", + __func__, errno, ENOSPC); if ((new_pkey == -1) && (errno == ENOSPC)) { - dprintf2("%s() failed to allocate pkey after %d tries\n", + dprintf2("%s() failed to allocate pkey " + "after %d tries\n", __func__, nr_allocated_pkeys); break; } @@ -1338,7 +1341,8 @@ void run_tests_once(void) tracing_off(); close_test_fds();
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); + printf("test %2d PASSED (iteration %d)\n", + test_nr, iteration_nr); dprintf1("======================\n\n"); } iteration_nr++; @@ -1350,7 +1354,7 @@ int main(void)
setup_handlers();
- printf("has pku: %d\n", cpu_has_pku()); + printf("has pkey: %d\n", cpu_has_pku());
if (!cpu_has_pku()) { int size = PAGE_SIZE; @@ -1358,7 +1362,8 @@ int main(void)
printf("running PKEY tests for unsupported CPU/OS\n");
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, size, PROT_NONE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); assert(ptr != (void *)-1); test_mprotect_pkey_on_unsupported_cpu(ptr, 1); exit(0);
 
            Introduce powerpc implementation for the different abstactions.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 109 ++++++++++++++++++++++---- tools/testing/selftests/vm/protection_keys.c | 40 ++++++---- 2 files changed, 118 insertions(+), 31 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index c8f5739..cfc1a18 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -18,27 +18,54 @@ #define u16 uint16_t #define u32 uint32_t #define u64 uint64_t -#define pkey_reg_t u32
-#ifdef __i386__ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + +#ifdef __i386__ /* arch */ #define SYS_mprotect_key 380 -#define SYS_pkey_alloc 381 -#define SYS_pkey_free 382 +#define SYS_pkey_alloc 381 +#define SYS_pkey_free 382 #define REG_IP_IDX REG_EIP #define si_pkey_offset 0x14 -#else +#elif __x86_64__ #define SYS_mprotect_key 329 -#define SYS_pkey_alloc 330 -#define SYS_pkey_free 331 +#define SYS_pkey_alloc 330 +#define SYS_pkey_free 331 #define REG_IP_IDX REG_RIP #define si_pkey_offset 0x20 -#endif +#endif /* __x86_64__ */ + +#define NR_PKEYS 16 +#define NR_RESERVED_PKEYS 1 +#define PKEY_BITS_PER_PKEY 2 +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define HPAGE_SIZE (1UL<<21) +#define pkey_reg_t u32
-#define NR_PKEYS 16 -#define PKEY_BITS_PER_PKEY 2 -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 -#define HPAGE_SIZE (1UL<<21) +#elif __powerpc64__ /* arch */ + +#define SYS_mprotect_key 386 +#define SYS_pkey_alloc 384 +#define SYS_pkey_free 385 +#define si_pkey_offset 0x20 +#define REG_IP_IDX PT_NIP +#define REG_TRAPNO PT_TRAP +#define gregs gp_regs +#define fpregs fp_regs + +#define NR_PKEYS 32 +#define NR_RESERVED_PKEYS_4K 26 +#define NR_RESERVED_PKEYS_64K 3 +#define PKEY_BITS_PER_PKEY 2 +#define PKEY_DISABLE_ACCESS 0x3 /* disable read and write */ +#define PKEY_DISABLE_WRITE 0x2 +#define HPAGE_SIZE (1UL<<24) +#define pkey_reg_t u64 + +#else /* arch */ + NOT SUPPORTED +#endif /* arch */
#ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 @@ -47,7 +74,11 @@
static inline u32 pkey_to_shift(int pkey) { +#if defined(__i386__) || defined(__x86_64__) /* arch */ return pkey * PKEY_BITS_PER_PKEY; +#elif __powerpc64__ /* arch */ + return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY; +#endif /* arch */ }
static inline pkey_reg_t reset_bits(int pkey, pkey_reg_t bits) @@ -111,6 +142,7 @@ static inline void sigsafe_printf(const char *format, ...) extern pkey_reg_t shadow_pkey_reg; static inline pkey_reg_t __rdpkey_reg(void) { +#if defined(__i386__) || defined(__x86_64__) /* arch */ unsigned int eax, edx; unsigned int ecx = 0; pkey_reg_t pkey_reg; @@ -118,7 +150,13 @@ static inline pkey_reg_t __rdpkey_reg(void) asm volatile(".byte 0x0f,0x01,0xee\n\t" : "=a" (eax), "=d" (edx) : "c" (ecx)); - pkey_reg = eax; +#elif __powerpc64__ /* arch */ + pkey_reg_t eax; + pkey_reg_t pkey_reg; + + asm volatile("mfspr %0, 0xd" : "=r" ((pkey_reg_t)(eax))); +#endif /* arch */ + pkey_reg = (pkey_reg_t)eax; return pkey_reg; }
@@ -138,6 +176,7 @@ static inline pkey_reg_t _rdpkey_reg(int line) static inline void __wrpkey_reg(pkey_reg_t pkey_reg) { pkey_reg_t eax = pkey_reg; +#if defined(__i386__) || defined(__x86_64__) /* arch */ pkey_reg_t ecx = 0; pkey_reg_t edx = 0;
@@ -146,6 +185,14 @@ static inline void __wrpkey_reg(pkey_reg_t pkey_reg) asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (eax), "c" (ecx), "d" (edx)); assert(pkey_reg == __rdpkey_reg()); + +#elif __powerpc64__ /* arch */ + dprintf4("%s() changing %llx to %llx\n", + __func__, __rdpkey_reg(), pkey_reg); + asm volatile("mtspr 0xd, %0" : : "r" ((unsigned long)(eax)) : "memory"); +#endif /* arch */ + dprintf4("%s() pkey register after changing %016lx to %016lx\n", + __func__, __rdpkey_reg(), pkey_reg); }
static inline void wrpkey_reg(pkey_reg_t pkey_reg) @@ -192,6 +239,8 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write) dprintf4("pkey_reg now: %08x\n", rdpkey_reg()); }
+#if defined(__i386__) || defined(__x86_64__) /* arch */ + #define PAGE_SIZE 4096 #define MB (1<<20)
@@ -274,8 +323,18 @@ static inline void __page_o_noops(void) /* 8-bytes of instruction * 512 bytes = 1 page */ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); } +#elif __powerpc64__ /* arch */
-#endif /* _PKEYS_HELPER_H */ +#define PAGE_SIZE (0x1UL << 16) +static inline int cpu_has_pku(void) +{ + return 1; +} + +/* 8-bytes of instruction * 16384bytes = 1 page */ +#define __page_o_noops() asm(".rept 16384 ; nop; .endr") + +#endif /* arch */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) @@ -307,11 +366,29 @@ static inline void __page_o_noops(void)
static inline int open_hugepage_file(int flag) { - return open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", + int fd; + +#if defined(__i386__) || defined(__x86_64__) /* arch */ + fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); +#elif __powerpc64__ /* arch */ + fd = open("/sys/kernel/mm/hugepages/hugepages-16384kB/nr_hugepages", + O_RDONLY); +#else /* arch */ + NOT SUPPORTED +#endif /* arch */ + return fd; }
static inline int get_start_key(void) { +#if defined(__i386__) || defined(__x86_64__) /* arch */ return 1; +#elif __powerpc64__ /* arch */ + return 0; +#else /* arch */ + NOT SUPPORTED +#endif /* arch */ } + +#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 6fdd8f5..55a25e1 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -186,17 +186,18 @@ void dump_mem(void *dumpme, int len_bytes)
int pkey_faults; int last_si_pkey = -1; +void pkey_access_allow(int pkey); void signal_handler(int signum, siginfo_t *si, void *vucontext) { ucontext_t *uctxt = vucontext; int trapno; unsigned long ip; char *fpregs; +#if defined(__i386__) || defined(__x86_64__) /* arch */ pkey_reg_t *pkey_reg_ptr; - u64 siginfo_pkey; +#endif /* defined(__i386__) || defined(__x86_64__) */ + u32 siginfo_pkey; u32 *si_pkey_ptr; - int pkey_reg_offset; - fpregset_t fpregset;
dprint_in_signal = 1; dprintf1(">>>>===============SIGSEGV============================\n"); @@ -206,12 +207,14 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - fpregset = uctxt->uc_mcontext.fpregs; - fpregs = (void *)fpregset; + fpregs = (char *) uctxt->uc_mcontext.fpregs;
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", __func__, trapno, ip, si_code_str(si->si_code), si->si_code); + +#if defined(__i386__) || defined(__x86_64__) /* arch */ + #ifdef __i386__ /* * 32-bit has some extra padding so that userspace can tell whether @@ -219,20 +222,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) * state. We just assume that it is here. */ fpregs += 0x70; -#endif - pkey_reg_offset = pkey_reg_xstate_offset(); - pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]); +#endif /* __i386__ */
- dprintf1("siginfo: %p\n", si); - dprintf1(" fpregs: %p\n", fpregs); + pkey_reg_ptr = (void *)(&fpregs[pkey_reg_xstate_offset()]); /* - * If we got a PKEY fault, we *HAVE* to have at least one bit set in + * If we got a key fault, we *HAVE* to have at least one bit set in * here. */ dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset()); if (DEBUG_LEVEL > 4) dump_mem(pkey_reg_ptr - 128, 256); pkey_assert(*pkey_reg_ptr); +#endif /* defined(__i386__) || defined(__x86_64__) */ + + dprintf1("siginfo: %p\n", si); + dprintf1(" fpregs: %p\n", fpregs);
si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); @@ -248,19 +252,25 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); }
- dprintf1("signal pkey_reg from xsave: %016lx\n", *pkey_reg_ptr); /* * need __rdpkey_reg() version so we do not do shadow_pkey_reg * checking */ dprintf1("signal pkey_reg from pkey_reg: %016lx\n", __rdpkey_reg()); - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkey_reg_ptr = 0x00000000; + dprintf1("pkey from siginfo: %lx\n", siginfo_pkey); +#if defined(__i386__) || defined(__x86_64__) /* arch */ + dprintf1("signal pkey_reg from xsave: %016lx\n", *pkey_reg_ptr); + *(u64 *)pkey_reg_ptr &= reset_bits(siginfo_pkey, + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE); +#elif __powerpc64__ + pkey_access_allow(siginfo_pkey); +#endif + shadow_pkey_reg &= reset_bits(siginfo_pkey, + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE); dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction " "to continue\n"); pkey_faults++; dprintf1("<<<<==================================================\n"); - return; }
int wait_all_children(void)
 
            When a key is freed, the key is no more effective. Clear the bits corresponding to the pkey in the shadow register. Otherwise it will carry some spurious bits which can trigger false-positive asserts.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 55a25e1..d1cbdfe 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -586,7 +586,8 @@ int sys_pkey_free(unsigned long pkey) int ret = syscall(SYS_pkey_free, pkey);
if (!ret) - shadow_pkey_reg &= reset_bits(pkey, PKEY_DISABLE_ACCESS); + shadow_pkey_reg &= reset_bits(pkey, + PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE); dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); return ret; }
 
            pkey subsystem is supported if the hardware and kernel has support. We determine that by checking if allocation of a key succeeds or not.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 22 ++++++++++++++++------ tools/testing/selftests/vm/protection_keys.c | 9 +++++---- 2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index cfc1a18..3559527 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -261,7 +261,7 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, #define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) { unsigned int eax; unsigned int ebx; @@ -274,13 +274,13 @@ static inline int cpu_has_pku(void)
if (!(ecx & X86_FEATURE_PKU)) { dprintf2("cpu does not have PKU\n"); - return 0; + return false; } if (!(ecx & X86_FEATURE_OSPKE)) { dprintf2("cpu does not have OSPKE\n"); - return 0; + return false; } - return 1; + return true; }
#define XSTATE_PKEY_BIT (9) @@ -326,9 +326,19 @@ static inline void __page_o_noops(void) #elif __powerpc64__ /* arch */
#define PAGE_SIZE (0x1UL << 16) -static inline int cpu_has_pku(void) +static inline bool is_pkey_supported(void) { - return 1; + /* + * No simple way to determine this. + * lets try allocating a key and see if it succeeds. + */ + int ret = sys_pkey_alloc(0, 0); + + if (ret > 0) { + sys_pkey_free(ret); + return true; + } + return false; }
/* 8-bytes of instruction * 16384bytes = 1 page */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index d1cbdfe..65e6dd6 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1299,8 +1299,8 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) int size = PAGE_SIZE; int sret;
- if (cpu_has_pku()) { - dprintf1("SKIP: %s: no CPU support\n", __func__); + if (is_pkey_supported()) { + dprintf1("SKIP: %s: no CPU/kernel support\n", __func__); return; }
@@ -1362,12 +1362,13 @@ void run_tests_once(void) int main(void) { int nr_iterations = 22; + int pkey_supported = is_pkey_supported();
setup_handlers();
- printf("has pkey: %d\n", cpu_has_pku()); + printf("has pkey: %s\n", pkey_supported ? "Yes" : "No");
- if (!cpu_has_pku()) { + if (!pkey_supported) { int size = PAGE_SIZE; int *ptr;
 
            The maximum number of keys that can be allocated has to take into consideration, that some keys are reserved by the architecture for specific purpose. Hence cannot be allocated.
Fix the assertion in test_pkey_alloc_exhaust()
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/pkey-helpers.h | 14 ++++++++++++++ tools/testing/selftests/vm/protection_keys.c | 9 ++++----- 2 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 3559527..9d06b4a 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -401,4 +401,18 @@ static inline int get_start_key(void) #endif /* arch */ }
+static inline int arch_reserved_keys(void) +{ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + return NR_RESERVED_PKEYS; +#elif __powerpc64__ /* arch */ + if (sysconf(_SC_PAGESIZE) == 4096) + return NR_RESERVED_PKEYS_4K; + else + return NR_RESERVED_PKEYS_64K; +#else /* arch */ + NOT SUPPORTED +#endif /* arch */ +} + #endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 65e6dd6..33d5839 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1167,12 +1167,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2);
/* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). + * There are NR_PKEYS pkeys supported in hardware. arch_reserved_keys() + * are reserved. One can be taken up by an execute-only mapping. + * Ensure that we can allocate at least the remaining. */ - pkey_assert(i >= NR_PKEYS-2); + pkey_assert(i >= (NR_PKEYS-arch_reserved_keys()-1));
for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]);
 
            detect access-violation on a page to which access-disabled key is associated much after the page is mapped.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 19 +++++++++++++++++++ 1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 33d5839..1d3f596 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1016,6 +1016,24 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) dprintf1("*ptr: %d\n", ptr_contents); expected_pkey_fault(pkey); } + +void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + int ptr_contents; + + dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", + pkey, ptr); + ptr_contents = read_ptr(ptr); + dprintf1("reading ptr before disabling the read : %d\n", + ptr_contents); + rdpkey_reg(); + pkey_access_deny(pkey); + ptr_contents = read_ptr(ptr); + dprintf1("*ptr: %d\n", ptr_contents); + expected_pkey_fault(pkey); +} + void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); @@ -1310,6 +1328,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) void (*pkey_tests[])(int *ptr, u16 pkey) = { test_read_of_write_disabled_region, test_read_of_access_disabled_region, + test_read_of_access_disabled_region_with_page_already_mapped, test_write_of_write_disabled_region, test_write_of_access_disabled_region, test_kernel_write_of_access_disabled_region,
 
            detect write-violation on a page to which write-disabled key is associated much after the page is mapped.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 1d3f596..f90a8b6 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1034,6 +1034,17 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); }
+void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + *ptr = __LINE__; + dprintf1("disabling write access; after accessing the page, " + "to PKEY[%02d], doing write\n", pkey); + pkey_write_deny(pkey); + *ptr = __LINE__; + expected_pkey_fault(pkey); +} + void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); @@ -1330,6 +1341,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) test_read_of_access_disabled_region, test_read_of_access_disabled_region_with_page_already_mapped, test_write_of_write_disabled_region, + test_write_of_write_disabled_region_with_page_already_mapped, test_write_of_access_disabled_region, test_kernel_write_of_access_disabled_region, test_kernel_write_of_write_disabled_region,
 
            detect write-violation on a page to which access-disabled key is associated much after the page is mapped.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 13 +++++++++++++ 1 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index f90a8b6..3e9e36d 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1059,6 +1059,18 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey) *ptr = __LINE__; expected_pkey_fault(pkey); } + +void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, + u16 pkey) +{ + *ptr = __LINE__; + dprintf1("disabling access; after accessing the page, " + " to PKEY[%02d], doing write\n", pkey); + pkey_access_deny(pkey); + *ptr = __LINE__; + expected_pkey_fault(pkey); +} + void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { int ret; @@ -1343,6 +1355,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) test_write_of_write_disabled_region, test_write_of_write_disabled_region_with_page_already_mapped, test_write_of_access_disabled_region, + test_write_of_access_disabled_region_with_page_already_mapped, test_kernel_write_of_access_disabled_region, test_kernel_write_of_write_disabled_region, test_kernel_gup_of_access_disabled_region,
 
            Generally the signal handler restores the state of the pkey register before returning. However there are times when the read/write operation can legitamely fail without invoking the signal handler. Eg: A sys_read() operaton to a write-protected page should be disallowed. In such a case the state of the pkey register is not restored to its original state. The test case is responsible for restoring the key register state to its original value.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 5 +++++ 1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 3e9e36d..5783587 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -1003,6 +1003,7 @@ void test_read_of_write_disabled_region(int *ptr, u16 pkey) ptr_contents = read_ptr(ptr); dprintf1("*ptr: %d\n", ptr_contents); dprintf1("\n"); + pkey_write_allow(pkey); } void test_read_of_access_disabled_region(int *ptr, u16 pkey) { @@ -1082,6 +1083,7 @@ void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) ret = read(test_fd, ptr, 1); dprintf1("read ret: %d\n", ret); pkey_assert(ret); + pkey_access_allow(pkey); } void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) { @@ -1094,6 +1096,7 @@ void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) if (ret < 0 && (DEBUG_LEVEL > 0)) perror("verbose read result (OK for this to be bad)"); pkey_assert(ret); + pkey_write_allow(pkey); }
void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) @@ -1113,6 +1116,7 @@ void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); dprintf1("vmsplice() ret: %d\n", vmsplice_ret); pkey_assert(vmsplice_ret == -1); + pkey_access_allow(pkey);
close(pipe_fds[0]); close(pipe_fds[1]); @@ -1133,6 +1137,7 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) if (DEBUG_LEVEL > 0) perror("futex"); dprintf1("futex() ret: %d\n", futex_ret); + pkey_write_allow(pkey); }
/* Assumes that all pkeys other than 'pkey' are unallocated */
 
            introduce a new allocator that allocates 4k hardware-pages to back 64k linux-page. This allocator is only applicable on powerpc.
Signed-off-by: Ram Pai linuxram@us.ibm.com --- tools/testing/selftests/vm/protection_keys.c | 30 ++++++++++++++++++++++++++ 1 files changed, 30 insertions(+), 0 deletions(-)
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 5783587..ae71dad 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -766,6 +766,35 @@ void free_pkey_malloc(void *ptr) return ptr; }
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ +#ifdef __powerpc64__ + void *ptr; + int ret; + + dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, + size, prot, pkey); + pkey_assert(pkey < NR_PKEYS); + ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + pkey_assert(ptr != (void *)-1); + + ret = syscall(__NR_subpage_prot, ptr, size, NULL); + if (ret) { + perror("subpage_perm"); + return PTR_ERR_ENOTSUP; + } + + ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); + pkey_assert(!ret); + record_pkey_malloc(ptr, size); + + dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); + return ptr; +#else /* __powerpc64__ */ + return PTR_ERR_ENOTSUP; +#endif /* __powerpc64__ */ +} + void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) { int ret; @@ -888,6 +917,7 @@ void setup_hugetlbfs(void) void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
malloc_pkey_with_mprotect, + malloc_pkey_with_mprotect_subpage, malloc_pkey_anon_huge, malloc_pkey_hugetlb /* can not do direct with the pkey_mprotect() API:
 
            From: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com
The sig_chld() handler calls dprintf2() taking care of setting dprint_in_signal so that sigsafe_printf() won't call printf(). Unfortunately, this precaution is is negated by dprintf_level(), which has a call to fflush().
This function acquires a lock, which means that if the signal interrupts an ongoing fflush() the process will deadlock. At least on powerpc this is easy to trigger, resulting in the following backtrace when attaching to the frozen process:
(gdb) bt #0 0x00007fff9f96c7d8 in __lll_lock_wait_private () from /lib64/power8/libc.so.6 #1 0x00007fff9f8cba4c in _IO_flush_all_lockp () from /lib64/power8/libc.so.6 #2 0x00007fff9f8cbd1c in __GI__IO_flush_all () from /lib64/power8/libc.so.6 #3 0x00007fff9f8b7424 in fflush () from /lib64/power8/libc.so.6 #4 0x00000000100504f8 in sig_chld (x=17) at protection_keys.c:283 #5 <signal handler called> #6 0x00007fff9f8cb8ac in _IO_flush_all_lockp () from /lib64/power8/libc.so.6 #7 0x00007fff9f8cbd1c in __GI__IO_flush_all () from /lib64/power8/libc.so.6 #8 0x00007fff9f8b7424 in fflush () from /lib64/power8/libc.so.6 #9 0x0000000010050b50 in pkey_get (pkey=7, flags=0) at protection_keys.c:379 #10 0x0000000010050dc0 in pkey_disable_set (pkey=7, flags=2) at protection_keys.c:423 #11 0x0000000010051414 in pkey_write_deny (pkey=7) at protection_keys.c:486 #12 0x00000000100556bc in test_ptrace_of_child (ptr=0x7fff9f7f0000, pkey=7) at protection_keys.c:1288 #13 0x0000000010055f60 in run_tests_once () at protection_keys.c:1414 #14 0x00000000100561a4 in main () at protection_keys.c:1459
The fix is to refrain from calling fflush() when inside a signal handler. The output may not be as pretty but at least the testcase will be able to move on.
Signed-off-by: Ram Pai linuxram@us.ibm.com Signed-off-by: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com
tools/testing/selftests/vm/pkey-helpers.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) --- tools/testing/selftests/vm/pkey-helpers.h | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h index 9d06b4a..965cfcd 100644 --- a/tools/testing/selftests/vm/pkey-helpers.h +++ b/tools/testing/selftests/vm/pkey-helpers.h @@ -131,7 +131,8 @@ static inline void sigsafe_printf(const char *format, ...) #define dprintf_level(level, args...) do { \ if (level <= DEBUG_LEVEL) \ sigsafe_printf(args); \ - fflush(NULL); \ + if (!dprint_in_signal) \ + fflush(NULL); \ } while (0) #define dprintf0(args...) dprintf_level(0, args) #define dprintf1(args...) dprintf_level(1, args)
 
            From: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com
This test exercises read and write access to the AMR.
Signed-off-by: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com --- tools/testing/selftests/powerpc/include/reg.h | 1 + tools/testing/selftests/powerpc/ptrace/Makefile | 5 +- .../testing/selftests/powerpc/ptrace/ptrace-pkey.c | 443 ++++++++++++++++++++ 3 files changed, 448 insertions(+), 1 deletions(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 4afdebc..7f348c0 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -54,6 +54,7 @@ #define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ #define SPRN_DSCR 0x03 /* Data Stream Control Register */ #define SPRN_PPR 896 /* Program Priority Register */ +#define SPRN_AMR 13 /* Authority Mask Register - problem state */
/* TEXASR register bits */ #define TEXASR_FC 0xFE00000000000000 diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 4803052..fd896b2 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ - ptrace-tm-spd-vsx ptrace-tm-spr + ptrace-tm-spd-vsx ptrace-tm-spr ptrace-pkey
include ../../lib.mk
@@ -9,6 +9,9 @@ all: $(TEST_PROGS)
CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
+ptrace-pkey: ../harness.c ../utils.c ../lib/reg.S ptrace.h ptrace-pkey.c + $(LINK.c) $^ $(LDLIBS) -pthread -o $@ + $(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
clean: diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c new file mode 100644 index 0000000..2e5b676 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -0,0 +1,443 @@ +/* + * Ptrace test for Memory Protection Key registers + * + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. + * Copyright (C) 2017 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <semaphore.h> +#include "ptrace.h" + +#ifndef __NR_pkey_alloc +#define __NR_pkey_alloc 384 +#endif + +#ifndef __NR_pkey_free +#define __NR_pkey_free 385 +#endif + +#ifndef NT_PPC_PKEY +#define NT_PPC_PKEY 0x110 +#endif + +#ifndef PKEY_DISABLE_EXECUTE +#define PKEY_DISABLE_EXECUTE 0x4 +#endif + +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64) * 8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) + +static const char user_read[] = "[User Read (Running)]"; +static const char user_write[] = "[User Write (Running)]"; +static const char ptrace_read_running[] = "[Ptrace Read (Running)]"; +static const char ptrace_write_running[] = "[Ptrace Write (Running)]"; + +/* Information shared between the parent and the child. */ +struct shared_info { + /* AMR value the parent expects to read from the child. */ + unsigned long amr1; + + /* AMR value the parent is expected to write to the child. */ + unsigned long amr2; + + /* AMR value that ptrace should refuse to write to the child. */ + unsigned long amr3; + + /* IAMR value the parent expects to read from the child. */ + unsigned long expected_iamr; + + /* UAMOR value the parent expects to read from the child. */ + unsigned long expected_uamor; + + /* + * IAMR and UAMOR values that ptrace should refuse to write to the child + * (even though they're valid ones) because userspace doesn't have + * access to those registers. + */ + unsigned long new_iamr; + unsigned long new_uamor; + + /* The parent waits on this semaphore. */ + sem_t sem_parent; + + /* If true, the child should give up as well. */ + bool parent_gave_up; + + /* The child waits on this semaphore. */ + sem_t sem_child; + + /* If true, the parent should give up as well. */ + bool child_gave_up; +}; + +#define CHILD_FAIL_IF(x, info) \ + do { \ + if ((x)) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d\n", __LINE__); \ + (info)->child_gave_up = true; \ + prod_parent(info); \ + return 1; \ + } \ + } while (0) + +#define PARENT_FAIL_IF(x, info) \ + do { \ + if ((x)) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d\n", __LINE__); \ + (info)->parent_gave_up = true; \ + prod_child(info); \ + return 1; \ + } \ + } while (0) + +static int wait_child(struct shared_info *info) +{ + int ret; + + /* Wait until the child prods us. */ + ret = sem_wait(&info->sem_parent); + if (ret) { + perror("Error waiting for child"); + return TEST_FAIL; + } + + return info->child_gave_up ? TEST_FAIL : TEST_PASS; +} + +static int prod_child(struct shared_info *info) +{ + int ret; + + /* Unblock the child now. */ + ret = sem_post(&info->sem_child); + if (ret) { + perror("Error prodding child"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +static int wait_parent(struct shared_info *info) +{ + int ret; + + /* Wait until the parent prods us. */ + ret = sem_wait(&info->sem_child); + if (ret) { + perror("Error waiting for parent"); + return TEST_FAIL; + } + + return info->parent_gave_up ? TEST_FAIL : TEST_PASS; +} + +static int prod_parent(struct shared_info *info) +{ + int ret; + + /* Unblock the parent now. */ + ret = sem_post(&info->sem_parent); + if (ret) { + perror("Error prodding parent"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) +{ + return syscall(__NR_pkey_alloc, flags, init_access_rights); +} + +static int sys_pkey_free(int pkey) +{ + return syscall(__NR_pkey_free, pkey); +} + +static int ptrace_read_regs(pid_t child, unsigned long regs[], int n) +{ + struct iovec iov; + long ret; + + FAIL_IF(start_trace(child)); + + iov.iov_base = regs; + iov.iov_len = n * sizeof(unsigned long); + + ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_PKEY, &iov); + FAIL_IF(ret != 0); + + FAIL_IF(stop_trace(child)); + + return TEST_PASS; +} + +static long ptrace_write_regs(pid_t child, unsigned long regs[], int n) +{ + struct iovec iov; + long ret; + + FAIL_IF(start_trace(child)); + + iov.iov_base = regs; + iov.iov_len = n * sizeof(unsigned long); + + ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_PKEY, &iov); + + FAIL_IF(stop_trace(child)); + + return ret; +} + +static int child(struct shared_info *info) +{ + unsigned long reg; + bool disable_execute = true; + int pkey1, pkey2, pkey3; + int ret; + + /* Get some pkeys so that we can change their bits in the AMR. */ + pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); + if (pkey1 < 0) { + pkey1 = sys_pkey_alloc(0, 0); + CHILD_FAIL_IF(pkey1 < 0, info); + + disable_execute = false; + } + + pkey2 = sys_pkey_alloc(0, 0); + CHILD_FAIL_IF(pkey2 < 0, info); + + pkey3 = sys_pkey_alloc(0, 0); + CHILD_FAIL_IF(pkey3 < 0, info); + + info->amr1 = 3ul << pkeyshift(pkey1); + info->amr2 = 3ul << pkeyshift(pkey2); + info->amr3 = info->amr2 | 3ul << pkeyshift(pkey3); + + if (disable_execute) + info->expected_iamr = 1ul << pkeyshift(pkey1); + else + info->expected_iamr = 0; + + info->expected_uamor = 3ul << pkeyshift(pkey1) | + 3ul << pkeyshift(pkey2); + info->new_iamr = 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2); + info->new_uamor = 3ul << pkeyshift(pkey1); + + /* + * We won't use pkey3. We just want a plausible but invalid key to test + * whether ptrace will let us write to AMR bits we are not supposed to. + * + * This also tests whether the kernel restores the UAMOR permissions + * after a key is freed. + */ + sys_pkey_free(pkey3); + + printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", + user_write, info->amr1, pkey1, pkey2, pkey3); + + mtspr(SPRN_AMR, info->amr1); + + /* Wait for parent to read our AMR value and write a new one. */ + ret = prod_parent(info); + CHILD_FAIL_IF(ret, info); + + ret = wait_parent(info); + if (ret) + return ret; + + reg = mfspr(SPRN_AMR); + + printf("%-30s AMR: %016lx\n", user_read, reg); + + CHILD_FAIL_IF(reg != info->amr2, info); + + /* + * Wait for parent to try to write an invalid AMR value. + */ + ret = prod_parent(info); + CHILD_FAIL_IF(ret, info); + + ret = wait_parent(info); + if (ret) + return ret; + + reg = mfspr(SPRN_AMR); + + printf("%-30s AMR: %016lx\n", user_read, reg); + + CHILD_FAIL_IF(reg != info->amr2, info); + + /* + * Wait for parent to try to write an IAMR and a UAMOR value. We can't + * verify them, but we can verify that the AMR didn't change. + */ + ret = prod_parent(info); + CHILD_FAIL_IF(ret, info); + + ret = wait_parent(info); + if (ret) + return ret; + + reg = mfspr(SPRN_AMR); + + printf("%-30s AMR: %016lx\n", user_read, reg); + + CHILD_FAIL_IF(reg != info->amr2, info); + + /* Now let parent now that we are finished. */ + + ret = prod_parent(info); + CHILD_FAIL_IF(ret, info); + + return TEST_PASS; +} + +static int parent(struct shared_info *info, pid_t pid) +{ + unsigned long regs[4]; + int ret, status; + + ret = wait_child(info); + if (ret) + return ret; + + /* Verify that we can read the pkey registers from the child. */ + ret = ptrace_read_regs(pid, regs, 3); + PARENT_FAIL_IF(ret, info); + + printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n", + ptrace_read_running, regs[0], regs[1], regs[2]); + + PARENT_FAIL_IF(regs[0] != info->amr1, info); + PARENT_FAIL_IF(regs[1] != info->expected_iamr, info); + PARENT_FAIL_IF(regs[2] != info->expected_uamor, info); + + /* Write valid AMR value in child. */ + ret = ptrace_write_regs(pid, &info->amr2, 1); + PARENT_FAIL_IF(ret, info); + + printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2); + + /* Wake up child so that it can verify it changed. */ + ret = prod_child(info); + PARENT_FAIL_IF(ret, info); + + ret = wait_child(info); + if (ret) + return ret; + + /* Write invalid AMR value in child. */ + ret = ptrace_write_regs(pid, &info->amr3, 1); + PARENT_FAIL_IF(ret, info); + + printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3); + + /* Wake up child so that it can verify it didn't change. */ + ret = prod_child(info); + PARENT_FAIL_IF(ret, info); + + ret = wait_child(info); + if (ret) + return ret; + + /* Try to write to IAMR. */ + regs[0] = info->amr1; + regs[1] = info->new_iamr; + ret = ptrace_write_regs(pid, regs, 2); + PARENT_FAIL_IF(!ret, info); + + printf("%-30s AMR: %016lx IAMR: %016lx\n", + ptrace_write_running, regs[0], regs[1]); + + /* Try to write to IAMR and UAMOR. */ + regs[2] = info->new_uamor; + ret = ptrace_write_regs(pid, regs, 3); + PARENT_FAIL_IF(!ret, info); + + printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n", + ptrace_write_running, regs[0], regs[1], regs[2]); + + /* Verify that all registers still have their expected values. */ + ret = ptrace_read_regs(pid, regs, 3); + PARENT_FAIL_IF(ret, info); + + printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n", + ptrace_read_running, regs[0], regs[1], regs[2]); + + PARENT_FAIL_IF(regs[0] != info->amr2, info); + PARENT_FAIL_IF(regs[1] != info->expected_iamr, info); + PARENT_FAIL_IF(regs[2] != info->expected_uamor, info); + + /* Wake up child so that it can verify AMR didn't change and wrap up. */ + ret = prod_child(info); + PARENT_FAIL_IF(ret, info); + + ret = wait(&status); + if (ret != pid) { + printf("Child's exit status not captured\n"); + ret = TEST_PASS; + } else if (!WIFEXITED(status)) { + printf("Child exited abnormally\n"); + ret = TEST_FAIL; + } else + ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS; + + return ret; +} + +static int ptrace_pkey(void) +{ + struct shared_info *info; + int shm_id; + int ret; + pid_t pid; + + shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT); + info = shmat(shm_id, NULL, 0); + + ret = sem_init(&info->sem_parent, 1, 0); + if (ret) { + perror("Semaphore initialization failed"); + return TEST_FAIL; + } + ret = sem_init(&info->sem_child, 1, 0); + if (ret) { + perror("Semaphore initialization failed"); + return TEST_FAIL; + } + + pid = fork(); + if (pid < 0) { + perror("fork() failed"); + ret = TEST_FAIL; + } else if (pid == 0) + ret = child(info); + else + ret = parent(info, pid); + + shmdt(info); + + if (pid) { + sem_destroy(&info->sem_parent); + sem_destroy(&info->sem_child); + shmctl(shm_id, IPC_RMID, NULL); + } + + return ret; +} + +int main(int argc, char *argv[]) +{ + return test_harness(ptrace_pkey, "ptrace_pkey"); +}
 
            From: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com
This test verifies that the AMR is being written to a process' core file.
Signed-off-by: Thiago Jung Bauermann bauerman@linux.vnet.ibm.com --- tools/testing/selftests/powerpc/ptrace/Makefile | 2 +- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 438 ++++++++++++++++++++ 2 files changed, 439 insertions(+), 1 deletions(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/core-pkey.c
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index fd896b2..ca25fda 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ - ptrace-tm-spd-vsx ptrace-tm-spr ptrace-pkey + ptrace-tm-spd-vsx ptrace-tm-spr ptrace-pkey core-pkey
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c new file mode 100644 index 0000000..2328f8c --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -0,0 +1,438 @@ +/* + * Ptrace test for Memory Protection Key registers + * + * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. + * Copyright (C) 2017 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <limits.h> +#include <semaphore.h> +#include <linux/kernel.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <unistd.h> +#include "ptrace.h" + +#ifndef __NR_pkey_alloc +#define __NR_pkey_alloc 384 +#endif + +#ifndef __NR_pkey_free +#define __NR_pkey_free 385 +#endif + +#ifndef NT_PPC_PKEY +#define NT_PPC_PKEY 0x110 +#endif + +#ifndef PKEY_DISABLE_EXECUTE +#define PKEY_DISABLE_EXECUTE 0x4 +#endif + +#define AMR_BITS_PER_PKEY 2 +#define PKEY_REG_BITS (sizeof(u64) * 8) +#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY)) + +#define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */ + +static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern"; + +static const char user_write[] = "[User Write (Running)]"; +static const char core_read_running[] = "[Core Read (Running)]"; + +/* Information shared between the parent and the child. */ +struct shared_info { + /* AMR value the parent expects to read in the core file. */ + unsigned long amr; + + /* IAMR value the parent expects to read from the child. */ + unsigned long iamr; + + /* UAMOR value the parent expects to read from the child. */ + unsigned long uamor; + + /* When the child crashed. */ + time_t core_time; +}; + +static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights) +{ + return syscall(__NR_pkey_alloc, flags, init_access_rights); +} + +static int sys_pkey_free(int pkey) +{ + return syscall(__NR_pkey_free, pkey); +} + +static int increase_core_file_limit(void) +{ + struct rlimit rlim; + int ret; + + ret = getrlimit(RLIMIT_CORE, &rlim); + FAIL_IF(ret); + + if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) { + rlim.rlim_cur = CORE_FILE_LIMIT; + + if (rlim.rlim_max != RLIM_INFINITY && + rlim.rlim_max < CORE_FILE_LIMIT) + rlim.rlim_max = CORE_FILE_LIMIT; + + ret = setrlimit(RLIMIT_CORE, &rlim); + FAIL_IF(ret); + } + + ret = getrlimit(RLIMIT_FSIZE, &rlim); + FAIL_IF(ret); + + if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) { + rlim.rlim_cur = CORE_FILE_LIMIT; + + if (rlim.rlim_max != RLIM_INFINITY && + rlim.rlim_max < CORE_FILE_LIMIT) + rlim.rlim_max = CORE_FILE_LIMIT; + + ret = setrlimit(RLIMIT_FSIZE, &rlim); + FAIL_IF(ret); + } + + return TEST_PASS; +} + +static int child(struct shared_info *info) +{ + bool disable_execute = true; + int pkey1, pkey2, pkey3; + int *ptr, ret; + + ret = increase_core_file_limit(); + FAIL_IF(ret); + + /* Get some pkeys so that we can change their bits in the AMR. */ + pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE); + if (pkey1 < 0) { + pkey1 = sys_pkey_alloc(0, 0); + FAIL_IF(pkey1 < 0); + + disable_execute = false; + } + + pkey2 = sys_pkey_alloc(0, 0); + FAIL_IF(pkey2 < 0); + + pkey3 = sys_pkey_alloc(0, 0); + FAIL_IF(pkey3 < 0); + + info->amr = 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2); + + if (disable_execute) + info->iamr = 1ul << pkeyshift(pkey1); + else + info->iamr = 0; + + info->uamor = 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2); + + printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n", + user_write, info->amr, pkey1, pkey2, pkey3); + + mtspr(SPRN_AMR, info->amr); + + /* + * We won't use pkey3. This tests whether the kernel restores the UAMOR + * permissions after a key is freed. + */ + sys_pkey_free(pkey3); + + info->core_time = time(NULL); + + /* Crash. */ + ptr = 0; + *ptr = 1; + + /* Shouldn't get here. */ + FAIL_IF(true); + + return TEST_FAIL; +} + +/* Return file size if filename exists and pass sanity check, or zero if not. */ +static off_t try_core_file(const char *filename, struct shared_info *info, + pid_t pid) +{ + struct stat buf; + int ret; + + ret = stat(filename, &buf); + if (ret == -1) + return TEST_FAIL; + + /* Make sure we're not using a stale core file. */ + return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL; +} + +static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr) +{ + return (void *) nhdr + sizeof(*nhdr) + + __ALIGN_KERNEL(nhdr->n_namesz, 4) + + __ALIGN_KERNEL(nhdr->n_descsz, 4); +} + +static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr, + off_t core_size) +{ + unsigned long *regs; + Elf64_Phdr *phdr; + Elf64_Nhdr *nhdr; + size_t phdr_size; + void *p = ehdr, *note; + int ret; + + ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG); + FAIL_IF(ret); + + FAIL_IF(ehdr->e_type != ET_CORE); + FAIL_IF(ehdr->e_machine != EM_PPC64); + FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0); + + /* + * e_phnum is at most 65535 so calculating the size of the + * program header cannot overflow. + */ + phdr_size = sizeof(*phdr) * ehdr->e_phnum; + + /* Sanity check the program header table location. */ + FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff); + FAIL_IF(ehdr->e_phoff + phdr_size > core_size); + + /* Find the PT_NOTE segment. */ + for (phdr = p + ehdr->e_phoff; + (void *) phdr < p + ehdr->e_phoff + phdr_size; + phdr += ehdr->e_phentsize) + if (phdr->p_type == PT_NOTE) + break; + + FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size); + + /* Find the NT_PPC_PKEY note. */ + for (nhdr = p + phdr->p_offset; + (void *) nhdr < p + phdr->p_offset + phdr->p_filesz; + nhdr = next_note(nhdr)) + if (nhdr->n_type == NT_PPC_PKEY) + break; + + FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz); + FAIL_IF(nhdr->n_descsz == 0); + + p = nhdr; + note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4); + + regs = (unsigned long *) note; + + printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n", + core_read_running, regs[0], regs[1], regs[2]); + + FAIL_IF(regs[0] != info->amr); + FAIL_IF(regs[1] != info->iamr); + FAIL_IF(regs[2] != info->uamor); + + return TEST_PASS; +} + +static int parent(struct shared_info *info, pid_t pid) +{ + char *filenames, *filename[3]; + int fd, i, ret, status; + off_t core_size; + void *core; + + ret = wait(&status); + if (ret != pid) { + printf("Child's exit status not captured\n"); + return TEST_FAIL; + } else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) { + printf("Child didn't dump core\n"); + return TEST_FAIL; + } + + /* Construct array of core file names to try. */ + + filename[0] = filenames = malloc(PATH_MAX); + if (!filenames) { + perror("Error allocating memory"); + return TEST_FAIL; + } + + ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid); + if (ret < 0 || ret >= PATH_MAX) { + ret = TEST_FAIL; + goto out; + } + + filename[1] = filename[0] + ret + 1; + ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid); + if (ret < 0 || ret >= PATH_MAX - ret - 1) { + ret = TEST_FAIL; + goto out; + } + filename[2] = "core"; + + for (i = 0; i < 3; i++) { + core_size = try_core_file(filename[i], info, pid); + if (core_size != TEST_FAIL) + break; + } + + if (i == 3) { + printf("Couldn't find core file\n"); + ret = TEST_FAIL; + goto out; + } + + fd = open(filename[i], O_RDONLY); + if (fd == -1) { + perror("Error opening core file"); + ret = TEST_FAIL; + goto out; + } + + core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (core == (void *) -1) { + perror("Error mmaping core file"); + ret = TEST_FAIL; + goto out; + } + + ret = check_core_file(info, core, core_size); + + munmap(core, core_size); + close(fd); + unlink(filename[i]); + + out: + free(filenames); + + return ret; +} + +static int write_core_pattern(const char *core_pattern) +{ + size_t len = strlen(core_pattern), ret; + FILE *f; + + f = fopen(core_pattern_file, "w"); + if (!f) { + perror("Error writing to core_pattern file"); + return TEST_FAIL; + } + + ret = fwrite(core_pattern, 1, len, f); + fclose(f); + if (ret != len) { + perror("Error writing to core_pattern file"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +static int setup_core_pattern(char **core_pattern_, bool *changed_) +{ + FILE *f; + char *core_pattern; + int ret; + + core_pattern = malloc(PATH_MAX); + if (!core_pattern) { + perror("Error allocating memory"); + return TEST_FAIL; + } + + f = fopen(core_pattern_file, "r"); + if (!f) { + perror("Error opening core_pattern file"); + ret = TEST_FAIL; + goto out; + } + + ret = fread(core_pattern, 1, PATH_MAX, f); + fclose(f); + if (!ret) { + perror("Error reading core_pattern file"); + ret = TEST_FAIL; + goto out; + } + + /* Check whether we can predict the name of the core file. */ + if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p")) + *changed_ = false; + else { + ret = write_core_pattern("core-pkey.%p"); + if (ret) + goto out; + + *changed_ = true; + } + + *core_pattern_ = core_pattern; + ret = TEST_PASS; + + out: + if (ret) + free(core_pattern); + + return ret; +} + +static int core_pkey(void) +{ + char *core_pattern; + bool changed_core_pattern; + struct shared_info *info; + int shm_id; + int ret; + pid_t pid; + + ret = setup_core_pattern(&core_pattern, &changed_core_pattern); + if (ret) + return ret; + + shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT); + info = shmat(shm_id, NULL, 0); + + pid = fork(); + if (pid < 0) { + perror("fork() failed"); + ret = TEST_FAIL; + } else if (pid == 0) + ret = child(info); + else + ret = parent(info, pid); + + shmdt(info); + + if (pid) { + shmctl(shm_id, IPC_RMID, NULL); + + if (changed_core_pattern) + write_core_pattern(core_pattern); + } + + free(core_pattern); + + return ret; +} + +int main(int argc, char *argv[]) +{ + return test_harness(core_pkey, "core_pkey"); +}
linux-kselftest-mirror@lists.linaro.org
