On Thu, 22 Dec 2022 05:34:52 +0100, Willy Tarreau wrote:
This one relies on /proc/self/auxv, but we'll quickly run into a chicken-and-egg situation given that nolibc is used by init programs that mount /proc. Instead I think that we should modify the _start code to retrieve the auxv at startup and store it somewhere. This "somewhere" is not yet defined, but I'm thinking that it could deserve reserving some room in the stack to store some nolibc-defined information (possibly even a copy of a pointer to environ and/or errno) and figure a reliable and simple way to access this. Note that one way could also be to know that it's after the NULL that follows envp, and to start from environ. In fact there are plenty of ways and we just need to decide what's the least ugly. But once we have access to the auxv from the process, then we could implement a getauxval() function to retrieve the information you need for getpagesize().
Thanks for the great feedback!
I agree with following the @envp pointer to get the auxv. I was trying to wire up a new function '__start' (with double underscores), written in C, that accepts @argc, @argv and @envp and then calls 'main'. '_start' would then call '__start' instead of 'main'. This way, we could set up nolibc-defined data in '__start' (before main) without touching much Assembly.
But then I noticed that it wouldn't work because we may have users who define the 'main' function differently, e.g.:
int main(void); int main(int argc, char **argv); int main(int argc, char **argv, char **envp);
So '__start' can't call 'main'. We still need to call 'main' from the inline Assembly (in '_start').
Below is a quick and dirty patch to get getauxval() working on x86-64. It needs more work, but it is at least something to start with:
$ ./nolibc-test Running test 'syscall' AT_SYSINFO_EHDR = 140737354125312 AT_HWCAP = 3219913727 AT_PAGESZ = 4096 AT_CLKTCK = 100 AT_PHDR = 4194368 AT_PHENT = 56 AT_PHNUM = 9 AT_BASE = 0 AT_FLAGS = 0 AT_ENTRY = 4199128 AT_UID = 0 AT_EUID = 0 AT_GID = 0 AT_EGID = 0 AT_SECURE = 0 AT_RANDOM = 140737488349065 AT_EXECFN = 140737488351210 AT_PLATFORM = 140737488349081
I'll probably only be able to spend more time on this after the holiday season ends. I will still check my email, but my activity will be sparse.
Signed-off-by: [Do not commit] ---
base-commit: caf5c36025ec9395c8d7c78957b016a284812d23 ("srcu: Update comment after the index flip") diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h index 0e1e9eb8545d..5ee945370ff5 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86_64.h @@ -199,17 +199,49 @@ struct sys_stat_struct { */ __asm__ (".section .text\n" ".weak _start\n" "_start:\n" "pop %rdi\n" // argc (first arg, %rdi) "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) "xor %ebp, %ebp\n" // zero the stack frame "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call + "push %rdi\n" // Save argc + "push %rsi\n" // Save arg + "push %rdx\n" // Save envp. + "push %rcx\n" // Keep the 16-byte alignment + "call __start\n" // Save environ and auxv + "pop %rcx\n" // Restore alignment + "pop %rdx\n" // Restore envp + "pop %rsi\n" // Restore argv + "pop %rdi\n" // Restore argc "call main\n" // main() returns the status code, we'll exit with it. "mov %eax, %edi\n" // retrieve exit code (32 bit) "mov $60, %eax\n" // NR_exit == 60 "syscall\n" // really exit "hlt\n" // ensure it does not return "");
+struct __nolibc_internal { + char **envp; + unsigned long *auxv; +}; + +static struct __nolibc_internal __nolibc_internal; + +/* + * Mark this __used__ to avoid being optimized away. + * Reason: Called from inline Assembly. + */ +static __attribute__((__used__)) +void __start(int argc, char **argv, char **envp) +{ + char **p = envp; + + while (*p) + p++; + + __nolibc_internal.auxv = (unsigned long *)++p; + __nolibc_internal.envp = envp; +} + #endif // _NOLIBC_ARCH_X86_64_H diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 3db1dd8c74ee..04bff724e056 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -12,18 +12,19 @@
/* system includes */ #include <asm/unistd.h> #include <asm/signal.h> // for SIGCHLD #include <asm/ioctls.h> #include <asm/mman.h> #include <linux/fs.h> #include <linux/loop.h> #include <linux/time.h> +#include <linux/auxvec.h>
#include "arch.h" #include "errno.h" #include "types.h"
/* Functions in this file only describe syscalls. They're declared static so * that the compiler usually decides to inline them while still being allowed * to pass a pointer to one of their instances. Each syscall exists in two @@ -379,18 +380,49 @@ int fsync(int fd)
if (ret < 0) { SET_ERRNO(-ret); ret = -1; } return ret; }
+/* + * On success, getauxval() returns the value corresponding to type. + * If type is not found, 0 is returned. + * + * unsigned long getauxval(unsigned long type); + */ + +static __attribute__((unused)) +unsigned long getauxval(unsigned long type) +{ + unsigned long *auxv = __nolibc_internal.auxv; + + if (__builtin_expect(!auxv, 0)) + return 0; + + while (1) { + if (!auxv[0] && !auxv[1]) + /* + * We've reached the end of auxv. + */ + return 0; + + if (auxv[0] == type) + return auxv[1]; + + auxv += 2; + } + __builtin_unreachable(); +} + + /* * int getdents64(int fd, struct linux_dirent64 *dirp, int count); */
static __attribute__((unused)) int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) { return my_syscall3(__NR_getdents64, fd, dirp, count); } diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index f14f5076fb6d..bca29a952c69 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -436,30 +436,60 @@ int test_getdents64(const char *dir)
ret = getdents64(fd, (void *)buffer, sizeof(buffer)); err = errno; close(fd);
errno = err; return ret; }
+static void test_getauxval(void) +{ + #define PRINT_AUXVAL(KEY) \ + do { \ + printf(#KEY " = %lu\n", getauxval(KEY)); \ + } while (0) + + PRINT_AUXVAL(AT_SYSINFO_EHDR); + PRINT_AUXVAL(AT_HWCAP); + PRINT_AUXVAL(AT_PAGESZ); + PRINT_AUXVAL(AT_CLKTCK); + PRINT_AUXVAL(AT_PHDR); + PRINT_AUXVAL(AT_PHENT); + PRINT_AUXVAL(AT_PHNUM); + PRINT_AUXVAL(AT_BASE); + PRINT_AUXVAL(AT_FLAGS); + PRINT_AUXVAL(AT_ENTRY); + PRINT_AUXVAL(AT_UID); + PRINT_AUXVAL(AT_EUID); + PRINT_AUXVAL(AT_GID); + PRINT_AUXVAL(AT_EGID); + PRINT_AUXVAL(AT_SECURE); + PRINT_AUXVAL(AT_RANDOM); + PRINT_AUXVAL(AT_EXECFN); + PRINT_AUXVAL(AT_PLATFORM); + exit(0); +} + /* Run syscall tests between IDs <min> and <max>. * Return 0 on success, non-zero on failure. */ int run_syscall(int min, int max) { struct stat stat_buf; int proc; int test; int tmp; int ret = 0; void *p1, *p2;
+ test_getauxval(); + /* <proc> indicates whether or not /proc is mounted */ proc = stat("/proc", &stat_buf) == 0;
for (test = min; test >= 0 && test <= max; test++) { int llen = 0; // line length
/* avoid leaving empty lines below, this will insert holes into * test numbers. */