Create a test for the robust list mechanism.
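The robust list lets a thread tell the kernel which futexes it holds, so that
if the thread dies the kernel wakes one waiter per listed futex and marks the
futex word with FUTEX_OWNER_DIED. For reference, a minimal sketch of the
registration step the test exercises is below; the lock_struct layout and
helper name are illustrative and simply mirror the test code:

  #define _GNU_SOURCE
  #include <linux/futex.h>
  #include <stddef.h>
  #include <sys/syscall.h>
  #include <unistd.h>

  struct lock_struct {
  	int futex;               /* futex word, holds the owner TID */
  	struct robust_list list; /* links this lock into the robust list */
  };

  static void register_robust_list(struct robust_list_head *head)
  {
  	/* An empty list points back to itself */
  	head->list.next = &head->list;
  	/* Offset from a list entry to its futex word */
  	head->futex_offset = (long)offsetof(struct lock_struct, futex) -
  			     (long)offsetof(struct lock_struct, list);
  	head->list_op_pending = NULL;
  	/* Tell the kernel where this thread's robust list starts */
  	syscall(SYS_set_robust_list, head, sizeof(*head));
  }

After registration, mutex_lock()-style code links each lock it takes into
head->list; on thread exit the kernel walks that list.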
Signed-off-by: André Almeida <andrealmeid(a)igalia.com>
---
.../selftests/futex/functional/.gitignore | 1 +
.../selftests/futex/functional/Makefile | 3 +-
.../selftests/futex/functional/robust_list.c | 450 ++++++++++++++++++
3 files changed, 453 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/futex/functional/robust_list.c
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index fbcbdb6963b3..4726e1be7497 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -9,3 +9,4 @@ futex_wait_wouldblock
futex_wait
futex_requeue
futex_waitv
+robust_list
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index f79f9bac7918..b8635a1ac7f6 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -17,7 +17,8 @@ TEST_GEN_PROGS := \
futex_wait_private_mapped_file \
futex_wait \
futex_requeue \
- futex_waitv
+ futex_waitv \
+ robust_list
TEST_PROGS := run.sh
diff --git a/tools/testing/selftests/futex/functional/robust_list.c b/tools/testing/selftests/futex/functional/robust_list.c
new file mode 100644
index 000000000000..5cc0edaaf028
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/robust_list.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Igalia, 2024
+ *
+ * Robust list test by André Almeida <andrealmeid(a)igalia.com>
+ *
+ * The robust list uAPI allows userspace to create "robust" locks, in the sense
+ * that if the lock holder thread dies, the remaining threads that are waiting
+ * for the lock won't block forever, waiting for a lock that will never be
+ * released.
+ *
+ * This is achieved by userspace setting up a list in which a thread records all
+ * the locks (futexes) that it is holding. The robust list is a linked list, and
+ * userspace registers the start of the list with the set_robust_list() syscall.
+ * If such a thread eventually dies, the kernel will walk this list, waking up one
+ * thread waiting for each futex and marking the futex word with the flag
+ * FUTEX_OWNER_DIED.
+ *
+ * See also
+ * man set_robust_list
+ * Documentation/locking/robust-futex-ABI.rst
+ * Documentation/locking/robust-futexes.rst
+ */
+
+#define _GNU_SOURCE
+
+#include "../../kselftest_harness.h"
+
+#include "futextest.h"
+
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stddef.h>
+
+#define STACK_SIZE (1024 * 1024)
+
+#define FUTEX_TIMEOUT 3
+
+static pthread_barrier_t barrier, barrier2;
+
+int set_robust_list(struct robust_list_head *head, size_t len)
+{
+ return syscall(SYS_set_robust_list, head, len);
+}
+
+int get_robust_list(int pid, struct robust_list_head **head, size_t *len_ptr)
+{
+ return syscall(SYS_get_robust_list, pid, head, len_ptr);
+}
+
+int futex2_wait(void *futex, int val, struct timespec *timo)
+{
+ return syscall(SYS_futex_wait, futex, val, ~0U, FUTEX2_SIZE_U32, timo, CLOCK_MONOTONIC);
+}
+
+/*
+ * Basic lock struct, contains just the futex word and the robust list element
+ * Real implementations also have a *prev pointer to easily walk the list
+ */
+struct lock_struct {
+ int futex;
+ struct robust_list list;
+};
+
+/*
+ * Helper function to spawn a child thread. Returns -1 on error, pid on success
+ */
+static int create_child(int (*fn)(void *arg), void *arg)
+{
+ char *stack;
+ pid_t pid;
+
+ stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (stack == MAP_FAILED)
+ return -1;
+
+ stack += STACK_SIZE;
+
+ pid = clone(fn, stack, CLONE_VM | SIGCHLD, arg);
+
+ if (pid == -1)
+ return -1;
+
+ return pid;
+}
+
+/*
+ * Helper function to prepare and register a robust list
+ */
+static int set_list(struct robust_list_head *head)
+{
+ int ret;
+
+ ret = set_robust_list(head, sizeof(struct robust_list_head));
+ if (ret)
+ return ret;
+
+ head->futex_offset = (size_t) offsetof(struct lock_struct, futex) -
+ (size_t) offsetof(struct lock_struct, list);
+ head->list.next = &head->list;
+ head->list_op_pending = NULL;
+
+ return 0;
+}
+
+/*
+ * A basic (and incomplete) mutex lock function with robustness
+ */
+static int mutex_lock(struct lock_struct *lock, struct robust_list_head *head, bool error_inject)
+{
+ int *futex = &lock->futex, zero = 0, ret = -1;
+ pid_t tid = gettid();
+
+ /*
+ * Set list_op_pending before starting the lock, so the kernel can catch
+ * the case where the thread died during the lock operation
+ */
+ head->list_op_pending = &lock->list;
+
+ if (atomic_compare_exchange_strong(futex, &zero, tid)) {
+ /*
+ * We took the lock, insert it in the robust list
+ */
+ struct robust_list *list = &head->list;
+
+ /* Error injection to test list_op_pending */
+ if (error_inject)
+ return 0;
+
+ while (list->next != &head->list)
+ list = list->next;
+
+ list->next = &lock->list;
+ lock->list.next = &head->list;
+
+ ret = 0;
+ } else {
+ /*
+ * We didn't take the lock, wait until the owner wakes (or dies)
+ */
+ struct timespec to;
+
+ clock_gettime(CLOCK_MONOTONIC, &to);
+ to.tv_sec = to.tv_sec + FUTEX_TIMEOUT;
+
+ tid = atomic_load(futex);
+ /* Kernel ignores futexes without the waiters flag */
+ tid |= FUTEX_WAITERS;
+ atomic_store(futex, tid);
+
+ ret = futex2_wait(futex, tid, &to);
+
+ /*
+ * A real mutex_lock() implementation would loop here to finally
+ * take the lock. We don't care about that, so we stop here.
+ */
+ }
+
+ head->list_op_pending = NULL;
+
+ return ret;
+}
+
+/*
+ * This child thread will succeed taking the lock, and then will exit holding it
+ */
+static int child_fn_lock(void *arg)
+{
+ struct lock_struct *lock = (struct lock_struct *) arg;
+ struct robust_list_head head;
+ int ret;
+
+ ret = set_list(&head);
+ if (ret)
+ ksft_test_result_fail("set_robust_list error\n");
+
+ ret = mutex_lock(lock, &head, false);
+ if (ret)
+ ksft_test_result_fail("mutex_lock error\n");
+
+ pthread_barrier_wait(&barrier);
+
+ /*
+ * There's a race here: the parent thread needs to be inside
+ * futex_wait() before the child thread dies, otherwise it will miss the
+ * wakeup from handle_futex_death() that this child will emit. We wait a
+ * little bit just to make sure that this happens.
+ */
+ sleep(1);
+
+ return 0;
+}
+
+/*
+ * Spawns a child thread that will set a robust list, take the lock, register it
+ * in the robust list and die. The parent thread will wait on this futex, and
+ * should be woken up when the child exits.
+ */
+TEST(robustness)
+{
+ struct lock_struct lock = { .futex = 0 };
+ struct robust_list_head head;
+ int ret, *futex = &lock.futex;
+
+ ret = set_list(&head);
+ ASSERT_EQ(ret, 0);
+
+ /*
+	 * Let's use a barrier to ensure that the child thread takes the lock
+ * before the parent
+ */
+ ret = pthread_barrier_init(&barrier, NULL, 2);
+ ASSERT_EQ(ret, 0);
+
+ ret = create_child(&child_fn_lock, &lock);
+ ASSERT_NE(ret, -1);
+
+ pthread_barrier_wait(&barrier);
+ ret = mutex_lock(&lock, &head, false);
+
+ /*
+ * futex_wait() should return 0 and the futex word should be marked with
+ * FUTEX_OWNER_DIED
+ */
+ ASSERT_EQ(ret, 0) TH_LOG("futex wait returned %d", errno);
+	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);
+
+ pthread_barrier_destroy(&barrier);
+}
+
+/*
+ * The only valid value for len is sizeof(*head)
+ */
+TEST(set_robust_list_invalid_size)
+{
+ struct robust_list_head head;
+ size_t head_size = sizeof(struct robust_list_head);
+ int ret;
+
+ ret = set_robust_list(&head, head_size);
+ ASSERT_EQ(ret, 0);
+
+ ret = set_robust_list(&head, head_size * 2);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ ret = set_robust_list(&head, head_size - 1);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ ret = set_robust_list(&head, 0);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+/*
+ * Test get_robust_list with pid = 0, getting the list of the running thread
+ */
+TEST(get_robust_list_self)
+{
+ struct robust_list_head head, head2, *get_head;
+ size_t head_size = sizeof(struct robust_list_head), len_ptr;
+ int ret;
+
+ ret = set_robust_list(&head, head_size);
+ ASSERT_EQ(ret, 0);
+
+ ret = get_robust_list(0, &get_head, &len_ptr);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(get_head, &head);
+ ASSERT_EQ(head_size, len_ptr);
+
+ ret = set_robust_list(&head2, head_size);
+ ASSERT_EQ(ret, 0);
+
+ ret = get_robust_list(0, &get_head, &len_ptr);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(get_head, &head2);
+ ASSERT_EQ(head_size, len_ptr);
+}
+
+static int child_list(void *arg)
+{
+ struct robust_list_head *head = (struct robust_list_head *) arg;
+ int ret;
+
+ ret = set_robust_list(head, sizeof(struct robust_list_head));
+ if (ret)
+ ksft_test_result_fail("set_robust_list error\n");
+
+ pthread_barrier_wait(&barrier);
+ pthread_barrier_wait(&barrier2);
+
+ return 0;
+}
+
+/*
+ * Test get_robust_list from another thread. We use two barriers here to ensure
+ * that:
+ * 1) the child thread sets the list before we try to get it from the
+ *    parent
+ * 2) the child thread is still alive when we try to get the list from it
+ */
+TEST(get_robust_list_child)
+{
+ pid_t tid;
+ int ret;
+ struct robust_list_head head, *get_head;
+ size_t len_ptr;
+
+	ret = pthread_barrier_init(&barrier, NULL, 2);
+	ret |= pthread_barrier_init(&barrier2, NULL, 2);
+	ASSERT_EQ(ret, 0);
+
+ tid = create_child(&child_list, &head);
+ ASSERT_NE(tid, -1);
+
+ pthread_barrier_wait(&barrier);
+
+ ret = get_robust_list(tid, &get_head, &len_ptr);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(&head, get_head);
+
+ pthread_barrier_wait(&barrier2);
+
+ pthread_barrier_destroy(&barrier);
+ pthread_barrier_destroy(&barrier2);
+}
+
+static int child_fn_lock_with_error(void *arg)
+{
+ struct lock_struct *lock = (struct lock_struct *) arg;
+ struct robust_list_head head;
+ int ret;
+
+ ret = set_list(&head);
+ if (ret)
+ ksft_test_result_fail("set_robust_list error\n");
+
+ ret = mutex_lock(lock, &head, true);
+ if (ret)
+ ksft_test_result_fail("mutex_lock error\n");
+
+ pthread_barrier_wait(&barrier);
+
+ sleep(1);
+
+ return 0;
+}
+
+/*
+ * Same as robustness test, but inject an error where the mutex_lock() exits
+ * earlier, just after setting list_op_pending and taking the lock, to test the
+ * list_op_pending mechanism
+ */
+TEST(set_list_op_pending)
+{
+ struct lock_struct lock = { .futex = 0 };
+ struct robust_list_head head;
+ int ret, *futex = &lock.futex;
+
+ ret = set_list(&head);
+ ASSERT_EQ(ret, 0);
+
+ ret = pthread_barrier_init(&barrier, NULL, 2);
+ ASSERT_EQ(ret, 0);
+
+ ret = create_child(&child_fn_lock_with_error, &lock);
+ ASSERT_NE(ret, -1);
+
+ pthread_barrier_wait(&barrier);
+ ret = mutex_lock(&lock, &head, false);
+
+ ASSERT_EQ(ret, 0) TH_LOG("futex wait returned %d", errno);
+	ASSERT_TRUE(*futex & FUTEX_OWNER_DIED);
+
+ pthread_barrier_destroy(&barrier);
+}
+
+#define CHILD_NR 10
+
+static int child_lock_holder(void *arg)
+{
+ struct lock_struct *locks = (struct lock_struct *) arg;
+ struct robust_list_head head;
+ int i;
+
+ set_list(&head);
+
+ for (i = 0; i < CHILD_NR; i++) {
+ locks[i].futex = 0;
+ mutex_lock(&locks[i], &head, false);
+ }
+
+ pthread_barrier_wait(&barrier);
+ pthread_barrier_wait(&barrier2);
+
+ sleep(1);
+ return 0;
+}
+
+static int child_wait_lock(void *arg)
+{
+ struct lock_struct *lock = (struct lock_struct *) arg;
+ struct robust_list_head head;
+ int ret;
+
+ pthread_barrier_wait(&barrier2);
+ ret = mutex_lock(lock, &head, false);
+
+ if (ret)
+ ksft_test_result_fail("mutex_lock error\n");
+
+	if (!(lock->futex & FUTEX_OWNER_DIED))
+ ksft_test_result_fail("futex not marked with FUTEX_OWNER_DIED\n");
+
+ return 0;
+}
+
+/*
+ * Test a robust list of more than one element. All the waiters should wake when
+ * the holder dies
+ */
+TEST(robust_list_multiple_elements)
+{
+ struct lock_struct locks[CHILD_NR];
+ int i, ret;
+
+ ret = pthread_barrier_init(&barrier, NULL, 2);
+ ASSERT_EQ(ret, 0);
+ ret = pthread_barrier_init(&barrier2, NULL, CHILD_NR + 1);
+ ASSERT_EQ(ret, 0);
+
+ create_child(&child_lock_holder, &locks);
+
+	/* Wait until the locker thread takes the locks */
+ pthread_barrier_wait(&barrier);
+
+ for (i = 0; i < CHILD_NR; i++)
+ create_child(&child_wait_lock, &locks[i]);
+
+ /* Wait for all children to return */
+ while (wait(NULL) > 0);
+
+ pthread_barrier_destroy(&barrier);
+ pthread_barrier_destroy(&barrier2);
+}
+
+TEST_HARNESS_MAIN
--
2.46.0
The chacha vDSO selftest doesn't check the way the counter is handled
by __arch_chacha20_blocks_nostack(). It indirectly checks that the
counter is written on exit and read back on the next entry, but it
doesn't check that the format is correct. This has allowed an invisible
erroneous implementation on powerpc, where the counter was written and
read in the wrong byte order.
Also, the counter uses two words, but the test starts with a zero
counter and uses a small number of blocks, so at the end the upper word
of the counter is always 0 and is therefore never checked.
Add a verification of the counter's content in addition to the
verification of the output.
Also add two tests where the counter crosses the u32 upper limit. The
first test verifies that the function properly writes back the upper
word, the second test verifies that the function properly reads back
the upper word.
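To make the wrap-around concrete, here is a small standalone sketch
(assuming the same two-word counter layout and the same carry as the
reference implementation in this test):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
  	enum { BLOCKS = 128 };
  	/* Start the low word close to UINT32_MAX, as the new tests do */
  	uint32_t counter[2] = { (uint32_t)-BLOCKS + 2, 0 };

  	for (int i = 0; i < BLOCKS; i++) {
  		/* Same carry as reference_chacha20_blocks() */
  		if (!++counter[0])
  			++counter[1];
  	}

  	/* The low word wraps and the high word becomes 1: { 2, 1 } */
  	printf("counter = { %u, %u }\n", counter[0], counter[1]);
  	return 0;
  }

An implementation that fails to write back the upper word would leave it
at 0 after the first call (caught by the first new test), and one that
fails to read it back would restart from a wrong value on the second
call (caught by the second new test).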
While at it, remove 'nonce', which is not used anymore after the
replacement of libsodium by the open-coded chacha implementation.
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
---
.../testing/selftests/vDSO/vdso_test_chacha.c | 39 ++++++++++++++-----
1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c
index 9d18d49a82f8..ed6cf372d9ee 100644
--- a/tools/testing/selftests/vDSO/vdso_test_chacha.c
+++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c
@@ -17,11 +17,12 @@ static uint32_t rol32(uint32_t word, unsigned int shift)
return (word << (shift & 31)) | (word >> ((-shift) & 31));
}
-static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, size_t nblocks)
+static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks)
{
uint32_t s[16] = {
0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U,
- key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7]
+ key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7],
+ counter[0], counter[1],
};
while (nblocks--) {
@@ -52,6 +53,8 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, s
if (!++s[12])
++s[13];
}
+ counter[0] = s[12];
+ counter[1] = s[13];
}
typedef uint8_t u8;
@@ -66,8 +69,7 @@ typedef uint64_t u64;
int main(int argc, char *argv[])
{
enum { TRIALS = 1000, BLOCKS = 128, BLOCK_SIZE = 64 };
- static const uint8_t nonce[8] = { 0 };
- uint32_t counter[2];
+ uint32_t counter1[2], counter2[2];
uint32_t key[8];
uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS];
@@ -84,17 +86,36 @@ int main(int argc, char *argv[])
printf("getrandom() failed!\n");
return KSFT_SKIP;
}
- reference_chacha20_blocks(output1, key, BLOCKS);
+ memset(counter1, 0, sizeof(counter1));
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
for (unsigned int split = 0; split < BLOCKS; ++split) {
memset(output2, 'X', sizeof(output2));
- memset(counter, 0, sizeof(counter));
+ memset(counter2, 0, sizeof(counter2));
if (split)
- __arch_chacha20_blocks_nostack(output2, key, counter, split);
- __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter, BLOCKS - split);
- if (memcmp(output1, output2, sizeof(output1)))
+ __arch_chacha20_blocks_nostack(output2, key, counter2, split);
+ __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter2, BLOCKS - split);
+ if (memcmp(output1, output2, sizeof(output1)) ||
+		    memcmp(counter1, counter2, sizeof(counter1)))
return KSFT_FAIL;
}
}
+ memset(counter1, 0, sizeof(counter1));
+ counter1[0] = (uint32_t)-BLOCKS + 2;
+ memset(counter2, 0, sizeof(counter2));
+ counter2[0] = (uint32_t)-BLOCKS + 2;
+
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
+ __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS);
+ if (memcmp(output1, output2, sizeof(output1)) ||
+	    memcmp(counter1, counter2, sizeof(counter1)))
+ return KSFT_FAIL;
+
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
+ __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS);
+ if (memcmp(output1, output2, sizeof(output1)) ||
+	    memcmp(counter1, counter2, sizeof(counter1)))
+ return KSFT_FAIL;
+
ksft_test_result_pass("chacha: PASS\n");
return KSFT_PASS;
}
--
2.44.0
Without -O2, the generated code for the chacha test is awful. GCC even
implements rol32() as a function instead of just using the rotlwi
instruction, and that function is 20 instructions long.
~# time ./vdso_test_chacha
TAP version 13
1..1
ok 1 chacha: PASS
real 0m 37.16s
user 0m 36.89s
sys 0m 0.26s
Several other selftest directories add -O2, and the kernel itself is
always built with optimisation enabled. Do the same for the vDSO
selftests.
With this patch the run time is reduced by approx 15%.
~# time ./vdso_test_chacha
TAP version 13
1..1
ok 1 chacha: PASS
real 0m 32.09s
user 0m 31.86s
sys 0m 0.22s
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
---
tools/testing/selftests/vDSO/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index cfb7c281b22c..96f25aa2f84e 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -13,7 +13,7 @@ TEST_GEN_PROGS += vdso_test_correctness
TEST_GEN_PROGS += vdso_test_getrandom
TEST_GEN_PROGS += vdso_test_chacha
-CFLAGS := -std=gnu99
+CFLAGS := -std=gnu99 -O2
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
--
2.44.0
Hello everyone,
I am working on running Kselftest on an ARM64 platform and have been facing a few issues that I am hoping someone here might have experience with. I have successfully compiled the tests and am able to run most of them, but I am hitting a specific problem with the memory management tests. They fail consistently, even though I have confirmed that the kernel configuration should support them.
The errors I am seeing are related to page allocation failures, and I have double-checked that there is ample memory available on the system. I have also tried running these tests on a different ARM64 platform with a similar kernel configuration and encountered the same issue.
Is this a known problem with ARM64 Kselftest, or is there something unique to my configuration that I am not seeing?
Any suggestions or pointers to relevant documentation would be greatly appreciated.
Thank you
This series wires up getrandom() vDSO implementation on powerpc.
Tested on PPC32 on real hardware.
Tested on PPC64 (both BE and LE) on QEMU:
Performance on powerpc 885:
~# ./vdso_test_getrandom bench-single
vdso: 25000000 times in 62.938002291 seconds
libc: 25000000 times in 535.581916866 seconds
syscall: 25000000 times in 531.525042806 seconds
Performance on powerpc 8321:
~# ./vdso_test_getrandom bench-single
vdso: 25000000 times in 16.899318858 seconds
libc: 25000000 times in 131.050596522 seconds
syscall: 25000000 times in 129.794790389 seconds
Performance on QEMU pseries:
~ # ./vdso_test_getrandom bench-single
vdso: 25000000 times in 4.977777162 seconds
libc: 25000000 times in 75.516749981 seconds
syscall: 25000000 times in 86.842242014 seconds
In order to run selftests, some fixes are needed, see
https://lore.kernel.org/linuxppc-dev/6c5da802e72befecfa09046c489aa45d934d61…
Those selftest fixes are independent and are not required to apply
and use this series.
Changes in v3:
- Rebased on recent random git tree (0c7e00e22c21)
- Fixed build failures reported by robots around VM_DROPPABLE
- Fixed crash on PPC64 due to clobbered r13 by not using r13 anymore (saving it was not enough for signals).
- Split final patch in two, first for PPC32, second for PPC64
- Moved selftest fixes out of this series
Changes in v2:
- Define VM_DROPPABLE for powerpc/32
- Fixes generic vDSO getrandom headers to enable CONFIG_COMPAT build.
- Fixed size of generation counter
- Fixed selftests to work on non x86 architectures
Christophe Leroy (5):
mm: Define VM_DROPPABLE for powerpc/32
powerpc/vdso32: Add crtsavres
powerpc/vdso: Refactor CFLAGS for CVDSO build
powerpc/vdso: Wire up getrandom() vDSO implementation on PPC32
powerpc/vdso: Wire up getrandom() vDSO implementation on PPC64
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/asm-compat.h | 8 +
arch/powerpc/include/asm/mman.h | 2 +-
arch/powerpc/include/asm/vdso/getrandom.h | 54 ++++
arch/powerpc/include/asm/vdso/vsyscall.h | 6 +
arch/powerpc/include/asm/vdso_datapage.h | 2 +
arch/powerpc/kernel/asm-offsets.c | 1 +
arch/powerpc/kernel/vdso/Makefile | 57 ++--
arch/powerpc/kernel/vdso/getrandom.S | 58 ++++
arch/powerpc/kernel/vdso/gettimeofday.S | 13 -
arch/powerpc/kernel/vdso/vdso32.lds.S | 1 +
arch/powerpc/kernel/vdso/vdso64.lds.S | 1 +
arch/powerpc/kernel/vdso/vgetrandom-chacha.S | 299 +++++++++++++++++++
arch/powerpc/kernel/vdso/vgetrandom.c | 14 +
fs/proc/task_mmu.c | 4 +-
include/linux/mm.h | 4 +-
include/trace/events/mmflags.h | 4 +-
tools/arch/powerpc/vdso | 1 +
tools/testing/selftests/vDSO/Makefile | 4 +
19 files changed, 492 insertions(+), 42 deletions(-)
create mode 100644 arch/powerpc/include/asm/vdso/getrandom.h
create mode 100644 arch/powerpc/kernel/vdso/getrandom.S
create mode 100644 arch/powerpc/kernel/vdso/vgetrandom-chacha.S
create mode 100644 arch/powerpc/kernel/vdso/vgetrandom.c
create mode 120000 tools/arch/powerpc/vdso
--
2.44.0
Some applications rely on placing data in the free bits of addresses
allocated by mmap. Various architectures (e.g. x86, arm64, powerpc)
restrict the address returned by mmap to be less than the maximum
address space, unless the hint address is greater than this value.
On arm64 this boundary is at 52 bits and on x86 it is at 56 bits. The
new MAP_BELOW_HINT flag gives applications a way to specify exactly how
many bits they want left unused by mmap. This eliminates the need for
applications to know the system's page table hierarchy in order to
reason about which addresses mmap will be allowed to return.
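A minimal sketch of the intended usage, assuming MAP_BELOW_HINT takes
its value from the uapi headers added by this series and guarantees the
returned address is below the hint:

  #include <stdio.h>
  #include <sys/mman.h>

  #ifndef MAP_BELOW_HINT
  #define MAP_BELOW_HINT 0x0	/* placeholder; use the series' uapi definition */
  #endif

  int main(void)
  {
  	/* Keep the mapping below 2^39 so the upper bits stay free for tags */
  	void *hint = (void *)(1UL << 39);
  	void *p = mmap(hint, 1 << 20, PROT_READ | PROT_WRITE,
  		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_BELOW_HINT, -1, 0);

  	if (p == MAP_FAILED)
  		return 1;

  	/* With the flag honored, (unsigned long)p < (1UL << 39) */
  	printf("mapped at %p\n", p);
  	return 0;
  }

What mmap does when it cannot satisfy the constraint (fail or fall back)
is defined by the series and not assumed here.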
---
riscv made this behavior of mmap returning addresses below the hint
address the default. This was in contrast to the implementation on
x86/arm64, which have a single boundary at the 5-level page table
region. However this restriction proved too great -- the reduced
address space when using a hint address was too small.
A patch for riscv [1] reverts the behavior that broke userspace. This
series serves to make this feature available to all architectures.
I have only tested on riscv and x86. There is a tremendous amount of
duplicated code in mmap, so I believe the implementations across
architectures should be mostly consistent. I added this feature to all
architectures that implement either
arch_get_mmap_end()/arch_get_mmap_base() or
arch_get_unmapped_area_topdown()/arch_get_unmapped_area(). I also added
it to the default behavior for arch_get_mmap_end()/arch_get_mmap_base().
Link: https://lore.kernel.org/lkml/20240826-riscv_mmap-v1-2-cd8962afe47f@rivosinc… [1]
To: Arnd Bergmann <arnd(a)arndb.de>
To: Paul Walmsley <paul.walmsley(a)sifive.com>
To: Palmer Dabbelt <palmer(a)dabbelt.com>
To: Albert Ou <aou(a)eecs.berkeley.edu>
To: Catalin Marinas <catalin.marinas(a)arm.com>
To: Will Deacon <will(a)kernel.org>
To: Michael Ellerman <mpe(a)ellerman.id.au>
To: Nicholas Piggin <npiggin(a)gmail.com>
To: Christophe Leroy <christophe.leroy(a)csgroup.eu>
To: Naveen N Rao <naveen(a)kernel.org>
To: Muchun Song <muchun.song(a)linux.dev>
To: Andrew Morton <akpm(a)linux-foundation.org>
To: Liam R. Howlett <Liam.Howlett(a)oracle.com>
To: Vlastimil Babka <vbabka(a)suse.cz>
To: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
To: Thomas Gleixner <tglx(a)linutronix.de>
To: Ingo Molnar <mingo(a)redhat.com>
To: Borislav Petkov <bp(a)alien8.de>
To: Dave Hansen <dave.hansen(a)linux.intel.com>
To: x86(a)kernel.org
To: H. Peter Anvin <hpa(a)zytor.com>
To: Huacai Chen <chenhuacai(a)kernel.org>
To: WANG Xuerui <kernel(a)xen0n.name>
To: Russell King <linux(a)armlinux.org.uk>
To: Thomas Bogendoerfer <tsbogend(a)alpha.franken.de>
To: James E.J. Bottomley <James.Bottomley(a)HansenPartnership.com>
To: Helge Deller <deller(a)gmx.de>
To: Alexander Gordeev <agordeev(a)linux.ibm.com>
To: Gerald Schaefer <gerald.schaefer(a)linux.ibm.com>
To: Heiko Carstens <hca(a)linux.ibm.com>
To: Vasily Gorbik <gor(a)linux.ibm.com>
To: Christian Borntraeger <borntraeger(a)linux.ibm.com>
To: Sven Schnelle <svens(a)linux.ibm.com>
To: Yoshinori Sato <ysato(a)users.sourceforge.jp>
To: Rich Felker <dalias(a)libc.org>
To: John Paul Adrian Glaubitz <glaubitz(a)physik.fu-berlin.de>
To: David S. Miller <davem(a)davemloft.net>
To: Andreas Larsson <andreas(a)gaisler.com>
To: Shuah Khan <shuah(a)kernel.org>
To: Alexandre Ghiti <alexghiti(a)rivosinc.com>
Cc: linux-arch(a)vger.kernel.org
Cc: linux-kernel(a)vger.kernel.org
Cc: Palmer Dabbelt <palmer(a)rivosinc.com>
Cc: linux-riscv(a)lists.infradead.org
Cc: linux-arm-kernel(a)lists.infradead.org
Cc: linuxppc-dev(a)lists.ozlabs.org
Cc: linux-mm(a)kvack.org
Cc: loongarch(a)lists.linux.dev
Cc: linux-mips(a)vger.kernel.org
Cc: linux-parisc(a)vger.kernel.org
Cc: linux-s390(a)vger.kernel.org
Cc: linux-sh(a)vger.kernel.org
Cc: sparclinux(a)vger.kernel.org
Cc: linux-kselftest(a)vger.kernel.org
Signed-off-by: Charlie Jenkins <charlie(a)rivosinc.com>
---
Charlie Jenkins (16):
mm: Add MAP_BELOW_HINT
riscv: mm: Do not restrict mmap address based on hint
mm: Add flag and len param to arch_get_mmap_base()
mm: Add generic MAP_BELOW_HINT
riscv: mm: Support MAP_BELOW_HINT
arm64: mm: Support MAP_BELOW_HINT
powerpc: mm: Support MAP_BELOW_HINT
x86: mm: Support MAP_BELOW_HINT
loongarch: mm: Support MAP_BELOW_HINT
arm: mm: Support MAP_BELOW_HINT
mips: mm: Support MAP_BELOW_HINT
parisc: mm: Support MAP_BELOW_HINT
s390: mm: Support MAP_BELOW_HINT
sh: mm: Support MAP_BELOW_HINT
sparc: mm: Support MAP_BELOW_HINT
selftests/mm: Create MAP_BELOW_HINT test
arch/arm/mm/mmap.c | 10 ++++++++
arch/arm64/include/asm/processor.h | 34 ++++++++++++++++++++++----
arch/loongarch/mm/mmap.c | 11 +++++++++
arch/mips/mm/mmap.c | 9 +++++++
arch/parisc/include/uapi/asm/mman.h | 1 +
arch/parisc/kernel/sys_parisc.c | 9 +++++++
arch/powerpc/include/asm/task_size_64.h | 36 +++++++++++++++++++++++-----
arch/riscv/include/asm/processor.h | 32 -------------------------
arch/s390/mm/mmap.c | 10 ++++++++
arch/sh/mm/mmap.c | 10 ++++++++
arch/sparc/kernel/sys_sparc_64.c | 8 +++++++
arch/x86/kernel/sys_x86_64.c | 25 ++++++++++++++++---
fs/hugetlbfs/inode.c | 2 +-
include/linux/sched/mm.h | 34 ++++++++++++++++++++++++--
include/uapi/asm-generic/mman-common.h | 1 +
mm/mmap.c | 2 +-
tools/arch/parisc/include/uapi/asm/mman.h | 1 +
tools/include/uapi/asm-generic/mman-common.h | 1 +
tools/testing/selftests/mm/Makefile | 1 +
tools/testing/selftests/mm/map_below_hint.c | 29 ++++++++++++++++++++++
20 files changed, 216 insertions(+), 50 deletions(-)
---
base-commit: 5be63fc19fcaa4c236b307420483578a56986a37
change-id: 20240827-patches-below_hint_mmap-b13d79ae1c55
--
- Charlie