The chacha vDSO selftest doesn't check the way the counter is handled
by __arch_chacha20_blocks_nostack(). It indirectly checks that the
counter is written on exit and read back on new entry, but it doesn't
check that the format is correct. It has led to an invisible erroneous
implementation on powerpc where the counter was written and read in
the wrong byte order.
Also, the counter uses two words, but the test starts with a zero
counter and uses a small number of blocks, so at the end the upper part
of the counter is always 0 and is therefore never checked.
Add a verification of counter's content in addition to the
verification of the output.
Also add two tests where the counter crosses the u32 upper limit. The
first test verifies that the function properly writes back the upper
word, the second test verifies that the function properly reads back
the upper word.
While at it, remove 'nonce' which is not used anymore after the
replacement of libsodium by an open coded chacha implementation.
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
---
.../testing/selftests/vDSO/vdso_test_chacha.c | 39 ++++++++++++++-----
1 file changed, 30 insertions(+), 9 deletions(-)
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c
index 9d18d49a82f8..ed6cf372d9ee 100644
--- a/tools/testing/selftests/vDSO/vdso_test_chacha.c
+++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c
@@ -17,11 +17,12 @@ static uint32_t rol32(uint32_t word, unsigned int shift)
return (word << (shift & 31)) | (word >> ((-shift) & 31));
}
-static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, size_t nblocks)
+static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks)
{
uint32_t s[16] = {
0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U,
- key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7]
+ key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7],
+ counter[0], counter[1],
};
while (nblocks--) {
@@ -52,6 +53,8 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, s
if (!++s[12])
++s[13];
}
+ counter[0] = s[12];
+ counter[1] = s[13];
}
typedef uint8_t u8;
@@ -66,8 +69,7 @@ typedef uint64_t u64;
int main(int argc, char *argv[])
{
enum { TRIALS = 1000, BLOCKS = 128, BLOCK_SIZE = 64 };
- static const uint8_t nonce[8] = { 0 };
- uint32_t counter[2];
+ uint32_t counter1[2], counter2[2];
uint32_t key[8];
uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS];
@@ -84,17 +86,36 @@ int main(int argc, char *argv[])
printf("getrandom() failed!\n");
return KSFT_SKIP;
}
- reference_chacha20_blocks(output1, key, BLOCKS);
+ memset(counter1, 0, sizeof(counter1));
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
for (unsigned int split = 0; split < BLOCKS; ++split) {
memset(output2, 'X', sizeof(output2));
- memset(counter, 0, sizeof(counter));
+ memset(counter2, 0, sizeof(counter2));
if (split)
- __arch_chacha20_blocks_nostack(output2, key, counter, split);
- __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter, BLOCKS - split);
- if (memcmp(output1, output2, sizeof(output1)))
+ __arch_chacha20_blocks_nostack(output2, key, counter2, split);
+ __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter2, BLOCKS - split);
+ if (memcmp(output1, output2, sizeof(output1)) ||
+ memcmp(counter1, counter2, sizeof(counter1)))
return KSFT_FAIL;
}
}
+ memset(counter1, 0, sizeof(counter1));
+ counter1[0] = (uint32_t)-BLOCKS + 2;
+ memset(counter2, 0, sizeof(counter2));
+ counter2[0] = (uint32_t)-BLOCKS + 2;
+
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
+ __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS);
+ if (memcmp(output1, output2, sizeof(output1)) ||
+ memcmp(counter1, counter2, sizeof(counter1)))
+ return KSFT_FAIL;
+
+ reference_chacha20_blocks(output1, key, counter1, BLOCKS);
+ __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS);
+ if (memcmp(output1, output2, sizeof(output1)) ||
+ memcmp(counter1, counter2, sizeof(counter1)))
+ return KSFT_FAIL;
+
ksft_test_result_pass("chacha: PASS\n");
return KSFT_PASS;
}
--
2.44.0
Without -O2, the generated code for testing chacha function is awful.
GCC even implements rol32() as a function instead of just using the
rotlwi instruction, that function is 20 instructions long.
~# time ./vdso_test_chacha
TAP version 13
1..1
ok 1 chacha: PASS
real 0m 37.16s
user 0m 36.89s
sys 0m 0.26s
Several other selftests directories add -O2, and the kernel is also
always built with optimisation active. Do the same for vDSO selftests.
With this patch the time is reduced by approx 15%.
~# time ./vdso_test_chacha
TAP version 13
1..1
ok 1 chacha: PASS
real 0m 32.09s
user 0m 31.86s
sys 0m 0.22s
Signed-off-by: Christophe Leroy <christophe.leroy(a)csgroup.eu>
---
tools/testing/selftests/vDSO/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index cfb7c281b22c..96f25aa2f84e 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -13,7 +13,7 @@ TEST_GEN_PROGS += vdso_test_correctness
TEST_GEN_PROGS += vdso_test_getrandom
TEST_GEN_PROGS += vdso_test_chacha
-CFLAGS := -std=gnu99
+CFLAGS := -std=gnu99 -O2
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
--
2.44.0
Hello everyone,
I am working on running Kselftest on an ARM64 platform and am facing a few issues that I am hoping someone here might have experience with. I have successfully compiled the tests and am able to run most of them, but I am facing a specific problem with the memory management tests. They seem to fail consistently, even though I have confirmed that the kernel configuration should support them.
The errors I am seeing are related to page allocation failures, and I have double-checked that there is ample memory available on the system. I have also tried running these tests on a different ARM64 platform with similar kernel configurations and encountered the same issue.
Is this a known problem with ARM64 Kselftest, or is there something unique to my configuration that I am not seeing?
If you have any advice, suggestions, or pointers to relevant documentation, they would be greatly appreciated.
Thank you
<a href="https://www.igmguru.com/blog/what-is-ampscript-in-salesforce-marketing-cloud">https://www.igmguru.com/blog/what-is-ampscript-in-salesforce-marketing-cloud</a>
This series wires up getrandom() vDSO implementation on powerpc.
Tested on PPC32 on real hardware.
Tested on PPC64 (both BE and LE) on QEMU:
Performance on powerpc 885:
~# ./vdso_test_getrandom bench-single
vdso: 25000000 times in 62.938002291 seconds
libc: 25000000 times in 535.581916866 seconds
syscall: 25000000 times in 531.525042806 seconds
Performance on powerpc 8321:
~# ./vdso_test_getrandom bench-single
vdso: 25000000 times in 16.899318858 seconds
libc: 25000000 times in 131.050596522 seconds
syscall: 25000000 times in 129.794790389 seconds
Performance on QEMU pseries:
~ # ./vdso_test_getrandom bench-single
vdso: 25000000 times in 4.977777162 seconds
libc: 25000000 times in 75.516749981 seconds
syscall: 25000000 times in 86.842242014 seconds
In order to run selftests, some fixes are needed, see
https://lore.kernel.org/linuxppc-dev/6c5da802e72befecfa09046c489aa45d934d61…
Those selftest fixes are independent and are not required to apply
and use this series.
Changes in v3:
- Rebased on recent random git tree (0c7e00e22c21)
- Fixed build failures reported by robots around VM_DROPPABLE
- Fixed crash on PPC64 due to clobbered r13 by not using r13 anymore (saving it was not enough for signals).
- Split final patch in two, first for PPC32, second for PPC64
- Moved selftest fixes out of this series
Changes in v2:
- Define VM_DROPPABLE for powerpc/32
- Fixed generic vDSO getrandom headers to enable CONFIG_COMPAT build.
- Fixed size of generation counter
- Fixed selftests to work on non x86 architectures
Christophe Leroy (5):
mm: Define VM_DROPPABLE for powerpc/32
powerpc/vdso32: Add crtsavres
powerpc/vdso: Refactor CFLAGS for CVDSO build
powerpc/vdso: Wire up getrandom() vDSO implementation on PPC32
powerpc/vdso: Wire up getrandom() vDSO implementation on PPC64
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/asm-compat.h | 8 +
arch/powerpc/include/asm/mman.h | 2 +-
arch/powerpc/include/asm/vdso/getrandom.h | 54 ++++
arch/powerpc/include/asm/vdso/vsyscall.h | 6 +
arch/powerpc/include/asm/vdso_datapage.h | 2 +
arch/powerpc/kernel/asm-offsets.c | 1 +
arch/powerpc/kernel/vdso/Makefile | 57 ++--
arch/powerpc/kernel/vdso/getrandom.S | 58 ++++
arch/powerpc/kernel/vdso/gettimeofday.S | 13 -
arch/powerpc/kernel/vdso/vdso32.lds.S | 1 +
arch/powerpc/kernel/vdso/vdso64.lds.S | 1 +
arch/powerpc/kernel/vdso/vgetrandom-chacha.S | 299 +++++++++++++++++++
arch/powerpc/kernel/vdso/vgetrandom.c | 14 +
fs/proc/task_mmu.c | 4 +-
include/linux/mm.h | 4 +-
include/trace/events/mmflags.h | 4 +-
tools/arch/powerpc/vdso | 1 +
tools/testing/selftests/vDSO/Makefile | 4 +
19 files changed, 492 insertions(+), 42 deletions(-)
create mode 100644 arch/powerpc/include/asm/vdso/getrandom.h
create mode 100644 arch/powerpc/kernel/vdso/getrandom.S
create mode 100644 arch/powerpc/kernel/vdso/vgetrandom-chacha.S
create mode 100644 arch/powerpc/kernel/vdso/vgetrandom.c
create mode 120000 tools/arch/powerpc/vdso
--
2.44.0