Linking to libsodium makes building this test annoying in cross compilation environments and is just way too much. Since this is just a basic correctness test, simply open code a simple, unoptimized, dumb chacha, rather than linking to libsodium.
Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com --- tools/testing/selftests/vDSO/Makefile | 7 +-- .../testing/selftests/vDSO/vdso_test_chacha.c | 46 ++++++++++++++++++- 2 files changed, 45 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 13a626ef64f7..93c50a462858 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null) -SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null)
TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu @@ -14,10 +12,8 @@ endif TEST_GEN_PROGS += vdso_test_correctness ifeq ($(uname_M),x86_64) TEST_GEN_PROGS += vdso_test_getrandom -ifneq ($(SODIUM_LIBS),) TEST_GEN_PROGS += vdso_test_chacha endif -endif
CFLAGS := -std=gnu99
@@ -43,8 +39,7 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ -isystem $(top_srcdir)/include/uapi
$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S -$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS) $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ -idirafter $(top_srcdir)/arch/$(ARCH)/include \ -idirafter $(top_srcdir)/include \ - -D__ASSEMBLY__ -Wa,--noexecstack $(SODIUM_CFLAGS) + -D__ASSEMBLY__ -Wa,--noexecstack diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index ca5639d02969..019e8fbdf570 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -3,7 +3,6 @@ * Copyright (C) 2022-2024 Jason A. Donenfeld Jason@zx2c4.com. All Rights Reserved. */
-#include <sodium/crypto_stream_chacha20.h> #include <sys/random.h> #include <string.h> #include <stdint.h> @@ -14,6 +13,49 @@ typedef uint8_t u8; typedef uint32_t u32; typedef uint64_t u64; #include <vdso/getrandom.h> +#include <tools/le_byteshift.h> + +static u32 rol32(u32 word, unsigned int shift) +{ + return (word << (shift & 31)) | (word >> ((-shift) & 31)); +} + +static void reference_chacha20_blocks(u8 *dst_bytes, const u32 *key, size_t nblocks) +{ + u32 s[16] = { + 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U, + key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7] + }; + + while (nblocks--) { + u32 x[16]; + memcpy(x, s, sizeof(x)); + for (unsigned int r = 0; r < 20; r += 2) { + #define QR(a, b, c, d) ( \ + x[a] += x[b], \ + x[d] = rol32(x[d] ^ x[a], 16), \ + x[c] += x[d], \ + x[b] = rol32(x[b] ^ x[c], 12), \ + x[a] += x[b], \ + x[d] = rol32(x[d] ^ x[a], 8), \ + x[c] += x[d], \ + x[b] = rol32(x[b] ^ x[c], 7)) + + QR(0, 4, 8, 12); + QR(1, 5, 9, 13); + QR(2, 6, 10, 14); + QR(3, 7, 11, 15); + QR(0, 5, 10, 15); + QR(1, 6, 11, 12); + QR(2, 7, 8, 13); + QR(3, 4, 9, 14); + } + for (unsigned int i = 0; i < 16; ++i, dst_bytes += sizeof(u32)) + put_unaligned_le32(x[i] + s[i], dst_bytes); + if (!++s[12]) + ++s[13]; + } +}
int main(int argc, char *argv[]) { @@ -31,7 +73,7 @@ int main(int argc, char *argv[]) printf("getrandom() failed!\n"); return KSFT_SKIP; } - crypto_stream_chacha20(output1, sizeof(output1), nonce, (uint8_t *)key); + reference_chacha20_blocks(output1, key, BLOCKS); for (unsigned int split = 0; split < BLOCKS; ++split) { memset(output2, 'X', sizeof(output2)); memset(counter, 0, sizeof(counter));
Hi Jason,
Le 28/08/2024 à 15:55, Jason A. Donenfeld a écrit :
Linking to libsodium makes building this test annoying in cross compilation environments and is just way too much. Since this is just a basic correctness test, simply open code a simple, unoptimized, dumb chacha, rather than linking to libsodium.
It doesn't work.
Works with the following change:
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index 3a5a08d857cf..7443657aa7da 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -8,6 +8,7 @@ #include <string.h> #include <stdint.h> #include <stdbool.h> +#include <endian.h> #include "../kselftest.h"
static uint32_t rol32(uint32_t word, unsigned int shift) @@ -19,7 +20,8 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, s { uint32_t s[16] = { 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U, - key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7] + le32toh(key[0]), le32toh(key[1]), le32toh(key[2]), le32toh(key[3]), + le32toh(key[4]), le32toh(key[5]), le32toh(key[6]), le32toh(key[7]) };
while (nblocks--) {
Christophe
Signed-off-by: Jason A. Donenfeld Jason@zx2c4.com
tools/testing/selftests/vDSO/Makefile | 7 +-- .../testing/selftests/vDSO/vdso_test_chacha.c | 46 ++++++++++++++++++- 2 files changed, 45 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 13a626ef64f7..93c50a462858 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -SODIUM_LIBS := $(shell pkg-config --libs libsodium 2>/dev/null) -SODIUM_CFLAGS := $(shell pkg-config --cflags libsodium 2>/dev/null) TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu @@ -14,10 +12,8 @@ endif TEST_GEN_PROGS += vdso_test_correctness ifeq ($(uname_M),x86_64) TEST_GEN_PROGS += vdso_test_getrandom -ifneq ($(SODIUM_LIBS),) TEST_GEN_PROGS += vdso_test_chacha endif -endif CFLAGS := -std=gnu99 @@ -43,8 +39,7 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ -isystem $(top_srcdir)/include/uapi $(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(ARCH)/vdso/vgetrandom-chacha.S -$(OUTPUT)/vdso_test_chacha: LDLIBS += $(SODIUM_LIBS) $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ -idirafter $(top_srcdir)/arch/$(ARCH)/include \ -idirafter $(top_srcdir)/include \
-D__ASSEMBLY__ -Wa,--noexecstack $(SODIUM_CFLAGS)
-D__ASSEMBLY__ -Wa,--noexecstack
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index ca5639d02969..019e8fbdf570 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -3,7 +3,6 @@
- Copyright (C) 2022-2024 Jason A. Donenfeld Jason@zx2c4.com. All Rights Reserved.
*/ -#include <sodium/crypto_stream_chacha20.h> #include <sys/random.h> #include <string.h> #include <stdint.h> @@ -14,6 +13,49 @@ typedef uint8_t u8; typedef uint32_t u32; typedef uint64_t u64; #include <vdso/getrandom.h> +#include <tools/le_byteshift.h>
+static u32 rol32(u32 word, unsigned int shift) +{
- return (word << (shift & 31)) | (word >> ((-shift) & 31));
+}
+static void reference_chacha20_blocks(u8 *dst_bytes, const u32 *key, size_t nblocks) +{
- u32 s[16] = {
0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U,
key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7]
- };
- while (nblocks--) {
u32 x[16];
memcpy(x, s, sizeof(x));
for (unsigned int r = 0; r < 20; r += 2) {
#define QR(a, b, c, d) ( \
x[a] += x[b], \
x[d] = rol32(x[d] ^ x[a], 16), \
x[c] += x[d], \
x[b] = rol32(x[b] ^ x[c], 12), \
x[a] += x[b], \
x[d] = rol32(x[d] ^ x[a], 8), \
x[c] += x[d], \
x[b] = rol32(x[b] ^ x[c], 7))
QR(0, 4, 8, 12);
QR(1, 5, 9, 13);
QR(2, 6, 10, 14);
QR(3, 7, 11, 15);
QR(0, 5, 10, 15);
QR(1, 6, 11, 12);
QR(2, 7, 8, 13);
QR(3, 4, 9, 14);
}
for (unsigned int i = 0; i < 16; ++i, dst_bytes += sizeof(u32))
put_unaligned_le32(x[i] + s[i], dst_bytes);
if (!++s[12])
++s[13];
- }
+} int main(int argc, char *argv[]) { @@ -31,7 +73,7 @@ int main(int argc, char *argv[]) printf("getrandom() failed!\n"); return KSFT_SKIP; }
crypto_stream_chacha20(output1, sizeof(output1), nonce, (uint8_t *)key);
for (unsigned int split = 0; split < BLOCKS; ++split) { memset(output2, 'X', sizeof(output2)); memset(counter, 0, sizeof(counter));reference_chacha20_blocks(output1, key, BLOCKS);
Hi Christophe,
On Wed, Aug 28, 2024 at 4:38 PM LEROY Christophe christophe.leroy2@cs-soprasteria.com wrote:
key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7]
le32toh(key[0]), le32toh(key[1]), le32toh(key[2]), le32toh(key[3]),
le32toh(key[4]), le32toh(key[5]), le32toh(key[6]), le32toh(key[7])
Are you sure about that?
From drivers/char/random.c:
static void _get_random_bytes(void *buf, size_t len) { u32 chacha_state[CHACHA_STATE_WORDS]; ... crng_make_state(chacha_state, buf, first_block_len); ... while (len) { if (len < CHACHA_BLOCK_SIZE) { chacha20_block(chacha_state, tmp);
static void crng_make_state(u32 chacha_state[CHACHA_STATE_WORDS], u8 *random_data, size_t random_data_len) { ... crng_fast_key_erasure(base_crng.key, chacha_state, random_data, random_data_len); ... crng_fast_key_erasure(base_crng.key, chacha_state, crng->key, sizeof(crng->key));
static void crng_fast_key_erasure(u8 key[CHACHA_KEY_SIZE], u32 chacha_state[CHACHA_STATE_WORDS], u8 *random_data, size_t random_data_len) { u8 first_block[CHACHA_BLOCK_SIZE];
BUG_ON(random_data_len > 32);
chacha_init_consts(chacha_state); memcpy(&chacha_state[4], key, CHACHA_KEY_SIZE); memset(&chacha_state[12], 0, sizeof(u32) * 4); chacha20_block(chacha_state, first_block);
memcpy(key, first_block, CHACHA_KEY_SIZE); memcpy(random_data, first_block + CHACHA_KEY_SIZE, random_data_len); memzero_explicit(first_block, sizeof(first_block)); }
And then if we look at chacha20_block->chacha_block_generic in lib/crypto/chacha.c:
void chacha_block_generic(u32 *state, u8 *stream, int nrounds) { u32 x[16]; int i;
memcpy(x, state, 64);
chacha_permute(x, nrounds);
for (i = 0; i < ARRAY_SIZE(x); i++) put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
state[12]++; }
So I don't see any endianness conversion happening with the key bytes. They're memcpy'd from rng output bytes directly into native endian u32 words.
You may have an objection to this. But the goal of the vDSO code is to match the kernel's algorithm 1:1 without deviations. To that end, I suspect this patch actually improves the unit test to ensure that.
With regards to your objection, though, if you feel strongly enough about it, I suppose that's something you could attempt to change throughout, with one commit that touches random.c and the vDSO code. I'm not sure whether or not I'd go along with that yet, but if it were to happen, I think that's the way to do it. For now, though, the goal is for the vDSO algorithm to copy the kernel algorithm.
Do you agree that this patch helps the vDSO algorithm copy the kernel algorithm better? Genuinely asking, because maybe I got it all backwards somehow.
Jason
Le 28/08/2024 à 16:49, Jason A. Donenfeld a écrit :
Hi Christophe,
On Wed, Aug 28, 2024 at 4:38 PM LEROY Christophe christophe.leroy2@cs-soprasteria.com wrote:
key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7]
le32toh(key[0]), le32toh(key[1]), le32toh(key[2]), le32toh(key[3]),
le32toh(key[4]), le32toh(key[5]), le32toh(key[6]), le32toh(key[7])
Are you sure about that?
I'm sure it is needed to get the same behaviour as before.
So I don't see any endianness conversion happening with the key bytes. They're memcpy'd from rng output bytes directly into native endian u32 words.
You may have an objection to this. But the goal of the vDSO code is to match the kernel's algorithm 1:1 without deviations. To that end, I suspect this patch actually improves the unit test to ensure that.
With regards to your objection, though, if you feel strongly enough about it, I suppose that's something you could attempt to change throughout, with one commit that touches random.c and the vDSO code. I'm not sure whether or not I'd go along with that yet, but if it were to happen, I think that's the way to do it. For now, though, the goal is for the vDSO algorithm to copy the kernel algorithm.
Do you agree that this patch helps the vDSO algorithm copy the kernel algorithm better? Genuinely asking, because maybe I got it all backwards somehow.
As I said several times, I was a bit puzzled by the fact that I had to read key words in reversed byte order when I implemented the powerpc chacha20. This was needed to get the selftest succeed. So if doing it the new way makes it closer to the kernel implementation, I'm 100% fine with it, it will also slightly simplify powerpc chacha20 vDSO function.
The only thing is that you must describe this behaviour change in your commit message, you can't just let people believe it is a one-to-one replacement of the previous test implementation that used Sodium.
Christophe
On Wed, Aug 28, 2024 at 6:54 PM Christophe Leroy christophe.leroy@csgroup.eu wrote:
The only thing is that you must describe this behaviour change in your commit message, you can't just let people believe it is a one-to-one replacement of the previous test implementation that used Sodium.
Good point, will do.
linux-kselftest-mirror@lists.linaro.org