From: Joey Gouly <joey.gouly(a)arm.com>
commit 387d828adffcf1eb949f3141079c479793c59aac upstream.
Import the latest version of the Arm Optimized Routines strncmp function based
on the upstream code of string/aarch64/strncmp.S at commit 189dfefe37d5 from:
https://github.com/ARM-software/optimized-routines
This latest version includes MTE support.
Note that for simplicity Arm have chosen to contribute this code to Linux under
GPLv2 rather than the original MIT OR Apache-2.0 WITH LLVM-exception license.
Arm is the sole copyright holder for this code.
Signed-off-by: Joey Gouly <joey.gouly(a)arm.com>
Cc: Robin Murphy <robin.murphy(a)arm.com>
Cc: Mark Rutland <mark.rutland(a)arm.com>
Cc: Catalin Marinas <catalin.marinas(a)arm.com>
Cc: Will Deacon <will(a)kernel.org>
Acked-by: Mark Rutland <mark.rutland(a)arm.com>
Acked-by: Catalin Marinas <catalin.marinas(a)arm.com>
Link: https://lore.kernel.org/r/20220301101435.19327-3-joey.gouly@arm.com
(cherry picked from commit 387d828adffcf1eb949f3141079c479793c59aac)
Cc: <stable(a)vger.kernel.org> # 5.15.y only
Fixes: 020b199bc70d ("arm64: Import latest version of Cortex Strings' strncmp")
Reported-by: John Hsu <John.Hsu(a)mediatek.com>
Link: https://lore.kernel.org/all/e9f30f7d5b7d72a3521da31ab2002b49a26f542e.camel@…
Signed-off-by: Will Deacon <will(a)kernel.org>
---
This is a clean cherry-pick of the latest MTE-safe strncmp()
implementation for arm64 which landed in v5.18 and somewhat accidentally
fixed an out-of-bounds read introduced in v5.14.
An alternative would be to disable the optimised code altogether, but
given that this is self-contained and applies cleanly, I'd favour being
consistent with more recent kernels.
arch/arm64/lib/strncmp.S | 244 +++++++++++++++++++++++----------------
1 file changed, 146 insertions(+), 98 deletions(-)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index e42bcfcd37e6..a4884b97e9a8 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -1,9 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * Copyright (c) 2013-2021, Arm Limited.
+ * Copyright (c) 2013-2022, Arm Limited.
*
* Adapted from the original at:
- * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/st…
+ * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/st…
*/
#include <linux/linkage.h>
@@ -11,14 +11,14 @@
/* Assumptions:
*
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64.
+ * MTE compatible.
*/
#define L(label) .L ## label
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
/* Parameters and result. */
#define src1 x0
@@ -39,10 +39,24 @@
#define tmp3 x10
#define zeroones x11
#define pos x12
-#define limit_wd x13
-#define mask x14
-#define endloop x15
+#define mask x13
+#define endloop x14
#define count mask
+#define offset pos
+#define neg_offset x15
+
+/* Define endian dependent shift operations.
+ On big-endian early bytes are at MSB and on little-endian LSB.
+ LS_FW means shifting towards early bytes.
+ LS_BK means shifting towards later bytes.
+ */
+#ifdef __AARCH64EB__
+#define LS_FW lsl
+#define LS_BK lsr
+#else
+#define LS_FW lsr
+#define LS_BK lsl
+#endif
SYM_FUNC_START_WEAK_PI(strncmp)
cbz limit, L(ret0)
@@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp)
and count, src1, #7
b.ne L(misaligned8)
cbnz count, L(mutual_align)
- /* Calculate the number of full and partial words -1. */
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
- lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
@@ -64,30 +75,45 @@ L(loop_aligned):
ldr data1, [src1], #8
ldr data2, [src2], #8
L(start_realigned):
- subs limit_wd, limit_wd, #1
+ subs limit, limit, #8
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
eor diff, data1, data2 /* Non-zero if differences found. */
- csinv endloop, diff, xzr, pl /* Last Dword or differences. */
+ csinv endloop, diff, xzr, hi /* Last Dword or differences. */
bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
ccmp endloop, #0, #0, eq
b.eq L(loop_aligned)
/* End of main loop */
- /* Not reached the limit, must have found the end or a diff. */
- tbz limit_wd, #63, L(not_limit)
-
- /* Limit % 8 == 0 => all bytes significant. */
- ands limit, limit, #7
- b.eq L(not_limit)
-
- lsl limit, limit, #3 /* Bits -> bytes. */
- mov mask, #~0
-#ifdef __AARCH64EB__
- lsr mask, mask, limit
+L(full_check):
+#ifndef __AARCH64EB__
+ orr syndrome, diff, has_nul
+ add limit, limit, 8 /* Rewind limit to before last subs. */
+L(syndrome_check):
+ /* Limit was reached. Check if the NUL byte or the difference
+ is before the limit. */
+ rev syndrome, syndrome
+ rev data1, data1
+ clz pos, syndrome
+ rev data2, data2
+ lsl data1, data1, pos
+ cmp limit, pos, lsr #3
+ lsl data2, data2, pos
+ /* But we need to zero-extend (char is unsigned) the value and then
+ perform a signed 32-bit subtraction. */
+ lsr data1, data1, #56
+ sub result, data1, data2, lsr #56
+ csel result, result, xzr, hi
+ ret
#else
- lsl mask, mask, limit
-#endif
+ /* Not reached the limit, must have found the end or a diff. */
+ tbz limit, #63, L(not_limit)
+ add tmp1, limit, 8
+ cbz limit, L(not_limit)
+
+ lsl limit, tmp1, #3 /* Bits -> bytes. */
+ mov mask, #~0
+ lsr mask, mask, limit
bic data1, data1, mask
bic data2, data2, mask
@@ -95,25 +121,6 @@ L(start_realigned):
orr has_nul, has_nul, mask
L(not_limit):
- orr syndrome, diff, has_nul
-
-#ifndef __AARCH64EB__
- rev syndrome, syndrome
- rev data1, data1
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
- Shifting left now will bring the critical information into the
- top bits. */
- clz pos, syndrome
- rev data2, data2
- lsl data1, data1, pos
- lsl data2, data2, pos
- /* But we need to zero-extend (char is unsigned) the value and then
- perform a signed 32-bit subtraction. */
- lsr data1, data1, #56
- sub result, data1, data2, lsr #56
- ret
-#else
/* For big-endian we cannot use the trick with the syndrome value
as carry-propagation can corrupt the upper bits if the trailing
bytes in the string contain 0x01. */
@@ -134,10 +141,11 @@ L(not_limit):
rev has_nul, has_nul
orr syndrome, diff, has_nul
clz pos, syndrome
- /* The MS-non-zero bit of the syndrome marks either the first bit
- that is different, or the top bit of the first zero byte.
+ /* The most-significant-non-zero bit of the syndrome marks either the
+ first bit that is different, or the top bit of the first zero byte.
Shifting left now will bring the critical information into the
top bits. */
+L(end_quick):
lsl data1, data1, pos
lsl data2, data2, pos
/* But we need to zero-extend (char is unsigned) the value and then
@@ -159,22 +167,12 @@ L(mutual_align):
neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
ldr data2, [src2], #8
mov tmp2, #~0
- sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
-#ifdef __AARCH64EB__
- /* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */
-#else
- /* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */
-#endif
- and tmp3, limit_wd, #7
- lsr limit_wd, limit_wd, #3
- /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
- add limit, limit, count
- add tmp3, tmp3, count
+ LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */
+ /* Adjust the limit and ensure it doesn't overflow. */
+ adds limit, limit, count
+ csinv limit, limit, xzr, lo
orr data1, data1, tmp2
orr data2, data2, tmp2
- add limit_wd, limit_wd, tmp3, lsr #3
b L(start_realigned)
.p2align 4
@@ -197,13 +195,11 @@ L(done):
/* Align the SRC1 to a dword by doing a bytewise compare and then do
the dword loop. */
L(try_misaligned_words):
- lsr limit_wd, limit, #3
- cbz count, L(do_misaligned)
+ cbz count, L(src1_aligned)
neg count, count
and count, count, #7
sub limit, limit, count
- lsr limit_wd, limit, #3
L(page_end_loop):
ldrb data1w, [src1], #1
@@ -214,48 +210,100 @@ L(page_end_loop):
subs count, count, #1
b.hi L(page_end_loop)
-L(do_misaligned):
- /* Prepare ourselves for the next page crossing. Unlike the aligned
- loop, we fetch 1 less dword because we risk crossing bounds on
- SRC2. */
- mov count, #8
- subs limit_wd, limit_wd, #1
- b.lo L(done_loop)
+ /* The following diagram explains the comparison of misaligned strings.
+ The bytes are shown in natural order. For little-endian, it is
+ reversed in the registers. The "x" bytes are before the string.
+ The "|" separates data that is loaded at one time.
+ src1 | a a a a a a a a | b b b c c c c c | . . .
+ src2 | x x x x x a a a a a a a a b b b | c c c c c . . .
+
+ After shifting in each step, the data looks like this:
+ STEP_A STEP_B STEP_C
+ data1 a a a a a a a a b b b c c c c c b b b c c c c c
+ data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c
+
+ The bytes with "0" are eliminated from the syndrome via mask.
+
+ Align SRC2 down to 16 bytes. This way we can read 16 bytes at a
+ time from SRC2. The comparison happens in 3 steps. After each step
+ the loop can exit, or read from SRC1 or SRC2. */
+L(src1_aligned):
+ /* Calculate offset from 8 byte alignment to string start in bits. No
+ need to mask offset since shifts are ignoring upper bits. */
+ lsl offset, src2, #3
+ bic src2, src2, #0xf
+ mov mask, -1
+ neg neg_offset, offset
+ ldr data1, [src1], #8
+ ldp tmp1, tmp2, [src2], #16
+ LS_BK mask, mask, neg_offset
+ and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */
+ /* Skip the first compare if data in tmp1 is irrelevant. */
+ tbnz offset, 6, L(misaligned_mid_loop)
+
L(loop_misaligned):
- and tmp2, src2, #0xff8
- eor tmp2, tmp2, #0xff8
- cbz tmp2, L(page_end_loop)
+ /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/
+ LS_FW data2, tmp1, offset
+ LS_BK tmp1, tmp2, neg_offset
+ subs limit, limit, #8
+ orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/
+ sub has_nul, data1, zeroones
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ orr tmp3, data1, #REP8_7f
+ csinv endloop, diff, xzr, hi /* If limit, set to all ones. */
+ bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */
+ orr tmp3, endloop, has_nul
+ cbnz tmp3, L(full_check)
ldr data1, [src1], #8
- ldr data2, [src2], #8
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp diff, #0, #0, eq
- b.ne L(not_limit)
- subs limit_wd, limit_wd, #1
- b.pl L(loop_misaligned)
+L(misaligned_mid_loop):
+ /* STEP_B: Compare first part of data1 to second part of tmp2. */
+ LS_FW data2, tmp2, offset
+#ifdef __AARCH64EB__
+ /* For big-endian we do a byte reverse to avoid carry-propagation
+ problem described above. This way we can reuse the has_nul in the
+ next step and also use syndrome value trick at the end. */
+ rev tmp3, data1
+ #define data1_fixed tmp3
+#else
+ #define data1_fixed data1
+#endif
+ sub has_nul, data1_fixed, zeroones
+ orr tmp3, data1_fixed, #REP8_7f
+ eor diff, data2, data1 /* Non-zero if differences found. */
+ bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */
+#ifdef __AARCH64EB__
+ rev has_nul, has_nul
+#endif
+ cmp limit, neg_offset, lsr #3
+ orr syndrome, diff, has_nul
+ bic syndrome, syndrome, mask /* Ignore later bytes. */
+ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
+ cbnz tmp3, L(syndrome_check)
-L(done_loop):
- /* We found a difference or a NULL before the limit was reached. */
- and limit, limit, #7
- cbz limit, L(not_limit)
- /* Read the last word. */
- sub src1, src1, 8
- sub src2, src2, 8
- ldr data1, [src1, limit]
- ldr data2, [src2, limit]
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- eor diff, data1, data2 /* Non-zero if differences found. */
- bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
- ccmp diff, #0, #0, eq
- b.ne L(not_limit)
+ /* STEP_C: Compare second part of data1 to first part of tmp1. */
+ ldp tmp1, tmp2, [src2], #16
+ cmp limit, #8
+ LS_BK data2, tmp1, neg_offset
+ eor diff, data2, data1 /* Non-zero if differences found. */
+ orr syndrome, diff, has_nul
+ and syndrome, syndrome, mask /* Ignore earlier bytes. */
+ csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */
+ cbnz tmp3, L(syndrome_check)
+
+ ldr data1, [src1], #8
+ sub limit, limit, #8
+ b L(loop_misaligned)
+
+#ifdef __AARCH64EB__
+L(syndrome_check):
+ clz pos, syndrome
+ cmp pos, limit, lsl #3
+ b.lo L(end_quick)
+#endif
L(ret0):
mov result, #0
ret
-
SYM_FUNC_END_PI(strncmp)
EXPORT_SYMBOL_NOHWKASAN(strncmp)
--
2.42.0.283.g2d96d420d3-goog
[View Less]
Hi stable team, JFYI, the recently mainlined commit 8f7f35e5aa6f21 ("tpm:
Enable hwrng only for Pluton on AMD CPUs") from Jarkko contains a stable
tag, but it might be worth picking up rather sooner than later, as it
fixes a regression that seems to annoy quite a few users of 6.1.y, 6.4.y
and 6.5; that's why at least Fedora is already working on picking the
fix up ahead of the stable-tree.
Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.
[View Less]
There is a missing backport in the 6.1.x and 5.15.x stable trees that
combined with a backported patch as a dependency in the QAT driver
causes a kernel crash at boot under certain conditions.
In 6.1/5.15, the function pkcs1pad_create() in rsa-pkcs1pad.c [1] sets the
reqsize of its akcipher_instance using the value in the akcipher_alg of
the selected akcipher implementation. This assumes that the reqsize
field has been set for the akcipher implementation when the akcipher_alg
has been instantiated. The reqsize field is then used to allocate
memory for pkcs1pad requests.
In commit 80e62ad58db0 ("crypto: qat - Use helper to set reqsize"), the
reqsize for the rsa implementation in the QAT driver is moved from being
set in the akcipher_alg to being set when the tfm is initialized. This
means that the implementation of rsa-pkcs1pad won’t allocate any space
for the akcipher request when using the QAT driver.
This issue occurs only when CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not
set. When the crypto self-test is run, the correct value of the reqsize
is stored in the akcipher_alg in the qat driver by the first call to
akcipher_set_reqsize() and then when pkcs1pad_create() is executed, it
finds the correct value.
Options:
1. Cherry-pick 5b11d1a360ea ("crypto: rsa-pkcs1pad - Use helper to set
reqsize") to both 6.1.x and 5.15.x trees.
2. Revert upstream commit 80e62ad58db0 ("crypto: qat - Use helper
to set reqsize").
In 6.1 revert da1729e6619c414f34ce679247721603ebb957dc
In 5.15 revert 3894f5880f968f81c6f3ed37d96bdea01441a8b7
Option #1 is preferred as the same problem might be impacting other
akcipher implementations besides QAT. Option #2 is just specific to the
QAT driver.
@Herbert, can you have a quick look in case I missed something? I tried
both options in 6.1.51 and they appear to resolve the problem.
Thanks,
[1] https://elixir.bootlin.com/linux/v6.1.51/source/crypto/rsa-pkcs1pad.c#L673
--
Giovanni
[View Less]
From: Doug Smythies <dsmythies(a)telus.net>
commit d51847acb018d83186e4af67bc93f9a00a8644f7 upstream.
This fix applies to all stable kernel versions 5.18+.
The intel_pstate CPU frequency scaling driver does not
use policy->cur and it is 0.
When the CPU frequency is outdated arch_freq_get_on_cpu()
will default to the nominal clock frequency when its call to
cpufreq_quick_getpolicy_cur returns the never updated 0.
Thus, the listed frequency might be outside of currently
set limits. Some users are complaining about the high
reported frequency, albeit stale, when their system is
idle and/or it is above the reduced maximum they have set.
This patch will maintain policy_cur for the intel_pstate
driver at the current minimum CPU frequency.
Reported-by: Yang Jie <yang.jie(a)linux.intel.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217597
Signed-off-by: Doug Smythies <dsmythies(a)telus.net>
[ rjw: White space damage fixes and comment adjustment ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki(a)intel.com>
Signed-off-by: Keyon Jie <yang.jie(a)linux.intel.com>
---
drivers/cpufreq/intel_pstate.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d51f90f55c05..fbe3a4098743 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2574,6 +2574,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
intel_pstate_clear_update_util_hook(policy->cpu);
intel_pstate_hwp_set(policy->cpu);
}
+ /*
+ * policy->cur is never updated with the intel_pstate driver, but it
+ * is used as a stale frequency value. So, keep it within limits.
+ */
+ policy->cur = policy->min;
mutex_unlock(&intel_pstate_limits_lock);
--
2.34.1
[View Less]
Stable Group,
Please apply commit 4a032827daa8 ("of: property: Simplify of_link_to_phandle()")
to the 6.1.y stable branch. It was originally part of a series that
was only partially applied to 6.1. Being partially applied left 6.1.y
in a state where a bunch of peripherals were deferred indefinitely on
the am3517-evm.
wl12xx_buf platform: supplier 48002000.scm not ready
wl12xx_vmmc2 platform: supplier wl12xx_buf not ready
48050000.dss platform: supplier display@0 not ready
48064800.ehci platform: supplier hsusb1_phy not ready
backlight platform: supplier 48002000.scm not ready
display@0 platform: supplier backlight not ready
dmtimer-pwm@11 platform: supplier 48002000.scm not ready
hsusb1_phy platform: supplier 48002000.scm not ready
gpio-leds platform: supplier 48002000.scm not ready
480b4000.mmc platform: supplier wl12xx_vmmc2 not ready
Applying the above commit appears to resolve most of the deferred
peripherals.
Fixes: eaf9b5612a47 ("driver core: fw_devlink: Don't purge child
fwnode's consumer links")
Signed-off-by: Adam Ford <aford173(a)gmail.com>
[View Less]
Dzień dobry,
możemy zaproponować Państwu laptopy, komputery stacjonarne, monitory, drukarki (fabrycznie nowy sprzęt) i inne rozwiązania sprzętowe w znacznie niższej cenie i możliwością bezpłatnej konfiguracji według potrzeb użytkowników (Ram, dysk, modem WWAN).
Zapewniamy różne formy finansowania – leasing, najem długoterminowy czy odroczony termin płatności. Gwarantujemy szybką reakcję na zapotrzebowanie i profesjonalny serwis posprzedażowy.
Chcieliby Państwo sprawdzić co możemy zaoferować?
…
[View More]
Z pozdrowieniami
Adam Halbert
[View Less]
The MAC address is stored at offset 0x107 in the EEPROM, as correctly
stated in the comment. Add a two-byte reserved field right before the
MAC address to shift it from offset 0x105 to 0x107.
With this the MAC address returned from my RTL8723du wifi stick can be
correctly decoded as "Shenzhen Four Seas Global Link Network Technology
Co., Ltd."
Signed-off-by: Sascha Hauer <s.hauer(a)pengutronix.de>
Reported-by: Yanik Fuchs <Yanik.fuchs(a)mbv.ch>
Cc: stable(a)vger.kernel.org
---
drivers/net/wireless/realtek/rtw88/rtw8723d.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 3642a2c7f80c9..2434e2480cbe2 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -46,6 +46,7 @@ struct rtw8723du_efuse {
u8 vender_id[2]; /* 0x100 */
u8 product_id[2]; /* 0x102 */
u8 usb_option; /* 0x104 */
+ u8 res5[2]; /* 0x105 */
u8 mac_addr[ETH_ALEN]; /* 0x107 */
};
--
2.39.2
[View Less]