The quilt patch titled
Subject: mm/damon/stat: change last_refresh_jiffies to a global variable
has been removed from the -mm tree. Its filename was
mm-damon-stat-change-last_refresh_jiffies-to-a-global-variable.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Quanmin Yan <yanquanmin1(a)huawei.com>
Subject: mm/damon/stat: change last_refresh_jiffies to a global variable
Date: Thu, 30 Oct 2025 10:07:45 +0800
Patch series "mm/damon: fixes for the jiffies-related issues", v2.
On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this can cause the
time_before() family of functions to return unexpected values, resulting
in DAMON not functioning as intended. Similar issues also exist in
certain user operation scenarios.
This patchset addresses these issues. The first patch is about the
DAMON_STAT module, and the second patch is about the core layer's sysfs.
This patch (of 2):
In DAMON_STAT's damon_stat_damon_call_fn(), time_before_eq() is used to
avoid unnecessarily frequent stat updates.
On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make
jiffies wrap bugs appear earlier. However, this causes time_before_eq()
in DAMON_STAT to unexpectedly return true during the first 5 minutes after
boot on 32-bit systems (see [1], which fixed an earlier jiffies-related
issue, for more explanation). As a result, DAMON_STAT does not update
any monitoring results during that period, which becomes even more
confusing when DAMON_STAT_ENABLED_DEFAULT is enabled.
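To make the arithmetic concrete, here is a minimal userspace sketch, not
kernel code: HZ is assumed to be 100, and INITIAL_JIFFIES/time_before_eq()
are modeled on include/linux/jiffies.h using explicit 32-bit types.

#include <stdint.h>
#include <stdio.h>

#define HZ 100
/* 32-bit jiffies boots at "-5 minutes" to surface wrap bugs early. */
#define INITIAL_JIFFIES ((uint32_t)(-300 * HZ))
/* Modeled on the kernel macro: true if a is before or equal to b. */
#define time_before_eq(a, b) ((int32_t)((uint32_t)(a) - (uint32_t)(b)) <= 0)

int main(void)
{
	uint32_t jiffies = INITIAL_JIFFIES;	/* value right after boot */
	uint32_t last_refresh_jiffies = 0;	/* static, zero-initialized */

	/* Prints 1: the "refreshed less than 5s ago" check wrongly holds,
	 * so DAMON_STAT skips updates for the first ~5 minutes of uptime. */
	printf("%d\n", time_before_eq(jiffies, last_refresh_jiffies + 5 * HZ));

	jiffies += 306 * HZ;	/* a bit over 5 minutes of uptime later */
	/* Prints 0: the check finally allows updates. */
	printf("%d\n", time_before_eq(jiffies, last_refresh_jiffies + 5 * HZ));
	return 0;
}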
There is also an issue unrelated to the system's word size[2]: if the user
stops DAMON_STAT just after last_refresh_jiffies is updated and restarts
it after 5 seconds or a longer delay, last_refresh_jiffies will retain an
older value, causing time_before_eq() to return false and the update to
happen earlier than expected.
Fix these issues by making last_refresh_jiffies a global variable and
initializing it each time DAMON_STAT is started.
Link: https://lkml.kernel.org/r/20251030020746.967174-2-yanquanmin1@huawei.com
Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1]
Link: https://lore.kernel.org/all/20251028143250.50144-1-sj@kernel.org/ [2]
Fixes: fabdd1e911da ("mm/damon/stat: calculate and expose estimated memory bandwidth")
Signed-off-by: Quanmin Yan <yanquanmin1(a)huawei.com>
Suggested-by: SeongJae Park <sj(a)kernel.org>
Reviewed-by: SeongJae Park <sj(a)kernel.org>
Cc: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Cc: ze zuo <zuoze1(a)huawei.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/damon/stat.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
--- a/mm/damon/stat.c~mm-damon-stat-change-last_refresh_jiffies-to-a-global-variable
+++ a/mm/damon/stat.c
@@ -46,6 +46,8 @@ MODULE_PARM_DESC(aggr_interval_us,
static struct damon_ctx *damon_stat_context;
+static unsigned long damon_stat_last_refresh_jiffies;
+
static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c)
{
struct damon_target *t;
@@ -130,13 +132,12 @@ static void damon_stat_set_idletime_perc
static int damon_stat_damon_call_fn(void *data)
{
struct damon_ctx *c = data;
- static unsigned long last_refresh_jiffies;
/* avoid unnecessarily frequent stat update */
- if (time_before_eq(jiffies, last_refresh_jiffies +
+ if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies +
msecs_to_jiffies(5 * MSEC_PER_SEC)))
return 0;
- last_refresh_jiffies = jiffies;
+ damon_stat_last_refresh_jiffies = jiffies;
aggr_interval_us = c->attrs.aggr_interval;
damon_stat_set_estimated_memory_bandwidth(c);
@@ -210,6 +211,8 @@ static int damon_stat_start(void)
err = damon_start(&damon_stat_context, 1, true);
if (err)
return err;
+
+ damon_stat_last_refresh_jiffies = jiffies;
call_control.data = damon_stat_context;
return damon_call(damon_stat_context, &call_control);
}
_
Patches currently in -mm which might be from yanquanmin1(a)huawei.com are
mm-damon-add-a-min_sz_region-parameter-to-damon_set_region_biggest_system_ram_default.patch
mm-damon-reclaim-use-min_sz_region-for-core-address-alignment-when-setting-regions.patch
The quilt patch titled
Subject: mm/mremap: honour writable bit in mremap pte batching
has been removed from the -mm tree. Its filename was
mm-mremap-honour-writable-bit-in-mremap-pte-batching.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Dev Jain <dev.jain(a)arm.com>
Subject: mm/mremap: honour writable bit in mremap pte batching
Date: Tue, 28 Oct 2025 12:09:52 +0530
Currently, the mremap folio pte batch ignores the writable bit when
figuring out a set of similar ptes mapping the same folio. Suppose that the first
pte of the batch is writable while the others are not - set_ptes will end
up setting the writable bit on the other ptes, which is a violation of
mremap semantics. Therefore, use FPB_RESPECT_WRITE to check the writable
bit while determining the pte batch.
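A toy userspace model of the failure mode (not kernel code; the permission
bit and the set_ptes() stand-in are made up for illustration):

#include <stdio.h>

#define PTE_WRITE 0x2u	/* made-up permission bit, not a real pte layout */

/* Toy stand-in for set_ptes(): replicates the first entry's permission
 * bits across the whole batch, which is what happens when the batch was
 * built while ignoring the writable bit. */
static void set_ptes(unsigned int *dst, unsigned int first, int nr)
{
	for (int i = 0; i < nr; i++)
		dst[i] = first;
}

int main(void)
{
	/* First pte writable, the rest write-protected. */
	unsigned int src[4] = { PTE_WRITE, 0, 0, 0 };
	unsigned int dst[4];

	set_ptes(dst, src[0], 4);
	for (int i = 0; i < 4; i++)
		printf("pte%d writable=%d\n", i, !!(dst[i] & PTE_WRITE));

	/* All four now come out writable -- the semantics violation.
	 * Respecting the write bit would have capped the batch at 1. */
	return 0;
}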
Link: https://lkml.kernel.org/r/20251028063952.90313-1-dev.jain@arm.com
Signed-off-by: Dev Jain <dev.jain(a)arm.com>
Fixes: f822a9a81a31 ("mm: optimize mremap() by PTE batching")
Reported-by: David Hildenbrand <david(a)redhat.com>
Debugged-by: David Hildenbrand <david(a)redhat.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Acked-by: Pedro Falcato <pfalcato(a)suse.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Jann Horn <jannh(a)google.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org> [6.17+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mremap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/mremap.c~mm-mremap-honour-writable-bit-in-mremap-pte-batching
+++ a/mm/mremap.c
@@ -187,7 +187,7 @@ static int mremap_folio_pte_batch(struct
if (!folio || !folio_test_large(folio))
return 1;
- return folio_pte_batch(folio, ptep, pte, max_nr);
+ return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE);
}
static int move_ptes(struct pagetable_move_control *pmc,
_
Patches currently in -mm which might be from dev.jain(a)arm.com are
The quilt patch titled
Subject: gcov: add support for GCC 15
has been removed from the -mm tree. Its filename was
gcov-add-support-for-gcc-15.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Peter Oberparleiter <oberpar(a)linux.ibm.com>
Subject: gcov: add support for GCC 15
Date: Tue, 28 Oct 2025 12:51:25 +0100
Using gcov on kernels compiled with GCC 15 results in truncated 16-byte
long .gcda files with no usable data. To fix this, update GCOV_COUNTERS
to match the value defined by GCC 15.
Tested with GCC 14.3.0 and GCC 15.2.0.
Link: https://lkml.kernel.org/r/20251028115125.1319410-1-oberpar@linux.ibm.com
Signed-off-by: Peter Oberparleiter <oberpar(a)linux.ibm.com>
Reported-by: Matthieu Baerts <matttbe(a)kernel.org>
Closes: https://github.com/linux-test-project/lcov/issues/445
Tested-by: Matthieu Baerts <matttbe(a)kernel.org>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
kernel/gcov/gcc_4_7.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
--- a/kernel/gcov/gcc_4_7.c~gcov-add-support-for-gcc-15
+++ a/kernel/gcov/gcc_4_7.c
@@ -18,7 +18,9 @@
#include <linux/mm.h>
#include "gcov.h"
-#if (__GNUC__ >= 14)
+#if (__GNUC__ >= 15)
+#define GCOV_COUNTERS 10
+#elif (__GNUC__ >= 14)
#define GCOV_COUNTERS 9
#elif (__GNUC__ >= 10)
#define GCOV_COUNTERS 8
_
Patches currently in -mm which might be from oberpar(a)linux.ibm.com are
The quilt patch titled
Subject: mm/mm_init: fix hash table order logging in alloc_large_system_hash()
has been removed from the -mm tree. Its filename was
mm-mm_init-fix-hash-table-order-logging-in-alloc_large_system_hash.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: "Isaac J. Manjarres" <isaacmanjarres(a)google.com>
Subject: mm/mm_init: fix hash table order logging in alloc_large_system_hash()
Date: Tue, 28 Oct 2025 12:10:12 -0700
When emitting the order of the allocation for a hash table,
alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from the
log base 2 of the allocation size. This is not correct if the allocation
size is smaller than a page, and it yields a negative value for the
order, as seen below:
TCP established hash table entries: 32 (order: -4, 256 bytes, linear)
TCP bind hash table entries: 32 (order: -2, 1024 bytes, linear)
Use get_order() to compute the order when emitting the hash table
information to correctly handle cases where the allocation size is smaller
than a page:
TCP established hash table entries: 32 (order: 0, 256 bytes, linear)
TCP bind hash table entries: 32 (order: 0, 1024 bytes, linear)
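The difference can be reproduced with a small userspace sketch
(PAGE_SHIFT assumed to be 12; ilog2() and get_order() re-implemented here
for illustration):

#include <stdio.h>

#define PAGE_SHIFT 12	/* assuming 4 KiB pages */

static int ilog2_(unsigned long n)	/* floor(log2(n)), n > 0 */
{
	int i = -1;

	while (n) {
		n >>= 1;
		i++;
	}
	return i;
}

static int get_order_(unsigned long size)	/* smallest fitting order */
{
	int order = 0;

	while ((1UL << (PAGE_SHIFT + order)) < size)
		order++;
	return order;
}

int main(void)
{
	unsigned long sizes[] = { 256, 1024, 4096, 8192 };

	for (int i = 0; i < 4; i++)
		printf("size=%lu old_order=%d new_order=%d\n", sizes[i],
		       ilog2_(sizes[i]) - PAGE_SHIFT, get_order_(sizes[i]));
	/* size=256:  old_order=-4, new_order=0
	 * size=1024: old_order=-2, new_order=0 */
	return 0;
}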
Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Isaac J. Manjarres <isaacmanjarres(a)google.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt(a)kernel.org>
Reviewed-by: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/mm_init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- a/mm/mm_init.c~mm-mm_init-fix-hash-table-order-logging-in-alloc_large_system_hash
+++ a/mm/mm_init.c
@@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(con
panic("Failed to allocate %s hash table\n", tablename);
pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n",
- tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size,
+ tablename, 1UL << log2qty, get_order(size), size,
virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear");
if (_hash_shift)
_
Patches currently in -mm which might be from isaacmanjarres(a)google.com are
The quilt patch titled
Subject: mm/truncate: unmap large folio on split failure
has been removed from the -mm tree. Its filename was
mm-truncate-unmap-large-folio-on-split-failure.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kiryl Shutsemau <kas(a)kernel.org>
Subject: mm/truncate: unmap large folio on split failure
Date: Mon, 27 Oct 2025 11:56:36 +0000
Accesses within a VMA, but beyond i_size rounded up to PAGE_SIZE, are
supposed to generate SIGBUS.
This behavior might not be respected on truncation.
During truncation, the kernel splits a large folio in order to reclaim
memory. As a side effect, it unmaps the folio and destroys PMD mappings
of the folio. The folio will be refaulted as PTEs and SIGBUS semantics
are preserved.
However, if the split fails, PMD mappings are preserved and the user will
not receive SIGBUS on any accesses within the PMD.
Unmap the folio on split failure. This will lead to refaulting as PTEs
and preserve SIGBUS semantics.

Make an exception for shmem/tmpfs, which has long been intentionally
mapped with PMDs across i_size.
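The expected semantics can be checked from userspace with a sketch like
the following (error handling omitted; this exercises the SIGBUS rule on
truncation itself, not the huge-folio split-failure path specifically):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	int fd = open("testfile", O_RDWR | O_CREAT | O_TRUNC, 0600);
	char *p;

	ftruncate(fd, 4 * psz);			/* 4-page file */
	p = mmap(NULL, 4 * psz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	p[0] = 1;				/* fault the mapping in */

	ftruncate(fd, 1);			/* shrink i_size to 1 byte */

	/* Pages wholly past i_size (pages 1..3) must now raise SIGBUS.
	 * With a PMD mapping preserved after a failed split, this access
	 * would wrongly succeed instead of delivering the signal. */
	p[psz] = 1;
	printf("no SIGBUS -- semantics violated\n");
	return 0;
}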
Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name
Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios")
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/truncate.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)
--- a/mm/truncate.c~mm-truncate-unmap-large-folio-on-split-failure
+++ a/mm/truncate.c
@@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_
return 0;
}
+static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at,
+ unsigned long min_order)
+{
+ enum ttu_flags ttu_flags =
+ TTU_SYNC |
+ TTU_SPLIT_HUGE_PMD |
+ TTU_IGNORE_MLOCK;
+ int ret;
+
+ ret = try_folio_split_to_order(folio, split_at, min_order);
+
+ /*
+ * If the split fails, unmap the folio, so it will be refaulted
+ * with PTEs to respect SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ if (ret && !shmem_mapping(folio->mapping)) {
+ try_to_unmap(folio, ttu_flags);
+ WARN_ON(folio_mapped(folio));
+ }
+
+ return ret;
+}
+
/*
* Handle partial folios. The folio may be entirely within the
* range if a split has raced with us. If not, we zero the part of the
@@ -226,7 +252,7 @@ bool truncate_inode_partial_folio(struct
min_order = mapping_min_folio_order(folio->mapping);
split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE);
- if (!try_folio_split_to_order(folio, split_at, min_order)) {
+ if (!try_folio_split_or_unmap(folio, split_at, min_order)) {
/*
* try to split at offset + length to make sure folios within
* the range can be dropped, especially to avoid memory waste
@@ -250,13 +276,10 @@ bool truncate_inode_partial_folio(struct
if (!folio_trylock(folio2))
goto out;
- /*
- * make sure folio2 is large and does not change its mapping.
- * Its split result does not matter here.
- */
+ /* make sure folio2 is large and does not change its mapping */
if (folio_test_large(folio2) &&
folio2->mapping == folio->mapping)
- try_folio_split_to_order(folio2, split_at2, min_order);
+ try_folio_split_or_unmap(folio2, split_at2, min_order);
folio_unlock(folio2);
out:
_
Patches currently in -mm which might be from kas(a)kernel.org are
The quilt patch titled
Subject: mm/memory: do not populate page table entries beyond i_size
has been removed from the -mm tree. Its filename was
mm-memory-do-not-populate-page-table-entries-beyond-i_size.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Kiryl Shutsemau <kas(a)kernel.org>
Subject: mm/memory: do not populate page table entries beyond i_size
Date: Mon, 27 Oct 2025 11:56:35 +0000
Patch series "Fix SIGBUS semantics with large folios", v3.
Accessing memory within a VMA, but beyond i_size rounded up to the next
page size, is supposed to generate SIGBUS.
Darrick reported[1] an xfstests regression in v6.18-rc1. generic/749
failed due to missing SIGBUS. This was caused by my recent changes that
try to fault in the whole folio where possible:
19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()")
357b92761d94 ("mm/filemap: map entire large folio faultaround")
These changes did not consider i_size when setting up PTEs, leading to
xfstest breakage.
However, the problem has been present in the kernel for a long time -
since huge tmpfs was introduced in 2016. The kernel happily maps
PMD-sized folios as PMD without checking i_size. And huge=always tmpfs
allocates PMD-size folios on any writes.
I considered this corner case when I implemented a large tmpfs, and my
conclusion was that no one in their right mind should rely on receiving a
SIGBUS signal when accessing beyond i_size. I cannot imagine how it could
be useful for the workload.
But apparently filesystem folks care a lot about preserving strict SIGBUS
semantics.
Generic/749 was introduced last year with reference to POSIX, but no real
workloads were mentioned. It also acknowledged the tmpfs deviation from
the test case.
POSIX indeed says[3]:
References within the address range starting at pa and
continuing for len bytes to whole pages following the end of an
object shall result in delivery of a SIGBUS signal.
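This quoted requirement is easy to observe with a minimal userspace
sketch (error handling omitted; the second access is expected to die
with SIGBUS):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	int fd = open("f", O_RDWR | O_CREAT | O_TRUNC, 0600);
	char *p;

	ftruncate(fd, 1);	/* i_size = 1 byte */
	p = mmap(NULL, 2 * psz, PROT_READ, MAP_SHARED, fd, 0);

	printf("%d\n", p[psz - 1]);	/* same page as i_size: reads 0 */
	printf("%d\n", p[psz]);		/* whole page past EOF: SIGBUS */
	return 0;
}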
The patchset fixes the regression introduced by recent changes as well as
more subtle SIGBUS breakage due to split failure on truncation.
This patch (of 2):
Accesses within a VMA, but beyond i_size rounded up to PAGE_SIZE, are
supposed to generate SIGBUS.
Recent changes attempted to fault in full folio where possible. They did
not respect i_size, which led to populating PTEs beyond i_size and
breaking SIGBUS semantics.
Darrick reported generic/749 breakage because of this.
However, the problem existed before the recent changes. With huge=always
tmpfs, any write to a file leads to a PMD-size allocation. A following
fault-in of the folio will install a PMD mapping regardless of i_size.
Fix filemap_map_pages() and finish_fault() to not install:

 - PTEs beyond i_size;
 - PMD mappings across i_size.

Make an exception for shmem/tmpfs, which has long been intentionally
mapped with PMDs across i_size.
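A standalone sketch of the clamping arithmetic with made-up values
(DIV_ROUND_UP and the can_map_large condition mirrored from the patch
below):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long i_size = 5000;	/* file covers pages 0 and 1 */
	unsigned long file_end = DIV_ROUND_UP(i_size, PAGE_SIZE) - 1;
	unsigned long folio_index = 0, folio_nr_pages = 512; /* PMD-size */

	/* Last page index that may get a PTE: end_pgoff is clamped here. */
	printf("file_end = %lu\n", file_end);	/* 1 */

	/* A PMD mapping is allowed only if the folio ends within i_size. */
	printf("can_map_large = %d\n",
	       file_end >= folio_index + folio_nr_pages);	/* 0 */
	return 0;
}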
Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name
Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name
Signed-off-by: Kiryl Shutsemau <kas(a)kernel.org>
Fixes: 6795801366da ("xfs: Support large folios")
Reported-by: "Darrick J. Wong" <djwong(a)kernel.org>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: Hugh Dickins <hughd(a)google.com>
Cc: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Michal Hocko <mhocko(a)suse.com>
Cc: Mike Rapoport <rppt(a)kernel.org>
Cc: Rik van Riel <riel(a)surriel.com>
Cc: Shakeel Butt <shakeel.butt(a)linux.dev>
Cc: Suren Baghdasaryan <surenb(a)google.com>
Cc: Vlastimil Babka <vbabka(a)suse.cz>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/filemap.c | 28 ++++++++++++++++++++--------
mm/memory.c | 20 +++++++++++++++++++-
2 files changed, 39 insertions(+), 9 deletions(-)
--- a/mm/filemap.c~mm-memory-do-not-populate-page-table-entries-beyond-i_size
+++ a/mm/filemap.c
@@ -3681,7 +3681,8 @@ skip:
static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
struct folio *folio, unsigned long start,
unsigned long addr, unsigned int nr_pages,
- unsigned long *rss, unsigned short *mmap_miss)
+ unsigned long *rss, unsigned short *mmap_miss,
+ bool can_map_large)
{
unsigned int ref_from_caller = 1;
vm_fault_t ret = 0;
@@ -3696,7 +3697,7 @@ static vm_fault_t filemap_map_folio_rang
* The folio must not cross VMA or page table boundary.
*/
addr0 = addr - start * PAGE_SIZE;
- if (folio_within_vma(folio, vmf->vma) &&
+ if (can_map_large && folio_within_vma(folio, vmf->vma) &&
(addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) {
vmf->pte -= start;
page -= start;
@@ -3811,13 +3812,27 @@ vm_fault_t filemap_map_pages(struct vm_f
unsigned long rss = 0;
unsigned int nr_pages = 0, folio_type;
unsigned short mmap_miss = 0, mmap_miss_saved;
+ bool can_map_large;
rcu_read_lock();
folio = next_uptodate_folio(&xas, mapping, end_pgoff);
if (!folio)
goto out;
- if (filemap_map_pmd(vmf, folio, start_pgoff)) {
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
+ end_pgoff = min(end_pgoff, file_end);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ can_map_large = shmem_mapping(mapping) ||
+ file_end >= folio_next_index(folio);
+
+ if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -3830,10 +3845,6 @@ vm_fault_t filemap_map_pages(struct vm_f
goto out;
}
- file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1;
- if (end_pgoff > file_end)
- end_pgoff = file_end;
-
folio_type = mm_counter_file(folio);
do {
unsigned long end;
@@ -3850,7 +3861,8 @@ vm_fault_t filemap_map_pages(struct vm_f
else
ret |= filemap_map_folio_range(vmf, folio,
xas.xa_index - folio->index, addr,
- nr_pages, &rss, &mmap_miss);
+ nr_pages, &rss, &mmap_miss,
+ can_map_large);
folio_unlock(folio);
} while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);
--- a/mm/memory.c~mm-memory-do-not-populate-page-table-entries-beyond-i_size
+++ a/mm/memory.c
@@ -65,6 +65,7 @@
#include <linux/gfp.h>
#include <linux/migrate.h>
#include <linux/string.h>
+#include <linux/shmem_fs.h>
#include <linux/memory-tiers.h>
#include <linux/debugfs.h>
#include <linux/userfaultfd_k.h>
@@ -5501,8 +5502,25 @@ fallback:
return ret;
}
+ if (!needs_fallback && vma->vm_file) {
+ struct address_space *mapping = vma->vm_file->f_mapping;
+ pgoff_t file_end;
+
+ file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
+
+ /*
+ * Do not allow to map with PTEs beyond i_size and with PMD
+ * across i_size to preserve SIGBUS semantics.
+ *
+ * Make an exception for shmem/tmpfs that for long time
+ * intentionally mapped with PMDs across i_size.
+ */
+ needs_fallback = !shmem_mapping(mapping) &&
+ file_end < folio_next_index(folio);
+ }
+
if (pmd_none(*vmf->pmd)) {
- if (folio_test_pmd_mappable(folio)) {
+ if (!needs_fallback && folio_test_pmd_mappable(folio)) {
ret = do_set_pmd(vmf, folio, page);
if (ret != VM_FAULT_FALLBACK)
return ret;
_
Patches currently in -mm which might be from kas(a)kernel.org are
The quilt patch titled
Subject: fs/proc: fix uaf in proc_readdir_de()
has been removed from the -mm tree. Its filename was
fs-proc-fix-uaf-in-proc_readdir_de.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Wei Yang <albinwyang(a)tencent.com>
Subject: fs/proc: fix uaf in proc_readdir_de()
Date: Sat, 25 Oct 2025 10:42:33 +0800
A pde is erased from the subdir rbtree through rb_erase(), but the node
is not set to EMPTY, which may result in a UAF access. We should use
RB_CLEAR_NODE() to set the erased node to EMPTY; then pde_subdir_next()
will return NULL, avoiding the UAF access.
We found a UAF issue during stress-ng testing; the getdent and tun test
cases need to run at the same time. The steps of the issue are as follows:

1) use getdent to traverse the dir /proc/pid/net/dev_snmp6/; the current
   pde is tun3;
2) in the [time window], unregister netdevices tun3 and tun2 and erase
   them from the rbtree: tun3 is erased first, then tun2, and pde(tun2)
   is released to the slab;
3) the getdents process continues; pde_subdir_next() then returns the
   released pde(tun2), which causes a UAF access.
CPU 0                                    | CPU 1
-----------------------------------------------------------------------
traverse dir /proc/pid/net/dev_snmp6/    | unregister_netdevice(tun->dev) //tun3 tun2
sys_getdents64()                         |
  iterate_dir()                          |
    proc_readdir()                       |
      proc_readdir_de()                  |   snmp6_unregister_dev()
        pde_get(de);                     |     proc_remove()
        read_unlock(&proc_subdir_lock);  |       remove_proc_subtree()
                                         |         write_lock(&proc_subdir_lock);
        [time window]                    |         rb_erase(&root->subdir_node, &parent->subdir);
                                         |         write_unlock(&proc_subdir_lock);
        read_lock(&proc_subdir_lock);    |
        next = pde_subdir_next(de);      |
        pde_put(de);                     |
        de = next; //UAF                 |
rbtree of dev_snmp6
         |
     pde(tun3)
     /       \
  NULL    pde(tun2)
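The convention pde_erase() relies on can be modeled in userspace (the
macros are modeled on include/linux/rbtree.h; the surrounding struct is
simplified):

#include <stdio.h>

struct rb_node {
	unsigned long __rb_parent_color;
};

/* Modeled on include/linux/rbtree.h: an "empty" node is one whose
 * parent pointer points at itself, which rb_next() checks first. */
#define RB_EMPTY_NODE(n)  ((n)->__rb_parent_color == (unsigned long)(n))
#define RB_CLEAR_NODE(n)  ((n)->__rb_parent_color = (unsigned long)(n))

int main(void)
{
	struct rb_node de = { .__rb_parent_color = 0xdead };

	/* After a bare rb_erase() the node keeps stale parent/child
	 * pointers, so a racing pde_subdir_next() can walk into memory
	 * that was already freed. */
	printf("empty=%d\n", RB_EMPTY_NODE(&de));	/* 0: looks linked */

	RB_CLEAR_NODE(&de);	/* what pde_erase() adds after rb_erase() */
	printf("empty=%d\n", RB_EMPTY_NODE(&de));	/* 1: the walk stops */
	return 0;
}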
Link: https://lkml.kernel.org/r/20251025024233.158363-1-albin_yang@163.com
Signed-off-by: Wei Yang <albinwyang(a)tencent.com>
Cc: Al Viro <viro(a)zeniv.linux.org.uk>
Cc: Christian Brauner <brauner(a)kernel.org>
Cc: wangzijie <wangzijie1(a)honor.com>
Cc: Alexey Dobriyan <adobriyan(a)gmail.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
fs/proc/generic.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
--- a/fs/proc/generic.c~fs-proc-fix-uaf-in-proc_readdir_de
+++ a/fs/proc/generic.c
@@ -698,6 +698,12 @@ void pde_put(struct proc_dir_entry *pde)
}
}
+static void pde_erase(struct proc_dir_entry *pde, struct proc_dir_entry *parent)
+{
+ rb_erase(&pde->subdir_node, &parent->subdir);
+ RB_CLEAR_NODE(&pde->subdir_node);
+}
+
/*
* Remove a /proc entry and free it if it's not currently in use.
*/
@@ -720,7 +726,7 @@ void remove_proc_entry(const char *name,
WARN(1, "removing permanent /proc entry '%s'", de->name);
de = NULL;
} else {
- rb_erase(&de->subdir_node, &parent->subdir);
+ pde_erase(de, parent);
if (S_ISDIR(de->mode))
parent->nlink--;
}
@@ -764,7 +770,7 @@ int remove_proc_subtree(const char *name
root->parent->name, root->name);
return -EINVAL;
}
- rb_erase(&root->subdir_node, &parent->subdir);
+ pde_erase(root, parent);
de = root;
while (1) {
@@ -776,7 +782,7 @@ int remove_proc_subtree(const char *name
next->parent->name, next->name);
return -EINVAL;
}
- rb_erase(&next->subdir_node, &de->subdir);
+ pde_erase(next, de);
de = next;
continue;
}
_
Patches currently in -mm which might be from albinwyang(a)tencent.com are
The quilt patch titled
Subject: mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 order
has been removed from the -mm tree. Its filename was
mm-huge_memory-preserve-pg_has_hwpoisoned-if-a-folio-is-split-to-0-order.patch
This patch was dropped because it was merged into the mm-hotfixes-stable branch
of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
------------------------------------------------------
From: Zi Yan <ziy(a)nvidia.com>
Subject: mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 order
Date: Wed, 22 Oct 2025 23:05:21 -0400
Folio split clears PG_has_hwpoisoned, but the flag should be preserved
in after-split folios that contain pages with the PG_hwpoisoned flag if
the folio is split to >0 order folios. Scan all pages in a to-be-split
folio to determine which after-split folios need the flag.

An alternative is to change PG_has_hwpoisoned to PG_maybe_hwpoisoned to
avoid the scan and set it on all after-split folios, but the resulting
false positives have an undesirable negative impact. To remove the false
positives, callers of folio_test_has_hwpoisoned() and
folio_contain_hwpoisoned_page() would need to do the scan themselves.
That might be a hassle for current and future callers and more costly
than doing the scan in the split code. More details are discussed in [1].
This issue can be exposed via:

1. splitting a has_hwpoisoned folio to >0 order from the debugfs
   interface;
2. truncating part of a has_hwpoisoned folio in
   truncate_inode_partial_folio().

Later accesses to a hwpoisoned page then become possible due to the
missing has_hwpoisoned folio flag, which will lead to MCE errors.
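The scan the patch adds can be sketched in userspace (the chunking logic
mirrors __split_folio_to_order() below; the orders are made up):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors page_range_has_hwpoisoned(): any poisoned page in the range? */
static bool range_has_hwpoison(const bool *poison, long nr)
{
	for (long i = 0; i < nr; i++)
		if (poison[i])
			return true;
	return false;
}

int main(void)
{
	const int old_order = 4, new_order = 2;	/* 16 pages -> 4x4 pages */
	bool poison[16] = { false };

	poison[9] = true;	/* one hwpoisoned page, in pages 8..11 */

	/* Only the after-split folio that actually contains the poisoned
	 * page keeps has_hwpoisoned; the other three come out clean. */
	for (long i = 0; i < (1L << old_order); i += 1L << new_order)
		printf("folio at page %2ld: has_hwpoisoned=%d\n", i,
		       range_has_hwpoison(&poison[i], 1L << new_order));
	return 0;
}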
Link: https://lore.kernel.org/all/CAHbLzkoOZm0PXxE9qwtF4gKR=cpRXrSrJ9V9Pm2DJexs98… [1]
Link: https://lkml.kernel.org/r/20251023030521.473097-1-ziy@nvidia.com
Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages")
Signed-off-by: Zi Yan <ziy(a)nvidia.com>
Acked-by: David Hildenbrand <david(a)redhat.com>
Reviewed-by: Yang Shi <yang(a)os.amperecomputing.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes(a)oracle.com>
Reviewed-by: Lance Yang <lance.yang(a)linux.dev>
Reviewed-by: Miaohe Lin <linmiaohe(a)huawei.com>
Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com>
Reviewed-by: Wei Yang <richard.weiyang(a)gmail.com>
Cc: Pankaj Raghav <kernel(a)pankajraghav.com>
Cc: Barry Song <baohua(a)kernel.org>
Cc: Dev Jain <dev.jain(a)arm.com>
Cc: Jane Chu <jane.chu(a)oracle.com>
Cc: Liam Howlett <liam.howlett(a)oracle.com>
Cc: Luis Chamberalin <mcgrof(a)kernel.org>
Cc: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Cc: Naoya Horiguchi <nao.horiguchi(a)gmail.com>
Cc: Nico Pache <npache(a)redhat.com>
Cc: Ryan Roberts <ryan.roberts(a)arm.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/huge_memory.c | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
--- a/mm/huge_memory.c~mm-huge_memory-preserve-pg_has_hwpoisoned-if-a-folio-is-split-to-0-order
+++ a/mm/huge_memory.c
@@ -3263,6 +3263,14 @@ bool can_split_folio(struct folio *folio
caller_pins;
}
+static bool page_range_has_hwpoisoned(struct page *page, long nr_pages)
+{
+ for (; nr_pages; page++, nr_pages--)
+ if (PageHWPoison(page))
+ return true;
+ return false;
+}
+
/*
* It splits @folio into @new_order folios and copies the @folio metadata to
* all the resulting folios.
@@ -3270,17 +3278,24 @@ bool can_split_folio(struct folio *folio
static void __split_folio_to_order(struct folio *folio, int old_order,
int new_order)
{
+ /* Scan poisoned pages when split a poisoned folio to large folios */
+ const bool handle_hwpoison = folio_test_has_hwpoisoned(folio) && new_order;
long new_nr_pages = 1 << new_order;
long nr_pages = 1 << old_order;
long i;
+ folio_clear_has_hwpoisoned(folio);
+
+ /* Check first new_nr_pages since the loop below skips them */
+ if (handle_hwpoison &&
+ page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages))
+ folio_set_has_hwpoisoned(folio);
/*
* Skip the first new_nr_pages, since the new folio from them have all
* the flags from the original folio.
*/
for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) {
struct page *new_head = &folio->page + i;
-
/*
* Careful: new_folio is not a "real" folio before we cleared PageTail.
* Don't pass it around before clear_compound_head().
@@ -3322,6 +3337,10 @@ static void __split_folio_to_order(struc
(1L << PG_dirty) |
LRU_GEN_MASK | LRU_REFS_MASK));
+ if (handle_hwpoison &&
+ page_range_has_hwpoisoned(new_head, new_nr_pages))
+ folio_set_has_hwpoisoned(new_folio);
+
new_folio->mapping = folio->mapping;
new_folio->index = folio->index + i;
@@ -3422,8 +3441,6 @@ static int __split_unmapped_folio(struct
if (folio_test_anon(folio))
mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
- folio_clear_has_hwpoisoned(folio);
-
/*
* split to new_order one order at a time. For uniform split,
* folio is split to new_order directly.
_
Patches currently in -mm which might be from ziy(a)nvidia.com are
mm-huge_memory-fix-folio-split-check-for-anon-folios-in-swapcache.patch
mm-huge_memory-add-split_huge_page_to_order.patch
mm-memory-failure-improve-large-block-size-folio-handling.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related-fix.patch
mm-huge_memory-fix-kernel-doc-comments-for-folio_split-and-related-fix-2.patch
migrate-optimise-alloc_migration_target-fix.patch