The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 70458f8a6b44daf3ad39f0d9b6d1097c8a7780ed
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081556-icon-nursing-ee42@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 70458f8a6b44daf3ad39f0d9b6d1097c8a7780ed Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan(a)kernel.org>
Date: Fri, 25 Jul 2025 19:12:10 +0200
Subject: [PATCH] net: enetc: fix device and OF node leak at probe
Make sure to drop the references to the IERB OF node and platform device
taken by of_parse_phandle() and of_find_device_by_node() during probe.
Fixes: e7d48e5fbf30 ("net: enetc: add a mini driver for the Integrated Endpoint Register Block")
Cc: stable(a)vger.kernel.org # 5.13
Cc: Vladimir Oltean <vladimir.oltean(a)nxp.com>
Signed-off-by: Johan Hovold <johan(a)kernel.org>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Link: https://patch.msgid.link/20250725171213.880-3-johan@kernel.org
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index f63a29e2e031..de0fb272c847 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -829,19 +829,29 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
{
struct platform_device *ierb_pdev;
struct device_node *ierb_node;
+ int ret;
ierb_node = of_find_compatible_node(NULL, NULL,
"fsl,ls1028a-enetc-ierb");
- if (!ierb_node || !of_device_is_available(ierb_node))
+ if (!ierb_node)
return -ENODEV;
+ if (!of_device_is_available(ierb_node)) {
+ of_node_put(ierb_node);
+ return -ENODEV;
+ }
+
ierb_pdev = of_find_device_by_node(ierb_node);
of_node_put(ierb_node);
if (!ierb_pdev)
return -EPROBE_DEFER;
- return enetc_ierb_register_pf(ierb_pdev, pdev);
+ ret = enetc_ierb_register_pf(ierb_pdev, pdev);
+
+ put_device(&ierb_pdev->dev);
+
+ return ret;
}
static const struct enetc_si_ops enetc_psi_ops = {
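For anyone hand-porting this to 6.1.y: with the hunks above applied, the helper
in enetc_pf.c reads roughly as below. This is only a reconstruction for
reference (assuming the surrounding 6.1 code still matches the context lines),
not a verbatim copy of any tree:

static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
{
	struct platform_device *ierb_pdev;
	struct device_node *ierb_node;
	int ret;

	/* of_find_compatible_node() returns the node with a reference held */
	ierb_node = of_find_compatible_node(NULL, NULL,
					    "fsl,ls1028a-enetc-ierb");
	if (!ierb_node)
		return -ENODEV;

	if (!of_device_is_available(ierb_node)) {
		of_node_put(ierb_node);		/* drop the node reference */
		return -ENODEV;
	}

	/* of_find_device_by_node() takes a reference on the device */
	ierb_pdev = of_find_device_by_node(ierb_node);
	of_node_put(ierb_node);			/* node no longer needed */
	if (!ierb_pdev)
		return -EPROBE_DEFER;

	ret = enetc_ierb_register_pf(ierb_pdev, pdev);

	put_device(&ierb_pdev->dev);		/* drop the device reference */

	return ret;
}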
Hi,
Please apply commit e37617c8e53a1f7fcba6d5e1041f4fd8a2425c27; it fixes the
regression caused by ada8d7fa0ad49a2a078f97f7f6e02d24d3c357a3
("sched/cpufreq: Rework schedutil governor performance estimation"), which
was introduced in v6.6.89.
Best Regards
Wentao Guan
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 65ba2a6e77e9e5c843a591055789050e77b5c65e
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081521-dangle-drapery-9a9a@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 65ba2a6e77e9e5c843a591055789050e77b5c65e Mon Sep 17 00:00:00 2001
From: Siddharth Vadapalli <s-vadapalli(a)ti.com>
Date: Mon, 23 Jun 2025 15:36:57 +0530
Subject: [PATCH] arm64: dts: ti: k3-j722s-evm: Fix USB gpio-hog level for
Type-C
According to the "GPIO Expander Map / Table" section of the J722S EVM
Schematic within the Evaluation Module Design Files package [0], the
GPIO Pin P05 located on the GPIO Expander 1 (I2C0/0x23) has to be pulled
down to select the Type-C interface. Since commit under Fixes claims to
enable the Type-C interface, update the property within "p05-hog" from
"output-high" to "output-low", thereby switching from the Type-A
interface to the Type-C interface.
[0]: https://www.ti.com/lit/zip/sprr495
Cc: stable(a)vger.kernel.org
Fixes: 485705df5d5f ("arm64: dts: ti: k3-j722s: Enable PCIe and USB support on J722S-EVM")
Signed-off-by: Siddharth Vadapalli <s-vadapalli(a)ti.com>
Link: https://lore.kernel.org/r/20250623100657.4082031-1-s-vadapalli@ti.com
Signed-off-by: Vignesh Raghavendra <vigneshr(a)ti.com>
diff --git a/arch/arm64/boot/dts/ti/k3-j722s-evm.dts b/arch/arm64/boot/dts/ti/k3-j722s-evm.dts
index a47852fdca70..d0533723412a 100644
--- a/arch/arm64/boot/dts/ti/k3-j722s-evm.dts
+++ b/arch/arm64/boot/dts/ti/k3-j722s-evm.dts
@@ -634,7 +634,7 @@ p05-hog {
/* P05 - USB2.0_MUX_SEL */
gpio-hog;
gpios = <5 GPIO_ACTIVE_LOW>;
- output-high;
+ output-low;
};
p01_hog: p01-hog {
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x f2e467a48287c868818085aa35389a224d226732
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081535-enrage-raging-295b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2e467a48287c868818085aa35389a224d226732 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Fri, 11 Jul 2025 18:33:36 +0200
Subject: [PATCH] eventpoll: Fix semi-unbounded recursion
Ensure that epoll instances can never form a graph deeper than
EP_MAX_NESTS+1 links.
Currently, ep_loop_check_proc() ensures that the graph is loop-free and
does some recursion depth checks, but those recursion depth checks don't
limit the depth of the resulting tree for two reasons:
- They don't look upwards in the tree.
- If there are multiple downwards paths of different lengths, only one of
the paths is actually considered for the depth check since commit
28d82dc1c4ed ("epoll: limit paths").
Essentially, the current recursion depth check in ep_loop_check_proc() just
serves to prevent it from recursing too deeply while checking for loops.
A more thorough check is done in reverse_path_check() after the new graph
edge has already been created; this checks, among other things, that no
paths going upwards from any non-epoll file with a length of more than 5
edges exist. However, this check does not apply to epoll files.
As a result, it is possible to recurse to a depth of at least roughly 500,
tested on v6.15. (I am unsure if deeper recursion is possible; and this may
have changed with commit 8c44dac8add7 ("eventpoll: Fix priority inversion
problem").)
To fix it:
1. In ep_loop_check_proc(), note the subtree depth of each visited node,
and use subtree depths for the total depth calculation even when a subtree
has already been visited.
2. Add ep_get_upwards_depth_proc() for similarly determining the maximum
depth of an upwards walk.
3. In ep_loop_check(), use these values to limit the total path length
between epoll nodes to EP_MAX_NESTS edges.
Fixes: 22bacca48a17 ("epoll: prevent creating circular epoll structures")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Link: https://lore.kernel.org/20250711-epoll-recursion-fix-v1-1-fb2457c33292@goog…
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d4dbffdedd08..44648cc09250 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,6 +218,7 @@ struct eventpoll {
/* used to optimize loop detection check */
u64 gen;
struct hlist_head refs;
+ u8 loop_check_depth;
/*
* usage count, used together with epitem->dying to
@@ -2142,23 +2143,24 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
}
/**
- * ep_loop_check_proc - verify that adding an epoll file inside another
- * epoll structure does not violate the constraints, in
- * terms of closed loops, or too deep chains (which can
- * result in excessive stack usage).
+ * ep_loop_check_proc - verify that adding an epoll file @ep inside another
+ * epoll file does not create closed loops, and
+ * determine the depth of the subtree starting at @ep
*
* @ep: the &struct eventpoll to be currently checked.
* @depth: Current depth of the path being checked.
*
- * Return: %zero if adding the epoll @file inside current epoll
- * structure @ep does not violate the constraints, or %-1 otherwise.
+ * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep.
*/
static int ep_loop_check_proc(struct eventpoll *ep, int depth)
{
- int error = 0;
+ int result = 0;
struct rb_node *rbp;
struct epitem *epi;
+ if (ep->gen == loop_check_gen)
+ return ep->loop_check_depth;
+
mutex_lock_nested(&ep->mtx, depth + 1);
ep->gen = loop_check_gen;
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
@@ -2166,13 +2168,11 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
if (unlikely(is_file_epoll(epi->ffd.file))) {
struct eventpoll *ep_tovisit;
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->gen == loop_check_gen)
- continue;
if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS)
- error = -1;
+ result = INT_MAX;
else
- error = ep_loop_check_proc(ep_tovisit, depth + 1);
- if (error != 0)
+ result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1);
+ if (result > EP_MAX_NESTS)
break;
} else {
/*
@@ -2186,9 +2186,27 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
list_file(epi->ffd.file);
}
}
+ ep->loop_check_depth = result;
mutex_unlock(&ep->mtx);
- return error;
+ return result;
+}
+
+/**
+ * ep_get_upwards_depth_proc - determine depth of @ep when traversed upwards
+ */
+static int ep_get_upwards_depth_proc(struct eventpoll *ep, int depth)
+{
+ int result = 0;
+ struct epitem *epi;
+
+ if (ep->gen == loop_check_gen)
+ return ep->loop_check_depth;
+ hlist_for_each_entry_rcu(epi, &ep->refs, fllink)
+ result = max(result, ep_get_upwards_depth_proc(epi->ep, depth + 1) + 1);
+ ep->gen = loop_check_gen;
+ ep->loop_check_depth = result;
+ return result;
}
/**
@@ -2204,8 +2222,22 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
*/
static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to)
{
+ int depth, upwards_depth;
+
inserting_into = ep;
- return ep_loop_check_proc(to, 0);
+ /*
+ * Check how deep down we can get from @to, and whether it is possible
+ * to loop up to @ep.
+ */
+ depth = ep_loop_check_proc(to, 0);
+ if (depth > EP_MAX_NESTS)
+ return -1;
+ /* Check how far up we can go from @ep. */
+ rcu_read_lock();
+ upwards_depth = ep_get_upwards_depth_proc(ep, 0);
+ rcu_read_unlock();
+
+ return (depth+1+upwards_depth > EP_MAX_NESTS) ? -1 : 0;
}
static void clear_tfile_check_list(void)
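For reference, the limit this patch enforces is easy to observe from user
space: chain epoll instances together with EPOLL_CTL_ADD and watch where the
kernel refuses the add (epoll_ctl(2) documents ELOOP for circular or overly
deep epoll graphs). A minimal sketch, making no claim about the exact depth or
errno on any particular kernel:

/* Chain epoll fds: each new instance monitors the previous one. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/epoll.h>

int main(void)
{
	int prev = epoll_create1(0);

	if (prev < 0) {
		perror("epoll_create1");
		return 1;
	}

	for (int depth = 1; depth <= 16; depth++) {
		int cur = epoll_create1(0);
		struct epoll_event ev = { .events = EPOLLIN, .data.fd = prev };

		if (cur < 0) {
			perror("epoll_create1");
			return 1;
		}
		if (epoll_ctl(cur, EPOLL_CTL_ADD, prev, &ev) < 0) {
			printf("EPOLL_CTL_ADD refused at depth %d: %s\n",
			       depth, strerror(errno));
			return 0;
		}
		prev = cur;
	}
	printf("built a 16-deep chain without an error\n");
	return 0;
}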
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x f2e467a48287c868818085aa35389a224d226732
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081534-circling-deepen-8561@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From f2e467a48287c868818085aa35389a224d226732 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh(a)google.com>
Date: Fri, 11 Jul 2025 18:33:36 +0200
Subject: [PATCH] eventpoll: Fix semi-unbounded recursion
Ensure that epoll instances can never form a graph deeper than
EP_MAX_NESTS+1 links.
Currently, ep_loop_check_proc() ensures that the graph is loop-free and
does some recursion depth checks, but those recursion depth checks don't
limit the depth of the resulting tree for two reasons:
- They don't look upwards in the tree.
- If there are multiple downwards paths of different lengths, only one of
the paths is actually considered for the depth check since commit
28d82dc1c4ed ("epoll: limit paths").
Essentially, the current recursion depth check in ep_loop_check_proc() just
serves to prevent it from recursing too deeply while checking for loops.
A more thorough check is done in reverse_path_check() after the new graph
edge has already been created; this checks, among other things, that no
paths going upwards from any non-epoll file with a length of more than 5
edges exist. However, this check does not apply to epoll files.
As a result, it is possible to recurse to a depth of at least roughly 500,
tested on v6.15. (I am unsure if deeper recursion is possible; and this may
have changed with commit 8c44dac8add7 ("eventpoll: Fix priority inversion
problem").)
To fix it:
1. In ep_loop_check_proc(), note the subtree depth of each visited node,
and use subtree depths for the total depth calculation even when a subtree
has already been visited.
2. Add ep_get_upwards_depth_proc() for similarly determining the maximum
depth of an upwards walk.
3. In ep_loop_check(), use these values to limit the total path length
between epoll nodes to EP_MAX_NESTS edges.
Fixes: 22bacca48a17 ("epoll: prevent creating circular epoll structures")
Cc: stable(a)vger.kernel.org
Signed-off-by: Jann Horn <jannh(a)google.com>
Link: https://lore.kernel.org/20250711-epoll-recursion-fix-v1-1-fb2457c33292@goog…
Signed-off-by: Christian Brauner <brauner(a)kernel.org>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d4dbffdedd08..44648cc09250 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,6 +218,7 @@ struct eventpoll {
/* used to optimize loop detection check */
u64 gen;
struct hlist_head refs;
+ u8 loop_check_depth;
/*
* usage count, used together with epitem->dying to
@@ -2142,23 +2143,24 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
}
/**
- * ep_loop_check_proc - verify that adding an epoll file inside another
- * epoll structure does not violate the constraints, in
- * terms of closed loops, or too deep chains (which can
- * result in excessive stack usage).
+ * ep_loop_check_proc - verify that adding an epoll file @ep inside another
+ * epoll file does not create closed loops, and
+ * determine the depth of the subtree starting at @ep
*
* @ep: the &struct eventpoll to be currently checked.
* @depth: Current depth of the path being checked.
*
- * Return: %zero if adding the epoll @file inside current epoll
- * structure @ep does not violate the constraints, or %-1 otherwise.
+ * Return: depth of the subtree, or INT_MAX if we found a loop or went too deep.
*/
static int ep_loop_check_proc(struct eventpoll *ep, int depth)
{
- int error = 0;
+ int result = 0;
struct rb_node *rbp;
struct epitem *epi;
+ if (ep->gen == loop_check_gen)
+ return ep->loop_check_depth;
+
mutex_lock_nested(&ep->mtx, depth + 1);
ep->gen = loop_check_gen;
for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
@@ -2166,13 +2168,11 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
if (unlikely(is_file_epoll(epi->ffd.file))) {
struct eventpoll *ep_tovisit;
ep_tovisit = epi->ffd.file->private_data;
- if (ep_tovisit->gen == loop_check_gen)
- continue;
if (ep_tovisit == inserting_into || depth > EP_MAX_NESTS)
- error = -1;
+ result = INT_MAX;
else
- error = ep_loop_check_proc(ep_tovisit, depth + 1);
- if (error != 0)
+ result = max(result, ep_loop_check_proc(ep_tovisit, depth + 1) + 1);
+ if (result > EP_MAX_NESTS)
break;
} else {
/*
@@ -2186,9 +2186,27 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
list_file(epi->ffd.file);
}
}
+ ep->loop_check_depth = result;
mutex_unlock(&ep->mtx);
- return error;
+ return result;
+}
+
+/**
+ * ep_get_upwards_depth_proc - determine depth of @ep when traversed upwards
+ */
+static int ep_get_upwards_depth_proc(struct eventpoll *ep, int depth)
+{
+ int result = 0;
+ struct epitem *epi;
+
+ if (ep->gen == loop_check_gen)
+ return ep->loop_check_depth;
+ hlist_for_each_entry_rcu(epi, &ep->refs, fllink)
+ result = max(result, ep_get_upwards_depth_proc(epi->ep, depth + 1) + 1);
+ ep->gen = loop_check_gen;
+ ep->loop_check_depth = result;
+ return result;
}
/**
@@ -2204,8 +2222,22 @@ static int ep_loop_check_proc(struct eventpoll *ep, int depth)
*/
static int ep_loop_check(struct eventpoll *ep, struct eventpoll *to)
{
+ int depth, upwards_depth;
+
inserting_into = ep;
- return ep_loop_check_proc(to, 0);
+ /*
+ * Check how deep down we can get from @to, and whether it is possible
+ * to loop up to @ep.
+ */
+ depth = ep_loop_check_proc(to, 0);
+ if (depth > EP_MAX_NESTS)
+ return -1;
+ /* Check how far up we can go from @ep. */
+ rcu_read_lock();
+ upwards_depth = ep_get_upwards_depth_proc(ep, 0);
+ rcu_read_unlock();
+
+ return (depth+1+upwards_depth > EP_MAX_NESTS) ? -1 : 0;
}
static void clear_tfile_check_list(void)
On Wed, Aug 13, 2025 at 06:25:32PM +0100, Jon Hunter wrote:
> On Wed, Aug 13, 2025 at 08:48:28AM -0700, Jon Hunter wrote:
> > On Tue, 12 Aug 2025 19:43:28 +0200, Greg Kroah-Hartman wrote:
> > > This is the start of the stable review cycle for the 6.15.10 release.
> > > There are 480 patches in this series, all will be posted as a response
> > > to this one. If anyone has any issues with these being applied, please
> > > let me know.
> > >
> > > Responses should be made by Thu, 14 Aug 2025 17:42:20 +0000.
> > > Anything received after that time might be too late.
> > >
> > > The whole patch series can be found in one patch at:
> > > https://www.kernel.org/pub/linux/kernel/v6.x/stable-review/patch-6.15.10-rc…
> > > or in the git tree and branch at:
> > > git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-6.15.y
> > > and the diffstat can be found below.
> > >
> > > thanks,
> > >
> > > greg k-h
> >
> > Failures detected for Tegra ...
> >
> > Test results for stable-v6.15:
> > 10 builds: 10 pass, 0 fail
> > 28 boots: 28 pass, 0 fail
> > 120 tests: 119 pass, 1 fail
> >
> > Linux version: 6.15.10-rc1-g2510f67e2e34
> > Boards tested: tegra124-jetson-tk1, tegra186-p2771-0000,
> > tegra186-p3509-0000+p3636-0001, tegra194-p2972-0000,
> > tegra194-p3509-0000+p3668-0000, tegra20-ventana,
> > tegra210-p2371-2180, tegra210-p3450-0000,
> > tegra30-cardhu-a04
> >
> > Test failures: tegra194-p2972-0000: boot.py
>
> I am seeing the following kernel warning for both linux-6.15.y and linux-6.16.y …
>
> WARNING KERN sched: DL replenish lagged too much
>
> I believe that this is introduced by …
>
> Peter Zijlstra <peterz(a)infradead.org>
> sched/deadline: Less agressive dl_server handling
>
> This has been reported here: https://lore.kernel.org/all/CAMuHMdXn4z1pioTtBGMfQM0jsLviqS2jwysaWXpoLxWYoG…
I've now dropped this.
Is that causing the test failure for you?
thanks,
greg k-h
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x eec76ea5a7213c48529a46eed1b343e5cee3aaab
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081559-frighten-antivirus-2da1@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eec76ea5a7213c48529a46eed1b343e5cee3aaab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:58 -0700
Subject: [PATCH] lib/crypto: arm64/poly1305: Fix register corruption in
no-SIMD contexts
Restore the SIMD usability check that was removed by commit a59e5468a921
("crypto: arm64/poly1305 - Add block-only interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use may_use_simd() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: a59e5468a921 ("crypto: arm64/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c
index c9a74766785b..31aea21ce42f 100644
--- a/lib/crypto/arm64/poly1305-glue.c
+++ b/lib/crypto/arm64/poly1305-glue.c
@@ -7,6 +7,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/poly1305.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -33,7 +34,7 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int len, u32 padbit)
{
len = round_down(len, POLY1305_BLOCK_SIZE);
- if (static_branch_likely(&have_neon)) {
+ if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
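The check being restored follows the standard arm64 pattern for optional NEON
code: only enter a kernel_neon_begin()/kernel_neon_end() section when
may_use_simd() reports that the FP/SIMD registers are usable in the current
context, and fall back to the scalar path otherwise. A generic sketch of that
shape, with hypothetical my_blocks_*() helpers standing in for the real block
functions:

#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/jump_label.h>
#include <linux/types.h>

/* Hypothetical scalar and NEON implementations of the same operation. */
void my_blocks_scalar(void *state, const u8 *src, unsigned int len);
void my_blocks_neon(void *state, const u8 *src, unsigned int len);

static DEFINE_STATIC_KEY_FALSE(have_neon);

static void my_blocks(void *state, const u8 *src, unsigned int len)
{
	/*
	 * Use NEON only when this context is allowed to touch the FP/SIMD
	 * registers; otherwise they may belong to some other task.
	 */
	if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
		kernel_neon_begin();
		my_blocks_neon(state, src, len);
		kernel_neon_end();
	} else {
		my_blocks_scalar(state, src, len);
	}
}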
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x eec76ea5a7213c48529a46eed1b343e5cee3aaab
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081558-kissable-seducing-8159@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eec76ea5a7213c48529a46eed1b343e5cee3aaab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:58 -0700
Subject: [PATCH] lib/crypto: arm64/poly1305: Fix register corruption in
no-SIMD contexts
Restore the SIMD usability check that was removed by commit a59e5468a921
("crypto: arm64/poly1305 - Add block-only interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use may_use_simd() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: a59e5468a921 ("crypto: arm64/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c
index c9a74766785b..31aea21ce42f 100644
--- a/lib/crypto/arm64/poly1305-glue.c
+++ b/lib/crypto/arm64/poly1305-glue.c
@@ -7,6 +7,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/poly1305.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -33,7 +34,7 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int len, u32 padbit)
{
len = round_down(len, POLY1305_BLOCK_SIZE);
- if (static_branch_likely(&have_neon)) {
+ if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x eec76ea5a7213c48529a46eed1b343e5cee3aaab
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081558-designate-eatery-407a@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eec76ea5a7213c48529a46eed1b343e5cee3aaab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:58 -0700
Subject: [PATCH] lib/crypto: arm64/poly1305: Fix register corruption in
no-SIMD contexts
Restore the SIMD usability check that was removed by commit a59e5468a921
("crypto: arm64/poly1305 - Add block-only interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use may_use_simd() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: a59e5468a921 ("crypto: arm64/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c
index c9a74766785b..31aea21ce42f 100644
--- a/lib/crypto/arm64/poly1305-glue.c
+++ b/lib/crypto/arm64/poly1305-glue.c
@@ -7,6 +7,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/poly1305.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -33,7 +34,7 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int len, u32 padbit)
{
len = round_down(len, POLY1305_BLOCK_SIZE);
- if (static_branch_likely(&have_neon)) {
+ if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
The patch below does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x eec76ea5a7213c48529a46eed1b343e5cee3aaab
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081557-sinless-cleat-22ee@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eec76ea5a7213c48529a46eed1b343e5cee3aaab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:58 -0700
Subject: [PATCH] lib/crypto: arm64/poly1305: Fix register corruption in
no-SIMD contexts
Restore the SIMD usability check that was removed by commit a59e5468a921
("crypto: arm64/poly1305 - Add block-only interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use may_use_simd() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: a59e5468a921 ("crypto: arm64/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c
index c9a74766785b..31aea21ce42f 100644
--- a/lib/crypto/arm64/poly1305-glue.c
+++ b/lib/crypto/arm64/poly1305-glue.c
@@ -7,6 +7,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/poly1305.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -33,7 +34,7 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int len, u32 padbit)
{
len = round_down(len, POLY1305_BLOCK_SIZE);
- if (static_branch_likely(&have_neon)) {
+ if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
The patch below does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x eec76ea5a7213c48529a46eed1b343e5cee3aaab
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081557-unnatural-unexpired-089f@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eec76ea5a7213c48529a46eed1b343e5cee3aaab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:58 -0700
Subject: [PATCH] lib/crypto: arm64/poly1305: Fix register corruption in
no-SIMD contexts
Restore the SIMD usability check that was removed by commit a59e5468a921
("crypto: arm64/poly1305 - Add block-only interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use may_use_simd() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: a59e5468a921 ("crypto: arm64/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c
index c9a74766785b..31aea21ce42f 100644
--- a/lib/crypto/arm64/poly1305-glue.c
+++ b/lib/crypto/arm64/poly1305-glue.c
@@ -7,6 +7,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/poly1305.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -33,7 +34,7 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int len, u32 padbit)
{
len = round_down(len, POLY1305_BLOCK_SIZE);
- if (static_branch_likely(&have_neon)) {
+ if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
The patch below does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x eec76ea5a7213c48529a46eed1b343e5cee3aaab
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081556-septic-skeleton-d360@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eec76ea5a7213c48529a46eed1b343e5cee3aaab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:58 -0700
Subject: [PATCH] lib/crypto: arm64/poly1305: Fix register corruption in
no-SIMD contexts
Restore the SIMD usability check that was removed by commit a59e5468a921
("crypto: arm64/poly1305 - Add block-only interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use may_use_simd() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: a59e5468a921 ("crypto: arm64/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c
index c9a74766785b..31aea21ce42f 100644
--- a/lib/crypto/arm64/poly1305-glue.c
+++ b/lib/crypto/arm64/poly1305-glue.c
@@ -7,6 +7,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/poly1305.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -33,7 +34,7 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int len, u32 padbit)
{
len = round_down(len, POLY1305_BLOCK_SIZE);
- if (static_branch_likely(&have_neon)) {
+ if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
The patch below does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x eec76ea5a7213c48529a46eed1b343e5cee3aaab
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081556-relish-alongside-9c22@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From eec76ea5a7213c48529a46eed1b343e5cee3aaab Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:58 -0700
Subject: [PATCH] lib/crypto: arm64/poly1305: Fix register corruption in
no-SIMD contexts
Restore the SIMD usability check that was removed by commit a59e5468a921
("crypto: arm64/poly1305 - Add block-only interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use may_use_simd() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: a59e5468a921 ("crypto: arm64/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-4-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c
index c9a74766785b..31aea21ce42f 100644
--- a/lib/crypto/arm64/poly1305-glue.c
+++ b/lib/crypto/arm64/poly1305-glue.c
@@ -7,6 +7,7 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
+#include <asm/simd.h>
#include <crypto/internal/poly1305.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@@ -33,7 +34,7 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src,
unsigned int len, u32 padbit)
{
len = round_down(len, POLY1305_BLOCK_SIZE);
- if (static_branch_likely(&have_neon)) {
+ if (static_branch_likely(&have_neon) && likely(may_use_simd())) {
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
If all the subrequests in an unbuffered write stream fail, the subrequest
collector doesn't update the stream->transferred value and it retains its
initial LONG_MAX value. Unfortunately, if all active streams fail, then we
take the smallest value of { LONG_MAX, LONG_MAX, ... } as the value to set
in wreq->transferred - which is then returned from ->write_iter().
LONG_MAX was chosen as the initial value so that all the streams can be
quickly assessed by taking the smallest value of all stream->transferred -
but this only works if we've set any of them.
Fix this by adding a flag to indicate whether the value in
stream->transferred is valid and checking that when we integrate the
values. stream->transferred can then be initialised to zero.
This was found by running the generic/750 xfstest against cifs with
cache=none. It splices data to the target file. Once (if) it has used up
all the available scratch space, the writes start failing with ENOSPC.
This causes ->write_iter() to fail. However, it was returning
wreq->transferred, i.e. LONG_MAX, rather than an error (because it thought
the amount transferred was non-zero) and iter_file_splice_write() would
then try to clean up that amount of pipe bufferage - leading to an oops
when it overran. The kernel log showed:
CIFS: VFS: Send error in write = -28
followed by:
BUG: kernel NULL pointer dereference, address: 0000000000000008
with:
RIP: 0010:iter_file_splice_write+0x3a4/0x520
do_splice+0x197/0x4e0
or:
RIP: 0010:pipe_buf_release (include/linux/pipe_fs_i.h:282)
iter_file_splice_write (fs/splice.c:755)
Also put a warning check into splice to announce if ->write_iter() returned
that it had written more than it was asked to.
Fixes: 288ace2f57c9 ("netfs: New writeback implementation")
Reported-by: Xiaoli Feng <fengxiaoli0714(a)gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220445
Signed-off-by: David Howells <dhowells(a)redhat.com>
cc: Paulo Alcantara <pc(a)manguebit.org>
cc: Steve French <sfrench(a)samba.org>
cc: Shyam Prasad N <sprasad(a)microsoft.com>
cc: netfs(a)lists.linux.dev
cc: linux-cifs(a)vger.kernel.org
cc: linux-fsdevel(a)vger.kernel.org
cc: stable(a)vger.kernel.org
---
fs/netfs/read_collect.c | 4 +++-
fs/netfs/write_collect.c | 10 ++++++++--
fs/netfs/write_issue.c | 4 ++--
fs/splice.c | 3 +++
include/linux/netfs.h | 1 +
5 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 3e804da1e1eb..a95e7aadafd0 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -281,8 +281,10 @@ static void netfs_collect_read_results(struct netfs_io_request *rreq)
} else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
notes |= MADE_PROGRESS;
} else {
- if (!stream->failed)
+ if (!stream->failed) {
stream->transferred += transferred;
+ stream->transferred_valid = true;
+ }
if (front->transferred < front->len)
set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
notes |= MADE_PROGRESS;
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 0f3a36852a4d..cbf3d9194c7b 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -254,6 +254,7 @@ static void netfs_collect_write_results(struct netfs_io_request *wreq)
if (front->start + front->transferred > stream->collected_to) {
stream->collected_to = front->start + front->transferred;
stream->transferred = stream->collected_to - wreq->start;
+ stream->transferred_valid = true;
notes |= MADE_PROGRESS;
}
if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
@@ -356,6 +357,7 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
{
struct netfs_inode *ictx = netfs_inode(wreq->inode);
size_t transferred;
+ bool transferred_valid = false;
int s;
_enter("R=%x", wreq->debug_id);
@@ -376,12 +378,16 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
continue;
if (!list_empty(&stream->subrequests))
return false;
- if (stream->transferred < transferred)
+ if (stream->transferred_valid &&
+ stream->transferred < transferred) {
transferred = stream->transferred;
+ transferred_valid = true;
+ }
}
/* Okay, declare that all I/O is complete. */
- wreq->transferred = transferred;
+ if (transferred_valid)
+ wreq->transferred = transferred;
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
if (wreq->io_streams[1].active &&
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 50bee2c4130d..0584cba1a043 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -118,12 +118,12 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
wreq->io_streams[0].prepare_write = ictx->ops->prepare_write;
wreq->io_streams[0].issue_write = ictx->ops->issue_write;
wreq->io_streams[0].collected_to = start;
- wreq->io_streams[0].transferred = LONG_MAX;
+ wreq->io_streams[0].transferred = 0;
wreq->io_streams[1].stream_nr = 1;
wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE;
wreq->io_streams[1].collected_to = start;
- wreq->io_streams[1].transferred = LONG_MAX;
+ wreq->io_streams[1].transferred = 0;
if (fscache_resources_valid(&wreq->cache_resources)) {
wreq->io_streams[1].avail = true;
wreq->io_streams[1].active = true;
diff --git a/fs/splice.c b/fs/splice.c
index 4d6df083e0c0..f5094b6d00a0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -739,6 +739,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
sd.pos = kiocb.ki_pos;
if (ret <= 0)
break;
+ WARN_ONCE(ret > sd.total_len - left,
+ "Splice Exceeded! ret=%zd tot=%zu left=%zu\n",
+ ret, sd.total_len, left);
sd.num_spliced += ret;
sd.total_len -= ret;
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 185bd8196503..98c96d649bf9 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -150,6 +150,7 @@ struct netfs_io_stream {
bool active; /* T if stream is active */
bool need_retry; /* T if this stream needs retrying */
bool failed; /* T if this stream failed */
+ bool transferred_valid; /* T is ->transferred is valid */
};
/*
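The pitfall fixed here is a general one: a LONG_MAX "not set yet" sentinel only
works for take-the-minimum aggregation if at least one stream actually recorded
a value. A small standalone sketch of the aggregation with a validity flag
(illustrative plain C, not the netfs code):

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

struct stream {
	long transferred;
	bool transferred_valid;	/* set once a real byte count was recorded */
};

/* Smallest recorded count across streams; 0 if nothing was recorded. */
static long total_transferred(const struct stream *s, int n)
{
	long min = LONG_MAX;
	bool any = false;

	for (int i = 0; i < n; i++) {
		if (s[i].transferred_valid && s[i].transferred < min) {
			min = s[i].transferred;
			any = true;
		}
	}
	return any ? min : 0;
}

int main(void)
{
	struct stream all_failed[2] = { { 0, false }, { 0, false } };
	struct stream partial[2]    = { { 4096, true }, { 0, false } };

	/* Without the flag, all_failed would wrongly report LONG_MAX. */
	printf("all failed: %ld\n", total_transferred(all_failed, 2));
	printf("partial:    %ld\n", total_transferred(partial, 2));
	return 0;
}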
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081509-octopus-proponent-0409@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:59 -0700
Subject: [PATCH] lib/crypto: x86/poly1305: Fix register corruption in no-SIMD
contexts
Restore the SIMD usability check and base conversion that were removed
by commit 318c53ae02f2 ("crypto: x86/poly1305 - Add block-only
interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use irq_fpu_usable() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: 318c53ae02f2 ("crypto: x86/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-5-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305_glue.c
index b7e78a583e07..968d84677631 100644
--- a/lib/crypto/x86/poly1305_glue.c
+++ b/lib/crypto/x86/poly1305_glue.c
@@ -25,6 +25,42 @@ struct poly1305_arch_internal {
struct { u32 r2, r1, r4, r3; } rn[9];
};
+/*
+ * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
+ * the unfortunate situation of using AVX and then having to go back to scalar
+ * -- because the user is silly and has called the update function from two
+ * separate contexts -- then we need to convert back to the original base before
+ * proceeding. It is possible to reason that the initial reduction below is
+ * sufficient given the implementation invariants. However, for an avoidance of
+ * doubt and because this is not performance critical, we do the full reduction
+ * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
+ */
+static void convert_to_base2_64(void *ctx)
+{
+ struct poly1305_arch_internal *state = ctx;
+ u32 cy;
+
+ if (!state->is_base2_26)
+ return;
+
+ cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+ cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+ cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+ cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+ state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+ state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+ state->hs[2] = state->h[4] >> 24;
+ /* Unsigned Less Than: branchlessly produces 1 if a < b, else 0. */
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+ cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
+ state->hs[2] &= 3;
+ state->hs[0] += cy;
+ state->hs[1] += (cy = ULT(state->hs[0], cy));
+ state->hs[2] += ULT(state->hs[1], cy);
+#undef ULT
+ state->is_base2_26 = 0;
+}
+
asmlinkage void poly1305_block_init_arch(
struct poly1305_block_state *state,
const u8 raw_key[POLY1305_BLOCK_SIZE]);
@@ -62,7 +98,9 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp,
BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
SZ_4K % POLY1305_BLOCK_SIZE);
- if (!static_branch_likely(&poly1305_use_avx)) {
+ if (!static_branch_likely(&poly1305_use_avx) ||
+ unlikely(!irq_fpu_usable())) {
+ convert_to_base2_64(ctx);
poly1305_blocks_x86_64(ctx, inp, len, padbit);
return;
}
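The ULT() macro in the hunk above is the usual branchless unsigned "a < b": the
expression leaves the borrow of a - b in the top bit, and the shift extracts it
as 0 or 1. A quick standalone sanity check of that identity against the C <
operator (plain C, nothing kernel-specific):

#include <stdint.h>
#include <stdio.h>

/* Same form as in the patch: 1 if a < b (unsigned), else 0. */
#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))

int main(void)
{
	const uint64_t samples[] = {
		0, 1, 2, 3, 0x3ffffffULL, UINT64_MAX - 1, UINT64_MAX
	};
	const unsigned int n = sizeof(samples) / sizeof(samples[0]);
	int bad = 0;

	for (unsigned int i = 0; i < n; i++) {
		for (unsigned int j = 0; j < n; j++) {
			uint64_t a = samples[i], b = samples[j];

			if (ULT(a, b) != (a < b ? 1 : 0)) {
				printf("mismatch: a=%llu b=%llu\n",
				       (unsigned long long)a,
				       (unsigned long long)b);
				bad = 1;
			}
		}
	}
	printf(bad ? "FAIL\n" : "OK\n");
	return bad;
}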
The patch below does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081508-pelt-scouting-b79c@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:59 -0700
Subject: [PATCH] lib/crypto: x86/poly1305: Fix register corruption in no-SIMD
contexts
Restore the SIMD usability check and base conversion that were removed
by commit 318c53ae02f2 ("crypto: x86/poly1305 - Add block-only
interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use irq_fpu_usable() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: 318c53ae02f2 ("crypto: x86/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-5-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305_glue.c
index b7e78a583e07..968d84677631 100644
--- a/lib/crypto/x86/poly1305_glue.c
+++ b/lib/crypto/x86/poly1305_glue.c
@@ -25,6 +25,42 @@ struct poly1305_arch_internal {
struct { u32 r2, r1, r4, r3; } rn[9];
};
+/*
+ * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
+ * the unfortunate situation of using AVX and then having to go back to scalar
+ * -- because the user is silly and has called the update function from two
+ * separate contexts -- then we need to convert back to the original base before
+ * proceeding. It is possible to reason that the initial reduction below is
+ * sufficient given the implementation invariants. However, for an avoidance of
+ * doubt and because this is not performance critical, we do the full reduction
+ * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
+ */
+static void convert_to_base2_64(void *ctx)
+{
+ struct poly1305_arch_internal *state = ctx;
+ u32 cy;
+
+ if (!state->is_base2_26)
+ return;
+
+ cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+ cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+ cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+ cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+ state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+ state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+ state->hs[2] = state->h[4] >> 24;
+ /* Unsigned Less Than: branchlessly produces 1 if a < b, else 0. */
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+ cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
+ state->hs[2] &= 3;
+ state->hs[0] += cy;
+ state->hs[1] += (cy = ULT(state->hs[0], cy));
+ state->hs[2] += ULT(state->hs[1], cy);
+#undef ULT
+ state->is_base2_26 = 0;
+}
+
asmlinkage void poly1305_block_init_arch(
struct poly1305_block_state *state,
const u8 raw_key[POLY1305_BLOCK_SIZE]);
@@ -62,7 +98,9 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp,
BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
SZ_4K % POLY1305_BLOCK_SIZE);
- if (!static_branch_likely(&poly1305_use_avx)) {
+ if (!static_branch_likely(&poly1305_use_avx) ||
+ unlikely(!irq_fpu_usable())) {
+ convert_to_base2_64(ctx);
poly1305_blocks_x86_64(ctx, inp, len, padbit);
return;
}
The patch below does not apply to the 5.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.15.y
git checkout FETCH_HEAD
git cherry-pick -x 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081508-handcart-oversweet-f21e@gregkh' --subject-prefix 'PATCH 5.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Sun, 6 Jul 2025 16:10:59 -0700
Subject: [PATCH] lib/crypto: x86/poly1305: Fix register corruption in no-SIMD
contexts
Restore the SIMD usability check and base conversion that were removed
by commit 318c53ae02f2 ("crypto: x86/poly1305 - Add block-only
interface").
This safety check is cheap and is well worth eliminating a footgun.
While the Poly1305 functions should not be called when SIMD registers
are unusable, if they are anyway, they should just do the right thing
instead of corrupting random tasks' registers and/or computing incorrect
MACs. Fixing this is also needed for poly1305_kunit to pass.
Just use irq_fpu_usable() instead of the original crypto_simd_usable(),
since poly1305_kunit won't rely on crypto_simd_disabled_for_test.
Fixes: 318c53ae02f2 ("crypto: x86/poly1305 - Add block-only interface")
Cc: stable(a)vger.kernel.org
Reviewed-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250706231100.176113-5-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305_glue.c
index b7e78a583e07..968d84677631 100644
--- a/lib/crypto/x86/poly1305_glue.c
+++ b/lib/crypto/x86/poly1305_glue.c
@@ -25,6 +25,42 @@ struct poly1305_arch_internal {
struct { u32 r2, r1, r4, r3; } rn[9];
};
+/*
+ * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
+ * the unfortunate situation of using AVX and then having to go back to scalar
+ * -- because the user is silly and has called the update function from two
+ * separate contexts -- then we need to convert back to the original base before
+ * proceeding. It is possible to reason that the initial reduction below is
+ * sufficient given the implementation invariants. However, for an avoidance of
+ * doubt and because this is not performance critical, we do the full reduction
+ * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py
+ */
+static void convert_to_base2_64(void *ctx)
+{
+ struct poly1305_arch_internal *state = ctx;
+ u32 cy;
+
+ if (!state->is_base2_26)
+ return;
+
+ cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+ cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+ cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+ cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+ state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+ state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+ state->hs[2] = state->h[4] >> 24;
+ /* Unsigned Less Than: branchlessly produces 1 if a < b, else 0. */
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+ cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
+ state->hs[2] &= 3;
+ state->hs[0] += cy;
+ state->hs[1] += (cy = ULT(state->hs[0], cy));
+ state->hs[2] += ULT(state->hs[1], cy);
+#undef ULT
+ state->is_base2_26 = 0;
+}
+
asmlinkage void poly1305_block_init_arch(
struct poly1305_block_state *state,
const u8 raw_key[POLY1305_BLOCK_SIZE]);
@@ -62,7 +98,9 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp,
BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE ||
SZ_4K % POLY1305_BLOCK_SIZE);
- if (!static_branch_likely(&poly1305_use_avx)) {
+ if (!static_branch_likely(&poly1305_use_avx) ||
+ unlikely(!irq_fpu_usable())) {
+ convert_to_base2_64(ctx);
poly1305_blocks_x86_64(ctx, inp, len, padbit);
return;
}
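For context, the fallback above matters for code that computes Poly1305 MACs through the generic library interface from contexts where kernel-mode SIMD may be unavailable (for example in a hard IRQ that interrupted a task using the FPU). Below is a minimal sketch of such a caller; it assumes the library API declared in <crypto/poly1305.h> (poly1305_init/poly1305_update/poly1305_final) and is illustrative only, not part of the patch:

#include <crypto/poly1305.h>

/*
 * Illustrative sketch: compute a Poly1305 MAC via the library API.
 * On x86 the AVX path runs between kernel_fpu_begin()/kernel_fpu_end()
 * when SIMD is usable; with the fix above, poly1305_blocks_arch()
 * converts the state back to base 2^64 and uses the scalar code when
 * irq_fpu_usable() is false instead of corrupting SIMD registers.
 */
static void poly1305_mac_example(const u8 key[POLY1305_KEY_SIZE],
				 const u8 *data, unsigned int len,
				 u8 mac[POLY1305_DIGEST_SIZE])
{
	struct poly1305_desc_ctx desc;

	poly1305_init(&desc, key);
	poly1305_update(&desc, data, len);
	poly1305_final(&desc, mac);
}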
The same patch (commit 16f2c30e290e, quoted above) also does not apply to the 6.6-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.6.y
git checkout FETCH_HEAD
git cherry-pick -x 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081507-verse-prozac-3c44@gregkh' --subject-prefix 'PATCH 6.6.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
The same patch (commit 16f2c30e290e, quoted above) also does not apply to the 6.1-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.1.y
git checkout FETCH_HEAD
git cherry-pick -x 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081507-raking-viewless-4b7d@gregkh' --subject-prefix 'PATCH 6.1.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
The same patch (commit 16f2c30e290e, quoted above) also does not apply to the 6.15-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.15.y
git checkout FETCH_HEAD
git cherry-pick -x 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081506-washable-climatic-2c7b@gregkh' --subject-prefix 'PATCH 6.15.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
The same patch (commit 16f2c30e290e, quoted above) also does not apply to the 6.12-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-6.12.y
git checkout FETCH_HEAD
git cherry-pick -x 16f2c30e290e04135b70ad374fb7e1d1ed9ff5e7
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081506-outsource-shorter-c39a@gregkh' --subject-prefix 'PATCH 6.12.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.4.y
git checkout FETCH_HEAD
git cherry-pick -x b41c1d8d07906786c60893980d52688f31d114a6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081520-around-unsliced-699b@gregkh' --subject-prefix 'PATCH 5.4.y' HEAD^..
Possible dependencies:
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From b41c1d8d07906786c60893980d52688f31d114a6 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers(a)kernel.org>
Date: Fri, 4 Jul 2025 00:03:22 -0700
Subject: [PATCH] fscrypt: Don't use problematic non-inline crypto engines
Make fscrypt no longer use Crypto API drivers for non-inline crypto
engines, even when the Crypto API prioritizes them over CPU-based code
(which unfortunately it often does). These drivers tend to be really
problematic, especially for fscrypt's workload. This commit has no
effect on inline crypto engines, which are different and do work well.
Specifically, exclude drivers that have CRYPTO_ALG_KERN_DRIVER_ONLY or
CRYPTO_ALG_ALLOCATES_MEMORY set. (Later, CRYPTO_ALG_ASYNC should be
excluded too. That's omitted for now to keep this commit backportable,
since until recently some CPU-based code had CRYPTO_ALG_ASYNC set.)
There are two major issues with these drivers: bugs and performance.
First, these drivers tend to be buggy. They're fundamentally much more
error-prone and harder to test than the CPU-based code. They often
don't get tested before kernel releases, and even if they do, the crypto
self-tests don't properly test these drivers. Released drivers have
en/decrypted or hashed data incorrectly. These bugs cause issues for
fscrypt users who often didn't even want to use these drivers, e.g.:
- https://github.com/google/fscryptctl/issues/32
- https://github.com/google/fscryptctl/issues/9
- https://lore.kernel.org/r/PH0PR02MB731916ECDB6C613665863B6CFFAA2@PH0PR02MB7…
These drivers have also similarly caused issues for dm-crypt users,
including data corruption and deadlocks. Since Linux v5.10, dm-crypt
has disabled most of them by excluding CRYPTO_ALG_ALLOCATES_MEMORY.
Second, these drivers tend to be *much* slower than the CPU-based code.
This may seem counterintuitive, but benchmarks clearly show it. There's
a *lot* of overhead associated with going to a hardware driver, off the
CPU, and back again. To prove this, I gathered as many systems with
this type of crypto engine as I could, and I measured synchronous
encryption of 4096-byte messages (which matches fscrypt's workload):
Intel Emerald Rapids server:
    AES-256-XTS:
        xts-aes-vaes-avx512                 16171 MB/s   [CPU-based, Vector AES]
        qat_aes_xts                           289 MB/s   [Offload, Intel QuickAssist]
Qualcomm SM8650 HDK:
    AES-256-XTS:
        xts-aes-ce                           4301 MB/s   [CPU-based, ARMv8 Crypto Extensions]
        xts-aes-qce                            73 MB/s   [Offload, Qualcomm Crypto Engine]
i.MX 8M Nano LPDDR4 EVK:
    AES-256-XTS:
        xts-aes-ce                            647 MB/s   [CPU-based, ARMv8 Crypto Extensions]
        xts(ecb-aes-caam)                      20 MB/s   [Offload, CAAM]
    AES-128-CBC-ESSIV:
        essiv(cbc-aes-caam,sha256-lib)         23 MB/s   [Offload, CAAM]
STM32MP157F-DK2:
    AES-256-XTS:
        xts-aes-neonbs                       13.2 MB/s   [CPU-based, ARM NEON]
        xts(stm32-ecb-aes)                    3.1 MB/s   [Offload, STM32 crypto engine]
    AES-128-CBC-ESSIV:
        essiv(cbc-aes-neonbs,sha256-lib)     14.7 MB/s   [CPU-based, ARM NEON]
        essiv(stm32-cbc-aes,sha256-lib)       3.2 MB/s   [Offload, STM32 crypto engine]
    Adiantum:
        adiantum(xchacha12-arm,aes-arm,nhpoly1305-neon)
                                             52.8 MB/s   [CPU-based, ARM scalar + NEON]
So, there was no case in which the crypto engine was even *close* to
being faster. On the first three, which have AES instructions in the
CPU, the CPU was 30 to 55 times faster (!). Even on STM32MP157F-DK2
which has a Cortex-A7 CPU that doesn't have AES instructions, AES was
over 4 times faster on the CPU. And Adiantum encryption, which is what
actually should be used on CPUs like that, was over 17 times faster.
Other justifications that have been given for these non-inline crypto
engines (almost always coming from the hardware vendors, not actual
users) don't seem very plausible either:
- The crypto engine throughput could be improved by processing
multiple requests concurrently. Currently irrelevant to fscrypt,
since it doesn't do that. This would also be complex, and unhelpful
in many cases. 2 of the 4 engines I tested even had only one queue.
- Some of the engines, e.g. STM32, support hardware keys. Also
currently irrelevant to fscrypt, since it doesn't support these.
Interestingly, the STM32 driver itself doesn't support this either.
- Free up CPU for other tasks and/or reduce energy usage. Not very
plausible considering the "short" message length, driver overhead,
and scheduling overhead. There's just very little time for the CPU
to do something else like run another task or enter low-power state,
before the message finishes and it's time to process the next one.
- Some of these engines resist power analysis and electromagnetic
attacks, while the CPU-based crypto generally does not. In theory,
this sounds great. In practice, if this benefit requires the use of
an off-CPU offload that massively regresses performance and has a
low-quality, buggy driver, the price for this hardening (which is
not relevant to most fscrypt users, and tends to be incomplete) is
just too high. Inline crypto engines are much more promising here,
as are on-CPU solutions like RISC-V High Assurance Cryptography.
Fixes: b30ab0e03407 ("ext4 crypto: add ext4 encryption facilities")
Cc: stable(a)vger.kernel.org
Acked-by: Ard Biesheuvel <ardb(a)kernel.org>
Link: https://lore.kernel.org/r/20250704070322.20692-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers(a)kernel.org>
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index f63791641c1d..696a5844bfa3 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -147,9 +147,8 @@ However, these ioctls have some limitations:
were wiped. To partially solve this, you can add init_on_free=1 to
your kernel command line. However, this has a performance cost.
-- Secret keys might still exist in CPU registers, in crypto
- accelerator hardware (if used by the crypto API to implement any of
- the algorithms), or in other places not explicitly considered here.
+- Secret keys might still exist in CPU registers or in other places
+ not explicitly considered here.
Full system compromise
~~~~~~~~~~~~~~~~~~~~~~
@@ -406,9 +405,12 @@ the work is done by XChaCha12, which is much faster than AES when AES
acceleration is unavailable. For more information about Adiantum, see
`the Adiantum paper <https://eprint.iacr.org/2018/720.pdf>`_.
-The (AES-128-CBC-ESSIV, AES-128-CBC-CTS) pair exists only to support
-systems whose only form of AES acceleration is an off-CPU crypto
-accelerator such as CAAM or CESA that does not support XTS.
+The (AES-128-CBC-ESSIV, AES-128-CBC-CTS) pair was added to try to
+provide a more efficient option for systems that lack AES instructions
+in the CPU but do have a non-inline crypto engine such as CAAM or CESA
+that supports AES-CBC (and not AES-XTS). This is deprecated. It has
+been shown that just doing AES on the CPU is actually faster.
+Moreover, Adiantum is faster still and is recommended on such systems.
The remaining mode pairs are the "national pride ciphers":
@@ -1318,22 +1320,13 @@ this by validating all top-level encryption policies prior to access.
Inline encryption support
=========================
-By default, fscrypt uses the kernel crypto API for all cryptographic
-operations (other than HKDF, which fscrypt partially implements
-itself). The kernel crypto API supports hardware crypto accelerators,
-but only ones that work in the traditional way where all inputs and
-outputs (e.g. plaintexts and ciphertexts) are in memory. fscrypt can
-take advantage of such hardware, but the traditional acceleration
-model isn't particularly efficient and fscrypt hasn't been optimized
-for it.
-
-Instead, many newer systems (especially mobile SoCs) have *inline
-encryption hardware* that can encrypt/decrypt data while it is on its
-way to/from the storage device. Linux supports inline encryption
-through a set of extensions to the block layer called *blk-crypto*.
-blk-crypto allows filesystems to attach encryption contexts to bios
-(I/O requests) to specify how the data will be encrypted or decrypted
-in-line. For more information about blk-crypto, see
+Many newer systems (especially mobile SoCs) have *inline encryption
+hardware* that can encrypt/decrypt data while it is on its way to/from
+the storage device. Linux supports inline encryption through a set of
+extensions to the block layer called *blk-crypto*. blk-crypto allows
+filesystems to attach encryption contexts to bios (I/O requests) to
+specify how the data will be encrypted or decrypted in-line. For more
+information about blk-crypto, see
:ref:`Documentation/block/inline-encryption.rst <inline_encryption>`.
On supported filesystems (currently ext4 and f2fs), fscrypt can use
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index c1d92074b65c..6e7164530a1e 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -45,6 +45,23 @@
*/
#undef FSCRYPT_MAX_KEY_SIZE
+/*
+ * This mask is passed as the third argument to the crypto_alloc_*() functions
+ * to prevent fscrypt from using the Crypto API drivers for non-inline crypto
+ * engines. Those drivers have been problematic for fscrypt. fscrypt users
+ * have reported hangs and even incorrect en/decryption with these drivers.
+ * Since going to the driver, off CPU, and back again is really slow, such
+ * drivers can be over 50 times slower than the CPU-based code for fscrypt's
+ * workload. Even on platforms that lack AES instructions on the CPU, using the
+ * offloads has been shown to be slower, even staying with AES. (Of course,
+ * Adiantum is faster still, and is the recommended option on such platforms...)
+ *
+ * Note that fscrypt also supports inline crypto engines. Those don't use the
+ * Crypto API and work much better than the old-style (non-inline) engines.
+ */
+#define FSCRYPT_CRYPTOAPI_MASK \
+ (CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY)
+
#define FSCRYPT_CONTEXT_V1 1
#define FSCRYPT_CONTEXT_V2 2
diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c
index 5c095c8aa3b5..b1ef506cd341 100644
--- a/fs/crypto/hkdf.c
+++ b/fs/crypto/hkdf.c
@@ -58,7 +58,7 @@ int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
u8 prk[HKDF_HASHLEN];
int err;
- hmac_tfm = crypto_alloc_shash(HKDF_HMAC_ALG, 0, 0);
+ hmac_tfm = crypto_alloc_shash(HKDF_HMAC_ALG, 0, FSCRYPT_CRYPTOAPI_MASK);
if (IS_ERR(hmac_tfm)) {
fscrypt_err(NULL, "Error allocating " HKDF_HMAC_ALG ": %ld",
PTR_ERR(hmac_tfm));
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index a67e20d126c9..74d4a2e1ad23 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -104,7 +104,8 @@ fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key,
struct crypto_skcipher *tfm;
int err;
- tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+ tfm = crypto_alloc_skcipher(mode->cipher_str, 0,
+ FSCRYPT_CRYPTOAPI_MASK);
if (IS_ERR(tfm)) {
if (PTR_ERR(tfm) == -ENOENT) {
fscrypt_warn(inode,
diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c
index b70521c55132..158ceae8a5bc 100644
--- a/fs/crypto/keysetup_v1.c
+++ b/fs/crypto/keysetup_v1.c
@@ -52,7 +52,8 @@ static int derive_key_aes(const u8 *master_key,
struct skcipher_request *req = NULL;
DECLARE_CRYPTO_WAIT(wait);
struct scatterlist src_sg, dst_sg;
- struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
+ struct crypto_skcipher *tfm =
+ crypto_alloc_skcipher("ecb(aes)", 0, FSCRYPT_CRYPTOAPI_MASK);
if (IS_ERR(tfm)) {
res = PTR_ERR(tfm);
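As background on the mechanism the patch relies on: the third argument to the crypto_alloc_*() functions is a flag mask, and with the type argument left at 0, any algorithm implementation that has one of the masked flags set fails to match and is skipped during lookup. Below is a minimal sketch of the same exclusion pattern outside fscrypt, assuming only <linux/crypto.h> and <crypto/skcipher.h>; illustrative only, not taken from the patch:

#include <linux/crypto.h>
#include <crypto/skcipher.h>

/*
 * Illustrative sketch: allocate an AES-XTS transform while skipping
 * implementations flagged as driver-only or as possibly allocating
 * memory per request, i.e. the non-inline hardware offload drivers.
 */
static struct crypto_skcipher *alloc_cpu_xts_example(void)
{
	return crypto_alloc_skcipher("xts(aes)", 0,
				     CRYPTO_ALG_ALLOCATES_MEMORY |
				     CRYPTO_ALG_KERN_DRIVER_ONLY);
}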
The same patch (commit b41c1d8d0790, quoted above) also does not apply to the 5.10-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
To reproduce the conflict and resubmit, you may use the following commands:
git fetch https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/ linux-5.10.y
git checkout FETCH_HEAD
git cherry-pick -x b41c1d8d07906786c60893980d52688f31d114a6
# <resolve conflicts, build, test, etc.>
git commit -s
git send-email --to '<stable(a)vger.kernel.org>' --in-reply-to '2025081518-cusp-plausibly-6afc@gregkh' --subject-prefix 'PATCH 5.10.y' HEAD^..
Possible dependencies:
thanks,
greg k-h