The first patch of the series fixes a possible infinite loop.
The remaining three patches fix an address alignment issue observed after commit 9382bc44b5f5 ("arm64: allow kmalloc() caches aligned to the smaller cache_line_size()").
Patch 2 and patch 3 apply to stable versions 6.6 onwards. Patch 4 applies to stable versions 6.12 onwards.
Bharat Bhushan (4): crypto: octeontx2: add timeout for load_fvc completion poll crypto: octeontx2: Fix address alignment issue on ucode loading crypto: octeontx2: Fix address alignment on CN10K A0/A1 and OcteonTX2 crypto: octeontx2: Fix address alignment on CN10KB and CN10KA-B0
.../marvell/octeontx2/otx2_cpt_reqmgr.h | 119 +++++++++++++----- .../marvell/octeontx2/otx2_cptpf_ucode.c | 46 ++++--- 2 files changed, 121 insertions(+), 44 deletions(-)
Add a timeout to exit from a possible infinite loop that polls for completion of the CPT instruction (load_fvc).
Signed-off-by: Srujana Challa schalla@marvell.com Signed-off-by: Bharat Bhushan bbhushan2@marvell.com --- .../crypto/marvell/octeontx2/otx2_cptpf_ucode.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 42c5484ce66a..3a818ac89295 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1494,6 +1494,7 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) dma_addr_t rptr_baddr; struct pci_dev *pdev; u32 len, compl_rlen; + int timeout = 10000; int ret, etype; void *rptr;
@@ -1556,16 +1557,27 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) etype); otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr); lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]); + timeout = 10000;
while (lfs->ops->cpt_get_compcode(result) == - OTX2_CPT_COMPLETION_CODE_INIT) + OTX2_CPT_COMPLETION_CODE_INIT) { cpu_relax(); + udelay(1); + timeout--; + if (!timeout) { + ret = -ENODEV; + cptpf->is_eng_caps_discovered = false; + dev_warn(&pdev->dev, "Timeout on CPT load_fvc completion poll\n"); + goto error_no_response; + } + }
cptpf->eng_caps[etype].u = be64_to_cpup(rptr); } - dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL); cptpf->is_eng_caps_discovered = true;
+error_no_response: + dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL); free_result: kfree(result); lf_cleanup:
Hi,
Thanks for your patch.
FYI: kernel test robot notices the stable kernel rule is not satisfied.
The check is based on https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html#opti...
Rule: add the tag "Cc: stable@vger.kernel.org" in the sign-off area to have the patch automatically included in the stable tree. Subject: [PATCH 1/4 RESEND] crypto: octeontx2: add timeout for load_fvc completion poll Link: https://lore.kernel.org/stable/20250514051043.3178659-2-bbhushan2%40marvell....
The octeontx2 crypto driver allocates memory using kmalloc/kzalloc and uses this memory for DMA (via dma_map_single()). It assumes that kmalloc/kzalloc returns a 128-byte aligned address, but kmalloc/kzalloc returns an address that is only 8-byte aligned after commit 9382bc44b5f5 ("arm64: allow kmalloc() caches aligned to the smaller cache_line_size()").
The completion address must be 32-byte aligned when loading microcode.
Signed-off-by: Bharat Bhushan bbhushan2@marvell.com --- .../marvell/octeontx2/otx2_cptpf_ucode.c | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 3a818ac89295..1c2aa9626088 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1491,12 +1491,13 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) union otx2_cpt_opcode opcode; union otx2_cpt_res_s *result; union otx2_cpt_inst_s inst; + dma_addr_t result_baddr; dma_addr_t rptr_baddr; struct pci_dev *pdev; - u32 len, compl_rlen; int timeout = 10000; int ret, etype; void *rptr; + u32 len;
/* * We don't get capabilities if it was already done @@ -1521,22 +1522,27 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) if (ret) goto delete_grps;
- compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN); - len = compl_rlen + LOADFVC_RLEN; + len = LOADFVC_RLEN + sizeof(union otx2_cpt_res_s) + + OTX2_CPT_RES_ADDR_ALIGN;
- result = kzalloc(len, GFP_KERNEL); - if (!result) { + rptr = kzalloc(len, GFP_KERNEL); + if (!rptr) { ret = -ENOMEM; goto lf_cleanup; } - rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len, + + rptr_baddr = dma_map_single(&pdev->dev, rptr, len, DMA_BIDIRECTIONAL); if (dma_mapping_error(&pdev->dev, rptr_baddr)) { dev_err(&pdev->dev, "DMA mapping failed\n"); ret = -EFAULT; - goto free_result; + goto free_rptr; } - rptr = (u8 *)result + compl_rlen; + + result = (union otx2_cpt_res_s *)PTR_ALIGN(rptr + LOADFVC_RLEN, + OTX2_CPT_RES_ADDR_ALIGN); + result_baddr = ALIGN(rptr_baddr + LOADFVC_RLEN, + OTX2_CPT_RES_ADDR_ALIGN);
/* Fill in the command */ opcode.s.major = LOADFVC_MAJOR_OP; @@ -1548,14 +1554,14 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) /* 64-bit swap for microcode data reads, not needed for addresses */ cpu_to_be64s(&iq_cmd.cmd.u); iq_cmd.dptr = 0; - iq_cmd.rptr = rptr_baddr + compl_rlen; + iq_cmd.rptr = rptr_baddr; iq_cmd.cptr.u = 0;
for (etype = 1; etype < OTX2_CPT_MAX_ENG_TYPES; etype++) { result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT; iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps, etype); - otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr); + otx2_cpt_fill_inst(&inst, &iq_cmd, result_baddr); lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]); timeout = 10000;
@@ -1578,8 +1584,8 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
error_no_response: dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL); -free_result: - kfree(result); +free_rptr: + kfree(rptr); lf_cleanup: otx2_cptlf_shutdown(lfs); delete_grps:
The octeontx2 crypto driver allocates memory using kmalloc/kzalloc and uses this memory for DMA (via dma_map_single()). It assumes that kmalloc/kzalloc returns a 128-byte aligned address, but kmalloc/kzalloc returns an address that is only 8-byte aligned after commit 9382bc44b5f5 ("arm64: allow kmalloc() caches aligned to the smaller cache_line_size()").
The allocated memory is used for the following purposes, each with its own alignment requirement: - Input data or gather list address - 8-byte alignment - Output data or scatter list address - 8-byte alignment - Completion address - 32-byte alignment.
This patch ensures all addresses are aligned as mentioned above.
Signed-off-by: Bharat Bhushan bbhushan2@marvell.com --- .../marvell/octeontx2/otx2_cpt_reqmgr.h | 62 ++++++++++++++----- 1 file changed, 47 insertions(+), 15 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index e27e849b01df..f0f1ff45c383 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -34,6 +34,9 @@ #define SG_COMP_2 2 #define SG_COMP_1 1
+#define OTX2_CPT_DPTR_RPTR_ALIGN 8 +#define OTX2_CPT_RES_ADDR_ALIGN 32 + union otx2_cpt_opcode { u16 flags; struct { @@ -417,10 +420,9 @@ static inline struct otx2_cpt_inst_info * otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, gfp_t gfp) { - int align = OTX2_CPT_DMA_MINALIGN; struct otx2_cpt_inst_info *info; - u32 dlen, align_dlen, info_len; - u16 g_sz_bytes, s_sz_bytes; + u32 dlen, info_len; + u16 g_len, s_len; u32 total_mem_len;
if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT || @@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, return NULL; }
- g_sz_bytes = ((req->in_cnt + 3) / 4) * - sizeof(struct otx2_cpt_sglist_component); - s_sz_bytes = ((req->out_cnt + 3) / 4) * - sizeof(struct otx2_cpt_sglist_component); + /* Allocate memory to meet below alignment requirement: + * ---------------------------------- + * | struct otx2_cpt_inst_info | + * | (No alignment required) | + * | -----------------------------| + * | | padding for 8B alignment | + * |----------------------------------| + * | SG List Gather/Input memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * |----------------------------------| + * | SG List Scatter/Output memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * | (padding for below alignment) | + * | -----------------------------| + * | | padding for 32B alignment | + * |----------------------------------| + * | Result response memory | + * ---------------------------------- + */
- dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; - align_dlen = ALIGN(dlen, align); - info_len = ALIGN(sizeof(*info), align); - total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s); + info_len = sizeof(*info); + + g_len = ((req->in_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + s_len = ((req->out_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + + dlen = g_len + s_len + SG_LIST_HDR_SIZE; + + /* Allocate extra memory for SG and response address alignment */ + total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen; + total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) + + sizeof(union otx2_cpt_res_s);
info = kzalloc(total_mem_len, gfp); if (unlikely(!info)) return NULL;
info->dlen = dlen; - info->in_buffer = (u8 *)info + info_len; + info->in_buffer = PTR_ALIGN((u8 *)info + info_len, + OTX2_CPT_DPTR_RPTR_ALIGN); + info->out_buffer = info->in_buffer + 8 + g_len;
((u16 *)info->in_buffer)[0] = req->out_cnt; ((u16 *)info->in_buffer)[1] = req->in_cnt; @@ -460,7 +490,7 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, }
if (setup_sgio_components(pdev, req->out, req->out_cnt, - &info->in_buffer[8 + g_sz_bytes])) { + info->out_buffer)) { dev_err(&pdev->dev, "Failed to setup scatter list\n"); goto destroy_info; } @@ -476,8 +506,10 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, * Get buffer for union otx2_cpt_res_s response * structure and its physical address */ - info->completion_addr = info->in_buffer + align_dlen; - info->comp_baddr = info->dptr_baddr + align_dlen; + info->completion_addr = PTR_ALIGN((info->in_buffer + dlen), + OTX2_CPT_RES_ADDR_ALIGN); + info->comp_baddr = ALIGN((info->dptr_baddr + dlen), + OTX2_CPT_RES_ADDR_ALIGN);
return info;
On Wed, May 14, 2025 at 10:40:42AM +0530, Bharat Bhushan wrote:
@@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, return NULL; }
- g_sz_bytes = ((req->in_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
- s_sz_bytes = ((req->out_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
- /* Allocate memory to meet below alignment requirement:
* ----------------------------------
* | struct otx2_cpt_inst_info |
* | (No alignment required) |
* | -----------------------------|
* | | padding for 8B alignment |
* |----------------------------------|
* | SG List Gather/Input memory |
* | Length = multiple of 32Bytes |
* | Alignment = 8Byte |
* |----------------------------------|
* | SG List Scatter/Output memory |
* | Length = multiple of 32Bytes |
* | Alignment = 8Byte |
* | (padding for below alignment) |
* | -----------------------------|
* | | padding for 32B alignment |
* |----------------------------------|
* | Result response memory |
* ----------------------------------
*/
- dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
- align_dlen = ALIGN(dlen, align);
- info_len = ALIGN(sizeof(*info), align);
- total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
- info_len = sizeof(*info);
- g_len = ((req->in_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
- s_len = ((req->out_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
- dlen = g_len + s_len + SG_LIST_HDR_SIZE;
- /* Allocate extra memory for SG and response address alignment */
- total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
- total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
sizeof(union otx2_cpt_res_s);
This doesn't look right. It would be correct if kzalloc returned a 32-byte aligned pointer to start with. But it doesn't anymore, which is why you're making this patch in the first place :)
So you need to add extra memory to bridge the gap between what it returns and what you expect. Since it returns 8-byte aligned memory, and you expect 32-byte aligned pointers, you should add 24 bytes.
IOW the calculation should be:
total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen; total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN); total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) & ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
info = kzalloc(total_mem_len, gfp); if (unlikely(!info)) return NULL; info->dlen = dlen;
- info->in_buffer = (u8 *)info + info_len;
- info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
OTX2_CPT_DPTR_RPTR_ALIGN);
- info->out_buffer = info->in_buffer + 8 + g_len;
I presume the 8 here corresponds to SG_LIST_HDR_SIZE from the dlen calculation above. If so please spell it out as otherwise it's just confusing.
Cheers,
On Mon, May 19, 2025 at 9:57 AM Herbert Xu herbert@gondor.apana.org.au wrote:
On Wed, May 14, 2025 at 10:40:42AM +0530, Bharat Bhushan wrote:
@@ -429,22 +431,50 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, return NULL; }
g_sz_bytes = ((req->in_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
s_sz_bytes = ((req->out_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
/* Allocate memory to meet below alignment requirement:
* ----------------------------------
* | struct otx2_cpt_inst_info |
* | (No alignment required) |
* | -----------------------------|
* | | padding for 8B alignment |
* |----------------------------------|
* | SG List Gather/Input memory |
* | Length = multiple of 32Bytes |
* | Alignment = 8Byte |
* |----------------------------------|
* | SG List Scatter/Output memory |
* | Length = multiple of 32Bytes |
* | Alignment = 8Byte |
* | (padding for below alignment) |
* | -----------------------------|
* | | padding for 32B alignment |
* |----------------------------------|
* | Result response memory |
* ----------------------------------
*/
dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
align_dlen = ALIGN(dlen, align);
info_len = ALIGN(sizeof(*info), align);
total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
info_len = sizeof(*info);
g_len = ((req->in_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
s_len = ((req->out_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
dlen = g_len + s_len + SG_LIST_HDR_SIZE;
/* Allocate extra memory for SG and response address alignment */
total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
This adds extra memory for the 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment.
total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
sizeof(union otx2_cpt_res_s);
This adds extra memory for the 32-byte (OTX2_CPT_RES_ADDR_ALIGN) alignment. In case it was not noticed, OTX2_CPT_RES_ADDR_ALIGN is not the same as OTX2_CPT_DPTR_RPTR_ALIGN.
This doesn't look right. It would be correct if kzalloc returned a 32-byte aligned pointer to start with. But it doesn't anymore, which is why you're making this patch in the first place :)
So you need to add extra memory to bridge the gap between what it returns and what you expect. Since it returns 8-byte aligned memory, and you expect 32-byte aligned pointers, you should add 24 bytes.
IOW the calculation should be:
total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen; total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN); total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) & ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
info = kzalloc(total_mem_len, gfp); if (unlikely(!info)) return NULL; info->dlen = dlen;
info->in_buffer = (u8 *)info + info_len;
info->in_buffer = PTR_ALIGN((u8 *)info + info_len,
OTX2_CPT_DPTR_RPTR_ALIGN);
info->out_buffer = info->in_buffer + 8 + g_len;
I presume the 8 here corresponds to SG_LIST_HDR_SIZE from the dlen calculation above. If so please spell it out as otherwise it's just confusing.
Yes, this is for SG_LIST_HDR_SIZE, will use same here.
Thanks -Bharat
Cheers,
Email: Herbert Xu herbert@gondor.apana.org.au Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Mon, May 19, 2025 at 11:47:18AM +0530, Bharat Bhushan wrote:
/* Allocate extra memory for SG and response address alignment */
total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
This add extra memory for 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment
total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
sizeof(union otx2_cpt_res_s);
This add extra memory for 32-byte (OTX2_CPT_RES_ADDR_ALIGN)) In case not observed, OTX2_CPT_RES_ADDR_ALIGN is not the same as OTX2_CPT_DPTR_RPTR_ALIGN.
But it doesn't do that. Look, assume that total_mem_len is 64, then ALIGN(64, 32) will still be 64. You're not adding any extra space for the alignment padding.
OTOH, kmalloc can return something that has a page offset of 8, and you will need 24 extra bytes in your structure to make it align at 32.
Now of course if you're very lucky, and total_mem_len starts out at 8, then it would work but that's purely by chance.
Cheers,
On Mon, May 19, 2025 at 1:05 PM Herbert Xu herbert@gondor.apana.org.au wrote:
On Mon, May 19, 2025 at 11:47:18AM +0530, Bharat Bhushan wrote:
/* Allocate extra memory for SG and response address alignment */
total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + dlen;
This add extra memory for 8-byte (OTX2_CPT_DPTR_RPTR_ALIGN) alignment
total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) +
sizeof(union otx2_cpt_res_s);
This add extra memory for 32-byte (OTX2_CPT_RES_ADDR_ALIGN)) In case not observed, OTX2_CPT_RES_ADDR_ALIGN is not the same as OTX2_CPT_DPTR_RPTR_ALIGN.
But it doesn't do that. Look, assume that total_mem_len is 64, then ALIGN(64, 32) will still be 64. You're not adding any extra space for the alignment padding.
OTOH, kmalloc can return something that has a page offset of 8, and you will need 24 extra bytes in your structure to make it align at 32.
Now of course if you're very lucky, and total_mem_len starts out at 8, then it would work but that's purely by chance.
Thanks for explaining, will change in the next version.
Thanks -Bharat
Cheers,
Email: Herbert Xu herbert@gondor.apana.org.au Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
The octeontx2 crypto driver allocates memory using kmalloc/kzalloc and uses this memory for DMA (via dma_map_single()). It assumes that kmalloc/kzalloc returns a 128-byte aligned address, but kmalloc/kzalloc returns an address that is only 8-byte aligned after commit 9382bc44b5f5 ("arm64: allow kmalloc() caches aligned to the smaller cache_line_size()").
The allocated memory is used for the following purposes, each with its own alignment requirement: - Input data or gather list address - 8-byte alignment - Output data or scatter list address - 8-byte alignment - Completion address - 32-byte alignment.
This patch ensures all addresses are aligned as mentioned above.
Signed-off-by: Bharat Bhushan bbhushan2@marvell.com --- .../marvell/octeontx2/otx2_cpt_reqmgr.h | 57 ++++++++++++++----- 1 file changed, 42 insertions(+), 15 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index f0f1ff45c383..b49dafc596c7 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -350,22 +350,45 @@ static inline struct otx2_cpt_inst_info * cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, gfp_t gfp) { - u32 dlen = 0, g_len, sg_len, info_len; - int align = OTX2_CPT_DMA_MINALIGN; + u32 dlen = 0, g_len, s_len, sg_len, info_len; struct otx2_cpt_inst_info *info; - u16 g_sz_bytes, s_sz_bytes; u32 total_mem_len; int i;
- g_sz_bytes = ((req->in_cnt + 2) / 3) * - sizeof(struct cn10kb_cpt_sglist_component); - s_sz_bytes = ((req->out_cnt + 2) / 3) * - sizeof(struct cn10kb_cpt_sglist_component); + /* Allocate memory to meet below alignment requirement: + * ---------------------------------- + * | struct otx2_cpt_inst_info | + * | (No alignment required) | + * | -----------------------------| + * | | padding for 8B alignment | + * |----------------------------------| + * | SG List Gather/Input memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * |----------------------------------| + * | SG List Scatter/Output memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * | (padding for below alignment) | + * | -----------------------------| + * | | padding for 32B alignment | + * |----------------------------------| + * | Result response memory | + * ---------------------------------- + */ + + info_len = sizeof(*info);
- g_len = ALIGN(g_sz_bytes, align); - sg_len = ALIGN(g_len + s_sz_bytes, align); - info_len = ALIGN(sizeof(*info), align); - total_mem_len = sg_len + info_len + sizeof(union otx2_cpt_res_s); + g_len = ((req->in_cnt + 2) / 3) * + sizeof(struct cn10kb_cpt_sglist_component); + s_len = ((req->out_cnt + 2) / 3) * + sizeof(struct cn10kb_cpt_sglist_component); + sg_len = g_len + s_len; + + /* Allocate extra memory for SG and response address alignment */ + total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN) + sg_len; + total_mem_len = ALIGN(total_mem_len, OTX2_CPT_RES_ADDR_ALIGN) + + sizeof(union otx2_cpt_res_s);
info = kzalloc(total_mem_len, gfp); if (unlikely(!info)) @@ -375,7 +398,9 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, dlen += req->in[i].size;
info->dlen = dlen; - info->in_buffer = (u8 *)info + info_len; + info->in_buffer = PTR_ALIGN((u8 *)info + info_len, + OTX2_CPT_DPTR_RPTR_ALIGN); + info->out_buffer = info->in_buffer + g_len; info->gthr_sz = req->in_cnt; info->sctr_sz = req->out_cnt;
@@ -387,7 +412,7 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, }
if (sgv2io_components_setup(pdev, req->out, req->out_cnt, - &info->in_buffer[g_len])) { + info->out_buffer)) { dev_err(&pdev->dev, "Failed to setup scatter list\n"); goto destroy_info; } @@ -404,8 +429,10 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, * Get buffer for union otx2_cpt_res_s response * structure and its physical address */ - info->completion_addr = info->in_buffer + sg_len; - info->comp_baddr = info->dptr_baddr + sg_len; + info->completion_addr = PTR_ALIGN((info->in_buffer + sg_len), + OTX2_CPT_RES_ADDR_ALIGN); + info->comp_baddr = ALIGN((info->dptr_baddr + sg_len), + OTX2_CPT_RES_ADDR_ALIGN);
return info;
linux-stable-mirror@lists.linaro.org