octeontx2 crypto driver allocates memory using kmalloc/kzalloc, and uses this memory for dma (does dma_map_single()). It assumes that kmalloc/kzalloc will return 128-byte aligned address. But kmalloc/kzalloc returns 8-byte aligned address after below changes: "9382bc44b5f5 arm64: allow kmalloc() caches aligned to the smaller cache_line_size()
Memory allocated are used for following purpose: - Input data or scatter list address - 8-Byte alignment - Output data or gather list address - 8-Byte alignment - Completion address - 32-Byte alignment.
This patch ensures all addresses are aligned as mentioned above.
Signed-off-by: Bharat Bhushan bbhushan2@marvell.com Cc: stable@vger.kernel.org #v6.5+ --- v2->v3: - Align DMA memory to ARCH_DMA_MINALIGN as that is mapped as bidirectional
v1->v2: - Fixed memory padding size calculation as per review comment
.../marvell/octeontx2/otx2_cpt_reqmgr.h | 63 ++++++++++++++----- 1 file changed, 48 insertions(+), 15 deletions(-)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index e27e849b01df..204a31755710 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -34,6 +34,9 @@ #define SG_COMP_2 2 #define SG_COMP_1 1
+#define OTX2_CPT_DPTR_RPTR_ALIGN 8 +#define OTX2_CPT_RES_ADDR_ALIGN 32 + union otx2_cpt_opcode { u16 flags; struct { @@ -417,10 +420,9 @@ static inline struct otx2_cpt_inst_info * otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, gfp_t gfp) { - int align = OTX2_CPT_DMA_MINALIGN; struct otx2_cpt_inst_info *info; - u32 dlen, align_dlen, info_len; - u16 g_sz_bytes, s_sz_bytes; + u32 dlen, info_len; + u16 g_len, s_len; u32 total_mem_len;
if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT || @@ -429,22 +431,51 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, return NULL; }
- g_sz_bytes = ((req->in_cnt + 3) / 4) * - sizeof(struct otx2_cpt_sglist_component); - s_sz_bytes = ((req->out_cnt + 3) / 4) * - sizeof(struct otx2_cpt_sglist_component); + /* Allocate memory to meet below alignment requirement: + * ---------------------------------- + * | struct otx2_cpt_inst_info | + * | (No alignment required) | + * | -----------------------------| + * | | padding for 8B alignment | + * |----------------------------------| + * | SG List Gather/Input memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * |----------------------------------| + * | SG List Scatter/Output memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * | (padding for below alignment) | + * | -----------------------------| + * | | padding for 32B alignment | + * |----------------------------------| + * | Result response memory | + * ---------------------------------- + */
- dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; - align_dlen = ALIGN(dlen, align); - info_len = ALIGN(sizeof(*info), align); - total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s); + info_len = sizeof(*info); + + g_len = ((req->in_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + s_len = ((req->out_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + + dlen = g_len + s_len + SG_LIST_HDR_SIZE; + + /* Allocate extra memory for SG and response address alignment */ + total_mem_len = ALIGN(info_len, ARCH_DMA_MINALIGN) + dlen; + total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN); + total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) & + ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1); + total_mem_len += sizeof(union otx2_cpt_res_s);
info = kzalloc(total_mem_len, gfp); if (unlikely(!info)) return NULL;
info->dlen = dlen; - info->in_buffer = (u8 *)info + info_len; + info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN); + info->out_buffer = info->in_buffer + SG_LIST_HDR_SIZE + g_len;
((u16 *)info->in_buffer)[0] = req->out_cnt; ((u16 *)info->in_buffer)[1] = req->in_cnt; @@ -460,7 +491,7 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, }
if (setup_sgio_components(pdev, req->out, req->out_cnt, - &info->in_buffer[8 + g_sz_bytes])) { + info->out_buffer)) { dev_err(&pdev->dev, "Failed to setup scatter list\n"); goto destroy_info; } @@ -476,8 +507,10 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, * Get buffer for union otx2_cpt_res_s response * structure and its physical address */ - info->completion_addr = info->in_buffer + align_dlen; - info->comp_baddr = info->dptr_baddr + align_dlen; + info->completion_addr = PTR_ALIGN((info->in_buffer + dlen), + OTX2_CPT_RES_ADDR_ALIGN); + info->comp_baddr = ALIGN((info->dptr_baddr + dlen), + OTX2_CPT_RES_ADDR_ALIGN);
return info;
On Wed, May 21, 2025 at 03:34:46PM +0530, Bharat Bhushan wrote:
@@ -429,22 +431,51 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, return NULL; }
- g_sz_bytes = ((req->in_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
- s_sz_bytes = ((req->out_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
- /* Allocate memory to meet below alignment requirement:
* ----------------------------------
* | struct otx2_cpt_inst_info |
* | (No alignment required) |
* | -----------------------------|
* | | padding for 8B alignment |
* |----------------------------------|
This should be updated to show that everything following this is on an 128-byte boundary.
* | SG List Gather/Input memory |
* | Length = multiple of 32Bytes |
* | Alignment = 8Byte |
* |----------------------------------|
* | SG List Scatter/Output memory |
* | Length = multiple of 32Bytes |
* | Alignment = 8Byte |
* | (padding for below alignment) |
* | -----------------------------|
* | | padding for 32B alignment |
* |----------------------------------|
* | Result response memory |
* ----------------------------------
*/
- dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
- align_dlen = ALIGN(dlen, align);
- info_len = ALIGN(sizeof(*info), align);
- total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
- info_len = sizeof(*info);
- g_len = ((req->in_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
- s_len = ((req->out_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
- dlen = g_len + s_len + SG_LIST_HDR_SIZE;
- /* Allocate extra memory for SG and response address alignment */
- total_mem_len = ALIGN(info_len, ARCH_DMA_MINALIGN) + dlen;
- total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN);
- total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) &
~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
- total_mem_len += sizeof(union otx2_cpt_res_s);
This calculation is wrong again. It should be:
total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN); total_mem_len += (ARCH_DMA_MINALIGN - 1) & ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1); total_mem_len += ALIGN(dlen, OTX2_CPT_RES_ADDR_ALIGN); total_mem_len += sizeof(union otx2_cpt_res_s);
Remember ALIGN may not actually give you extra memory. So if you need to add memory for alignment padding, you will need to do it by hand.
Cheers,
On Wed, May 21, 2025 at 4:58 PM Herbert Xu herbert@gondor.apana.org.au wrote:
On Wed, May 21, 2025 at 03:34:46PM +0530, Bharat Bhushan wrote:
@@ -429,22 +431,51 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, return NULL; }
g_sz_bytes = ((req->in_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
s_sz_bytes = ((req->out_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
/* Allocate memory to meet below alignment requirement:
* ----------------------------------
* | struct otx2_cpt_inst_info |
* | (No alignment required) |
* | -----------------------------|
* | | padding for 8B alignment |
* |----------------------------------|
This should be updated to show that everything following this is on an 128-byte boundary.
* | SG List Gather/Input memory |
* | Length = multiple of 32Bytes |
* | Alignment = 8Byte |
* |----------------------------------|
* | SG List Scatter/Output memory |
* | Length = multiple of 32Bytes |
* | Alignment = 8Byte |
* | (padding for below alignment) |
* | -----------------------------|
* | | padding for 32B alignment |
* |----------------------------------|
* | Result response memory |
* ----------------------------------
*/
dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE;
align_dlen = ALIGN(dlen, align);
info_len = ALIGN(sizeof(*info), align);
total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s);
info_len = sizeof(*info);
g_len = ((req->in_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
s_len = ((req->out_cnt + 3) / 4) *
sizeof(struct otx2_cpt_sglist_component);
dlen = g_len + s_len + SG_LIST_HDR_SIZE;
/* Allocate extra memory for SG and response address alignment */
total_mem_len = ALIGN(info_len, ARCH_DMA_MINALIGN) + dlen;
total_mem_len = ALIGN(total_mem_len, OTX2_CPT_DPTR_RPTR_ALIGN);
total_mem_len += (OTX2_CPT_RES_ADDR_ALIGN - 1) &
~(OTX2_CPT_DPTR_RPTR_ALIGN - 1);
total_mem_len += sizeof(union otx2_cpt_res_s);
This calculation is wrong again. It should be:
total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN); total_mem_len += (ARCH_DMA_MINALIGN - 1) & ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1); total_mem_len += ALIGN(dlen, OTX2_CPT_RES_ADDR_ALIGN); total_mem_len += sizeof(union otx2_cpt_res_s);
Remember ALIGN may not actually give you extra memory. So if you need to add memory for alignment padding, you will need to do it by hand.
Will do changes as proposed above, Thanks for reviewing.
Thanks -Bharat
Cheers,
Email: Herbert Xu herbert@gondor.apana.org.au Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
linux-stable-mirror@lists.linaro.org