doc https://docs.qq.com/doc/DRVBtbVh6TkdaTFVB
memcpy ++ ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva --opt 0 --sync --pktlen 1024 algname: length: perf: iops: CPU_rate: zlib 1024Bytes 336851.00KiB/s 336.9Kops 293.67%
user pointer // --user ++ ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva --user --opt 0 --sync --pktlen 1024 algname: length: perf: iops: CPU_rate: zlib 1024Bytes 522916.00KiB/s 522.9Kops 299.67%
sgl // --sgl ++ ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva --sgl --opt 0 --sync --pktlen 1024 algname: length: perf: iops: CPU_rate: zlib 1024Bytes 415577.00KiB/s 415.6Kops 299.67%
Zhangfei Gao (7): uadk: remove nosva limitation uadk: add wd_reserve_mem & wd_is_noiommu for nosva uadk: add blkpool uadk: ctxs call wd_blkpool_new at init and delete blkpool at uninit wd_comp: add wd_comp_setup_blkpool wd_comp: support nosva case uadk_tool: support nosva test
Makefile.am | 1 + drv/hisi_comp.c | 98 ++- drv/hisi_qm_udrv.c | 14 +- drv/hisi_qm_udrv.h | 3 +- drv/hisi_sec.c | 8 +- include/drv/wd_comp_drv.h | 7 + include/uacce.h | 6 +- include/wd.h | 16 + include/wd_alg_common.h | 9 + include/wd_bmm.h | 66 ++ include/wd_comp.h | 2 + libwd.map | 10 + libwd_comp.map | 1 + uadk_tool/benchmark/uadk_benchmark.c | 8 + uadk_tool/benchmark/uadk_benchmark.h | 2 + uadk_tool/benchmark/zip_uadk_benchmark.c | 146 ++++- wd.c | 34 +- wd_alg.c | 44 +- wd_bmm.c | 739 +++++++++++++++++++++++ wd_comp.c | 84 +++ wd_util.c | 13 +- 21 files changed, 1213 insertions(+), 98 deletions(-) create mode 100644 include/wd_bmm.h create mode 100644 wd_bmm.c
Remove nosva limitation to permit nosva run
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- wd.c | 4 ---- wd_alg.c | 44 +------------------------------------------- wd_util.c | 4 ---- 3 files changed, 1 insertion(+), 51 deletions(-)
diff --git a/wd.c b/wd.c index 75a9469..5fa8feb 100644 --- a/wd.c +++ b/wd.c @@ -235,10 +235,6 @@ static int get_dev_info(struct uacce_dev *dev) ret = get_int_attr(dev, "flags", &dev->flags); if (ret < 0) return ret; - else if (!((unsigned int)dev->flags & UACCE_DEV_SVA)) { - WD_ERR("skip none sva uacce device!\n"); - return -WD_ENODEV; - }
ret = get_int_attr(dev, "region_mmio_size", &value); if (ret < 0) diff --git a/wd_alg.c b/wd_alg.c index 08f0e2e..45619ba 100644 --- a/wd_alg.c +++ b/wd_alg.c @@ -23,47 +23,6 @@ static struct wd_alg_list alg_list_head; static struct wd_alg_list *alg_list_tail = &alg_list_head;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - -static bool wd_check_dev_sva(const char *dev_name) -{ - char dev_path[PATH_MAX] = {'\0'}; - char buf[DEV_SVA_SIZE] = {'\0'}; - unsigned int val; - ssize_t ret; - int fd; - - ret = snprintf(dev_path, PATH_STR_SIZE, "%s/%s/%s", SYS_CLASS_DIR, - dev_name, SVA_FILE_NAME); - if (ret < 0) { - WD_ERR("failed to snprintf, device name: %s!\n", dev_name); - return false; - } - - /** - * The opened file is the specified device driver file. - * no need for realpath processing. - */ - fd = open(dev_path, O_RDONLY, 0); - if (fd < 0) { - WD_ERR("failed to open %s(%d)!\n", dev_path, -errno); - return false; - } - - ret = read(fd, buf, DEV_SVA_SIZE - 1); - if (ret <= 0) { - WD_ERR("failed to read anything at %s!\n", dev_path); - close(fd); - return false; - } - close(fd); - - val = strtol(buf, NULL, STR_DECIMAL); - if (val & UACCE_DEV_SVA) - return true; - - return false; -} - static bool wd_check_accel_dev(const char *dev_name) { struct dirent *dev_dir; @@ -80,8 +39,7 @@ static bool wd_check_accel_dev(const char *dev_name) !strncmp(dev_dir->d_name, "..", LINUX_PRTDIR_SIZE)) continue;
- if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name)) && - wd_check_dev_sva(dev_dir->d_name)) { + if (!strncmp(dev_dir->d_name, dev_name, strlen(dev_name))) { closedir(wd_class); return true; } diff --git a/wd_util.c b/wd_util.c index f1b27bf..9675098 100644 --- a/wd_util.c +++ b/wd_util.c @@ -1883,10 +1883,6 @@ int wd_init_param_check(struct wd_ctx_config *config, struct wd_sched *sched) return -WD_EINVAL; }
- if (!wd_is_sva(config->ctxs[0].ctx)) { - WD_ERR("invalid: the mode is non sva, please check system!\n"); - return -WD_EINVAL; - }
return 0; }
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- include/uacce.h | 6 +++++- include/wd.h | 16 ++++++++++++++++ wd.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/include/uacce.h b/include/uacce.h index f7fae27..c7a5752 100644 --- a/include/uacce.h +++ b/include/uacce.h @@ -15,6 +15,7 @@ extern "C" {
#define UACCE_CMD_START _IO('W', 0) #define UACCE_CMD_PUT_Q _IO('W', 1) +#define UACCE_CMD_GET_SS_DMA _IOR('W', 3, unsigned long)
/** * UACCE Device flags: @@ -25,7 +26,9 @@ extern "C" { */
enum { - UACCE_DEV_SVA = 0x1, + UACCE_DEV_SVA = 0x1, + UACCE_DEV_NOIOMMU = 0x2, + UACCE_DEV_IOMMUU = 0x80, };
#define UACCE_API_VER_NOIOMMU_SUBFIX "_noiommu" @@ -33,6 +36,7 @@ enum { enum uacce_qfrt { UACCE_QFRT_MMIO = 0, /* device mmio region */ UACCE_QFRT_DUS = 1, /* device user share */ + UACCE_QFRT_SS, /* static share memory */ UACCE_QFRT_MAX, };
diff --git a/include/wd.h b/include/wd.h index b62d355..556a992 100644 --- a/include/wd.h +++ b/include/wd.h @@ -604,6 +604,22 @@ struct wd_capability { struct wd_capability *wd_get_alg_cap(void); void wd_release_alg_cap(struct wd_capability *head);
+/** + * wd_is_noiommu() - Check if the system is noiommu. + * @h_ctx: The handle of context. + * + * Return 1 if noiommu, 0 for has iommu, less than 0 otherwise. + */ +int wd_is_noiommu(handle_t h_ctx); + +/** + * wd_reserve_mem() - Reserve memory iva mmap. + * @h_ctx: The handle of context. + * @size: size of the reserved memory. + * + * Return NULL if fail, pointer of the memory if success. + */ +void *wd_reserve_mem(handle_t h_ctx, size_t size); #ifdef __cplusplus } #endif diff --git a/wd.c b/wd.c index 5fa8feb..5c930f4 100644 --- a/wd.c +++ b/wd.c @@ -578,6 +578,19 @@ int wd_ctx_wait(handle_t h_ctx, __u16 ms) return ret; }
+int wd_is_noiommu(handle_t h_ctx) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + + if (!ctx || !ctx->dev) + return -WD_EINVAL; + + if ((unsigned int)ctx->dev->flags & UACCE_DEV_NOIOMMU) + return 1; + + return 0; +} + int wd_is_sva(handle_t h_ctx) { struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; @@ -974,3 +987,20 @@ alloc_err: return NULL; }
+void *wd_reserve_mem(handle_t h_ctx, size_t size) +{ + struct wd_ctx_h *ctx = (struct wd_ctx_h *)h_ctx; + void *ptr; + + if (!ctx) + return NULL; + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED, ctx->fd, UACCE_QFRT_SS * getpagesize()); + if (ptr == MAP_FAILED) { + WD_ERR("wd drv mmap fail!(err = %d)\n", errno); + return NULL; + } + + return ptr; +}
Add new apis: wd_blkpool_new; wd_blkpool_delete; wd_blkpool_phy; wd_blkpool_alloc; wd_blkpool_free; wd_blkpool_setup; wd_blkpool_destroy_mem; wd_blkpool_create_sglpool; wd_blkpool_destroy_sglpool;
After setting up the blkpool, the app only needs two APIs: wd_blkpool_alloc and wd_blkpool_free.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- Makefile.am | 1 + include/wd_bmm.h | 66 +++++ libwd.map | 10 + wd_bmm.c | 739 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 816 insertions(+) create mode 100644 include/wd_bmm.h create mode 100644 wd_bmm.c
diff --git a/Makefile.am b/Makefile.am index c4b9c52..87b3811 100644 --- a/Makefile.am +++ b/Makefile.am @@ -60,6 +60,7 @@ libwd_la_SOURCES=wd.c wd_mempool.c wd.h wd_alg.c wd_alg.h \ v1/wd_bmm.c v1/wd_bmm.h \ v1/wd_ecc.c v1/wd_ecc.h \ v1/wd_sgl.c v1/wd_sgl.h \ + wd_bmm.c \ aes.h sm4.h galois.h \ lib/crypto/aes.c lib/crypto/sm4.c lib/crypto/galois.c \ v1/drv/hisi_qm_udrv.c v1/drv/hisi_qm_udrv.h \ diff --git a/include/wd_bmm.h b/include/wd_bmm.h new file mode 100644 index 0000000..15443fd --- /dev/null +++ b/include/wd_bmm.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright 2025-2026 Huawei Technologies Co.,Ltd. All rights reserved. + * Copyright 2025-2026 Linaro ltd. + */ + +#ifndef _WD_BMM_H +#define _WD_BMM_H + +#include <asm/types.h> +#include "wd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DEFAULT_BLK_ALIGN 0x1000 +#define DEFAULT_BLOCK_NM 16384 +#define DEFAULT_ALIGN_SIZE 0x40 + +/* the max sge num in one sgl */ +#define HISI_SGE_NUM_IN_SGL 255 + +/* the max sge num in on BD, QM user it be the sgl pool size */ +#define HISI_SGL_NUM_IN_BD 256 + +/* memory APIs for Algorithm Layer */ +typedef void *(*wd_alloc)(void *usr, size_t size); +typedef void (*wd_free)(void *usr, void *va); + + /* memory VA to DMA address map */ +typedef void *(*wd_map)(void *usr, void *va, size_t sz); +typedef __u32 (*wd_bufsize)(void *usr); + +/* Memory from user, it is given at ctx creating. 
*/ +struct wd_mm_br { + wd_alloc alloc; /* Memory allocation */ + wd_free free; /* Memory free */ + wd_map iova_map; /* get iova from user space VA */ + void *usr; /* data for the above operations */ + wd_bufsize get_bufsize; /* optional */ +}; + +/* Memory pool creating parameters */ +struct wd_blkpool_setup { + __u32 block_size; /* Block buffer size */ + __u32 block_num; /* Block buffer number */ + __u32 align_size; /* Block buffer starting address align size */ + struct wd_mm_br br; /* memory from user if don't use WD memory */ +}; + + +void *wd_blkpool_new(handle_t h_ctx); +void wd_blkpool_delete(void *pool); +int wd_blkpool_setup(void *pool, struct wd_blkpool_setup *setup); +void wd_blkpool_destroy_mem(void *pool); +void *wd_blkpool_alloc(void *pool, size_t size); +void wd_blkpool_free(void *pool, void *va); +void *wd_blkpool_phy(void *pool, void *va); +handle_t wd_blkpool_create_sglpool(void *pool, __u32 sgl_num, __u32 sge_num); +void wd_blkpool_destroy_sglpool(void *pool, handle_t sgl_pool); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/libwd.map b/libwd.map index 5522ec0..e884671 100644 --- a/libwd.map +++ b/libwd.map @@ -49,5 +49,15 @@ global: wd_enable_drv; wd_disable_drv; wd_get_alg_head; + + wd_blkpool_new; + wd_blkpool_delete; + wd_blkpool_phy; + wd_blkpool_alloc; + wd_blkpool_free; + wd_blkpool_setup; + wd_blkpool_destroy_mem; + wd_blkpool_create_sglpool; + wd_blkpool_destroy_sglpool; local: *; }; diff --git a/wd_bmm.c b/wd_bmm.c new file mode 100644 index 0000000..c44c7d6 --- /dev/null +++ b/wd_bmm.c @@ -0,0 +1,739 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Copyright 2025-2026 Huawei Technologies Co.,Ltd. All rights reserved. + * Copyright 2025-2026 Linaro ltd. 
+ */ + +/* Block Memory Management (lib): A block memory algorithm */ +#include <asm/byteorder.h> +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> +#include <sys/queue.h> +#include <sys/mman.h> +#include <pthread.h> + +#include "wd.h" +#include "wd_bmm.h" + +#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) + +#define TAG_FREE 0x12345678 /* block is free */ +#define TAG_USED 0x87654321 /* block is busy */ +#define MAX_ALIGN_SIZE 0x1000 /* 4KB */ +#define MAX_BLOCK_SIZE 0x10000000 /* 256MB */ +#define BLK_BALANCE_SZ 0x100000ul +#define NUM_TIMES(x) (87 * (x) / 100) + +struct wd_blk_hd { + unsigned int blk_tag; + void *blk_dma; + void *blk; + + TAILQ_ENTRY(wd_blk_hd) next; +}; + +TAILQ_HEAD(wd_blk_list, wd_blk_hd); + +struct wd_ss_region { + void *va; + unsigned long long pa; + size_t size; + + TAILQ_ENTRY(wd_ss_region) next; +}; + +TAILQ_HEAD(wd_ss_region_list, wd_ss_region); + +struct wd_blkpool { + pthread_spinlock_t lock; + unsigned int free_blk_num; + unsigned int blk_num; + unsigned int alloc_failures; + struct wd_blk_list head; + void *act_start; + unsigned int hd_sz; + unsigned int blk_sz; + struct wd_blkpool_setup setup; + + handle_t ctx; + void *mem; + unsigned long size; + struct wd_ss_region_list ss_list; + struct wd_ss_region_list *ss_head; +}; + +static struct wd_blk_hd *wd_blk_head(struct wd_blkpool *pool, void *blk) +{ + unsigned long offset = (unsigned long)((uintptr_t)blk - + (uintptr_t)pool->act_start); + unsigned long sz = pool->hd_sz + pool->blk_sz; + unsigned long blk_idx = offset / sz; + + return (struct wd_blk_hd *)((uintptr_t)pool->act_start + blk_idx * sz); +} + +static int pool_params_check(struct wd_blkpool_setup *setup) +{ + if (!setup->block_size || + setup->block_size > MAX_BLOCK_SIZE) { + WD_ERR("Invalid block_size (%x)!\n", + setup->block_size); + return -WD_EINVAL; + } + + /* check parameters, and 
align_size must be 2^N */ + if (setup->align_size == 0x1 || setup->align_size > MAX_ALIGN_SIZE || + setup->align_size & (setup->align_size - 0x1)) { + WD_ERR("Invalid align_size.\n"); + return -WD_EINVAL; + } + + return WD_SUCCESS; +} + +#define WD_UACCE_GRAN_SIZE 0x10000ull +static int wd_pool_pre_layout(struct wd_blkpool *p, + struct wd_blkpool_setup *sp) +{ + unsigned int asz; + int ret; + + ret = pool_params_check(sp); + if (ret) + return ret; + + asz = sp->align_size; + + /* Get actual value by align */ + p->hd_sz = ALIGN(sizeof(struct wd_blk_hd), asz); + p->blk_sz = ALIGN(sp->block_size, asz); + if (p->size == 0 && !p->mem) { + p->size = (p->hd_sz + p->blk_sz) * + (unsigned long)sp->block_num + asz; + + /* Make sure memory map granularity size align */ + if (wd_is_noiommu(p->ctx)) + p->size = ALIGN(p->size, WD_UACCE_GRAN_SIZE); + } + + return WD_SUCCESS; +} + +static void *wd_get_phys(struct wd_blkpool *pool, void *va) +{ + struct wd_ss_region *rgn; + + TAILQ_FOREACH(rgn, pool->ss_head, next) { + if (rgn->va <= va && va < rgn->va + rgn->size) + return (void *)(uintptr_t)(rgn->pa + + ((uintptr_t)va - (uintptr_t)rgn->va)); + } + + return NULL; +} + +static int wd_pool_init(struct wd_blkpool *p) +{ + __u32 blk_size = p->setup.block_size; + void *dma_start, *dma_end, *va; + struct wd_blk_hd *hd = NULL; + unsigned int dma_num = 0; + unsigned int i, act_num; + unsigned long loss; + + p->act_start = (void *)ALIGN((uintptr_t)p->mem, + p->setup.align_size); + loss = p->act_start - p->mem; + + act_num = (p->size - loss) / (p->hd_sz + p->blk_sz); + + /* get dma address and initialize blocks */ + for (i = 0; i < act_num; i++) { + va = (void *)((uintptr_t)p->act_start + p->hd_sz + + (unsigned long)(p->hd_sz + + p->blk_sz) * i); + dma_start = wd_get_phys(p, va); + dma_end = wd_get_phys(p, va + blk_size - 1); + if (!dma_start || !dma_end) { + WD_ERR("wd_get_phys err.\n"); + return -WD_ENOMEM; + } + + if ((uintptr_t)dma_end - (uintptr_t)dma_start != blk_size - 1) + continue; 
+ + hd = (void *)((uintptr_t)va - p->hd_sz); + hd->blk_dma = dma_start; + hd->blk = va; + hd->blk_tag = TAG_FREE; + TAILQ_INSERT_TAIL(&p->head, hd, next); + + dma_num++; + } + + p->free_blk_num = dma_num; + p->blk_num = dma_num; + + return WD_SUCCESS; +} + +static int usr_pool_init(struct wd_blkpool *p) +{ + struct wd_blkpool_setup *sp = &p->setup; + __u32 blk_size = sp->block_size; + struct wd_blk_hd *hd = NULL; + unsigned long loss; + unsigned int i, act_num; + + p->act_start = (void *)ALIGN((uintptr_t)p->mem, + sp->align_size); + loss = p->act_start - p->mem; + act_num = (p->size - loss) / (p->hd_sz + p->blk_sz); + + for (i = 0; i < act_num; i++) { + hd = (void *)((uintptr_t)p->act_start + (p->hd_sz + p->blk_sz) * i); + hd->blk = (void *)((uintptr_t)hd + p->hd_sz); + hd->blk_dma = sp->br.iova_map(sp->br.usr, hd->blk, blk_size); + if (!hd->blk_dma) { + WD_ERR("failed to map usr blk.\n"); + return -WD_ENOMEM; + } + hd->blk_tag = TAG_FREE; + TAILQ_INSERT_TAIL(&p->head, hd, next); + } + + p->free_blk_num = act_num; + p->blk_num = p->free_blk_num; + + return WD_SUCCESS; +} + +static void drv_free_slice(struct wd_blkpool *p) +{ + struct wd_ss_region *rgn; + + while (true) { + rgn = TAILQ_FIRST(&p->ss_list); + if (!rgn) + break; + TAILQ_REMOVE(&p->ss_list, rgn, next); + free(rgn); + } +} + +static void drv_add_slice(struct wd_blkpool *p, struct wd_ss_region *rgn) +{ + struct wd_ss_region *rg; + + rg = TAILQ_LAST(&p->ss_list, wd_ss_region_list); + if (rg) { + if (rg->pa + rg->size == rgn->pa) { + rg->size += rgn->size; + free(rgn); + return; + } + } + + TAILQ_INSERT_TAIL(&p->ss_list, rgn, next); +} + +#define WD_UACCE_GRAN_SHIFT 16 +#define WD_UACCE_GRAN_NUM_MASK 0xfffull +static void *pool_reserve_mem(struct wd_blkpool *p, size_t size) +{ + struct wd_ss_region *rgn = NULL; + unsigned long info = 0; + size_t tmp = 0; + unsigned long i = 0; + void *ptr = NULL; + int ret = 1; + + if (!p->ctx) + return NULL; + + if (p->mem) + return NULL; + + ptr = wd_reserve_mem(p->ctx, 
size); + if (!ptr) + return NULL; + + p->ss_head = &p->ss_list; + TAILQ_INIT(&p->ss_list); + + while (ret > 0) { + info = i; + ret = wd_ctx_set_io_cmd(p->ctx, UACCE_CMD_GET_SS_DMA, &info); + if (ret < 0) { + WD_ERR("get DMA fail!\n"); + goto err_out; + } + rgn = malloc(sizeof(*rgn)); + if (!rgn) { + WD_ERR("alloc ss region fail!\n"); + goto err_out; + } + memset(rgn, 0, sizeof(*rgn)); + + if (wd_is_noiommu(p->ctx)) + rgn->size = (info & WD_UACCE_GRAN_NUM_MASK) << + WD_UACCE_GRAN_SHIFT; + else + rgn->size = p->size; + rgn->pa = info & (~WD_UACCE_GRAN_NUM_MASK); + rgn->va = ptr + tmp; + tmp += rgn->size; + drv_add_slice(p, rgn); + i++; + } + + return ptr; + +err_out: + drv_free_slice(p); + munmap(p->mem, size); + + return NULL; +} + +static int pool_init(struct wd_blkpool *pool, + struct wd_blkpool_setup *setup) +{ + void *addr = NULL; + + /* use user's memory, and its br alloc function */ + if (setup->br.alloc && setup->br.free) { + if (!pool->mem) { + addr = setup->br.alloc(setup->br.usr, pool->size); + if (!addr) { + WD_ERR("failed to allocate memory in user pool.\n"); + return -EINVAL; + } + pool->mem = addr; + } + if (usr_pool_init(pool)) { + WD_ERR("failed to initialize user pool.\n"); + setup->br.free(setup->br.usr, addr); + return -EINVAL; + } + } else { + if (!pool->mem) { + /* use wd to reserve memory */ + addr = pool_reserve_mem(pool, pool->size); + if (!addr) { + WD_ERR("wd pool failed to reserve memory.\n"); + return -EINVAL; + } + pool->mem = addr; + } + + if (wd_pool_init(pool)) { + WD_ERR("failed to initialize wd pool.\n"); + wd_blkpool_destroy_mem(pool); + return -EINVAL; + } + } + + return 0; +} + +void *wd_blkpool_new(handle_t h_ctx) +{ + struct wd_blkpool *pool; + + if (wd_is_sva(h_ctx)) + return NULL; + + pool = calloc(1, sizeof(*pool)); + if (!pool) { + WD_ERR("failed to malloc pool.\n"); + return NULL; + } + pool->ctx = h_ctx; + + if (pthread_spin_init(&pool->lock, PTHREAD_PROCESS_SHARED) != 0) { + free(pool); + return NULL; + } + return pool; 
+} + +int wd_blkpool_setup(void *pool, struct wd_blkpool_setup *setup) +{ + struct wd_blkpool *p = pool; + int ret = 0; + + if (!p || !setup) + return -EINVAL; + + pthread_spin_lock(&p->lock); + if (p->mem && p->size != 0) { + if (p->setup.block_size == setup->block_size || + p->blk_sz == ALIGN(setup->block_size, setup->align_size)) + goto out; + + /* re-org blk_size, no need reserve mem */ + if (p->free_blk_num != p->blk_num) { + WD_ERR("Can not reset blk pool, as it's in use.\n"); + ret = -EINVAL; + goto out; + } + } + + memcpy(&p->setup, setup, sizeof(p->setup)); + + ret = wd_pool_pre_layout(p, setup); + if (ret) + goto out; + + TAILQ_INIT(&p->head); + + ret = pool_init(p, setup); + +out: + pthread_spin_unlock(&p->lock); + return ret; +} + +void *wd_blkpool_alloc(void *pool, size_t size) +{ + struct wd_blkpool *p = pool; + struct wd_blk_hd *hd; + int ret; + + if (unlikely(!p)) { + WD_ERR("blk alloc pool is null!\n"); + return NULL; + } + + if (!p->mem || size > p->blk_sz) { + struct wd_blkpool_setup setup; + /* + * if empty pool, will reserve mem and init pool + * if size > blk_size, will re-org as align 4K if free pool + */ + + memset(&setup, 0, sizeof(setup)); + setup.block_size = ALIGN(size, DEFAULT_BLK_ALIGN); + setup.block_num = DEFAULT_BLOCK_NM; + setup.align_size = DEFAULT_ALIGN_SIZE; + ret = wd_blkpool_setup(p, &setup); + if (ret) + return NULL; + } + + pthread_spin_lock(&p->lock); + hd = TAILQ_LAST(&p->head, wd_blk_list); + if (unlikely(!hd || hd->blk_tag != TAG_FREE)) { + p->alloc_failures++; + goto out; + } + + /* Delete the block buffer from free list */ + TAILQ_REMOVE(&p->head, hd, next); + p->free_blk_num--; + hd->blk_tag = TAG_USED; + pthread_spin_unlock(&p->lock); + + return hd->blk; + +out: + pthread_spin_unlock(&p->lock); + WD_ERR("Failed to malloc blk.\n"); + + return NULL; +} + +void wd_blkpool_free(void *pool, void *va) +{ + struct wd_blkpool *p = pool; + struct wd_blk_hd *hd; + + if (unlikely(!p || !va)) { + WD_ERR("free blk parameters 
err!\n"); + return; + } + + hd = wd_blk_head(p, va); + if (unlikely(hd->blk_tag != TAG_USED)) { + WD_ERR("free block fail!\n"); + return; + } + + pthread_spin_lock(&p->lock); + TAILQ_INSERT_TAIL(&p->head, hd, next); + p->free_blk_num++; + hd->blk_tag = TAG_FREE; + pthread_spin_unlock(&p->lock); +} + +void *wd_blkpool_phy(void *pool, void *va) +{ + struct wd_blk_hd *hd; + + if (unlikely(!pool || !va)) { + WD_ERR("blk map err, pool is NULL!\n"); + return NULL; + } + + hd = wd_blk_head(pool, va); + if (unlikely(hd->blk_tag != TAG_USED || + (uintptr_t)va < (uintptr_t)hd->blk)) { + WD_ERR("dma map fail!\n"); + return NULL; + } + + return (void *)((uintptr_t)hd->blk_dma + ((uintptr_t)va - + (uintptr_t)hd->blk)); +} + +/* no work */ +void *wd_blkpool_va(void *pool, void *pa) +{ + struct wd_blkpool *p = pool; + struct wd_blk_hd *hd; + + if (unlikely(!pool || !pa)) { + WD_ERR("blk map err, pool is NULL!\n"); + return NULL; + } + + TAILQ_FOREACH(hd, &p->head, next) { + if ((uintptr_t)pa >= (uintptr_t)hd->blk_dma && + (uintptr_t)pa < (uintptr_t)hd->blk_dma + p->blk_sz) { + return (void *)((uintptr_t)hd->blk + + ((uintptr_t)pa - (uintptr_t)hd->blk_dma)); + } + } + + return NULL; +} + +int wd_blkpool_get_free_blk_num(void *pool, __u32 *free_num) +{ + struct wd_blkpool *p = pool; + + if (!p || !free_num) { + WD_ERR("get_free_blk_num err, parameter err!\n"); + return -WD_EINVAL; + } + + *free_num = __atomic_load_n(&p->free_blk_num, __ATOMIC_RELAXED); + + return WD_SUCCESS; +} + +int wd_blkpool_alloc_failures(void *pool, __u32 *fail_num) +{ + struct wd_blkpool *p = pool; + + if (!p || !fail_num) { + WD_ERR("get_blk_alloc_failure err, pool is NULL!\n"); + return -WD_EINVAL; + } + + *fail_num = __atomic_load_n(&p->alloc_failures, __ATOMIC_RELAXED); + + return WD_SUCCESS; +} + +__u32 wd_blkpool_blksize(void *pool) +{ + struct wd_blkpool *p = pool; + + if (!p) { + WD_ERR("get blk_size pool is null!\n"); + return 0; + } + + return p->blk_sz; +} + +void wd_blkpool_destroy_mem(void 
*pool) +{ + struct wd_blkpool_setup *setup; + struct wd_blkpool *p = pool; + + if (!p) { + WD_ERR("pool destroy err, pool is NULL.\n"); + return; + } + + pthread_spin_lock(&p->lock); + if (p->mem) { + setup = &p->setup; + if (setup->br.free) { + setup->br.free(setup->br.usr, p->mem); + } else { + drv_free_slice(p); + munmap(p->mem, p->size); + } + p->mem = NULL; + p->size = 0; + } + pthread_spin_unlock(&p->lock); +} + +void wd_blkpool_delete(void *pool) +{ + struct wd_blkpool *p = pool; + + if (!p) + return; + + wd_blkpool_destroy_mem(pool); + pthread_spin_destroy(&p->lock); + free(p); +} + +struct hisi_sge { + uintptr_t buff; + void *page_ctrl; + __le32 len; + __le32 pad; + __le32 pad0; + __le32 pad1; +}; + +/* use default hw sgl head size 64B, in little-endian */ +struct hisi_sgl { + /* the next sgl address */ + uintptr_t next_dma; + /* the sge num of all the sgl */ + __le16 entry_sum_in_chain; + /* valid sge(has buff) num in this sgl */ + __le16 entry_sum_in_sgl; + /* the sge num in this sgl */ + __le16 entry_length_in_sgl; + __le16 pad0; + __le64 pad1[5]; + /* valid sge buffs total size */ + __le64 entry_size_in_sgl; + struct hisi_sge sge_entries[]; +}; + +struct hisi_sgl_pool { + /* the addr64 align offset base sgl */ + void **sgl_align; + /* the sgl src address array */ + void **sgl; + /* the sgl pool stack depth */ + __u32 depth; + __u32 top; + __u32 sge_num; + __u32 sgl_num; + pthread_spinlock_t lock; + __u32 blk_num; + void **blks; +}; + +#define ADDR_ALIGN_64(addr) (((uintptr_t)(addr) + 63) & ~63) +handle_t wd_blkpool_create_sglpool(void *pool, __u32 sgl_num, __u32 sge_num) +{ + struct wd_blkpool *p = pool; + struct hisi_sgl_pool *sgl_pool; + struct hisi_sgl *sgl_align; + int sgl_size, size, i, j, blk_num; + __u32 num = 0, num_per_blk; + void *base; + + if (!sge_num || sge_num > HISI_SGE_NUM_IN_SGL || + !sgl_num || sgl_num > HISI_SGL_NUM_IN_BD) { + WD_ERR("failed to create sgl_pool, sgl_num=%u, sge_num=%u!\n", + sgl_num, sge_num); + return 0; + } + + 
sgl_pool = calloc(1, sizeof(struct hisi_sgl_pool)); + if (!sgl_pool) { + WD_ERR("failed to alloc memory for sgl_pool!\n"); + return 0; + } + + sgl_pool->sgl_align = calloc(sgl_num, sizeof(void *)); + if (!sgl_pool->sgl_align) { + WD_ERR("failed to alloc memory for sgl align!\n"); + goto err_out; + } + + sgl_size = sizeof(struct hisi_sgl) + sge_num * sizeof(struct hisi_sge); + sgl_size = ALIGN(sgl_size, 64); /* 64 bytes aligned */ + num_per_blk = p->blk_sz / sgl_size; + size = num_per_blk * sgl_size; + blk_num = (sgl_num + num_per_blk - 1) / num_per_blk; + sgl_pool->blk_num = blk_num; + + sgl_pool->blks = calloc(blk_num, sizeof(void *)); + if (!sgl_pool->blks) { + WD_ERR("failed to alloc memory for sgl blks!\n"); + goto err_out; + } + + for (i = 0; i < blk_num; i++) { + base = wd_blkpool_alloc(p, size); + if (!base) { + WD_ERR("blk_pool_alloc failed!\n"); + goto err_out; + } + sgl_pool->blks[i] = base; + + for (j = 0; j < num_per_blk; j++) { + sgl_align = (struct hisi_sgl *)ADDR_ALIGN_64(base + sgl_size * j); + sgl_align->entry_sum_in_chain = sge_num; + sgl_align->entry_sum_in_sgl = 0; + sgl_align->entry_length_in_sgl = sge_num; + sgl_align->next_dma = 0; + sgl_pool->sgl_align[num] = sgl_align; + if (++num == sgl_num) + break; + } + } + + if (pthread_spin_init(&sgl_pool->lock, PTHREAD_PROCESS_SHARED) != 0) { + WD_ERR("failed to init sgl pool lock!\n"); + goto err_out; + } + + sgl_pool->sgl_num = sgl_num; + sgl_pool->sge_num = sge_num; + sgl_pool->depth = sgl_num; + sgl_pool->top = sgl_num; + + return (handle_t)sgl_pool; + +err_out: + if (sgl_pool->blks) { + for (i = 0; i < sgl_pool->blk_num; i++) + wd_blkpool_free(p, sgl_pool->blks[i]); + free(sgl_pool->blks); + } + if (sgl_pool->sgl_align) + free(sgl_pool->sgl_align); + free(sgl_pool); + return (handle_t)0; +} + +void wd_blkpool_destroy_sglpool(void *pool, handle_t h_sgl_pool) +{ + struct hisi_sgl_pool *sgl_pool = (struct hisi_sgl_pool *)h_sgl_pool; + struct wd_blkpool *p = pool; + int i; + + if (!h_sgl_pool || 
!pool) + return; + + pthread_spin_destroy(&sgl_pool->lock); + if (sgl_pool->blks) { + for (i = 0; i < sgl_pool->blk_num; i++) + wd_blkpool_free(p, sgl_pool->blks[i]); + free(sgl_pool->blks); + } + if (sgl_pool->sgl_align) + free(sgl_pool->sgl_align); + free(sgl_pool); +}
At init, all ctxs call wd_blkpool_new; only the nosva case gets a pointer, while the sva case gets NULL.
At uninit, the blkpool and its related resources are deleted.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- include/wd_alg_common.h | 9 +++++++++ wd_util.c | 9 ++++++++- 2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/include/wd_alg_common.h b/include/wd_alg_common.h index fd77426..951995f 100644 --- a/include/wd_alg_common.h +++ b/include/wd_alg_common.h @@ -136,12 +136,21 @@ struct wd_soft_ctx { void *priv; };
+enum wd_blkpool_mode { + BLKPOOL_MODE_MEMCPY, + BLKPOOL_MODE_USER, + BLKPOOL_MODE_MAX, +}; + struct wd_ctx_internal { handle_t ctx; __u8 op_type; __u8 ctx_mode; __u16 sqn; pthread_spinlock_t lock; + void *blkpool; + __u8 blkpool_mode; + handle_t h_sgl_pool; };
struct wd_ctx_config_internal { diff --git a/wd_util.c b/wd_util.c index 9675098..949d467 100644 --- a/wd_util.c +++ b/wd_util.c @@ -12,6 +12,7 @@ #include <string.h> #include <ctype.h> #include "wd_sched.h" +#include "wd_bmm.h" #include "wd_util.h"
#define WD_ASYNC_DEF_POLL_NUM 1 @@ -247,6 +248,7 @@ int wd_init_ctx_config(struct wd_ctx_config_internal *in, WD_ERR("failed to init ctxs lock!\n"); goto err_out; } + ctxs[i].blkpool = wd_blkpool_new(ctxs[i].ctx); }
in->ctxs = ctxs; @@ -298,8 +300,13 @@ void wd_clear_ctx_config(struct wd_ctx_config_internal *in) { __u32 i;
- for (i = 0; i < in->ctx_num; i++) + for (i = 0; i < in->ctx_num; i++) { + if (in->ctxs[i].blkpool) { + wd_blkpool_destroy_sglpool(in->ctxs[i].blkpool, in->ctxs[i].h_sgl_pool); + wd_blkpool_delete(in->ctxs[i].blkpool); + } pthread_spin_destroy(&in->ctxs[i].lock); + }
in->priv = NULL; in->ctx_num = 0;
Add the API wd_comp_setup_blkpool. The other alg.c files will need a wd_xxx_setup_blkpool as well, because the app does not have access to the ctx.
It sets up the blkpool for ctx[0], and the sglpool for sgl mode. The blkpool will be used by both the app and the driver.
The app needs to call wd_xxx_setup_blkpool for user-pointer mode and sgl mode. The returned blkpool is then used with wd_blkpool_alloc/free.
If the app has not called wd_xxx_setup_blkpool, alloc_sess will call it instead. In that case the uadk library allocates the blkpool itself and memcpys to/from user memory, with poorer performance.
The driver translates VA to PA when configuring the registers.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- include/wd_comp.h | 2 ++ libwd_comp.map | 1 + wd_comp.c | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+)
diff --git a/include/wd_comp.h b/include/wd_comp.h index 45994ff..a957021 100644 --- a/include/wd_comp.h +++ b/include/wd_comp.h @@ -10,6 +10,7 @@ #include <numa.h>
#include "wd_alg_common.h" +#include "wd_bmm.h"
#ifdef __cplusplus extern "C" { @@ -256,6 +257,7 @@ void wd_comp_ctx_num_uninit(void); int wd_comp_get_env_param(__u32 node, __u32 type, __u32 mode, __u32 *num, __u8 *is_enable);
+void *wd_comp_setup_blkpool(struct wd_blkpool_setup *setup); #ifdef __cplusplus } #endif diff --git a/libwd_comp.map b/libwd_comp.map index 6b1f8c2..033b476 100644 --- a/libwd_comp.map +++ b/libwd_comp.map @@ -22,6 +22,7 @@ global: wd_comp_get_driver; wd_comp_get_msg; wd_comp_reset_sess; + wd_comp_setup_blkpool;
wd_sched_rr_instance; wd_sched_rr_alloc; diff --git a/wd_comp.c b/wd_comp.c index 647c320..21a57ac 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -588,6 +588,25 @@ static int wd_comp_check_params(struct wd_comp_sess *sess, return 0; }
+void *wd_comp_setup_blkpool(struct wd_blkpool_setup *setup) +{ + struct wd_ctx_config_internal *config = &wd_comp_setting.config; + struct wd_ctx_internal *ctx = config->ctxs; + int ret; + + ret = wd_blkpool_setup(ctx->blkpool, setup); + if (ret) + return NULL; + + ctx->blkpool_mode = BLKPOOL_MODE_USER; + pthread_spin_lock(&ctx->lock); + if (ctx->h_sgl_pool == 0) + ctx->h_sgl_pool = wd_blkpool_create_sglpool(ctx->blkpool, + HISI_SGL_NUM_IN_BD, HISI_SGE_NUM_IN_SGL); + pthread_spin_unlock(&ctx->lock); + return ctx->blkpool; +} + static int wd_comp_sync_job(struct wd_comp_sess *sess, struct wd_comp_req *req, struct wd_comp_msg *msg)
Support the memcpy, user-pointer and sgl cases
For flat memory: if the user calls wd_xxx_setup_blkpool, user-pointer mode is used. uadk directly uses the app's pointer, assumes it refers to contiguous memory, and translates VA to PA when configuring the registers.
Otherwise, alloc_sess sets up the blkpool and memcpy mode is used: wd_comp allocates contiguous memory for the hardware, memcpys the input from the src pointer, and memcpys the results back to the dst pointer.
For sgl memory: the app has to call wd_xxx_setup_blkpool, and each wd_datalist.data entry has to use contiguous memory.
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- drv/hisi_comp.c | 98 +++++++++++++++++++++++++++++---------- drv/hisi_qm_udrv.c | 14 ++++-- drv/hisi_qm_udrv.h | 3 +- drv/hisi_sec.c | 8 ++-- include/drv/wd_comp_drv.h | 7 +++ wd_comp.c | 65 ++++++++++++++++++++++++++ 6 files changed, 162 insertions(+), 33 deletions(-)
diff --git a/drv/hisi_comp.c b/drv/hisi_comp.c index cd558a8..c897efe 100644 --- a/drv/hisi_comp.c +++ b/drv/hisi_comp.c @@ -431,7 +431,14 @@ static int fill_buf_deflate_generic(struct hisi_zip_sqe *sqe, if (msg->ctx_buf) ctx_buf = msg->ctx_buf + RSV_OFFSET;
- fill_buf_addr_deflate(sqe, src, dst, ctx_buf); + if (msg->blkpool) { + fill_buf_addr_deflate(sqe, + wd_blkpool_phy(msg->blkpool, src), + wd_blkpool_phy(msg->blkpool, dst), + wd_blkpool_phy(msg->blkpool, ctx_buf)); + } else { + fill_buf_addr_deflate(sqe, src, dst, ctx_buf); + }
return 0; } @@ -464,32 +471,45 @@ static void fill_buf_type_sgl(struct hisi_zip_sqe *sqe) }
static int fill_buf_addr_deflate_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_comp_msg *msg, struct wd_datalist *list_src, struct wd_datalist *list_dst) { void *hw_sgl_in, *hw_sgl_out; handle_t h_sgl_pool;
- h_sgl_pool = hisi_qm_get_sglpool(h_qp); + if (msg->h_sgl_pool) + h_sgl_pool = msg->h_sgl_pool; + else + h_sgl_pool = hisi_qm_get_sglpool(h_qp); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool!\n"); return -WD_EINVAL; }
- hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, list_src); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, list_src, msg->blkpool); if (unlikely(!hw_sgl_in)) { WD_ERR("failed to get hw sgl in!\n"); return -WD_ENOMEM; }
- hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, list_dst); + hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, list_dst, msg->blkpool); if (unlikely(!hw_sgl_out)) { WD_ERR("failed to get hw sgl out!\n"); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); return -WD_ENOMEM; }
- fill_buf_addr_deflate(sqe, hw_sgl_in, hw_sgl_out, NULL); + if (msg->h_sgl_pool) { + fill_buf_addr_deflate(sqe, + wd_blkpool_phy(msg->blkpool, hw_sgl_in), + wd_blkpool_phy(msg->blkpool, hw_sgl_out), + NULL); + msg->hw_sgl_in = hw_sgl_in; + msg->hw_sgl_out = hw_sgl_out; + } else { + fill_buf_addr_deflate(sqe, hw_sgl_in, hw_sgl_out, NULL); + }
return 0; } @@ -543,7 +563,7 @@ static int fill_buf_deflate_sgl_generic(handle_t h_qp, struct hisi_zip_sqe *sqe,
fill_buf_type_sgl(sqe);
- ret = fill_buf_addr_deflate_sgl(h_qp, sqe, list_src, list_dst); + ret = fill_buf_addr_deflate_sgl(h_qp, sqe, msg, list_src, list_dst); if (unlikely(ret)) return ret;
@@ -738,34 +758,48 @@ static int fill_buf_lz77_zstd_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe,
fill_buf_size_lz77_zstd(sqe, in_size, lits_size, out_size - lits_size);
- h_sgl_pool = hisi_qm_get_sglpool(h_qp); + if (msg->h_sgl_pool) + h_sgl_pool = msg->h_sgl_pool; + else + h_sgl_pool = hisi_qm_get_sglpool(h_qp); if (unlikely(!h_sgl_pool)) { WD_ERR("failed to get sglpool!\n"); return -WD_EINVAL; }
- hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_src); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_src, msg->blkpool); if (unlikely(!hw_sgl_in)) { WD_ERR("failed to get hw sgl in!\n"); return -WD_ENOMEM; }
- hw_sgl_out_lit = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_dst); + hw_sgl_out_lit = hisi_qm_get_hw_sgl(h_sgl_pool, req->list_dst, msg->blkpool); if (unlikely(!hw_sgl_out_lit)) { WD_ERR("failed to get hw sgl out for literals!\n"); ret = -WD_ENOMEM; goto err_free_sgl_in; }
- hw_sgl_out_seq = hisi_qm_get_hw_sgl(h_sgl_pool, seq_start); + hw_sgl_out_seq = hisi_qm_get_hw_sgl(h_sgl_pool, seq_start, msg->blkpool); if (unlikely(!hw_sgl_out_seq)) { WD_ERR("failed to get hw sgl out for sequences!\n"); ret = -WD_ENOMEM; goto err_free_sgl_out_lit; }
- fill_buf_addr_lz77_zstd(sqe, hw_sgl_in, hw_sgl_out_lit, + if (msg->h_sgl_pool) { + fill_buf_addr_lz77_zstd(sqe, + wd_blkpool_phy(msg->blkpool, hw_sgl_in), + wd_blkpool_phy(msg->blkpool, hw_sgl_out_lit), + wd_blkpool_phy(msg->blkpool, hw_sgl_out_seq), + NULL); + msg->hw_sgl_in = hw_sgl_in; + msg->hw_sgl_out = hw_sgl_out_lit; + msg->hw_sgl_out_seq = hw_sgl_out_seq; + } else { + fill_buf_addr_lz77_zstd(sqe, hw_sgl_in, hw_sgl_out_lit, hw_sgl_out_seq, NULL); + }
return 0;
@@ -1116,27 +1150,41 @@ static int fill_zip_comp_sqe(struct hisi_qp *qp, struct wd_comp_msg *msg, }
static void free_hw_sgl(handle_t h_qp, struct hisi_zip_sqe *sqe, + struct wd_comp_msg *msg, enum wd_comp_alg_type alg_type) { void *hw_sgl_in, *hw_sgl_out; handle_t h_sgl_pool;
- h_sgl_pool = hisi_qm_get_sglpool(h_qp); - if (unlikely(!h_sgl_pool)) { - WD_ERR("failed to get sglpool to free hw sgl!\n"); - return; - } + if (msg->h_sgl_pool) { + h_sgl_pool = msg->h_sgl_pool; + if (unlikely(!h_sgl_pool)) { + WD_ERR("failed to get sglpool to free hw sgl!\n"); + return; + } + hisi_qm_put_hw_sgl(h_sgl_pool, msg->hw_sgl_in); + hisi_qm_put_hw_sgl(h_sgl_pool, msg->hw_sgl_out); + if (alg_type == WD_LZ77_ZSTD) + hisi_qm_put_hw_sgl(h_sgl_pool, msg->hw_sgl_out_seq); + } else {
- hw_sgl_in = VA_ADDR(sqe->source_addr_h, sqe->source_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); + h_sgl_pool = hisi_qm_get_sglpool(h_qp); + if (unlikely(!h_sgl_pool)) { + WD_ERR("failed to get sglpool to free hw sgl!\n"); + return; + }
- hw_sgl_out = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); - hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); + hw_sgl_in = VA_ADDR(sqe->source_addr_h, sqe->source_addr_l); + hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in);
- if (alg_type == WD_LZ77_ZSTD) { - hw_sgl_out = VA_ADDR(sqe->literals_addr_h, - sqe->literals_addr_l); + hw_sgl_out = VA_ADDR(sqe->dest_addr_h, sqe->dest_addr_l); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); + + if (alg_type == WD_LZ77_ZSTD) { + hw_sgl_out = VA_ADDR(sqe->literals_addr_h, + sqe->literals_addr_l); + hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_out); + } } }
@@ -1163,7 +1211,7 @@ static int hisi_zip_comp_send(struct wd_alg_driver *drv, handle_t ctx, void *com ret = hisi_qm_send(h_qp, &sqe, 1, &count); if (unlikely(ret < 0)) { if (msg->req.data_fmt == WD_SGL_BUF) - free_hw_sgl(h_qp, &sqe, msg->alg_type); + free_hw_sgl(h_qp, &sqe, msg, msg->alg_type); if (ret != -WD_EBUSY) WD_ERR("failed to send to hardware, ret = %d!\n", ret);
@@ -1304,7 +1352,7 @@ static int parse_zip_sqe(struct hisi_qp *qp, struct hisi_zip_sqe *sqe, recv_msg->alg_type = alg_type;
if (buf_type == WD_SGL_BUF) - free_hw_sgl((handle_t)qp, sqe, alg_type); + free_hw_sgl((handle_t)qp, sqe, msg, alg_type);
if (unlikely(recv_msg->req.status == WD_IN_EPARA)) dump_zip_msg(recv_msg); diff --git a/drv/hisi_qm_udrv.c b/drv/hisi_qm_udrv.c index 304764e..78f6583 100644 --- a/drv/hisi_qm_udrv.c +++ b/drv/hisi_qm_udrv.c @@ -9,6 +9,7 @@
#include "hisi_qm_udrv.h" #include "wd_util.h" +#include "wd_bmm.h"
#define QM_DBELL_CMD_SQ 0 #define QM_DBELL_CMD_CQ 1 @@ -842,7 +843,8 @@ static void hisi_qm_dump_sgl(void *sgl) } }
-void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) +void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl, + void *blkpool) { struct hisi_sgl_pool *pool = (struct hisi_sgl_pool *)sgl_pool; struct wd_datalist *tmp = sgl; @@ -872,7 +874,10 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) goto err_out; }
- cur->sge_entries[i].buff = (uintptr_t)tmp->data; + if (blkpool) + cur->sge_entries[i].buff = (uintptr_t)wd_blkpool_phy(blkpool, tmp->data); + else + cur->sge_entries[i].buff = (uintptr_t)tmp->data; cur->sge_entries[i].len = tmp->len; cur->entry_sum_in_sgl++; cur->entry_size_in_sgl += tmp->len; @@ -890,7 +895,10 @@ void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl) WD_ERR("invalid: the sgl pool is not enough!\n"); goto err_out; } - cur->next_dma = (uintptr_t)next; + if (blkpool) + cur->next_dma = (uintptr_t)wd_blkpool_phy(blkpool, next); + else + cur->next_dma = (uintptr_t)next; cur = next; head->entry_sum_in_chain += pool->sge_num; /* In the new sgl chain, the subscript must be reset */ diff --git a/drv/hisi_qm_udrv.h b/drv/hisi_qm_udrv.h index b02e8e7..ddb666e 100644 --- a/drv/hisi_qm_udrv.h +++ b/drv/hisi_qm_udrv.h @@ -162,7 +162,8 @@ void hisi_qm_destroy_sglpool(handle_t sgl_pool); * * Return the hw sgl addr which can fill into the sqe. */ -void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl); +void *hisi_qm_get_hw_sgl(handle_t sgl_pool, struct wd_datalist *sgl, + void *blkpool);
/** * hisi_qm_put_hw_sgl - Reback the hw sgl to the sgl pool. diff --git a/drv/hisi_sec.c b/drv/hisi_sec.c index 747d3a8..a305985 100644 --- a/drv/hisi_sec.c +++ b/drv/hisi_sec.c @@ -1050,7 +1050,7 @@ static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, return -WD_EINVAL; }
- hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in)); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in), NULL); if (!hw_sgl_in) { WD_ERR("failed to get sgl in for hw_v2!\n"); return -WD_EINVAL; @@ -1060,7 +1060,7 @@ static int hisi_sec_fill_sgl(handle_t h_qp, __u8 **in, __u8 **out, hw_sgl_out = *out; } else { hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, - (struct wd_datalist *)(*out)); + (struct wd_datalist *)(*out), NULL); if (!hw_sgl_out) { WD_ERR("failed to get hw sgl out for hw_v2!\n"); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); @@ -1090,7 +1090,7 @@ static int hisi_sec_fill_sgl_v3(handle_t h_qp, __u8 **in, __u8 **out, return -WD_EINVAL; }
- hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in)); + hw_sgl_in = hisi_qm_get_hw_sgl(h_sgl_pool, (struct wd_datalist *)(*in), NULL); if (!hw_sgl_in) { WD_ERR("failed to get sgl in for hw_v3!\n"); return -WD_EINVAL; @@ -1101,7 +1101,7 @@ static int hisi_sec_fill_sgl_v3(handle_t h_qp, __u8 **in, __u8 **out, sqe->bd_param |= SEC_PBUFF_MODE_MASK_V3; } else { hw_sgl_out = hisi_qm_get_hw_sgl(h_sgl_pool, - (struct wd_datalist *)(*out)); + (struct wd_datalist *)(*out), NULL); if (!hw_sgl_out) { WD_ERR("failed to get hw sgl out for hw_v3!\n"); hisi_qm_put_hw_sgl(h_sgl_pool, hw_sgl_in); diff --git a/include/drv/wd_comp_drv.h b/include/drv/wd_comp_drv.h index 213cf2d..1b8273c 100644 --- a/include/drv/wd_comp_drv.h +++ b/include/drv/wd_comp_drv.h @@ -53,6 +53,13 @@ struct wd_comp_msg { __u32 checksum; /* Request identifier */ __u32 tag; + void *blkpool; + void *src; + void *dst; + handle_t h_sgl_pool; + void *hw_sgl_in; + void *hw_sgl_out; + void *hw_sgl_out_seq; };
struct wd_comp_msg *wd_comp_get_msg(__u32 idx, __u32 tag); diff --git a/wd_comp.c b/wd_comp.c index 21a57ac..f45621c 100644 --- a/wd_comp.c +++ b/wd_comp.c @@ -39,6 +39,10 @@ struct wd_comp_sess { __u32 checksum; __u8 *ctx_buf; void *sched_key; + void *blkpool; + __u8 *blkpool_ctxbuf; + handle_t h_sgl_pool; + __u8 blkpool_mode; };
struct wd_comp_setting { @@ -436,6 +440,8 @@ static int wd_comp_check_sess_params(struct wd_comp_sess_setup *setup)
handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) { + struct wd_ctx_config_internal *config = &wd_comp_setting.config; + struct wd_ctx_internal *ctx = config->ctxs; struct wd_comp_sess *sess; int ret;
@@ -467,6 +473,28 @@ handle_t wd_comp_alloc_sess(struct wd_comp_sess_setup *setup) goto sched_err; }
+ if (ctx->blkpool) { + sess->blkpool = ctx->blkpool; + sess->h_sgl_pool = ctx->h_sgl_pool; + sess->blkpool_mode = ctx->blkpool_mode; + + if (ctx->blkpool_mode == BLKPOOL_MODE_MEMCPY) { + struct wd_blkpool_setup blksetup; + + memset(&blksetup, 0, sizeof(blksetup)); + blksetup.block_size = HW_CTX_SIZE; + blksetup.block_num = DEFAULT_BLOCK_NM; + blksetup.align_size = DEFAULT_ALIGN_SIZE; + ret = wd_blkpool_setup(sess->blkpool, &blksetup); + if (ret) + goto sched_err; + } + + sess->blkpool_ctxbuf = wd_blkpool_alloc(sess->blkpool, HW_CTX_SIZE); + if (!sess->blkpool_ctxbuf) + goto sched_err; + } + return (handle_t)sess;
sched_err: @@ -486,6 +514,9 @@ void wd_comp_free_sess(handle_t h_sess) if (sess->ctx_buf) free(sess->ctx_buf);
+ if (sess->blkpool_ctxbuf) + wd_blkpool_free(sess->blkpool, sess->blkpool_ctxbuf); + if (sess->sched_key) free(sess->sched_key);
@@ -506,6 +537,9 @@ int wd_comp_reset_sess(handle_t h_sess) if (sess->ctx_buf) memset(sess->ctx_buf, 0, HW_CTX_SIZE);
+ if (sess->blkpool_ctxbuf) + memset(sess->blkpool_ctxbuf, 0, HW_CTX_SIZE); + return 0; }
@@ -628,6 +662,29 @@ static int wd_comp_sync_job(struct wd_comp_sess *sess, wd_dfx_msg_cnt(config, WD_CTX_CNT_NUM, idx); ctx = config->ctxs + idx;
+ if (sess->blkpool) { + msg->ctx_buf = sess->blkpool_ctxbuf; + msg->blkpool = sess->blkpool; + msg->h_sgl_pool = sess->h_sgl_pool; + + if (sess->blkpool_mode == BLKPOOL_MODE_MEMCPY) { + void *src = wd_blkpool_alloc(sess->blkpool, req->src_len); + void *dst = wd_blkpool_alloc(sess->blkpool, req->dst_len); + + if (!src || !dst) + return -ENOMEM; + + /* save */ + msg->src = msg->req.src; + msg->dst = msg->req.dst; + /* replace */ + msg->req.src = src; + msg->req.dst = dst; + + memcpy(msg->req.src, msg->src, req->src_len); + } + } + msg_handle.send = wd_comp_setting.driver->send; msg_handle.recv = wd_comp_setting.driver->recv;
@@ -636,6 +693,14 @@ static int wd_comp_sync_job(struct wd_comp_sess *sess, msg, NULL, config->epoll_en); pthread_spin_unlock(&ctx->lock);
+ if (sess->blkpool) { + if (sess->blkpool_mode == BLKPOOL_MODE_MEMCPY) { + memcpy(msg->dst, req->dst, req->dst_len); + wd_blkpool_free(sess->blkpool, msg->req.src); + wd_blkpool_free(sess->blkpool, msg->req.dst); + } + } + return ret; }
memcpy mode ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --opt 0 --sync --pktlen 1024
user pointer mode, --user ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --user --opt 0 --sync --pktlen 1024
sgl mode, --sgl ./uadk_tool/uadk_tool benchmark --alg zlib --mode sva \ --sgl --opt 0 --sync --pktlen 1024
Signed-off-by: Zhangfei Gao zhangfei.gao@linaro.org --- uadk_tool/benchmark/uadk_benchmark.c | 8 ++ uadk_tool/benchmark/uadk_benchmark.h | 2 + uadk_tool/benchmark/zip_uadk_benchmark.c | 146 +++++++++++++++++++++-- 3 files changed, 144 insertions(+), 12 deletions(-)
diff --git a/uadk_tool/benchmark/uadk_benchmark.c b/uadk_tool/benchmark/uadk_benchmark.c index 1698061..064affc 100644 --- a/uadk_tool/benchmark/uadk_benchmark.c +++ b/uadk_tool/benchmark/uadk_benchmark.c @@ -718,6 +718,8 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) {"complevel", required_argument, 0, 16}, {"init2", no_argument, 0, 17}, {"device", required_argument, 0, 18}, + {"user", no_argument, 0, 19}, + {"sgl", no_argument, 0, 20}, {0, 0, 0, 0} };
@@ -789,6 +791,12 @@ int acc_cmd_parse(int argc, char *argv[], struct acc_option *option) } strcpy(option->device, optarg); break; + case 19: + option->user = true; + break; + case 20: + option->sgl = true; + break; default: ACC_TST_PRT("invalid: bad input parameter!\n"); print_benchmark_help(); diff --git a/uadk_tool/benchmark/uadk_benchmark.h b/uadk_tool/benchmark/uadk_benchmark.h index 2739a0e..7fde341 100644 --- a/uadk_tool/benchmark/uadk_benchmark.h +++ b/uadk_tool/benchmark/uadk_benchmark.h @@ -81,6 +81,8 @@ struct acc_option { bool latency; u32 sched_type; int task_type; + bool user; + bool sgl; };
enum acc_type { diff --git a/uadk_tool/benchmark/zip_uadk_benchmark.c b/uadk_tool/benchmark/zip_uadk_benchmark.c index cad8016..0fd9ee2 100644 --- a/uadk_tool/benchmark/zip_uadk_benchmark.c +++ b/uadk_tool/benchmark/zip_uadk_benchmark.c @@ -8,6 +8,8 @@ #include "include/wd_sched.h" #include "include/fse.h"
+#define HW_CTX_SIZE (64 * 1024) + #define ZIP_TST_PRT printf #define PATH_SIZE 64 #define ZIP_FILE "./zip" @@ -22,6 +24,8 @@ struct uadk_bd { u8 *dst; u32 src_len; u32 dst_len; + void *pool_src; + void *pool_dst; };
struct bd_pool { @@ -32,6 +36,8 @@ struct thread_pool { struct bd_pool *pool; } g_zip_pool;
+void *g_blkpool; + enum ZIP_OP_MODE { BLOCK_MODE, STREAM_MODE @@ -62,6 +68,7 @@ typedef struct uadk_thread_res { struct zip_async_tag *tag; COMP_TUPLE_TAG *ftuple; char *hw_buff_out; + bool sgl; } thread_data;
struct zip_file_head { @@ -145,7 +152,11 @@ static int save_file_data(const char *alg, u32 pkg_len, u32 optype)
// write data for one buffer one buffer to file line. for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { - size = write(fd, g_zip_pool.pool[0].bds[j].dst, + if (g_blkpool) + size = write(fd, g_zip_pool.pool[0].bds[j].pool_dst, + fhead->blk_sz[j]); + else + size = write(fd, g_zip_pool.pool[0].bds[j].dst, fhead->blk_sz[j]); if (size < 0) { ZIP_TST_PRT("compress write data error size: %lu!\n", size); @@ -297,6 +308,17 @@ static void uninit_ctx_config2(void) wd_comp_uninit2(); }
+static void init_blkpool(struct acc_option *options) +{ + struct wd_blkpool_setup setup; + + memset(&setup, 0, sizeof(setup)); + setup.block_size = HW_CTX_SIZE; + setup.block_num = DEFAULT_BLOCK_NM; + setup.align_size = DEFAULT_ALIGN_SIZE; + g_blkpool = wd_comp_setup_blkpool(&setup); +} + static int init_ctx_config2(struct acc_option *options) { struct wd_ctx_params cparams = {0}; @@ -333,6 +355,10 @@ static int init_ctx_config2(struct acc_option *options) ZIP_TST_PRT("failed to do comp init2!\n");
free(ctx_set_num); + + if (options->user || options->sgl) + init_blkpool(options); + return ret; }
@@ -506,6 +532,8 @@ static int init_ctx_config(struct acc_option *options) goto free_sched; }
+ if (options->user || options->sgl) + init_blkpool(options); return 0;
free_sched: @@ -614,6 +642,13 @@ static void free_uadk_bd_pool(void) for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { free(g_zip_pool.pool[i].bds[j].src); free(g_zip_pool.pool[i].bds[j].dst); + + if (g_blkpool) { + wd_blkpool_free(g_blkpool, + g_zip_pool.pool[i].bds[j].pool_src); + wd_blkpool_free(g_blkpool, + g_zip_pool.pool[i].bds[j].pool_dst); + } } } free(g_zip_pool.pool[i].bds); @@ -981,6 +1016,7 @@ static void *zip_uadk_blk_sync_run(void *arg) u32 out_len = 0; u32 count = 0; int ret, i; + struct wd_datalist *list_src = NULL, *list_dst = NULL;
if (pdata->td_id > g_thread_num) return NULL; @@ -1003,23 +1039,71 @@ static void *zip_uadk_blk_sync_run(void *arg) out_len = uadk_pool->bds[0].dst_len;
creq.cb = NULL; - creq.data_fmt = 0; + if (pdata->sgl) + creq.data_fmt = WD_SGL_BUF; + else + creq.data_fmt = 0; creq.priv = 0; creq.status = 0;
+ if (pdata->sgl) { + struct wd_datalist *src, *dst; + + list_src = calloc(MAX_POOL_LENTH_COMP, sizeof(struct wd_datalist)); + list_dst = calloc(MAX_POOL_LENTH_COMP, sizeof(struct wd_datalist)); + + for (i = 0; i < MAX_POOL_LENTH_COMP; i++) { + src = &list_src[i]; + dst = &list_dst[i]; + if (g_blkpool) { + src->data = uadk_pool->bds[i].pool_src; + dst->data = uadk_pool->bds[i].pool_dst; + } else { + src->data = uadk_pool->bds[i].src; + dst->data = uadk_pool->bds[i].dst; + } + src->len = uadk_pool->bds[i].src_len; + dst->len = uadk_pool->bds[i].dst_len; + src->next = (i < MAX_POOL_LENTH_COMP-1) ? &list_src[i+1] : NULL; + dst->next = (i < MAX_POOL_LENTH_COMP-1) ? &list_dst[i+1] : NULL; + } + } + while(1) { - i = count % MAX_POOL_LENTH_COMP; - creq.src = uadk_pool->bds[i].src; - creq.dst = uadk_pool->bds[i].dst; - creq.src_len = uadk_pool->bds[i].src_len; - creq.dst_len = out_len; + if (pdata->sgl) { + creq.list_src = list_src; + creq.list_dst = list_dst;
- ret = wd_do_comp_sync(h_sess, &creq); - if (ret || creq.status) - break; + creq.src_len = uadk_pool->bds[0].src_len * MAX_POOL_LENTH_COMP; + creq.dst_len = out_len * MAX_POOL_LENTH_COMP;
- count++; - uadk_pool->bds[i].dst_len = creq.dst_len; + ret = wd_do_comp_sync(h_sess, &creq); + if (ret || creq.status) + break; + count++; + uadk_pool->bds[0].dst_len = creq.dst_len; + if (get_run_state() == 0) + break; + + } else { + i = count % MAX_POOL_LENTH_COMP; + if (g_blkpool) { + creq.src = uadk_pool->bds[i].pool_src; + creq.dst = uadk_pool->bds[i].pool_dst; + } else { + creq.src = uadk_pool->bds[i].src; + creq.dst = uadk_pool->bds[i].dst; + } + creq.src_len = uadk_pool->bds[i].src_len; + creq.dst_len = out_len; + + ret = wd_do_comp_sync(h_sess, &creq); + if (ret || creq.status) + break; + + count++; + uadk_pool->bds[i].dst_len = creq.dst_len; + } if (get_run_state() == 0) break; } @@ -1028,6 +1112,10 @@ static void *zip_uadk_blk_sync_run(void *arg) cal_avg_latency(count); add_recv_data(count, g_pktlen);
+ if (pdata->sgl) { + free(list_src); + free(list_dst); + } return NULL; }
@@ -1202,6 +1290,7 @@ static int zip_uadk_sync_threads(struct acc_option *options) threads_args[i].optype = threads_option.optype; threads_args[i].win_sz = threads_option.win_sz; threads_args[i].comp_lv = threads_option.comp_lv; + threads_args[i].sgl = options->sgl; threads_args[i].td_id = i; ret = pthread_create(&tdid[i], NULL, uadk_zip_sync_run, &threads_args[i]); if (ret) { @@ -1344,6 +1433,35 @@ async_error: return ret; }
+static int load_blkpool_data(void) +{ + int i, j; + int src_len, dst_len; + + if (!g_blkpool) + return 0; + + for (i = 0; i < g_thread_num; i++) { + for (j = 0; j < MAX_POOL_LENTH_COMP; j++) { + src_len = g_zip_pool.pool[i].bds[j].src_len; + g_zip_pool.pool[i].bds[j].pool_src = + wd_blkpool_alloc(g_blkpool, src_len); + + dst_len = g_zip_pool.pool[i].bds[j].dst_len; + g_zip_pool.pool[i].bds[j].pool_dst = + wd_blkpool_alloc(g_blkpool, dst_len); + + if (!g_zip_pool.pool[i].bds[j].pool_src || + !g_zip_pool.pool[i].bds[j].pool_dst) + return -EINVAL; + + memcpy(g_zip_pool.pool[i].bds[j].pool_src, + g_zip_pool.pool[0].bds[j].src, src_len); + } + } + return 0; +} + int zip_uadk_benchmark(struct acc_option *options) { u32 ptime; @@ -1375,6 +1493,10 @@ int zip_uadk_benchmark(struct acc_option *options) if (ret) return ret;
+ ret = load_blkpool_data(); + if (ret) + return ret; + get_pid_cpu_time(&ptime); time_start(options->times); if (options->syncmode)