This is just quick look up. I basically check some style issues and did some basic static analyzing.
I have run - cppcheck (which found couple mistakes) - flawfinder (did not found anything to my eyes) - codespell (did find couple typo)
You can run these yourself also or check below.
Couple common things which you can ignore or not . - Usually in this code there is goto exit and it is just return. Maybe use just return straight away. No need to jump. - Some comments start capital others not. Maybe all should start capital. Very small nit, but makes nice touch to the code. - Lot of oneline comments are unneccessary three line comments.
On 19.4.2022 16.59, Cai Huoqing wrote:
The NVIDIA Deep Learning Accelerator (NVDLA) is an open source IP which is integrated into NVIDIA Jetson AGX Xavier, so add driver support for this accelerator.
Signed-off-by: Cai Huoqing cai.huoqing@linux.dev
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_bdma.c b/drivers/gpu/drm/nvdla/nvdla_bdma.c new file mode 100644 index 000000000000..225613f27acf --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_bdma.c
... snip
+static int32_t +processor_bdma_program_slot(struct dla_engine *engine,
struct dla_bdma_surface_desc *bdma_surface,struct dla_bdma_transfer_desc *transfer)+{
- int32_t ret = 0;
 - uint64_t source_addr = 0;
 - uint64_t destination_addr = 0;
 - uint32_t high, low, reg;
 - uint8_t bdma_free_slots = 0;
 - /* make sure there're enough free slots */
 - if (bdma_free_slots <= 0) {
 
This is always true right now.
do {reg = bdma_reg_read(engine, STATUS);reg = (reg & MASK(BDMA_STATUS_0, FREE_SLOT)) >>SHIFT(BDMA_STATUS_0, FREE_SLOT);} while (reg == 0);bdma_free_slots = (uint8_t)reg;- }
 - dla_get_dma_address(engine->driver_context, engine->task->task_data,
 transfer->source_address,(void *)&source_addr,DESTINATION_DMA);- dla_get_dma_address(engine->driver_context, engine->task->task_data,
 transfer->destination_address,(void *)&destination_addr,DESTINATION_DMA);- ASSERT_GOTO((transfer->line_repeat <= 8192),
 ret, -EINVAL, exit);- ASSERT_GOTO((transfer->surface_repeat <= 8192),
 ret, -EINVAL, exit);- ASSERT_GOTO((transfer->line_size % 32) == 0,
 ret, -EINVAL, exit);- ASSERT_GOTO(transfer->source_line >= transfer->line_size,
 ret, -EINVAL, exit);- ASSERT_GOTO(transfer->destination_line >= transfer->line_size,
 ret, -EINVAL, exit);- ASSERT_GOTO(transfer->source_surface >=
 (transfer->source_line * transfer->line_repeat),ret, -EINVAL, exit);- ASSERT_GOTO(transfer->destination_surface >=
 (transfer->destination_line * transfer->line_repeat),ret, -EINVAL, exit);- /* config registers */
 - high = upper_32_bits(source_addr);
 - low = lower_32_bits(source_addr);
 - bdma_reg_write(engine, CFG_SRC_ADDR_LOW, low);
 - bdma_reg_write(engine, CFG_SRC_ADDR_HIGH, high);
 - high = upper_32_bits(destination_addr);
 - low = lower_32_bits(destination_addr);
 - bdma_reg_write(engine, CFG_DST_ADDR_LOW, low);
 - bdma_reg_write(engine, CFG_DST_ADDR_HIGH, high);
 - bdma_reg_write(engine, CFG_LINE, (transfer->line_size >> 5) - 1);
 - reg = (map_mem[bdma_surface->source_type] <<
 SHIFT(BDMA_CFG_CMD_0, SRC_RAM_TYPE)) |(map_mem[bdma_surface->destination_type] <<SHIFT(BDMA_CFG_CMD_0, DST_RAM_TYPE));- bdma_reg_write(engine, CFG_CMD, reg);
 - bdma_reg_write(engine, CFG_LINE_REPEAT, transfer->line_repeat - 1);
 - bdma_reg_write(engine, CFG_SRC_LINE, transfer->source_line);
 - bdma_reg_write(engine, CFG_DST_LINE, transfer->destination_line);
 - bdma_reg_write(engine, CFG_SURF_REPEAT, transfer->surface_repeat - 1);
 - bdma_reg_write(engine, CFG_SRC_SURF, transfer->source_surface);
 - bdma_reg_write(engine, CFG_DST_SURF, transfer->destination_surface);
 - bdma_reg_write(engine, CFG_OP, FIELD_ENUM(BDMA_CFG_OP_0, EN, ENABLE));
 +exit:
- return ret;
 +}
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_cache.c b/drivers/gpu/drm/nvdla/nvdla_cache.c new file mode 100644 index 000000000000..f8bd7b514aab --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_cache.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/*
- Copyright (C) 2017-2018 NVIDIA CORPORATION
 
- Copyright (C) 2022 Cai Huoqing
 - */
 +#include "nvdla_common.h" +#include "nvdla_drm.h" +#include "nvdla_reg.h" +#include "nvdla_engine.h"
+#define DLA_OP_CACHE_SIZE (DLA_NUM_GROUPS * ((DLA_OP_NUM + 2) * 2))
+static struct dla_common_op_desc desc_cache[DLA_OP_NUM][DLA_OP_CACHE_SIZE]; +static int32_t desc_refcount[DLA_OP_NUM][DLA_OP_CACHE_SIZE];
+void +dla_get_refcount(struct dla_common_op_desc *op_desc) +{
- int32_t i;
 - struct dla_common_op_desc *desc = NULL;
 - if (op_desc == NULL)
 return;- if (op_desc->index == -1)
 return;- desc = &desc_cache[op_desc->op_type][0];
 - for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
 if (desc->index == op_desc->index &&desc->roi_index == op_desc->roi_index) {
reverse if
if (desc->index != op_desc->index) continue; if (desc->roi_index != op_desc->roi_index) continue;
desc_refcount[op_desc->op_type][i]++;return;}- }
 +}
+struct dla_common_op_desc * +dla_get_op_desc(struct dla_engine *engine,
struct dla_task *task, int16_t index,uint8_t op_type, uint8_t roi_index)+{
- int32_t i;
 - int32_t ret;
 - uint64_t op_base;
 - uint64_t dep_graph_addr;
 - struct dla_common_op_desc *desc = NULL;
 - if (index == -1) {
 pr_debug("no desc get due to index==-1\n");goto exit;- }
 - dep_graph_addr = (sizeof(struct dla_common_op_desc) *
 engine->network->num_operations * roi_index);- desc = &desc_cache[op_type][0];
 - for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
 if (desc->index == index && desc->roi_index == roi_index) {if (desc->op_type != op_type) {pr_err("op_cache[op=%u] contains incorrect entry of op[%u]\n",op_type, desc->op_type);continue;}
reverse if so this will be pretty clean
if (desc->index != index) continue; if (desc->roi_index != roi_index) continue; if (desc->op_type != op_type) { pr_err("op_cache[op=%u] contains incorrect entry of op[%u]\n", op_type, desc->op_type); continue; }
desc_refcount[op_type][i]++;goto exit;}- }
 - desc = &desc_cache[op_type][0];
 - for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
 if (desc->index == -1) {
reverse if if (desc->index != -1) continue;
op_base = dep_graph_addr +(sizeof(struct dla_common_op_desc) *(uint64_t)index);ret = dla_data_read(engine->driver_context,task->task_data,task->dependency_graph_addr,(void *)(desc),sizeof(struct dla_common_op_desc),op_base);if (ret) {desc = NULL;goto exit;}if (op_type != desc->op_type) {/** op_type of entry read from DRAM should not* mismatch with given op_type. If they* mismatches, then wrong entry is fetched, so* report this issue by throwing error.*/pr_err("Fetched [op_type=%u] from DRAM doesn't match with op_type[%u]\n",desc->op_type, op_type);desc->op_type = op_type;desc->index = -1;desc->roi_index = -1;desc = NULL;goto exit;}desc->index = index;desc->roi_index = roi_index;desc_refcount[op_type][i]++;goto exit;}- }
 +exit:
- return desc;
 +}
+static void +dla_free_op_desc(struct dla_engine *engine, struct dla_common_op_desc *op_desc) +{
- uint64_t op_base;
 - uint64_t dep_graph_addr;
 - struct dla_task *task;
 - pr_debug("Enter: %s op desc index %u ROI %d\n", __func__,
 op_desc->index, op_desc->roi_index);
Possiple null pointer dereference
- task = engine->task;
 - dep_graph_addr = (sizeof(struct dla_common_op_desc) *
 engine->network->num_operations *op_desc->roi_index);- if (op_desc->index == -1)
 goto exit;
Possiple null pointer dereference
- if (op_desc == NULL)
 goto exit;
Or this is unnecessary.
- /**
 * TODO: keeping the depth value hardcoded as 0 for now,* need to replace it once corresponding implementation is done.*/- op_base = (dep_graph_addr +
 (sizeof(struct dla_common_op_desc) *(uint64_t)op_desc->index));- /**
 * Flush descriptor to DRAM*/- dla_data_write(engine->driver_context,
 task->task_data,(void *)op_desc,task->dependency_graph_addr,sizeof(struct dla_common_op_desc),op_base);- /**
 * Release it*/- op_desc->index = -1;
 - op_desc->roi_index = -1;
 +exit:
- return;
 +}
+void +dla_put_op_desc(struct dla_engine *engine, struct dla_common_op_desc *op_desc) +{
- int32_t i;
 - struct dla_common_op_desc *desc;
 - if (op_desc == NULL)
 return;- if (op_desc->index == -1)
 return;- desc = &desc_cache[op_desc->op_type][0];
 - for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
 if (desc->index == op_desc->index &&desc->roi_index == op_desc->roi_index) {
Reverse if.
if (desc->index != op_desc->index) continue; if (desc->roi_index != op_desc->roi_index) continue;
desc_refcount[op_desc->op_type][i]--;/*** Free desc if refcount is 0*/
Pretty useless comment and totally not needed three line for this.
if (desc_refcount[op_desc->op_type][i] == 0)dla_free_op_desc(engine, op_desc);return;}- }
 +}
+void +dla_init_op_cache(struct dla_engine *engine) +{
- int32_t i, j;
 - struct dla_common_op_desc *desc = &desc_cache[0][0];
 - memset((uint8_t *)&desc_cache[0][0], 0, sizeof(desc_cache));
 - memset((uint8_t *)&desc_refcount[0][0], 0, sizeof(desc_refcount));
 - for (i = 0; i < DLA_OP_NUM; i++) {
 for (j = 0; j < DLA_OP_CACHE_SIZE; j++) {desc->index = -1;desc->roi_index = -1;desc->op_type = (uint8_t)i;desc++;}- }
 +}
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_common.h b/drivers/gpu/drm/nvdla/nvdla_common.h new file mode 100644 index 000000000000..38cf43246890 --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_common.h @@ -0,0 +1,835 @@
... snip
+struct dla_conv_op_desc {
- /* Performance parameters */
 - /* dla_conv_mode */
 - uint8_t conv_mode;
 - uint8_t data_reuse;
 - uint8_t weight_reuse;
 - uint8_t skip_data_rls;
 - uint8_t skip_weight_rls;
 - uint8_t reserved0;
 - uint16_t entry_per_slice;
 - /* dla_data_format */
 - uint8_t data_format;
 - /* dla_pixel_mapping */
 - uint8_t pixel_mapping;
 - /* number of free slices before fetch */
 - uint16_t fetch_grain;
 - uint8_t reserved_b[8];
 - /* batch_num */
 - uint8_t batch;
 - /* dla_weight_format */
 - uint8_t weight_format;
 - uint8_t data_bank;
 - uint8_t weight_bank;
 - /* the offset in bytes of each data cube in a batch */
 - uint32_t batch_stride;
 - uint8_t post_extension;
 - uint8_t pixel_override;
 - /* number of slices need to be released */
 - uint16_t release;
 /* The input cube dimension for CSC */- uint16_t input_width_csc;
 - uint16_t input_height_csc;
 - uint16_t input_channel_csc;
 - uint16_t kernel_width_csc;
 - uint16_t kernel_height_csc;
 - uint16_t kernel_channel_csc;
 - /* The input cube dimension for CMAC */
 - uint16_t input_width_cmac;
 - uint16_t input_height_cmac;
 - /* actual size in bytes */
 - uint32_t bytes_per_kernel;
 - /* Algorithm parameters */
 - int16_t mean_ry; /* mean value for red in RGB or Y in YUV */
 - int16_t mean_gu; /* mean value for green in RGB or U in YUV */
 - int16_t mean_bv; /* mean value for blue in RGB or V in YUV */
 - int16_t mean_ax;
 - uint8_t mean_format; /* dla_mean_format */
 - uint8_t conv_stride_x;
 - uint8_t conv_stride_y;
 - uint8_t pad_x_left;
 - uint8_t pad_x_right;
 - uint8_t pad_y_top;
 - uint8_t pad_y_bottom;
 - uint8_t dilation_x;
 - uint8_t dilation_y;
 - uint8_t reserved2[2];
 - /* Precision parameters */
 - uint8_t pra_truncate;
 - uint8_t in_precision;
 - /* The output precision from CONV, it's the MAC processing precison */
 
./nvdla_common.h:428: precison ==> precision
- uint8_t out_precision;
 - int16_t pad_val;
 - /* input converter parameters */
 - struct dla_cvt_param in_cvt;
 - /* output converter parameters, support truncate only */
 - struct dla_cvt_param out_cvt;
 +} __packed __aligned(4);
+struct dla_conv_stat_desc {
- uint32_t data_read_stall;
 - uint32_t weight_read_stall;
 - uint32_t data_read_latency;
 - uint32_t weight_read_latency;
 - uint32_t saturation_count;
 - uint32_t nan_data_num;
 - uint32_t nan_weight_num;
 - uint32_t inf_data_num;
 - uint32_t inf_weight_num;
 +} __packed __aligned(4);
+/**
- @ingroup SDP
 
- @name Activation functions
 
- @brief Activation functions supported in SDP
 
- @{
 - */
 +#define ACTIVATION_NONE 0 +#define ACTIVATION_RELU 1 +#define ACTIVATION_LUT 2 +#define ACTIVATION_PRELU 3 +/** @} */
+/**
- @ingroup LUT
 
- @name LUT size
 
- @brief LUT sizes for linear and exponentila LUT
 
- @{
 - */
 +#define LUT_LINEAR_EXP_TABLE_ENTRY_LOG2 6 +#define LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2 8 +/** @} */
+/**
- @ingroup LUT
 
- @name LUT types
 
- @brief DLA supports two types of LUT, linear and exonential
 
- @{
 - */
 +#define LUT_LINEAR_EXP_TABLE 0 +#define LUT_LINEAR_ONLY_TABLE 1 +/** @} */
+/**
- @ingroup LUT
 
- @name LUT methods
 
- @brief DLA supports two types of LUT, linear and exonential
 
- @{
 - */
 +#define LUT_METHOD_EXPONENTIAL 0 +#define LUT_METHOD_LINEAR 1 +/** @} */
+/**
- @ingroup LUT
 
- @name LUT
 
- @brief DLA supports two types of LUT, linear and exonential
 
- @{
 - */
 +#define LUT_PRI_LINEAR_EXP 0 +#define LUT_PRI_LINEAR_ONLY 1 +/** @} */
+union dla_lut_offset {
- /**
 * Number should be substracted on log domain before look up
./nvdla_common.h:505: substracted ==> subtracted
* exponetial table it has the same definition as hardware
./nvdla_common.h:506: exponetial ==> exponential
* thus input scaling should also take into account when* set this field.*/- int8_t exp_offset;
 - /**
 * Number of bits should be right shift before looking* up linear table*/- int8_t frac_bits;
 - uint16_t reserved0;
 +};
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_drm.c b/drivers/gpu/drm/nvdla/nvdla_drm.c new file mode 100644 index 000000000000..9217eee1de3b --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_drm.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/*
- Copyright (C) 2017-2018 NVIDIA CORPORATION
 
- Copyright (C) 2022 Cai Huoqing
 - */
 +#include <linux/dma-buf.h> +#include <linux/dma-mapping.h> +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/time.h> +#include <linux/uaccess.h> +#include <linux/types.h>
+#include "nvdla_drm.h" +#include "nvdla_ioctl.h" +#include "nvdla_engine.h"
+static struct nvdla_config nvdla_config_os_initial = {
- .atom_size = 32,
 - .bdma_enable = true,
 - .rubik_enable = true,
 - .weight_compress_support = true,
 +};
+static struct nvdla_config nvdla_config_small = {
- //.atom_size = 8,
 - .atom_size = 32, // nv_large config
 - .bdma_enable = false,
 - .rubik_enable = false,
 - .weight_compress_support = false,
 +};
+int64_t dla_get_time_us(void)
Funtion is never used.
+{
- return ktime_get_ns() / NSEC_PER_USEC;
 +}
+void dla_reg_write(void *driver_context, uint32_t addr, uint32_t reg) +{
- struct nvdla_device *nvdla_dev =
 (struct nvdla_device *)driver_context;- if (!nvdla_dev)
 return;- writel(reg, nvdla_dev->base + addr);
 +}
+uint32_t dla_reg_read(void *driver_context, uint32_t addr) +{
- struct nvdla_device *nvdla_dev =
 (struct nvdla_device *)driver_context;- if (!nvdla_dev)
 return 0;- return readl(nvdla_dev->base + addr);
 +}
+static irqreturn_t nvdla_engine_isr(int32_t irq, void *data) +{
- unsigned long flags;
 - uint32_t mask;
 - uint32_t reg;
 - struct dla_processor *processor = NULL;
 - struct dla_processor_group *group;
 - struct dla_engine *engine;
 - struct nvdla_device *nvdla_dev = (struct nvdla_device *)data;
 - if (!nvdla_dev)
 return IRQ_NONE;- engine = nvdla_dev->engine_context;
 - spin_lock_irqsave(&nvdla_dev->nvdla_lock, flags);
 - mask = glb_reg_read(engine, S_INTR_MASK);
 
Never used. It would be nice so that static analyzer will not complain these anymore, but your choice what you want to do.
- reg = glb_reg_read(engine, S_INTR_STATUS);
 - if (reg & MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS0)) {
 processor = &engine->processors[DLA_OP_CONV];group = &processor->groups[0];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS1)) {
 processor = &engine->processors[DLA_OP_CONV];group = &processor->groups[1];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, SDP_DONE_STATUS0)) {
 processor = &engine->processors[DLA_OP_SDP];group = &processor->groups[0];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, SDP_DONE_STATUS1)) {
 processor = &engine->processors[DLA_OP_SDP];group = &processor->groups[1];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, CDP_DONE_STATUS0)) {
 processor = &engine->processors[DLA_OP_CDP];group = &processor->groups[0];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, CDP_DONE_STATUS1)) {
 processor = &engine->processors[DLA_OP_CDP];group = &processor->groups[1];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, RUBIK_DONE_STATUS0)) {
 processor = &engine->processors[DLA_OP_RUBIK];group = &processor->groups[0];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, RUBIK_DONE_STATUS1)) {
 processor = &engine->processors[DLA_OP_RUBIK];group = &processor->groups[1];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, PDP_DONE_STATUS0)) {
 processor = &engine->processors[DLA_OP_PDP];group = &processor->groups[0];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, PDP_DONE_STATUS1)) {
 processor = &engine->processors[DLA_OP_PDP];group = &processor->groups[1];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, BDMA_DONE_STATUS0)) {
 processor = &engine->processors[DLA_OP_BDMA];group = &processor->groups[0];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, BDMA_DONE_STATUS1)) {
 processor = &engine->processors[DLA_OP_BDMA];group = &processor->groups[1];group->events |= (1 << DLA_EVENT_OP_COMPLETED);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_DAT_DONE_STATUS0)) {
 processor = &engine->processors[DLA_OP_CONV];group = &processor->groups[0];group->events |= (1 << DLA_EVENT_CDMA_DT_DONE);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_DAT_DONE_STATUS1)) {
 processor = &engine->processors[DLA_OP_CONV];group = &processor->groups[1];group->events |= (1 << DLA_EVENT_CDMA_DT_DONE);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_WT_DONE_STATUS0)) {
 processor = &engine->processors[DLA_OP_CONV];group = &processor->groups[0];group->events |= (1 << DLA_EVENT_CDMA_WT_DONE);- }
 - if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_WT_DONE_STATUS1)) {
 processor = &engine->processors[DLA_OP_CONV];group = &processor->groups[1];group->events |= (1 << DLA_EVENT_CDMA_WT_DONE);- }
 - glb_reg_write(engine, S_INTR_STATUS, reg);
 - mask = glb_reg_read(engine, S_INTR_MASK);
 
Never used
- reg = glb_reg_read(engine, S_INTR_STATUS);
 
Never used.
- complete(&nvdla_dev->event_notifier);
 - spin_unlock_irqrestore(&nvdla_dev->nvdla_lock, flags);
 - return IRQ_HANDLED;
 +}
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_gem.c b/drivers/gpu/drm/nvdla/nvdla_gem.c new file mode 100644 index 000000000000..cccf6d01a564 --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_gem.c
... snip
+static const struct drm_ioctl_desc nvdla_drm_ioctls[] = {
- DRM_IOCTL_DEF_DRV(NVDLA_SUBMIT, nvdla_submit, DRM_RENDER_ALLOW),
 - DRM_IOCTL_DEF_DRV(NVDLA_GEM_CREATE, nvdla_gem_create, DRM_RENDER_ALLOW),
 - DRM_IOCTL_DEF_DRV(NVDLA_GEM_MMAP, nvdla_gem_map_offset, DRM_RENDER_ALLOW),
 - /* use DRM_IOCTL_MODE_DESTROY_DUMB to destory */
 
./nvdla_gem.c:347: destory ==> destroy
+};
... snip
diff --git a/drivers/gpu/drm/nvdla/nvdla_scheduler.c b/drivers/gpu/drm/nvdla/nvdla_scheduler.c new file mode 100644 index 000000000000..b814077478c6 --- /dev/null +++ b/drivers/gpu/drm/nvdla/nvdla_scheduler.c
... snip
+static int +dla_update_dependency(struct dla_engine *engine,
struct dla_consumer *consumer,struct dla_common_op_desc *op_desc,uint8_t event, uint8_t roi_index)+{
- int32_t ret = 0;
 - struct dla_processor *processor;
 - if (consumer->index == -1)
 goto exit;- /* Update dependency only if event matches */
 - if (event != consumer->event)
 goto exit;- /**
 * If consumer index is valid but op desc is NULL means* op desc for consumer was not pre-fetched*/- if (op_desc == NULL) {
 ret = -EINVAL;pr_err("Operation descriptor is NULL, consumer index %d",consumer->index);goto exit;- }
 - pr_debug("Update dependency operation index %d ROI %d DEP_COUNT=%d\n",
 op_desc->index, op_desc->roi_index,op_desc->dependency_count);- op_desc->dependency_count--;
 - if (op_desc->dependency_count == 0) {
 processor = &engine->processors[op_desc->op_type];pr_debug("enable %s in %s as depdency are resolved\n",
./nvdla_scheduler.c:455: depdency ==> dependency
processor->name, __func__);ret = dla_enable_operation(engine, processor, op_desc);if (ret)goto exit;- }
 +exit:
- return ret;
 +}
... snip
+int +dla_process_events(struct dla_engine *engine, uint32_t *task_complete) +{
- int32_t i;
 - int32_t ret = 0;
 - for (i = 0; i < DLA_OP_NUM; i++) {
 struct dla_processor *processor;processor = &engine->processors[i];ret = dla_handle_events(engine, processor);/*** Incase engine status is non-zero, then don't
./nvdla_scheduler.c:905: Incase ==> In case
* update the engine status. We should keep its* status for later cleaning of engine.*/if (!engine->status)engine->status = ret;- }
 - if (engine->network->num_operations == engine->num_proc_hwl)
 *task_complete = 1;- return ret;
 +}
... snip
Argillander