[Linaro-mm-sig] Re: [PATCH 2/2] drm/nvdla: Add driver support for NVDLA

21 Apr 2022

This is just a quick look. I basically checked some style issues and did
some basic static analysis.
I have run
  - cppcheck (which found a couple of mistakes)
  - flawfinder (which did not find anything, as far as I can see)
  - codespell (which found a couple of typos)
You can also run these yourself, or check the results below.
A couple of common things, which you can ignore or not.
- Usually in this code there is a "goto exit" where the exit label just
   returns. Maybe just return straight away. There is no need to jump.
- Some comments start with a capital letter and others do not. Maybe all
   should start with a capital. A very small nit, but it adds a nice touch
   to the code.
- A lot of one-line comments are unnecessarily written as three-line comments.
On 19.4.2022 16.59, Cai Huoqing wrote:
...
The NVIDIA Deep Learning Accelerator (NVDLA) is an open source IP
which is integrated into NVIDIA Jetson AGX Xavier,
so add driver support for this accelerator.
Signed-off-by: Cai Huoqing cai.huoqing@linux.dev
... snip
...

diff --git a/drivers/gpu/drm/nvdla/nvdla_bdma.c b/drivers/gpu/drm/nvdla/nvdla_bdma.c
new file mode 100644
index 000000000000..225613f27acf
--- /dev/null
+++ b/drivers/gpu/drm/nvdla/nvdla_bdma.c
... snip
...
+static int32_t
+processor_bdma_program_slot(struct dla_engine *engine,

					struct dla_bdma_surface_desc *bdma_surface,


					struct dla_bdma_transfer_desc *transfer)



+{

int32_t ret = 0;
uint64_t source_addr = 0;
uint64_t destination_addr = 0;
uint32_t high, low, reg;
uint8_t  bdma_free_slots = 0;

/* make sure there're enough free slots */
if (bdma_free_slots <= 0) {

This is always true right now, because bdma_free_slots is initialized to 0
just above and nothing changes it before this check.
...

do {


	reg = bdma_reg_read(engine, STATUS);


	reg = (reg & MASK(BDMA_STATUS_0, FREE_SLOT)) >>


			SHIFT(BDMA_STATUS_0, FREE_SLOT);


} while (reg == 0);


bdma_free_slots = (uint8_t)reg;


}

dla_get_dma_address(engine->driver_context, engine->task->task_data,
				transfer->source_address,


				(void *)&source_addr,


				DESTINATION_DMA);


dla_get_dma_address(engine->driver_context, engine->task->task_data,
				transfer->destination_address,


				(void *)&destination_addr,


				DESTINATION_DMA);



ASSERT_GOTO((transfer->line_repeat <= 8192),
		ret, -EINVAL, exit);


ASSERT_GOTO((transfer->surface_repeat <= 8192),
		ret, -EINVAL, exit);


ASSERT_GOTO((transfer->line_size % 32) == 0,
		ret, -EINVAL, exit);


ASSERT_GOTO(transfer->source_line >= transfer->line_size,
		ret, -EINVAL, exit);


ASSERT_GOTO(transfer->destination_line >= transfer->line_size,
		ret, -EINVAL, exit);


ASSERT_GOTO(transfer->source_surface >=
	(transfer->source_line * transfer->line_repeat),


		ret, -EINVAL, exit);


ASSERT_GOTO(transfer->destination_surface >=
	(transfer->destination_line * transfer->line_repeat),


		ret, -EINVAL, exit);



/* config registers */
high = upper_32_bits(source_addr);
low = lower_32_bits(source_addr);
bdma_reg_write(engine, CFG_SRC_ADDR_LOW, low);
bdma_reg_write(engine, CFG_SRC_ADDR_HIGH, high);
high = upper_32_bits(destination_addr);
low = lower_32_bits(destination_addr);
bdma_reg_write(engine, CFG_DST_ADDR_LOW, low);
bdma_reg_write(engine, CFG_DST_ADDR_HIGH, high);
bdma_reg_write(engine, CFG_LINE, (transfer->line_size >> 5) - 1);
reg = (map_mem[bdma_surface->source_type] <<
		SHIFT(BDMA_CFG_CMD_0, SRC_RAM_TYPE)) |


(map_mem[bdma_surface->destination_type] <<


		SHIFT(BDMA_CFG_CMD_0, DST_RAM_TYPE));


bdma_reg_write(engine, CFG_CMD, reg);
bdma_reg_write(engine, CFG_LINE_REPEAT, transfer->line_repeat - 1);
bdma_reg_write(engine, CFG_SRC_LINE, transfer->source_line);
bdma_reg_write(engine, CFG_DST_LINE, transfer->destination_line);
bdma_reg_write(engine, CFG_SURF_REPEAT, transfer->surface_repeat - 1);
bdma_reg_write(engine, CFG_SRC_SURF, transfer->source_surface);
bdma_reg_write(engine, CFG_DST_SURF, transfer->destination_surface);
bdma_reg_write(engine, CFG_OP, FIELD_ENUM(BDMA_CFG_OP_0, EN, ENABLE));


+exit:

return ret;

+}
... snip
...
diff --git a/drivers/gpu/drm/nvdla/nvdla_cache.c b/drivers/gpu/drm/nvdla/nvdla_cache.c
new file mode 100644
index 000000000000..f8bd7b514aab
--- /dev/null
+++ b/drivers/gpu/drm/nvdla/nvdla_cache.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/*


Copyright (C) 2017-2018 NVIDIA CORPORATION



Copyright (C) 2022 Cai Huoqing


*/


+#include "nvdla_common.h"
+#include "nvdla_drm.h"
+#include "nvdla_reg.h"
+#include "nvdla_engine.h"



+#define DLA_OP_CACHE_SIZE (DLA_NUM_GROUPS * ((DLA_OP_NUM + 2) * 2))



+static struct dla_common_op_desc desc_cache[DLA_OP_NUM][DLA_OP_CACHE_SIZE];
+static int32_t desc_refcount[DLA_OP_NUM][DLA_OP_CACHE_SIZE];



+void
+dla_get_refcount(struct dla_common_op_desc *op_desc)
+{

int32_t i;
struct dla_common_op_desc *desc = NULL;

if (op_desc == NULL)
return;



if (op_desc->index == -1)
return;



desc = &desc_cache[op_desc->op_type][0];

for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
if (desc->index == op_desc->index &&


		desc->roi_index == op_desc->roi_index) {



Reverse the if conditions:
if (desc->index != op_desc->index)
    		continue;
    	if (desc->roi_index != op_desc->roi_index)
    		continue;
...

	desc_refcount[op_desc->op_type][i]++;


	return;


}


}

+}



+struct dla_common_op_desc *
+dla_get_op_desc(struct dla_engine *engine,

		struct dla_task *task, int16_t index,


		uint8_t op_type, uint8_t roi_index)



+{

int32_t i;
int32_t ret;
uint64_t op_base;
uint64_t dep_graph_addr;
struct dla_common_op_desc *desc = NULL;

if (index == -1) {
pr_debug("no desc get due to index==-1\n");


goto exit;


}

dep_graph_addr = (sizeof(struct dla_common_op_desc) *
		engine->network->num_operations * roi_index);



desc = &desc_cache[op_type][0];

for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
if (desc->index == index && desc->roi_index == roi_index) {


	if (desc->op_type != op_type) {


		pr_err("op_cache[op=%u] contains incorrect entry of op[%u]\n",


			   op_type, desc->op_type);


		continue;


	}



Reverse the if conditions so that this becomes pretty clean:
if (desc->index != index)
    		continue;
    	if (desc->roi_index != roi_index)
    		continue;
    	if (desc->op_type != op_type) {
    		pr_err("op_cache[op=%u] contains incorrect entry of op[%u]\n",
    				op_type, desc->op_type);
    		continue;
    	}
...

	desc_refcount[op_type][i]++;


	goto exit;


}


}

desc = &desc_cache[op_type][0];

for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
if (desc->index == -1) {



Reverse the if condition:
    	if (desc->index != -1)
    		continue;
...

	op_base = dep_graph_addr +


			(sizeof(struct dla_common_op_desc) *


			(uint64_t)index);


	ret = dla_data_read(engine->driver_context,


			task->task_data,


			task->dependency_graph_addr,


			(void *)(desc),


			sizeof(struct dla_common_op_desc),


			op_base);


	if (ret) {


		desc = NULL;


		goto exit;


	}



	if (op_type != desc->op_type) {


		/*


		 * op_type of entry read from DRAM should not


		 * mismatch with given op_type. If they


		 * mismatches, then wrong entry is fetched, so


		 * report this issue by throwing error.


		 */


		pr_err("Fetched [op_type=%u] from DRAM doesn't match with op_type[%u]\n",


			   desc->op_type, op_type);


		desc->op_type = op_type;


		desc->index = -1;


		desc->roi_index = -1;


		desc = NULL;


		goto exit;


	}



	desc->index = index;


	desc->roi_index = roi_index;



	desc_refcount[op_type][i]++;


	goto exit;


}


}


+exit:

return desc;

+}



+static void
+dla_free_op_desc(struct dla_engine *engine, struct dla_common_op_desc *op_desc)
+{

uint64_t op_base;
uint64_t dep_graph_addr;
struct dla_task *task;

pr_debug("Enter: %s op desc index %u ROI %d\n", __func__,
		op_desc->index, op_desc->roi_index);



Possible null pointer dereference
...

task = engine->task;
dep_graph_addr = (sizeof(struct dla_common_op_desc) *
		engine->network->num_operations *


		op_desc->roi_index);



if (op_desc->index == -1)
goto exit;



Possible null pointer dereference
...

if (op_desc == NULL)
goto exit;



Or this is unnecessary.
...


/**
* TODO: keeping the depth value hardcoded as 0 for now,


* need to replace it once corresponding implementation is done.


*/


op_base = (dep_graph_addr +
	(sizeof(struct dla_common_op_desc) *


	(uint64_t)op_desc->index));



/**
* Flush descriptor to DRAM


*/


dla_data_write(engine->driver_context,
	task->task_data,


	(void *)op_desc,


	task->dependency_graph_addr,


	sizeof(struct dla_common_op_desc),


	op_base);



/**
* Release it


*/


op_desc->index = -1;
op_desc->roi_index = -1;

+exit:

return;

+}



+void
+dla_put_op_desc(struct dla_engine *engine, struct dla_common_op_desc *op_desc)
+{

int32_t i;
struct dla_common_op_desc *desc;

if (op_desc == NULL)
return;



if (op_desc->index == -1)
return;



desc = &desc_cache[op_desc->op_type][0];

for (i = 0; i < DLA_OP_CACHE_SIZE; i++, desc++) {
if (desc->index == op_desc->index &&


		desc->roi_index == op_desc->roi_index) {



Reverse the if conditions:
if (desc->index != op_desc->index)
    		continue;
    	if (desc->roi_index != op_desc->roi_index)
    		continue;
...


	desc_refcount[op_desc->op_type][i]--;



	/**


	 * Free desc if refcount is 0


	 */



Pretty useless comment, and it certainly does not need three lines.
...

	if (desc_refcount[op_desc->op_type][i] == 0)


		dla_free_op_desc(engine, op_desc);



	return;


}


}

+}



+void
+dla_init_op_cache(struct dla_engine *engine)
+{

int32_t i, j;
struct dla_common_op_desc *desc = &desc_cache[0][0];

memset((uint8_t *)&desc_cache[0][0], 0, sizeof(desc_cache));
memset((uint8_t *)&desc_refcount[0][0], 0, sizeof(desc_refcount));

for (i = 0; i < DLA_OP_NUM; i++) {
for (j = 0; j < DLA_OP_CACHE_SIZE; j++) {


	desc->index = -1;


	desc->roi_index = -1;


	desc->op_type = (uint8_t)i;


	desc++;


}


}

+}
... snip
...
diff --git a/drivers/gpu/drm/nvdla/nvdla_common.h b/drivers/gpu/drm/nvdla/nvdla_common.h
new file mode 100644
index 000000000000..38cf43246890
--- /dev/null
+++ b/drivers/gpu/drm/nvdla/nvdla_common.h
@@ -0,0 +1,835 @@
... snip
...
+struct dla_conv_op_desc {

/* Performance parameters */

/* dla_conv_mode */
uint8_t conv_mode;
uint8_t data_reuse;
uint8_t weight_reuse;
uint8_t skip_data_rls;

uint8_t skip_weight_rls;
uint8_t reserved0;
uint16_t entry_per_slice;

/* dla_data_format */
uint8_t data_format;
/* dla_pixel_mapping */
uint8_t pixel_mapping;
/* number of free slices before fetch */
uint16_t fetch_grain;

uint8_t reserved_b[8];

/* batch_num */
uint8_t batch;
/* dla_weight_format */
uint8_t weight_format;
uint8_t data_bank;
uint8_t weight_bank;

/* the offset in bytes of each data cube in a batch */
uint32_t batch_stride;

uint8_t post_extension;
uint8_t pixel_override;
/* number of slices need to be released */
uint16_t release;

/* The input cube dimension for CSC */


uint16_t input_width_csc;
uint16_t input_height_csc;

uint16_t input_channel_csc;
uint16_t kernel_width_csc;

uint16_t kernel_height_csc;
uint16_t kernel_channel_csc;

/* The input cube dimension for CMAC */
uint16_t input_width_cmac;
uint16_t input_height_cmac;

/* actual size in bytes */
uint32_t bytes_per_kernel;

/* Algorithm parameters */

int16_t mean_ry; /* mean value for red in RGB or Y in YUV */
int16_t mean_gu; /* mean value for green in RGB or U in YUV */

int16_t mean_bv; /* mean value for blue in RGB or V in YUV */
int16_t mean_ax;

uint8_t mean_format; /* dla_mean_format */
uint8_t conv_stride_x;
uint8_t conv_stride_y;
uint8_t pad_x_left;

uint8_t pad_x_right;
uint8_t pad_y_top;
uint8_t pad_y_bottom;
uint8_t dilation_x;

uint8_t dilation_y;
uint8_t reserved2[2];

/* Precision parameters */
uint8_t pra_truncate;

uint8_t in_precision;
/* The output precision from CONV, it's the MAC processing precison */

./nvdla_common.h:428: precison ==> precision
...

uint8_t out_precision;
int16_t pad_val;

/* input converter parameters */
struct dla_cvt_param in_cvt;
/* output converter parameters, support truncate only */
struct dla_cvt_param out_cvt;


+} __packed __aligned(4);



+struct dla_conv_stat_desc {

uint32_t data_read_stall;
uint32_t weight_read_stall;
uint32_t data_read_latency;
uint32_t weight_read_latency;
uint32_t saturation_count;
uint32_t nan_data_num;
uint32_t nan_weight_num;
uint32_t inf_data_num;
uint32_t inf_weight_num;

+} __packed __aligned(4);



+/**


@ingroup SDP



@name Activation functions



@brief Activation functions supported in SDP



@{


*/

+#define ACTIVATION_NONE		0
+#define ACTIVATION_RELU		1
+#define ACTIVATION_LUT		2
+#define ACTIVATION_PRELU	3
+/** @} */



+/**


@ingroup LUT



@name LUT size



@brief LUT sizes for linear and exponentila LUT



@{


*/

+#define LUT_LINEAR_EXP_TABLE_ENTRY_LOG2		6
+#define LUT_LINEAR_ONLY_TABLE_ENTRY_LOG2	8
+/** @} */



+/**


@ingroup LUT



@name LUT types



@brief DLA supports two types of LUT, linear and exonential



@{


*/

+#define LUT_LINEAR_EXP_TABLE		0
+#define LUT_LINEAR_ONLY_TABLE		1
+/** @} */



+/**


@ingroup LUT



@name LUT methods



@brief DLA supports two types of LUT, linear and exonential



@{


*/

+#define LUT_METHOD_EXPONENTIAL		0
+#define LUT_METHOD_LINEAR		1
+/** @} */



+/**


@ingroup LUT



@name LUT



@brief DLA supports two types of LUT, linear and exonential



@{


*/

+#define LUT_PRI_LINEAR_EXP		0
+#define LUT_PRI_LINEAR_ONLY		1
+/** @} */



+union dla_lut_offset {

/**
* Number should be substracted on log domain before look up



./nvdla_common.h:505: substracted ==> subtracted
...

* exponetial table it has the same definition as hardware



./nvdla_common.h:506: exponetial ==> exponential
...

* thus input scaling should also take into account when


* set this field.


*/


int8_t exp_offset;
/**
* Number of bits should be right shift before looking


* up linear table


*/


int8_t frac_bits;
uint16_t reserved0;

+};
... snip
...
diff --git a/drivers/gpu/drm/nvdla/nvdla_drm.c b/drivers/gpu/drm/nvdla/nvdla_drm.c
new file mode 100644
index 000000000000..9217eee1de3b
--- /dev/null
+++ b/drivers/gpu/drm/nvdla/nvdla_drm.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/*


Copyright (C) 2017-2018 NVIDIA CORPORATION



Copyright (C) 2022 Cai Huoqing


*/


+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+#include <linux/fs.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/time.h>
+#include <linux/uaccess.h>
+#include <linux/types.h>



+#include "nvdla_drm.h"
+#include "nvdla_ioctl.h"
+#include "nvdla_engine.h"



+static struct nvdla_config nvdla_config_os_initial = {

.atom_size = 32,
.bdma_enable = true,
.rubik_enable = true,
.weight_compress_support = true,

+};



+static struct nvdla_config nvdla_config_small = {

//.atom_size = 8,
.atom_size = 32,  // nv_large config
.bdma_enable = false,
.rubik_enable = false,
.weight_compress_support = false,

+};



+int64_t dla_get_time_us(void)
Function is never used.
...
+{

return ktime_get_ns() / NSEC_PER_USEC;

+}



+void dla_reg_write(void *driver_context, uint32_t addr, uint32_t reg)
+{

struct nvdla_device *nvdla_dev =
	(struct nvdla_device *)driver_context;



if (!nvdla_dev)
return;



writel(reg, nvdla_dev->base + addr);

+}



+uint32_t dla_reg_read(void *driver_context, uint32_t addr)
+{

struct nvdla_device *nvdla_dev =
	(struct nvdla_device *)driver_context;



if (!nvdla_dev)
return 0;



return readl(nvdla_dev->base + addr);

+}



+static irqreturn_t nvdla_engine_isr(int32_t irq, void *data)
+{

unsigned long flags;
uint32_t mask;
uint32_t reg;
struct dla_processor *processor = NULL;
struct dla_processor_group *group;
struct dla_engine *engine;
struct nvdla_device *nvdla_dev = (struct nvdla_device *)data;

if (!nvdla_dev)
return IRQ_NONE;



engine = nvdla_dev->engine_context;
spin_lock_irqsave(&nvdla_dev->nvdla_lock, flags);

mask = glb_reg_read(engine, S_INTR_MASK);

Never used. It would be nice to clean these up so that static analyzers will
not complain about them anymore, but it is your choice what you want to do.
...

reg = glb_reg_read(engine, S_INTR_STATUS);

if (reg & MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS0)) {
processor = &engine->processors[DLA_OP_CONV];


group = &processor->groups[0];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, CACC_DONE_STATUS1)) {
processor = &engine->processors[DLA_OP_CONV];


group = &processor->groups[1];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, SDP_DONE_STATUS0)) {
processor = &engine->processors[DLA_OP_SDP];


group = &processor->groups[0];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, SDP_DONE_STATUS1)) {
processor = &engine->processors[DLA_OP_SDP];


group = &processor->groups[1];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, CDP_DONE_STATUS0)) {
processor = &engine->processors[DLA_OP_CDP];


group = &processor->groups[0];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, CDP_DONE_STATUS1)) {
processor = &engine->processors[DLA_OP_CDP];


group = &processor->groups[1];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, RUBIK_DONE_STATUS0)) {
processor = &engine->processors[DLA_OP_RUBIK];


group = &processor->groups[0];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, RUBIK_DONE_STATUS1)) {
processor = &engine->processors[DLA_OP_RUBIK];


group = &processor->groups[1];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, PDP_DONE_STATUS0)) {
processor = &engine->processors[DLA_OP_PDP];


group = &processor->groups[0];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, PDP_DONE_STATUS1)) {
processor = &engine->processors[DLA_OP_PDP];


group = &processor->groups[1];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, BDMA_DONE_STATUS0)) {
processor = &engine->processors[DLA_OP_BDMA];


group = &processor->groups[0];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, BDMA_DONE_STATUS1)) {
processor = &engine->processors[DLA_OP_BDMA];


group = &processor->groups[1];


group->events |= (1 << DLA_EVENT_OP_COMPLETED);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_DAT_DONE_STATUS0)) {
processor = &engine->processors[DLA_OP_CONV];


group = &processor->groups[0];


group->events |= (1 << DLA_EVENT_CDMA_DT_DONE);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_DAT_DONE_STATUS1)) {
processor = &engine->processors[DLA_OP_CONV];


group = &processor->groups[1];


group->events |= (1 << DLA_EVENT_CDMA_DT_DONE);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_WT_DONE_STATUS0)) {
processor = &engine->processors[DLA_OP_CONV];


group = &processor->groups[0];


group->events |= (1 << DLA_EVENT_CDMA_WT_DONE);


}
if (reg & MASK(GLB_S_INTR_STATUS_0, CDMA_WT_DONE_STATUS1)) {
processor = &engine->processors[DLA_OP_CONV];


group = &processor->groups[1];


group->events |= (1 << DLA_EVENT_CDMA_WT_DONE);


}

glb_reg_write(engine, S_INTR_STATUS, reg);
mask = glb_reg_read(engine, S_INTR_MASK);

Never used
...

reg = glb_reg_read(engine, S_INTR_STATUS);

Never used.
...


complete(&nvdla_dev->event_notifier);
spin_unlock_irqrestore(&nvdla_dev->nvdla_lock, flags);

return IRQ_HANDLED;

+}
... snip
...
diff --git a/drivers/gpu/drm/nvdla/nvdla_gem.c b/drivers/gpu/drm/nvdla/nvdla_gem.c
new file mode 100644
index 000000000000..cccf6d01a564
--- /dev/null
+++ b/drivers/gpu/drm/nvdla/nvdla_gem.c
... snip
...
+static const struct drm_ioctl_desc nvdla_drm_ioctls[] = {

DRM_IOCTL_DEF_DRV(NVDLA_SUBMIT, nvdla_submit, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(NVDLA_GEM_CREATE, nvdla_gem_create, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(NVDLA_GEM_MMAP, nvdla_gem_map_offset, DRM_RENDER_ALLOW),
/* use DRM_IOCTL_MODE_DESTROY_DUMB to destory */

./nvdla_gem.c:347: destory ==> destroy
...
+};
... snip
...
diff --git a/drivers/gpu/drm/nvdla/nvdla_scheduler.c b/drivers/gpu/drm/nvdla/nvdla_scheduler.c
new file mode 100644
index 000000000000..b814077478c6
--- /dev/null
+++ b/drivers/gpu/drm/nvdla/nvdla_scheduler.c
... snip
...
+static int
+dla_update_dependency(struct dla_engine *engine,

			  struct dla_consumer *consumer,


			  struct dla_common_op_desc *op_desc,


			  uint8_t event, uint8_t roi_index)



+{

int32_t ret = 0;
struct dla_processor *processor;

if (consumer->index == -1)
goto exit;



/* Update dependency only if event matches */
if (event != consumer->event)
goto exit;



/**
* If consumer index is valid but op desc is NULL means


* op desc for consumer was not pre-fetched


*/


if (op_desc == NULL) {
ret = -EINVAL;


pr_err("Operation descriptor is NULL, consumer index %d",


		consumer->index);


goto exit;


}

pr_debug("Update dependency operation index %d ROI %d DEP_COUNT=%d\n",
			op_desc->index, op_desc->roi_index,


			op_desc->dependency_count);


op_desc->dependency_count--;

if (op_desc->dependency_count == 0) {
processor = &engine->processors[op_desc->op_type];


pr_debug("enable %s in %s as depdency are resolved\n",



./nvdla_scheduler.c:455: depdency ==> dependency
...

	processor->name, __func__);



ret = dla_enable_operation(engine, processor, op_desc);


if (ret)


	goto exit;


}

+exit:

return ret;

+}
... snip
...
+int
+dla_process_events(struct dla_engine *engine, uint32_t *task_complete)
+{

int32_t i;
int32_t ret = 0;

for (i = 0; i < DLA_OP_NUM; i++) {
struct dla_processor *processor;



processor = &engine->processors[i];


ret = dla_handle_events(engine, processor);


/**


 * Incase engine status is non-zero, then don't



./nvdla_scheduler.c:905: Incase ==> In case
...

 * update the engine status. We should keep its


 * status for later cleaning of engine.


 */


if (!engine->status)


	engine->status = ret;


}

if (engine->network->num_operations == engine->num_proc_hwl)
*task_complete = 1;



return ret;

+}
... snip
Argillander

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

[Linaro-mm-sig] Re: [PATCH 2/2] drm/nvdla: Add driver support for NVDLA