Hi
Am 25.02.25 um 08:55 schrieb Tomeu Vizoso:
Using the DRM GPU scheduler infrastructure, with a scheduler for each core.
Userspace can decide for a series of tasks to be executed sequentially in the same core, so SRAM locality can be taken advantage of.
The job submission code was initially based on Panfrost.
v2:
- Remove hardcoded number of cores
- Misc. style fixes (Jeffrey Hugo)
- Repack IOCTL struct (Jeffrey Hugo)
Signed-off-by: Tomeu Vizoso tomeu@tomeuvizoso.net
drivers/accel/rocket/Makefile | 3 +- drivers/accel/rocket/rocket_core.c | 6 + drivers/accel/rocket/rocket_core.h | 14 + drivers/accel/rocket/rocket_device.c | 2 + drivers/accel/rocket/rocket_device.h | 2 + drivers/accel/rocket/rocket_drv.c | 15 + drivers/accel/rocket/rocket_drv.h | 4 + drivers/accel/rocket/rocket_job.c | 710 +++++++++++++++++++++++++++++++++++ drivers/accel/rocket/rocket_job.h | 50 +++ include/uapi/drm/rocket_accel.h | 55 +++ 10 files changed, 860 insertions(+), 1 deletion(-)
diff --git a/drivers/accel/rocket/Makefile b/drivers/accel/rocket/Makefile index 875cac2243d902694e0d5d05e60b4ae551a633c4..4d59036af8d9c213d3cac0559eb66e3ebb0320e7 100644 --- a/drivers/accel/rocket/Makefile +++ b/drivers/accel/rocket/Makefile @@ -6,4 +6,5 @@ rocket-y := \ rocket_core.o \ rocket_device.o \ rocket_drv.o \
- rocket_gem.o
- rocket_gem.o \
- rocket_job.o
diff --git a/drivers/accel/rocket/rocket_core.c b/drivers/accel/rocket/rocket_core.c index 09d966c826b5b1090a18cb24b3aa4aba286a12d4..2b522592693874eed90463e8f85653d5282ae5b8 100644 --- a/drivers/accel/rocket/rocket_core.c +++ b/drivers/accel/rocket/rocket_core.c @@ -6,6 +6,7 @@ #include <linux/pm_runtime.h> #include "rocket_core.h" +#include "rocket_job.h" #include "rocket_registers.h" static int rocket_clk_init(struct rocket_core *core) @@ -48,6 +49,10 @@ int rocket_core_init(struct rocket_core *core) if (IS_ERR(core->iomem)) return PTR_ERR(core->iomem);
- err = rocket_job_init(core);
- if (err)
return err;
- pm_runtime_use_autosuspend(dev); pm_runtime_set_autosuspend_delay(dev, 50); /* ~3 frames */ pm_runtime_enable(dev);
@@ -68,4 +73,5 @@ int rocket_core_init(struct rocket_core *core) void rocket_core_fini(struct rocket_core *core) { pm_runtime_disable(core->dev);
- rocket_job_fini(core); }
diff --git a/drivers/accel/rocket/rocket_core.h b/drivers/accel/rocket/rocket_core.h index 2171eba7139ccc63fe24802dc81b4adb7f3abf31..045a46a2010a2ffd6122ed86c379e5fabc70365a 100644 --- a/drivers/accel/rocket/rocket_core.h +++ b/drivers/accel/rocket/rocket_core.h @@ -21,6 +21,20 @@ struct rocket_core { void __iomem *iomem; struct clk *a_clk; struct clk *h_clk;
- struct rocket_job *in_flight_job;
- spinlock_t job_lock;
- struct {
struct workqueue_struct *wq;
struct work_struct work;
atomic_t pending;
- } reset;
- struct drm_gpu_scheduler sched;
- u64 fence_context;
- u64 emit_seqno; };
int rocket_core_init(struct rocket_core *core); diff --git a/drivers/accel/rocket/rocket_device.c b/drivers/accel/rocket/rocket_device.c index 9af36357caba7148dcac764c8222699f3b572d60..62c640e1e0200fe25b6834e45d71f6de139ff3ab 100644 --- a/drivers/accel/rocket/rocket_device.c +++ b/drivers/accel/rocket/rocket_device.c @@ -12,6 +12,7 @@ int rocket_device_init(struct rocket_device *rdev) int err; mutex_init(&rdev->iommu_lock);
- mutex_init(&rdev->sched_lock);
rdev->clk_npu = devm_clk_get(dev, "npu"); rdev->pclk = devm_clk_get(dev, "pclk"); @@ -29,5 +30,6 @@ int rocket_device_init(struct rocket_device *rdev) void rocket_device_fini(struct rocket_device *rdev) { rocket_core_fini(&rdev->cores[0]);
- mutex_destroy(&rdev->sched_lock); mutex_destroy(&rdev->iommu_lock); }
diff --git a/drivers/accel/rocket/rocket_device.h b/drivers/accel/rocket/rocket_device.h index c6152569fdd9e5587c8e8d7b0d7c2e2a77af6000..4168ae8da2d38c2ea114b37c6e053b02611a0232 100644 --- a/drivers/accel/rocket/rocket_device.h +++ b/drivers/accel/rocket/rocket_device.h @@ -11,6 +11,8 @@ struct rocket_device { struct drm_device ddev;
- struct mutex sched_lock;
- struct clk *clk_npu; struct clk *pclk;
diff --git a/drivers/accel/rocket/rocket_drv.c b/drivers/accel/rocket/rocket_drv.c index e5612b52952fa7a0cd0af02aef314984bc483b05..a6b486e2d4f648d7b1d8831590b633bf661c7bc4 100644 --- a/drivers/accel/rocket/rocket_drv.c +++ b/drivers/accel/rocket/rocket_drv.c @@ -16,12 +16,14 @@ #include "rocket_drv.h" #include "rocket_gem.h" +#include "rocket_job.h" static int rocket_open(struct drm_device *dev, struct drm_file *file) { struct rocket_device *rdev = to_rocket_device(dev); struct rocket_file_priv *rocket_priv;
- int ret;
rocket_priv = kzalloc(sizeof(*rocket_priv), GFP_KERNEL); if (!rocket_priv) @@ -30,7 +32,15 @@ rocket_open(struct drm_device *dev, struct drm_file *file) rocket_priv->rdev = rdev; file->driver_priv = rocket_priv;
- ret = rocket_job_open(rocket_priv);
- if (ret)
goto err_free;
- return 0;
+err_free:
- kfree(rocket_priv);
- return ret; }
static void @@ -38,6 +48,7 @@ rocket_postclose(struct drm_device *dev, struct drm_file *file) { struct rocket_file_priv *rocket_priv = file->driver_priv;
- rocket_job_close(rocket_priv); kfree(rocket_priv); }
@@ -46,6 +57,7 @@ static const struct drm_ioctl_desc rocket_drm_driver_ioctls[] = { DRM_IOCTL_DEF_DRV(ROCKET_##n, rocket_ioctl_##func, 0) ROCKET_IOCTL(CREATE_BO, create_bo),
- ROCKET_IOCTL(SUBMIT, submit), };
DEFINE_DRM_ACCEL_FOPS(rocket_accel_driver_fops); @@ -245,6 +257,9 @@ static int rocket_device_runtime_suspend(struct device *dev) if (dev != rdev->cores[core].dev) continue;
if (!rocket_job_is_idle(&rdev->cores[core]))
return -EBUSY;
- clk_disable_unprepare(rdev->cores[core].a_clk); clk_disable_unprepare(rdev->cores[core].h_clk);
diff --git a/drivers/accel/rocket/rocket_drv.h b/drivers/accel/rocket/rocket_drv.h index ccdd50c69d4c033eea18cb800407fdcfb3bf2e9b..54e21a61006057aee293496016e54b495a2f6d55 100644 --- a/drivers/accel/rocket/rocket_drv.h +++ b/drivers/accel/rocket/rocket_drv.h @@ -4,10 +4,14 @@ #ifndef __ROCKET_DRV_H__ #define __ROCKET_DRV_H__ +#include <drm/gpu_scheduler.h>
- #include "rocket_device.h"
struct rocket_file_priv { struct rocket_device *rdev;
- struct drm_sched_entity sched_entity; };
#endif diff --git a/drivers/accel/rocket/rocket_job.c b/drivers/accel/rocket/rocket_job.c new file mode 100644 index 0000000000000000000000000000000000000000..25b31f28e932aaee86173b9a0962932c9c640c03 --- /dev/null +++ b/drivers/accel/rocket/rocket_job.c @@ -0,0 +1,710 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2019 Linaro, Ltd, Rob Herring robh@kernel.org */ +/* Copyright 2019 Collabora ltd. */ +/* Copyright 2024 Tomeu Vizoso tomeu@tomeuvizoso.net */
+#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/rocket_accel.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h>
+#include "rocket_core.h" +#include "rocket_device.h" +#include "rocket_drv.h" +#include "rocket_job.h" +#include "rocket_registers.h"
+#define JOB_TIMEOUT_MS 500
+#define job_write(dev, reg, data) writel(data, dev->iomem + (reg)) +#define job_read(dev, reg) readl(dev->iomem + (reg))
+static struct rocket_job * +to_rocket_job(struct drm_sched_job *sched_job) +{
- return container_of(sched_job, struct rocket_job, base);
+}
+struct rocket_fence {
- struct dma_fence base;
- struct drm_device *dev;
- /* rocket seqno for signaled() test */
- u64 seqno;
- int queue;
+};
+static inline struct rocket_fence * +to_rocket_fence(struct dma_fence *fence) +{
- return (struct rocket_fence *)fence;
Proper upcast with container_of() please.
+}
+static const char *rocket_fence_get_driver_name(struct dma_fence *fence) +{
- return "rocket";
+}
+static const char *rocket_fence_get_timeline_name(struct dma_fence *fence) +{
- return "rockchip-npu";
+}
+static const struct dma_fence_ops rocket_fence_ops = {
- .get_driver_name = rocket_fence_get_driver_name,
- .get_timeline_name = rocket_fence_get_timeline_name,
+};
+static struct dma_fence *rocket_fence_create(struct rocket_core *core) +{
- struct rocket_device *rdev = core->rdev;
- struct rocket_fence *fence;
- fence = kzalloc(sizeof(*fence), GFP_KERNEL);
- if (!fence)
return ERR_PTR(-ENOMEM);
- fence->dev = &rdev->ddev;
- fence->seqno = ++core->emit_seqno;
- dma_fence_init(&fence->base, &rocket_fence_ops, &core->job_lock,
core->fence_context, fence->seqno);
- return &fence->base;
+}
+static int +rocket_copy_tasks(struct drm_device *dev,
struct drm_file *file_priv,
struct drm_rocket_job *job,
struct rocket_job *rjob)
+{
- struct drm_rocket_task *tasks;
- int ret = 0;
- int i;
- rjob->task_count = job->task_count;
- if (!rjob->task_count)
return 0;
- tasks = kvmalloc_array(rjob->task_count, sizeof(*tasks), GFP_KERNEL);
- if (!tasks) {
ret = -ENOMEM;
DRM_DEBUG("Failed to allocate incoming tasks\n");
These logging macros are deprecated. Please use drm_dbg() and similar functions instead. Here and everywhere else.
goto fail;
- }
- if (copy_from_user(tasks,
(void __user *)(uintptr_t)job->tasks,
rjob->task_count * sizeof(*tasks))) {
ret = -EFAULT;
DRM_DEBUG("Failed to copy incoming tasks\n");
goto fail;
- }
- rjob->tasks = kvmalloc_array(job->task_count, sizeof(*rjob->tasks), GFP_KERNEL);
- if (!rjob->tasks) {
DRM_DEBUG("Failed to allocate task array\n");
ret = -ENOMEM;
goto fail;
- }
- for (i = 0; i < rjob->task_count; i++) {
if (tasks[i].regcmd_count == 0) {
ret = -EINVAL;
goto fail;
}
rjob->tasks[i].regcmd = tasks[i].regcmd;
rjob->tasks[i].regcmd_count = tasks[i].regcmd_count;
- }
+fail:
- kvfree(tasks);
- return ret;
+}
+static void rocket_job_hw_submit(struct rocket_core *core, struct rocket_job *job) +{
- struct rocket_task *task;
- bool task_pp_en = 1;
- bool task_count = 1;
- /* GO ! */
- /* Don't queue the job if a reset is in progress */
- if (!atomic_read(&core->reset.pending)) {
task = &job->tasks[job->next_task_idx];
job->next_task_idx++; /* TODO: Do this only after a successful run? */
rocket_write(core, REG_PC_BASE_ADDRESS, 0x1);
Just 'write' seems a little imprecise. Maybe 'writel' would be better.
Best regards Thomas
rocket_write(core, REG_CNA_S_POINTER, 0xe + 0x10000000 * core->index);
rocket_write(core, REG_CORE_S_POINTER, 0xe + 0x10000000 * core->index);
rocket_write(core, REG_PC_BASE_ADDRESS, task->regcmd);
rocket_write(core, REG_PC_REGISTER_AMOUNTS, (task->regcmd_count + 1) / 2 - 1);
rocket_write(core, REG_PC_INTERRUPT_MASK,
PC_INTERRUPT_MASK_DPU_0 | PC_INTERRUPT_MASK_DPU_1);
rocket_write(core, REG_PC_INTERRUPT_CLEAR,
PC_INTERRUPT_CLEAR_DPU_0 | PC_INTERRUPT_CLEAR_DPU_1);
rocket_write(core, REG_PC_TASK_CON, ((0x6 | task_pp_en) << 12) | task_count);
rocket_write(core, REG_PC_TASK_DMA_BASE_ADDR, 0x0);
rocket_write(core, REG_PC_OPERATION_ENABLE, 0x1);
dev_dbg(core->dev,
"Submitted regcmd at 0x%llx to core %d",
task->regcmd, core->index);
- }
+}
+static int rocket_acquire_object_fences(struct drm_gem_object **bos,
int bo_count,
struct drm_sched_job *job,
bool is_write)
+{
- int i, ret;
- for (i = 0; i < bo_count; i++) {
ret = dma_resv_reserve_fences(bos[i]->resv, 1);
if (ret)
return ret;
ret = drm_sched_job_add_implicit_dependencies(job, bos[i],
is_write);
if (ret)
return ret;
- }
- return 0;
+}
+static void rocket_attach_object_fences(struct drm_gem_object **bos,
int bo_count,
struct dma_fence *fence)
+{
- int i;
- for (i = 0; i < bo_count; i++)
dma_resv_add_fence(bos[i]->resv, fence, DMA_RESV_USAGE_WRITE);
+}
+static int rocket_job_push(struct rocket_job *job) +{
- struct rocket_device *rdev = job->rdev;
- struct drm_gem_object **bos;
- struct ww_acquire_ctx acquire_ctx;
- int ret = 0;
- bos = kvmalloc_array(job->in_bo_count + job->out_bo_count, sizeof(void *),
GFP_KERNEL);
- memcpy(bos, job->in_bos, job->in_bo_count * sizeof(void *));
- memcpy(&bos[job->in_bo_count], job->out_bos, job->out_bo_count * sizeof(void *));
- ret = drm_gem_lock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
- if (ret)
goto err;
- mutex_lock(&rdev->sched_lock);
- drm_sched_job_arm(&job->base);
- job->inference_done_fence = dma_fence_get(&job->base.s_fence->finished);
- ret = rocket_acquire_object_fences(job->in_bos, job->in_bo_count, &job->base, false);
- if (ret) {
mutex_unlock(&rdev->sched_lock);
goto err_unlock;
- }
- ret = rocket_acquire_object_fences(job->out_bos, job->out_bo_count, &job->base, true);
- if (ret) {
mutex_unlock(&rdev->sched_lock);
goto err_unlock;
- }
- kref_get(&job->refcount); /* put by scheduler job completion */
- drm_sched_entity_push_job(&job->base);
- mutex_unlock(&rdev->sched_lock);
- rocket_attach_object_fences(job->out_bos, job->out_bo_count, job->inference_done_fence);
+err_unlock:
- drm_gem_unlock_reservations(bos, job->in_bo_count + job->out_bo_count, &acquire_ctx);
+err:
- kfree(bos);
- return ret;
+}
+static void rocket_job_cleanup(struct kref *ref) +{
- struct rocket_job *job = container_of(ref, struct rocket_job,
refcount);
- unsigned int i;
- dma_fence_put(job->done_fence);
- dma_fence_put(job->inference_done_fence);
- if (job->in_bos) {
for (i = 0; i < job->in_bo_count; i++)
drm_gem_object_put(job->in_bos[i]);
kvfree(job->in_bos);
- }
- if (job->out_bos) {
for (i = 0; i < job->out_bo_count; i++)
drm_gem_object_put(job->out_bos[i]);
kvfree(job->out_bos);
- }
- kfree(job->tasks);
- kfree(job);
+}
+static void rocket_job_put(struct rocket_job *job) +{
- kref_put(&job->refcount, rocket_job_cleanup);
+}
+static void rocket_job_free(struct drm_sched_job *sched_job) +{
- struct rocket_job *job = to_rocket_job(sched_job);
- drm_sched_job_cleanup(sched_job);
- rocket_job_put(job);
+}
+static struct rocket_core *sched_to_core(struct rocket_device *rdev,
struct drm_gpu_scheduler *sched)
+{
- unsigned int core;
- for (core = 0; core < rdev->num_cores; core++) {
if (&rdev->cores[core].sched == sched)
return &rdev->cores[core];
- }
- return NULL;
+}
+static struct dma_fence *rocket_job_run(struct drm_sched_job *sched_job) +{
- struct rocket_job *job = to_rocket_job(sched_job);
- struct rocket_device *rdev = job->rdev;
- struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
- struct dma_fence *fence = NULL;
- int ret;
- if (unlikely(job->base.s_fence->finished.error))
return NULL;
- /*
* Nothing to execute: can happen if the job has finished while
* we were resetting the GPU.
*/
- if (job->next_task_idx == job->task_count)
return NULL;
- fence = rocket_fence_create(core);
- if (IS_ERR(fence))
return fence;
- if (job->done_fence)
dma_fence_put(job->done_fence);
- job->done_fence = dma_fence_get(fence);
- ret = pm_runtime_get_sync(core->dev);
- if (ret < 0)
return fence;
- spin_lock(&core->job_lock);
- core->in_flight_job = job;
- rocket_job_hw_submit(core, job);
- spin_unlock(&core->job_lock);
- return fence;
+}
+static void rocket_job_handle_done(struct rocket_core *core,
struct rocket_job *job)
+{
- if (job->next_task_idx < job->task_count) {
rocket_job_hw_submit(core, job);
return;
- }
- core->in_flight_job = NULL;
- dma_fence_signal_locked(job->done_fence);
- pm_runtime_put_autosuspend(core->dev);
+}
+static void rocket_job_handle_irq(struct rocket_core *core) +{
- uint32_t status, raw_status;
- pm_runtime_mark_last_busy(core->dev);
- status = rocket_read(core, REG_PC_INTERRUPT_STATUS);
- raw_status = rocket_read(core, REG_PC_INTERRUPT_RAW_STATUS);
- rocket_write(core, REG_PC_OPERATION_ENABLE, 0x0);
- rocket_write(core, REG_PC_INTERRUPT_CLEAR, 0x1ffff);
- spin_lock(&core->job_lock);
- if (core->in_flight_job)
rocket_job_handle_done(core, core->in_flight_job);
- spin_unlock(&core->job_lock);
+}
+static void +rocket_reset(struct rocket_core *core, struct drm_sched_job *bad) +{
- bool cookie;
- if (!atomic_read(&core->reset.pending))
return;
- /*
* Stop the scheduler.
*
* FIXME: We temporarily get out of the dma_fence_signalling section
* because the cleanup path generate lockdep splats when taking locks
* to release job resources. We should rework the code to follow this
* pattern:
*
* try_lock
* if (locked)
* release
* else
* schedule_work_to_release_later
*/
- drm_sched_stop(&core->sched, bad);
- cookie = dma_fence_begin_signalling();
- if (bad)
drm_sched_increase_karma(bad);
- /*
* Mask job interrupts and synchronize to make sure we won't be
* interrupted during our reset.
*/
- rocket_write(core, REG_PC_INTERRUPT_MASK, 0x0);
- synchronize_irq(core->irq);
- /* Handle the remaining interrupts before we reset. */
- rocket_job_handle_irq(core);
- /*
* Remaining interrupts have been handled, but we might still have
* stuck jobs. Let's make sure the PM counters stay balanced by
* manually calling pm_runtime_put_noidle() and
* rocket_devfreq_record_idle() for each stuck job.
* Let's also make sure the cycle counting register's refcnt is
* kept balanced to prevent it from running forever
*/
- spin_lock(&core->job_lock);
- if (core->in_flight_job)
pm_runtime_put_noidle(core->dev);
- core->in_flight_job = NULL;
- spin_unlock(&core->job_lock);
- /* Proceed with reset now. */
- pm_runtime_force_suspend(core->dev);
- pm_runtime_force_resume(core->dev);
- /* GPU has been reset, we can clear the reset pending bit. */
- atomic_set(&core->reset.pending, 0);
- /*
* Now resubmit jobs that were previously queued but didn't have a
* chance to finish.
* FIXME: We temporarily get out of the DMA fence signalling section
* while resubmitting jobs because the job submission logic will
* allocate memory with the GFP_KERNEL flag which can trigger memory
* reclaim and exposes a lock ordering issue.
*/
- dma_fence_end_signalling(cookie);
- drm_sched_resubmit_jobs(&core->sched);
- cookie = dma_fence_begin_signalling();
- /* Restart the scheduler */
- drm_sched_start(&core->sched, 0);
- dma_fence_end_signalling(cookie);
+}
+static enum drm_gpu_sched_stat rocket_job_timedout(struct drm_sched_job *sched_job) +{
- struct rocket_job *job = to_rocket_job(sched_job);
- struct rocket_device *rdev = job->rdev;
- struct rocket_core *core = sched_to_core(rdev, sched_job->sched);
- /*
* If the GPU managed to complete this jobs fence, the timeout is
* spurious. Bail out.
*/
- if (dma_fence_is_signaled(job->done_fence))
return DRM_GPU_SCHED_STAT_NOMINAL;
- /*
* Rocket IRQ handler may take a long time to process an interrupt
* if there is another IRQ handler hogging the processing.
* For example, the HDMI encoder driver might be stuck in the IRQ
* handler for a significant time in a case of bad cable connection.
* In order to catch such cases and not report spurious rocket
* job timeouts, synchronize the IRQ handler and re-check the fence
* status.
*/
- synchronize_irq(core->irq);
- if (dma_fence_is_signaled(job->done_fence)) {
dev_warn(core->dev, "unexpectedly high interrupt latency\n");
return DRM_GPU_SCHED_STAT_NOMINAL;
- }
- dev_err(core->dev, "gpu sched timeout");
- atomic_set(&core->reset.pending, 1);
- rocket_reset(core, sched_job);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+static void rocket_reset_work(struct work_struct *work) +{
- struct rocket_core *core;
- core = container_of(work, struct rocket_core, reset.work);
- rocket_reset(core, NULL);
+}
+static const struct drm_sched_backend_ops rocket_sched_ops = {
- .run_job = rocket_job_run,
- .timedout_job = rocket_job_timedout,
- .free_job = rocket_job_free
+};
+static irqreturn_t rocket_job_irq_handler_thread(int irq, void *data) +{
- struct rocket_core *core = data;
- rocket_job_handle_irq(core);
- return IRQ_HANDLED;
+}
+static irqreturn_t rocket_job_irq_handler(int irq, void *data) +{
- struct rocket_core *core = data;
- uint32_t raw_status = rocket_read(core, REG_PC_INTERRUPT_RAW_STATUS);
- WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR);
- WARN_ON(raw_status & PC_INTERRUPT_RAW_STATUS_DMA_READ_ERROR);
- if (!(raw_status & PC_INTERRUPT_RAW_STATUS_DPU_0 ||
raw_status & PC_INTERRUPT_RAW_STATUS_DPU_1))
return IRQ_NONE;
- rocket_write(core, REG_PC_INTERRUPT_MASK, 0x0);
- return IRQ_WAKE_THREAD;
+}
+int rocket_job_init(struct rocket_core *core) +{
- int ret;
- INIT_WORK(&core->reset.work, rocket_reset_work);
- spin_lock_init(&core->job_lock);
- core->irq = platform_get_irq(to_platform_device(core->dev), 0);
- if (core->irq < 0)
return core->irq;
- ret = devm_request_threaded_irq(core->dev, core->irq,
rocket_job_irq_handler,
rocket_job_irq_handler_thread,
IRQF_SHARED, KBUILD_MODNAME "-job",
core);
- if (ret) {
dev_err(core->dev, "failed to request job irq");
return ret;
- }
- core->reset.wq = alloc_ordered_workqueue("rocket-reset-%d", 0, core->index);
- if (!core->reset.wq)
return -ENOMEM;
- core->fence_context = dma_fence_context_alloc(1);
- ret = drm_sched_init(&core->sched,
&rocket_sched_ops, NULL,
DRM_SCHED_PRIORITY_COUNT,
1, 0,
msecs_to_jiffies(JOB_TIMEOUT_MS),
core->reset.wq,
NULL, "rocket", core->dev);
- if (ret) {
dev_err(core->dev, "Failed to create scheduler: %d.", ret);
goto err_sched;
- }
- return 0;
+err_sched:
- drm_sched_fini(&core->sched);
- destroy_workqueue(core->reset.wq);
- return ret;
+}
+void rocket_job_fini(struct rocket_core *core) +{
- drm_sched_fini(&core->sched);
- cancel_work_sync(&core->reset.work);
- destroy_workqueue(core->reset.wq);
+}
+int rocket_job_open(struct rocket_file_priv *rocket_priv) +{
- struct rocket_device *rdev = rocket_priv->rdev;
- struct drm_gpu_scheduler **scheds = kmalloc_array(rdev->num_cores, sizeof(scheds),
GFP_KERNEL);
- unsigned int core;
- int ret;
- for (core = 0; core < rdev->num_cores; core++)
scheds[core] = &rdev->cores[core].sched;
- ret = drm_sched_entity_init(&rocket_priv->sched_entity,
DRM_SCHED_PRIORITY_NORMAL,
scheds,
rdev->num_cores, NULL);
- if (WARN_ON(ret))
return ret;
- return 0;
+}
+void rocket_job_close(struct rocket_file_priv *rocket_priv) +{
- struct drm_sched_entity *entity = &rocket_priv->sched_entity;
- kfree(entity->sched_list);
- drm_sched_entity_destroy(entity);
+}
+int rocket_job_is_idle(struct rocket_core *core) +{
- /* If there are any jobs in this HW queue, we're not idle */
- if (atomic_read(&core->sched.credit_count))
return false;
- return true;
+}
+static int rocket_ioctl_submit_job(struct drm_device *dev, struct drm_file *file,
struct drm_rocket_job *job)
+{
- struct rocket_device *rdev = to_rocket_device(dev);
- struct rocket_file_priv *file_priv = file->driver_priv;
- struct rocket_job *rjob = NULL;
- int ret = 0;
- if (job->task_count == 0)
return -EINVAL;
- rjob = kzalloc(sizeof(*rjob), GFP_KERNEL);
- if (!rjob)
return -ENOMEM;
- kref_init(&rjob->refcount);
- rjob->rdev = rdev;
- ret = drm_sched_job_init(&rjob->base,
&file_priv->sched_entity,
1, NULL);
- if (ret)
goto out_put_job;
- ret = rocket_copy_tasks(dev, file, job, rjob);
- if (ret)
goto out_cleanup_job;
- ret = drm_gem_objects_lookup(file,
(void __user *)(uintptr_t)job->in_bo_handles,
job->in_bo_handle_count, &rjob->in_bos);
- if (ret)
goto out_cleanup_job;
- rjob->in_bo_count = job->in_bo_handle_count;
- ret = drm_gem_objects_lookup(file,
(void __user *)(uintptr_t)job->out_bo_handles,
job->out_bo_handle_count, &rjob->out_bos);
- if (ret)
goto out_cleanup_job;
- rjob->out_bo_count = job->out_bo_handle_count;
- ret = rocket_job_push(rjob);
- if (ret)
goto out_cleanup_job;
+out_cleanup_job:
- if (ret)
drm_sched_job_cleanup(&rjob->base);
+out_put_job:
- rocket_job_put(rjob);
- return ret;
+}
+int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) +{
- struct drm_rocket_submit *args = data;
- struct drm_rocket_job *jobs;
- int ret = 0;
- unsigned int i = 0;
- jobs = kvmalloc_array(args->job_count, sizeof(*jobs), GFP_KERNEL);
- if (!jobs) {
DRM_DEBUG("Failed to allocate incoming job array\n");
return -ENOMEM;
- }
- if (copy_from_user(jobs,
(void __user *)(uintptr_t)args->jobs,
args->job_count * sizeof(*jobs))) {
ret = -EFAULT;
DRM_DEBUG("Failed to copy incoming job array\n");
goto exit;
- }
- for (i = 0; i < args->job_count; i++)
rocket_ioctl_submit_job(dev, file, &jobs[i]);
+exit:
- kfree(jobs);
- return ret;
+} diff --git a/drivers/accel/rocket/rocket_job.h b/drivers/accel/rocket/rocket_job.h new file mode 100644 index 0000000000000000000000000000000000000000..93fa1f988c72adb7a405acbf08c1c9b87d22f9c5 --- /dev/null +++ b/drivers/accel/rocket/rocket_job.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2024 Tomeu Vizoso tomeu@tomeuvizoso.net */
+#ifndef __ROCKET_JOB_H__ +#define __ROCKET_JOB_H__
+#include <drm/drm_drv.h> +#include <drm/gpu_scheduler.h>
+#include "rocket_core.h" +#include "rocket_drv.h"
+struct rocket_task {
- u64 regcmd;
- u32 regcmd_count;
+};
+struct rocket_job {
- struct drm_sched_job base;
- struct rocket_device *rdev;
- struct drm_gem_object **in_bos;
- struct drm_gem_object **out_bos;
- u32 in_bo_count;
- u32 out_bo_count;
- struct rocket_task *tasks;
- u32 task_count;
- u32 next_task_idx;
- /* Fence to be signaled by drm-sched once its done with the job */
- struct dma_fence *inference_done_fence;
- /* Fence to be signaled by IRQ handler when the job is complete. */
- struct dma_fence *done_fence;
- struct kref refcount;
+};
+int rocket_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file);
+int rocket_job_init(struct rocket_core *core); +void rocket_job_fini(struct rocket_core *core); +int rocket_job_open(struct rocket_file_priv *rocket_priv); +void rocket_job_close(struct rocket_file_priv *rocket_priv); +int rocket_job_is_idle(struct rocket_core *core);
+#endif diff --git a/include/uapi/drm/rocket_accel.h b/include/uapi/drm/rocket_accel.h index 8338726a83c31b954608ca505cf78bcd70d3494b..eb886351134ebef62969b1e1182ccc174f88fe9d 100644 --- a/include/uapi/drm/rocket_accel.h +++ b/include/uapi/drm/rocket_accel.h @@ -12,8 +12,10 @@ extern "C" { #endif #define DRM_ROCKET_CREATE_BO 0x00 +#define DRM_ROCKET_SUBMIT 0x01 #define DRM_IOCTL_ROCKET_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_ROCKET_CREATE_BO, struct drm_rocket_create_bo) +#define DRM_IOCTL_ROCKET_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_ROCKET_SUBMIT, struct drm_rocket_submit) /**
- struct drm_rocket_create_bo - ioctl argument for creating Rocket BOs.
@@ -36,6 +38,59 @@ struct drm_rocket_create_bo { __u64 offset; }; +/**
- struct drm_rocket_task - A task to be run on the NPU
- A task is the smallest unit of work that can be run on the NPU.
- */
+struct drm_rocket_task {
- /** DMA address to NPU mapping of register command buffer */
- __u64 regcmd;
- /** Number of commands in the register command buffer */
- __u32 regcmd_count;
+};
+/**
- struct drm_rocket_job - A job to be run on the NPU
- The kernel will schedule the execution of this job taking into account its
- dependencies with other jobs. All tasks in the same job will be executed
- sequentially on the same core, to benefit from memory residency in SRAM.
- */
+struct drm_rocket_job {
- /** Pointer to an array of struct drm_rocket_task. */
- __u64 tasks;
- /** Pointer to a u32 array of the BOs that are read by the job. */
- __u64 in_bo_handles;
- /** Pointer to a u32 array of the BOs that are written to by the job. */
- __u64 out_bo_handles;
- /** Number of tasks passed in. */
- __u32 task_count;
- /** Number of input BO handles passed in (size is that times 4). */
- __u32 in_bo_handle_count;
- /** Number of output BO handles passed in (size is that times 4). */
- __u32 out_bo_handle_count;
+};
+/**
- struct drm_rocket_submit - ioctl argument for submitting commands to the NPU.
- The kernel will schedule the execution of these jobs in dependency order.
- */
+struct drm_rocket_submit {
- /** Pointer to an array of struct drm_rocket_job. */
- __u64 jobs;
- /** Number of jobs passed in. */
- __u32 job_count;
+};
- #if defined(__cplusplus) } #endif