[v5,6/7] accel/ivpu: Add command buffer submission logic

Message ID	20230109122344.253994-7-jacek.lawrynowicz@linux.intel.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <dri-devel-bounces@lists.freedesktop.org> From: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> To: dri-devel@lists.freedesktop.org, oded.gabbay@gmail.com, airlied@gmail.com, daniel@ffwll.ch, tzimmermann@suse.de, quic_jhugo@quicinc.com Subject: [PATCH v5 6/7] accel/ivpu: Add command buffer submission logic Date: Mon, 9 Jan 2023 13:23:43 +0100 Message-Id: <20230109122344.253994-7-jacek.lawrynowicz@linux.intel.com> In-Reply-To: <20230109122344.253994-1-jacek.lawrynowicz@linux.intel.com> References: <20230109122344.253994-1-jacek.lawrynowicz@linux.intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list Cc: andrzej.kacprowski@linux.intel.com, Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>, stanislaw.gruszka@linux.intel.com Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" <dri-devel-bounces@lists.freedesktop.org>
Series	New DRM accel driver for Intel VPU \| expand [v5,0/7] New DRM accel driver for Intel VPU [v5,1/7] accel/ivpu: Introduce a new DRM driver for Intel VPU [v5,2/7] accel/ivpu: Add Intel VPU MMU support [v5,3/7] accel/ivpu: Add GEM buffer object management [v5,4/7] accel/ivpu: Add IPC driver and JSM messages [v5,5/7] accel/ivpu: Implement firmware parsing and booting [v5,6/7] accel/ivpu: Add command buffer submission logic [v5,7/7] accel/ivpu: Add PM support

diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 9fa6a76e9d79..e61ece53a9ae 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -7,6 +7,7 @@ intel_vpu-y := \ ivpu_gem.o \ ivpu_hw_mtl.o \ ivpu_ipc.o \ + ivpu_job.o \ ivpu_jsm_msg.o \ ivpu_mmu.o \ ivpu_mmu_context.o diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 53e103f64832..279ca15bff15 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -20,6 +20,7 @@ #include "ivpu_gem.h" #include "ivpu_hw.h" #include "ivpu_ipc.h" +#include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" @@ -31,6 +32,8 @@ static const struct drm_driver driver; +static struct lock_class_key submitted_jobs_xa_lock_class_key; + int ivpu_dbg_mask; module_param_named(dbg_mask, ivpu_dbg_mask, int, 0644); MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros."); @@ -84,8 +87,11 @@ static void file_priv_release(struct kref *ref) ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id); + ivpu_cmdq_release_all(file_priv); + ivpu_bo_remove_all_bos_from_context(&file_priv->ctx); ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv); + mutex_destroy(&file_priv->lock); kfree(file_priv); } @@ -209,10 +215,11 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) file_priv->vdev = vdev; file_priv->priority = DRM_IVPU_CONTEXT_PRIORITY_NORMAL; kref_init(&file_priv->ref); + mutex_init(&file_priv->lock); ret = ivpu_mmu_user_context_init(vdev, &file_priv->ctx, ctx_id); if (ret) - goto err_free_file_priv; + goto err_mutex_destroy; old = xa_store_irq(&vdev->context_xa, ctx_id, file_priv, GFP_KERNEL); if (xa_is_err(old)) { @@ -229,7 +236,8 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) err_ctx_fini: ivpu_mmu_user_context_fini(vdev, &file_priv->ctx); -err_free_file_priv: +err_mutex_destroy: + mutex_destroy(&file_priv->lock); kfree(file_priv); err_xa_erase: xa_erase_irq(&vdev->context_xa, ctx_id); @@ -253,6 +261,8 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_BO_CREATE, ivpu_bo_create_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_BO_USERPTR, ivpu_bo_userptr_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0), }; static int ivpu_wait_for_ready(struct ivpu_device *vdev) @@ -465,6 +475,8 @@ static int ivpu_dev_init(struct ivpu_device *vdev) vdev->context_xa_limit.max = IVPU_CONTEXT_LIMIT; atomic64_set(&vdev->unique_id_counter, 0); xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC); + xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1); + lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key); ret = ivpu_pci_init(vdev); if (ret) { @@ -516,20 +528,30 @@ static int ivpu_dev_init(struct ivpu_device *vdev) goto err_fw_fini; } + ret = ivpu_job_done_thread_init(vdev); + if (ret) { + ivpu_err(vdev, "Failed to initialize job done thread: %d\n", ret); + goto err_ipc_fini; + } + ret = ivpu_fw_load(vdev); if (ret) { ivpu_err(vdev, "Failed to load firmware: %d\n", ret); - goto err_fw_fini; + goto err_job_done_thread_fini; } ret = ivpu_boot(vdev); if (ret) { ivpu_err(vdev, "Failed to boot: %d\n", ret); - goto err_fw_fini; + goto err_job_done_thread_fini; } return 0; +err_job_done_thread_fini: + ivpu_job_done_thread_fini(vdev); +err_ipc_fini: + ivpu_ipc_fini(vdev); err_fw_fini: ivpu_fw_fini(vdev); err_mmu_gctx_fini: @@ -537,6 +559,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev) err_power_down: ivpu_hw_power_down(vdev); err_xa_destroy: + xa_destroy(&vdev->submitted_jobs_xa); xa_destroy(&vdev->context_xa); return ret; } @@ -544,10 +567,13 @@ static int ivpu_dev_init(struct ivpu_device *vdev) static void ivpu_dev_fini(struct ivpu_device *vdev) { ivpu_shutdown(vdev); + ivpu_job_done_thread_fini(vdev); ivpu_ipc_fini(vdev); ivpu_fw_fini(vdev); ivpu_mmu_global_context_fini(vdev); + drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->submitted_jobs_xa)); + xa_destroy(&vdev->submitted_jobs_xa); drm_WARN_ON(&vdev->drm, !xa_empty(&vdev->context_xa)); xa_destroy(&vdev->context_xa); } diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 317ae61f43bd..2545a32fe910 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -94,6 +94,9 @@ struct ivpu_device { struct xarray context_xa; struct xa_limit context_xa_limit; + struct xarray submitted_jobs_xa; + struct task_struct *job_done_thread; + atomic64_t unique_id_counter; struct { @@ -111,6 +114,8 @@ struct ivpu_device { struct ivpu_file_priv { struct kref ref; struct ivpu_device *vdev; + struct mutex lock; /* Protects cmdq */ + struct ivpu_cmdq *cmdq[IVPU_NUM_ENGINES]; struct ivpu_mmu_context ctx; u32 priority; bool has_mmu_faults; diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c index f6d1937c798f..ec4248e21443 100644 --- a/drivers/accel/ivpu/ivpu_gem.c +++ b/drivers/accel/ivpu/ivpu_gem.c @@ -771,6 +771,32 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file return ret; } +int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_bo_wait *args = data; + struct drm_gem_object *obj; + unsigned long timeout; + long ret; + + timeout = drm_timeout_abs_to_jiffies(args->timeout_ns); + + obj = drm_gem_object_lookup(file, args->handle); + if (!obj) + return -EINVAL; + + ret = dma_resv_wait_timeout(obj->resv, DMA_RESV_USAGE_READ, true, timeout); + if (ret == 0) { + ret = -ETIMEDOUT; + } else if (ret > 0) { + ret = 0; + args->job_status = to_ivpu_bo(obj)->job_status; + } + + drm_gem_object_put(obj); + + return ret; +} + static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p) { unsigned long dma_refcount = 0; diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h index d71e6fe207d4..e9af4aefdbcf 100644 --- a/drivers/accel/ivpu/ivpu_gem.h +++ b/drivers/accel/ivpu/ivpu_gem.h @@ -30,6 +30,7 @@ struct ivpu_bo { u32 handle; u32 flags; uintptr_t user_ptr; + u32 job_status; }; enum ivpu_bo_type { diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c new file mode 100644 index 000000000000..41181e3e4e3f --- /dev/null +++ b/drivers/accel/ivpu/ivpu_job.c @@ -0,0 +1,603 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#include <drm/drm_file.h> + +#include <linux/bitfield.h> +#include <linux/highmem.h> +#include <linux/kthread.h> +#include <linux/pci.h> +#include <linux/module.h> +#include <uapi/drm/ivpu_accel.h> + +#include "ivpu_drv.h" +#include "ivpu_hw.h" +#include "ivpu_ipc.h" +#include "ivpu_job.h" +#include "ivpu_jsm_msg.h" + +#define CMD_BUF_IDX 0 +#define JOB_ID_JOB_MASK GENMASK(7, 0) +#define JOB_ID_CONTEXT_MASK GENMASK(31, 8) +#define JOB_MAX_BUFFER_COUNT 65535 + +static unsigned int ivpu_tdr_timeout_ms; +module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644); +MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default"); + +static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq) +{ + ivpu_hw_reg_db_set(vdev, cmdq->db_id); +} + +static struct ivpu_cmdq *ivpu_cmdq_alloc(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct vpu_job_queue_header *jobq_header; + struct ivpu_cmdq *cmdq; + + cmdq = kzalloc(sizeof(*cmdq), GFP_KERNEL); + if (!cmdq) + return NULL; + + cmdq->mem = ivpu_bo_alloc_internal(vdev, 0, SZ_4K, DRM_IVPU_BO_WC); + if (!cmdq->mem) + goto cmdq_free; + + cmdq->db_id = file_priv->ctx.id + engine * ivpu_get_context_count(vdev); + cmdq->entry_count = (u32)((cmdq->mem->base.size - sizeof(struct vpu_job_queue_header)) / + sizeof(struct vpu_job_queue_entry)); + + cmdq->jobq = (struct vpu_job_queue *)cmdq->mem->kvaddr; + jobq_header = &cmdq->jobq->header; + jobq_header->engine_idx = engine; + jobq_header->head = 0; + jobq_header->tail = 0; + wmb(); /* Flush WC buffer for jobq->header */ + + return cmdq; + +cmdq_free: + kfree(cmdq); + return NULL; +} + +static void ivpu_cmdq_free(struct ivpu_file_priv *file_priv, struct ivpu_cmdq *cmdq) +{ + if (!cmdq) + return; + + ivpu_bo_free_internal(cmdq->mem); + kfree(cmdq); +} + +static struct ivpu_cmdq *ivpu_cmdq_acquire(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + int ret; + + lockdep_assert_held(&file_priv->lock); + + if (!cmdq) { + cmdq = ivpu_cmdq_alloc(file_priv, engine); + if (!cmdq) + return NULL; + file_priv->cmdq[engine] = cmdq; + } + + if (cmdq->db_registered) + return cmdq; + + ret = ivpu_jsm_register_db(vdev, file_priv->ctx.id, cmdq->db_id, + cmdq->mem->vpu_addr, cmdq->mem->base.size); + if (ret) + return NULL; + + cmdq->db_registered = true; + + return cmdq; +} + +static void ivpu_cmdq_release_locked(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + + lockdep_assert_held(&file_priv->lock); + + if (cmdq) { + file_priv->cmdq[engine] = NULL; + if (cmdq->db_registered) + ivpu_jsm_unregister_db(file_priv->vdev, cmdq->db_id); + + ivpu_cmdq_free(file_priv, cmdq); + } +} + +void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv) +{ + int i; + + mutex_lock(&file_priv->lock); + + for (i = 0; i < IVPU_NUM_ENGINES; i++) + ivpu_cmdq_release_locked(file_priv, i); + + mutex_unlock(&file_priv->lock); +} + +/* + * Mark the doorbell as unregistered and reset job queue pointers. + * This function needs to be called when the VPU hardware is restarted + * and FW looses job queue state. The next time job queue is used it + * will be registered again. + */ +static void ivpu_cmdq_reset_locked(struct ivpu_file_priv *file_priv, u16 engine) +{ + struct ivpu_cmdq *cmdq = file_priv->cmdq[engine]; + + lockdep_assert_held(&file_priv->lock); + + if (cmdq) { + cmdq->db_registered = false; + cmdq->jobq->header.head = 0; + cmdq->jobq->header.tail = 0; + wmb(); /* Flush WC buffer for jobq header */ + } +} + +static void ivpu_cmdq_reset_all(struct ivpu_file_priv *file_priv) +{ + int i; + + mutex_lock(&file_priv->lock); + + for (i = 0; i < IVPU_NUM_ENGINES; i++) + ivpu_cmdq_reset_locked(file_priv, i); + + mutex_unlock(&file_priv->lock); +} + +void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) { + file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id); + if (!file_priv) + continue; + + ivpu_cmdq_reset_all(file_priv); + + ivpu_file_priv_put(&file_priv); + } +} + +static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job) +{ + struct ivpu_device *vdev = job->vdev; + struct vpu_job_queue_header *header = &cmdq->jobq->header; + struct vpu_job_queue_entry *entry; + u32 tail = READ_ONCE(header->tail); + u32 next_entry = (tail + 1) % cmdq->entry_count; + + /* Check if there is space left in job queue */ + if (next_entry == header->head) { + ivpu_dbg(vdev, JOB, "Job queue full: ctx %d engine %d db %d head %d tail %d\n", + job->file_priv->ctx.id, job->engine_idx, cmdq->db_id, header->head, tail); + return -EBUSY; + } + + entry = &cmdq->jobq->job[tail]; + entry->batch_buf_addr = job->cmd_buf_vpu_addr; + entry->job_id = job->job_id; + entry->flags = 0; + wmb(); /* Ensure that tail is updated after filling entry */ + header->tail = next_entry; + wmb(); /* Flush WC buffer for jobq header */ + + return 0; +} + +struct ivpu_fence { + struct dma_fence base; + spinlock_t lock; /* protects base */ + struct ivpu_device *vdev; +}; + +static inline struct ivpu_fence *to_vpu_fence(struct dma_fence *fence) +{ + return container_of(fence, struct ivpu_fence, base); +} + +static const char *ivpu_fence_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *ivpu_fence_get_timeline_name(struct dma_fence *fence) +{ + struct ivpu_fence *ivpu_fence = to_vpu_fence(fence); + + return dev_name(ivpu_fence->vdev->drm.dev); +} + +static const struct dma_fence_ops ivpu_fence_ops = { + .get_driver_name = ivpu_fence_get_driver_name, + .get_timeline_name = ivpu_fence_get_timeline_name, +}; + +static struct dma_fence *ivpu_fence_create(struct ivpu_device *vdev) +{ + struct ivpu_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return NULL; + + fence->vdev = vdev; + spin_lock_init(&fence->lock); + dma_fence_init(&fence->base, &ivpu_fence_ops, &fence->lock, dma_fence_context_alloc(1), 1); + + return &fence->base; +} + +static void job_get(struct ivpu_job *job, struct ivpu_job **link) +{ + struct ivpu_device *vdev = job->vdev; + + kref_get(&job->ref); + *link = job; + + ivpu_dbg(vdev, KREF, "Job get: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); +} + +static void job_release(struct kref *ref) +{ + struct ivpu_job *job = container_of(ref, struct ivpu_job, ref); + struct ivpu_device *vdev = job->vdev; + u32 i; + + for (i = 0; i < job->bo_count; i++) + if (job->bos[i]) + drm_gem_object_put(&job->bos[i]->base); + + dma_fence_put(job->done_fence); + ivpu_file_priv_put(&job->file_priv); + + ivpu_dbg(vdev, KREF, "Job released: id %u\n", job->job_id); + kfree(job); +} + +static void job_put(struct ivpu_job *job) +{ + struct ivpu_device *vdev = job->vdev; + + ivpu_dbg(vdev, KREF, "Job put: id %u refcount %u\n", job->job_id, kref_read(&job->ref)); + kref_put(&job->ref, job_release); +} + +static struct ivpu_job * +ivpu_create_job(struct ivpu_file_priv *file_priv, u32 engine_idx, u32 bo_count) +{ + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_job *job; + size_t buf_size; + + buf_size = sizeof(*job) + bo_count * sizeof(struct ivpu_bo *); + job = kzalloc(buf_size, GFP_KERNEL); + if (!job) + return NULL; + + kref_init(&job->ref); + + job->vdev = vdev; + job->engine_idx = engine_idx; + job->bo_count = bo_count; + job->done_fence = ivpu_fence_create(vdev); + if (!job->done_fence) { + ivpu_warn_ratelimited(vdev, "Failed to create a fence\n"); + goto err_free_job; + } + + job->file_priv = ivpu_file_priv_get(file_priv); + + ivpu_dbg(vdev, JOB, "Job created: ctx %2d engine %d", file_priv->ctx.id, job->engine_idx); + + return job; + +err_free_job: + kfree(job); + return NULL; +} + +static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status) +{ + struct ivpu_job *job; + + job = xa_erase(&vdev->submitted_jobs_xa, job_id); + if (!job) + return -ENOENT; + + if (job->file_priv->has_mmu_faults) + job_status = VPU_JSM_STATUS_ABORTED; + + job->bos[CMD_BUF_IDX]->job_status = job_status; + dma_fence_signal(job->done_fence); + + ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n", + job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status); + + job_put(job); + return 0; +} + +static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg) +{ + struct vpu_ipc_msg_payload_job_done *payload; + struct vpu_jsm_msg *job_ret_msg = msg; + int ret; + + payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload; + + ret = ivpu_job_done(vdev, payload->job_id, payload->job_status); + if (ret) + ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret); +} + +void ivpu_jobs_abort_all(struct ivpu_device *vdev) +{ + struct ivpu_job *job; + unsigned long id; + + xa_for_each(&vdev->submitted_jobs_xa, id, job) + ivpu_job_done(vdev, id, VPU_JSM_STATUS_ABORTED); +} + +static int ivpu_direct_job_submission(struct ivpu_job *job) +{ + struct ivpu_file_priv *file_priv = job->file_priv; + struct ivpu_device *vdev = job->vdev; + struct xa_limit job_id_range; + struct ivpu_cmdq *cmdq; + int ret; + + mutex_lock(&file_priv->lock); + + cmdq = ivpu_cmdq_acquire(job->file_priv, job->engine_idx); + if (!cmdq) { + ivpu_warn(vdev, "Failed get job queue, ctx %d engine %d\n", + file_priv->ctx.id, job->engine_idx); + ret = -EINVAL; + goto err_unlock; + } + + job_id_range.min = FIELD_PREP(JOB_ID_CONTEXT_MASK, (file_priv->ctx.id - 1)); + job_id_range.max = job_id_range.min | JOB_ID_JOB_MASK; + + job_get(job, &job); + ret = xa_alloc(&vdev->submitted_jobs_xa, &job->job_id, job, job_id_range, GFP_KERNEL); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to allocate job id: %d\n", ret); + goto err_job_put; + } + + ret = ivpu_cmdq_push_job(cmdq, job); + if (ret) + goto err_xa_erase; + + ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d next %d\n", + job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->jobq->header.tail); + + if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) { + ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS); + cmdq->jobq->header.head = cmdq->jobq->header.tail; + wmb(); /* Flush WC buffer for jobq header */ + } else { + ivpu_cmdq_ring_db(vdev, cmdq); + } + + mutex_unlock(&file_priv->lock); + return 0; + +err_xa_erase: + xa_erase(&vdev->submitted_jobs_xa, job->job_id); +err_job_put: + job_put(job); +err_unlock: + mutex_unlock(&file_priv->lock); + return ret; +} + +static int +ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32 *buf_handles, + u32 buf_count, u32 commands_offset) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ww_acquire_ctx acquire_ctx; + struct ivpu_bo *bo; + int ret; + u32 i; + + for (i = 0; i < buf_count; i++) { + struct drm_gem_object *obj = drm_gem_object_lookup(file, buf_handles[i]); + + if (!obj) + return -ENOENT; + + job->bos[i] = to_ivpu_bo(obj); + + ret = ivpu_bo_pin(job->bos[i]); + if (ret) + return ret; + } + + bo = job->bos[CMD_BUF_IDX]; + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) { + ivpu_warn(vdev, "Buffer is already in use\n"); + return -EBUSY; + } + + if (commands_offset >= bo->base.size) { + ivpu_warn(vdev, "Invalid command buffer offset %u\n", commands_offset); + return -EINVAL; + } + + job->cmd_buf_vpu_addr = bo->vpu_addr + commands_offset; + + ret = drm_gem_lock_reservations((struct drm_gem_object **)job->bos, buf_count, + &acquire_ctx); + if (ret) { + ivpu_warn(vdev, "Failed to lock reservations: %d\n", ret); + return ret; + } + + for (i = 0; i < buf_count; i++) { + ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1); + if (ret) { + ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret); + goto unlock_reservations; + } + } + + for (i = 0; i < buf_count; i++) + dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, DMA_RESV_USAGE_WRITE); + +unlock_reservations: + drm_gem_unlock_reservations((struct drm_gem_object **)job->bos, buf_count, &acquire_ctx); + + wmb(); /* Flush write combining buffers */ + + return ret; +} + +int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + int ret = 0; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct drm_ivpu_submit *params = data; + struct ivpu_job *job; + u32 *buf_handles; + + if (params->engine > DRM_IVPU_ENGINE_COPY) + return -EINVAL; + + if (params->buffer_count == 0 || params->buffer_count > JOB_MAX_BUFFER_COUNT) + return -EINVAL; + + if (!IS_ALIGNED(params->commands_offset, 8)) + return -EINVAL; + + if (!file_priv->ctx.id) + return -EINVAL; + + if (file_priv->has_mmu_faults) + return -EBADFD; + + buf_handles = kcalloc(params->buffer_count, sizeof(u32), GFP_KERNEL); + if (!buf_handles) + return -ENOMEM; + + ret = copy_from_user(buf_handles, + (void __user *)params->buffers_ptr, + params->buffer_count * sizeof(u32)); + if (ret) { + ret = -EFAULT; + goto free_handles; + } + + ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", + file_priv->ctx.id, params->buffer_count); + + job = ivpu_create_job(file_priv, params->engine, params->buffer_count); + if (!job) { + ivpu_err(vdev, "Failed to create job\n"); + ret = -ENOMEM; + goto free_handles; + } + + ret = ivpu_job_prepare_bos_for_submit(file, job, buf_handles, params->buffer_count, + params->commands_offset); + if (ret) { + ivpu_err(vdev, "Failed to prepare job, ret %d\n", ret); + goto job_put; + } + + ret = ivpu_direct_job_submission(job); + if (ret) { + dma_fence_signal(job->done_fence); + ivpu_err(vdev, "Failed to submit job to the HW, ret %d\n", ret); + } + +job_put: + job_put(job); +free_handles: + kfree(buf_handles); + + return ret; +} + +static int ivpu_job_done_thread(void *arg) +{ + struct ivpu_device *vdev = (struct ivpu_device *)arg; + struct ivpu_ipc_consumer cons; + struct vpu_jsm_msg jsm_msg; + bool jobs_submitted; + unsigned int timeout; + int ret; + + ivpu_dbg(vdev, JOB, "Started %s\n", __func__); + + ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET); + + while (!kthread_should_stop()) { + timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr; + jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa); + ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout); + if (!ret) { + ivpu_job_done_message(vdev, &jsm_msg); + } else if (ret == -ETIMEDOUT) { + if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) { + ivpu_err(vdev, "TDR detected, timeout %d ms", timeout); + ivpu_hw_diagnose_failure(vdev); + ivpu_pm_schedule_recovery(vdev); + } + } + } + + ivpu_ipc_consumer_del(vdev, &cons); + + ivpu_jobs_abort_all(vdev); + + ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__); + return 0; +} + +int ivpu_job_done_thread_init(struct ivpu_device *vdev) +{ + struct task_struct *thread; + + thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread"); + if (IS_ERR(thread)) { + ivpu_err(vdev, "Failed to start job completion thread\n"); + return -EIO; + } + + get_task_struct(thread); + wake_up_process(thread); + + vdev->job_done_thread = thread; + + return 0; +} + +void ivpu_job_done_thread_fini(struct ivpu_device *vdev) +{ + kthread_stop(vdev->job_done_thread); + put_task_struct(vdev->job_done_thread); +} diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h new file mode 100644 index 000000000000..aa1f0b9479b0 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_job.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020-2023 Intel Corporation + */ + +#ifndef __IVPU_JOB_H__ +#define __IVPU_JOB_H__ + +#include <linux/kref.h> +#include <linux/idr.h> + +#include "ivpu_gem.h" + +struct ivpu_device; +struct ivpu_file_priv; + +/** + * struct ivpu_cmdq - Object representing device queue used to send jobs. + * @jobq: Pointer to job queue memory shared with the device + * @mem: Memory allocated for the job queue, shared with device + * @entry_count Number of job entries in the queue + * @db_id: Doorbell assigned to this job queue + * @db_registered: True if doorbell is registered in device + */ +struct ivpu_cmdq { + struct vpu_job_queue *jobq; + struct ivpu_bo *mem; + u32 entry_count; + u32 db_id; + bool db_registered; +}; + +/** + * struct ivpu_job - KMD object that represents batchbuffer / DMA buffer. + * Each batch / DMA buffer is a job to be submitted and executed by the VPU FW. + * This is a unit of execution, and be tracked by the job_id for + * any status reporting from VPU FW through IPC JOB RET/DONE message. + * @file_priv: The client that submitted this job + * @job_id: Job ID for KMD tracking and job status reporting from VPU FW + * @status: Status of the Job from IPC JOB RET/DONE message + * @batch_buffer: CPU vaddr points to the batch buffer memory allocated for the job + * @submit_status_offset: Offset within batch buffer where job completion handler + will update the job status + */ +struct ivpu_job { + struct kref ref; + struct ivpu_device *vdev; + struct ivpu_file_priv *file_priv; + struct dma_fence *done_fence; + u64 cmd_buf_vpu_addr; + u32 job_id; + u32 engine_idx; + size_t bo_count; + struct ivpu_bo *bos[]; +}; + +int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv); +void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev); + +int ivpu_job_done_thread_init(struct ivpu_device *vdev); +void ivpu_job_done_thread_fini(struct ivpu_device *vdev); + +void ivpu_jobs_abort_all(struct ivpu_device *vdev); + +#endif /* __IVPU_JOB_H__ */ diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 005b40b7326c..6c95f72e66ee 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -20,6 +20,8 @@ extern "C" { #define DRM_IVPU_BO_CREATE 0x02 #define DRM_IVPU_BO_INFO 0x03 #define DRM_IVPU_BO_USERPTR 0x04 +#define DRM_IVPU_SUBMIT 0x05 +#define DRM_IVPU_BO_WAIT 0x06 #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -36,6 +38,12 @@ extern "C" { #define DRM_IOCTL_IVPU_BO_USERPTR \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_USERPTR, struct drm_ivpu_bo_userptr) +#define DRM_IOCTL_IVPU_SUBMIT \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SUBMIT, struct drm_ivpu_submit) + +#define DRM_IOCTL_IVPU_BO_WAIT \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) + /** * DOC: contexts * @@ -240,6 +248,90 @@ struct drm_ivpu_bo_userptr { __u64 vpu_addr; }; +/* drm_ivpu_submit engines */ +#define DRM_IVPU_ENGINE_COMPUTE 0 +#define DRM_IVPU_ENGINE_COPY 1 + +/** + * struct drm_ivpu_submit - Submit commands to the VPU + * + * Execute a single command buffer on a given VPU engine. + * Handles to all referenced buffer objects have to be provided in @buffers_ptr. + * + * User space may wait on job completion using %DRM_IVPU_BO_WAIT ioctl. + */ +struct drm_ivpu_submit { + /** + * @buffers_ptr: + * + * A pointer to an u32 array of GEM handles of the BOs required for this job. + * The number of elements in the array must be equal to the value given by @buffer_count. + * + * The first BO is the command buffer. The rest of array has to contain all + * BOs referenced from the command buffer. + */ + __u64 buffers_ptr; + + /** @buffer_count: Number of elements in the @buffers_ptr */ + __u32 buffer_count; + + /** + * @engine: Select the engine this job should be executed on + * + * %DRM_IVPU_ENGINE_COMPUTE: + * + * Performs Deep Learning Neural Compute Inference Operations + * + * %DRM_IVPU_ENGINE_COPY: + * + * Performs memory copy operations to/from system memory allocated for VPU + */ + __u32 engine; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** + * @commands_offset: + * + * Offset inside the first buffer in @buffers_ptr containing commands + * to be executed. The offset has to be 8-byte aligned. + */ + __u32 commands_offset; +}; + +/* drm_ivpu_bo_wait job status codes */ +#define DRM_IVPU_JOB_STATUS_SUCCESS 0 + +/** + * struct drm_ivpu_bo_wait - Wait for BO to become inactive + * + * Blocks until a given buffer object becomes inactive. + * With @timeout_ms set to 0 returns immediately. + */ +struct drm_ivpu_bo_wait { + /** @handle: Handle to the buffer object to be waited on */ + __u32 handle; + + /** @flags: Reserved for future use - must be zero */ + __u32 flags; + + /** @timeout_ns: Absolute timeout in nanoseconds (may be zero) */ + __s64 timeout_ns; + + /** + * @job_status: + * + * Job status code which is updated after the job is completed. + * &DRM_IVPU_JOB_STATUS_SUCCESS or device specific error otherwise. + * Valid only if @handle points to a command buffer. + */ + __u32 job_status; + + /** @pad: Padding - must be zero */ + __u32 pad; +}; + #if defined(__cplusplus) } #endif

[v5,6/7] accel/ivpu: Add command buffer submission logic

Commit Message

Comments

Patch