new file mode 100644
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2017-2018 Lima Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+#include "lima_drv.h"
+#include "lima_sched.h"
+#include "lima_vm.h"
+#include "lima_mmu.h"
+#include "lima_l2_cache.h"
+
+struct lima_fence {
+ struct dma_fence base;
+ struct lima_sched_pipe *pipe;
+};
+
+static struct kmem_cache *lima_fence_slab = NULL;
+
+int lima_sched_slab_init(void)
+{
+ lima_fence_slab = kmem_cache_create(
+ "lima_fence", sizeof(struct lima_fence), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!lima_fence_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void lima_sched_slab_fini(void)
+{
+ if (lima_fence_slab)
+ kmem_cache_destroy(lima_fence_slab);
+}
+
+static inline struct lima_fence *to_lima_fence(struct dma_fence *fence)
+{
+ return container_of(fence, struct lima_fence, base);
+}
+
+static const char *lima_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "lima";
+}
+
+static const char *lima_fence_get_timeline_name(struct dma_fence *fence)
+{
+ struct lima_fence *f = to_lima_fence(fence);
+
+ return f->pipe->base.name;
+}
+
+static bool lima_fence_enable_signaling(struct dma_fence *fence)
+{
+ return true;
+}
+
+static void lima_fence_release_rcu(struct rcu_head *rcu)
+{
+ struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
+ struct lima_fence *fence = to_lima_fence(f);
+
+ kmem_cache_free(lima_fence_slab, fence);
+}
+
+static void lima_fence_release(struct dma_fence *fence)
+{
+ struct lima_fence *f = to_lima_fence(fence);
+
+ call_rcu(&f->base.rcu, lima_fence_release_rcu);
+}
+
+static const struct dma_fence_ops lima_fence_ops = {
+ .get_driver_name = lima_fence_get_driver_name,
+ .get_timeline_name = lima_fence_get_timeline_name,
+ .enable_signaling = lima_fence_enable_signaling,
+ .wait = dma_fence_default_wait,
+ .release = lima_fence_release,
+};
+
+static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe)
+{
+ struct lima_fence *fence;
+
+ fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL);
+ if (!fence)
+ return NULL;
+
+ fence->pipe = pipe;
+ dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock,
+ pipe->fence_context, ++pipe->fence_seqno);
+
+ return fence;
+}
+
+static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job)
+{
+ return container_of(job, struct lima_sched_task, base);
+}
+
+static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched)
+{
+ return container_of(sched, struct lima_sched_pipe, base);
+}
+
+int lima_sched_task_init(struct lima_sched_task *task,
+ struct lima_sched_context *context,
+ struct lima_vm *vm)
+{
+ int err;
+
+ err = drm_sched_job_init(&task->base, context->base.sched,
+ &context->base, context);
+ if (err)
+ return err;
+
+ task->vm = lima_vm_get(vm);
+ return 0;
+}
+
+void lima_sched_task_fini(struct lima_sched_task *task)
+{
+ dma_fence_put(&task->base.s_fence->finished);
+ lima_vm_put(task->vm);
+}
+
+int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence)
+{
+ int i, new_dep = 4;
+
+ if (task->dep && task->num_dep == task->max_dep)
+ new_dep = task->max_dep * 2;
+
+ if (task->max_dep < new_dep) {
+ void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL);
+ if (!dep)
+ return -ENOMEM;
+ task->max_dep = new_dep;
+ task->dep = dep;
+ }
+
+ dma_fence_get(fence);
+ for (i = 0; i < task->num_dep; i++) {
+ if (task->dep[i]->context == fence->context &&
+ dma_fence_is_later(fence, task->dep[i])) {
+ dma_fence_put(task->dep[i]);
+ task->dep[i] = fence;
+ return 0;
+ }
+ }
+
+ task->dep[task->num_dep++] = fence;
+ return 0;
+}
+
+int lima_sched_context_init(struct lima_sched_pipe *pipe,
+ struct lima_sched_context *context,
+ atomic_t *guilty)
+{
+ struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
+ int err;
+
+ context->fences =
+ kzalloc(sizeof(*context->fences) * lima_sched_max_tasks, GFP_KERNEL);
+ if (!context->fences)
+ return -ENOMEM;
+
+ mutex_init(&context->lock);
+ err = drm_sched_entity_init(&pipe->base, &context->base, rq,
+ lima_sched_max_tasks, guilty);
+ if (err) {
+ kfree(context->fences);
+ context->fences = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+void lima_sched_context_fini(struct lima_sched_pipe *pipe,
+ struct lima_sched_context *context)
+{
+ drm_sched_entity_fini(&pipe->base, &context->base);
+
+ mutex_destroy(&context->lock);
+
+ if (context->fences)
+ kfree(context->fences);
+}
+
+static uint32_t lima_sched_context_add_fence(struct lima_sched_context *context,
+ struct dma_fence *fence,
+ uint32_t *done)
+{
+ uint32_t seq, idx, i;
+ struct dma_fence *other;
+
+ mutex_lock(&context->lock);
+
+ seq = context->sequence;
+ idx = seq & (lima_sched_max_tasks - 1);
+ other = context->fences[idx];
+
+ if (other) {
+ int err = dma_fence_wait(other, false);
+ if (err)
+ DRM_ERROR("Error %d waiting context fence\n", err);
+ }
+
+ context->fences[idx] = dma_fence_get(fence);
+ context->sequence++;
+
+ /* get finished fence offset from seq */
+ for (i = 1; i < lima_sched_max_tasks; i++) {
+ idx = (seq - i) & (lima_sched_max_tasks - 1);
+ if (!context->fences[idx] ||
+ dma_fence_is_signaled(context->fences[idx]))
+ break;
+ }
+
+ mutex_unlock(&context->lock);
+
+ dma_fence_put(other);
+
+ *done = i;
+ return seq;
+}
+
+struct dma_fence *lima_sched_context_get_fence(
+ struct lima_sched_context *context, uint32_t seq)
+{
+ struct dma_fence *fence;
+ int idx;
+ uint32_t max, min;
+
+ mutex_lock(&context->lock);
+
+ max = context->sequence - 1;
+ min = context->sequence - lima_sched_max_tasks;
+
+ /* handle overflow case */
+ if ((min < max && (seq < min || seq > max)) ||
+ (min > max && (seq < min && seq > max))) {
+ fence = NULL;
+ goto out;
+ }
+
+ idx = seq & (lima_sched_max_tasks - 1);
+ fence = dma_fence_get(context->fences[idx]);
+
+out:
+ mutex_unlock(&context->lock);
+
+ return fence;
+}
+
+uint32_t lima_sched_context_queue_task(struct lima_sched_context *context,
+ struct lima_sched_task *task,
+ uint32_t *done)
+{
+ uint32_t seq = lima_sched_context_add_fence(
+ context, &task->base.s_fence->finished, done);
+ drm_sched_entity_push_job(&task->base, &context->base);
+ return seq;
+}
+
+static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job,
+ struct drm_sched_entity *entity)
+{
+ struct lima_sched_task *task = to_lima_task(job);
+ int i;
+
+ for (i = 0; i < task->num_dep; i++) {
+ struct dma_fence *fence = task->dep[i];
+
+ if (!task->dep[i])
+ continue;
+
+ task->dep[i] = NULL;
+
+ if (!dma_fence_is_signaled(fence))
+ return fence;
+
+ dma_fence_put(fence);
+ }
+
+ return NULL;
+}
+
+static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
+{
+ struct lima_sched_task *task = to_lima_task(job);
+ struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+ struct lima_fence *fence;
+ struct dma_fence *ret;
+ struct lima_vm *vm = NULL, *last_vm = NULL;
+ int i;
+
+ /* after GPU reset */
+ if (job->s_fence->finished.error < 0)
+ return NULL;
+
+ fence = lima_fence_create(pipe);
+ if (!fence)
+ return NULL;
+ task->fence = &fence->base;
+
+ /* for caller usage of the fence, otherwise irq handler
+ * may consume the fence before caller use it */
+ ret = dma_fence_get(task->fence);
+
+ pipe->current_task = task;
+
+ /* this is needed for MMU to work correctly, otherwise GP/PP
+ * will hang or page fault for unknown reason after running for
+ * a while.
+ *
+ * Need to investigate:
+ * 1. is it related to TLB
+ * 2. how much performance will be affected by L2 cache flush
+ * 3. can we reduce the calling of this function because all
+ * GP/PP use the same L2 cache on mali400
+ *
+ * TODO:
+ * 1. move this to task fini to save some wait time?
+ * 2. when GP/PP use different l2 cache, need PP wait GP l2
+ * cache flush?
+ */
+ for (i = 0; i < pipe->num_l2_cache; i++)
+ lima_l2_cache_flush(pipe->l2_cache[i]);
+
+ if (task->vm != pipe->current_vm) {
+ vm = lima_vm_get(task->vm);
+ last_vm = pipe->current_vm;
+ pipe->current_vm = task->vm;
+ }
+
+ if (pipe->bcast_mmu)
+ lima_mmu_switch_vm(pipe->bcast_mmu, vm);
+ else {
+ for (i = 0; i < pipe->num_mmu; i++)
+ lima_mmu_switch_vm(pipe->mmu[i], vm);
+ }
+
+ if (last_vm)
+ lima_vm_put(last_vm);
+
+ pipe->error = false;
+ pipe->task_run(pipe, task);
+
+ return task->fence;
+}
+
+static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
+ struct lima_sched_task *task)
+{
+ kthread_park(pipe->base.thread);
+ drm_sched_hw_job_reset(&pipe->base, &task->base);
+
+ pipe->task_error(pipe);
+
+ if (pipe->bcast_mmu)
+ lima_mmu_page_fault_resume(pipe->bcast_mmu);
+ else {
+ int i;
+ for (i = 0; i < pipe->num_mmu; i++)
+ lima_mmu_page_fault_resume(pipe->mmu[i]);
+ }
+
+ if (pipe->current_vm)
+ lima_vm_put(pipe->current_vm);
+
+ pipe->current_vm = NULL;
+ pipe->current_task = NULL;
+
+ drm_sched_job_recovery(&pipe->base);
+ kthread_unpark(pipe->base.thread);
+}
+
+static void lima_sched_timedout_job(struct drm_sched_job *job)
+{
+ struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+ struct lima_sched_task *task = to_lima_task(job);
+
+ lima_sched_handle_error_task(pipe, task);
+}
+
+static void lima_sched_free_job(struct drm_sched_job *job)
+{
+ struct lima_sched_task *task = to_lima_task(job);
+ struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
+ int i;
+
+ dma_fence_put(task->fence);
+
+ for (i = 0; i < task->num_dep; i++) {
+ if (task->dep[i])
+ dma_fence_put(task->dep[i]);
+ }
+
+ if (task->dep)
+ kfree(task->dep);
+
+ lima_vm_put(task->vm);
+ kmem_cache_free(pipe->task_slab, task);
+}
+
+const struct drm_sched_backend_ops lima_sched_ops = {
+ .dependency = lima_sched_dependency,
+ .run_job = lima_sched_run_job,
+ .timedout_job = lima_sched_timedout_job,
+ .free_job = lima_sched_free_job,
+};
+
+static void lima_sched_error_work(struct work_struct *work)
+{
+ struct lima_sched_pipe *pipe =
+ container_of(work, struct lima_sched_pipe, error_work);
+ struct lima_sched_task *task = pipe->current_task;
+
+ lima_sched_handle_error_task(pipe, task);
+}
+
+int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
+{
+ long timeout;
+
+ if (lima_sched_timeout_ms <= 0)
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ else
+ timeout = msecs_to_jiffies(lima_sched_timeout_ms);
+
+ pipe->fence_context = dma_fence_context_alloc(1);
+ spin_lock_init(&pipe->fence_lock);
+
+ INIT_WORK(&pipe->error_work, lima_sched_error_work);
+
+ return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name);
+}
+
+void lima_sched_pipe_fini(struct lima_sched_pipe *pipe)
+{
+ drm_sched_fini(&pipe->base);
+}
+
+unsigned long lima_timeout_to_jiffies(u64 timeout_ns)
+{
+ unsigned long timeout_jiffies;
+ ktime_t timeout;
+
+ /* clamp timeout if it's to large */
+ if (((s64)timeout_ns) < 0)
+ return MAX_SCHEDULE_TIMEOUT;
+
+ timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get());
+ if (ktime_to_ns(timeout) < 0)
+ return 0;
+
+ timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout));
+ /* clamp timeout to avoid unsigned-> signed overflow */
+ if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT )
+ return MAX_SCHEDULE_TIMEOUT;
+
+ return timeout_jiffies;
+}
+
+void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe)
+{
+ if (pipe->error)
+ schedule_work(&pipe->error_work);
+ else {
+ struct lima_sched_task *task = pipe->current_task;
+
+ pipe->task_fini(pipe);
+ dma_fence_signal(task->fence);
+ }
+}
new file mode 100644
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017-2018 Lima Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __LIMA_SCHED_H__
+#define __LIMA_SCHED_H__
+
+#include <drm/gpu_scheduler.h>
+
+struct lima_vm;
+
+struct lima_sched_task {
+ struct drm_sched_job base;
+
+ struct lima_vm *vm;
+ void *frame;
+
+ struct dma_fence **dep;
+ int num_dep;
+ int max_dep;
+
+ /* pipe fence */
+ struct dma_fence *fence;
+};
+
+struct lima_sched_context {
+ struct drm_sched_entity base;
+ struct mutex lock;
+ struct dma_fence **fences;
+ uint32_t sequence;
+};
+
+#define LIMA_SCHED_PIPE_MAX_MMU 8
+#define LIMA_SCHED_PIPE_MAX_L2_CACHE 2
+#define LIMA_SCHED_PIPE_MAX_PROCESSOR 8
+
+struct lima_ip;
+
+struct lima_sched_pipe {
+ struct drm_gpu_scheduler base;
+
+ u64 fence_context;
+ u32 fence_seqno;
+ spinlock_t fence_lock;
+
+ struct lima_sched_task *current_task;
+ struct lima_vm *current_vm;
+
+ struct lima_ip *mmu[LIMA_SCHED_PIPE_MAX_MMU];
+ int num_mmu;
+
+ struct lima_ip *l2_cache[LIMA_SCHED_PIPE_MAX_L2_CACHE];
+ int num_l2_cache;
+
+ struct lima_ip *processor[LIMA_SCHED_PIPE_MAX_PROCESSOR];
+ int num_processor;
+
+ struct lima_ip *bcast_processor;
+ struct lima_ip *bcast_mmu;
+
+ u32 done;
+ bool error;
+ atomic_t task;
+
+ int frame_size;
+ struct kmem_cache *task_slab;
+
+ int (*task_validate)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
+ void (*task_run)(struct lima_sched_pipe *pipe, struct lima_sched_task *task);
+ void (*task_fini)(struct lima_sched_pipe *pipe);
+ void (*task_error)(struct lima_sched_pipe *pipe);
+ void (*task_mmu_error)(struct lima_sched_pipe *pipe);
+
+ struct work_struct error_work;
+};
+
+int lima_sched_task_init(struct lima_sched_task *task,
+ struct lima_sched_context *context,
+ struct lima_vm *vm);
+void lima_sched_task_fini(struct lima_sched_task *task);
+int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence);
+
+int lima_sched_context_init(struct lima_sched_pipe *pipe,
+ struct lima_sched_context *context,
+ atomic_t *guilty);
+void lima_sched_context_fini(struct lima_sched_pipe *pipe,
+ struct lima_sched_context *context);
+uint32_t lima_sched_context_queue_task(struct lima_sched_context *context,
+ struct lima_sched_task *task,
+ uint32_t *done);
+struct dma_fence *lima_sched_context_get_fence(
+ struct lima_sched_context *context, uint32_t seq);
+
+int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name);
+void lima_sched_pipe_fini(struct lima_sched_pipe *pipe);
+void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe);
+
+static inline void lima_sched_pipe_mmu_error(struct lima_sched_pipe *pipe)
+{
+ pipe->error = true;
+ pipe->task_mmu_error(pipe);
+}
+
+int lima_sched_slab_init(void);
+void lima_sched_slab_fini(void);
+
+unsigned long lima_timeout_to_jiffies(u64 timeout_ns);
+
+#endif
Signed-off-by: Qiang Yu <yuq825@gmail.com> --- drivers/gpu/drm/lima/lima_sched.c | 497 ++++++++++++++++++++++++++++++ drivers/gpu/drm/lima/lima_sched.h | 126 ++++++++ 2 files changed, 623 insertions(+) create mode 100644 drivers/gpu/drm/lima/lima_sched.c create mode 100644 drivers/gpu/drm/lima/lima_sched.h