[RFC,22/24] drm/lima: add GPU schedule using DRM_SCHED

Message ID	20180519065243.27600-23-yuq825@gmail.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <dri-devel-bounces@lists.freedesktop.org> From: Qiang Yu <yuq825@gmail.com> To: dri-devel@lists.freedesktop.org Subject: [PATCH RFC 22/24] drm/lima: add GPU schedule using DRM_SCHED Date: Sat, 19 May 2018 14:52:41 +0800 Message-Id: <20180519065243.27600-23-yuq825@gmail.com> In-Reply-To: <20180519065243.27600-1-yuq825@gmail.com> References: <20180519065243.27600-1-yuq825@gmail.com> Precedence: list Cc: Simon Shields <simon@lineageos.org>, Marek Vasut <marex@denx.de>, Connor Abbott <cwabbott0@gmail.com>, Neil Armstrong <narmstrong@baylibre.com>, Andrei Paulau <7134956@gmail.com>, Vasily Khoruzhick <anarsoul@gmail.com>, Qiang Yu <yuq825@gmail.com>, Erico Nunes <nunes.erico@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" <dri-devel-bounces@lists.freedesktop.org>

diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c new file mode 100644 index 000000000000..190932955e9b --- /dev/null +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -0,0 +1,497 @@ +/* + * Copyright (C) 2017-2018 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/kthread.h> +#include <linux/slab.h> + +#include "lima_drv.h" +#include "lima_sched.h" +#include "lima_vm.h" +#include "lima_mmu.h" +#include "lima_l2_cache.h" + +struct lima_fence { + struct dma_fence base; + struct lima_sched_pipe *pipe; +}; + +static struct kmem_cache *lima_fence_slab = NULL; + +int lima_sched_slab_init(void) +{ + lima_fence_slab = kmem_cache_create( + "lima_fence", sizeof(struct lima_fence), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!lima_fence_slab) + return -ENOMEM; + + return 0; +} + +void lima_sched_slab_fini(void) +{ + if (lima_fence_slab) + kmem_cache_destroy(lima_fence_slab); +} + +static inline struct lima_fence *to_lima_fence(struct dma_fence *fence) +{ + return container_of(fence, struct lima_fence, base); +} + +static const char *lima_fence_get_driver_name(struct dma_fence *fence) +{ + return "lima"; +} + +static const char *lima_fence_get_timeline_name(struct dma_fence *fence) +{ + struct lima_fence *f = to_lima_fence(fence); + + return f->pipe->base.name; +} + +static bool lima_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static void lima_fence_release_rcu(struct rcu_head *rcu) +{ + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu); + struct lima_fence *fence = to_lima_fence(f); + + kmem_cache_free(lima_fence_slab, fence); +} + +static void lima_fence_release(struct dma_fence *fence) +{ + struct lima_fence *f = to_lima_fence(fence); + + call_rcu(&f->base.rcu, lima_fence_release_rcu); +} + +static const struct dma_fence_ops lima_fence_ops = { + .get_driver_name = lima_fence_get_driver_name, + .get_timeline_name = lima_fence_get_timeline_name, + .enable_signaling = lima_fence_enable_signaling, + .wait = dma_fence_default_wait, + .release = lima_fence_release, +}; + +static struct lima_fence *lima_fence_create(struct lima_sched_pipe *pipe) +{ + struct lima_fence *fence; + + fence = kmem_cache_zalloc(lima_fence_slab, GFP_KERNEL); + if (!fence) + return NULL; + + fence->pipe = pipe; + dma_fence_init(&fence->base, &lima_fence_ops, &pipe->fence_lock, + pipe->fence_context, ++pipe->fence_seqno); + + return fence; +} + +static inline struct lima_sched_task *to_lima_task(struct drm_sched_job *job) +{ + return container_of(job, struct lima_sched_task, base); +} + +static inline struct lima_sched_pipe *to_lima_pipe(struct drm_gpu_scheduler *sched) +{ + return container_of(sched, struct lima_sched_pipe, base); +} + +int lima_sched_task_init(struct lima_sched_task *task, + struct lima_sched_context *context, + struct lima_vm *vm) +{ + int err; + + err = drm_sched_job_init(&task->base, context->base.sched, + &context->base, context); + if (err) + return err; + + task->vm = lima_vm_get(vm); + return 0; +} + +void lima_sched_task_fini(struct lima_sched_task *task) +{ + dma_fence_put(&task->base.s_fence->finished); + lima_vm_put(task->vm); +} + +int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence) +{ + int i, new_dep = 4; + + if (task->dep && task->num_dep == task->max_dep) + new_dep = task->max_dep * 2; + + if (task->max_dep < new_dep) { + void *dep = krealloc(task->dep, sizeof(*task->dep) * new_dep, GFP_KERNEL); + if (!dep) + return -ENOMEM; + task->max_dep = new_dep; + task->dep = dep; + } + + dma_fence_get(fence); + for (i = 0; i < task->num_dep; i++) { + if (task->dep[i]->context == fence->context && + dma_fence_is_later(fence, task->dep[i])) { + dma_fence_put(task->dep[i]); + task->dep[i] = fence; + return 0; + } + } + + task->dep[task->num_dep++] = fence; + return 0; +} + +int lima_sched_context_init(struct lima_sched_pipe *pipe, + struct lima_sched_context *context, + atomic_t *guilty) +{ + struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL; + int err; + + context->fences = + kzalloc(sizeof(*context->fences) * lima_sched_max_tasks, GFP_KERNEL); + if (!context->fences) + return -ENOMEM; + + mutex_init(&context->lock); + err = drm_sched_entity_init(&pipe->base, &context->base, rq, + lima_sched_max_tasks, guilty); + if (err) { + kfree(context->fences); + context->fences = NULL; + return err; + } + + return 0; +} + +void lima_sched_context_fini(struct lima_sched_pipe *pipe, + struct lima_sched_context *context) +{ + drm_sched_entity_fini(&pipe->base, &context->base); + + mutex_destroy(&context->lock); + + if (context->fences) + kfree(context->fences); +} + +static uint32_t lima_sched_context_add_fence(struct lima_sched_context *context, + struct dma_fence *fence, + uint32_t *done) +{ + uint32_t seq, idx, i; + struct dma_fence *other; + + mutex_lock(&context->lock); + + seq = context->sequence; + idx = seq & (lima_sched_max_tasks - 1); + other = context->fences[idx]; + + if (other) { + int err = dma_fence_wait(other, false); + if (err) + DRM_ERROR("Error %d waiting context fence\n", err); + } + + context->fences[idx] = dma_fence_get(fence); + context->sequence++; + + /* get finished fence offset from seq */ + for (i = 1; i < lima_sched_max_tasks; i++) { + idx = (seq - i) & (lima_sched_max_tasks - 1); + if (!context->fences[idx] || + dma_fence_is_signaled(context->fences[idx])) + break; + } + + mutex_unlock(&context->lock); + + dma_fence_put(other); + + *done = i; + return seq; +} + +struct dma_fence *lima_sched_context_get_fence( + struct lima_sched_context *context, uint32_t seq) +{ + struct dma_fence *fence; + int idx; + uint32_t max, min; + + mutex_lock(&context->lock); + + max = context->sequence - 1; + min = context->sequence - lima_sched_max_tasks; + + /* handle overflow case */ + if ((min < max && (seq < min || seq > max)) || + (min > max && (seq < min && seq > max))) { + fence = NULL; + goto out; + } + + idx = seq & (lima_sched_max_tasks - 1); + fence = dma_fence_get(context->fences[idx]); + +out: + mutex_unlock(&context->lock); + + return fence; +} + +uint32_t lima_sched_context_queue_task(struct lima_sched_context *context, + struct lima_sched_task *task, + uint32_t *done) +{ + uint32_t seq = lima_sched_context_add_fence( + context, &task->base.s_fence->finished, done); + drm_sched_entity_push_job(&task->base, &context->base); + return seq; +} + +static struct dma_fence *lima_sched_dependency(struct drm_sched_job *job, + struct drm_sched_entity *entity) +{ + struct lima_sched_task *task = to_lima_task(job); + int i; + + for (i = 0; i < task->num_dep; i++) { + struct dma_fence *fence = task->dep[i]; + + if (!task->dep[i]) + continue; + + task->dep[i] = NULL; + + if (!dma_fence_is_signaled(fence)) + return fence; + + dma_fence_put(fence); + } + + return NULL; +} + +static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job) +{ + struct lima_sched_task *task = to_lima_task(job); + struct lima_sched_pipe *pipe = to_lima_pipe(job->sched); + struct lima_fence *fence; + struct dma_fence *ret; + struct lima_vm *vm = NULL, *last_vm = NULL; + int i; + + /* after GPU reset */ + if (job->s_fence->finished.error < 0) + return NULL; + + fence = lima_fence_create(pipe); + if (!fence) + return NULL; + task->fence = &fence->base; + + /* for caller usage of the fence, otherwise irq handler + * may consume the fence before caller use it */ + ret = dma_fence_get(task->fence); + + pipe->current_task = task; + + /* this is needed for MMU to work correctly, otherwise GP/PP + * will hang or page fault for unknown reason after running for + * a while. + * + * Need to investigate: + * 1. is it related to TLB + * 2. how much performance will be affected by L2 cache flush + * 3. can we reduce the calling of this function because all + * GP/PP use the same L2 cache on mali400 + * + * TODO: + * 1. move this to task fini to save some wait time? + * 2. when GP/PP use different l2 cache, need PP wait GP l2 + * cache flush? + */ + for (i = 0; i < pipe->num_l2_cache; i++) + lima_l2_cache_flush(pipe->l2_cache[i]); + + if (task->vm != pipe->current_vm) { + vm = lima_vm_get(task->vm); + last_vm = pipe->current_vm; + pipe->current_vm = task->vm; + } + + if (pipe->bcast_mmu) + lima_mmu_switch_vm(pipe->bcast_mmu, vm); + else { + for (i = 0; i < pipe->num_mmu; i++) + lima_mmu_switch_vm(pipe->mmu[i], vm); + } + + if (last_vm) + lima_vm_put(last_vm); + + pipe->error = false; + pipe->task_run(pipe, task); + + return task->fence; +} + +static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe, + struct lima_sched_task *task) +{ + kthread_park(pipe->base.thread); + drm_sched_hw_job_reset(&pipe->base, &task->base); + + pipe->task_error(pipe); + + if (pipe->bcast_mmu) + lima_mmu_page_fault_resume(pipe->bcast_mmu); + else { + int i; + for (i = 0; i < pipe->num_mmu; i++) + lima_mmu_page_fault_resume(pipe->mmu[i]); + } + + if (pipe->current_vm) + lima_vm_put(pipe->current_vm); + + pipe->current_vm = NULL; + pipe->current_task = NULL; + + drm_sched_job_recovery(&pipe->base); + kthread_unpark(pipe->base.thread); +} + +static void lima_sched_timedout_job(struct drm_sched_job *job) +{ + struct lima_sched_pipe *pipe = to_lima_pipe(job->sched); + struct lima_sched_task *task = to_lima_task(job); + + lima_sched_handle_error_task(pipe, task); +} + +static void lima_sched_free_job(struct drm_sched_job *job) +{ + struct lima_sched_task *task = to_lima_task(job); + struct lima_sched_pipe *pipe = to_lima_pipe(job->sched); + int i; + + dma_fence_put(task->fence); + + for (i = 0; i < task->num_dep; i++) { + if (task->dep[i]) + dma_fence_put(task->dep[i]); + } + + if (task->dep) + kfree(task->dep); + + lima_vm_put(task->vm); + kmem_cache_free(pipe->task_slab, task); +} + +const struct drm_sched_backend_ops lima_sched_ops = { + .dependency = lima_sched_dependency, + .run_job = lima_sched_run_job, + .timedout_job = lima_sched_timedout_job, + .free_job = lima_sched_free_job, +}; + +static void lima_sched_error_work(struct work_struct *work) +{ + struct lima_sched_pipe *pipe = + container_of(work, struct lima_sched_pipe, error_work); + struct lima_sched_task *task = pipe->current_task; + + lima_sched_handle_error_task(pipe, task); +} + +int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name) +{ + long timeout; + + if (lima_sched_timeout_ms <= 0) + timeout = MAX_SCHEDULE_TIMEOUT; + else + timeout = msecs_to_jiffies(lima_sched_timeout_ms); + + pipe->fence_context = dma_fence_context_alloc(1); + spin_lock_init(&pipe->fence_lock); + + INIT_WORK(&pipe->error_work, lima_sched_error_work); + + return drm_sched_init(&pipe->base, &lima_sched_ops, 1, 0, timeout, name); +} + +void lima_sched_pipe_fini(struct lima_sched_pipe *pipe) +{ + drm_sched_fini(&pipe->base); +} + +unsigned long lima_timeout_to_jiffies(u64 timeout_ns) +{ + unsigned long timeout_jiffies; + ktime_t timeout; + + /* clamp timeout if it's to large */ + if (((s64)timeout_ns) < 0) + return MAX_SCHEDULE_TIMEOUT; + + timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get()); + if (ktime_to_ns(timeout) < 0) + return 0; + + timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout)); + /* clamp timeout to avoid unsigned-> signed overflow */ + if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT ) + return MAX_SCHEDULE_TIMEOUT; + + return timeout_jiffies; +} + +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe) +{ + if (pipe->error) + schedule_work(&pipe->error_work); + else { + struct lima_sched_task *task = pipe->current_task; + + pipe->task_fini(pipe); + dma_fence_signal(task->fence); + } +} diff --git a/drivers/gpu/drm/lima/lima_sched.h b/drivers/gpu/drm/lima/lima_sched.h new file mode 100644 index 000000000000..b93b7b4eded4 --- /dev/null +++ b/drivers/gpu/drm/lima/lima_sched.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2017-2018 Lima Project + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __LIMA_SCHED_H__ +#define __LIMA_SCHED_H__ + +#include <drm/gpu_scheduler.h> + +struct lima_vm; + +struct lima_sched_task { + struct drm_sched_job base; + + struct lima_vm *vm; + void *frame; + + struct dma_fence **dep; + int num_dep; + int max_dep; + + /* pipe fence */ + struct dma_fence *fence; +}; + +struct lima_sched_context { + struct drm_sched_entity base; + struct mutex lock; + struct dma_fence **fences; + uint32_t sequence; +}; + +#define LIMA_SCHED_PIPE_MAX_MMU 8 +#define LIMA_SCHED_PIPE_MAX_L2_CACHE 2 +#define LIMA_SCHED_PIPE_MAX_PROCESSOR 8 + +struct lima_ip; + +struct lima_sched_pipe { + struct drm_gpu_scheduler base; + + u64 fence_context; + u32 fence_seqno; + spinlock_t fence_lock; + + struct lima_sched_task *current_task; + struct lima_vm *current_vm; + + struct lima_ip *mmu[LIMA_SCHED_PIPE_MAX_MMU]; + int num_mmu; + + struct lima_ip *l2_cache[LIMA_SCHED_PIPE_MAX_L2_CACHE]; + int num_l2_cache; + + struct lima_ip *processor[LIMA_SCHED_PIPE_MAX_PROCESSOR]; + int num_processor; + + struct lima_ip *bcast_processor; + struct lima_ip *bcast_mmu; + + u32 done; + bool error; + atomic_t task; + + int frame_size; + struct kmem_cache *task_slab; + + int (*task_validate)(struct lima_sched_pipe *pipe, struct lima_sched_task *task); + void (*task_run)(struct lima_sched_pipe *pipe, struct lima_sched_task *task); + void (*task_fini)(struct lima_sched_pipe *pipe); + void (*task_error)(struct lima_sched_pipe *pipe); + void (*task_mmu_error)(struct lima_sched_pipe *pipe); + + struct work_struct error_work; +}; + +int lima_sched_task_init(struct lima_sched_task *task, + struct lima_sched_context *context, + struct lima_vm *vm); +void lima_sched_task_fini(struct lima_sched_task *task); +int lima_sched_task_add_dep(struct lima_sched_task *task, struct dma_fence *fence); + +int lima_sched_context_init(struct lima_sched_pipe *pipe, + struct lima_sched_context *context, + atomic_t *guilty); +void lima_sched_context_fini(struct lima_sched_pipe *pipe, + struct lima_sched_context *context); +uint32_t lima_sched_context_queue_task(struct lima_sched_context *context, + struct lima_sched_task *task, + uint32_t *done); +struct dma_fence *lima_sched_context_get_fence( + struct lima_sched_context *context, uint32_t seq); + +int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name); +void lima_sched_pipe_fini(struct lima_sched_pipe *pipe); +void lima_sched_pipe_task_done(struct lima_sched_pipe *pipe); + +static inline void lima_sched_pipe_mmu_error(struct lima_sched_pipe *pipe) +{ + pipe->error = true; + pipe->task_mmu_error(pipe); +} + +int lima_sched_slab_init(void); +void lima_sched_slab_fini(void); + +unsigned long lima_timeout_to_jiffies(u64 timeout_ns); + +#endif

[RFC,22/24] drm/lima: add GPU schedule using DRM_SCHED

Commit Message

Patch