@@ -106,13 +106,13 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
}
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
- OUT_RING(ring, submit->fence->seqno);
+ OUT_RING(ring, submit->seqno);
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, fence)));
- OUT_RING(ring, submit->fence->seqno);
+ OUT_RING(ring, submit->seqno);
gpu->funcs->flush(gpu);
}
@@ -75,7 +75,7 @@ int adreno_hw_init(struct msm_gpu *gpu)
gpu->rb->cur = gpu->rb->start;
/* reset completed fence seqno: */
- adreno_gpu->memptrs->fence = gpu->fctx->completed_fence;
+ adreno_gpu->memptrs->fence = gpu->seqno;
adreno_gpu->memptrs->rptr = 0;
/* Setup REG_CP_RB_CNTL: */
@@ -165,7 +165,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
}
OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
- OUT_RING(ring, submit->fence->seqno);
+ OUT_RING(ring, submit->seqno);
if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
/* Flush HLSQ lazy updates to make sure there is nothing
@@ -182,7 +182,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
OUT_PKT3(ring, CP_EVENT_WRITE, 3);
OUT_RING(ring, CACHE_FLUSH_TS);
OUT_RING(ring, rbmemptr(adreno_gpu, fence));
- OUT_RING(ring, submit->fence->seqno);
+ OUT_RING(ring, submit->seqno);
/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
OUT_PKT3(ring, CP_INTERRUPT, 1);
@@ -255,7 +255,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
adreno_gpu->rev.patchid);
seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
- gpu->fctx->last_fence);
+ gpu->seqno);
seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu));
seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
@@ -290,7 +290,7 @@ void adreno_dump_info(struct msm_gpu *gpu)
adreno_gpu->rev.patchid);
printk("fence: %d/%d\n", adreno_gpu->memptrs->fence,
- gpu->fctx->last_fence);
+ gpu->seqno);
printk("rptr: %d\n", get_rptr(adreno_gpu));
printk("rb wptr: %d\n", get_wptr(gpu->rb));
}
@@ -510,7 +510,7 @@ static void load_gpu(struct drm_device *dev)
mutex_unlock(&init_lock);
}
-static int context_init(struct drm_file *file)
+static int context_init(struct drm_device *dev, struct drm_file *file)
{
struct msm_file_private *ctx;
@@ -518,7 +518,7 @@ static int context_init(struct drm_file *file)
if (!ctx)
return -ENOMEM;
- msm_submitqueue_init(ctx);
+ msm_submitqueue_init(dev, ctx);
file->driver_priv = ctx;
@@ -532,7 +532,7 @@ static int msm_open(struct drm_device *dev, struct drm_file *file)
*/
load_gpu(dev);
- return context_init(file);
+ return context_init(dev, file);
}
static void context_close(struct msm_file_private *ctx)
@@ -746,6 +746,8 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
struct msm_drm_private *priv = dev->dev_private;
struct drm_msm_wait_fence *args = data;
ktime_t timeout = to_ktime(args->timeout);
+ struct msm_gpu_submitqueue *queue;
+ int ret;
if (args->pad) {
DRM_ERROR("invalid pad: %08x\n", args->pad);
@@ -755,7 +757,14 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
if (!priv->gpu)
return 0;
- return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true);
+ queue = msm_submitqueue_get(file->driver_priv, args->queueid);
+ if (!queue)
+ return -ENOENT;
+
+ ret = msm_wait_fence(queue->fctx, args->fence, &timeout, true);
+
+ msm_submitqueue_put(queue);
+ return ret;
}
static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data,
@@ -805,7 +814,7 @@ static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data,
if (args->flags & ~MSM_SUBMITQUEUE_FLAGS)
return -EINVAL;
- return msm_submitqueue_create(file->driver_priv, args->prio,
+ return msm_submitqueue_create(dev, file->driver_priv, args->prio,
args->flags, &args->id);
}
@@ -314,11 +314,11 @@ void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
u32 msm_readl(const void __iomem *addr);
struct msm_gpu_submitqueue;
-int msm_submitqueue_init(struct msm_file_private *ctx);
+int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx);
struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
u32 id);
-int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio,
- u32 flags, u32 *id);
+int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
+ u32 prio, u32 flags, u32 *id);
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
void msm_submitqueue_close(struct msm_file_private *ctx);
@@ -31,7 +31,7 @@ struct msm_fence_context *
return ERR_PTR(-ENOMEM);
fctx->dev = dev;
- fctx->name = name;
+ strncpy(fctx->name, name, sizeof(fctx->name));
fctx->context = dma_fence_context_alloc(1);
init_waitqueue_head(&fctx->event);
spin_lock_init(&fctx->spinlock);
@@ -22,7 +22,7 @@
struct msm_fence_context {
struct drm_device *dev;
- const char *name;
+ char name[32];
unsigned context;
/* last_fence == completed_fence --> no pending work */
uint32_t last_fence; /* last assigned fence */
@@ -141,6 +141,7 @@ struct msm_gem_submit {
struct list_head node; /* node in gpu submit_list */
struct list_head bo_list;
struct ww_acquire_ctx ticket;
+ uint32_t seqno; /* Sequence number of the submit on the ring */
struct dma_fence *fence;
struct msm_gpu_submitqueue *queue;
struct pid *pid; /* submitting process */
@@ -233,7 +233,8 @@ static int submit_fence_sync(struct msm_gem_submit *submit)
struct msm_gem_object *msm_obj = submit->bos[i].obj;
bool write = submit->bos[i].flags & MSM_SUBMIT_BO_WRITE;
- ret = msm_gem_sync_object(&msm_obj->base, submit->gpu->fctx, write);
+ ret = msm_gem_sync_object(&msm_obj->base, submit->queue->fctx,
+ write);
if (ret)
break;
}
@@ -426,7 +427,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
* Wait if the fence is from a foreign context, or if the fence
* array contains any fence from a foreign context.
*/
- if (!dma_fence_match_context(in_fence, gpu->fctx->context)) {
+ if (!dma_fence_match_context(in_fence, queue->fctx->context)) {
ret = dma_fence_wait(in_fence, true);
if (ret)
return ret;
@@ -531,7 +532,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
submit->nr_cmds = i;
- submit->fence = msm_fence_alloc(gpu->fctx);
+ submit->fence = msm_fence_alloc(queue->fctx);
if (IS_ERR(submit->fence)) {
ret = PTR_ERR(submit->fence);
submit->fence = NULL;
@@ -221,6 +221,19 @@ int msm_gpu_hw_init(struct msm_gpu *gpu)
* Hangcheck detection for locked gpu:
*/
+static void update_fences(struct msm_gpu *gpu, uint32_t fence)
+{
+ struct msm_gem_submit *submit;
+
+ list_for_each_entry(submit, &gpu->submit_list, node) {
+ if (submit->seqno > fence)
+ break;
+
+ msm_update_fence(submit->queue->fctx,
+ submit->fence->seqno);
+ }
+}
+
static void retire_submits(struct msm_gpu *gpu);
static void recover_worker(struct work_struct *work)
@@ -230,13 +243,13 @@ static void recover_worker(struct work_struct *work)
struct msm_gem_submit *submit;
uint32_t fence = gpu->funcs->last_fence(gpu);
- msm_update_fence(gpu->fctx, fence + 1);
+ update_fences(gpu, fence + 1);
mutex_lock(&dev->struct_mutex);
dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
list_for_each_entry(submit, &gpu->submit_list, node) {
- if (submit->fence->seqno == (fence + 1)) {
+ if (submit->seqno == (fence + 1)) {
struct task_struct *task;
rcu_read_lock();
@@ -286,7 +299,7 @@ static void hangcheck_handler(unsigned long data)
if (fence != gpu->hangcheck_fence) {
/* some progress has been made.. ya! */
gpu->hangcheck_fence = fence;
- } else if (fence < gpu->fctx->last_fence) {
+ } else if (fence < gpu->seqno) {
/* no progress and not done.. hung! */
gpu->hangcheck_fence = fence;
dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
@@ -294,12 +307,12 @@ static void hangcheck_handler(unsigned long data)
dev_err(dev->dev, "%s: completed fence: %u\n",
gpu->name, fence);
dev_err(dev->dev, "%s: submitted fence: %u\n",
- gpu->name, gpu->fctx->last_fence);
+ gpu->name, gpu->seqno);
queue_work(priv->wq, &gpu->recover_work);
}
/* if still more pending work, reset the hangcheck timer: */
- if (gpu->fctx->last_fence > gpu->hangcheck_fence)
+ if (gpu->seqno > gpu->hangcheck_fence)
hangcheck_timer_reset(gpu);
/* workaround for missing irq: */
@@ -451,7 +464,7 @@ static void retire_worker(struct work_struct *work)
struct drm_device *dev = gpu->dev;
uint32_t fence = gpu->funcs->last_fence(gpu);
- msm_update_fence(gpu->fctx, fence);
+ update_fences(gpu, fence);
mutex_lock(&dev->struct_mutex);
retire_submits(gpu);
@@ -480,6 +493,8 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
msm_gpu_hw_init(gpu);
+ submit->seqno = ++gpu->seqno;
+
list_add_tail(&submit->node, &gpu->submit_list);
msm_rd_dump_submit(submit);
@@ -575,12 +590,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
gpu->dev = drm;
gpu->funcs = funcs;
gpu->name = name;
- gpu->fctx = msm_fence_context_alloc(drm, name);
- if (IS_ERR(gpu->fctx)) {
- ret = PTR_ERR(gpu->fctx);
- gpu->fctx = NULL;
- goto fail;
- }
INIT_LIST_HEAD(&gpu->active_list);
INIT_WORK(&gpu->retire_work, retire_worker);
@@ -693,7 +702,4 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
msm_gem_put_iova(gpu->rb->bo, gpu->aspace);
msm_ringbuffer_destroy(gpu->rb);
}
-
- if (gpu->fctx)
- msm_fence_context_free(gpu->fctx);
}
@@ -93,8 +93,8 @@ struct msm_gpu {
/* list of GEM active objects: */
struct list_head active_list;
- /* fencing: */
- struct msm_fence_context *fctx;
+ /* The sequence number for ring submissions */
+ uint32_t seqno;
/* does gpu need hw_init? */
bool needs_hw_init;
@@ -134,7 +134,7 @@ struct msm_gpu {
static inline bool msm_gpu_active(struct msm_gpu *gpu)
{
- return gpu->fctx->last_fence > gpu->funcs->last_fence(gpu);
+ return gpu->seqno > gpu->funcs->last_fence(gpu);
}
/* Perf-Counters:
@@ -157,6 +157,8 @@ struct msm_gpu_submitqueue {
int faults;
struct list_head node;
struct kref ref;
+
+ struct msm_fence_context *fctx;
};
static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
@@ -19,6 +19,7 @@ void msm_submitqueue_destroy(struct kref *kref)
struct msm_gpu_submitqueue *queue = container_of(kref,
struct msm_gpu_submitqueue, ref);
+ msm_fence_context_free(queue->fctx);
kfree(queue);
}
@@ -60,16 +61,17 @@ void msm_submitqueue_close(struct msm_file_private *ctx)
msm_submitqueue_put(entry);
}
-int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags,
- u32 *id)
+int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
+ u32 prio, u32 flags, u32 *id)
{
struct msm_gpu_submitqueue *queue;
+ char name[32];
+ int ret = 0;
if (!ctx)
return -ENODEV;
queue = kzalloc(sizeof(*queue), GFP_KERNEL);
-
if (!queue)
return -ENOMEM;
@@ -86,12 +88,31 @@ int msm_submitqueue_create(struct msm_file_private *ctx, u32 prio, u32 flags,
list_add_tail(&queue->node, &ctx->submitqueues);
+ /*
+ * Get another reference to the queue before releasing the lock
+ * so it does't get destroyed while we are still setting it up
+ */
+ kref_get(&queue->ref);
+
write_unlock(&ctx->queuelock);
- return 0;
+ /* FIXME: What should the name be? */
+ sprintf(name, "gpu-queue-%d", queue->id);
+
+ /* Allocate a fence domain for the queue */
+
+ queue->fctx = msm_fence_context_alloc(drm, name);
+ if (IS_ERR(queue->fctx)) {
+ ret = PTR_ERR(queue->fctx);
+ msm_submitqueue_put(queue);
+ }
+
+ msm_submitqueue_put(queue);
+
+ return ret;
}
-int msm_submitqueue_init(struct msm_file_private *ctx)
+int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
{
if (!ctx)
return 0;
@@ -104,7 +125,7 @@ int msm_submitqueue_init(struct msm_file_private *ctx)
* Add the "default" submitqueue with id 0
* "medium" priority (3) and no flags
*/
- return msm_submitqueue_create(ctx, 3, 0, NULL);
+ return msm_submitqueue_create(drm, ctx, 3, 0, NULL);
}
int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id)
@@ -232,6 +232,7 @@ struct drm_msm_wait_fence {
__u32 fence; /* in */
__u32 pad;
struct drm_msm_timespec timeout; /* in */
+ __u32 queueid; /* in, submitqueue id that owns the fence */
};
/* madvise provides a way to tell the kernel in case a buffers contents
Currently we use a single fence context for the entire GPU. That works until we start dealing with multiple ringbuffers that will all need their own sequence number. So in order for the user to continue using the fence functions we will need some way to uniquely identify a submission that gets mapped into the appropriate ringbuffer and sequence ID. The best way to do this is to move to per-submitqueue fences. This allows each user context to have their own monotonically increasing identifier and we can easily map back and forth. To implement this, create a fence context for each submitqueue and assign user fences from that context. Each submission will have be assigned a submitqueue fence and a unique sequence ID for the target ringbuffer. When the submission is retired on the ring the fence will get signaled. struct drm_msm_wait_fence in the UABI needs to be extended to include the queue ID for the fence being queried. Legacy applications that don't use submitqueues will use queue 0 by default. Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org> --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++----- drivers/gpu/drm/msm/msm_drv.c | 19 ++++++++++++----- drivers/gpu/drm/msm/msm_drv.h | 6 +++--- drivers/gpu/drm/msm/msm_fence.c | 2 +- drivers/gpu/drm/msm/msm_fence.h | 2 +- drivers/gpu/drm/msm/msm_gem.h | 1 + drivers/gpu/drm/msm/msm_gem_submit.c | 7 ++++--- drivers/gpu/drm/msm/msm_gpu.c | 36 +++++++++++++++++++-------------- drivers/gpu/drm/msm/msm_gpu.h | 8 +++++--- drivers/gpu/drm/msm/msm_submitqueue.c | 33 ++++++++++++++++++++++++------ include/uapi/drm/msm_drm.h | 1 + 12 files changed, 85 insertions(+), 44 deletions(-)