[RFC,43/44] drm/i915: Added support for submitting out-of-batch ring commands

Message ID 1403803475-16337-44-git-send-email-John.C.Harrison@Intel.com (mailing list archive)
State New, archived

Commit Message

John Harrison June 26, 2014, 5:24 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

There is a problem with any commands written to the ring that the scheduler
does not know about: they can be lost if the scheduler issues a pre-emption,
because the pre-emption mechanism discards the current ring contents. Thus any
non-batch-buffer submission has the potential to be skipped.

The solution is to make sure that nothing is written to the ring that did not
come from the scheduler. Few pieces of code write to the ring directly; the
only one that seems to occur on modern systems is the page flip code.

This checkin adds scheduler support for command submission without a batch
buffer - just an arbitrarily sized block of data to be written to the ring.
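
As an illustration of how a converted caller could look, a direct ring writer
such as the page flip code might route its commands through the new
i915_scheduler_queue_nonbatch() entry point roughly as below. This is only a
sketch: the helper name and the simplified flip command stream are invented
for illustration and are not part of the patch itself.

/*
 * Hypothetical caller sketch: queue a display flip as a non-batch
 * submission so that every ring write is owned by the scheduler.
 * The command stream is simplified; the real flip code emits
 * per-generation variants of MI_DISPLAY_FLIP.
 */
static int queue_flip_via_scheduler(struct intel_engine_cs *ring,
				    struct drm_i915_gem_object *fb_obj,
				    uint32_t base_addr, uint32_t pitch)
{
	uint32_t cmds[4];
	struct drm_i915_gem_object *objs[] = { fb_obj };

	cmds[0] = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(0);
	cmds[1] = pitch;
	cmds[2] = base_addr;
	cmds[3] = MI_NOOP;

	/*
	 * The scheduler copies the payload into its own allocation, so a
	 * stack buffer is fine.  Pass i915_ebp_sf_cacheline_align in the
	 * flags if the commands must start on a cacheline boundary;
	 * i915_ebp_sf_not_a_batch is added internally.
	 */
	return i915_scheduler_queue_nonbatch(ring, cmds, ARRAY_SIZE(cmds),
					     objs, 1, 0);
}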
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  109 ++++++++++++++++------------
 drivers/gpu/drm/i915/i915_scheduler.c      |   88 +++++++++++++++++++---
 drivers/gpu/drm/i915/i915_scheduler.h      |   12 +++
 3 files changed, 153 insertions(+), 56 deletions(-)

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b7d0737..48379fb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -987,16 +987,18 @@  i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 }
 
 static void
-i915_gem_execbuffer_retire_commands(struct drm_device *dev,
-				    struct drm_file *file,
-				    struct intel_engine_cs *ring,
-				    struct drm_i915_gem_object *obj)
+i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
 {
+	if(params->scheduler_flags & i915_ebp_sf_not_a_batch) {
+		i915_add_request_wo_flush(params->ring);
+		return;
+	}
+
 	/* Unconditionally force add_request to emit a full flush. */
-	ring->gpu_caches_dirty = true;
+	params->ring->gpu_caches_dirty = true;
 
 	/* Add a breadcrumb for the completion of the batch buffer */
-	(void)__i915_add_request(ring, file, obj, NULL, true);
+	(void)__i915_add_request(params->ring, params->file, params->batch_obj, NULL, true);
 }
 
 static int
@@ -1659,7 +1661,7 @@  static void
 emit_preamble(struct intel_engine_cs *ring, uint32_t seqno, struct intel_context *ctx, bool preemptive)
 {
 	emit_store_dw_index(ring, seqno, preemptive ? I915_PREEMPTIVE_ACTIVE_SEQNO : I915_BATCH_ACTIVE_SEQNO);
-	if (preemptive || i915_gem_context_is_default(ctx))
+	if (preemptive || !ctx || i915_gem_context_is_default(ctx))
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 	else
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
@@ -1761,7 +1763,8 @@  int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 	 * to span the transition from the end to the beginning of the ring.
 	 */
 #define I915_BATCH_EXEC_MAX_LEN         256	/* max dwords emitted here	*/
-	min_space = I915_BATCH_EXEC_MAX_LEN * 2 * sizeof(uint32_t);
+	min_space = I915_BATCH_EXEC_MAX_LEN + params->emit_len;
+	min_space = min_space * 2 * sizeof(uint32_t);
 	ret = intel_ring_test_space(ring, min_space);
 	if (ret)
 		goto early_err;
@@ -1811,30 +1814,34 @@  int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 		emit_regular_prequel(ring, seqno, start);
 #endif
 
-	/* Switch to the correct context for the batch */
-	ret = i915_switch_context(ring, params->ctx);
-	if (ret)
-		goto err;
+	if(params->ctx) {
+		/* Switch to the correct context for the batch */
+		ret = i915_switch_context(ring, params->ctx);
+		if (ret)
+			goto err;
+	}
 
 	/* Seqno matches? */
 	BUG_ON(seqno != params->seqno);
 	BUG_ON(ring->outstanding_lazy_seqno != params->seqno);
 
-	if (ring == &dev_priv->ring[RCS] &&
-	    params->mode != dev_priv->relative_constants_mode) {
+	if((params->scheduler_flags & i915_ebp_sf_not_a_batch) == 0) {
+		if (ring == &dev_priv->ring[RCS] &&
+		    params->mode != dev_priv->relative_constants_mode) {
 #ifndef CONFIG_DRM_I915_SCHEDULER
-		ret = intel_ring_begin(ring, 4);
-		if (ret)
-			goto err;
+			ret = intel_ring_begin(ring, 4);
+			if (ret)
+				goto err;
 #endif
 
-		intel_ring_emit(ring, MI_NOOP);
-		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, INSTPM);
-		intel_ring_emit(ring, params->mask << 16 | params->mode);
-		intel_ring_advance(ring);
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(ring, INSTPM);
+			intel_ring_emit(ring, params->mask << 16 | params->mode);
+			intel_ring_advance(ring);
 
-		dev_priv->relative_constants_mode = params->mode;
+			dev_priv->relative_constants_mode = params->mode;
+		}
 	}
 
 	if (params->args_flags & I915_EXEC_GEN7_SOL_RESET) {
@@ -1855,37 +1862,48 @@  int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 	emit_preamble(ring, seqno, params->ctx, preemptive);
 #endif
 
-	exec_len   = params->args_batch_len;
-	exec_start = params->batch_obj_vm_offset +
-		     params->args_batch_start_offset;
+	if(params->scheduler_flags & i915_ebp_sf_not_a_batch) {
+		if(params->scheduler_flags & i915_ebp_sf_cacheline_align) {
+			ret = intel_ring_cacheline_align(ring);
+			if (ret)
+				goto err;
+		}
+
+		for( i = 0; i < params->emit_len; i++ )
+			intel_ring_emit(ring, params->emit_data[i]);
+	} else {
+		exec_len   = params->args_batch_len;
+		exec_start = params->batch_obj_vm_offset +
+			     params->args_batch_start_offset;
 
 #ifdef CONFIG_DRM_I915_SCHEDULER_PREEMPTION
-	if (params->preemption_point) {
-		uint32_t preemption_offset = params->preemption_point - exec_start;
-		exec_start += preemption_offset;
-		exec_len   -= preemption_offset;
-	}
+		if (params->preemption_point) {
+			uint32_t preemption_offset = params->preemption_point - exec_start;
+			exec_start += preemption_offset;
+			exec_len   -= preemption_offset;
+		}
 #endif
 
-	if (params->cliprects) {
-		for (i = 0; i < params->args_num_cliprects; i++) {
-			ret = i915_emit_box(params->dev, &params->cliprects[i],
-					    params->args_DR1, params->args_DR4);
-			if (ret)
-				goto err;
-
+		if (params->cliprects) {
+			for (i = 0; i < params->args_num_cliprects; i++) {
+				ret = i915_emit_box(params->dev, &params->cliprects[i],
+						    params->args_DR1, params->args_DR4);
+				if (ret)
+					goto err;
+
+				ret = ring->dispatch_execbuffer(ring,
+								exec_start, exec_len,
+								params->eb_flags);
+				if (ret)
+					goto err;
+			}
+		} else {
 			ret = ring->dispatch_execbuffer(ring,
 							exec_start, exec_len,
 							params->eb_flags);
 			if (ret)
 				goto err;
 		}
-	} else {
-		ret = ring->dispatch_execbuffer(ring,
-						exec_start, exec_len,
-						params->eb_flags);
-		if (ret)
-			goto err;
 	}
 
 #ifdef CONFIG_DRM_I915_SCHEDULER_PREEMPTION
@@ -1899,8 +1917,7 @@  int i915_gem_do_execbuffer_final(struct i915_execbuffer_params *params)
 	BUG_ON(params->seqno   != ring->outstanding_lazy_seqno);
 	BUG_ON(params->request != ring->preallocated_lazy_request);
 
-	i915_gem_execbuffer_retire_commands(params->dev, params->file, ring,
-					    params->batch_obj);
+	i915_gem_execbuffer_retire_commands(params);
 
 	/* OLS should be zero by now! */
 	BUG_ON(ring->outstanding_lazy_seqno);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index c679513..127ded9 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -49,6 +49,7 @@  const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node)
 	*(ptr++) = node->bumped ? 'B' : '-',
 	*(ptr++) = (node->params.scheduler_flags & i915_ebp_sf_preempt) ? 'P' : '-';
 	*(ptr++) = (node->params.scheduler_flags & i915_ebp_sf_was_preempt) ? 'p' : '-';
+	*(ptr++) = (node->params.scheduler_flags & i915_ebp_sf_not_a_batch) ? '!' : '-';
 
 	*ptr = 0;
 
@@ -247,15 +248,30 @@  int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 	node->status = i915_sqs_queued;
 	node->stamp  = stamp;
 
-	/*
-	 * Verify that the batch buffer itself is included in the object list.
-	 */
-	for (i = 0; i < node->num_objs; i++) {
-		if (node->saved_objects[i].obj == node->params.batch_obj)
-			got_batch++;
-	}
+	if( node->params.scheduler_flags & i915_ebp_sf_not_a_batch ) {
+		uint32_t size;
+
+		size = sizeof(*node->params.emit_data) * node->params.emit_len;
+		node->params.emit_data = kmalloc(size, GFP_KERNEL);
+		if (!node->params.emit_data) {
+			kfree(node);
+			return -ENOMEM;
+		}
+
+		memcpy(node->params.emit_data, qe->params.emit_data, size);
+	} else {
+		BUG_ON(node->params.emit_len || node->params.emit_data);
 
-	BUG_ON(got_batch != 1);
+		/*
+		 * Verify that the batch buffer itself is included in the object list.
+		 */
+		for (i = 0; i < node->num_objs; i++) {
+			if (node->saved_objects[i].obj == node->params.batch_obj)
+				got_batch++;
+		}
+
+		BUG_ON(got_batch != 1);
+	}
 
 	/* Need to determine the number of incomplete entries in the list as
 	 * that will be the maximum size of the dependency list.
@@ -282,6 +298,7 @@  int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 		node->dep_list = kmalloc(sizeof(node->dep_list[0]) * incomplete,
 					 GFP_KERNEL);
 		if (!node->dep_list) {
+			kfree(node->params.emit_data);
 			kfree(node);
 			return -ENOMEM;
 		}
@@ -297,7 +314,10 @@  int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 				if (I915_SQS_IS_COMPLETE(test))
 					continue;
 
-				found = (node->params.ctx == test->params.ctx);
+				if (node->params.ctx && test->params.ctx)
+					found = (node->params.ctx == test->params.ctx);
+				else
+					found = false;
 
 				for (i = 0; (i < node->num_objs) && !found; i++) {
 					for (j = 0; j < test->num_objs; j++) {
@@ -332,7 +352,8 @@  int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 
 	list_add_tail(&node->link, &scheduler->node_queue[ring->id]);
 
-	i915_scheduler_file_queue_inc(node->params.file);
+	if (node->params.file)
+		i915_scheduler_file_queue_inc(node->params.file);
 
 	if (i915.scheduler_override & i915_so_submit_on_queue)
 		not_flying = true;
@@ -1051,6 +1072,7 @@  int i915_scheduler_remove(struct intel_engine_cs *ring)
 			i915_gem_context_unreference(node->params.ctx);
 
 		/* And anything else owned by the node: */
+		kfree(node->params.emit_data);
 		kfree(node->params.cliprects);
 		kfree(node->dep_list);
 		kfree(node);
@@ -1909,3 +1931,49 @@  int i915_scheduler_handle_IRQ(struct intel_engine_cs *ring)
 }
 
 #endif  /* CONFIG_DRM_I915_SCHEDULER */
+
+int i915_scheduler_queue_nonbatch(struct intel_engine_cs *ring,
+				  uint32_t *data, uint32_t len,
+				  struct drm_i915_gem_object *objs[],
+				  uint32_t num_objs, uint32_t flags)
+{
+	struct i915_scheduler_queue_entry qe;
+	int ret;
+
+	memset(&qe, 0x00, sizeof(qe));
+
+	ret = intel_ring_alloc_seqno(ring);
+	if (ret)
+		return ret;
+
+	qe.params.ring            = ring;
+	qe.params.dev             = ring->dev;
+	qe.params.seqno           = ring->outstanding_lazy_seqno;
+	qe.params.request         = ring->preallocated_lazy_request;
+	qe.params.emit_len        = len;
+	qe.params.emit_data       = data;
+	qe.params.scheduler_flags = flags | i915_ebp_sf_not_a_batch;
+
+#ifdef CONFIG_DRM_I915_SCHEDULER
+{
+	int i;
+
+	qe.num_objs      = num_objs;
+	qe.saved_objects = kmalloc(sizeof(qe.saved_objects[0]) * num_objs, GFP_KERNEL);
+	if (!qe.saved_objects)
+		return -ENOMEM;
+
+	for (i = 0; i < num_objs; i++) {
+		qe.saved_objects[i].obj = objs[i];
+		drm_gem_object_reference(&objs[i]->base);
+	}
+}
+#endif
+
+	ring->outstanding_lazy_seqno    = 0;
+	ring->preallocated_lazy_request = NULL;
+
+	trace_i915_gem_ring_queue(ring, &qe);
+
+	return i915_scheduler_queue_execbuffer(&qe);
+}
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 8d2289f..f2a9243 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -47,6 +47,8 @@  struct i915_execbuffer_params {
 	struct drm_i915_gem_request     *request;
 	uint32_t                        scheduler_index;
 	uint32_t                        scheduler_flags;
+	uint32_t                        *emit_data;
+	uint32_t                        emit_len;
 };
 
 /* Flag bits for i915_execbuffer_params::scheduler_flags */
@@ -55,6 +57,12 @@  enum {
 	i915_ebp_sf_preempt          = (1 << 0),
 	/* Preemption was originally requested */
 	i915_ebp_sf_was_preempt      = (1 << 1),
+
+	/* Non-batch internal driver submissions */
+	i915_ebp_sf_not_a_batch      = (1 << 2),
+
+	/* Payload should be cacheline aligned in ring */
+	i915_ebp_sf_cacheline_align  = (1 << 3),
 };
 
 enum i915_scheduler_queue_status {
@@ -118,6 +126,10 @@  int         i915_scheduler_init(struct drm_device *dev);
 int         i915_scheduler_closefile(struct drm_device *dev,
 				     struct drm_file *file);
 int         i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe);
+int         i915_scheduler_queue_nonbatch(struct intel_engine_cs *ring,
+					  uint32_t *data, uint32_t len,
+					  struct drm_i915_gem_object *objs[],
+					  uint32_t num_objs, uint32_t flags);
 int         i915_scheduler_handle_IRQ(struct intel_engine_cs *ring);
 bool        i915_scheduler_is_idle(struct intel_engine_cs *ring);