@@ -1957,7 +1957,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
return;
}
- page = i915_gem_object_get_page(ctx_obj, 1);
+ page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
if (!WARN_ON(page == NULL)) {
reg_state = kmap_atomic(page);
@@ -348,16 +348,52 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
{
struct guc_context_desc desc;
struct sg_table *sg;
+ int i;
memset(&desc, 0, sizeof(desc));
desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
desc.context_id = client->ctx_index;
desc.priority = client->priority;
- desc.engines_used = (1 << RCS) | (1 << VCS) | (1 << BCS) |
- (1 << VECS) | (1 << VCS2); /* all engines */
desc.db_id = client->doorbell_id;
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ struct guc_execlist_context *lrc = &desc.lrc[i];
+ struct intel_engine_cs *ring;
+ struct drm_i915_gem_object *obj;
+
+ /* TODO: We have a design issue to be solved here. We only know
+ * which engine the user will use when the first batch arrives,
+ * but at this point GuC expects the lrc and ring to be pinned
+ * already. That is not a problem for the default context, which
+ * is currently the only owner of a GuC client, but any future
+ * owner of a GuC client must make sure its lrc is pinned before
+ * getting here.
+ */
+ obj = client->owner->engine[i].state;
+ if (!obj)
+ break;
+
+ ring = client->owner->engine[i].ringbuf->ring;
+
+ lrc->context_desc = (u32)intel_lr_context_descriptor(ring, obj);
+ /* The state page is after PPHWSP */
+ lrc->ring_lcra = i915_gem_obj_ggtt_offset(obj) +
+ LRC_STATE_PN * PAGE_SIZE;
+ lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
+ (ring->id << GUC_ELC_ENGINE_OFFSET);
+
+ obj = client->owner->engine[i].ringbuf->obj;
+
+ lrc->ring_begin = i915_gem_obj_ggtt_offset(obj);
+ lrc->ring_end = lrc->ring_begin + obj->base.size - 1;
+ lrc->ring_next_free_location = lrc->ring_begin;
+ lrc->ring_current_tail_pointer_value = 0;
+
+ desc.engines_used |= (1 << ring->id);
+ }
+
+ WARN_ON(desc.engines_used == 0);
+
/*
* The CPU address is only needed at certain points, so kmap_atomic on
* demand instead of storing it in the ctx descriptor.
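For reference, the loop above fills one guc_execlist_context entry per engine that already has an LRC allocated, and builds engines_used from exactly those engines. Below is a hypothetical refactor of the loop body into a standalone helper (the helper name and the factoring are illustrative, not part of the patch), which makes the derivation of each field a little more explicit:

/* Hypothetical helper, not in the patch: fill a single execlist context
 * entry for an engine whose LRC and ring buffer are already pinned. */
static void sketch_fill_guc_lrc(struct guc_execlist_context *lrc,
				struct intel_engine_cs *ring,
				struct drm_i915_gem_object *ctx_obj,
				struct drm_i915_gem_object *ring_obj,
				uint32_t ctx_index)
{
	/* Low 32 bits of the execlists context descriptor */
	lrc->context_desc = (u32)intel_lr_context_descriptor(ring, ctx_obj);

	/* GuC wants the register state, which sits after the PPHWSP page */
	lrc->ring_lcra = i915_gem_obj_ggtt_offset(ctx_obj) +
			 LRC_STATE_PN * PAGE_SIZE;

	/* Context index and engine id packed via the GUC_ELC_* shifts */
	lrc->context_id = (ctx_index << GUC_ELC_CTXID_OFFSET) |
			  (ring->id << GUC_ELC_ENGINE_OFFSET);

	/* Ring buffer bounds in GGTT space; the ring starts out empty */
	lrc->ring_begin = i915_gem_obj_ggtt_offset(ring_obj);
	lrc->ring_end = lrc->ring_begin + ring_obj->base.size - 1;
	lrc->ring_next_free_location = lrc->ring_begin;
	lrc->ring_current_tail_pointer_value = 0;
}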
@@ -634,11 +670,13 @@ static void guc_client_free(struct drm_device *dev,
* The kernel client to replace ExecList submission is created with
* NORMAL priority. Priority of a client for scheduler can be HIGH,
* while a preemption context can use CRITICAL.
+ * @ctx: the context that owns the client (we use the default render context)
*
* Return: An i915_guc_client object if success.
*/
static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
- uint32_t priority)
+ uint32_t priority,
+ struct intel_context *ctx)
{
struct i915_guc_client *client;
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -651,6 +689,7 @@ static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
client->doorbell_id = GUC_INVALID_DOORBELL_ID;
client->priority = priority;
+ client->owner = ctx;
client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
@@ -784,10 +823,11 @@ int i915_guc_submission_enable(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_guc *guc = &dev_priv->guc;
+ struct intel_context *ctx = dev_priv->ring[RCS].default_context;
struct i915_guc_client *client;
/* client for execbuf submission */
- client = guc_client_alloc(dev, GUC_CTX_PRIORITY_NORMAL);
+ client = guc_client_alloc(dev, GUC_CTX_PRIORITY_NORMAL, ctx);
if (!client) {
DRM_ERROR("Failed to create execbuf guc_client\n");
return -ENOMEM;
@@ -30,6 +30,7 @@
struct i915_guc_client {
struct drm_i915_gem_object *client_obj;
+ struct intel_context *owner;
uint32_t priority;
uint32_t ctx_index;
@@ -257,7 +257,8 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
*/
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
{
- u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+ u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
+ LRC_PPHWSP_PN * PAGE_SIZE;
/* LRCA is required to be 4K aligned so the more significant 20 bits
* are globally unique */
@@ -269,7 +270,8 @@ uint64_t intel_lr_context_descriptor(struct intel_engine_cs *ring,
{
struct drm_device *dev = ring->dev;
uint64_t desc;
- uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+ uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
+ LRC_PPHWSP_PN * PAGE_SIZE;
WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
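Both functions above now add LRC_PPHWSP_PN * PAGE_SIZE to the object's GGTT offset, so the LRCA points at the PPHWSP rather than at page 0. As a side note, the mask in the WARN_ON encodes two constraints; a minimal sketch of the check (the helper name is made up for illustration):

/* Zero iff the LRCA is 4K aligned (low 12 bits clear) and fits in
 * 32 bits (high 32 bits clear); with 4K alignment the remaining
 * 20 significant bits are what make the context id unique. */
static inline bool lrca_is_valid(u64 lrca)
{
	return (lrca & 0xFFFFFFFF00000FFFULL) == 0;
}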
@@ -339,7 +341,7 @@ void intel_lr_context_update(struct drm_i915_gem_object *ctx_obj,
struct page *page;
uint32_t *reg_state;
- page = i915_gem_object_get_page(ctx_obj, 1);
+ page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
reg_state = kmap_atomic(page);
reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
@@ -698,13 +700,17 @@ static void
intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *ring = request->ring;
+ struct drm_i915_private *dev_priv = request->i915;
intel_logical_ring_advance(request->ringbuf);
if (intel_ring_stopped(ring))
return;
- execlists_context_queue(request);
+ if (dev_priv->guc.execbuf_client)
+ i915_guc_submit(dev_priv->guc.execbuf_client, request->ctx, ring);
+ else
+ execlists_context_queue(request);
}
static int logical_ring_wrap_buffer(struct drm_i915_gem_request *req)
@@ -999,18 +1005,23 @@ static int intel_lr_context_pin(struct intel_engine_cs *ring,
{
struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
int ret = 0;
WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
if (ctx->engine[ring->id].pin_count++ == 0) {
- ret = i915_gem_obj_ggtt_pin(ctx_obj,
- GEN8_LR_CONTEXT_ALIGN, 0);
+ ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
+ PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
if (ret)
goto reset_pin_count;
ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
if (ret)
goto unpin_ctx_obj;
+
+ /* Invalidate GuC TLB. */
+ if (i915.enable_guc_submission)
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}
return ret;
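This pin path and the one in intel_lr_context_deferred_create() further down both pass PIN_OFFSET_BIAS with GUC_WOPCM_SIZE_VALUE and then invalidate the GuC TLB once the new mapping exists. A minimal sketch of that pattern as a hypothetical helper (the name and factoring are mine, not the patch's; my reading is that the bias keeps the object out of the GGTT range the GuC reserves for its WOPCM):

static int sketch_guc_aware_ggtt_pin(struct drm_i915_private *dev_priv,
				     struct drm_i915_gem_object *obj,
				     uint32_t alignment)
{
	/* Bias the object above the WOPCM range so the GuC can address it */
	int ret = i915_gem_obj_ggtt_pin(obj, alignment,
					PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
	if (ret)
		return ret;

	/* New GGTT entries are not visible to the GuC until its TLB is flushed */
	if (i915.enable_guc_submission)
		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);

	return 0;
}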
@@ -1647,8 +1658,13 @@ out:
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
{
+ struct drm_i915_private *dev_priv = req->i915;
int ret;
+ /* Invalidate GuC TLB. */
+ if (i915.enable_guc_submission)
+ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+
ret = intel_logical_ring_workarounds_emit(req);
if (ret)
return ret;
@@ -2005,7 +2021,7 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
- page = i915_gem_object_get_page(ctx_obj, 1);
+ page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
reg_state = kmap_atomic(page);
/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
@@ -2163,12 +2179,14 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
struct drm_i915_gem_object *default_ctx_obj)
{
struct drm_i915_private *dev_priv = ring->dev->dev_private;
+ struct page *page;
/* The status page is offset 0 from the default context object
* in LRC mode. */
- ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj);
- ring->status_page.page_addr =
- kmap(sg_page(default_ctx_obj->pages->sgl));
+ ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
+ + LRC_PPHWSP_PN * PAGE_SIZE;
+ page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
+ ring->status_page.page_addr = kmap(page);
ring->status_page.obj = default_ctx_obj;
I915_WRITE(RING_HWS_PGA(ring->mmio_base),
@@ -2204,6 +2222,9 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
context_size = round_up(get_lr_context_size(ring), 4096);
+ /* One extra page as shared data between the driver and GuC */
+ context_size += PAGE_SIZE * LRC_PPHWSP_PN;
+
ctx_obj = i915_gem_alloc_object(dev, context_size);
if (!ctx_obj) {
DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
@@ -2211,7 +2232,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
}
if (is_global_default_ctx) {
- ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+ ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
+ PIN_OFFSET_BIAS | GUC_WOPCM_SIZE_VALUE);
if (ret) {
DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
ret);
@@ -2230,7 +2252,7 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
ringbuf->ring = ring;
- ringbuf->size = 32 * PAGE_SIZE;
+ ringbuf->size = 4 * PAGE_SIZE;
ringbuf->effective_size = ringbuf->size;
ringbuf->head = 0;
ringbuf->tail = 0;
@@ -2330,7 +2352,7 @@ void intel_lr_context_reset(struct drm_device *dev,
WARN(1, "Failed get_pages for context obj\n");
continue;
}
- page = i915_gem_object_get_page(ctx_obj, 1);
+ page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
reg_state = kmap_atomic(page);
reg_state[CTX_RING_HEAD+1] = 0;
@@ -66,6 +66,12 @@ static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
}
/* Logical Ring Contexts */
+
+/* One extra page is added in front of the LRC for data shared between the driver and GuC */
+#define LRC_GUCSHR_PN (0)
+#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + 1)
+#define LRC_STATE_PN (LRC_PPHWSP_PN + 1)
+
void intel_lr_context_free(struct intel_context *ctx);
int intel_lr_context_deferred_create(struct intel_context *ctx,
struct intel_engine_cs *ring);
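To make the layout described by these defines concrete, here is a small standalone sketch (plain user-space C, not kernel code; the GGTT address is an arbitrary example) that reproduces the offset arithmetic used throughout the patch:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096u
#define LRC_GUCSHR_PN	(0)			/* GuC shared data page */
#define LRC_PPHWSP_PN	(LRC_GUCSHR_PN + 1)	/* per-process HWSP     */
#define LRC_STATE_PN	(LRC_PPHWSP_PN + 1)	/* register state       */

int main(void)
{
	uint64_t ggtt_offset = 0x00200000;	/* arbitrary example GGTT address */

	/* The LRCA handed to the HW/GuC points at the PPHWSP, not at page 0 */
	uint64_t lrca  = ggtt_offset + LRC_PPHWSP_PN * PAGE_SIZE;
	/* The register state lives in the next page (LRC_STATE_PN) */
	uint64_t state = ggtt_offset + LRC_STATE_PN * PAGE_SIZE;

	/* 4K alignment leaves 20 significant bits usable as a context id */
	uint32_t id_bits = (uint32_t)(lrca >> 12);

	printf("lrca=%#llx state=%#llx id_bits=%#x\n",
	       (unsigned long long)lrca, (unsigned long long)state, id_bits);
	return 0;
}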