@@ -211,6 +211,7 @@ enum {
FAULT_AND_CONTINUE /* Unsupported */
};
#define GEN8_CTX_ID_SHIFT 32
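+/* Default INDIRECT_CTX_OFFSET field value; written shifted left by 6 */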
+#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
static int intel_lr_context_pin(struct intel_engine_cs *ring,
struct intel_context *ctx);
@@ -1077,6 +1078,96 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
return 0;
}
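+/*
+ * Populate the Gen8 indirect context WA batch buffer. The HW executes it as
+ * part of the context restore; for now it only contains MI_NOOPs until the
+ * actual workaround commands are added.
+ */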
+static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring)
+{
+ int index;
+ int end;
+ struct page *page;
+ uint32_t *cmd;
+
+ page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
+ cmd = kmap_atomic(page);
+
+ index = ring->wa_ctx.indctx_batch_offset / sizeof(uint32_t);
+ end = index + (ring->wa_ctx.indctx_batch_size *
+ CACHELINE_BYTES) / sizeof(uint32_t);
+
+ if ((end * sizeof(uint32_t)) > PAGE_SIZE) {
+ DRM_ERROR("context WA instruction exceeding alloted size\n");
+ kunmap_atomic(cmd);
+ return -EINVAL;
+ }
+
+ /*
+ * FIXME: placeholder only; fill the batch with MI_NOOPs for now and
+ * replace them with the actual workaround commands later.
+ */
+ while (index < end)
+ cmd[index++] = MI_NOOP;
+
+ /*
+ * MI_BATCH_BUFFER_END is not required in the indirect ctx BB because
+ * the HW executes exactly the number of cache lines programmed into
+ * the CTX_RCS_INDIRECT_CTX register.
+ */
+
+ kunmap_atomic(cmd);
+
+ return 0;
+}
+
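+/*
+ * Populate the Gen8 per-context WA batch buffer. Unlike the indirect ctx BB,
+ * it must be explicitly terminated with MI_BATCH_BUFFER_END.
+ */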
+static int gen8_init_perctx_bb(struct intel_engine_cs *ring)
+{
+ int index;
+ int end;
+ struct page *page;
+ uint32_t *cmd;
+
+ page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
+ cmd = kmap_atomic(page);
+
+ index = ring->wa_ctx.perctx_batch_offset / sizeof(uint32_t);
+ end = index + (ring->wa_ctx.perctx_batch_size *
+ CACHELINE_BYTES) / sizeof(uint32_t);
+
+ if ((end * sizeof(uint32_t)) > PAGE_SIZE) {
+ DRM_ERROR("context WA instruction exceeding alloted size\n");
+ kunmap_atomic(cmd);
+ return -EINVAL;
+ }
+
+ /*
+ * FIXME: placeholder only; fill the batch with MI_NOOPs for now and
+ * replace them with the actual workaround commands later.
+ */
+ while (index < end)
+ cmd[index++] = MI_NOOP;
+
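+ /* terminate the batch; the last NOOP is replaced with the end command */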
+ cmd[index - 1] = MI_BATCH_BUFFER_END;
+ kunmap_atomic(cmd);
+
+ return 0;
+}
+
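+/*
+ * Initialize the per-Gen WA batch buffers. Only Gen8 is handled here; newer
+ * Gens trigger a WARN until their WA batches are implemented.
+ */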
+static int intel_init_workaround_bb(struct intel_engine_cs *ring)
+{
+ int ret;
+
+ if (INTEL_INFO(ring->dev)->gen == 8) {
+ ret = gen8_init_indirectctx_bb(ring);
+ if (ret)
+ return ret;
+
+ ret = gen8_init_perctx_bb(ring);
+ if (ret)
+ return ret;
+ } else {
+ WARN(INTEL_INFO(ring->dev)->gen >= 9,
+ "WA batch buffer is not initialized for Gen%d\n",
+ INTEL_INFO(ring->dev)->gen);
+ }
+
+ return 0;
+}
+
static int gen8_init_common_ring(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
@@ -1382,6 +1473,46 @@ static int gen8_init_rcs_context(struct intel_engine_cs *ring,
return intel_lr_context_render_state_init(ring, ctx);
}
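+/*
+ * Allocate and pin the GGTT object backing the WA context batches and record
+ * the offsets (in bytes) and sizes (in cache lines) of the indirect ctx and
+ * per-ctx batches within it.
+ */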
+static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
+{
+ int ret;
+ struct drm_device *dev = ring->dev;
+
+ WARN_ON(ring->id != RCS);
+
+ size = roundup(size, PAGE_SIZE);
+ ring->wa_ctx.obj = i915_gem_alloc_object(dev, size);
+ if (!ring->wa_ctx.obj) {
+ DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
+ return -ENOMEM;
+ }
+
+ ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, GEN8_LR_CONTEXT_ALIGN, 0);
+ if (ret) {
+ DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
+ ret);
+ drm_gem_object_unreference(&ring->wa_ctx.obj->base);
+ return ret;
+ }
+
+ ring->wa_ctx.indctx_batch_offset = 0;
+ ring->wa_ctx.indctx_batch_size = 4; /* in cache lines */
+ ring->wa_ctx.perctx_batch_offset =
+ ring->wa_ctx.indctx_batch_size * CACHELINE_BYTES;
+ ring->wa_ctx.perctx_batch_size = 2; /* in cache lines */
+
+ return 0;
+}
+
+static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
+{
+ WARN_ON(ring->id != RCS);
+
+ i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
+ drm_gem_object_unreference(&ring->wa_ctx.obj->base);
+ ring->wa_ctx.obj = NULL;
+}
+
/**
* intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
*
@@ -1470,11 +1601,37 @@ static int logical_render_ring_init(struct drm_device *dev)
ring->emit_bb_start = gen8_emit_bb_start;
ring->dev = dev;
+
+ if (INTEL_INFO(ring->dev)->gen >= 8) {
+ ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = intel_init_workaround_bb(ring);
+ if (ret) {
+ lrc_destroy_wa_ctx_obj(ring);
+ DRM_ERROR("WA batch buffers are not initialized: %d\n",
+ ret);
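+ /* non-fatal: continue without the WA batch buffers */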
+ }
+ }
+
ret = logical_ring_init(dev, ring);
if (ret)
- return ret;
+ goto clear_wa_ctx;
+
+ ret = intel_init_pipe_control(ring);
+ if (ret)
+ goto clear_wa_ctx;
+
+ return 0;
- return intel_init_pipe_control(ring);
+clear_wa_ctx:
+ if (ring->wa_ctx.obj)
+ lrc_destroy_wa_ctx_obj(ring);
+ return ret;
}
static int logical_bsd_ring_init(struct drm_device *dev)
@@ -1754,15 +1911,25 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
reg_state[CTX_SECOND_BB_STATE+1] = 0;
if (ring->id == RCS) {
- /* TODO: according to BSpec, the register state context
- * for CHV does not have these. OTOH, these registers do
- * exist in CHV. I'm waiting for a clarification */
reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
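+ /*
+ * The low bits of the indirect ctx pointer hold the batch size in
+ * cache lines; bit 0 of BB_PER_CTX_PTR marks the per-ctx batch
+ * address as valid.
+ */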
+ if (ring->wa_ctx.obj) {
+ reg_state[CTX_RCS_INDIRECT_CTX+1] =
+ (i915_gem_obj_ggtt_offset(ring->wa_ctx.obj) +
+ ring->wa_ctx.indctx_batch_offset) |
+ ring->wa_ctx.indctx_batch_size;
+
+ reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
+ CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
+
+ reg_state[CTX_BB_PER_CTX_PTR+1] =
+ (i915_gem_obj_ggtt_offset(ring->wa_ctx.obj) +
+ ring->wa_ctx.perctx_batch_offset) | 0x01;
+ }
}
reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
@@ -119,6 +119,14 @@ struct intel_ringbuffer {
struct intel_context;
+struct i915_ctx_workarounds {
+ u32 indctx_batch_offset; /* byte offset of the indirect ctx batch in obj */
+ u32 indctx_batch_size; /* size in cache lines */
+ u32 perctx_batch_offset; /* byte offset of the per-ctx batch in obj */
+ u32 perctx_batch_size; /* size in cache lines */
+ struct drm_i915_gem_object *obj; /* GGTT object backing both batches */
+};
+
struct intel_engine_cs {
const char *name;
enum intel_ring_id {
@@ -142,6 +150,7 @@ struct intel_engine_cs {
struct i915_gem_batch_pool batch_pool;
struct intel_hw_status_page status_page;
+ struct i915_ctx_workarounds wa_ctx;
unsigned irq_refcount; /* protected by dev_priv->irq_lock */
u32 irq_enable_mask; /* bitmask to enable ring interrupt */