| Message ID | 1434482725-21823-2-git-send-email-arun.siluvery@linux.intel.com (mailing list archive) |
|---|---|
| State | New, archived |
On Tue, Jun 16, 2015 at 08:25:20PM +0100, Arun Siluvery wrote:
> +static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
> +                                    uint32_t offset,
> +                                    uint32_t *num_dwords)
> +{
> +    uint32_t index;
> +    struct page *page;
> +    uint32_t *cmd;
> +
> +    page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
> +    cmd = kmap_atomic(page);
> +
> +    index = offset;
> +
> +    /* FIXME: fill one cacheline with NOOPs.
> +     * Replace these instructions with WA
> +     */
> +    while (index < (offset + 16))
> +        cmd[index++] = MI_NOOP;
> +
> +    /*
> +     * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
> +     * execution depends on the length specified in terms of cache lines
> +     * in the register CTX_RCS_INDIRECT_CTX
> +     */
> +
> +    kunmap_atomic(cmd);
> +
> +    if (index > (PAGE_SIZE / sizeof(uint32_t)))
> +        return -EINVAL;

Check before you GPF!

You just overran the buffer and corrupted memory, if you didn't succeed
in trapping a segfault.

To be generic, align to the cacheline then check you have enough room
for your own data.
-Chris
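A minimal sketch of the kind of up-front check being asked for here: round the start offset up to a cacheline and verify the workaround commands fit in the single backing page before anything is written. The helper name and the dword-count argument are illustrative, not part of the patch; ALIGN() and PAGE_SIZE are the usual kernel macros, and CACHELINE_BYTES is the define the patch already relies on.

```c
/*
 * Hypothetical helper: returns true only if 'wa_dwords' commands, starting at
 * the cacheline-aligned 'offset', fit inside one page. Checking this before
 * the kmap and the writes means an oversized workaround list fails cleanly
 * instead of scribbling past the buffer.
 */
static bool wa_bb_fits(uint32_t offset, uint32_t wa_dwords)
{
	uint32_t start = ALIGN(offset, CACHELINE_BYTES / sizeof(uint32_t));

	return start + wa_dwords <= PAGE_SIZE / sizeof(uint32_t);
}
```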
On Tue, Jun 16, 2015 at 08:25:20PM +0100, Arun Siluvery wrote:
> +static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
> +{
> +    int ret;
> +    struct drm_device *dev = ring->dev;

You only use it once, keeping it as a local seems counter-intuitive.

> +    WARN_ON(ring->id != RCS);
> +
> +    size = roundup(size, PAGE_SIZE);

Out of curiosity, is gcc smart enough to turn this into an ALIGN()?

> +    ring->wa_ctx.obj = i915_gem_alloc_object(dev, size);
> +    if (!ring->wa_ctx.obj) {
> +        DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
> +        return -ENOMEM;
> +    }
> +
> +    ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, GEN8_LR_CONTEXT_ALIGN, 0);

Strange choice of alignment since we pass around cacheline offsets.

> +    if (ret) {
> +        DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
> +                         ret);
> +        drm_gem_object_unreference(&ring->wa_ctx.obj->base);
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
> +{
> +    WARN_ON(ring->id != RCS);
> +
> +    i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
> +    drm_gem_object_unreference(&ring->wa_ctx.obj->base);
> +    ring->wa_ctx.obj = NULL;
> +}
> +
>  /**
>   * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
>   *
> @@ -1474,7 +1612,29 @@ static int logical_render_ring_init(struct drm_device *dev)
>      if (ret)
>          return ret;
>
> -    return intel_init_pipe_control(ring);
> +    if (INTEL_INFO(ring->dev)->gen >= 8) {
> +        ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
> +        if (ret) {
> +            DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n",
> +                             ret);
> +            return ret;
> +        }
> +
> +        ret = intel_init_workaround_bb(ring);
> +        if (ret) {
> +            lrc_destroy_wa_ctx_obj(ring);
> +            DRM_ERROR("WA batch buffers are not initialized: %d\n",
> +                      ret);
> +        }
> +    }
> +
> +    ret = intel_init_pipe_control(ring);

Did you consider stuffing it into the spare area of the pipe control
scratch bo? :)
-Chris
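For reference, the two spellings under discussion produce the same result for a page-size alignment; a quick sketch assuming the usual kernel definitions of roundup(), PAGE_ALIGN() and PAGE_MASK:

```c
/* roundup() goes via divide-and-multiply, PAGE_ALIGN() via the power-of-two
 * mask trick; for rounding a size up to PAGE_SIZE the results are identical. */
size = roundup(size, PAGE_SIZE);   /* ((size + PAGE_SIZE - 1) / PAGE_SIZE) * PAGE_SIZE */
size = PAGE_ALIGN(size);           /* (size + PAGE_SIZE - 1) & PAGE_MASK */
```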
On 16/06/2015 21:33, Chris Wilson wrote:
> On Tue, Jun 16, 2015 at 08:25:20PM +0100, Arun Siluvery wrote:
>> +static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
>> +{
>> +    int ret;
>> +    struct drm_device *dev = ring->dev;
>
> You only use it once, keeping it as a local seems counter-intuitive.
>
>> +    WARN_ON(ring->id != RCS);
>> +
>> +    size = roundup(size, PAGE_SIZE);
>
> Out of curiosity, is gcc smart enough to turn this into an ALIGN()?

Replaced with PAGE_ALIGN(size).

>
>> +    ring->wa_ctx.obj = i915_gem_alloc_object(dev, size);
>> +    if (!ring->wa_ctx.obj) {
>> +        DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
>> +        return -ENOMEM;
>> +    }
>> +
>> +    ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, GEN8_LR_CONTEXT_ALIGN, 0);
>
> Strange choice of alignment since we pass around cacheline offsets.
>

This is from the initial version where the buffer was part of the context;
sorry, I missed this. Replaced with PAGE_SIZE.

>> +    if (ret) {
>> +        DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
>> +                         ret);
>> +        drm_gem_object_unreference(&ring->wa_ctx.obj->base);
>> +        return ret;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
>> +{
>> +    WARN_ON(ring->id != RCS);
>> +
>> +    i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
>> +    drm_gem_object_unreference(&ring->wa_ctx.obj->base);
>> +    ring->wa_ctx.obj = NULL;
>> +}
>> +
>>  /**
>>   * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
>>   *
>> @@ -1474,7 +1612,29 @@ static int logical_render_ring_init(struct drm_device *dev)
>>      if (ret)
>>          return ret;
>>
>> -    return intel_init_pipe_control(ring);
>> +    if (INTEL_INFO(ring->dev)->gen >= 8) {
>> +        ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
>> +        if (ret) {
>> +            DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n",
>> +                             ret);
>> +            return ret;
>> +        }
>> +
>> +        ret = intel_init_workaround_bb(ring);
>> +        if (ret) {
>> +            lrc_destroy_wa_ctx_obj(ring);
>> +            DRM_ERROR("WA batch buffers are not initialized: %d\n",
>> +                      ret);
>> +        }
>> +    }
>> +
>> +    ret = intel_init_pipe_control(ring);
>
> Did you consider stuffing it into the spare area of the pipe control
> scratch bo? :)

Not exactly, but I think it is better to keep them separate. It is not that
a single page would be insufficient even if we add more WA in the future;
the reason is a logical one. If there is an error while initializing these
WA we destroy the page and continue, which we could not do with the
scratch page.

regards
Arun

> -Chris
>
On 16/06/2015 21:25, Chris Wilson wrote:
> On Tue, Jun 16, 2015 at 08:25:20PM +0100, Arun Siluvery wrote:
>> +static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
>> +                                    uint32_t offset,
>> +                                    uint32_t *num_dwords)
>> +{
>> +    uint32_t index;
>> +    struct page *page;
>> +    uint32_t *cmd;
>> +
>> +    page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
>> +    cmd = kmap_atomic(page);
>> +
>> +    index = offset;
>> +
>> +    /* FIXME: fill one cacheline with NOOPs.
>> +     * Replace these instructions with WA
>> +     */
>> +    while (index < (offset + 16))
>> +        cmd[index++] = MI_NOOP;
>> +
>> +    /*
>> +     * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
>> +     * execution depends on the length specified in terms of cache lines
>> +     * in the register CTX_RCS_INDIRECT_CTX
>> +     */
>> +
>> +    kunmap_atomic(cmd);
>> +
>> +    if (index > (PAGE_SIZE / sizeof(uint32_t)))
>> +        return -EINVAL;
>
> Check before you GPF!
>
> You just overran the buffer and corrupted memory, if you didn't succeed
> in trapping a segfault.
>
> To be generic, align to the cacheline then check you have enough room
> for your own data.
> -Chris
>
Hi Chris,

The placement of the condition is not correct. I don't completely follow
your suggestion, could you please elaborate? Here we don't know upfront
how much more data is to be written.
I have made the changes below to check after writing every command and
to return an error as soon as we reach the end.

#define wa_ctx_emit(batch, cmd) { \
	if (WARN_ON(index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
		kunmap_atomic(batch); \
		return -ENOSPC; \
	} \
	batch[index++] = (cmd); \
}

Is this acceptable?
I think this is the only open issue; all other comments are addressed.

regards
Arun
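As an aside, a statement-like macro of this shape is usually wrapped in do { ... } while (0) so it composes safely with un-braced if/else; a sketch of the same bounds check in that form (illustrative only, not what the patch settled on; the hidden early return is still something callers have to keep in mind):

```c
#define wa_ctx_emit(batch, cmd) do { \
	/* refuse to write past the single backing page */ \
	if (WARN_ON(index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
		kunmap_atomic(batch); \
		return -ENOSPC; \
	} \
	(batch)[index++] = (cmd); \
} while (0)
```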
On 17/06/2015 19:48, Siluvery, Arun wrote:
> On 16/06/2015 21:25, Chris Wilson wrote:
>> On Tue, Jun 16, 2015 at 08:25:20PM +0100, Arun Siluvery wrote:
>>> +static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
>>> +                                    uint32_t offset,
>>> +                                    uint32_t *num_dwords)
>>> +{
>>> +    uint32_t index;
>>> +    struct page *page;
>>> +    uint32_t *cmd;
>>> +
>>> +    page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
>>> +    cmd = kmap_atomic(page);
>>> +
>>> +    index = offset;
>>> +
>>> +    /* FIXME: fill one cacheline with NOOPs.
>>> +     * Replace these instructions with WA
>>> +     */
>>> +    while (index < (offset + 16))
>>> +        cmd[index++] = MI_NOOP;
>>> +
>>> +    /*
>>> +     * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
>>> +     * execution depends on the length specified in terms of cache lines
>>> +     * in the register CTX_RCS_INDIRECT_CTX
>>> +     */
>>> +
>>> +    kunmap_atomic(cmd);
>>> +
>>> +    if (index > (PAGE_SIZE / sizeof(uint32_t)))
>>> +        return -EINVAL;
>>
>> Check before you GPF!
>>
>> You just overran the buffer and corrupted memory, if you didn't succeed
>> in trapping a segfault.
>>
>> To be generic, align to the cacheline then check you have enough room
>> for your own data.
>> -Chris
>>
> Hi Chris,
>
> The placement of the condition is not correct. I don't completely follow
> your suggestion, could you please elaborate? Here we don't know upfront
> how much more data is to be written.
> I have made the changes below to check after writing every command and
> to return an error as soon as we reach the end.
>
> #define wa_ctx_emit(batch, cmd) { \
>     if (WARN_ON(index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
>         kunmap_atomic(batch); \
>         return -ENOSPC; \
>     } \
>     batch[index++] = (cmd); \
> }
>
> Is this acceptable?
> I think this is the only open issue; all other comments are addressed.
>
One other improvement is possible: mapping/unmapping the page can be kept
in the common path. I will update the patch accordingly.

regards
Arun

> regards
> Arun
>
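A sketch of what keeping the map/unmap in the common path could look like: intel_init_workaround_bb() kmaps the page once and hands the pointer to the per-BB helpers, whose signatures gain a batch argument. This only illustrates the stated intent under that assumption; it is not the code from the follow-up revision.

```c
static int intel_init_workaround_bb(struct intel_engine_cs *ring)
{
	int ret;
	uint32_t num_dwords;
	struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
	struct page *page;
	uint32_t *batch;

	/* map the single backing page once, instead of in every helper */
	page = i915_gem_object_get_page(wa_ctx->obj, 0);
	batch = kmap_atomic(page);

	wa_ctx->indctx_batch_offset = 0;
	ret = gen8_init_indirectctx_bb(ring, batch,
				       wa_ctx->indctx_batch_offset,
				       &num_dwords);
	if (ret)
		goto out;

	wa_ctx->indctx_batch_size = round_up(num_dwords, CACHELINE_DWORDS);
	wa_ctx->perctx_batch_offset = wa_ctx->indctx_batch_size;
	ret = gen8_init_perctx_bb(ring, batch,
				  wa_ctx->perctx_batch_offset,
				  &num_dwords);

out:
	/* single unmap on both the success and the error path */
	kunmap_atomic(batch);
	return ret;
}
```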
On Wed, Jun 17, 2015 at 07:48:16PM +0100, Siluvery, Arun wrote:
> On 16/06/2015 21:25, Chris Wilson wrote:
> >On Tue, Jun 16, 2015 at 08:25:20PM +0100, Arun Siluvery wrote:
> >>+static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
> >>+                                    uint32_t offset,
> >>+                                    uint32_t *num_dwords)
> >>+{
> >>+    uint32_t index;
> >>+    struct page *page;
> >>+    uint32_t *cmd;
> >>+
> >>+    page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
> >>+    cmd = kmap_atomic(page);
> >>+
> >>+    index = offset;
> >>+
> >>+    /* FIXME: fill one cacheline with NOOPs.
> >>+     * Replace these instructions with WA
> >>+     */
> >>+    while (index < (offset + 16))
> >>+        cmd[index++] = MI_NOOP;
> >>+
> >>+    /*
> >>+     * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
> >>+     * execution depends on the length specified in terms of cache lines
> >>+     * in the register CTX_RCS_INDIRECT_CTX
> >>+     */
> >>+
> >>+    kunmap_atomic(cmd);
> >>+
> >>+    if (index > (PAGE_SIZE / sizeof(uint32_t)))
> >>+        return -EINVAL;
> >
> >Check before you GPF!
> >
> >You just overran the buffer and corrupted memory, if you didn't succeed
> >in trapping a segfault.
> >
> >To be generic, align to the cacheline then check you have enough room
> >for your own data.
> >-Chris
> >
> Hi Chris,
>
> The placement of the condition is not correct. I don't completely follow
> your suggestion, could you please elaborate? Here we don't know
> upfront how much more data is to be written.

Hmm, are we anticipating an unbounded number of workarounds? At some
point you have to have a rough upper bound in order to do the bo
allocation. If we are really unsure, then we do need to split this into
two passes, one to count the number of dwords and the second to allocate
and actually fill the cmd[].

> I have made the changes below to check after writing every command and
> to return an error as soon as we reach the end.
>
> #define wa_ctx_emit(batch, cmd) { \
>     if (WARN_ON(index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
>         kunmap_atomic(batch); \
>         return -ENOSPC; \
>     } \
>     batch[index++] = (cmd); \
> }
>
> Is this acceptable?
> I think this is the only open issue; all other comments are addressed.

It's the lesser of evils, for sure. I still feel dubious that we don't know
upfront how much data we need to allocate.
-Chris
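For what it's worth, the two-pass shape described here can stay quite small: call the emit function once with a NULL batch to count dwords, size the object from that, then call it again against the mapped page. The function below is a hypothetical illustration, not part of the patch.

```c
/*
 * Emit the indirect-context workarounds. With batch == NULL nothing is
 * written and the return value is just the final dword index (pass 1);
 * with a mapped page the commands are actually stored (pass 2).
 */
static uint32_t gen8_emit_indirectctx_wa(uint32_t *batch, uint32_t index)
{
	int i;

	/* placeholder: one cacheline of MI_NOOPs, as in the current patch */
	for (i = 0; i < 16; i++) {
		if (batch)
			batch[index] = MI_NOOP;
		index++;
	}

	return index;
}
```

The caller would do `num_dwords = gen8_emit_indirectctx_wa(NULL, 0);`, allocate a `PAGE_ALIGN(num_dwords * sizeof(uint32_t))` object, and then repeat the call with the kmap'd page.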
On 17/06/2015 21:21, Chris Wilson wrote:
> On Wed, Jun 17, 2015 at 07:48:16PM +0100, Siluvery, Arun wrote:
>> On 16/06/2015 21:25, Chris Wilson wrote:
>>> On Tue, Jun 16, 2015 at 08:25:20PM +0100, Arun Siluvery wrote:
>>>> +static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
>>>> +                                    uint32_t offset,
>>>> +                                    uint32_t *num_dwords)
>>>> +{
>>>> +    uint32_t index;
>>>> +    struct page *page;
>>>> +    uint32_t *cmd;
>>>> +
>>>> +    page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
>>>> +    cmd = kmap_atomic(page);
>>>> +
>>>> +    index = offset;
>>>> +
>>>> +    /* FIXME: fill one cacheline with NOOPs.
>>>> +     * Replace these instructions with WA
>>>> +     */
>>>> +    while (index < (offset + 16))
>>>> +        cmd[index++] = MI_NOOP;
>>>> +
>>>> +    /*
>>>> +     * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
>>>> +     * execution depends on the length specified in terms of cache lines
>>>> +     * in the register CTX_RCS_INDIRECT_CTX
>>>> +     */
>>>> +
>>>> +    kunmap_atomic(cmd);
>>>> +
>>>> +    if (index > (PAGE_SIZE / sizeof(uint32_t)))
>>>> +        return -EINVAL;
>>>
>>> Check before you GPF!
>>>
>>> You just overran the buffer and corrupted memory, if you didn't succeed
>>> in trapping a segfault.
>>>
>>> To be generic, align to the cacheline then check you have enough room
>>> for your own data.
>>> -Chris
>>>
>> Hi Chris,
>>
>> The placement of the condition is not correct. I don't completely follow
>> your suggestion, could you please elaborate? Here we don't know
>> upfront how much more data is to be written.
>
> Hmm, are we anticipating an unbounded number of workarounds? At some
> point you have to have a rough upper bound in order to do the bo
> allocation. If we are really unsure, then we do need to split this into
> two passes, one to count the number of dwords and the second to allocate
> and actually fill the cmd[].
>
Since we have a full page dedicated to this, it should be sufficient for a
good number of WA; if we ever need more than one page we have bigger
problems. The list for Gen8 is small, and the same is true for Gen9; a few
more may get added going forward, but nowhere near enough to fill the
entire page. Some of them will also be restricted to specific
steppings/revisions. For these reasons I think a single-page setup is
sufficient. Do you anticipate any other use cases that require allocating
more than one page?

A two-pass approach can be implemented, but it adds complexity that may
not be required in this case. Please let me know your thoughts.

>> I have made the changes below to check after writing every command and
>> to return an error as soon as we reach the end.
>>
>> #define wa_ctx_emit(batch, cmd) { \
>>     if (WARN_ON(index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
>>         kunmap_atomic(batch); \
>>         return -ENOSPC; \
>>     } \
>>     batch[index++] = (cmd); \
>> }
>>
>> Is this acceptable?
>> I think this is the only open issue; all other comments are addressed.
>
> It's the lesser of evils, for sure. I still feel dubious that we don't know
> upfront how much data we need to allocate.

Yes, but with a single-pass approach do you see any way it can be improved?

regards
Arun

> -Chris
>
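To put the single-page headroom in numbers (assuming 4K pages and the 64-byte CACHELINE_BYTES the patch uses; the macro names here are only illustrative):

```c
/* capacity of the one backing page for workaround commands */
#define WA_PAGE_DWORDS     (PAGE_SIZE / sizeof(uint32_t))  /* 4096 / 4  = 1024 dwords */
#define WA_PAGE_CACHELINES (PAGE_SIZE / CACHELINE_BYTES)   /* 4096 / 64 = 64 cachelines */
```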
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0413b8f..cad274a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -211,6 +211,7 @@ enum {
 	FAULT_AND_CONTINUE /* Unsupported */
 };
 #define GEN8_CTX_ID_SHIFT 32
+#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
 
 static int intel_lr_context_pin(struct intel_engine_cs *ring,
 		struct intel_context *ctx);
@@ -1077,6 +1078,109 @@ static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
 	return 0;
 }
 
+static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
+				    uint32_t offset,
+				    uint32_t *num_dwords)
+{
+	uint32_t index;
+	struct page *page;
+	uint32_t *cmd;
+
+	page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
+	cmd = kmap_atomic(page);
+
+	index = offset;
+
+	/* FIXME: fill one cacheline with NOOPs.
+	 * Replace these instructions with WA
+	 */
+	while (index < (offset + 16))
+		cmd[index++] = MI_NOOP;
+
+	/*
+	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
+	 * execution depends on the length specified in terms of cache lines
+	 * in the register CTX_RCS_INDIRECT_CTX
+	 */
+
+	kunmap_atomic(cmd);
+
+	if (index > (PAGE_SIZE / sizeof(uint32_t)))
+		return -EINVAL;
+
+	*num_dwords = index - offset;
+
+	return 0;
+}
+
+static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
+			       uint32_t offset,
+			       uint32_t *num_dwords)
+{
+	uint32_t index;
+	struct page *page;
+	uint32_t *cmd;
+
+	page = i915_gem_object_get_page(ring->wa_ctx.obj, 0);
+	cmd = kmap_atomic(page);
+
+	index = offset;
+
+	/* FIXME: fill one cacheline with NOOPs.
+	 * Replace these instructions with WA
+	 */
+	while (index < (offset + 16))
+		cmd[index++] = MI_NOOP;
+
+	cmd[index - 1] = MI_BATCH_BUFFER_END;
+
+	kunmap_atomic(cmd);
+
+	if (index > (PAGE_SIZE / sizeof(uint32_t)))
+		return -EINVAL;
+
+	*num_dwords = index - offset;
+
+	return 0;
+}
+
+static int intel_init_workaround_bb(struct intel_engine_cs *ring)
+{
+	int ret;
+	uint32_t num_dwords;
+	struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
+
+	if (ring->scratch.obj == NULL) {
+		DRM_ERROR("scratch page not allocated for %s\n", ring->name);
+		return -EINVAL;
+	}
+
+	if (INTEL_INFO(ring->dev)->gen == 8) {
+		wa_ctx->indctx_batch_offset = 0;
+
+		ret = gen8_init_indirectctx_bb(ring,
+					       wa_ctx->indctx_batch_offset,
+					       &num_dwords);
+		if (ret)
+			return ret;
+
+		wa_ctx->indctx_batch_size = round_up(num_dwords, CACHELINE_DWORDS);
+		wa_ctx->perctx_batch_offset = wa_ctx->indctx_batch_size;
+
+		ret = gen8_init_perctx_bb(ring,
+					  wa_ctx->perctx_batch_offset,
+					  &num_dwords);
+		if (ret)
+			return ret;
+	} else {
+		WARN(INTEL_INFO(ring->dev)->gen >= 8,
+		     "WA batch buffer is not initialized for Gen%d\n",
+		     INTEL_INFO(ring->dev)->gen);
+	}
+
+	return 0;
+}
+
 static int gen8_init_common_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -1382,6 +1486,40 @@ static int gen8_init_rcs_context(struct intel_engine_cs *ring,
 	return intel_lr_context_render_state_init(ring, ctx);
 }
 
+static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
+{
+	int ret;
+	struct drm_device *dev = ring->dev;
+
+	WARN_ON(ring->id != RCS);
+
+	size = roundup(size, PAGE_SIZE);
+	ring->wa_ctx.obj = i915_gem_alloc_object(dev, size);
+	if (!ring->wa_ctx.obj) {
+		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
+		return -ENOMEM;
+	}
+
+	ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, GEN8_LR_CONTEXT_ALIGN, 0);
+	if (ret) {
+		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
+				 ret);
+		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
+{
+	WARN_ON(ring->id != RCS);
+
+	i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
+	drm_gem_object_unreference(&ring->wa_ctx.obj->base);
+	ring->wa_ctx.obj = NULL;
+}
+
 /**
  * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
  *
@@ -1474,7 +1612,29 @@ static int logical_render_ring_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	return intel_init_pipe_control(ring);
+	if (INTEL_INFO(ring->dev)->gen >= 8) {
+		ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
+		if (ret) {
+			DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n",
+					 ret);
+			return ret;
+		}
+
+		ret = intel_init_workaround_bb(ring);
+		if (ret) {
+			lrc_destroy_wa_ctx_obj(ring);
+			DRM_ERROR("WA batch buffers are not initialized: %d\n",
+				  ret);
+		}
+	}
+
+	ret = intel_init_pipe_control(ring);
+	if (ret) {
+		if (ring->wa_ctx.obj)
+			lrc_destroy_wa_ctx_obj(ring);
+	}
+
+	return ret;
 }
 
 static int logical_bsd_ring_init(struct drm_device *dev)
@@ -1754,15 +1914,26 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
 	reg_state[CTX_SECOND_BB_STATE+1] = 0;
 	if (ring->id == RCS) {
-		/* TODO: according to BSpec, the register state context
-		 * for CHV does not have these. OTOH, these registers do
-		 * exist in CHV. I'm waiting for a clarification */
 		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
 		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
 		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
 		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
 		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
 		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
+		if (ring->wa_ctx.obj) {
+			reg_state[CTX_RCS_INDIRECT_CTX+1] =
+				(i915_gem_obj_ggtt_offset(ring->wa_ctx.obj) +
+				 ring->wa_ctx.indctx_batch_offset * sizeof(uint32_t)) |
+				(ring->wa_ctx.indctx_batch_size / CACHELINE_DWORDS);
+
+			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
+				CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
+
+			reg_state[CTX_BB_PER_CTX_PTR+1] =
+				(i915_gem_obj_ggtt_offset(ring->wa_ctx.obj) +
+				 ring->wa_ctx.perctx_batch_offset * sizeof(uint32_t)) |
+				0x01;
+		}
 	}
 	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
 	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 39f6dfc..1f38af3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -12,6 +12,7 @@
  * workarounds!
  */
 #define CACHELINE_BYTES 64
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
 
 /*
  * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
@@ -119,6 +120,22 @@ struct intel_ringbuffer {
 
 struct intel_context;
 
+/*
+ * we use a single page to load ctx workarounds so all of these
+ * values are referred in terms of dwords
+ *
+ * offset field - helpful in case if we want to have multiple batches
+ * at different offsets based on some conditions. It is not a requirement
+ * at the moment but provides an option for future use.
+ * indctx_batch_size - HW expects this value in terms of cachelines
+ */
+struct i915_ctx_workarounds {
+	u32 indctx_batch_offset;
+	u32 indctx_batch_size;
+	u32 perctx_batch_offset;
+	struct drm_i915_gem_object *obj;
+};
+
 struct intel_engine_cs {
 	const char *name;
 	enum intel_ring_id {
@@ -142,6 +159,7 @@ struct intel_engine_cs {
 	struct i915_gem_batch_pool batch_pool;
 
 	struct intel_hw_status_page status_page;
+	struct i915_ctx_workarounds wa_ctx;
 
 	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
 	u32 irq_enable_mask;	/* bitmask to enable ring interrupt */