Message ID | 1399405683-27409-2-git-send-email-abdiel.janulgue@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, May 06, 2014 at 10:48:02PM +0300, Abdiel Janulgue wrote: > Use on-chip hw-binding table generator to generate binding > tables when the test emits SURFACE_STATES packets. The hw generates > these binding table packets internally so we don't have to > allocate space on the batchbuffer. > > Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com> > --- > lib/gen8_render.h | 13 +++++++ > lib/intel_batchbuffer.c | 12 ++++++ > lib/intel_batchbuffer.h | 3 ++ > lib/intel_reg.h | 3 ++ > lib/rendercopy_gen8.c | 97 ++++++++++++++++++++++++++++++++++++++++++++--- > 5 files changed, 123 insertions(+), 5 deletions(-) > > diff --git a/lib/gen8_render.h b/lib/gen8_render.h > index fffc100..4d5f7ba 100644 > --- a/lib/gen8_render.h > +++ b/lib/gen8_render.h > @@ -83,6 +83,19 @@ > /* Random shifts */ > #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23 > > +/* GEN8+ resource streamer */ > +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC (0x7919 << 16) /* GEN8 */ > +# define BINDING_TABLE_POOL_ENABLE 0x0800 > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS (0x7843 << 16) /* GEN8 */ > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS (0x7844 << 16) /* GEN8 */ > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS (0x7845 << 16) /* GEN8 */ > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS (0x7846 << 16) /* GEN8 */ > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS (0x7847 << 16) /* GEN8 */ > + > +/* Toggling RS needs PIPE_CONTROL SC invalidate */ > +#define _3DSTATE_PIPE_CONTROL ((0x3 << 29) | (3 << 27) | (2 << 24)) > +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) > + > /* Shamelessly ripped from mesa */ > struct gen8_surface_state > { > diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c > index e868922..26c9b89 100644 > --- a/lib/intel_batchbuffer.c > +++ b/lib/intel_batchbuffer.c > @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) > memset(batch->buffer, 0, sizeof(batch->buffer)); > > batch->ptr = batch->buffer; > + > + if (batch->hw_bt_pool_bo != NULL) { > + drm_intel_bo_unreference(batch->hw_bt_pool_bo); > + batch->hw_bt_pool_bo = NULL; > + } > + > + batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt", > + 131072, 4096); > } > > /** > @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch) > { > drm_intel_bo_unreference(batch->bo); > batch->bo = NULL; > + > + drm_intel_bo_unreference(batch->hw_bt_pool_bo); > + batch->hw_bt_pool_bo = NULL; > + > free(batch); > } > > diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h > index 49dbcf0..2a516bc 100644 > --- a/lib/intel_batchbuffer.h > +++ b/lib/intel_batchbuffer.h > @@ -18,6 +18,9 @@ struct intel_batchbuffer { > uint8_t buffer[BATCH_SZ]; > uint8_t *ptr; > uint8_t *state; > + > + drm_intel_bo *hw_bt_pool_bo; > + int use_resource_streamer; > }; > > struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr, > diff --git a/lib/intel_reg.h b/lib/intel_reg.h > index 4b3a102..1cb9a29 100644 > --- a/lib/intel_reg.h > +++ b/lib/intel_reg.h > @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. > #define MI_BATCH_NON_SECURE (1) > #define MI_BATCH_NON_SECURE_I965 (1 << 8) > > +/* Resource Streamer */ > +#define MI_RS_CONTROL (0x6 << 23) > + > #define MAX_DISPLAY_PIPES 2 > > typedef enum { > diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c > index 6f5a698..b0dd0f3 100644 > --- a/lib/rendercopy_gen8.c > +++ b/lib/rendercopy_gen8.c > @@ -169,12 +169,14 @@ static void > gen6_render_flush(struct intel_batchbuffer *batch, > drm_intel_context *context, uint32_t batch_end) > { > - int ret; > + int ret = 0; > + uint32_t flags = 0; > > ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer); > + flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0; > if (ret == 0) > ret = drm_intel_gem_bo_context_exec(batch->bo, context, > - batch_end, 0); > + batch_end, flags | I915_EXEC_RENDER); > assert(ret == 0); > } > > @@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf, > return offset; > } > > +static void > +gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable) > +{ > + if (!enable) { > + OUT_BATCH(MI_RS_CONTROL | 0x0); > + > + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2)); > + /* binding table pool base address */ > + OUT_BATCH(0); > + OUT_BATCH(0); > + /* Upper bound */ > + OUT_BATCH(0); > + WaCsStallBeforeStateCacheInvalidate says you need to emit another PIPE_CONTROL here with CS stall bit set. And thanks to the other restrictions that PIPE_CONTORL needs one of the other magic bits also set. Stall at scoreboard perhaps? I think that's what we set in the kernel usually. > + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); > + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + return; > + } > + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2)); > + > + /* binding table pool base address */ > + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, > + BINDING_TABLE_POOL_ENABLE); > + OUT_BATCH(0); I suppose it doesn't matter much which domain we use here. But the hardware writes to the BT pool so maybe you should set write domain as well? > + > + /* Upper bound */ > + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, > + batch->hw_bt_pool_bo->size); AFAICS on bdw this should be just the bt pool size and not a reloc. > + Again need another CS stall PIPE_CONTROL. > + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); > + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > + OUT_BATCH(0); > +} > + > +static uint32_t > +gen8_rs_bind_surfaces(struct intel_batchbuffer *batch, > + struct igt_buf *src, > + struct igt_buf *dst, > + uint32_t *surf0, uint32_t *surf1) > +{ > + *surf0 = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1); > + *surf1 = gen8_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 0); > + > + return 0; > +} > + > +static void > +gen8_rs_edit_surfaces(struct intel_batchbuffer *batch, > + uint32_t surf0, uint32_t surf1) > +{ > + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2)); > + OUT_BATCH(0x3); So I take it there's no need to clear the entries first? > + { > + OUT_BATCH(0 << 16 | surf0 >> 6); > + OUT_BATCH(1 << 16 | surf1 >> 6); > + } > +} > + > static uint32_t > gen8_bind_surfaces(struct intel_batchbuffer *batch, > struct igt_buf *src, > struct igt_buf *dst) > { > - uint32_t *binding_table, offset; > + uint32_t offset = 0; > + uint32_t *binding_table; > > binding_table = batch_alloc(batch, 8, 32); > offset = batch_offset(batch, binding_table); > annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE, > offset, 8); > - > binding_table[0] = > gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1); > binding_table[1] = > @@ -512,6 +579,9 @@ gen7_emit_push_constants(struct intel_batchbuffer *batch) { > > static void > gen8_emit_state_base_address(struct intel_batchbuffer *batch) { > + if (batch->use_resource_streamer) > + OUT_BATCH(MI_RS_CONTROL | 0x0); > + > OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2)); > > /* general */ > @@ -546,6 +616,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch) { > OUT_BATCH(0xfffff000 | 1); > /* intruction buffer size */ > OUT_BATCH(1 << 12 | 1); > + > + if (batch->use_resource_streamer) > + OUT_BATCH(MI_RS_CONTROL | 0x1); > } > > static void > @@ -918,6 +991,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, > uint32_t scissor_state; > uint32_t vertex_buffer; > uint32_t batch_end; > + uint32_t surf0 = 0, surf1 = 1; > > intel_batchbuffer_flush_with_context(batch, context); > > @@ -927,7 +1001,11 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, > > annotation_init(&aub_annotations); > > - ps_binding_table = gen8_bind_surfaces(batch, src, dst); > + if (batch->use_resource_streamer) > + ps_binding_table = gen8_rs_bind_surfaces(batch, src, dst, > + &surf0, &surf1); > + else > + ps_binding_table = gen8_bind_surfaces(batch, src, dst); > ps_sampler_state = gen8_create_sampler(batch); > ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel)); > vertex_buffer = gen7_fill_vertex_buffer_data(batch, src, > @@ -955,6 +1033,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, > > gen8_emit_state_base_address(batch); > > + if (batch->use_resource_streamer) > + gen8_hw_binding_table(batch, true); > + > OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC); > OUT_BATCH(viewport.cc_state); > OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); > @@ -978,6 +1059,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, > > gen8_emit_sf(batch); > > + if (batch->use_resource_streamer) > + gen8_rs_edit_surfaces(batch, surf0, surf1); > + > OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS); > OUT_BATCH(ps_binding_table); > > @@ -1001,6 +1085,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, > gen8_emit_vf_topology(batch); > gen8_emit_primitive(batch, vertex_buffer); > > + if (batch->use_resource_streamer) > + gen8_hw_binding_table(batch, false); > + > OUT_BATCH(MI_BATCH_BUFFER_END); > > batch_end = batch_align(batch, 8); > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Wednesday, May 07, 2014 12:38:04 AM Ville Syrjälä wrote: > On Tue, May 06, 2014 at 10:48:02PM +0300, Abdiel Janulgue wrote: > > Use on-chip hw-binding table generator to generate binding > > tables when the test emits SURFACE_STATES packets. The hw generates > > these binding table packets internally so we don't have to > > allocate space on the batchbuffer. > > > > Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com> > > --- > > > > lib/gen8_render.h | 13 +++++++ > > lib/intel_batchbuffer.c | 12 ++++++ > > lib/intel_batchbuffer.h | 3 ++ > > lib/intel_reg.h | 3 ++ > > lib/rendercopy_gen8.c | 97 > > ++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 123 > > insertions(+), 5 deletions(-) > > > > diff --git a/lib/gen8_render.h b/lib/gen8_render.h > > index fffc100..4d5f7ba 100644 > > --- a/lib/gen8_render.h > > +++ b/lib/gen8_render.h > > @@ -83,6 +83,19 @@ > > > > /* Random shifts */ > > #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23 > > > > +/* GEN8+ resource streamer */ > > +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC (0x7919 << 16) /* > > GEN8 */ +# define BINDING_TABLE_POOL_ENABLE 0x0800 > > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS (0x7843 << 16) /* > > GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS (0x7844 << > > 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS > > (0x7845 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS > > (0x7846 << 16) /* GEN8 */ +#define > > GEN8_3DSTATE_BINDING_TABLE_EDIT_PS (0x7847 << 16) /* GEN8 */ + > > +/* Toggling RS needs PIPE_CONTROL SC invalidate */ > > +#define _3DSTATE_PIPE_CONTROL ((0x3 << 29) | (3 << 27) | (2 << 24)) > > +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) > > + > > > > /* Shamelessly ripped from mesa */ > > struct gen8_surface_state > > { > > > > diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c > > index e868922..26c9b89 100644 > > --- a/lib/intel_batchbuffer.c > > +++ b/lib/intel_batchbuffer.c > > @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer > > *batch) > > > > memset(batch->buffer, 0, sizeof(batch->buffer)); > > > > batch->ptr = batch->buffer; > > > > + > > + if (batch->hw_bt_pool_bo != NULL) { > > + drm_intel_bo_unreference(batch->hw_bt_pool_bo); > > + batch->hw_bt_pool_bo = NULL; > > + } > > + > > + batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt", > > + 131072, 4096); > > > > } > > > > /** > > > > @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer > > *batch)> > > { > > > > drm_intel_bo_unreference(batch->bo); > > batch->bo = NULL; > > > > + > > + drm_intel_bo_unreference(batch->hw_bt_pool_bo); > > + batch->hw_bt_pool_bo = NULL; > > + > > > > free(batch); > > > > } > > > > diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h > > index 49dbcf0..2a516bc 100644 > > --- a/lib/intel_batchbuffer.h > > +++ b/lib/intel_batchbuffer.h > > @@ -18,6 +18,9 @@ struct intel_batchbuffer { > > > > uint8_t buffer[BATCH_SZ]; > > uint8_t *ptr; > > uint8_t *state; > > > > + > > + drm_intel_bo *hw_bt_pool_bo; > > + int use_resource_streamer; > > > > }; > > > > struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr > > *bufmgr, > > > > diff --git a/lib/intel_reg.h b/lib/intel_reg.h > > index 4b3a102..1cb9a29 100644 > > --- a/lib/intel_reg.h > > +++ b/lib/intel_reg.h > > @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > SOFTWARE.> > > #define MI_BATCH_NON_SECURE (1) > > #define MI_BATCH_NON_SECURE_I965 (1 << 8) > > > > +/* Resource Streamer */ > > +#define MI_RS_CONTROL (0x6 << 23) > > + > > > > #define MAX_DISPLAY_PIPES 2 > > > > typedef enum { > > > > diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c > > index 6f5a698..b0dd0f3 100644 > > --- a/lib/rendercopy_gen8.c > > +++ b/lib/rendercopy_gen8.c > > @@ -169,12 +169,14 @@ static void > > > > gen6_render_flush(struct intel_batchbuffer *batch, > > > > drm_intel_context *context, uint32_t batch_end) > > > > { > > > > - int ret; > > + int ret = 0; > > + uint32_t flags = 0; > > > > ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer); > > > > + flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0; > > > > if (ret == 0) > > > > ret = drm_intel_gem_bo_context_exec(batch->bo, context, > > > > - batch_end, 0); > > + batch_end, flags | I915_EXEC_RENDER); > > > > assert(ret == 0); > > > > } > > > > @@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch, > > struct igt_buf *buf,> > > return offset; > > > > } > > > > +static void > > +gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable) > > +{ > > + if (!enable) { > > + OUT_BATCH(MI_RS_CONTROL | 0x0); > > + > > + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2)); > > + /* binding table pool base address */ > > + OUT_BATCH(0); > > + OUT_BATCH(0); > > + /* Upper bound */ > > + OUT_BATCH(0); > > + > > WaCsStallBeforeStateCacheInvalidate says you need to emit another > PIPE_CONTROL here with CS stall bit set. And thanks to the other > restrictions that PIPE_CONTORL needs one of the other magic bits also > set. Stall at scoreboard perhaps? I think that's what we set in the > kernel usually. It used to have the CS stall bit. But it seems to work without it. In any case, you might be right. It's a good idea to put in back in. > > > + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); > > + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); > > + OUT_BATCH(0); > > + OUT_BATCH(0); > > + OUT_BATCH(0); > > + OUT_BATCH(0); > > + > > + return; > > + } > > + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2)); > > + > > + /* binding table pool base address */ > > + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, > > + BINDING_TABLE_POOL_ENABLE); > > + OUT_BATCH(0); > > I suppose it doesn't matter much which domain we use here. But the > hardware writes to the BT pool so maybe you should set write domain > as well? > > > + > > + /* Upper bound */ > > + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, > > + batch->hw_bt_pool_bo->size); > > AFAICS on bdw this should be just the bt pool size and not a reloc. > > > + > > Again need another CS stall PIPE_CONTROL. > > > + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); > > + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); > > + OUT_BATCH(0); > > + OUT_BATCH(0); > > + OUT_BATCH(0); > > + OUT_BATCH(0); > > +} > > + > > +static uint32_t > > +gen8_rs_bind_surfaces(struct intel_batchbuffer *batch, > > + struct igt_buf *src, > > + struct igt_buf *dst, > > + uint32_t *surf0, uint32_t *surf1) > > +{ > > + *surf0 = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, > > 1); > > + *surf1 = gen8_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, > > 0); > > + > > + return 0; > > +} > > + > > +static void > > +gen8_rs_edit_surfaces(struct intel_batchbuffer *batch, > > + uint32_t surf0, uint32_t surf1) > > +{ > > + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2)); > > + OUT_BATCH(0x3); > > So I take it there's no need to clear the entries first? I don't think it's necessary on this case. The clear is only neded if the binding table pool is re-used from the previous batch - which might contain stale entries. At least it causes hungs in Mesa. In this test, we are allocating the pool same time as the batch is allocated. > > > + { > > + OUT_BATCH(0 << 16 | surf0 >> 6); > > + OUT_BATCH(1 << 16 | surf1 >> 6); > > + } > > +} > > + > > > > static uint32_t > > gen8_bind_surfaces(struct intel_batchbuffer *batch, > > > > struct igt_buf *src, > > struct igt_buf *dst) > > > > { > > > > - uint32_t *binding_table, offset; > > + uint32_t offset = 0; > > + uint32_t *binding_table; > > > > binding_table = batch_alloc(batch, 8, 32); > > offset = batch_offset(batch, binding_table); > > annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE, > > > > offset, 8); > > > > - > > > > binding_table[0] = > > > > gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1); > > > > binding_table[1] = > > > > @@ -512,6 +579,9 @@ gen7_emit_push_constants(struct intel_batchbuffer > > *batch) {> > > static void > > gen8_emit_state_base_address(struct intel_batchbuffer *batch) { > > > > + if (batch->use_resource_streamer) > > + OUT_BATCH(MI_RS_CONTROL | 0x0); > > + > > > > OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2)); > > > > /* general */ > > > > @@ -546,6 +616,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer > > *batch) {> > > OUT_BATCH(0xfffff000 | 1); > > /* intruction buffer size */ > > OUT_BATCH(1 << 12 | 1); > > > > + > > + if (batch->use_resource_streamer) > > + OUT_BATCH(MI_RS_CONTROL | 0x1); > > > > } > > > > static void > > > > @@ -918,6 +991,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer > > *batch,> > > uint32_t scissor_state; > > uint32_t vertex_buffer; > > uint32_t batch_end; > > > > + uint32_t surf0 = 0, surf1 = 1; > > > > intel_batchbuffer_flush_with_context(batch, context); > > > > @@ -927,7 +1001,11 @@ void gen8_render_copyfunc(struct intel_batchbuffer > > *batch,> > > annotation_init(&aub_annotations); > > > > - ps_binding_table = gen8_bind_surfaces(batch, src, dst); > > + if (batch->use_resource_streamer) > > + ps_binding_table = gen8_rs_bind_surfaces(batch, src, dst, > > + &surf0, &surf1); > > + else > > + ps_binding_table = gen8_bind_surfaces(batch, src, dst); > > > > ps_sampler_state = gen8_create_sampler(batch); > > ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel)); > > vertex_buffer = gen7_fill_vertex_buffer_data(batch, src, > > > > @@ -955,6 +1033,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer > > *batch,> > > gen8_emit_state_base_address(batch); > > > > + if (batch->use_resource_streamer) > > + gen8_hw_binding_table(batch, true); > > + > > > > OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC); > > OUT_BATCH(viewport.cc_state); > > OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); > > > > @@ -978,6 +1059,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer > > *batch,> > > gen8_emit_sf(batch); > > > > + if (batch->use_resource_streamer) > > + gen8_rs_edit_surfaces(batch, surf0, surf1); > > + > > > > OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS); > > OUT_BATCH(ps_binding_table); > > > > @@ -1001,6 +1085,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer > > *batch,> > > gen8_emit_vf_topology(batch); > > gen8_emit_primitive(batch, vertex_buffer); > > > > + if (batch->use_resource_streamer) > > + gen8_hw_binding_table(batch, false); > > + > > > > OUT_BATCH(MI_BATCH_BUFFER_END); > > > > batch_end = batch_align(batch, 8); Thanks for the review. I'll update v2 soon to address the comments -abdiel
diff --git a/lib/gen8_render.h b/lib/gen8_render.h index fffc100..4d5f7ba 100644 --- a/lib/gen8_render.h +++ b/lib/gen8_render.h @@ -83,6 +83,19 @@ /* Random shifts */ #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23 +/* GEN8+ resource streamer */ +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC (0x7919 << 16) /* GEN8 */ +# define BINDING_TABLE_POOL_ENABLE 0x0800 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS (0x7843 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS (0x7844 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS (0x7845 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS (0x7846 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS (0x7847 << 16) /* GEN8 */ + +/* Toggling RS needs PIPE_CONTROL SC invalidate */ +#define _3DSTATE_PIPE_CONTROL ((0x3 << 29) | (3 << 27) | (2 << 24)) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2) + /* Shamelessly ripped from mesa */ struct gen8_surface_state { diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c index e868922..26c9b89 100644 --- a/lib/intel_batchbuffer.c +++ b/lib/intel_batchbuffer.c @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) memset(batch->buffer, 0, sizeof(batch->buffer)); batch->ptr = batch->buffer; + + if (batch->hw_bt_pool_bo != NULL) { + drm_intel_bo_unreference(batch->hw_bt_pool_bo); + batch->hw_bt_pool_bo = NULL; + } + + batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt", + 131072, 4096); } /** @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch) { drm_intel_bo_unreference(batch->bo); batch->bo = NULL; + + drm_intel_bo_unreference(batch->hw_bt_pool_bo); + batch->hw_bt_pool_bo = NULL; + free(batch); } diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h index 49dbcf0..2a516bc 100644 --- a/lib/intel_batchbuffer.h +++ b/lib/intel_batchbuffer.h @@ -18,6 +18,9 @@ struct intel_batchbuffer { uint8_t buffer[BATCH_SZ]; uint8_t *ptr; uint8_t *state; + + drm_intel_bo *hw_bt_pool_bo; + int use_resource_streamer; }; struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr, diff --git a/lib/intel_reg.h b/lib/intel_reg.h index 4b3a102..1cb9a29 100644 --- a/lib/intel_reg.h +++ b/lib/intel_reg.h @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define MI_BATCH_NON_SECURE (1) #define MI_BATCH_NON_SECURE_I965 (1 << 8) +/* Resource Streamer */ +#define MI_RS_CONTROL (0x6 << 23) + #define MAX_DISPLAY_PIPES 2 typedef enum { diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c index 6f5a698..b0dd0f3 100644 --- a/lib/rendercopy_gen8.c +++ b/lib/rendercopy_gen8.c @@ -169,12 +169,14 @@ static void gen6_render_flush(struct intel_batchbuffer *batch, drm_intel_context *context, uint32_t batch_end) { - int ret; + int ret = 0; + uint32_t flags = 0; ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer); + flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0; if (ret == 0) ret = drm_intel_gem_bo_context_exec(batch->bo, context, - batch_end, 0); + batch_end, flags | I915_EXEC_RENDER); assert(ret == 0); } @@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf, return offset; } +static void +gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable) +{ + if (!enable) { + OUT_BATCH(MI_RS_CONTROL | 0x0); + + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2)); + /* binding table pool base address */ + OUT_BATCH(0); + OUT_BATCH(0); + /* Upper bound */ + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + return; + } + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2)); + + /* binding table pool base address */ + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, + BINDING_TABLE_POOL_ENABLE); + OUT_BATCH(0); + + /* Upper bound */ + OUT_RELOC(batch->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0, + batch->hw_bt_pool_bo->size); + + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static uint32_t +gen8_rs_bind_surfaces(struct intel_batchbuffer *batch, + struct igt_buf *src, + struct igt_buf *dst, + uint32_t *surf0, uint32_t *surf1) +{ + *surf0 = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1); + *surf1 = gen8_bind_buf(batch, src, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 0); + + return 0; +} + +static void +gen8_rs_edit_surfaces(struct intel_batchbuffer *batch, + uint32_t surf0, uint32_t surf1) +{ + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_EDIT_PS | (4 - 2)); + OUT_BATCH(0x3); + { + OUT_BATCH(0 << 16 | surf0 >> 6); + OUT_BATCH(1 << 16 | surf1 >> 6); + } +} + static uint32_t gen8_bind_surfaces(struct intel_batchbuffer *batch, struct igt_buf *src, struct igt_buf *dst) { - uint32_t *binding_table, offset; + uint32_t offset = 0; + uint32_t *binding_table; binding_table = batch_alloc(batch, 8, 32); offset = batch_offset(batch, binding_table); annotation_add_state(&aub_annotations, AUB_TRACE_BINDING_TABLE, offset, 8); - binding_table[0] = gen8_bind_buf(batch, dst, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, 1); binding_table[1] = @@ -512,6 +579,9 @@ gen7_emit_push_constants(struct intel_batchbuffer *batch) { static void gen8_emit_state_base_address(struct intel_batchbuffer *batch) { + if (batch->use_resource_streamer) + OUT_BATCH(MI_RS_CONTROL | 0x0); + OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (16 - 2)); /* general */ @@ -546,6 +616,9 @@ gen8_emit_state_base_address(struct intel_batchbuffer *batch) { OUT_BATCH(0xfffff000 | 1); /* intruction buffer size */ OUT_BATCH(1 << 12 | 1); + + if (batch->use_resource_streamer) + OUT_BATCH(MI_RS_CONTROL | 0x1); } static void @@ -918,6 +991,7 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, uint32_t scissor_state; uint32_t vertex_buffer; uint32_t batch_end; + uint32_t surf0 = 0, surf1 = 1; intel_batchbuffer_flush_with_context(batch, context); @@ -927,7 +1001,11 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, annotation_init(&aub_annotations); - ps_binding_table = gen8_bind_surfaces(batch, src, dst); + if (batch->use_resource_streamer) + ps_binding_table = gen8_rs_bind_surfaces(batch, src, dst, + &surf0, &surf1); + else + ps_binding_table = gen8_bind_surfaces(batch, src, dst); ps_sampler_state = gen8_create_sampler(batch); ps_kernel_off = gen8_fill_ps(batch, ps_kernel, sizeof(ps_kernel)); vertex_buffer = gen7_fill_vertex_buffer_data(batch, src, @@ -955,6 +1033,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, gen8_emit_state_base_address(batch); + if (batch->use_resource_streamer) + gen8_hw_binding_table(batch, true); + OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC); OUT_BATCH(viewport.cc_state); OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP); @@ -978,6 +1059,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, gen8_emit_sf(batch); + if (batch->use_resource_streamer) + gen8_rs_edit_surfaces(batch, surf0, surf1); + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS); OUT_BATCH(ps_binding_table); @@ -1001,6 +1085,9 @@ void gen8_render_copyfunc(struct intel_batchbuffer *batch, gen8_emit_vf_topology(batch); gen8_emit_primitive(batch, vertex_buffer); + if (batch->use_resource_streamer) + gen8_hw_binding_table(batch, false); + OUT_BATCH(MI_BATCH_BUFFER_END); batch_end = batch_align(batch, 8);
Use on-chip hw-binding table generator to generate binding tables when the test emits SURFACE_STATES packets. The hw generates these binding table packets internally so we don't have to allocate space on the batchbuffer. Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com> --- lib/gen8_render.h | 13 +++++++ lib/intel_batchbuffer.c | 12 ++++++ lib/intel_batchbuffer.h | 3 ++ lib/intel_reg.h | 3 ++ lib/rendercopy_gen8.c | 97 ++++++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 123 insertions(+), 5 deletions(-)