@@ -406,7 +406,10 @@ gen7_upload_binding_table(intel_screen_private *intel,
uint32_t ps_binding_table_offset)
{
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
- OUT_BATCH(ps_binding_table_offset);
+ if (intel->use_resource_streamer)
+ OUT_BATCH(ps_binding_table_offset >> 1);
+ else
+ OUT_BATCH(ps_binding_table_offset);
}
void
@@ -296,6 +296,14 @@
/* DW1 */
# define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
+/* GEN7+ resource streamer */
+/* Binding table pool allocation command. The render code emits it with a
+ * length field of (3 - 2), followed by two relocations against the pool
+ * buffer object.
+ */
+#define GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC BRW_3D(3, 1, 0x19)
+/* Used as the relocation delta of the first POOL_ALLOC dword.
+ * NOTE(review): the individual bits of 0x0860 are not explained here;
+ * confirm them against the hardware documentation.
+ */
+# define BINDING_TABLE_POOL_ENABLE 0x0860
+/* Per-stage binding table edit commands. Only the PS variant is emitted by
+ * the render code touched in this patch.
+ */
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_VS BRW_3D(3, 0, 0x43)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_GS BRW_3D(3, 0, 0x44)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_HS BRW_3D(3, 0, 0x45)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_DS BRW_3D(3, 0, 0x46)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_PS BRW_3D(3, 0, 0x47)
#define PIPELINE_SELECT_3D 0
#define PIPELINE_SELECT_MEDIA 1
@@ -1783,6 +1783,10 @@ static void i965_surface_flush(struct intel_screen_private *intel)
sizeof(intel->surface_data), 4096);
assert(intel->surface_bo);
+ drm_intel_bo_unreference(intel->hw_bt_pool_bo);
+ intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt",
+ 131072, 4096);
+ assert(intel->hw_bt_pool_bo);
return;
(void)ret;
}
@@ -2217,32 +2221,70 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel)
+/*
+ * Create the surface states for the current composite operation and record
+ * them in binding-table slots 0 (render_dest), 1 (render_source) and
+ * 2 (render_mask, only when a mask is set).
+ *
+ * When intel->use_resource_streamer is set, the slots are programmed by
+ * emitting a GEN7_3DSTATE_BINDING_TABLE_EDIT_PS packet into the batch;
+ * otherwise they are stored in a table placed in intel->surface_data at
+ * offset intel->surface_used.
+ */
static void i965_bind_surfaces(struct intel_screen_private *intel)
{
uint32_t *binding_table;
+ uint32_t surf0 = 0, surf1 = 0, surf2 = 0;
assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data));
- binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
- intel->surface_table = intel->surface_used;
- intel->surface_used += SURFACE_STATE_PADDED_SIZE;
-
- binding_table[0] =
- i965_set_picture_surface_state(intel,
+ surf0 = i965_set_picture_surface_state(intel,
intel->render_dest_picture,
intel->render_dest,
TRUE);
- binding_table[1] =
- i965_set_picture_surface_state(intel,
+ surf1 = i965_set_picture_surface_state(intel,
intel->render_source_picture,
intel->render_source,
FALSE);
if (intel->render_mask) {
- binding_table[2] =
- i965_set_picture_surface_state(intel,
- intel->render_mask_picture,
- intel->render_mask,
- FALSE);
+ surf2 = i965_set_picture_surface_state(intel,
+ intel->render_mask_picture,
+ intel->render_mask,
+ FALSE);
+ }
+
+ /* surf2 keeps its initial value of 0 when there is no mask, so slot 2
+ * is written on both paths below regardless of intel->render_mask.
+ */
+ if (intel->use_resource_streamer) {
+ /* Advance surface_table by 256 16-bit entries (512 bytes).
+ * NOTE(review): presumably this steps to a fresh table inside
+ * the hardware pool; none of the hunks shown here reset it on
+ * this path -- confirm it cannot run past the pool size.
+ */
+ intel->surface_table += (256 * sizeof(uint16_t));
+ OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (5 - 2));
+ /* NOTE(review): the meaning of 0x3 is not documented here;
+ * confirm against the hardware documentation.
+ */
+ OUT_BATCH(0x3);
+ {
+ /* One dword per slot: slot index in bits 16 and up,
+ * surface value shifted right by 5 in the low bits.
+ */
+ OUT_BATCH(0 << 16 | surf0 >> 5);
+ OUT_BATCH(1 << 16 | surf1 >> 5);
+ OUT_BATCH(2 << 16 | surf2 >> 5);
+ }
+ } else {
+ /* Software binding table: take SURFACE_STATE_PADDED_SIZE bytes
+ * of surface_data at surface_used and fill the slots directly.
+ */
+ binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
+ intel->surface_table = intel->surface_used;
+ intel->surface_used += SURFACE_STATE_PADDED_SIZE;
+
+ binding_table[0] = surf0;
+ binding_table[1] = surf1;
+ binding_table[2] = surf2;
}
}
+/*
+ * Emit the batch commands that set up hardware-generated binding tables.
+ * Returns without emitting anything unless intel->use_resource_streamer
+ * is set.
+ */
+static void i965_enable_hw_binding_table(struct intel_screen_private *intel)
+{
+ if (!intel->use_resource_streamer)
+ return;
+
+ /* Pool allocation: the first relocation is the pool buffer plus
+ * BINDING_TABLE_POOL_ENABLE, the second is the pool buffer plus its
+ * size (presumably the upper bound -- confirm).
+ */
+ OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
+ OUT_RELOC(intel->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+ BINDING_TABLE_POOL_ENABLE);
+ OUT_RELOC(intel->hw_bt_pool_bo, I915_GEM_DOMAIN_SAMPLER, 0,
+ intel->hw_bt_pool_bo->size);
+
+ /* NOTE(review): this PIPE_CONTROL passes only
+ * BRW_PIPE_CONTROL_GLOBAL_GTT with a zero address and zero data;
+ * confirm whether a cache invalidate is required after switching to
+ * hardware binding tables.
+ */
+ OUT_BATCH(BRW_PIPE_CONTROL | (4 - 2));
+ OUT_BATCH(BRW_PIPE_CONTROL_GLOBAL_GTT);
+ OUT_BATCH(0); /* address */
+ OUT_BATCH(0); /* write data */
+
+ /* Block-clear any on-chip binding table entries that may be left over
+ from the previous batch. Stale entries otherwise cause GPU hangs.
+ */
+ OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (3 - 2));
+ OUT_BATCH(0xffff << 16 | 0x3 );
+ OUT_BATCH(0);
+}
+
void
i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
int dstX, int dstY, int w, int h)
@@ -2252,6 +2294,7 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
intel_batch_start_atomic(scrn, 200);
if (intel->needs_render_state_emit) {
+ i965_enable_hw_binding_table(intel);
i965_bind_surfaces(intel);
if (INTEL_INFO(intel)->gen >= 060)
@@ -2349,6 +2392,8 @@ void gen4_render_state_init(ScrnInfoPtr scrn)
drm_intel_bo_alloc(intel->bufmgr, "surface data",
sizeof(intel->surface_data), 4096);
assert(intel->surface_bo);
+ intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt",
+ 131072, 4096);
intel->surface_used = 0;
@@ -2445,6 +2490,7 @@ void gen4_render_state_cleanup(ScrnInfoPtr scrn)
int i, j, k, l, m;
drm_intel_bo_unreference(intel->surface_bo);
+ drm_intel_bo_unreference(intel->hw_bt_pool_bo);
drm_intel_bo_unreference(render_state->vs_state_bo);
drm_intel_bo_unreference(render_state->sf_state_bo);
drm_intel_bo_unreference(render_state->sf_mask_state_bo);
@@ -2571,9 +2617,13 @@ gen6_composite_create_depth_stencil_state(intel_screen_private *intel)
(void)ret;
}
+#define MI_RS_CONTROL (0x6 << 23)
+
static void
gen6_composite_state_base_address(intel_screen_private *intel)
{
+ OUT_BATCH(MI_RS_CONTROL | 0x0);
+
OUT_BATCH(BRW_STATE_BASE_ADDRESS | (10 - 2));
OUT_BATCH(BASE_ADDRESS_MODIFY); /* General state base address */
intel->surface_reloc = intel->batch_used;
@@ -2586,6 +2636,8 @@ gen6_composite_state_base_address(intel_screen_private *intel)
OUT_BATCH(BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
OUT_BATCH(BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
OUT_BATCH(BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+
+ OUT_BATCH(MI_RS_CONTROL | 0x1);
}
static void
@@ -260,13 +260,12 @@ void intel_batch_submit(ScrnInfoPtr scrn)
}
ret = dri_bo_subdata(intel->batch_bo, 0, intel->batch_used*4, intel->batch_ptr);
+ uint32_t flags = HAS_BLT(intel) ? intel->current_batch: I915_EXEC_DEFAULT;
+ flags |= intel->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
if (ret == 0) {
ret = drm_intel_bo_mrb_exec(intel->batch_bo,
intel->batch_used*4,
- NULL, 0, 0xffffffff,
- (HAS_BLT(intel) ?
- intel->current_batch:
- I915_EXEC_DEFAULT));
+ NULL, 0, 0xffffffff, flags);
}
if (ret != 0) {
Code is based on my hw-generated binding table code for Mesa, adapted to
the i965_composite path in UXA.

Signed-off-by: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 src/uxa/i965_3d.c           |  5 ++-
 src/uxa/i965_reg.h          |  8 +++++
 src/uxa/i965_render.c       | 78 +++++++++++++++++++++++++++++++++++--------
 src/uxa/intel_batchbuffer.c |  7 ++--
 4 files changed, 80 insertions(+), 18 deletions(-)