diff mbox

[2/2] tools/null_state_gen: Add GEN10 golden context batch buffer creation

Message ID 1493370648-14389-2-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com April 28, 2017, 9:10 a.m. UTC
This batchbuffer is over 4096 bytes, so we need to increase the size of the
array (and the KMD has to be modified to deal with more than one page).

Notice that there are two workarounds embedded here:
- WaRsGatherPoolEnable is to be applied to all CNL steppings, so it belongs
  here.
- WaPSRandomCSNotDone is A0 only, but since the golden context batch buffer
  is created offline in i-g-t (as opposed to dynamically in i915) we cannot
  really make it dependent on the stepping (there is a mechanism in i915 to
  *add* extra stuff to the golden context, via an additional auxiliary bb,
  but nothing to modify things *inside* the offline-created bb). So maybe
  apply the WA for now and remove it once production chips are the norm?

Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 lib/gen10_render.h                             |  63 +++
 tools/null_state_gen/Makefile.am               |   3 +-
 tools/null_state_gen/intel_batchbuffer.h       |   2 +-
 tools/null_state_gen/intel_null_state_gen.c    |   5 +-
 tools/null_state_gen/intel_renderstate.h       |   1 +
 tools/null_state_gen/intel_renderstate_gen10.c | 538 +++++++++++++++++++++++++
 6 files changed, 609 insertions(+), 3 deletions(-)
 create mode 100644 lib/gen10_render.h
 create mode 100644 tools/null_state_gen/intel_renderstate_gen10.c
diff mbox

Patch

diff --git a/lib/gen10_render.h b/lib/gen10_render.h
new file mode 100644
index 0000000..f4a7dff
--- /dev/null
+++ b/lib/gen10_render.h
@@ -0,0 +1,63 @@ 
+#ifndef GEN10_RENDER_H
+#define GEN10_RENDER_H
+
+#include "gen9_render.h"
+
+#define GEN7_MI_RS_CONTROL			(0x6 << 23)
+# define GEN7_MI_RS_CONTROL_ENABLE		(1 << 0)
+
+#define GEN10_3DSTATE_GATHER_POOL_ALLOC		GEN6_3D(3, 1, 0x1a)
+# define GEN10_3DSTATE_GATHER_POOL_ENABLE	(1 << 11)
+
+#define GEN10_3DSTATE_GATHER_CONSTANT_VS	GEN6_3D(3, 0, 0x34)
+#define GEN10_3DSTATE_GATHER_CONSTANT_HS	GEN6_3D(3, 0, 0x36)
+#define GEN10_3DSTATE_GATHER_CONSTANT_DS	GEN6_3D(3, 0, 0x37)
+#define GEN10_3DSTATE_GATHER_CONSTANT_GS	GEN6_3D(3, 0, 0x35)
+#define GEN10_3DSTATE_GATHER_CONSTANT_PS	GEN6_3D(3, 0, 0x38)
+
+#define GEN10_3DSTATE_WM_DEPTH_STENCIL		GEN6_3D(3, 0, 0x4e)
+#define GEN10_3DSTATE_WM_CHROMAKEY		GEN6_3D(3, 0, 0x4c)
+
+#define GEN8_REG_L3_CACHE_CONFIG	0x7034
+
+/*
+ * Programming for L3 cache allocations can be made per bank. Based on the
+ * programmed value HW will apply same allocations on other available banks.
+ * Total L3 Cache size per bank = 256 KB.
+ * {SLM,    URB,     DC,      RO(I/S, C, T),   L3 Client Pool}
+ * {  0,    96,      32,      128,                 0      }
+ */
+#define GEN10_L3_CACHE_CONFIG_VALUE	0x00420060
+
+#define URB_ALIGN(val, align)	(((val) % (align)) ? ((val) - ((val) % (align))) : (val)) /* round down */
+
+#define GEN10_VS_MIN_NUM_OF_URB_ENTRIES		64
+#define GEN10_VS_MAX_NUM_OF_URB_ENTRIES		2752
+
+#define GEN10_KB_PER_URB_INDEX			8
+#define GEN10_L3_URB_SIZE_PER_BANK_IN_KB	96
+
+#define GEN10_URB_RESERVED_SIZE_KB		32
+#define GEN10_URB_RESERVED_END_SIZE_KB		8
+
+#define GEN10_VS_NUM_BITS_PER_URB_UNIT		512
+#define GEN10_VS_NUM_OF_URB_UNITS		1 /* zero based, i.e. two units */
+#define GEN10_VS_URB_ENTRY_SIZE_IN_BITS		(GEN10_VS_NUM_BITS_PER_URB_UNIT * \
+						(GEN10_VS_NUM_OF_URB_UNITS + 1))
+
+#define GEN10_VS_URB_START_INDEX (GEN10_URB_RESERVED_SIZE_KB / GEN10_KB_PER_URB_INDEX)
+
+#define GEN10_URB_SIZE_PER_SLICE_KB(l3_bank_count, slice_count)		\
+	URB_ALIGN((uint32_t)(GEN10_L3_URB_SIZE_PER_BANK_IN_KB * (l3_bank_count) / (slice_count)), GEN10_KB_PER_URB_INDEX)
+
+#define GEN10_VS_URB_SIZE_PER_SLICE_KB(total_urb_size_per_slice)	\
+	((total_urb_size_per_slice) - GEN10_URB_RESERVED_SIZE_KB - GEN10_URB_RESERVED_END_SIZE_KB)
+
+#define GEN10_VS_NUM_URB_ENTRIES_PER_SLICE(total_urb_size_per_slice)	\
+	((GEN10_VS_URB_SIZE_PER_SLICE_KB(total_urb_size_per_slice) *	\
+	1024 * 8) / GEN10_VS_URB_ENTRY_SIZE_IN_BITS)
+
+#define GEN10_VS_END_URB_INDEX(urb_size_per_slice)			\
+	(((urb_size_per_slice) - GEN10_URB_RESERVED_END_SIZE_KB) / GEN10_KB_PER_URB_INDEX)
+
+#endif
diff --git a/tools/null_state_gen/Makefile.am b/tools/null_state_gen/Makefile.am
index 24884a7..2f90990 100644
--- a/tools/null_state_gen/Makefile.am
+++ b/tools/null_state_gen/Makefile.am
@@ -12,9 +12,10 @@  intel_null_state_gen_SOURCES = 	\
 	intel_renderstate_gen7.c \
 	intel_renderstate_gen8.c \
 	intel_renderstate_gen9.c \
+	intel_renderstate_gen10.c \
 	intel_null_state_gen.c
 
-gens := 6 7 8 9
+gens := 6 7 8 9 10
 
 h = /tmp/intel_renderstate_gen$$gen.c
 states: intel_null_state_gen
diff --git a/tools/null_state_gen/intel_batchbuffer.h b/tools/null_state_gen/intel_batchbuffer.h
index 771d1c8..e40e01b 100644
--- a/tools/null_state_gen/intel_batchbuffer.h
+++ b/tools/null_state_gen/intel_batchbuffer.h
@@ -34,7 +34,7 @@ 
 #include <stdint.h>
 
 #define MAX_RELOCS 64
-#define MAX_ITEMS 1024
+#define MAX_ITEMS 2048
 #define MAX_STRLEN 256
 
 #define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
diff --git a/tools/null_state_gen/intel_null_state_gen.c b/tools/null_state_gen/intel_null_state_gen.c
index 06eb954..4f12f5f 100644
--- a/tools/null_state_gen/intel_null_state_gen.c
+++ b/tools/null_state_gen/intel_null_state_gen.c
@@ -41,7 +41,7 @@  static int debug = 0;
 static void print_usage(char *s)
 {
 	fprintf(stderr, "%s: <gen>\n"
-		"     gen:     gen to generate for (6,7,8,9)\n",
+		"     gen:     gen to generate for (6,7,8,9,10)\n",
 		s);
 }
 
@@ -173,6 +173,9 @@  static int do_generate(int gen)
 	case 9:
 		null_state_gen = gen9_setup_null_render_state;
 		break;
+	case 10:
+		null_state_gen = gen10_setup_null_render_state;
+		break;
 	}
 
 	if (null_state_gen == NULL) {
diff --git a/tools/null_state_gen/intel_renderstate.h b/tools/null_state_gen/intel_renderstate.h
index b27b434..b3c8c2b 100644
--- a/tools/null_state_gen/intel_renderstate.h
+++ b/tools/null_state_gen/intel_renderstate.h
@@ -30,5 +30,6 @@  void gen6_setup_null_render_state(struct intel_batchbuffer *batch);
 void gen7_setup_null_render_state(struct intel_batchbuffer *batch);
 void gen8_setup_null_render_state(struct intel_batchbuffer *batch);
 void gen9_setup_null_render_state(struct intel_batchbuffer *batch);
+void gen10_setup_null_render_state(struct intel_batchbuffer *batch);
 
 #endif /* __INTEL_RENDERSTATE_H__ */
diff --git a/tools/null_state_gen/intel_renderstate_gen10.c b/tools/null_state_gen/intel_renderstate_gen10.c
new file mode 100644
index 0000000..905c6c7
--- /dev/null
+++ b/tools/null_state_gen/intel_renderstate_gen10.c
@@ -0,0 +1,538 @@ 
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Oscar Mateo <oscar.mateo@intel.com>
+ */
+
+#include "intel_renderstate.h"
+#include <lib/gen10_render.h>
+#include <lib/intel_reg.h>
+
+static void gen8_emit_wm(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN6_3DSTATE_WM | (2 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(GEN7_WM_LEGACY_DIAMOND_LINE_RASTERIZATION);
+}
+
+static void gen8_emit_ps(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_3DSTATE_PS | (12 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* kernel hi */
+	OUT_BATCH(GEN7_PS_SPF_MODE);
+	OUT_BATCH(0); /* scratch space stuff */
+	OUT_BATCH(0); /* scratch hi */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0); /* kernel 1 */
+	OUT_BATCH(0); /* kernel 1 hi */
+	OUT_BATCH(0); /* kernel 2 */
+	OUT_BATCH(0); /* kernel 2 hi */
+}
+
+static void gen8_emit_sf(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN6_3DSTATE_SF | (4 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(1 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT |
+		  1 << GEN6_3DSTATE_SF_VERTEX_SUB_PIXEL_PRECISION_SHIFT |
+		  GEN7_SF_POINT_WIDTH_FROM_SOURCE |
+		  8); /* NOTE(review): presumably the point width field - confirm */
+}
+
+static void gen8_emit_vs(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN6_3DSTATE_VS | (9 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(GEN7_VS_FLOATING_POINT_MODE_ALTERNATE);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static void gen8_emit_hs(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN7_3DSTATE_HS | (9 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT); /* NOTE(review): SBE-named shift in an HS DWORD - confirm */
+	OUT_BATCH(0);
+}
+
+static void gen8_emit_raster(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_3DSTATE_RASTER | (5 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(GEN8_RASTER_CULL_NONE | GEN8_RASTER_FRONT_WINDING_CCW);
+	OUT_BATCH(0.0); /* NOTE(review): floating-point literal; presumably emitted as integer 0 - confirm */
+	OUT_BATCH(0.0);
+	OUT_BATCH(0.0);
+}
+
+static void gen10_emit_urb(struct intel_batchbuffer *batch)
+{
+	/* Smallest SKU: 3x8 */
+	int l3_bank_count = 3;
+	int slice_count = 1;
+	int urb_size_per_slice = GEN10_URB_SIZE_PER_SLICE_KB(l3_bank_count, slice_count);
+	int other_urb_start_addr = GEN10_VS_END_URB_INDEX(urb_size_per_slice);
+	const int vs_urb_start_addr = GEN10_VS_URB_START_INDEX;
+	const int vs_urb_alloc_size = GEN10_VS_NUM_OF_URB_UNITS;
+	int vs_urb_entries = GEN10_VS_NUM_URB_ENTRIES_PER_SLICE(urb_size_per_slice);
+
+	if (vs_urb_entries < GEN10_VS_MIN_NUM_OF_URB_ENTRIES) /* clamp to the minimum */
+		vs_urb_entries = GEN10_VS_MIN_NUM_OF_URB_ENTRIES;
+	if (vs_urb_entries > GEN10_VS_MAX_NUM_OF_URB_ENTRIES) /* clamp to the maximum */
+		vs_urb_entries = GEN10_VS_MAX_NUM_OF_URB_ENTRIES;
+
+	OUT_BATCH(GEN7_3DSTATE_URB_VS);
+	OUT_BATCH(vs_urb_entries |
+		 (vs_urb_alloc_size << 16) |
+		 (vs_urb_start_addr << 25));
+
+	OUT_BATCH(GEN7_3DSTATE_URB_HS); /* HS/DS/GS: start address only, all other bits 0 */
+	OUT_BATCH(other_urb_start_addr << 25);
+
+	OUT_BATCH(GEN7_3DSTATE_URB_DS);
+	OUT_BATCH(other_urb_start_addr << 25);
+
+	OUT_BATCH(GEN7_3DSTATE_URB_GS);
+	OUT_BATCH(other_urb_start_addr << 25);
+}
+
+static void gen8_emit_vf_topology(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_3DSTATE_VF_TOPOLOGY); /* no length bits OR'ed in */
+	OUT_BATCH(_3DPRIM_TRILIST);
+}
+
+static void gen8_emit_so_decl_list(struct intel_batchbuffer *batch)
+{
+	const int num_decls = 128;
+	int i;
+
+	OUT_BATCH(GEN8_3DSTATE_SO_DECL_LIST |
+		(((2 * num_decls) + 3) - 2) /* DWORD count - 2 */);
+	OUT_BATCH(0);
+	OUT_BATCH(num_decls);
+
+	for (i = 0; i < num_decls; i++) { /* two zeroed DWORDs per declaration */
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+	}
+}
+
+static void gen8_emit_so_buffer(struct intel_batchbuffer *batch, const int index)
+{
+	OUT_BATCH(GEN8_3DSTATE_SO_BUFFER | (8 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(index << 29); /* buffer index goes in the bits from 29 up */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static void gen8_emit_chroma_key(struct intel_batchbuffer *batch, const int index)
+{
+	OUT_BATCH(GEN6_3DSTATE_CHROMA_KEY | (4 - 2)); /* DWORD count - 2 */
+	OUT_BATCH((uint32_t)index << 30); /* unsigned shift: index << 30 overflows int for index >= 2 */
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+}
+
+static void gen8_emit_vertex_buffers(struct intel_batchbuffer *batch)
+{
+	const int buffers = 33;
+	int i;
+
+	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS |
+		(((4 * buffers) + 1)- 2) /* DWORD count - 2 */);
+
+	for (i = 0; i < buffers; i++) { /* four DWORDs per vertex buffer */
+		OUT_BATCH(i << VB0_BUFFER_INDEX_SHIFT |
+			  GEN7_VB0_BUFFER_ADDR_MOD_EN);
+		OUT_BATCH(0); /* Address */
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+	}
+}
+
+static void gen8_emit_vertex_elements(struct intel_batchbuffer *batch)
+{
+	const int elements = 34;
+	int i;
+
+	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
+		(((2 * elements) + 1) - 2) /* DWORD count - 2 */);
+
+	/* Element 0: marked valid, all four components use STORE_0 */
+	OUT_BATCH(VE0_VALID);
+	OUT_BATCH(
+		GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
+		GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
+		GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
+		GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
+	/* Elements 1 -> 33: two zeroed DWORDs each */
+	for (i = 1; i < elements; i++) {
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+	}
+}
+
+static void gen8_emit_cc_state_pointers(struct intel_batchbuffer *batch)
+{
+	union { /* reinterpret the bits of a float as a uint32_t */
+		float fval;
+		uint32_t uval;
+	} u;
+
+	unsigned offset;
+
+	u.fval = 1.0f;
+
+	offset = intel_batch_state_offset(batch, 64);
+	OUT_STATE(0);
+	OUT_STATE(0);      /* Alpha reference value */
+	OUT_STATE(u.uval); /* Blend constant color RED */
+	OUT_STATE(u.uval); /* Blend constant color GREEN (NOTE(review): order per PRM - confirm) */
+	OUT_STATE(u.uval); /* Blend constant color BLUE */
+	OUT_STATE(u.uval); /* Blend constant color ALPHA */
+
+	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS);
+	OUT_BATCH_STATE_OFFSET(offset | 1);
+}
+
+static void gen8_emit_blend_state_pointers(struct intel_batchbuffer *batch)
+{
+	unsigned offset;
+	int i;
+
+	offset = intel_batch_state_offset(batch, 64);
+
+	for (i = 0; i < 17; i++) /* 17 zeroed state DWORDs */
+		OUT_STATE(0);
+
+	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2)); /* DWORD count - 2 */
+	OUT_BATCH_STATE_OFFSET(offset | 1);
+}
+
+static void gen8_emit_ps_extra(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_3DSTATE_PS_EXTRA | (2 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(GEN8_PSX_PIXEL_SHADER_VALID |
+		  GEN8_PSX_ATTRIBUTE_ENABLE);
+
+}
+
+static void gen8_emit_ps_blend(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN8_3DSTATE_PS_BLEND | (2 - 2)); /* DWORD count - 2 */
+	OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT);
+}
+
+static void gen8_emit_viewport_state_pointers_cc(struct intel_batchbuffer *batch)
+{
+	unsigned offset;
+
+	offset = intel_batch_state_offset(batch, 32);
+
+	OUT_STATE((uint32_t)0.0f); /* Minimum depth */
+	OUT_STATE((uint32_t)0.0f); /* Maximum depth */
+
+	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2)); /* DWORD count - 2 */
+	OUT_BATCH_STATE_OFFSET(offset);
+}
+
+static void gen8_emit_viewport_state_pointers_sf_clip(struct intel_batchbuffer *batch)
+{
+	unsigned offset;
+	int i;
+
+	offset = intel_batch_state_offset(batch, 64);
+
+	for (i = 0; i < 16; i++) /* 16 zeroed state DWORDs */
+		OUT_STATE(0);
+
+	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP | (2 - 2)); /* DWORD count - 2 */
+	OUT_BATCH_STATE_OFFSET(offset);
+}
+
+static void gen8_emit_primitive(struct intel_batchbuffer *batch)
+{
+	OUT_BATCH(GEN6_3DPRIMITIVE | (10-2)); /* DWORD count - 2 */
+	OUT_BATCH(4);   /* gen8+ ignore the topology type field */
+	OUT_BATCH(1);   /* vertex count */
+	OUT_BATCH(0);   /* NOTE(review): presumably the start vertex - confirm */
+	OUT_BATCH(1);   /* single instance */
+	OUT_BATCH(0);   /* start instance location */
+	OUT_BATCH(0);   /* index buffer offset, ignored */
+	OUT_BATCH(0);   /* extended parameter 0 */
+	OUT_BATCH(0);   /* extended parameter 1 */
+	OUT_BATCH(0);   /* extended parameter 2 */
+}
+
+static void gen9_emit_state_base_address(struct intel_batchbuffer *batch) {
+	const unsigned offset = 0;
+	OUT_BATCH(GEN6_STATE_BASE_ADDRESS |
+		(22 - 2) /* DWORD count - 2 */);
+
+	/* general state base address - requires BB address
+	 * added to state offset to be stored in this location
+	 */
+	OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* stateless data port */
+	OUT_BATCH(0);
+
+	/* surface state base address - requires BB address
+	 * added to state offset to be stored in this location
+	 */
+	OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* dynamic state base address - requires BB address
+	 * added to state offset to be stored in this location
+	 */
+	OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* indirect state base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* instruction state base address - requires BB address
+	 * added to state offset to be stored in this location
+	 */
+	OUT_RELOC(batch, 0, 0, offset | BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+
+	/* general state buffer size */
+	OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY);
+	/* dynamic state buffer size */
+	OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY);
+	/* indirect object buffer size */
+	OUT_BATCH(0x0 | BUFFER_SIZE_MODIFY);
+	/* instruction buffer size */
+	OUT_BATCH(GEN8_STATE_SIZE_PAGES(1) | BUFFER_SIZE_MODIFY);
+
+	/* bindless surface state base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	/* bindless surface state size */
+	OUT_BATCH(0);
+
+	/* bindless sampler state base address */
+	OUT_BATCH(BASE_ADDRESS_MODIFY);
+	OUT_BATCH(0);
+	/* bindless sampler state size */
+	OUT_BATCH(0);
+}
+
+/*
+ * Generate the batch buffer commands needed to initialize the 3D engine
+ * to its "golden state".
+ */
+void gen10_setup_null_render_state(struct intel_batchbuffer *batch)
+{
+	int i;
+
+	/* WaRsGatherPoolEnable: cnl */
+	OUT_BATCH(GEN7_MI_RS_CONTROL);
+
+#define GEN8_PIPE_CONTROL_GLOBAL_GTT   (1 << 24)
+	/* PIPE_CONTROL */
+	OUT_BATCH(GEN6_PIPE_CONTROL |
+	         (6 - 2));	/* DWORD count - 2 */
+	OUT_BATCH(GEN8_PIPE_CONTROL_GLOBAL_GTT);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	/* PIPELINE_SELECT */
+	OUT_BATCH(GEN9_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+	OUT_BATCH(MI_LOAD_REGISTER_IMM);
+	OUT_BATCH(GEN8_REG_L3_CACHE_CONFIG);
+	OUT_BATCH(GEN10_L3_CACHE_CONFIG_VALUE);
+
+	gen8_emit_wm(batch);
+	gen8_emit_ps(batch);
+	gen8_emit_sf(batch);
+
+	OUT_CMD(GEN7_3DSTATE_SBE, 6); /* Check w/ Gen8 code */
+	OUT_CMD(GEN8_3DSTATE_SBE_SWIZ, 11);
+
+	gen8_emit_vs(batch);
+	gen8_emit_hs(batch);
+
+	OUT_CMD(GEN7_3DSTATE_GS, 10);
+	OUT_CMD(GEN7_3DSTATE_STREAMOUT, 5);
+	OUT_CMD(GEN7_3DSTATE_DS, 11); /* Check w/ Gen8 code */
+	OUT_CMD(GEN6_3DSTATE_CLIP, 4);
+	OUT_CMD(GEN7_3DSTATE_TE, 4);
+	OUT_CMD(GEN8_3DSTATE_VF, 2);
+	OUT_CMD(GEN8_3DSTATE_WM_HZ_OP, 5);
+
+	/* URB States */
+	gen10_emit_urb(batch);
+
+	OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_VS, 130);
+	OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_HS, 130);
+	OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_DS, 130);
+	OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_GS, 130);
+	OUT_CMD(GEN10_3DSTATE_GATHER_CONSTANT_PS, 130);
+
+	OUT_CMD(GEN8_3DSTATE_BIND_TABLE_POOL_ALLOC, 4);
+	OUT_CMD(GEN8_3DSTATE_GATHER_POOL_ALLOC, 4);
+	OUT_CMD(GEN8_3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC, 4);
+
+	/* Push Constants */
+	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS, 2);
+	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS, 2);
+	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS, 2);
+	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS, 2);
+	OUT_CMD(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS, 2);
+
+	/* Constants */
+	OUT_CMD(GEN6_3DSTATE_CONSTANT_VS, 11);
+	OUT_CMD(GEN7_3DSTATE_CONSTANT_HS, 11);
+	OUT_CMD(GEN7_3DSTATE_CONSTANT_DS, 11);
+	OUT_CMD(GEN7_3DSTATE_CONSTANT_GS, 11);
+	OUT_CMD(GEN7_3DSTATE_CONSTANT_PS, 11);
+
+	OUT_CMD(GEN8_3DSTATE_VF_INSTANCING, 3);
+	OUT_CMD(GEN8_3DSTATE_VF_SGVS, 2);
+	gen8_emit_vf_topology(batch);
+
+	/* Streamer out declaration list */
+	gen8_emit_so_decl_list(batch);
+
+	/* Streamer out buffers */
+	for (i = 0; i < 4; i++) {
+		gen8_emit_so_buffer(batch, i);
+	}
+
+	/* State base addresses */
+	gen9_emit_state_base_address(batch);
+
+	OUT_CMD(GEN6_STATE_SIP, 3);
+	OUT_CMD(GEN6_3DSTATE_DRAWING_RECTANGLE, 4);
+	OUT_CMD(GEN7_3DSTATE_DEPTH_BUFFER, 8);
+
+	/* Chroma key */
+	for (i = 0; i < 4; i++) {
+		gen8_emit_chroma_key(batch, i);
+	}
+
+	OUT_CMD(GEN6_3DSTATE_LINE_STIPPLE, 3);
+	OUT_CMD(GEN6_3DSTATE_AA_LINE_PARAMS, 3);
+	OUT_CMD(GEN7_3DSTATE_STENCIL_BUFFER, 5);
+	OUT_CMD(GEN7_3DSTATE_HIER_DEPTH_BUFFER, 5);
+	OUT_CMD(GEN7_3DSTATE_CLEAR_PARAMS, 3);
+	OUT_CMD(GEN6_3DSTATE_MONOFILTER_SIZE, 2);
+
+	/* WaPSRandomCSNotDone:cnl (pre-production) */
+#define GEN8_PIPE_CONTROL_STALL_ENABLE   (1 << 20)
+	OUT_BATCH(GEN6_PIPE_CONTROL | (6 - 2));
+	OUT_BATCH(GEN8_PIPE_CONTROL_STALL_ENABLE);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	OUT_CMD(GEN8_3DSTATE_MULTISAMPLE, 2);
+	OUT_CMD(GEN8_3DSTATE_POLY_STIPPLE_OFFSET, 2);
+	OUT_CMD(GEN8_3DSTATE_POLY_STIPPLE_PATTERN, 1 + 32);
+	OUT_CMD(GEN8_3DSTATE_SAMPLER_PALETTE_LOAD0, 1 + 16);
+	OUT_CMD(GEN8_3DSTATE_SAMPLER_PALETTE_LOAD1, 1 + 16);
+	OUT_CMD(GEN6_3DSTATE_INDEX_BUFFER, 5);
+
+	/* Vertex buffers */
+	gen8_emit_vertex_buffers(batch);
+	gen8_emit_vertex_elements(batch);
+
+	OUT_BATCH(GEN6_3DSTATE_VF_STATISTICS | 1 /* Enable */);
+
+	/* 3D state binding table pointers */
+	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS, 2);
+	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS, 2);
+	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS, 2);
+	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS, 2);
+	OUT_CMD(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS, 2);
+
+	gen8_emit_cc_state_pointers(batch);
+	gen8_emit_blend_state_pointers(batch);
+	gen8_emit_ps_extra(batch);
+	gen8_emit_ps_blend(batch);
+
+	/* 3D state sampler state pointers */
+	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS, 2);
+	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS, 2);
+	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS, 2);
+	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS, 2);
+	OUT_CMD(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS, 2);
+
+	OUT_CMD(GEN6_3DSTATE_SCISSOR_STATE_POINTERS, 2);
+
+	gen8_emit_viewport_state_pointers_cc(batch);
+	gen8_emit_viewport_state_pointers_sf_clip(batch);
+
+	/* WaPSRandomCSNotDone:cnl (pre-production) */
+	/* Second stall; GEN8_PIPE_CONTROL_STALL_ENABLE is already defined above */
+	OUT_BATCH(GEN6_PIPE_CONTROL | (6 - 2));
+	OUT_BATCH(GEN8_PIPE_CONTROL_STALL_ENABLE);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+	OUT_BATCH(0);
+
+	gen8_emit_raster(batch);
+
+	OUT_CMD(GEN10_3DSTATE_WM_DEPTH_STENCIL, 4);
+	OUT_CMD(GEN10_3DSTATE_WM_CHROMAKEY, 2);
+
+	/* Launch 3D operation */
+	gen8_emit_primitive(batch);
+
+	/* WaRsGatherPoolEnable: cnl */
+	OUT_BATCH(GEN7_MI_RS_CONTROL | GEN7_MI_RS_CONTROL_ENABLE);
+	OUT_BATCH(GEN10_3DSTATE_GATHER_POOL_ALLOC | (4 - 2));
+	OUT_BATCH(GEN10_3DSTATE_GATHER_POOL_ENABLE);
+	OUT_BATCH(0);
+	OUT_BATCH(0xfffff << 12);
+	OUT_BATCH(GEN7_MI_RS_CONTROL);
+	OUT_CMD(GEN10_3DSTATE_GATHER_POOL_ALLOC, 4);
+
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+}