@@ -230,6 +230,17 @@ GLboolean brwCreateContext( int api,
brw->wm_max_threads = 1;
}
+ if (getenv("INTEL_SHADER_DEBUG") &&
+ (!strncmp("wm", getenv("INTEL_SHADER_DEBUG"), 2))) {
+ char *sr_path;
+ if ((sr_path = getenv("INTEL_SYSTEM_ROUTINE_PATH"))) {
+ brw_wm_init_debug( brw, (const char *)sr_path );
+ } else {
+ fprintf(stderr, "please add INTEL_SYSTEM_ROUTINE_PATH "
+ "environment variable\n");
+ }
+ }
+
brw_init_state( brw );
brw->curbe.last_buf = calloc(1, 4096);
@@ -713,6 +713,10 @@ struct brw_context
* Pre-gen6, push constants live in the CURBE.
*/
uint32_t push_const_offset;
+
+ /** Debug the wm shaders with the system routine */
+ GLboolean debugging;
+ drm_intel_bo *system_routine;
} wm;
@@ -194,6 +194,7 @@ bool do_wm_prog(struct brw_context *brw,
struct brw_wm_compile *c;
const GLuint *program;
GLuint program_size;
+ uint32_t total_scratch = 0;
c = brw->wm.compile_data;
if (c == NULL) {
@@ -239,30 +240,32 @@ bool do_wm_prog(struct brw_context *brw,
c->prog_data.dispatch_width = 16;
}
- /* Scratch space is used for register spilling */
- if (c->last_scratch) {
- uint32_t total_scratch;
-
- /* Per-thread scratch space is power-of-two sized. */
- for (c->prog_data.total_scratch = 1024;
- c->prog_data.total_scratch <= c->last_scratch;
- c->prog_data.total_scratch *= 2) {
- /* empty */
- }
- total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
-
+ /* Scratch space is used for register spilling and debugger shared memory.
+ * Since the debugger must know the size in advance, we try for a fixed size,
+ * and hope it's large enough
+ */
+ if (c->last_scratch && !brw->wm.debugging) {
+ /* Per-thread scratch space is power-of-two sized. */
+ for (c->prog_data.total_scratch = 1024;
+ c->prog_data.total_scratch <= c->last_scratch;
+ c->prog_data.total_scratch *= 2) {
+ /* empty */
+ }
if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
- drm_intel_bo_unreference(brw->wm.scratch_bo);
- brw->wm.scratch_bo = NULL;
+ drm_intel_bo_unreference(brw->wm.scratch_bo);
+ brw->wm.scratch_bo = NULL;
}
if (brw->wm.scratch_bo == NULL) {
- brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+ brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
"wm scratch",
total_scratch,
4096);
}
- }
- else {
+ } else if (brw->wm.debugging) {
+ assert(brw->wm.scratch_bo);
+ assert(brw->wm.scratch_bo->size / 2 >= c->last_scratch);
+ c->prog_data.total_scratch = brw->wm.scratch_bo->size;
+ } else {
c->prog_data.total_scratch = 0;
}
@@ -309,6 +309,8 @@ void brw_wm_print_insn( struct brw_wm_compile *c,
void brw_wm_print_program( struct brw_wm_compile *c,
const char *stage );
+void brw_wm_init_debug( struct brw_context *brw, const char *sr_path );
+
void brw_wm_lookup_iz(struct intel_context *intel,
struct brw_wm_compile *c);
@@ -172,3 +172,72 @@ void brw_wm_print_program( struct brw_wm_compile *c,
printf("\n");
}
+/* This define should be shared with the debugger. Not sure of the best place
+ * for it */
+#define SCRATCH_SIZE (512 * 1024)
+void brw_wm_init_debug( struct brw_context *brw,
+ const char *sr_path )
+{
+ struct intel_context *intel = &brw->intel;
+ struct drm_i915_gem_set_domain domain;
+ struct brw_instruction *insn;
+ int sr_insns, i;
+ FILE *stream;
+ size_t size;
+ uint32_t name;
+ int ret;
+
+ stream = fopen(sr_path, "rb");
+ if (stream == NULL) {
+ fprintf(stderr, "Couldn't find the system routine in %s, shader debugging"
+ " will be disabled\n", sr_path);
+ brw->wm.debugging = GL_FALSE;
+ return;
+ }
+
+ /* can we use sys headers? */
+ fseek(stream, 0, SEEK_END);
+ sr_insns = ftell(stream) / sizeof(*insn);
+ insn = calloc(sr_insns, sizeof(*insn));
+ fseek(stream, 0, SEEK_SET);
+
+ for (i = 0; i < sr_insns; i++) {
+ size = fread(&insn[i], sizeof(*insn), 1, stream);
+ if (size != 1)
+ break;
+ }
+
+ if (i != sr_insns) {
+ fprintf(stderr, "Not all system routine bytes absorbed (%d/%d)\n",
+ i, sr_insns);
+ goto done;
+ }
+
+ brw->wm.system_routine = drm_intel_bo_alloc(intel->bufmgr, "system routine",
+ i * sizeof(*insn), 4096);
+ drm_intel_bo_subdata(brw->wm.system_routine, 0, i * sizeof(*insn), insn);
+
+ /* In the debug case, we allocate the buffer now because we'll need to attach
+ * with the debugger at a later time when flink will not work (the ring /
+ * struct_mutex are likely to be frozen). If we make the buffer visible early
+ * enough, the debugger can map it before the first breakpoint.
+ */
+ brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+ "wm scratch",
+ SCRATCH_SIZE * brw->wm_max_threads,
+ 4096);
+ assert(brw->wm.scratch_bo);
+
+ drm_intel_bo_map(brw->wm.scratch_bo, 0);
+ memset(brw->wm.scratch_bo->virtual, 0xa5, SCRATCH_SIZE * brw->wm_max_threads);
+ drm_intel_bo_unmap(brw->wm.scratch_bo);
+
+ ret = drm_intel_bo_flink(brw->wm.scratch_bo, &name);
+ assert(ret == 0);
+
+ brw->wm.debugging = GL_TRUE;
+
+done:
+ free(insn);
+ fclose(stream);
+}
The debugger shared memory needs to be a fixed size. Since this is scratch memory that is already used by register spilling, add appropriate hooks to do the right thing when debugging. Also copy in a binary blob system routine based on an environment variable. This blob will need to be relocated, and emitted as part of the state setup. We do this part now since there is no point debugging if that fails. Signed-off-by: Ben Widawsky <ben@bwidawsk.net> --- src/mesa/drivers/dri/i965/brw_context.c | 11 +++++ src/mesa/drivers/dri/i965/brw_context.h | 4 ++ src/mesa/drivers/dri/i965/brw_wm.c | 37 +++++++++------- src/mesa/drivers/dri/i965/brw_wm.h | 2 + src/mesa/drivers/dri/i965/brw_wm_debug.c | 69 ++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 17 deletions(-)