@@ -11,6 +11,8 @@ gram.h: gram.c
intel_gen4asm_SOURCES = \
brw_defines.h \
+ brw_eu.h \
+ brw_reg.h \
brw_structs.h \
gen4asm.h \
gram.y \
@@ -19,7 +21,7 @@ intel_gen4asm_SOURCES = \
$(NULL)
intel_gen4disasm_SOURCES = \
- disasm.c disasm-main.c
+ brw_disasm.c disasm-main.c
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = intel-gen4asm.pc
similarity index 52%
rename from assembler/disasm.c
rename to assembler/brw_disasm.c
@@ -28,13 +28,9 @@
#include <stdarg.h>
#include "gen4asm.h"
-#include "brw_defines.h"
+#include "brw_eu.h"
-struct {
- char *name;
- int nsrc;
- int ndst;
-} opcode[128] = {
+const struct opcode_desc opcode_descs[128] = {
[BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
@@ -48,13 +44,15 @@ struct {
[BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1},
+ [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
@@ -71,12 +69,12 @@ struct {
[BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
[BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
- [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 1, .ndst = 01 },
- [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
- [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 1, .ndst = 0 },
+ [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
[BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
[BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
@@ -87,8 +85,9 @@ struct {
[BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
[BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
};
+static const struct opcode_desc *opcode = opcode_descs;
-char *conditional_modifier[16] = {
+static const char * const conditional_modifier[16] = {
[BRW_CONDITIONAL_NONE] = "",
[BRW_CONDITIONAL_Z] = ".e",
[BRW_CONDITIONAL_NZ] = ".ne",
@@ -101,17 +100,17 @@ char *conditional_modifier[16] = {
[BRW_CONDITIONAL_U] = ".u",
};
-char *negate[2] = {
+static const char * const negate_op[2] = {
[0] = "",
[1] = "-",
};
-char *_abs[2] = {
+static const char * const _abs[2] = {
[0] = "",
[1] = "(abs)",
};
-char *vert_stride[16] = {
+static const char * const vert_stride[16] = {
[0] = "0",
[1] = "1",
[2] = "2",
@@ -122,7 +121,7 @@ char *vert_stride[16] = {
[15] = "VxH",
};
-char *width[8] = {
+static const char * const width[8] = {
[0] = "1",
[1] = "2",
[2] = "4",
@@ -130,34 +129,41 @@ char *width[8] = {
[4] = "16",
};
-char *horiz_stride[4] = {
+static const char * const horiz_stride[4] = {
[0] = "0",
[1] = "1",
[2] = "2",
[3] = "4"
};
-char *chan_sel[4] = {
+static const char * const chan_sel[4] = {
[0] = "x",
[1] = "y",
[2] = "z",
[3] = "w",
};
-char *dest_condmod[16] = {
-};
-
-char *debug_ctrl[2] = {
+static const char * const debug_ctrl[2] = {
[0] = "",
[1] = ".breakpoint"
};
-char *saturate[2] = {
+static const char * const saturate[2] = {
[0] = "",
[1] = ".sat"
};
-char *exec_size[8] = {
+static const char * const accwr[2] = {
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
+static const char * const wectrl[2] = {
+ [0] = "WE_normal",
+ [1] = "WE_all"
+};
+
+static const char * const exec_size[8] = {
[0] = "1",
[1] = "2",
[2] = "4",
@@ -166,12 +172,12 @@ char *exec_size[8] = {
[5] = "32"
};
-char *pred_inv[2] = {
+static const char * const pred_inv[2] = {
[0] = "+",
[1] = "-"
};
-char *pred_ctrl_align16[16] = {
+static const char * const pred_ctrl_align16[16] = {
[1] = "",
[2] = ".x",
[3] = ".y",
@@ -181,7 +187,7 @@ char *pred_ctrl_align16[16] = {
[7] = ".all4h",
};
-char *pred_ctrl_align1[16] = {
+static const char * const pred_ctrl_align1[16] = {
[1] = "",
[2] = ".anyv",
[3] = ".allv",
@@ -195,35 +201,36 @@ char *pred_ctrl_align1[16] = {
[11] = ".all16h",
};
-char *thread_ctrl[4] = {
+static const char * const thread_ctrl[4] = {
[0] = "",
[2] = "switch"
};
-char *compr_ctrl[4] = {
+static const char * const compr_ctrl[4] = {
[0] = "",
[1] = "sechalf",
[2] = "compr",
+ [3] = "compr4",
};
-char *dep_ctrl[4] = {
+static const char * const dep_ctrl[4] = {
[0] = "",
[1] = "NoDDClr",
[2] = "NoDDChk",
[3] = "NoDDClr,NoDDChk",
};
-char *mask_ctrl[4] = {
+static const char * const mask_ctrl[4] = {
[0] = "",
[1] = "nomask",
};
-char *access_mode[2] = {
+static const char * const access_mode[2] = {
[0] = "align1",
[1] = "align16",
};
-char *reg_encoding[8] = {
+static const char * const reg_encoding[8] = {
[0] = "UD",
[1] = "D",
[2] = "UW",
@@ -233,24 +240,24 @@ char *reg_encoding[8] = {
[7] = "F"
};
-char *imm_encoding[8] = {
- [0] = "UD",
- [1] = "D",
- [2] = "UW",
- [3] = "W",
- [5] = "VF",
- [6] = "V",
- [7] = "F"
+const int reg_type_size[8] = {
+ [0] = 4,
+ [1] = 4,
+ [2] = 2,
+ [3] = 2,
+ [4] = 1,
+ [5] = 1,
+ [7] = 4
};
-char *reg_file[4] = {
+static const char * const reg_file[4] = {
[0] = "A",
[1] = "g",
[2] = "m",
[3] = "imm",
};
-char *writemask[16] = {
+static const char * const writemask[16] = {
[0x0] = ".",
[0x1] = ".x",
[0x2] = ".y",
@@ -269,12 +276,12 @@ char *writemask[16] = {
[0xf] = "",
};
-char *end_of_thread[2] = {
+static const char * const end_of_thread[2] = {
[0] = "",
[1] = "EOT"
};
-char *target_function[16] = {
+static const char * const target_function[16] = {
[BRW_SFID_NULL] = "null",
[BRW_SFID_MATH] = "math",
[BRW_SFID_SAMPLER] = "sampler",
@@ -285,7 +292,37 @@ char *target_function[16] = {
[BRW_SFID_THREAD_SPAWNER] = "thread_spawner"
};
-char *math_function[16] = {
+static const char * const target_function_gen6[16] = {
+ [BRW_SFID_NULL] = "null",
+ [BRW_SFID_MATH] = "math",
+ [BRW_SFID_SAMPLER] = "sampler",
+ [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
+ [BRW_SFID_URB] = "urb",
+ [BRW_SFID_THREAD_SPAWNER] = "thread_spawner",
+ [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
+ [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
+ [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
+ [GEN7_SFID_DATAPORT_DATA_CACHE] = "data"
+};
+
+static const char * const dp_rc_msg_type_gen6[16] = {
+ [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+ [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+ [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+ [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+ [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+ [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
+static const char * const math_function[16] = {
[BRW_MATH_FUNCTION_INV] = "inv",
[BRW_MATH_FUNCTION_LOG] = "log",
[BRW_MATH_FUNCTION_EXP] = "exp",
@@ -297,52 +334,57 @@ char *math_function[16] = {
[BRW_MATH_FUNCTION_TAN] = "tan",
[BRW_MATH_FUNCTION_POW] = "pow",
[BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
- [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intmod",
- [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intdiv",
+ [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
+ [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
};
-char *math_saturate[2] = {
+static const char * const math_saturate[2] = {
[0] = "",
[1] = "sat"
};
-char *math_signed[2] = {
+static const char * const math_signed[2] = {
[0] = "",
[1] = "signed"
};
-char *math_scalar[2] = {
+static const char * const math_scalar[2] = {
[0] = "",
[1] = "scalar"
};
-char *math_precision[2] = {
+static const char * const math_precision[2] = {
[0] = "",
[1] = "partial_precision"
};
-char *urb_swizzle[4] = {
+static const char * const urb_opcode[2] = {
+ [0] = "urb_write",
+ [1] = "ff_sync",
+};
+
+static const char * const urb_swizzle[4] = {
[BRW_URB_SWIZZLE_NONE] = "",
[BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
[BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
};
-char *urb_allocate[2] = {
+static const char * const urb_allocate[2] = {
[0] = "",
[1] = "allocate"
};
-char *urb_used[2] = {
+static const char * const urb_used[2] = {
[0] = "",
[1] = "used"
};
-char *urb_complete[2] = {
+static const char * const urb_complete[2] = {
[0] = "",
[1] = "complete"
};
-char *sampler_target_format[4] = {
+static const char * const sampler_target_format[4] = {
[0] = "F",
[2] = "UD",
[3] = "D"
@@ -351,20 +393,21 @@ char *sampler_target_format[4] = {
static int column;
-static int string (FILE *file, char *string)
+static int string (FILE *file, const char *string)
{
fputs (string, file);
column += strlen (string);
return 0;
}
-static int format (FILE *f, char *format, ...)
+static int format (FILE *f, const char *format, ...)
{
char buf[1024];
va_list args;
va_start (args, format);
vsnprintf (buf, sizeof (buf) - 1, format, args);
+ va_end (args);
string (f, buf);
return 0;
}
@@ -384,7 +427,8 @@ static int pad (FILE *f, int c)
return 0;
}
-static int control (FILE *file, char *name, char *ctrl[], GLuint id, int *space)
+static int control (FILE *file, const char *name, const char * const ctrl[],
+ GLuint id, int *space)
{
if (!ctrl[id]) {
fprintf (file, "*** invalid %s value %d ",
@@ -415,6 +459,11 @@ static int print_opcode (FILE *file, int id)
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
int err = 0;
+
+ /* Clear the Compr4 instruction compression bit. */
+ if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+ _reg_nr &= ~(1 << 7);
+
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
@@ -426,6 +475,9 @@ static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
case BRW_ARF_ACCUMULATOR:
format (file, "acc%d", _reg_nr & 0x0f);
break;
+ case BRW_ARF_FLAG:
+ format (file, "f%d", _reg_nr & 0x0f);
+ break;
case BRW_ARF_MASK:
format (file, "mask%d", _reg_nr & 0x0f);
break;
@@ -468,7 +520,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+ reg_type_size[inst->bits1.da1.dest_reg_type]);
format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
}
@@ -476,7 +529,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
{
string (file, "g[a0");
if (inst->bits1.ia1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+ reg_type_size[inst->bits1.ia1.dest_reg_type]);
if (inst->bits1.ia1.dest_indirect_offset)
format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
string (file, "]");
@@ -492,7 +546,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da16.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+ reg_type_size[inst->bits1.da16.dest_reg_type]);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
@@ -507,6 +562,28 @@ static int dest (FILE *file, struct brw_instruction *inst)
return 0;
}
+static int dest_3src (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+ uint32_t reg_file;
+
+ if (inst->bits1.da3src.dest_reg_file)
+ reg_file = BRW_MESSAGE_REGISTER_FILE;
+ else
+ reg_file = BRW_GENERAL_REGISTER_FILE;
+
+ err |= reg (file, reg_file, inst->bits1.da3src.dest_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits1.da3src.dest_subreg_nr)
+ format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
+ string (file, "<1>");
+ err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL);
+ err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL);
+
+ return 0;
+}
+
static int src_align1_region (FILE *file,
GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
{
@@ -526,14 +603,14 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
{
int err = 0;
- err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "negate", negate_op, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
err |= reg (file, _reg_file, reg_num);
if (err == -1)
return 0;
if (sub_reg_num)
- format (file, ".%d", sub_reg_num);
+ format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
@@ -552,7 +629,7 @@ static int src_ia1 (FILE *file,
GLuint _vert_stride)
{
int err = 0;
- err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "negate", negate_op, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
string (file, "g[a0");
@@ -580,18 +657,120 @@ static int src_da16 (FILE *file,
GLuint swz_w)
{
int err = 0;
- err |= control (file, "negate", negate, _negate, NULL);
+ err |= control (file, "negate", negate_op, _negate, NULL);
err |= control (file, "abs", _abs, __abs, NULL);
err |= reg (file, _reg_file, _reg_nr);
if (err == -1)
return 0;
if (_subreg_nr)
- format (file, ".%d", _subreg_nr);
+ /* bit4 for subreg number byte addressing. Make this same meaning as
+ in da1 case, so output looks consistent. */
+ format (file, ".%d", 16 / reg_type_size[_reg_type]);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
- string (file, ",1,1>");
+ string (file, ",4,1>");
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == BRW_CHANNEL_X &&
+ swz_y == BRW_CHANNEL_Y &&
+ swz_z == BRW_CHANNEL_Z &&
+ swz_w == BRW_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+ return err;
+}
+
+static int src0_3src (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+ GLuint swz_x = (inst->bits2.da3src.src0_swizzle >> 0) & 0x3;
+ GLuint swz_y = (inst->bits2.da3src.src0_swizzle >> 2) & 0x3;
+ GLuint swz_z = (inst->bits2.da3src.src0_swizzle >> 4) & 0x3;
+ GLuint swz_w = (inst->bits2.da3src.src0_swizzle >> 6) & 0x3;
+
+ err |= control (file, "negate", negate_op, inst->bits1.da3src.src0_negate, NULL);
+ err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);
+
+ err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits2.da3src.src0_subreg_nr)
+ format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
+ string (file, "<4,1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding,
+ BRW_REGISTER_TYPE_F, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == BRW_CHANNEL_X &&
+ swz_y == BRW_CHANNEL_Y &&
+ swz_z == BRW_CHANNEL_Z &&
+ swz_w == BRW_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
+static int src1_3src (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+ GLuint swz_x = (inst->bits2.da3src.src1_swizzle >> 0) & 0x3;
+ GLuint swz_y = (inst->bits2.da3src.src1_swizzle >> 2) & 0x3;
+ GLuint swz_z = (inst->bits2.da3src.src1_swizzle >> 4) & 0x3;
+ GLuint swz_w = (inst->bits2.da3src.src1_swizzle >> 6) & 0x3;
+ GLuint src1_subreg_nr = (inst->bits2.da3src.src1_subreg_nr_low |
+ (inst->bits3.da3src.src1_subreg_nr_high << 2));
+
+ err |= control (file, "negate", negate_op, inst->bits1.da3src.src1_negate,
+ NULL);
+ err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);
+
+ err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+ inst->bits3.da3src.src1_reg_nr);
+ if (err == -1)
+ return 0;
+ if (src1_subreg_nr)
+ format (file, ".%d", src1_subreg_nr);
+ string (file, "<4,1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding,
+ BRW_REGISTER_TYPE_F, NULL);
/*
* Three kinds of swizzle display:
* identity - nothing printed
@@ -622,6 +801,56 @@ static int src_da16 (FILE *file,
}
+static int src2_3src (FILE *file, struct brw_instruction *inst)
+{
+ int err = 0;
+ GLuint swz_x = (inst->bits3.da3src.src2_swizzle >> 0) & 0x3;
+ GLuint swz_y = (inst->bits3.da3src.src2_swizzle >> 2) & 0x3;
+ GLuint swz_z = (inst->bits3.da3src.src2_swizzle >> 4) & 0x3;
+ GLuint swz_w = (inst->bits3.da3src.src2_swizzle >> 6) & 0x3;
+
+ err |= control (file, "negate", negate_op, inst->bits1.da3src.src2_negate,
+ NULL);
+ err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL);
+
+ err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+ inst->bits3.da3src.src2_reg_nr);
+ if (err == -1)
+ return 0;
+ if (inst->bits3.da3src.src2_subreg_nr)
+ format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
+ string (file, "<4,1,1>");
+ err |= control (file, "src da16 reg type", reg_encoding,
+ BRW_REGISTER_TYPE_F, NULL);
+ /*
+ * Three kinds of swizzle display:
+ * identity - nothing printed
+ * 1->all - print the single channel
+ * 1->1 - print the mapping
+ */
+ if (swz_x == BRW_CHANNEL_X &&
+ swz_y == BRW_CHANNEL_Y &&
+ swz_z == BRW_CHANNEL_Z &&
+ swz_w == BRW_CHANNEL_W)
+ {
+ ;
+ }
+ else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ }
+ else
+ {
+ string (file, ".");
+ err |= control (file, "channel select", chan_sel, swz_x, NULL);
+ err |= control (file, "channel select", chan_sel, swz_y, NULL);
+ err |= control (file, "channel select", chan_sel, swz_z, NULL);
+ err |= control (file, "channel select", chan_sel, swz_w, NULL);
+ }
+ return err;
+}
+
static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
switch (type) {
case BRW_REGISTER_TYPE_UD:
@@ -771,7 +1000,45 @@ static int src1 (FILE *file, struct brw_instruction *inst)
}
}
-int disasm (FILE *file, struct brw_instruction *inst)
+int esize[6] = {
+ [0] = 1,
+ [1] = 2,
+ [2] = 4,
+ [3] = 8,
+ [4] = 16,
+ [5] = 32,
+};
+
+static int qtr_ctrl(FILE *file, struct brw_instruction *inst)
+{
+ int qtr_ctl = inst->header.compression_control;
+ int exec_size = esize[inst->header.execution_size];
+
+ if (exec_size == 8) {
+ switch (qtr_ctl) {
+ case 0:
+ string (file, " 1Q");
+ break;
+ case 1:
+ string (file, " 2Q");
+ break;
+ case 2:
+ string (file, " 3Q");
+ break;
+ case 3:
+ string (file, " 4Q");
+ break;
+ }
+ } else if (exec_size == 16){
+ if (qtr_ctl < 2)
+ string (file, " 1H");
+ else
+ string (file, " 2H");
+ }
+ return 0;
+}
+
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
{
int err = 0;
int space = 0;
@@ -779,7 +1046,7 @@ int disasm (FILE *file, struct brw_instruction *inst)
if (inst->header.predicate_control) {
string (file, "(");
err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
- format (file, "f%d", inst->bits2.da1.flag_reg_nr);
+ format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
if (inst->bits2.da1.flag_subreg_nr)
format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
if (inst->header.access_mode == BRW_ALIGN_1)
@@ -795,42 +1062,106 @@ int disasm (FILE *file, struct brw_instruction *inst)
err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
- if (inst->header.opcode != BRW_OPCODE_SEND &&
- inst->header.opcode != BRW_OPCODE_SENDC)
+ if (inst->header.opcode == BRW_OPCODE_MATH) {
+ string (file, " ");
+ err |= control (file, "function", math_function,
+ inst->header.destreg__conditionalmod, NULL);
+ } else if (inst->header.opcode != BRW_OPCODE_SEND &&
+ inst->header.opcode != BRW_OPCODE_SENDC) {
err |= control (file, "conditional modifier", conditional_modifier,
inst->header.destreg__conditionalmod, NULL);
+ /* If we're using the conditional modifier, print which flags reg is
+ * used for it. Note that on gen6+, the embedded-condition SEL and
+ * control flow doesn't update flags.
+ */
+ if (inst->header.destreg__conditionalmod &&
+ (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL &&
+ inst->header.opcode != BRW_OPCODE_IF &&
+ inst->header.opcode != BRW_OPCODE_WHILE))) {
+ format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
+ if (inst->bits2.da1.flag_subreg_nr)
+ format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
+ }
+ }
+
if (inst->header.opcode != BRW_OPCODE_NOP) {
string (file, "(");
err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
string (file, ")");
}
- if (inst->header.opcode == BRW_OPCODE_SEND ||
- inst->header.opcode == BRW_OPCODE_SENDC)
+ if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
format (file, " %d", inst->header.destreg__conditionalmod);
- if (opcode[inst->header.opcode].ndst > 0) {
- pad (file, 16);
- err |= dest (file, inst);
- }
- if (opcode[inst->header.opcode].nsrc > 0) {
- pad (file, 32);
- err |= src0 (file, inst);
- }
- if (opcode[inst->header.opcode].nsrc > 1) {
- pad (file, 48);
- err |= src1 (file, inst);
+ if (opcode[inst->header.opcode].nsrc == 3) {
+ pad (file, 16);
+ err |= dest_3src (file, inst);
+
+ pad (file, 32);
+ err |= src0_3src (file, inst);
+
+ pad (file, 48);
+ err |= src1_3src (file, inst);
+
+ pad (file, 64);
+ err |= src2_3src (file, inst);
+ } else {
+ if (opcode[inst->header.opcode].ndst > 0) {
+ pad (file, 16);
+ err |= dest (file, inst);
+ } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE ||
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
+ format (file, " %d", inst->bits3.break_cont.jip);
+ } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF ||
+ inst->header.opcode == BRW_OPCODE_ELSE ||
+ inst->header.opcode == BRW_OPCODE_ENDIF ||
+ inst->header.opcode == BRW_OPCODE_WHILE)) {
+ format (file, " %d", inst->bits1.branch_gen6.jump_count);
+ } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK ||
+ inst->header.opcode == BRW_OPCODE_CONTINUE ||
+ inst->header.opcode == BRW_OPCODE_HALT)) ||
+ (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) {
+ format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
+ } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
+ format (file, " %d", inst->bits3.d);
+ }
+
+ if (opcode[inst->header.opcode].nsrc > 0) {
+ pad (file, 32);
+ err |= src0 (file, inst);
+ }
+ if (opcode[inst->header.opcode].nsrc > 1) {
+ pad (file, 48);
+ err |= src1 (file, inst);
+ }
}
if (inst->header.opcode == BRW_OPCODE_SEND ||
inst->header.opcode == BRW_OPCODE_SENDC) {
+ enum brw_message_target target;
+
+ if (gen >= 6)
+ target = inst->header.destreg__conditionalmod;
+ else if (gen == 5)
+ target = inst->bits2.send_gen5.sfid;
+ else
+ target = inst->bits3.generic.msg_target;
+
newline (file);
pad (file, 16);
space = 0;
- err |= control (file, "target function", target_function,
- inst->header.destreg__conditionalmod, &space);
- switch (inst->header.destreg__conditionalmod) {
+
+ if (gen >= 6) {
+ err |= control (file, "target function", target_function_gen6,
+ target, &space);
+ } else {
+ err |= control (file, "target function", target_function,
+ target, &space);
+ }
+
+ switch (target) {
case BRW_SFID_MATH:
err |= control (file, "math function", math_function,
inst->bits3.math.function, &space);
@@ -844,24 +1175,98 @@ int disasm (FILE *file, struct brw_instruction *inst)
inst->bits3.math.precision, &space);
break;
case BRW_SFID_SAMPLER:
- format (file, " (%d, %d, ",
- inst->bits3.sampler.binding_table_index,
- inst->bits3.sampler.sampler);
- err |= control (file, "sampler target format", sampler_target_format,
- inst->bits3.sampler.return_format, NULL);
- string (file, ")");
+ if (gen >= 7) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen7.binding_table_index,
+ inst->bits3.sampler_gen7.sampler,
+ inst->bits3.sampler_gen7.msg_type,
+ inst->bits3.sampler_gen7.simd_mode);
+ } else if (gen >= 5) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen5.binding_table_index,
+ inst->bits3.sampler_gen5.sampler,
+ inst->bits3.sampler_gen5.msg_type,
+ inst->bits3.sampler_gen5.simd_mode);
+ } else if (0 /* FINISHME: is_g4x */) {
+ format (file, " (%d, %d)",
+ inst->bits3.sampler_g4x.binding_table_index,
+ inst->bits3.sampler_g4x.sampler);
+ } else {
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format",
+ sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ }
+ break;
+ case BRW_SFID_DATAPORT_READ:
+ if (gen >= 6) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.gen6_dp.binding_table_index,
+ inst->bits3.gen6_dp.msg_control,
+ inst->bits3.gen6_dp.msg_type,
+ inst->bits3.gen6_dp.send_commit_msg);
+ } else if (gen >= 5 /* FINISHME: || is_g4x */) {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read_gen5.binding_table_index,
+ inst->bits3.dp_read_gen5.msg_control,
+ inst->bits3.dp_read_gen5.msg_type);
+ } else {
+ format (file, " (%d, %d, %d)",
+ inst->bits3.dp_read.binding_table_index,
+ inst->bits3.dp_read.msg_control,
+ inst->bits3.dp_read.msg_type);
+ }
break;
+
case BRW_SFID_DATAPORT_WRITE:
- format (file, " (%d, %d, %d, %d)",
- inst->bits3.dp_write.binding_table_index,
- (inst->bits3.dp_write.last_render_target << 3) |
- inst->bits3.dp_write.msg_control,
- inst->bits3.dp_write.msg_type,
- inst->bits3.dp_write.send_commit_msg);
+ if (gen >= 7) {
+ format (file, " (");
+
+ err |= control (file, "DP rc message type",
+ dp_rc_msg_type_gen6,
+ inst->bits3.gen7_dp.msg_type, &space);
+
+ format (file, ", %d, %d, %d)",
+ inst->bits3.gen7_dp.binding_table_index,
+ inst->bits3.gen7_dp.msg_control,
+ inst->bits3.gen7_dp.msg_type);
+ } else if (gen == 6) {
+ format (file, " (");
+
+ err |= control (file, "DP rc message type",
+ dp_rc_msg_type_gen6,
+ inst->bits3.gen6_dp.msg_type, &space);
+
+ format (file, ", %d, %d, %d, %d)",
+ inst->bits3.gen6_dp.binding_table_index,
+ inst->bits3.gen6_dp.msg_control,
+ inst->bits3.gen6_dp.msg_type,
+ inst->bits3.gen6_dp.send_commit_msg);
+ } else {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.dp_write.binding_table_index,
+ (inst->bits3.dp_write.last_render_target << 3) |
+ inst->bits3.dp_write.msg_control,
+ inst->bits3.dp_write.msg_type,
+ inst->bits3.dp_write.send_commit_msg);
+ }
break;
+
case BRW_SFID_URB:
- format (file, " %d", inst->bits3.urb.offset);
+ if (gen >= 5) {
+ format (file, " %d", inst->bits3.urb_gen5.offset);
+ } else {
+ format (file, " %d", inst->bits3.urb.offset);
+ }
+
space = 1;
+ if (gen >= 5) {
+ err |= control (file, "urb opcode", urb_opcode,
+ inst->bits3.urb_gen5.opcode, &space);
+ }
err |= control (file, "urb swizzle", urb_swizzle,
inst->bits3.urb.swizzle_control, &space);
err |= control (file, "urb allocate", urb_allocate,
@@ -873,27 +1278,62 @@ int disasm (FILE *file, struct brw_instruction *inst)
break;
case BRW_SFID_THREAD_SPAWNER:
break;
+ case GEN7_SFID_DATAPORT_DATA_CACHE:
+ format (file, " (%d, %d, %d)",
+ inst->bits3.gen7_dp.binding_table_index,
+ inst->bits3.gen7_dp.msg_control,
+ inst->bits3.gen7_dp.msg_type);
+ break;
+
+
default:
- format (file, "unsupported target %d", inst->bits3.generic.msg_target);
+ format (file, "unsupported target %d", target);
break;
}
if (space)
string (file, " ");
- format (file, "mlen %d",
- inst->bits3.generic.msg_length);
- format (file, " rlen %d",
- inst->bits3.generic.response_length);
+ if (gen >= 5) {
+ format (file, "mlen %d",
+ inst->bits3.generic_gen5.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic_gen5.response_length);
+ } else {
+ format (file, "mlen %d",
+ inst->bits3.generic.msg_length);
+ format (file, " rlen %d",
+ inst->bits3.generic.response_length);
+ }
}
pad (file, 64);
if (inst->header.opcode != BRW_OPCODE_NOP) {
string (file, "{");
space = 1;
err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
- err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+ if (gen >= 6)
+ err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
+ else
+ err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
- err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+
+ if (gen >= 6)
+ err |= qtr_ctrl (file, inst);
+ else {
+ if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
+ opcode[inst->header.opcode].ndst > 0 &&
+ inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
+ inst->bits1.da1.dest_reg_nr & (1 << 7)) {
+ format (file, " compr4");
+ } else {
+ err |= control (file, "compression control", compr_ctrl,
+ inst->header.compression_control, &space);
+ }
+ }
+
err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
- if (inst->header.opcode == BRW_OPCODE_SEND)
+ if (gen >= 6)
+ err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
+ if (inst->header.opcode == BRW_OPCODE_SEND ||
+ inst->header.opcode == BRW_OPCODE_SENDC)
err |= control (file, "end of thread", end_of_thread,
inst->bits3.generic.end_of_thread, &space);
if (space)
new file mode 100644
@@ -0,0 +1,405 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#ifndef BRW_EU_H
+#define BRW_EU_H
+
+#include <stdbool.h>
+#include "brw_structs.h"
+#include "brw_defines.h"
+#include "brw_reg.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define BRW_EU_MAX_INSN_STACK 5
+
+struct brw_compile {
+ struct brw_instruction *store;
+ int store_size;
+ GLuint nr_insn;
+ unsigned int next_insn_offset;
+
+ void *mem_ctx;
+
+ /* Allow clients to push/pop instruction state:
+ */
+ struct brw_instruction stack[BRW_EU_MAX_INSN_STACK];
+ bool compressed_stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_instruction *current;
+
+ GLuint flag_value;
+ bool single_program_flow;
+ bool compressed;
+ struct brw_context *brw;
+
+ /* Control flow stacks:
+ * - if_stack contains IF and ELSE instructions which must be patched
+ * (and popped) once the matching ENDIF instruction is encountered.
+ *
+ * Just store the instruction pointer(an index).
+ */
+ int *if_stack;
+ int if_stack_depth;
+ int if_stack_array_size;
+
+ /**
+ * loop_stack contains the instruction pointers of the starts of loops which
+ * must be patched (and popped) once the matching WHILE instruction is
+ * encountered.
+ */
+ int *loop_stack;
+ /**
+ * pre-gen6, the BREAK and CONT instructions had to tell how many IF/ENDIF
+ * blocks they were popping out of, to fix up the mask stack. This tracks
+ * the IF/ENDIF nesting in each current nested loop level.
+ */
+ int *if_depth_in_loop;
+ int loop_stack_depth;
+ int loop_stack_array_size;
+};
+
+static inline struct brw_instruction *current_insn( struct brw_compile *p)
+{
+ return &p->store[p->nr_insn];
+}
+
+void brw_pop_insn_state( struct brw_compile *p );
+void brw_push_insn_state( struct brw_compile *p );
+void brw_set_mask_control( struct brw_compile *p, GLuint value );
+void brw_set_saturate( struct brw_compile *p, bool enable );
+void brw_set_access_mode( struct brw_compile *p, GLuint access_mode );
+void brw_set_compression_control(struct brw_compile *p, enum brw_compression c);
+void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
+void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
+void brw_set_predicate_inverse(struct brw_compile *p, bool predicate_inverse);
+void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+void brw_set_flag_reg(struct brw_compile *p, int reg, int subreg);
+void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
+
+void brw_init_compile(struct brw_context *, struct brw_compile *p,
+ void *mem_ctx);
+void brw_dump_compile(struct brw_compile *p, FILE *out, int start, int end);
+const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
+
+struct brw_instruction *brw_next_insn(struct brw_compile *p, GLuint opcode);
+void brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest);
+void brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg reg);
+
+void gen6_resolve_implied_move(struct brw_compile *p,
+ struct brw_reg *src,
+ GLuint msg_reg_nr);
+
+/* Helpers for regular instructions:
+ */
+#define ALU1(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0);
+
+#define ALU2(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1);
+
+#define ALU3(OP) \
+struct brw_instruction *brw_##OP(struct brw_compile *p, \
+ struct brw_reg dest, \
+ struct brw_reg src0, \
+ struct brw_reg src1, \
+ struct brw_reg src2);
+
+#define ROUND(OP) \
+void brw_##OP(struct brw_compile *p, struct brw_reg dest, struct brw_reg src0);
+
+ALU1(MOV)
+ALU2(SEL)
+ALU1(NOT)
+ALU2(AND)
+ALU2(OR)
+ALU2(XOR)
+ALU2(SHR)
+ALU2(SHL)
+ALU2(RSR)
+ALU2(RSL)
+ALU2(ASR)
+ALU2(JMPI)
+ALU2(ADD)
+ALU2(AVG)
+ALU2(MUL)
+ALU1(FRC)
+ALU1(RNDD)
+ALU2(MAC)
+ALU2(MACH)
+ALU1(LZD)
+ALU2(DP4)
+ALU2(DPH)
+ALU2(DP3)
+ALU2(DP2)
+ALU2(LINE)
+ALU2(PLN)
+ALU3(MAD)
+
+ROUND(RNDZ)
+ROUND(RNDE)
+
+#undef ALU1
+#undef ALU2
+#undef ALU3
+#undef ROUND
+
+
+/* Helpers for SEND instruction:
+ */
+void brw_set_sampler_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLuint header_present,
+ GLuint simd_mode,
+ GLuint return_format);
+
+void brw_set_dp_read_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint target_cache,
+ GLuint msg_length,
+ bool header_present,
+ GLuint response_length);
+
+void brw_set_dp_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ bool header_present,
+ GLuint last_render_target,
+ GLuint response_length,
+ GLuint end_of_thread,
+ GLuint send_commit_msg);
+
+void brw_urb_WRITE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ bool allocate,
+ bool used,
+ GLuint msg_length,
+ GLuint response_length,
+ bool eot,
+ bool writes_complete,
+ GLuint offset,
+ GLuint swizzle);
+
+void brw_ff_sync(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ bool allocate,
+ GLuint response_length,
+ bool eot);
+
+void brw_svb_write(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ bool send_commit_msg);
+
+void brw_fb_WRITE(struct brw_compile *p,
+ int dispatch_width,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint msg_control,
+ GLuint binding_table_index,
+ GLuint msg_length,
+ GLuint response_length,
+ bool eot,
+ bool header_present);
+
+void brw_SAMPLE(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint msg_reg_nr,
+ struct brw_reg src0,
+ GLuint binding_table_index,
+ GLuint sampler,
+ GLuint writemask,
+ GLuint msg_type,
+ GLuint response_length,
+ GLuint msg_length,
+ GLuint header_present,
+ GLuint simd_mode,
+ GLuint return_format);
+
+void brw_math( struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ GLuint msg_reg_nr,
+ struct brw_reg src,
+ GLuint data_type,
+ GLuint precision );
+
+void brw_math2(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint function,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+void brw_oword_block_read(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t offset,
+ uint32_t bind_table_index);
+
+void brw_oword_block_read_scratch(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ int num_regs,
+ GLuint offset);
+
+void brw_oword_block_write_scratch(struct brw_compile *p,
+ struct brw_reg mrf,
+ int num_regs,
+ GLuint offset);
+
+void brw_shader_time_add(struct brw_compile *p,
+ int mrf,
+ uint32_t surf_index);
+
+/* If/else/endif. Works by manipulating the execution flags on each
+ * channel.
+ */
+struct brw_instruction *brw_IF(struct brw_compile *p,
+ GLuint execute_size);
+struct brw_instruction *gen6_IF(struct brw_compile *p, uint32_t conditional,
+ struct brw_reg src0, struct brw_reg src1);
+
+void brw_ELSE(struct brw_compile *p);
+void brw_ENDIF(struct brw_compile *p);
+
+/* DO/WHILE loops:
+ */
+struct brw_instruction *brw_DO(struct brw_compile *p,
+ GLuint execute_size);
+
+struct brw_instruction *brw_WHILE(struct brw_compile *p);
+
+struct brw_instruction *brw_BREAK(struct brw_compile *p);
+struct brw_instruction *brw_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_HALT(struct brw_compile *p);
+/* Forward jumps:
+ */
+void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
+
+
+
+void brw_NOP(struct brw_compile *p);
+
+void brw_WAIT(struct brw_compile *p);
+
+/* Special case: there is never a destination, execution size will be
+ * taken from src0:
+ */
+void brw_CMP(struct brw_compile *p,
+ struct brw_reg dest,
+ GLuint conditional,
+ struct brw_reg src0,
+ struct brw_reg src1);
+
+/***********************************************************************
+ * brw_eu_util.c:
+ */
+
+void brw_copy_indirect_to_indirect(struct brw_compile *p,
+ struct brw_indirect dst_ptr,
+ struct brw_indirect src_ptr,
+ GLuint count);
+
+void brw_copy_from_indirect(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_indirect ptr,
+ GLuint count);
+
+void brw_copy4(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_copy8(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ GLuint count);
+
+void brw_math_invert( struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src);
+
+void brw_set_src1(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg reg);
+
+void brw_set_uip_jip(struct brw_compile *p);
+
+uint32_t brw_swap_cmod(uint32_t cmod);
+
+/* brw_optimize.c */
+void brw_optimize(struct brw_compile *p);
+void brw_remove_duplicate_mrf_moves(struct brw_compile *p);
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p);
+
+/* brw_disasm.c */
+struct opcode_desc {
+ char *name;
+ int nsrc;
+ int ndst;
+};
+
+extern const struct opcode_desc opcode_descs[128];
+
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
new file mode 100644
@@ -0,0 +1,808 @@
+/*
+ Copyright (C) Intel Corp. 2006. All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **********************************************************************/
+ /*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+/** @file brw_reg.h
+ *
+ * This file defines struct brw_reg, which is our representation for EU
+ * registers. They're not a hardware specific format, just an abstraction
+ * that intends to capture the full flexibility of the hardware registers.
+ *
+ * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode
+ * the abstract brw_reg type into the actual hardware instruction encoding.
+ */
+
+#ifndef BRW_REG_H
+#define BRW_REG_H
+
+#include <stdbool.h>
+#include <assert.h>
+#include "brw_defines.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Number of general purpose registers (VS, WM, etc) */
+#define BRW_MAX_GRF 128
+
+/**
+ * First GRF used for the MRF hack.
+ *
+ * On gen7, MRFs are no longer used, and contiguous GRFs are used instead. We
+ * haven't converted our compiler to be aware of this, so it asks for MRFs and
+ * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The
+ * register allocators have to be careful of this to avoid corrupting the "MRF"s
+ * with actual GRF allocations.
+ */
+#define GEN7_MRF_HACK_START 112
+
+/** Number of message register file registers */
+#define BRW_MAX_MRF 16
+
+#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))
+#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)
+
+#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
+#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
+#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
+
+static inline bool
+brw_is_single_value_swizzle(int swiz)
+{
+ return (swiz == BRW_SWIZZLE_XXXX ||
+ swiz == BRW_SWIZZLE_YYYY ||
+ swiz == BRW_SWIZZLE_ZZZZ ||
+ swiz == BRW_SWIZZLE_WWWW);
+}
+
+#define BRW_WRITEMASK_X 0x1
+#define BRW_WRITEMASK_Y 0x2
+#define BRW_WRITEMASK_Z 0x4
+#define BRW_WRITEMASK_W 0x8
+
+#define BRW_WRITEMASK_XY (BRW_WRITEMASK_X | BRW_WRITEMASK_Y)
+#define BRW_WRITEMASK_XZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Z)
+#define BRW_WRITEMASK_XW (BRW_WRITEMASK_X | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_YW (BRW_WRITEMASK_Y | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_ZW (BRW_WRITEMASK_Z | BRW_WRITEMASK_W)
+#define BRW_WRITEMASK_XYZ (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | BRW_WRITEMASK_Z)
+#define BRW_WRITEMASK_XYZW (BRW_WRITEMASK_X | BRW_WRITEMASK_Y | \
+ BRW_WRITEMASK_Z | BRW_WRITEMASK_W)
+
+#define REG_SIZE (8*4)
+
+/* These aren't hardware structs, just something useful for us to pass around:
+ *
+ * Align1 operation has a lot of control over input ranges. Used in
+ * WM programs to implement shaders decomposed into "channel serial"
+ * or "structure of array" form:
+ */
+struct brw_reg {
+ unsigned type:4;
+ unsigned file:2;
+ unsigned nr:8;
+ unsigned subnr:5; /* :1 in align16 */
+ unsigned negate:1; /* source only */
+ unsigned abs:1; /* source only */
+ unsigned vstride:4; /* source only */
+ unsigned width:3; /* src only, align1 only */
+ unsigned hstride:2; /* align1 only */
+ unsigned address_mode:1; /* relative addressing, hopefully! */
+ unsigned pad0:1;
+
+ union {
+ struct {
+ unsigned swizzle:8; /* src only, align16 only */
+ unsigned writemask:4; /* dest only, align16 only */
+ int indirect_offset:10; /* relative addressing offset */
+ unsigned pad1:10; /* two dwords total */
+ } bits;
+
+ float f;
+ int d;
+ unsigned ud;
+ } dw1;
+};
+
+
+struct brw_indirect {
+ unsigned addr_subnr:4;
+ int addr_offset:10;
+ unsigned pad:18;
+};
+
+
+static inline int
+type_sz(unsigned type)
+{
+ switch(type) {
+ case BRW_REGISTER_TYPE_UD:
+ case BRW_REGISTER_TYPE_D:
+ case BRW_REGISTER_TYPE_F:
+ return 4;
+ case BRW_REGISTER_TYPE_HF:
+ case BRW_REGISTER_TYPE_UW:
+ case BRW_REGISTER_TYPE_W:
+ return 2;
+ case BRW_REGISTER_TYPE_UB:
+ case BRW_REGISTER_TYPE_B:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * Construct a brw_reg.
+ * \param file one of the BRW_x_REGISTER_FILE values
+ * \param nr register number/index
+ * \param subnr register sub number
+ * \param type one of BRW_REGISTER_TYPE_x
+ * \param vstride one of BRW_VERTICAL_STRIDE_x
+ * \param width one of BRW_WIDTH_x
+ * \param hstride one of BRW_HORIZONTAL_STRIDE_x
+ * \param swizzle one of BRW_SWIZZLE_x
+ * \param writemask BRW_WRITEMASK_X/Y/Z/W bitfield
+ */
+static inline struct brw_reg
+brw_reg(unsigned file,
+ unsigned nr,
+ unsigned subnr,
+ unsigned type,
+ unsigned vstride,
+ unsigned width,
+ unsigned hstride,
+ unsigned swizzle,
+ unsigned writemask)
+{
+ struct brw_reg reg;
+ if (file == BRW_GENERAL_REGISTER_FILE)
+ assert(nr < BRW_MAX_GRF);
+ else if (file == BRW_MESSAGE_REGISTER_FILE)
+ assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+ else if (file == BRW_ARCHITECTURE_REGISTER_FILE)
+ assert(nr <= BRW_ARF_TIMESTAMP);
+
+ reg.type = type;
+ reg.file = file;
+ reg.nr = nr;
+ reg.subnr = subnr * type_sz(type);
+ reg.negate = 0;
+ reg.abs = 0;
+ reg.vstride = vstride;
+ reg.width = width;
+ reg.hstride = hstride;
+ reg.address_mode = BRW_ADDRESS_DIRECT;
+ reg.pad0 = 0;
+
+ /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
+ * set swizzle and writemask to W, as the lower bits of subnr will
+ * be lost when converted to align16. This is probably too much to
+ * keep track of as you'd want it adjusted by suboffset(), etc.
+ * Perhaps fix up when converting to align16?
+ */
+ reg.dw1.bits.swizzle = swizzle;
+ reg.dw1.bits.writemask = writemask;
+ reg.dw1.bits.indirect_offset = 0;
+ reg.dw1.bits.pad1 = 0;
+ return reg;
+}
+
+/** Construct float[16] register */
+static inline struct brw_reg
+brw_vec16_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_16,
+ BRW_WIDTH_16,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[8] register */
+static inline struct brw_reg
+brw_vec8_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8,
+ BRW_WIDTH_8,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[4] register */
+static inline struct brw_reg
+brw_vec4_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_4,
+ BRW_WIDTH_4,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYZW,
+ BRW_WRITEMASK_XYZW);
+}
+
+/** Construct float[2] register */
+static inline struct brw_reg
+brw_vec2_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_2,
+ BRW_WIDTH_2,
+ BRW_HORIZONTAL_STRIDE_1,
+ BRW_SWIZZLE_XYXY,
+ BRW_WRITEMASK_XY);
+}
+
+/** Construct float[1] register */
+static inline struct brw_reg
+brw_vec1_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+ return brw_reg(file,
+ nr,
+ subnr,
+ BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ BRW_WRITEMASK_X);
+}
+
+
+static inline struct brw_reg
+retype(struct brw_reg reg, unsigned type)
+{
+ reg.type = type;
+ return reg;
+}
+
+static inline struct brw_reg
+sechalf(struct brw_reg reg)
+{
+ if (reg.vstride)
+ reg.nr++;
+ return reg;
+}
+
+static inline struct brw_reg
+suboffset(struct brw_reg reg, unsigned delta)
+{
+ reg.subnr += delta * type_sz(reg.type);
+ return reg;
+}
+
+
+static inline struct brw_reg
+offset(struct brw_reg reg, unsigned delta)
+{
+ reg.nr += delta;
+ return reg;
+}
+
+
+static inline struct brw_reg
+byte_offset(struct brw_reg reg, unsigned bytes)
+{
+ unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes;
+ reg.nr = newoffset / REG_SIZE;
+ reg.subnr = newoffset % REG_SIZE;
+ return reg;
+}
+
+
+/** Construct unsigned word[16] register */
+static inline struct brw_reg
+brw_uw16_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+ return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[8] register */
+static inline struct brw_reg
+brw_uw8_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+ return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+/** Construct unsigned word[1] register */
+static inline struct brw_reg
+brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)
+{
+ return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);
+}
+
+static inline struct brw_reg
+brw_imm_reg(unsigned type)
+{
+ return brw_reg(BRW_IMMEDIATE_VALUE,
+ 0,
+ 0,
+ type,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ 0,
+ 0);
+}
+
+/** Construct float immediate register */
+static inline struct brw_reg
+brw_imm_f(float f)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F);
+ imm.dw1.f = f;
+ return imm;
+}
+
+/** Construct integer immediate register */
+static inline struct brw_reg
+brw_imm_d(int d)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D);
+ imm.dw1.d = d;
+ return imm;
+}
+
+/** Construct uint immediate register */
+static inline struct brw_reg
+brw_imm_ud(unsigned ud)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD);
+ imm.dw1.ud = ud;
+ return imm;
+}
+
+/** Construct ushort immediate register */
+static inline struct brw_reg
+brw_imm_uw(uint16_t uw)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW);
+ imm.dw1.ud = uw | (uw << 16);
+ return imm;
+}
+
+/** Construct short immediate register */
+static inline struct brw_reg
+brw_imm_w(int16_t w)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W);
+ imm.dw1.d = w | (w << 16);
+ return imm;
+}
+
+/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
+ * numbers alias with _V and _VF below:
+ */
+
+/** Construct vector of eight signed half-byte values */
+static inline struct brw_reg
+brw_imm_v(unsigned v)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_8;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+/** Construct vector of four 8-bit float values */
+static inline struct brw_reg
+brw_imm_vf(unsigned v)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = v;
+ return imm;
+}
+
+#define VF_ZERO 0x0
+#define VF_ONE 0x30
+#define VF_NEG (1<<7)
+
+static inline struct brw_reg
+brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+{
+ struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
+ imm.vstride = BRW_VERTICAL_STRIDE_0;
+ imm.width = BRW_WIDTH_4;
+ imm.hstride = BRW_HORIZONTAL_STRIDE_1;
+ imm.dw1.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24));
+ return imm;
+}
+
+
+static inline struct brw_reg
+brw_address(struct brw_reg reg)
+{
+ return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr);
+}
+
+/** Construct float[1] general-purpose register */
+static inline struct brw_reg
+brw_vec1_grf(unsigned nr, unsigned subnr)
+{
+ return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[2] general-purpose register */
+static inline struct brw_reg
+brw_vec2_grf(unsigned nr, unsigned subnr)
+{
+ return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[4] general-purpose register */
+static inline struct brw_reg
+brw_vec4_grf(unsigned nr, unsigned subnr)
+{
+ return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+/** Construct float[8] general-purpose register */
+static inline struct brw_reg
+brw_vec8_grf(unsigned nr, unsigned subnr)
+{
+ return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+static inline struct brw_reg
+brw_uw8_grf(unsigned nr, unsigned subnr)
+{
+ return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+static inline struct brw_reg
+brw_uw16_grf(unsigned nr, unsigned subnr)
+{
+ return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr);
+}
+
+
+/** Construct null register (usually used for setting condition codes) */
+static inline struct brw_reg
+brw_null_reg(void)
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
+}
+
+static inline struct brw_reg
+brw_address_reg(unsigned subnr)
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr);
+}
+
+/* If/else instructions break in align16 mode if writemask & swizzle
+ * aren't xyzw. This goes against the convention for other scalar
+ * regs:
+ */
+static inline struct brw_reg
+brw_ip_reg(void)
+{
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_IP,
+ 0,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_4, /* ? */
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XYZW, /* NOTE! */
+ BRW_WRITEMASK_XYZW); /* NOTE! */
+}
+
+static inline struct brw_reg
+brw_acc_reg(void)
+{
+ return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ACCUMULATOR, 0);
+}
+
+static inline struct brw_reg
+brw_notification_1_reg(void)
+{
+
+ return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_NOTIFICATION_COUNT,
+ 1,
+ BRW_REGISTER_TYPE_UD,
+ BRW_VERTICAL_STRIDE_0,
+ BRW_WIDTH_1,
+ BRW_HORIZONTAL_STRIDE_0,
+ BRW_SWIZZLE_XXXX,
+ BRW_WRITEMASK_X);
+}
+
+
+static inline struct brw_reg
+brw_flag_reg(int reg, int subreg)
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+ BRW_ARF_FLAG + reg, subreg);
+}
+
+
+static inline struct brw_reg
+brw_mask_reg(unsigned subnr)
+{
+ return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr);
+}
+
+static inline struct brw_reg
+brw_message_reg(unsigned nr)
+{
+ assert((nr & ~(1 << 7)) < BRW_MAX_MRF);
+ return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0);
+}
+
+
+/* This is almost always called with a numeric constant argument, so
+ * make things easy to evaluate at compile time:
+ */
+static inline unsigned cvt(unsigned val)
+{
+ switch (val) {
+ case 0: return 0;
+ case 1: return 1;
+ case 2: return 2;
+ case 4: return 3;
+ case 8: return 4;
+ case 16: return 5;
+ case 32: return 6;
+ }
+ return 0;
+}
+
+static inline struct brw_reg
+stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride)
+{
+ reg.vstride = cvt(vstride);
+ reg.width = cvt(width) - 1;
+ reg.hstride = cvt(hstride);
+ return reg;
+}
+
+
+static inline struct brw_reg
+vec16(struct brw_reg reg)
+{
+ return stride(reg, 16,16,1);
+}
+
+static inline struct brw_reg
+vec8(struct brw_reg reg)
+{
+ return stride(reg, 8,8,1);
+}
+
+static inline struct brw_reg
+vec4(struct brw_reg reg)
+{
+ return stride(reg, 4,4,1);
+}
+
+static inline struct brw_reg
+vec2(struct brw_reg reg)
+{
+ return stride(reg, 2,2,1);
+}
+
+static inline struct brw_reg
+vec1(struct brw_reg reg)
+{
+ return stride(reg, 0,1,0);
+}
+
+
+static inline struct brw_reg
+get_element(struct brw_reg reg, unsigned elt)
+{
+ return vec1(suboffset(reg, elt));
+}
+
+static inline struct brw_reg
+get_element_ud(struct brw_reg reg, unsigned elt)
+{
+ return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
+}
+
+static inline struct brw_reg
+get_element_d(struct brw_reg reg, unsigned elt)
+{
+ return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt));
+}
+
+
+static inline struct brw_reg
+brw_swizzle(struct brw_reg reg, unsigned x, unsigned y, unsigned z, unsigned w)
+{
+ assert(reg.file != BRW_IMMEDIATE_VALUE);
+
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
+ BRW_GET_SWZ(reg.dw1.bits.swizzle, w));
+ return reg;
+}
+
+
+static inline struct brw_reg
+brw_swizzle1(struct brw_reg reg, unsigned x)
+{
+ return brw_swizzle(reg, x, x, x, x);
+}
+
+static inline struct brw_reg
+brw_writemask(struct brw_reg reg, unsigned mask)
+{
+ assert(reg.file != BRW_IMMEDIATE_VALUE);
+ reg.dw1.bits.writemask &= mask;
+ return reg;
+}
+
+static inline struct brw_reg
+brw_set_writemask(struct brw_reg reg, unsigned mask)
+{
+ assert(reg.file != BRW_IMMEDIATE_VALUE);
+ reg.dw1.bits.writemask = mask;
+ return reg;
+}
+
+static inline struct brw_reg
+negate(struct brw_reg reg)
+{
+ reg.negate ^= 1;
+ return reg;
+}
+
+static inline struct brw_reg
+brw_abs(struct brw_reg reg)
+{
+ reg.abs = 1;
+ reg.negate = 0;
+ return reg;
+}
+
+/************************************************************************/
+
+static inline struct brw_reg
+brw_vec4_indirect(unsigned subnr, int offset)
+{
+ struct brw_reg reg = brw_vec4_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static inline struct brw_reg
+brw_vec1_indirect(unsigned subnr, int offset)
+{
+ struct brw_reg reg = brw_vec1_grf(0, 0);
+ reg.subnr = subnr;
+ reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER;
+ reg.dw1.bits.indirect_offset = offset;
+ return reg;
+}
+
+static inline struct brw_reg
+deref_4f(struct brw_indirect ptr, int offset)
+{
+ return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static inline struct brw_reg
+deref_1f(struct brw_indirect ptr, int offset)
+{
+ return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset);
+}
+
+static inline struct brw_reg
+deref_4b(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B);
+}
+
+static inline struct brw_reg
+deref_1uw(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW);
+}
+
+static inline struct brw_reg
+deref_1d(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D);
+}
+
+static inline struct brw_reg
+deref_1ud(struct brw_indirect ptr, int offset)
+{
+ return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD);
+}
+
+static inline struct brw_reg
+get_addr_reg(struct brw_indirect ptr)
+{
+ return brw_address_reg(ptr.addr_subnr);
+}
+
+static inline struct brw_indirect
+brw_indirect_offset(struct brw_indirect ptr, int offset)
+{
+ ptr.addr_offset += offset;
+ return ptr;
+}
+
+static inline struct brw_indirect
+brw_indirect(unsigned addr_subnr, int offset)
+{
+ struct brw_indirect ptr;
+ ptr.addr_subnr = addr_subnr;
+ ptr.addr_offset = offset;
+ ptr.pad = 0;
+ return ptr;
+}
+
+/** Do two brw_regs refer to the same register? */
+static inline bool
+brw_same_reg(struct brw_reg r1, struct brw_reg r2)
+{
+ return r1.file == r2.file && r1.nr == r2.nr;
+}
+
+void brw_print_reg(struct brw_reg reg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
@@ -27,6 +27,7 @@
#include <unistd.h>
#include "gen4asm.h"
+#include "brw_eu.h"
static const struct option longopts[] = {
{ NULL, 0, NULL, 0 }
@@ -95,7 +96,10 @@ read_program_binary (FILE *input)
static void usage(void)
{
- fprintf(stderr, "usage: intel-gen4disasm [-o outputfile] [-b] inputfile\n");
+ fprintf(stderr, "usage: intel-gen4disasm [options] inputfile\n");
+ fprintf(stderr, "\t-b, --binary C style binary output\n");
+ fprintf(stderr, "\t-o, --output {outputfile} Specify output file\n");
+ fprintf(stderr, "\t-g, --gen <4|5|6|7> Specify GPU generation\n");
}
int main(int argc, char **argv)
@@ -107,9 +111,10 @@ int main(int argc, char **argv)
char *output_file = NULL;
int byte_array_input = 0;
int o;
+ int gen = 4;
struct brw_program_instruction *inst;
- while ((o = getopt_long(argc, argv, "o:b", longopts, NULL)) != -1) {
+ while ((o = getopt_long(argc, argv, "o:bg:", longopts, NULL)) != -1) {
switch (o) {
case 'o':
if (strcmp(optarg, "-") != 0)
@@ -118,6 +123,15 @@ int main(int argc, char **argv)
case 'b':
byte_array_input = 1;
break;
+ case 'g':
+ gen = strtol(optarg, NULL, 10);
+
+ if (gen < 4 || gen > 7) {
+ usage();
+ exit(1);
+ }
+
+ break;
default:
usage();
exit(1);
@@ -153,6 +167,6 @@ int main(int argc, char **argv)
}
for (inst = program->first; inst; inst = inst->next)
- disasm (output, &inst->instruction);
+ brw_disasm (output, &inst->instruction, gen);
exit (0);
}
@@ -197,6 +197,3 @@ int yylex_destroy(void);
char *
lex_text(void);
-
-int
-disasm (FILE *output, struct brw_instruction *inst);
From Mesa. This imports a bit more the of brw_eu* infrastructure (which is going towards the right direction!) from mesa and the update is quite a significant improvement over what we had. I also verified that the changes that were done on the assembler old version of brw_disasm.c were already supported by the Mesa version, and indeed they were. Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> --- assembler/Makefile.am | 4 +- assembler/{disasm.c => brw_disasm.c} | 660 +++++++++++++++++++++++----- assembler/brw_eu.h | 405 +++++++++++++++++ assembler/brw_reg.h | 808 ++++++++++++++++++++++++++++++++++ assembler/disasm-main.c | 20 +- assembler/gen4asm.h | 3 - 6 files changed, 1783 insertions(+), 117 deletions(-) rename assembler/{disasm.c => brw_disasm.c} (52%) create mode 100644 assembler/brw_eu.h create mode 100644 assembler/brw_reg.h