Message ID | 1341082215-22933-4-git-send-email-galibert@pobox.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
On 30 June 2012 11:50, Olivier Galibert <galibert@pobox.com> wrote: > This patch also correct a couple of problems with noperspective > interpolation. > > At that point all the glsl 1.1/1.3 interpolation tests that do not > clip pass (the -none ones). > > The fs code does not use the pre-resolved interpolation modes in order > not to mess with gen6+. Sharing the resolution would require putting > brw_wm_prog before brw_clip_prog and brw_sf_prog. This may be a good > thing, but it could have unexpected consequences, so it's better be > done independently in any case. > > Signed-off-by: Olivier Galibert <galibert@pobox.com> > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 + > src/mesa/drivers/dri/i965/brw_sf.c | 9 +- > src/mesa/drivers/dri/i965/brw_sf.h | 2 +- > src/mesa/drivers/dri/i965/brw_sf_emit.c | 164 > +++++++++++++------------- > 5 files changed, 95 insertions(+), 87 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 710f2ff..b142f2b 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -506,7 +506,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) > struct brw_reg interp = interp_reg(location, k); > emit_linterp(attr, fs_reg(interp), interpolation_mode, > ir->centroid); > - if (intel->gen < 6) { > + if (intel->gen < 6 && interpolation_mode == > INTERP_QUALIFIER_SMOOTH) { > emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); > } > } > diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > index 9bd1e67..ab83a95 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp > @@ -1924,6 +1924,11 @@ fs_visitor::emit_interpolation_setup_gen4() > emit(BRW_OPCODE_ADD, > this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], > this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); > > + this->delta_x[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] = > + this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; > + this->delta_y[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] = > + this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; > + > Can we add a comment explaining why this is correct? Something like this maybe: "On Gen4-5, we accomplish perspective-correct interpolation by dividing the attribute values by w in the vertex shader, interpolating the result linearly in screen space, and then multiplying by w in the fragment shader. So the interpolation step is always linear in screen space, regardless of whether the attribute is perspective or non-perspective. Accordingly, we use the same delta_x and delta_y values for both kinds of interpolation." > this->current_annotation = "compute pos.w and 1/pos.w"; > /* Compute wpos.w. It's always in our setup, since it's needed to > * interpolate the other attributes. > diff --git a/src/mesa/drivers/dri/i965/brw_sf.c > b/src/mesa/drivers/dri/i965/brw_sf.c > index 0cc4fc7..85f5f51 100644 > --- a/src/mesa/drivers/dri/i965/brw_sf.c > +++ b/src/mesa/drivers/dri/i965/brw_sf.c > @@ -139,6 +139,7 @@ brw_upload_sf_prog(struct brw_context *brw) > struct brw_sf_prog_key key; > /* _NEW_BUFFERS */ > bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); > + int i; > > memset(&key, 0, sizeof(key)); > > @@ -191,7 +192,13 @@ brw_upload_sf_prog(struct brw_context *brw) > key.sprite_origin_lower_left = true; > > /* _NEW_LIGHT */ > - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); > + key.has_flat_shading = 0; > + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { > + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) { > + key.has_flat_shading = 1; > + break; > + } > + } > key.do_twoside_color = (ctx->Light.Enabled && > ctx->Light.Model.TwoSide) || > ctx->VertexProgram._TwoSideEnabled; > brw_copy_interpolation_modes(brw, key.interpolation_mode); > diff --git a/src/mesa/drivers/dri/i965/brw_sf.h > b/src/mesa/drivers/dri/i965/brw_sf.h > index 0a8135c..c718072 100644 > --- a/src/mesa/drivers/dri/i965/brw_sf.h > +++ b/src/mesa/drivers/dri/i965/brw_sf.h > @@ -50,7 +50,7 @@ struct brw_sf_prog_key { > uint8_t point_sprite_coord_replace; > GLuint primitive:2; > GLuint do_twoside_color:1; > - GLuint do_flat_shading:1; > + GLuint has_flat_shading:1; > GLuint frontface_ccw:1; > GLuint do_point_sprite:1; > GLuint do_point_coord:1; > diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c > b/src/mesa/drivers/dri/i965/brw_sf_emit.c > index 9d8aa38..387685a 100644 > --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c > +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c > @@ -44,6 +44,17 @@ > > > /** > + * Determine the vue slot corresponding to the given half of the given > + * register. half=0 means the first half of a register, half=1 means the > + * second half. > + */ > +static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint > reg, > + int half) > +{ > + return (reg + c->urb_entry_read_offset) * 2 + half; > +} > + > +/** > * Determine the vert_result corresponding to the given half of the given > * register. half=0 means the first half of a register, half=1 means the > * second half. > @@ -51,11 +62,24 @@ > static inline int vert_reg_to_vert_result(struct brw_sf_compile *c, > GLuint reg, > int half) > { > - int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half; > + int vue_slot = vert_reg_to_vue_slot(c, reg, half); > return c->vue_map.slot_to_vert_result[vue_slot]; > } > > /** > + * Determine the register corresponding to the given vue slot. > + */ > +static struct brw_reg get_vue_slot(struct brw_sf_compile *c, > + struct brw_reg vert, > + int vue_slot) > +{ > + GLuint off = vue_slot / 2 - c->urb_entry_read_offset; > + GLuint sub = vue_slot % 2; > + > + return brw_vec4_grf(vert.nr + off, sub * 4); > +} > + > +/** > * Determine the register corresponding to the given vert_result. > */ > static struct brw_reg get_vert_result(struct brw_sf_compile *c, > @@ -64,10 +88,7 @@ static struct brw_reg get_vert_result(struct > brw_sf_compile *c, > { > int vue_slot = c->vue_map.vert_result_to_slot[vert_result]; > assert (vue_slot >= c->urb_entry_read_offset); > - GLuint off = vue_slot / 2 - c->urb_entry_read_offset; > - GLuint sub = vue_slot % 2; > - > - return brw_vec4_grf(vert.nr + off, sub * 4); > + return get_vue_slot(c, vert, vue_slot); > } > > static bool > @@ -128,31 +149,37 @@ static void do_twoside_color( struct brw_sf_compile > *c ) > * Flat shading > */ > > -#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \ > - BITFIELD64_BIT(VERT_RESULT_COL1)) > - > -static void copy_colors( struct brw_sf_compile *c, > - struct brw_reg dst, > - struct brw_reg src, > - int allow_twoside) > +static void copy_flatshaded_attributes( struct brw_sf_compile *c, > + struct brw_reg dst, > + struct brw_reg src) > { > struct brw_compile *p = &c->func; > + struct brw_context *brw = p->brw; > GLuint i; > > - for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) { > - if (have_attr(c,i)) { > + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { > + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) { > brw_MOV(p, > - get_vert_result(c, dst, i), > - get_vert_result(c, src, i)); > + get_vue_slot(c, dst, i), > + get_vue_slot(c, src, i)); > > - } else if(allow_twoside && have_attr(c, i - VERT_RESULT_COL0 + > VERT_RESULT_BFC0)) { > - brw_MOV(p, > - get_vert_result(c, dst, i - VERT_RESULT_COL0 + > VERT_RESULT_BFC0), > - get_vert_result(c, src, i - VERT_RESULT_COL0 + > VERT_RESULT_BFC0)); > } > } > } > > +static GLuint count_flatshaded_attributes(struct brw_sf_compile *c ) > +{ > + struct brw_compile *p = &c->func; > + struct brw_context *brw = p->brw; > + GLuint count = 0; > + GLuint i; > + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { > + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) > + count++; > + } > + return count; > +} > + > > > /* Need to use a computed jump to copy flatshaded attributes as the > @@ -168,18 +195,6 @@ static void do_flatshade_triangle( struct > brw_sf_compile *c ) > > GLuint nr; > > - if (c->key.do_twoside_color) { > - nr = ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL0) | > BITFIELD64_BIT(VERT_RESULT_BFC0))) != 0) + > - ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL1) | > BITFIELD64_BIT(VERT_RESULT_BFC1))) != 0); > - > - } else { > - nr = ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL0)) != 0) + > - ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL1)) != 0); > - } > - > - if (!nr) > - return; > - > /* Already done in clip program: > */ > if (c->key.primitive == SF_UNFILLED_TRIS) > @@ -188,21 +203,23 @@ static void do_flatshade_triangle( struct > brw_sf_compile *c ) > if (intel->gen == 5) > jmpi = 2; > > + nr = count_flatshaded_attributes(c); > + > We used to have this optimization: if (!nr) return; I understand why you removed it: because now this code should only be called if c->key.has_flat_shading is true. However, for the sake of safety (and documentation), can we add something like this: /* This code should only be called if c->key.has_flat_shading is true, * meaning there is at least one flatshaded attribute. */ assert(nr != 0); If you would prefer to put this assertion inside count_flatshaded_attributes(), I would be happy with that too. > brw_push_insn_state(p); > > brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); > brw_JMPI(p, ip, ip, c->pv); > > - copy_colors(c, c->vert[1], c->vert[0], c->key.do_twoside_color); > - copy_colors(c, c->vert[2], c->vert[0], c->key.do_twoside_color); > + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); > + copy_flatshaded_attributes(c, c->vert[2], c->vert[0]); > brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); > > - copy_colors(c, c->vert[0], c->vert[1], c->key.do_twoside_color); > - copy_colors(c, c->vert[2], c->vert[1], c->key.do_twoside_color); > + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); > + copy_flatshaded_attributes(c, c->vert[2], c->vert[1]); > brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); > > - copy_colors(c, c->vert[0], c->vert[2], c->key.do_twoside_color); > - copy_colors(c, c->vert[1], c->vert[2], c->key.do_twoside_color); > + copy_flatshaded_attributes(c, c->vert[0], c->vert[2]); > + copy_flatshaded_attributes(c, c->vert[1], c->vert[2]); > > brw_pop_insn_state(p); > } > @@ -213,12 +230,9 @@ static void do_flatshade_line( struct brw_sf_compile > *c ) > struct brw_compile *p = &c->func; > struct intel_context *intel = &p->brw->intel; > struct brw_reg ip = brw_ip_reg(); > - GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS); > + GLuint nr; > GLuint jmpi = 1; > > - if (!nr) > - return; > - > /* Already done in clip program: > */ > if (c->key.primitive == SF_UNFILLED_TRIS) > @@ -227,14 +241,16 @@ static void do_flatshade_line( struct brw_sf_compile > *c ) > if (intel->gen == 5) > jmpi = 2; > > + nr = count_flatshaded_attributes(c); > + > A similar assertion should go here. > brw_push_insn_state(p); > > brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); > brw_JMPI(p, ip, ip, c->pv); > - copy_colors(c, c->vert[1], c->vert[0], 0); > + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); > > brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); > - copy_colors(c, c->vert[0], c->vert[1], 0); > + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); > > brw_pop_insn_state(p); > } > @@ -332,40 +348,25 @@ static void invert_det( struct brw_sf_compile *c) > > static bool > calculate_masks(struct brw_sf_compile *c, > - GLuint reg, > - GLushort *pc, > - GLushort *pc_persp, > - GLushort *pc_linear) > + GLuint reg, > + GLushort *pc, > + GLushort *pc_persp, > + GLushort *pc_linear) > I like the way you've rewritten this function. Nicely done. > { > + struct brw_compile *p = &c->func; > + struct brw_context *brw = p->brw; > + enum glsl_interp_qualifier interp; > bool is_last_attr = (reg == c->nr_setup_regs - 1); > - GLbitfield64 persp_mask; > - GLbitfield64 linear_mask; > - > - if (c->key.do_flat_shading) > - persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) | > - BITFIELD64_BIT(VERT_RESULT_COL0) | > - BITFIELD64_BIT(VERT_RESULT_COL1) | > - BITFIELD64_BIT(VERT_RESULT_BFC0) | > - BITFIELD64_BIT(VERT_RESULT_BFC1)); > - else > - persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS)); > - > - if (c->key.do_flat_shading) > - linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) | > - BITFIELD64_BIT(VERT_RESULT_COL1) | > - BITFIELD64_BIT(VERT_RESULT_BFC0) | > - BITFIELD64_BIT(VERT_RESULT_BFC1)); > - else > - linear_mask = c->key.attrs; > > *pc_persp = 0; > *pc_linear = 0; > *pc = 0xf; > - > - if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0))) > - *pc_persp = 0xf; > > - if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0))) > + interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg, > 0)); > + if (interp == INTERP_QUALIFIER_SMOOTH) { > + *pc_linear = 0xf; > + *pc_persp = 0xf; > + } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE) > *pc_linear = 0xf; > > /* Maybe only processs one attribute on the final round: > @@ -373,11 +374,12 @@ calculate_masks(struct brw_sf_compile *c, > if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) { > *pc |= 0xf0; > > - if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1))) > - *pc_persp |= 0xf0; > - > - if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, > 1))) > - *pc_linear |= 0xf0; > + interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, > reg, 1)); > + if (interp == INTERP_QUALIFIER_SMOOTH) { > + *pc_linear |= 0xf0; > + *pc_persp |= 0xf0; > + } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE) > + *pc_linear |= 0xf0; > } > > return is_last_attr; > @@ -430,7 +432,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool > allocate) > if (c->key.do_twoside_color) > do_twoside_color(c); > > - if (c->key.do_flat_shading) > + if (c->key.has_flat_shading) > do_flatshade_triangle(c); > > > @@ -443,7 +445,6 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool > allocate) > struct brw_reg a2 = offset(c->vert[2], i); > GLushort pc, pc_persp, pc_linear; > bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); > - > if (pc_persp) > { > brw_set_predicate_control_flag_value(p, pc_persp); > @@ -507,7 +508,6 @@ void brw_emit_line_setup(struct brw_sf_compile *c, > bool allocate) > struct brw_compile *p = &c->func; > GLuint i; > > - > c->nr_verts = 2; > > if (allocate) > @@ -516,7 +516,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c, > bool allocate) > invert_det(c); > copy_z_inv_w(c); > > - if (c->key.do_flat_shading) > + if (c->key.has_flat_shading) > do_flatshade_line(c); > > for (i = 0; i < c->nr_setup_regs; i++) > @@ -799,7 +799,3 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) > > brw_emit_point_setup( c, false ); > } > - > - > - > - > -- > 1.7.10.rc3.1.gb306 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > With the minor changes noted above, this patch is: Reviewed-by: Paul Berry <stereotype441@gmail.com>
On 17 July 2012 05:50, Paul Berry <stereotype441@gmail.com> wrote: > On 30 June 2012 11:50, Olivier Galibert <galibert@pobox.com> wrote: > >> This patch also correct a couple of problems with noperspective >> interpolation. >> >> At that point all the glsl 1.1/1.3 interpolation tests that do not >> clip pass (the -none ones). >> >> The fs code does not use the pre-resolved interpolation modes in order >> not to mess with gen6+. Sharing the resolution would require putting >> brw_wm_prog before brw_clip_prog and brw_sf_prog. This may be a good >> thing, but it could have unexpected consequences, so it's better be >> done independently in any case. >> >> Signed-off-by: Olivier Galibert <galibert@pobox.com> >> --- >> src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- >> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 + >> src/mesa/drivers/dri/i965/brw_sf.c | 9 +- >> src/mesa/drivers/dri/i965/brw_sf.h | 2 +- >> src/mesa/drivers/dri/i965/brw_sf_emit.c | 164 >> +++++++++++++------------- >> 5 files changed, 95 insertions(+), 87 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >> b/src/mesa/drivers/dri/i965/brw_fs.cpp >> index 710f2ff..b142f2b 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >> @@ -506,7 +506,7 @@ fs_visitor::emit_general_interpolation(ir_variable >> *ir) >> struct brw_reg interp = interp_reg(location, k); >> emit_linterp(attr, fs_reg(interp), interpolation_mode, >> ir->centroid); >> - if (intel->gen < 6) { >> + if (intel->gen < 6 && interpolation_mode == >> INTERP_QUALIFIER_SMOOTH) { >> emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); >> } >> } >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> index 9bd1e67..ab83a95 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> @@ -1924,6 +1924,11 @@ fs_visitor::emit_interpolation_setup_gen4() >> emit(BRW_OPCODE_ADD, >> this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], >> this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); >> >> + this->delta_x[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] = >> + this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; >> + this->delta_y[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] = >> + this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; >> + >> > > Can we add a comment explaining why this is correct? Something like this > maybe: > > "On Gen4-5, we accomplish perspective-correct interpolation by dividing > the attribute values by w in the vertex shader, interpolating the result > linearly in screen space, and then multiplying by w in the fragment > shader. So the interpolation step is always linear in screen space, > regardless of whether the attribute is perspective or non-perspective. > Accordingly, we use the same delta_x and delta_y values for both kinds of > interpolation." > Whoops, my memory was faulty. This should say "...in the sf thread...", not "...in the vertex shader...". > > >> this->current_annotation = "compute pos.w and 1/pos.w"; >> /* Compute wpos.w. It's always in our setup, since it's needed to >> * interpolate the other attributes. >> diff --git a/src/mesa/drivers/dri/i965/brw_sf.c >> b/src/mesa/drivers/dri/i965/brw_sf.c >> index 0cc4fc7..85f5f51 100644 >> --- a/src/mesa/drivers/dri/i965/brw_sf.c >> +++ b/src/mesa/drivers/dri/i965/brw_sf.c >> @@ -139,6 +139,7 @@ brw_upload_sf_prog(struct brw_context *brw) >> struct brw_sf_prog_key key; >> /* _NEW_BUFFERS */ >> bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); >> + int i; >> >> memset(&key, 0, sizeof(key)); >> >> @@ -191,7 +192,13 @@ brw_upload_sf_prog(struct brw_context *brw) >> key.sprite_origin_lower_left = true; >> >> /* _NEW_LIGHT */ >> - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); >> + key.has_flat_shading = 0; >> + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { >> + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) { >> + key.has_flat_shading = 1; >> + break; >> + } >> + } >> key.do_twoside_color = (ctx->Light.Enabled && >> ctx->Light.Model.TwoSide) || >> ctx->VertexProgram._TwoSideEnabled; >> brw_copy_interpolation_modes(brw, key.interpolation_mode); >> diff --git a/src/mesa/drivers/dri/i965/brw_sf.h >> b/src/mesa/drivers/dri/i965/brw_sf.h >> index 0a8135c..c718072 100644 >> --- a/src/mesa/drivers/dri/i965/brw_sf.h >> +++ b/src/mesa/drivers/dri/i965/brw_sf.h >> @@ -50,7 +50,7 @@ struct brw_sf_prog_key { >> uint8_t point_sprite_coord_replace; >> GLuint primitive:2; >> GLuint do_twoside_color:1; >> - GLuint do_flat_shading:1; >> + GLuint has_flat_shading:1; >> GLuint frontface_ccw:1; >> GLuint do_point_sprite:1; >> GLuint do_point_coord:1; >> diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c >> b/src/mesa/drivers/dri/i965/brw_sf_emit.c >> index 9d8aa38..387685a 100644 >> --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c >> +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c >> @@ -44,6 +44,17 @@ >> >> >> /** >> + * Determine the vue slot corresponding to the given half of the given >> + * register. half=0 means the first half of a register, half=1 means the >> + * second half. >> + */ >> +static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint >> reg, >> + int half) >> +{ >> + return (reg + c->urb_entry_read_offset) * 2 + half; >> +} >> + >> +/** >> * Determine the vert_result corresponding to the given half of the given >> * register. half=0 means the first half of a register, half=1 means the >> * second half. >> @@ -51,11 +62,24 @@ >> static inline int vert_reg_to_vert_result(struct brw_sf_compile *c, >> GLuint reg, >> int half) >> { >> - int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half; >> + int vue_slot = vert_reg_to_vue_slot(c, reg, half); >> return c->vue_map.slot_to_vert_result[vue_slot]; >> } >> >> /** >> + * Determine the register corresponding to the given vue slot. >> + */ >> +static struct brw_reg get_vue_slot(struct brw_sf_compile *c, >> + struct brw_reg vert, >> + int vue_slot) >> +{ >> + GLuint off = vue_slot / 2 - c->urb_entry_read_offset; >> + GLuint sub = vue_slot % 2; >> + >> + return brw_vec4_grf(vert.nr + off, sub * 4); >> > +} >> + >> +/** >> * Determine the register corresponding to the given vert_result. >> */ >> static struct brw_reg get_vert_result(struct brw_sf_compile *c, >> @@ -64,10 +88,7 @@ static struct brw_reg get_vert_result(struct >> brw_sf_compile *c, >> { >> int vue_slot = c->vue_map.vert_result_to_slot[vert_result]; >> assert (vue_slot >= c->urb_entry_read_offset); >> - GLuint off = vue_slot / 2 - c->urb_entry_read_offset; >> - GLuint sub = vue_slot % 2; >> - >> - return brw_vec4_grf(vert.nr + off, sub * 4); >> + return get_vue_slot(c, vert, vue_slot); >> } >> >> static bool >> @@ -128,31 +149,37 @@ static void do_twoside_color( struct brw_sf_compile >> *c ) >> * Flat shading >> */ >> >> -#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \ >> - BITFIELD64_BIT(VERT_RESULT_COL1)) >> - >> -static void copy_colors( struct brw_sf_compile *c, >> - struct brw_reg dst, >> - struct brw_reg src, >> - int allow_twoside) >> +static void copy_flatshaded_attributes( struct brw_sf_compile *c, >> + struct brw_reg dst, >> + struct brw_reg src) >> { >> struct brw_compile *p = &c->func; >> + struct brw_context *brw = p->brw; >> GLuint i; >> >> - for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) { >> - if (have_attr(c,i)) { >> + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { >> + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) { >> brw_MOV(p, >> - get_vert_result(c, dst, i), >> - get_vert_result(c, src, i)); >> + get_vue_slot(c, dst, i), >> + get_vue_slot(c, src, i)); >> >> - } else if(allow_twoside && have_attr(c, i - VERT_RESULT_COL0 + >> VERT_RESULT_BFC0)) { >> - brw_MOV(p, >> - get_vert_result(c, dst, i - VERT_RESULT_COL0 + >> VERT_RESULT_BFC0), >> - get_vert_result(c, src, i - VERT_RESULT_COL0 + >> VERT_RESULT_BFC0)); >> } >> } >> } >> >> +static GLuint count_flatshaded_attributes(struct brw_sf_compile *c ) >> +{ >> + struct brw_compile *p = &c->func; >> + struct brw_context *brw = p->brw; >> + GLuint count = 0; >> + GLuint i; >> + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { >> + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) >> + count++; >> + } >> + return count; >> +} >> + >> >> >> /* Need to use a computed jump to copy flatshaded attributes as the >> @@ -168,18 +195,6 @@ static void do_flatshade_triangle( struct >> brw_sf_compile *c ) >> >> GLuint nr; >> >> - if (c->key.do_twoside_color) { >> - nr = ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL0) | >> BITFIELD64_BIT(VERT_RESULT_BFC0))) != 0) + >> - ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL1) | >> BITFIELD64_BIT(VERT_RESULT_BFC1))) != 0); >> - >> - } else { >> - nr = ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL0)) != 0) + >> - ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL1)) != 0); >> - } >> - >> - if (!nr) >> - return; >> - >> /* Already done in clip program: >> */ >> if (c->key.primitive == SF_UNFILLED_TRIS) >> @@ -188,21 +203,23 @@ static void do_flatshade_triangle( struct >> brw_sf_compile *c ) >> if (intel->gen == 5) >> jmpi = 2; >> >> + nr = count_flatshaded_attributes(c); >> + >> > > We used to have this optimization: > > if (!nr) > return; > > I understand why you removed it: because now this code should only be > called if c->key.has_flat_shading is true. However, for the sake of safety > (and documentation), can we add something like this: > > /* This code should only be called if c->key.has_flat_shading is true, > * meaning there is at least one flatshaded attribute. > */ > assert(nr != 0); > > If you would prefer to put this assertion inside > count_flatshaded_attributes(), I would be happy with that too. > > >> brw_push_insn_state(p); >> >> brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); >> brw_JMPI(p, ip, ip, c->pv); >> >> - copy_colors(c, c->vert[1], c->vert[0], c->key.do_twoside_color); >> - copy_colors(c, c->vert[2], c->vert[0], c->key.do_twoside_color); >> + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); >> + copy_flatshaded_attributes(c, c->vert[2], c->vert[0]); >> brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); >> >> - copy_colors(c, c->vert[0], c->vert[1], c->key.do_twoside_color); >> - copy_colors(c, c->vert[2], c->vert[1], c->key.do_twoside_color); >> + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); >> + copy_flatshaded_attributes(c, c->vert[2], c->vert[1]); >> brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); >> >> - copy_colors(c, c->vert[0], c->vert[2], c->key.do_twoside_color); >> - copy_colors(c, c->vert[1], c->vert[2], c->key.do_twoside_color); >> + copy_flatshaded_attributes(c, c->vert[0], c->vert[2]); >> + copy_flatshaded_attributes(c, c->vert[1], c->vert[2]); >> >> brw_pop_insn_state(p); >> } >> @@ -213,12 +230,9 @@ static void do_flatshade_line( struct brw_sf_compile >> *c ) >> struct brw_compile *p = &c->func; >> struct intel_context *intel = &p->brw->intel; >> struct brw_reg ip = brw_ip_reg(); >> - GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS); >> + GLuint nr; >> GLuint jmpi = 1; >> >> - if (!nr) >> - return; >> - >> /* Already done in clip program: >> */ >> if (c->key.primitive == SF_UNFILLED_TRIS) >> @@ -227,14 +241,16 @@ static void do_flatshade_line( struct >> brw_sf_compile *c ) >> if (intel->gen == 5) >> jmpi = 2; >> >> + nr = count_flatshaded_attributes(c); >> + >> > > A similar assertion should go here. > > >> brw_push_insn_state(p); >> >> brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); >> brw_JMPI(p, ip, ip, c->pv); >> - copy_colors(c, c->vert[1], c->vert[0], 0); >> + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); >> >> brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); >> - copy_colors(c, c->vert[0], c->vert[1], 0); >> + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); >> >> brw_pop_insn_state(p); >> } >> @@ -332,40 +348,25 @@ static void invert_det( struct brw_sf_compile *c) >> >> static bool >> calculate_masks(struct brw_sf_compile *c, >> - GLuint reg, >> - GLushort *pc, >> - GLushort *pc_persp, >> - GLushort *pc_linear) >> + GLuint reg, >> + GLushort *pc, >> + GLushort *pc_persp, >> + GLushort *pc_linear) >> > > I like the way you've rewritten this function. Nicely done. > > >> { >> + struct brw_compile *p = &c->func; >> + struct brw_context *brw = p->brw; >> + enum glsl_interp_qualifier interp; >> bool is_last_attr = (reg == c->nr_setup_regs - 1); >> - GLbitfield64 persp_mask; >> - GLbitfield64 linear_mask; >> - >> - if (c->key.do_flat_shading) >> - persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) | >> - BITFIELD64_BIT(VERT_RESULT_COL0) | >> - BITFIELD64_BIT(VERT_RESULT_COL1) | >> - BITFIELD64_BIT(VERT_RESULT_BFC0) | >> - BITFIELD64_BIT(VERT_RESULT_BFC1)); >> - else >> - persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS)); >> - >> - if (c->key.do_flat_shading) >> - linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) | >> - BITFIELD64_BIT(VERT_RESULT_COL1) | >> - BITFIELD64_BIT(VERT_RESULT_BFC0) | >> - BITFIELD64_BIT(VERT_RESULT_BFC1)); >> - else >> - linear_mask = c->key.attrs; >> >> *pc_persp = 0; >> *pc_linear = 0; >> *pc = 0xf; >> - >> - if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0))) >> - *pc_persp = 0xf; >> >> - if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0))) >> + interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg, >> 0)); >> + if (interp == INTERP_QUALIFIER_SMOOTH) { >> + *pc_linear = 0xf; >> + *pc_persp = 0xf; >> + } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE) >> *pc_linear = 0xf; >> >> /* Maybe only processs one attribute on the final round: >> @@ -373,11 +374,12 @@ calculate_masks(struct brw_sf_compile *c, >> if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) { >> *pc |= 0xf0; >> >> - if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, >> 1))) >> - *pc_persp |= 0xf0; >> - >> - if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, >> 1))) >> - *pc_linear |= 0xf0; >> + interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, >> reg, 1)); >> + if (interp == INTERP_QUALIFIER_SMOOTH) { >> + *pc_linear |= 0xf0; >> + *pc_persp |= 0xf0; >> + } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE) >> + *pc_linear |= 0xf0; >> } >> >> return is_last_attr; >> @@ -430,7 +432,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, >> bool allocate) >> if (c->key.do_twoside_color) >> do_twoside_color(c); >> >> - if (c->key.do_flat_shading) >> + if (c->key.has_flat_shading) >> do_flatshade_triangle(c); >> >> >> @@ -443,7 +445,6 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, >> bool allocate) >> struct brw_reg a2 = offset(c->vert[2], i); >> GLushort pc, pc_persp, pc_linear; >> bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); >> - >> if (pc_persp) >> { >> brw_set_predicate_control_flag_value(p, pc_persp); >> @@ -507,7 +508,6 @@ void brw_emit_line_setup(struct brw_sf_compile *c, >> bool allocate) >> struct brw_compile *p = &c->func; >> GLuint i; >> >> - >> c->nr_verts = 2; >> >> if (allocate) >> @@ -516,7 +516,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c, >> bool allocate) >> invert_det(c); >> copy_z_inv_w(c); >> >> - if (c->key.do_flat_shading) >> + if (c->key.has_flat_shading) >> do_flatshade_line(c); >> >> for (i = 0; i < c->nr_setup_regs; i++) >> @@ -799,7 +799,3 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c >> ) >> >> brw_emit_point_setup( c, false ); >> } >> - >> - >> - >> - >> -- >> 1.7.10.rc3.1.gb306 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > > With the minor changes noted above, this patch is: > > Reviewed-by: Paul Berry <stereotype441@gmail.com> >
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 710f2ff..b142f2b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -506,7 +506,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir) struct brw_reg interp = interp_reg(location, k); emit_linterp(attr, fs_reg(interp), interpolation_mode, ir->centroid); - if (intel->gen < 6) { + if (intel->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) { emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 9bd1e67..ab83a95 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -1924,6 +1924,11 @@ fs_visitor::emit_interpolation_setup_gen4() emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))); + this->delta_x[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] = + this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; + this->delta_y[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] = + this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]; + this->current_annotation = "compute pos.w and 1/pos.w"; /* Compute wpos.w. It's always in our setup, since it's needed to * interpolate the other attributes. diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 0cc4fc7..85f5f51 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -139,6 +139,7 @@ brw_upload_sf_prog(struct brw_context *brw) struct brw_sf_prog_key key; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + int i; memset(&key, 0, sizeof(key)); @@ -191,7 +192,13 @@ brw_upload_sf_prog(struct brw_context *brw) key.sprite_origin_lower_left = true; /* _NEW_LIGHT */ - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT); + key.has_flat_shading = 0; + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) { + key.has_flat_shading = 1; + break; + } + } key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide) || ctx->VertexProgram._TwoSideEnabled; brw_copy_interpolation_modes(brw, key.interpolation_mode); diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h index 0a8135c..c718072 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.h +++ b/src/mesa/drivers/dri/i965/brw_sf.h @@ -50,7 +50,7 @@ struct brw_sf_prog_key { uint8_t point_sprite_coord_replace; GLuint primitive:2; GLuint do_twoside_color:1; - GLuint do_flat_shading:1; + GLuint has_flat_shading:1; GLuint frontface_ccw:1; GLuint do_point_sprite:1; GLuint do_point_coord:1; diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index 9d8aa38..387685a 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -44,6 +44,17 @@ /** + * Determine the vue slot corresponding to the given half of the given + * register. half=0 means the first half of a register, half=1 means the + * second half. + */ +static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg, + int half) +{ + return (reg + c->urb_entry_read_offset) * 2 + half; +} + +/** * Determine the vert_result corresponding to the given half of the given * register. half=0 means the first half of a register, half=1 means the * second half. @@ -51,11 +62,24 @@ static inline int vert_reg_to_vert_result(struct brw_sf_compile *c, GLuint reg, int half) { - int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half; + int vue_slot = vert_reg_to_vue_slot(c, reg, half); return c->vue_map.slot_to_vert_result[vue_slot]; } /** + * Determine the register corresponding to the given vue slot. + */ +static struct brw_reg get_vue_slot(struct brw_sf_compile *c, + struct brw_reg vert, + int vue_slot) +{ + GLuint off = vue_slot / 2 - c->urb_entry_read_offset; + GLuint sub = vue_slot % 2; + + return brw_vec4_grf(vert.nr + off, sub * 4); +} + +/** * Determine the register corresponding to the given vert_result. */ static struct brw_reg get_vert_result(struct brw_sf_compile *c, @@ -64,10 +88,7 @@ static struct brw_reg get_vert_result(struct brw_sf_compile *c, { int vue_slot = c->vue_map.vert_result_to_slot[vert_result]; assert (vue_slot >= c->urb_entry_read_offset); - GLuint off = vue_slot / 2 - c->urb_entry_read_offset; - GLuint sub = vue_slot % 2; - - return brw_vec4_grf(vert.nr + off, sub * 4); + return get_vue_slot(c, vert, vue_slot); } static bool @@ -128,31 +149,37 @@ static void do_twoside_color( struct brw_sf_compile *c ) * Flat shading */ -#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \ - BITFIELD64_BIT(VERT_RESULT_COL1)) - -static void copy_colors( struct brw_sf_compile *c, - struct brw_reg dst, - struct brw_reg src, - int allow_twoside) +static void copy_flatshaded_attributes( struct brw_sf_compile *c, + struct brw_reg dst, + struct brw_reg src) { struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; GLuint i; - for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) { - if (have_attr(c,i)) { + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) { brw_MOV(p, - get_vert_result(c, dst, i), - get_vert_result(c, src, i)); + get_vue_slot(c, dst, i), + get_vue_slot(c, src, i)); - } else if(allow_twoside && have_attr(c, i - VERT_RESULT_COL0 + VERT_RESULT_BFC0)) { - brw_MOV(p, - get_vert_result(c, dst, i - VERT_RESULT_COL0 + VERT_RESULT_BFC0), - get_vert_result(c, src, i - VERT_RESULT_COL0 + VERT_RESULT_BFC0)); } } } +static GLuint count_flatshaded_attributes(struct brw_sf_compile *c ) +{ + struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + GLuint count = 0; + GLuint i; + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) { + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) + count++; + } + return count; +} + /* Need to use a computed jump to copy flatshaded attributes as the @@ -168,18 +195,6 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) GLuint nr; - if (c->key.do_twoside_color) { - nr = ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL0) | BITFIELD64_BIT(VERT_RESULT_BFC0))) != 0) + - ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL1) | BITFIELD64_BIT(VERT_RESULT_BFC1))) != 0); - - } else { - nr = ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL0)) != 0) + - ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL1)) != 0); - } - - if (!nr) - return; - /* Already done in clip program: */ if (c->key.primitive == SF_UNFILLED_TRIS) @@ -188,21 +203,23 @@ static void do_flatshade_triangle( struct brw_sf_compile *c ) if (intel->gen == 5) jmpi = 2; + nr = count_flatshaded_attributes(c); + brw_push_insn_state(p); brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1))); brw_JMPI(p, ip, ip, c->pv); - copy_colors(c, c->vert[1], c->vert[0], c->key.do_twoside_color); - copy_colors(c, c->vert[2], c->vert[0], c->key.do_twoside_color); + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); + copy_flatshaded_attributes(c, c->vert[2], c->vert[0]); brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1))); - copy_colors(c, c->vert[0], c->vert[1], c->key.do_twoside_color); - copy_colors(c, c->vert[2], c->vert[1], c->key.do_twoside_color); + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); + copy_flatshaded_attributes(c, c->vert[2], c->vert[1]); brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2)); - copy_colors(c, c->vert[0], c->vert[2], c->key.do_twoside_color); - copy_colors(c, c->vert[1], c->vert[2], c->key.do_twoside_color); + copy_flatshaded_attributes(c, c->vert[0], c->vert[2]); + copy_flatshaded_attributes(c, c->vert[1], c->vert[2]); brw_pop_insn_state(p); } @@ -213,12 +230,9 @@ static void do_flatshade_line( struct brw_sf_compile *c ) struct brw_compile *p = &c->func; struct intel_context *intel = &p->brw->intel; struct brw_reg ip = brw_ip_reg(); - GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS); + GLuint nr; GLuint jmpi = 1; - if (!nr) - return; - /* Already done in clip program: */ if (c->key.primitive == SF_UNFILLED_TRIS) @@ -227,14 +241,16 @@ static void do_flatshade_line( struct brw_sf_compile *c ) if (intel->gen == 5) jmpi = 2; + nr = count_flatshaded_attributes(c); + brw_push_insn_state(p); brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1))); brw_JMPI(p, ip, ip, c->pv); - copy_colors(c, c->vert[1], c->vert[0], 0); + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]); brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr)); - copy_colors(c, c->vert[0], c->vert[1], 0); + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]); brw_pop_insn_state(p); } @@ -332,40 +348,25 @@ static void invert_det( struct brw_sf_compile *c) static bool calculate_masks(struct brw_sf_compile *c, - GLuint reg, - GLushort *pc, - GLushort *pc_persp, - GLushort *pc_linear) + GLuint reg, + GLushort *pc, + GLushort *pc_persp, + GLushort *pc_linear) { + struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + enum glsl_interp_qualifier interp; bool is_last_attr = (reg == c->nr_setup_regs - 1); - GLbitfield64 persp_mask; - GLbitfield64 linear_mask; - - if (c->key.do_flat_shading) - persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) | - BITFIELD64_BIT(VERT_RESULT_COL0) | - BITFIELD64_BIT(VERT_RESULT_COL1) | - BITFIELD64_BIT(VERT_RESULT_BFC0) | - BITFIELD64_BIT(VERT_RESULT_BFC1)); - else - persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS)); - - if (c->key.do_flat_shading) - linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) | - BITFIELD64_BIT(VERT_RESULT_COL1) | - BITFIELD64_BIT(VERT_RESULT_BFC0) | - BITFIELD64_BIT(VERT_RESULT_BFC1)); - else - linear_mask = c->key.attrs; *pc_persp = 0; *pc_linear = 0; *pc = 0xf; - - if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0))) - *pc_persp = 0xf; - if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0))) + interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg, 0)); + if (interp == INTERP_QUALIFIER_SMOOTH) { + *pc_linear = 0xf; + *pc_persp = 0xf; + } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE) *pc_linear = 0xf; /* Maybe only processs one attribute on the final round: @@ -373,11 +374,12 @@ calculate_masks(struct brw_sf_compile *c, if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) { *pc |= 0xf0; - if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1))) - *pc_persp |= 0xf0; - - if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1))) - *pc_linear |= 0xf0; + interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg, 1)); + if (interp == INTERP_QUALIFIER_SMOOTH) { + *pc_linear |= 0xf0; + *pc_persp |= 0xf0; + } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE) + *pc_linear |= 0xf0; } return is_last_attr; @@ -430,7 +432,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) if (c->key.do_twoside_color) do_twoside_color(c); - if (c->key.do_flat_shading) + if (c->key.has_flat_shading) do_flatshade_triangle(c); @@ -443,7 +445,6 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate) struct brw_reg a2 = offset(c->vert[2], i); GLushort pc, pc_persp, pc_linear; bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear); - if (pc_persp) { brw_set_predicate_control_flag_value(p, pc_persp); @@ -507,7 +508,6 @@ void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) struct brw_compile *p = &c->func; GLuint i; - c->nr_verts = 2; if (allocate) @@ -516,7 +516,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate) invert_det(c); copy_z_inv_w(c); - if (c->key.do_flat_shading) + if (c->key.has_flat_shading) do_flatshade_line(c); for (i = 0; i < c->nr_setup_regs; i++) @@ -799,7 +799,3 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c ) brw_emit_point_setup( c, false ); } - - - -
This patch also correct a couple of problems with noperspective interpolation. At that point all the glsl 1.1/1.3 interpolation tests that do not clip pass (the -none ones). The fs code does not use the pre-resolved interpolation modes in order not to mess with gen6+. Sharing the resolution would require putting brw_wm_prog before brw_clip_prog and brw_sf_prog. This may be a good thing, but it could have unexpected consequences, so it's better be done independently in any case. Signed-off-by: Olivier Galibert <galibert@pobox.com> --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 + src/mesa/drivers/dri/i965/brw_sf.c | 9 +- src/mesa/drivers/dri/i965/brw_sf.h | 2 +- src/mesa/drivers/dri/i965/brw_sf_emit.c | 164 +++++++++++++------------- 5 files changed, 95 insertions(+), 87 deletions(-)