diff mbox

[3/5] intel gen4-5: Correctly setup the parameters in the sf.

Message ID 1341082215-22933-4-git-send-email-galibert@pobox.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Olivier Galibert June 30, 2012, 6:50 p.m. UTC
This patch also correct a couple of problems with noperspective
interpolation.

At that point all the glsl 1.1/1.3 interpolation tests that do not
clip pass (the -none ones).

The fs code does not use the pre-resolved interpolation modes in order
not to mess with gen6+.  Sharing the resolution would require putting
brw_wm_prog before brw_clip_prog and brw_sf_prog.  This may be a good
thing, but it could have unexpected consequences, so it's better be
done independently in any case.

Signed-off-by: Olivier Galibert <galibert@pobox.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp         |    2 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |    5 +
 src/mesa/drivers/dri/i965/brw_sf.c           |    9 +-
 src/mesa/drivers/dri/i965/brw_sf.h           |    2 +-
 src/mesa/drivers/dri/i965/brw_sf_emit.c      |  164 +++++++++++++-------------
 5 files changed, 95 insertions(+), 87 deletions(-)

Comments

Paul Berry July 17, 2012, 12:50 p.m. UTC | #1
On 30 June 2012 11:50, Olivier Galibert <galibert@pobox.com> wrote:

> This patch also correct a couple of problems with noperspective
> interpolation.
>
> At that point all the glsl 1.1/1.3 interpolation tests that do not
> clip pass (the -none ones).
>
> The fs code does not use the pre-resolved interpolation modes in order
> not to mess with gen6+.  Sharing the resolution would require putting
> brw_wm_prog before brw_clip_prog and brw_sf_prog.  This may be a good
> thing, but it could have unexpected consequences, so it's better be
> done independently in any case.
>
> Signed-off-by: Olivier Galibert <galibert@pobox.com>
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp         |    2 +-
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |    5 +
>  src/mesa/drivers/dri/i965/brw_sf.c           |    9 +-
>  src/mesa/drivers/dri/i965/brw_sf.h           |    2 +-
>  src/mesa/drivers/dri/i965/brw_sf_emit.c      |  164
> +++++++++++++-------------
>  5 files changed, 95 insertions(+), 87 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 710f2ff..b142f2b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -506,7 +506,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
>                   struct brw_reg interp = interp_reg(location, k);
>                    emit_linterp(attr, fs_reg(interp), interpolation_mode,
>                                 ir->centroid);
> -                 if (intel->gen < 6) {
> +                 if (intel->gen < 6 && interpolation_mode ==
> INTERP_QUALIFIER_SMOOTH) {
>                      emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
>                   }
>                }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 9bd1e67..ab83a95 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -1924,6 +1924,11 @@ fs_visitor::emit_interpolation_setup_gen4()
>     emit(BRW_OPCODE_ADD,
> this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>         this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
>
> +   this->delta_x[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] =
> +     this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
> +   this->delta_y[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] =
> +     this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
> +
>

Can we add a comment explaining why this is correct?  Something like this
maybe:

"On Gen4-5, we accomplish perspective-correct interpolation by dividing the
attribute values by w in the vertex shader, interpolating the result
linearly in screen space, and then multiplying by w in the fragment
shader.  So the interpolation step is always linear in screen space,
regardless of whether the attribute is perspective or non-perspective.
Accordingly, we use the same delta_x and delta_y values for both kinds of
interpolation."


>     this->current_annotation = "compute pos.w and 1/pos.w";
>     /* Compute wpos.w.  It's always in our setup, since it's needed to
>      * interpolate the other attributes.
> diff --git a/src/mesa/drivers/dri/i965/brw_sf.c
> b/src/mesa/drivers/dri/i965/brw_sf.c
> index 0cc4fc7..85f5f51 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf.c
> +++ b/src/mesa/drivers/dri/i965/brw_sf.c
> @@ -139,6 +139,7 @@ brw_upload_sf_prog(struct brw_context *brw)
>     struct brw_sf_prog_key key;
>     /* _NEW_BUFFERS */
>     bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
> +   int i;
>
>     memset(&key, 0, sizeof(key));
>
> @@ -191,7 +192,13 @@ brw_upload_sf_prog(struct brw_context *brw)
>        key.sprite_origin_lower_left = true;
>
>     /* _NEW_LIGHT */
> -   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
> +   key.has_flat_shading = 0;
> +   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
> +      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) {
> +         key.has_flat_shading = 1;
> +         break;
> +      }
> +   }
>     key.do_twoside_color = (ctx->Light.Enabled &&
> ctx->Light.Model.TwoSide) ||
>       ctx->VertexProgram._TwoSideEnabled;
>     brw_copy_interpolation_modes(brw, key.interpolation_mode);
> diff --git a/src/mesa/drivers/dri/i965/brw_sf.h
> b/src/mesa/drivers/dri/i965/brw_sf.h
> index 0a8135c..c718072 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf.h
> +++ b/src/mesa/drivers/dri/i965/brw_sf.h
> @@ -50,7 +50,7 @@ struct brw_sf_prog_key {
>     uint8_t point_sprite_coord_replace;
>     GLuint primitive:2;
>     GLuint do_twoside_color:1;
> -   GLuint do_flat_shading:1;
> +   GLuint has_flat_shading:1;
>     GLuint frontface_ccw:1;
>     GLuint do_point_sprite:1;
>     GLuint do_point_coord:1;
> diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c
> b/src/mesa/drivers/dri/i965/brw_sf_emit.c
> index 9d8aa38..387685a 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
> @@ -44,6 +44,17 @@
>
>
>  /**
> + * Determine the vue slot corresponding to the given half of the given
> + * register.  half=0 means the first half of a register, half=1 means the
> + * second half.
> + */
> +static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint
> reg,
> +                                       int half)
> +{
> +   return (reg + c->urb_entry_read_offset) * 2 + half;
> +}
> +
> +/**
>   * Determine the vert_result corresponding to the given half of the given
>   * register.  half=0 means the first half of a register, half=1 means the
>   * second half.
> @@ -51,11 +62,24 @@
>  static inline int vert_reg_to_vert_result(struct brw_sf_compile *c,
> GLuint reg,
>                                            int half)
>  {
> -   int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
> +   int vue_slot = vert_reg_to_vue_slot(c, reg, half);
>     return c->vue_map.slot_to_vert_result[vue_slot];
>  }
>
>  /**
> + * Determine the register corresponding to the given vue slot.
> + */
> +static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
> +                                   struct brw_reg vert,
> +                                   int vue_slot)
> +{
> +   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
> +   GLuint sub = vue_slot % 2;
> +
> +   return brw_vec4_grf(vert.nr + off, sub * 4);
>
+}
> +
> +/**
>   * Determine the register corresponding to the given vert_result.
>   */
>  static struct brw_reg get_vert_result(struct brw_sf_compile *c,
> @@ -64,10 +88,7 @@ static struct brw_reg get_vert_result(struct
> brw_sf_compile *c,
>  {
>     int vue_slot = c->vue_map.vert_result_to_slot[vert_result];
>     assert (vue_slot >= c->urb_entry_read_offset);
> -   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
> -   GLuint sub = vue_slot % 2;
> -
> -   return brw_vec4_grf(vert.nr + off, sub * 4);
> +   return get_vue_slot(c, vert, vue_slot);
>  }
>
>  static bool
> @@ -128,31 +149,37 @@ static void do_twoside_color( struct brw_sf_compile
> *c )
>   * Flat shading
>   */
>
> -#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
> -                                BITFIELD64_BIT(VERT_RESULT_COL1))
> -
> -static void copy_colors( struct brw_sf_compile *c,
> -                    struct brw_reg dst,
> -                     struct brw_reg src,
> -                     int allow_twoside)
> +static void copy_flatshaded_attributes( struct brw_sf_compile *c,
> +                                        struct brw_reg dst,
> +                                        struct brw_reg src)
>  {
>     struct brw_compile *p = &c->func;
> +   struct brw_context *brw = p->brw;
>     GLuint i;
>
> -   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
> -      if (have_attr(c,i)) {
> +   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
> +      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) {
>          brw_MOV(p,
> -                get_vert_result(c, dst, i),
> -                get_vert_result(c, src, i));
> +                get_vue_slot(c, dst, i),
> +                get_vue_slot(c, src, i));
>
> -      } else if(allow_twoside && have_attr(c, i - VERT_RESULT_COL0 +
> VERT_RESULT_BFC0)) {
> -        brw_MOV(p,
> -                get_vert_result(c, dst, i - VERT_RESULT_COL0 +
> VERT_RESULT_BFC0),
> -                get_vert_result(c, src, i - VERT_RESULT_COL0 +
> VERT_RESULT_BFC0));
>        }
>     }
>  }
>
> +static GLuint count_flatshaded_attributes(struct brw_sf_compile *c )
> +{
> +   struct brw_compile *p = &c->func;
> +   struct brw_context *brw = p->brw;
> +   GLuint count = 0;
> +   GLuint i;
> +   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
> +      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT)
> +         count++;
> +   }
> +   return count;
> +}
> +
>
>
>  /* Need to use a computed jump to copy flatshaded attributes as the
> @@ -168,18 +195,6 @@ static void do_flatshade_triangle( struct
> brw_sf_compile *c )
>
>     GLuint nr;
>
> -   if (c->key.do_twoside_color) {
> -      nr = ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL0) |
> BITFIELD64_BIT(VERT_RESULT_BFC0))) != 0) +
> -         ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL1) |
> BITFIELD64_BIT(VERT_RESULT_BFC1))) != 0);
> -
> -   } else {
> -      nr = ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL0)) != 0) +
> -         ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL1)) != 0);
> -   }
> -
> -   if (!nr)
> -      return;
> -
>     /* Already done in clip program:
>      */
>     if (c->key.primitive == SF_UNFILLED_TRIS)
> @@ -188,21 +203,23 @@ static void do_flatshade_triangle( struct
> brw_sf_compile *c )
>     if (intel->gen == 5)
>         jmpi = 2;
>
> +   nr = count_flatshaded_attributes(c);
> +
>

We used to have this optimization:

if (!nr)
   return;

I understand why you removed it: because now this code should only be
called if c->key.has_flat_shading is true.  However, for the sake of safety
(and documentation), can we add something like this:

   /* This code should only be called if c->key.has_flat_shading is true,
    * meaning there is at least one flatshaded attribute.
    */
   assert(nr != 0);

If you would prefer to put this assertion inside
count_flatshaded_attributes(), I would be happy with that too.


>     brw_push_insn_state(p);
>
>     brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
>     brw_JMPI(p, ip, ip, c->pv);
>
> -   copy_colors(c, c->vert[1], c->vert[0], c->key.do_twoside_color);
> -   copy_colors(c, c->vert[2], c->vert[0], c->key.do_twoside_color);
> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
> +   copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
>     brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
>
> -   copy_colors(c, c->vert[0], c->vert[1], c->key.do_twoside_color);
> -   copy_colors(c, c->vert[2], c->vert[1], c->key.do_twoside_color);
> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
> +   copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
>     brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
>
> -   copy_colors(c, c->vert[0], c->vert[2], c->key.do_twoside_color);
> -   copy_colors(c, c->vert[1], c->vert[2], c->key.do_twoside_color);
> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
>
>     brw_pop_insn_state(p);
>  }
> @@ -213,12 +230,9 @@ static void do_flatshade_line( struct brw_sf_compile
> *c )
>     struct brw_compile *p = &c->func;
>     struct intel_context *intel = &p->brw->intel;
>     struct brw_reg ip = brw_ip_reg();
> -   GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
> +   GLuint nr;
>     GLuint jmpi = 1;
>
> -   if (!nr)
> -      return;
> -
>     /* Already done in clip program:
>      */
>     if (c->key.primitive == SF_UNFILLED_TRIS)
> @@ -227,14 +241,16 @@ static void do_flatshade_line( struct brw_sf_compile
> *c )
>     if (intel->gen == 5)
>         jmpi = 2;
>
> +   nr = count_flatshaded_attributes(c);
> +
>

A similar assertion should go here.


>     brw_push_insn_state(p);
>
>     brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
>     brw_JMPI(p, ip, ip, c->pv);
> -   copy_colors(c, c->vert[1], c->vert[0], 0);
> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
>
>     brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
> -   copy_colors(c, c->vert[0], c->vert[1], 0);
> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
>
>     brw_pop_insn_state(p);
>  }
> @@ -332,40 +348,25 @@ static void invert_det( struct brw_sf_compile *c)
>
>  static bool
>  calculate_masks(struct brw_sf_compile *c,
> -               GLuint reg,
> -               GLushort *pc,
> -               GLushort *pc_persp,
> -               GLushort *pc_linear)
> +                GLuint reg,
> +                GLushort *pc,
> +                GLushort *pc_persp,
> +                GLushort *pc_linear)
>

I like the way you've rewritten this function.  Nicely done.


>  {
> +   struct brw_compile *p = &c->func;
> +   struct brw_context *brw = p->brw;
> +   enum glsl_interp_qualifier interp;
>     bool is_last_attr = (reg == c->nr_setup_regs - 1);
> -   GLbitfield64 persp_mask;
> -   GLbitfield64 linear_mask;
> -
> -   if (c->key.do_flat_shading)
> -      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) |
> -                                    BITFIELD64_BIT(VERT_RESULT_COL0) |
> -                                    BITFIELD64_BIT(VERT_RESULT_COL1) |
> -                                    BITFIELD64_BIT(VERT_RESULT_BFC0) |
> -                                    BITFIELD64_BIT(VERT_RESULT_BFC1));
> -   else
> -      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS));
> -
> -   if (c->key.do_flat_shading)
> -      linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) |
> -                                     BITFIELD64_BIT(VERT_RESULT_COL1) |
> -                                     BITFIELD64_BIT(VERT_RESULT_BFC0) |
> -                                     BITFIELD64_BIT(VERT_RESULT_BFC1));
> -   else
> -      linear_mask = c->key.attrs;
>
>     *pc_persp = 0;
>     *pc_linear = 0;
>     *pc = 0xf;
> -
> -   if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
> -      *pc_persp = 0xf;
>
> -   if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
> +   interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg,
> 0));
> +   if (interp == INTERP_QUALIFIER_SMOOTH) {
> +      *pc_linear = 0xf;
> +      *pc_persp = 0xf;
> +   } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE)
>        *pc_linear = 0xf;
>
>     /* Maybe only processs one attribute on the final round:
> @@ -373,11 +374,12 @@ calculate_masks(struct brw_sf_compile *c,
>     if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) {
>        *pc |= 0xf0;
>
> -      if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
> -        *pc_persp |= 0xf0;
> -
> -      if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg,
> 1)))
> -        *pc_linear |= 0xf0;
> +      interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c,
> reg, 1));
> +      if (interp == INTERP_QUALIFIER_SMOOTH) {
> +         *pc_linear |= 0xf0;
> +         *pc_persp |= 0xf0;
> +      } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE)
> +         *pc_linear |= 0xf0;
>     }
>
>     return is_last_attr;
> @@ -430,7 +432,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool
> allocate)
>     if (c->key.do_twoside_color)
>        do_twoside_color(c);
>
> -   if (c->key.do_flat_shading)
> +   if (c->key.has_flat_shading)
>        do_flatshade_triangle(c);
>
>
> @@ -443,7 +445,6 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool
> allocate)
>        struct brw_reg a2 = offset(c->vert[2], i);
>        GLushort pc, pc_persp, pc_linear;
>        bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
> -
>        if (pc_persp)
>        {
>          brw_set_predicate_control_flag_value(p, pc_persp);
> @@ -507,7 +508,6 @@ void brw_emit_line_setup(struct brw_sf_compile *c,
> bool allocate)
>     struct brw_compile *p = &c->func;
>     GLuint i;
>
> -
>     c->nr_verts = 2;
>
>     if (allocate)
> @@ -516,7 +516,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c,
> bool allocate)
>     invert_det(c);
>     copy_z_inv_w(c);
>
> -   if (c->key.do_flat_shading)
> +   if (c->key.has_flat_shading)
>        do_flatshade_line(c);
>
>     for (i = 0; i < c->nr_setup_regs; i++)
> @@ -799,7 +799,3 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
>
>     brw_emit_point_setup( c, false );
>  }
> -
> -
> -
> -
> --
> 1.7.10.rc3.1.gb306
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

With the minor changes noted above, this patch is:

Reviewed-by: Paul Berry <stereotype441@gmail.com>
Paul Berry July 17, 2012, 1:07 p.m. UTC | #2
On 17 July 2012 05:50, Paul Berry <stereotype441@gmail.com> wrote:

> On 30 June 2012 11:50, Olivier Galibert <galibert@pobox.com> wrote:
>
>> This patch also correct a couple of problems with noperspective
>> interpolation.
>>
>> At that point all the glsl 1.1/1.3 interpolation tests that do not
>> clip pass (the -none ones).
>>
>> The fs code does not use the pre-resolved interpolation modes in order
>> not to mess with gen6+.  Sharing the resolution would require putting
>> brw_wm_prog before brw_clip_prog and brw_sf_prog.  This may be a good
>> thing, but it could have unexpected consequences, so it's better be
>> done independently in any case.
>>
>> Signed-off-by: Olivier Galibert <galibert@pobox.com>
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs.cpp         |    2 +-
>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |    5 +
>>  src/mesa/drivers/dri/i965/brw_sf.c           |    9 +-
>>  src/mesa/drivers/dri/i965/brw_sf.h           |    2 +-
>>  src/mesa/drivers/dri/i965/brw_sf_emit.c      |  164
>> +++++++++++++-------------
>>  5 files changed, 95 insertions(+), 87 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 710f2ff..b142f2b 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -506,7 +506,7 @@ fs_visitor::emit_general_interpolation(ir_variable
>> *ir)
>>                   struct brw_reg interp = interp_reg(location, k);
>>                    emit_linterp(attr, fs_reg(interp), interpolation_mode,
>>                                 ir->centroid);
>> -                 if (intel->gen < 6) {
>> +                 if (intel->gen < 6 && interpolation_mode ==
>> INTERP_QUALIFIER_SMOOTH) {
>>                      emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
>>                   }
>>                }
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index 9bd1e67..ab83a95 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -1924,6 +1924,11 @@ fs_visitor::emit_interpolation_setup_gen4()
>>     emit(BRW_OPCODE_ADD,
>> this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>>         this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
>>
>> +   this->delta_x[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] =
>> +     this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
>> +   this->delta_y[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] =
>> +     this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
>> +
>>
>
> Can we add a comment explaining why this is correct?  Something like this
> maybe:
>
> "On Gen4-5, we accomplish perspective-correct interpolation by dividing
> the attribute values by w in the vertex shader, interpolating the result
> linearly in screen space, and then multiplying by w in the fragment
> shader.  So the interpolation step is always linear in screen space,
> regardless of whether the attribute is perspective or non-perspective.
> Accordingly, we use the same delta_x and delta_y values for both kinds of
> interpolation."
>

Whoops, my memory was faulty.  This should say "...in the sf thread...",
not "...in the vertex shader...".


>
>
>>     this->current_annotation = "compute pos.w and 1/pos.w";
>>     /* Compute wpos.w.  It's always in our setup, since it's needed to
>>      * interpolate the other attributes.
>> diff --git a/src/mesa/drivers/dri/i965/brw_sf.c
>> b/src/mesa/drivers/dri/i965/brw_sf.c
>> index 0cc4fc7..85f5f51 100644
>> --- a/src/mesa/drivers/dri/i965/brw_sf.c
>> +++ b/src/mesa/drivers/dri/i965/brw_sf.c
>> @@ -139,6 +139,7 @@ brw_upload_sf_prog(struct brw_context *brw)
>>     struct brw_sf_prog_key key;
>>     /* _NEW_BUFFERS */
>>     bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
>> +   int i;
>>
>>     memset(&key, 0, sizeof(key));
>>
>> @@ -191,7 +192,13 @@ brw_upload_sf_prog(struct brw_context *brw)
>>        key.sprite_origin_lower_left = true;
>>
>>     /* _NEW_LIGHT */
>> -   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
>> +   key.has_flat_shading = 0;
>> +   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
>> +      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) {
>> +         key.has_flat_shading = 1;
>> +         break;
>> +      }
>> +   }
>>     key.do_twoside_color = (ctx->Light.Enabled &&
>> ctx->Light.Model.TwoSide) ||
>>       ctx->VertexProgram._TwoSideEnabled;
>>     brw_copy_interpolation_modes(brw, key.interpolation_mode);
>> diff --git a/src/mesa/drivers/dri/i965/brw_sf.h
>> b/src/mesa/drivers/dri/i965/brw_sf.h
>> index 0a8135c..c718072 100644
>> --- a/src/mesa/drivers/dri/i965/brw_sf.h
>> +++ b/src/mesa/drivers/dri/i965/brw_sf.h
>> @@ -50,7 +50,7 @@ struct brw_sf_prog_key {
>>     uint8_t point_sprite_coord_replace;
>>     GLuint primitive:2;
>>     GLuint do_twoside_color:1;
>> -   GLuint do_flat_shading:1;
>> +   GLuint has_flat_shading:1;
>>     GLuint frontface_ccw:1;
>>     GLuint do_point_sprite:1;
>>     GLuint do_point_coord:1;
>> diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c
>> b/src/mesa/drivers/dri/i965/brw_sf_emit.c
>> index 9d8aa38..387685a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
>> +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
>> @@ -44,6 +44,17 @@
>>
>>
>>  /**
>> + * Determine the vue slot corresponding to the given half of the given
>> + * register.  half=0 means the first half of a register, half=1 means the
>> + * second half.
>> + */
>> +static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint
>> reg,
>> +                                       int half)
>> +{
>> +   return (reg + c->urb_entry_read_offset) * 2 + half;
>> +}
>> +
>> +/**
>>   * Determine the vert_result corresponding to the given half of the given
>>   * register.  half=0 means the first half of a register, half=1 means the
>>   * second half.
>> @@ -51,11 +62,24 @@
>>  static inline int vert_reg_to_vert_result(struct brw_sf_compile *c,
>> GLuint reg,
>>                                            int half)
>>  {
>> -   int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
>> +   int vue_slot = vert_reg_to_vue_slot(c, reg, half);
>>     return c->vue_map.slot_to_vert_result[vue_slot];
>>  }
>>
>>  /**
>> + * Determine the register corresponding to the given vue slot.
>> + */
>> +static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
>> +                                   struct brw_reg vert,
>> +                                   int vue_slot)
>> +{
>> +   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
>> +   GLuint sub = vue_slot % 2;
>> +
>> +   return brw_vec4_grf(vert.nr + off, sub * 4);
>>
> +}
>> +
>> +/**
>>   * Determine the register corresponding to the given vert_result.
>>   */
>>  static struct brw_reg get_vert_result(struct brw_sf_compile *c,
>> @@ -64,10 +88,7 @@ static struct brw_reg get_vert_result(struct
>> brw_sf_compile *c,
>>  {
>>     int vue_slot = c->vue_map.vert_result_to_slot[vert_result];
>>     assert (vue_slot >= c->urb_entry_read_offset);
>> -   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
>> -   GLuint sub = vue_slot % 2;
>> -
>> -   return brw_vec4_grf(vert.nr + off, sub * 4);
>> +   return get_vue_slot(c, vert, vue_slot);
>>  }
>>
>>  static bool
>> @@ -128,31 +149,37 @@ static void do_twoside_color( struct brw_sf_compile
>> *c )
>>   * Flat shading
>>   */
>>
>> -#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
>> -                                BITFIELD64_BIT(VERT_RESULT_COL1))
>> -
>> -static void copy_colors( struct brw_sf_compile *c,
>> -                    struct brw_reg dst,
>> -                     struct brw_reg src,
>> -                     int allow_twoside)
>> +static void copy_flatshaded_attributes( struct brw_sf_compile *c,
>> +                                        struct brw_reg dst,
>> +                                        struct brw_reg src)
>>  {
>>     struct brw_compile *p = &c->func;
>> +   struct brw_context *brw = p->brw;
>>     GLuint i;
>>
>> -   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
>> -      if (have_attr(c,i)) {
>> +   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
>> +      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) {
>>          brw_MOV(p,
>> -                get_vert_result(c, dst, i),
>> -                get_vert_result(c, src, i));
>> +                get_vue_slot(c, dst, i),
>> +                get_vue_slot(c, src, i));
>>
>> -      } else if(allow_twoside && have_attr(c, i - VERT_RESULT_COL0 +
>> VERT_RESULT_BFC0)) {
>> -        brw_MOV(p,
>> -                get_vert_result(c, dst, i - VERT_RESULT_COL0 +
>> VERT_RESULT_BFC0),
>> -                get_vert_result(c, src, i - VERT_RESULT_COL0 +
>> VERT_RESULT_BFC0));
>>        }
>>     }
>>  }
>>
>> +static GLuint count_flatshaded_attributes(struct brw_sf_compile *c )
>> +{
>> +   struct brw_compile *p = &c->func;
>> +   struct brw_context *brw = p->brw;
>> +   GLuint count = 0;
>> +   GLuint i;
>> +   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
>> +      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT)
>> +         count++;
>> +   }
>> +   return count;
>> +}
>> +
>>
>>
>>  /* Need to use a computed jump to copy flatshaded attributes as the
>> @@ -168,18 +195,6 @@ static void do_flatshade_triangle( struct
>> brw_sf_compile *c )
>>
>>     GLuint nr;
>>
>> -   if (c->key.do_twoside_color) {
>> -      nr = ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL0) |
>> BITFIELD64_BIT(VERT_RESULT_BFC0))) != 0) +
>> -         ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL1) |
>> BITFIELD64_BIT(VERT_RESULT_BFC1))) != 0);
>> -
>> -   } else {
>> -      nr = ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL0)) != 0) +
>> -         ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL1)) != 0);
>> -   }
>> -
>> -   if (!nr)
>> -      return;
>> -
>>     /* Already done in clip program:
>>      */
>>     if (c->key.primitive == SF_UNFILLED_TRIS)
>> @@ -188,21 +203,23 @@ static void do_flatshade_triangle( struct
>> brw_sf_compile *c )
>>     if (intel->gen == 5)
>>         jmpi = 2;
>>
>> +   nr = count_flatshaded_attributes(c);
>> +
>>
>
> We used to have this optimization:
>
> if (!nr)
>    return;
>
> I understand why you removed it: because now this code should only be
> called if c->key.has_flat_shading is true.  However, for the sake of safety
> (and documentation), can we add something like this:
>
>    /* This code should only be called if c->key.has_flat_shading is true,
>     * meaning there is at least one flatshaded attribute.
>     */
>    assert(nr != 0);
>
> If you would prefer to put this assertion inside
> count_flatshaded_attributes(), I would be happy with that too.
>
>
>>     brw_push_insn_state(p);
>>
>>     brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
>>     brw_JMPI(p, ip, ip, c->pv);
>>
>> -   copy_colors(c, c->vert[1], c->vert[0], c->key.do_twoside_color);
>> -   copy_colors(c, c->vert[2], c->vert[0], c->key.do_twoside_color);
>> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
>> +   copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
>>     brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
>>
>> -   copy_colors(c, c->vert[0], c->vert[1], c->key.do_twoside_color);
>> -   copy_colors(c, c->vert[2], c->vert[1], c->key.do_twoside_color);
>> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
>> +   copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
>>     brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
>>
>> -   copy_colors(c, c->vert[0], c->vert[2], c->key.do_twoside_color);
>> -   copy_colors(c, c->vert[1], c->vert[2], c->key.do_twoside_color);
>> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
>> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
>>
>>     brw_pop_insn_state(p);
>>  }
>> @@ -213,12 +230,9 @@ static void do_flatshade_line( struct brw_sf_compile
>> *c )
>>     struct brw_compile *p = &c->func;
>>     struct intel_context *intel = &p->brw->intel;
>>     struct brw_reg ip = brw_ip_reg();
>> -   GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
>> +   GLuint nr;
>>     GLuint jmpi = 1;
>>
>> -   if (!nr)
>> -      return;
>> -
>>     /* Already done in clip program:
>>      */
>>     if (c->key.primitive == SF_UNFILLED_TRIS)
>> @@ -227,14 +241,16 @@ static void do_flatshade_line( struct
>> brw_sf_compile *c )
>>     if (intel->gen == 5)
>>         jmpi = 2;
>>
>> +   nr = count_flatshaded_attributes(c);
>> +
>>
>
> A similar assertion should go here.
>
>
>>     brw_push_insn_state(p);
>>
>>     brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
>>     brw_JMPI(p, ip, ip, c->pv);
>> -   copy_colors(c, c->vert[1], c->vert[0], 0);
>> +   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
>>
>>     brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
>> -   copy_colors(c, c->vert[0], c->vert[1], 0);
>> +   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
>>
>>     brw_pop_insn_state(p);
>>  }
>> @@ -332,40 +348,25 @@ static void invert_det( struct brw_sf_compile *c)
>>
>>  static bool
>>  calculate_masks(struct brw_sf_compile *c,
>> -               GLuint reg,
>> -               GLushort *pc,
>> -               GLushort *pc_persp,
>> -               GLushort *pc_linear)
>> +                GLuint reg,
>> +                GLushort *pc,
>> +                GLushort *pc_persp,
>> +                GLushort *pc_linear)
>>
>
> I like the way you've rewritten this function.  Nicely done.
>
>
>>  {
>> +   struct brw_compile *p = &c->func;
>> +   struct brw_context *brw = p->brw;
>> +   enum glsl_interp_qualifier interp;
>>     bool is_last_attr = (reg == c->nr_setup_regs - 1);
>> -   GLbitfield64 persp_mask;
>> -   GLbitfield64 linear_mask;
>> -
>> -   if (c->key.do_flat_shading)
>> -      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) |
>> -                                    BITFIELD64_BIT(VERT_RESULT_COL0) |
>> -                                    BITFIELD64_BIT(VERT_RESULT_COL1) |
>> -                                    BITFIELD64_BIT(VERT_RESULT_BFC0) |
>> -                                    BITFIELD64_BIT(VERT_RESULT_BFC1));
>> -   else
>> -      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS));
>> -
>> -   if (c->key.do_flat_shading)
>> -      linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) |
>> -                                     BITFIELD64_BIT(VERT_RESULT_COL1) |
>> -                                     BITFIELD64_BIT(VERT_RESULT_BFC0) |
>> -                                     BITFIELD64_BIT(VERT_RESULT_BFC1));
>> -   else
>> -      linear_mask = c->key.attrs;
>>
>>     *pc_persp = 0;
>>     *pc_linear = 0;
>>     *pc = 0xf;
>> -
>> -   if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
>> -      *pc_persp = 0xf;
>>
>> -   if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
>> +   interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg,
>> 0));
>> +   if (interp == INTERP_QUALIFIER_SMOOTH) {
>> +      *pc_linear = 0xf;
>> +      *pc_persp = 0xf;
>> +   } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE)
>>        *pc_linear = 0xf;
>>
>>     /* Maybe only processs one attribute on the final round:
>> @@ -373,11 +374,12 @@ calculate_masks(struct brw_sf_compile *c,
>>     if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) {
>>        *pc |= 0xf0;
>>
>> -      if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg,
>> 1)))
>> -        *pc_persp |= 0xf0;
>> -
>> -      if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg,
>> 1)))
>> -        *pc_linear |= 0xf0;
>> +      interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c,
>> reg, 1));
>> +      if (interp == INTERP_QUALIFIER_SMOOTH) {
>> +         *pc_linear |= 0xf0;
>> +         *pc_persp |= 0xf0;
>> +      } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE)
>> +         *pc_linear |= 0xf0;
>>     }
>>
>>     return is_last_attr;
>> @@ -430,7 +432,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c,
>> bool allocate)
>>     if (c->key.do_twoside_color)
>>        do_twoside_color(c);
>>
>> -   if (c->key.do_flat_shading)
>> +   if (c->key.has_flat_shading)
>>        do_flatshade_triangle(c);
>>
>>
>> @@ -443,7 +445,6 @@ void brw_emit_tri_setup(struct brw_sf_compile *c,
>> bool allocate)
>>        struct brw_reg a2 = offset(c->vert[2], i);
>>        GLushort pc, pc_persp, pc_linear;
>>        bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
>> -
>>        if (pc_persp)
>>        {
>>          brw_set_predicate_control_flag_value(p, pc_persp);
>> @@ -507,7 +508,6 @@ void brw_emit_line_setup(struct brw_sf_compile *c,
>> bool allocate)
>>     struct brw_compile *p = &c->func;
>>     GLuint i;
>>
>> -
>>     c->nr_verts = 2;
>>
>>     if (allocate)
>> @@ -516,7 +516,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c,
>> bool allocate)
>>     invert_det(c);
>>     copy_z_inv_w(c);
>>
>> -   if (c->key.do_flat_shading)
>> +   if (c->key.has_flat_shading)
>>        do_flatshade_line(c);
>>
>>     for (i = 0; i < c->nr_setup_regs; i++)
>> @@ -799,7 +799,3 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c
>> )
>>
>>     brw_emit_point_setup( c, false );
>>  }
>> -
>> -
>> -
>> -
>> --
>> 1.7.10.rc3.1.gb306
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
> With the minor changes noted above, this patch is:
>
> Reviewed-by: Paul Berry <stereotype441@gmail.com>
>
diff mbox

Patch

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 710f2ff..b142f2b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -506,7 +506,7 @@  fs_visitor::emit_general_interpolation(ir_variable *ir)
 		  struct brw_reg interp = interp_reg(location, k);
                   emit_linterp(attr, fs_reg(interp), interpolation_mode,
                                ir->centroid);
-		  if (intel->gen < 6) {
+		  if (intel->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
 		     emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
 		  }
 	       }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9bd1e67..ab83a95 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1924,6 +1924,11 @@  fs_visitor::emit_interpolation_setup_gen4()
    emit(BRW_OPCODE_ADD, this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
 	this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
 
+   this->delta_x[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] =
+     this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
+   this->delta_y[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] =
+     this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
+
    this->current_annotation = "compute pos.w and 1/pos.w";
    /* Compute wpos.w.  It's always in our setup, since it's needed to
     * interpolate the other attributes.
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c
index 0cc4fc7..85f5f51 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -139,6 +139,7 @@  brw_upload_sf_prog(struct brw_context *brw)
    struct brw_sf_prog_key key;
    /* _NEW_BUFFERS */
    bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+   int i;
 
    memset(&key, 0, sizeof(key));
 
@@ -191,7 +192,13 @@  brw_upload_sf_prog(struct brw_context *brw)
       key.sprite_origin_lower_left = true;
 
    /* _NEW_LIGHT */
-   key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
+   key.has_flat_shading = 0;
+   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
+      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) {
+         key.has_flat_shading = 1;
+         break;
+      }
+   }
    key.do_twoside_color = (ctx->Light.Enabled && ctx->Light.Model.TwoSide) ||
      ctx->VertexProgram._TwoSideEnabled;
    brw_copy_interpolation_modes(brw, key.interpolation_mode);
diff --git a/src/mesa/drivers/dri/i965/brw_sf.h b/src/mesa/drivers/dri/i965/brw_sf.h
index 0a8135c..c718072 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.h
+++ b/src/mesa/drivers/dri/i965/brw_sf.h
@@ -50,7 +50,7 @@  struct brw_sf_prog_key {
    uint8_t point_sprite_coord_replace;
    GLuint primitive:2;
    GLuint do_twoside_color:1;
-   GLuint do_flat_shading:1;
+   GLuint has_flat_shading:1;
    GLuint frontface_ccw:1;
    GLuint do_point_sprite:1;
    GLuint do_point_coord:1;
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index 9d8aa38..387685a 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -44,6 +44,17 @@ 
 
 
 /**
+ * Determine the vue slot corresponding to the given half of the given
+ * register.  half=0 means the first half of a register, half=1 means the
+ * second half.
+ */
+static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint reg,
+                                       int half)
+{
+   return (reg + c->urb_entry_read_offset) * 2 + half;
+}
+
+/**
  * Determine the vert_result corresponding to the given half of the given
  * register.  half=0 means the first half of a register, half=1 means the
  * second half.
@@ -51,11 +62,24 @@ 
 static inline int vert_reg_to_vert_result(struct brw_sf_compile *c, GLuint reg,
                                           int half)
 {
-   int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
+   int vue_slot = vert_reg_to_vue_slot(c, reg, half);
    return c->vue_map.slot_to_vert_result[vue_slot];
 }
 
 /**
+ * Determine the register corresponding to the given vue slot.
+ */
+static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
+                                   struct brw_reg vert,
+                                   int vue_slot)
+{
+   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
+   GLuint sub = vue_slot % 2;
+
+   return brw_vec4_grf(vert.nr + off, sub * 4);
+}
+
+/**
  * Determine the register corresponding to the given vert_result.
  */
 static struct brw_reg get_vert_result(struct brw_sf_compile *c,
@@ -64,10 +88,7 @@  static struct brw_reg get_vert_result(struct brw_sf_compile *c,
 {
    int vue_slot = c->vue_map.vert_result_to_slot[vert_result];
    assert (vue_slot >= c->urb_entry_read_offset);
-   GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
-   GLuint sub = vue_slot % 2;
-
-   return brw_vec4_grf(vert.nr + off, sub * 4);
+   return get_vue_slot(c, vert, vue_slot);
 }
 
 static bool
@@ -128,31 +149,37 @@  static void do_twoside_color( struct brw_sf_compile *c )
  * Flat shading
  */
 
-#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
-                                BITFIELD64_BIT(VERT_RESULT_COL1))
-
-static void copy_colors( struct brw_sf_compile *c,
-		     struct brw_reg dst,
-                     struct brw_reg src,
-                     int allow_twoside)
+static void copy_flatshaded_attributes( struct brw_sf_compile *c,
+                                        struct brw_reg dst,
+                                        struct brw_reg src)
 {
    struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
    GLuint i;
 
-   for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
-      if (have_attr(c,i)) {
+   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
+      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) {
 	 brw_MOV(p, 
-		 get_vert_result(c, dst, i),
-		 get_vert_result(c, src, i));
+		 get_vue_slot(c, dst, i),
+		 get_vue_slot(c, src, i));
 
-      } else if(allow_twoside && have_attr(c, i - VERT_RESULT_COL0 + VERT_RESULT_BFC0)) {
-	 brw_MOV(p, 
-		 get_vert_result(c, dst, i - VERT_RESULT_COL0 + VERT_RESULT_BFC0),
-		 get_vert_result(c, src, i - VERT_RESULT_COL0 + VERT_RESULT_BFC0));
       }
    }
 }
 
+static GLuint count_flatshaded_attributes(struct brw_sf_compile *c )
+{
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   GLuint count = 0;
+   GLuint i;
+   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
+      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT)
+         count++;
+   }
+   return count;
+}
+
 
 
 /* Need to use a computed jump to copy flatshaded attributes as the
@@ -168,18 +195,6 @@  static void do_flatshade_triangle( struct brw_sf_compile *c )
 
    GLuint nr;
 
-   if (c->key.do_twoside_color) {
-      nr = ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL0) | BITFIELD64_BIT(VERT_RESULT_BFC0))) != 0) +
-         ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL1) | BITFIELD64_BIT(VERT_RESULT_BFC1))) != 0);
-
-   } else {
-      nr = ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL0)) != 0) +
-         ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL1)) != 0);
-   }
-
-   if (!nr)
-      return;
-
    /* Already done in clip program:
     */
    if (c->key.primitive == SF_UNFILLED_TRIS)
@@ -188,21 +203,23 @@  static void do_flatshade_triangle( struct brw_sf_compile *c )
    if (intel->gen == 5)
        jmpi = 2;
 
+   nr = count_flatshaded_attributes(c);
+
    brw_push_insn_state(p);
    
    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
    brw_JMPI(p, ip, ip, c->pv);
 
-   copy_colors(c, c->vert[1], c->vert[0], c->key.do_twoside_color);
-   copy_colors(c, c->vert[2], c->vert[0], c->key.do_twoside_color);
+   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
+   copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
    brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
 
-   copy_colors(c, c->vert[0], c->vert[1], c->key.do_twoside_color);
-   copy_colors(c, c->vert[2], c->vert[1], c->key.do_twoside_color);
+   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
+   copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
    brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
 
-   copy_colors(c, c->vert[0], c->vert[2], c->key.do_twoside_color);
-   copy_colors(c, c->vert[1], c->vert[2], c->key.do_twoside_color);
+   copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
+   copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
 
    brw_pop_insn_state(p);
 }
@@ -213,12 +230,9 @@  static void do_flatshade_line( struct brw_sf_compile *c )
    struct brw_compile *p = &c->func;
    struct intel_context *intel = &p->brw->intel;
    struct brw_reg ip = brw_ip_reg();
-   GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
+   GLuint nr;
    GLuint jmpi = 1;
 
-   if (!nr)
-      return;
-
    /* Already done in clip program: 
     */
    if (c->key.primitive == SF_UNFILLED_TRIS)
@@ -227,14 +241,16 @@  static void do_flatshade_line( struct brw_sf_compile *c )
    if (intel->gen == 5)
        jmpi = 2;
 
+   nr = count_flatshaded_attributes(c);
+
    brw_push_insn_state(p);
    
    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
    brw_JMPI(p, ip, ip, c->pv);
-   copy_colors(c, c->vert[1], c->vert[0], 0);
+   copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
 
    brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
-   copy_colors(c, c->vert[0], c->vert[1], 0);
+   copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
 
    brw_pop_insn_state(p);
 }
@@ -332,40 +348,25 @@  static void invert_det( struct brw_sf_compile *c)
 
 static bool
 calculate_masks(struct brw_sf_compile *c,
-	        GLuint reg,
-		GLushort *pc,
-		GLushort *pc_persp,
-		GLushort *pc_linear)
+                GLuint reg,
+                GLushort *pc,
+                GLushort *pc_persp,
+                GLushort *pc_linear)
 {
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   enum glsl_interp_qualifier interp;
    bool is_last_attr = (reg == c->nr_setup_regs - 1);
-   GLbitfield64 persp_mask;
-   GLbitfield64 linear_mask;
-
-   if (c->key.do_flat_shading)
-      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) |
-                                    BITFIELD64_BIT(VERT_RESULT_COL0) |
-                                    BITFIELD64_BIT(VERT_RESULT_COL1) |
-                                    BITFIELD64_BIT(VERT_RESULT_BFC0) |
-                                    BITFIELD64_BIT(VERT_RESULT_BFC1));
-   else
-      persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS));
-
-   if (c->key.do_flat_shading)
-      linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) |
-                                     BITFIELD64_BIT(VERT_RESULT_COL1) |
-                                     BITFIELD64_BIT(VERT_RESULT_BFC0) |
-                                     BITFIELD64_BIT(VERT_RESULT_BFC1));
-   else
-      linear_mask = c->key.attrs;
 
    *pc_persp = 0;
    *pc_linear = 0;
    *pc = 0xf;
-      
-   if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
-      *pc_persp = 0xf;
 
-   if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
+   interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg, 0));
+   if (interp == INTERP_QUALIFIER_SMOOTH) {
+      *pc_linear = 0xf;
+      *pc_persp = 0xf;
+   } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE)
       *pc_linear = 0xf;
 
    /* Maybe only processs one attribute on the final round:
@@ -373,11 +374,12 @@  calculate_masks(struct brw_sf_compile *c,
    if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) {
       *pc |= 0xf0;
 
-      if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
-	 *pc_persp |= 0xf0;
-
-      if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 1)))
-	 *pc_linear |= 0xf0;
+      interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg, 1));
+      if (interp == INTERP_QUALIFIER_SMOOTH) {
+         *pc_linear |= 0xf0;
+         *pc_persp |= 0xf0;
+      } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE)
+         *pc_linear |= 0xf0;
    }
 
    return is_last_attr;
@@ -430,7 +432,7 @@  void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
    if (c->key.do_twoside_color) 
       do_twoside_color(c);
 
-   if (c->key.do_flat_shading)
+   if (c->key.has_flat_shading)
       do_flatshade_triangle(c);
       
    
@@ -443,7 +445,6 @@  void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
       struct brw_reg a2 = offset(c->vert[2], i);
       GLushort pc, pc_persp, pc_linear;
       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
-
       if (pc_persp)
       {
 	 brw_set_predicate_control_flag_value(p, pc_persp);
@@ -507,7 +508,6 @@  void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
    struct brw_compile *p = &c->func;
    GLuint i;
 
-
    c->nr_verts = 2;
 
    if (allocate)
@@ -516,7 +516,7 @@  void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
    invert_det(c);
    copy_z_inv_w(c);
 
-   if (c->key.do_flat_shading)
+   if (c->key.has_flat_shading)
       do_flatshade_line(c);
 
    for (i = 0; i < c->nr_setup_regs; i++)
@@ -799,7 +799,3 @@  void brw_emit_anyprim_setup( struct brw_sf_compile *c )
 
    brw_emit_point_setup( c, false );
 }
-
-
-
-