diff mbox

[5/5] intel gen4-5: Make noperspective clipping work.

Message ID 1341082215-22933-6-git-send-email-galibert@pobox.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Olivier Galibert June 30, 2012, 6:50 p.m. UTC
At this point all interpolation tests with fixed clipping work.

Signed-off-by: Olivier Galibert <galibert@pobox.com>
---
 src/mesa/drivers/dri/i965/brw_clip.c      |    9 ++
 src/mesa/drivers/dri/i965/brw_clip.h      |    1 +
 src/mesa/drivers/dri/i965/brw_clip_util.c |  133 ++++++++++++++++++++++++++---
 3 files changed, 132 insertions(+), 11 deletions(-)

Comments

Paul Berry July 17, 2012, 2:07 p.m. UTC | #1
On 30 June 2012 11:50, Olivier Galibert <galibert@pobox.com> wrote:

> At this point all interpolation tests with fixed clipping work.
>
> Signed-off-by: Olivier Galibert <galibert@pobox.com>
> ---
>  src/mesa/drivers/dri/i965/brw_clip.c      |    9 ++
>  src/mesa/drivers/dri/i965/brw_clip.h      |    1 +
>  src/mesa/drivers/dri/i965/brw_clip_util.c |  133
> ++++++++++++++++++++++++++---
>  3 files changed, 132 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_clip.c
> b/src/mesa/drivers/dri/i965/brw_clip.c
> index 952eb4a..6bfdf24 100644
> --- a/src/mesa/drivers/dri/i965/brw_clip.c
> +++ b/src/mesa/drivers/dri/i965/brw_clip.c
> @@ -234,6 +234,15 @@ brw_upload_clip_prog(struct brw_context *brw)
>           break;
>        }
>     }
> +   key.has_noperspective_shading = 0;
> +   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
> +      if (brw_get_interpolation_mode(brw, i) ==
> INTERP_QUALIFIER_NOPERSPECTIVE &&
> +          brw->vs.prog_data->vue_map.slot_to_vert_result[i] !=
> VERT_RESULT_HPOS) {
> +         key.has_noperspective_shading = 1;
> +         break;
> +      }
> +   }
> +
>     key.pv_first = (ctx->Light.ProvokingVertex ==
> GL_FIRST_VERTEX_CONVENTION);
>     brw_copy_interpolation_modes(brw, key.interpolation_mode);
>     /* _NEW_TRANSFORM (also part of VUE map)*/
> diff --git a/src/mesa/drivers/dri/i965/brw_clip.h
> b/src/mesa/drivers/dri/i965/brw_clip.h
> index 0ea0394..2a7245a 100644
> --- a/src/mesa/drivers/dri/i965/brw_clip.h
> +++ b/src/mesa/drivers/dri/i965/brw_clip.h
> @@ -47,6 +47,7 @@ struct brw_clip_prog_key {
>     GLuint primitive:4;
>     GLuint nr_userclip:4;
>     GLuint has_flat_shading:1;
> +   GLuint has_noperspective_shading:1;
>     GLuint pv_first:1;
>     GLuint do_unfilled:1;
>     GLuint fill_cw:2;           /* includes cull information */
> diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c
> b/src/mesa/drivers/dri/i965/brw_clip_util.c
> index 7b0205a..5bdcef8 100644
> --- a/src/mesa/drivers/dri/i965/brw_clip_util.c
> +++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
> @@ -129,6 +129,8 @@ static void brw_clip_project_vertex( struct
> brw_clip_compile *c,
>
>  /* Interpolate between two vertices and put the result into a0.0.
>   * Increment a0.0 accordingly.
> + *
> + * Beware that dest_ptr can be equal to v0_ptr.
>   */
>  void brw_clip_interp_vertex( struct brw_clip_compile *c,
>                              struct brw_indirect dest_ptr,
> @@ -138,8 +140,9 @@ void brw_clip_interp_vertex( struct brw_clip_compile
> *c,
>                              bool force_edgeflag)
>  {
>     struct brw_compile *p = &c->func;
> -   struct brw_reg tmp = get_tmp(c);
> -   GLuint slot;
> +   struct brw_context *brw = p->brw;
> +   struct brw_reg tmp, t_nopersp, v0_ndc_copy;
> +   GLuint slot, delta;
>
>     /* Just copy the vertex header:
>      */
> @@ -148,13 +151,119 @@ void brw_clip_interp_vertex( struct
> brw_clip_compile *c,
>      * back on Ironlake, so needn't change it
>      */
>     brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
> -
> -   /* Iterate over each attribute (could be done in pairs?)
> +
> +   /*
> +    * First handle the 3D and NDC positioning, in case we need
> +    * noperspective interpolation.  Doing it early has no performance
> +    * impact in any case.
> +    */
> +
> +   /* Start by picking up the v0 NDC coordinates, because that vertex
> +    * may be shared with the destination.
> +    */
> +   if (c->key.has_noperspective_shading) {
> +      v0_ndc_copy = get_tmp(c);
> +      brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr,
> +
> brw_vert_result_to_offset(&c->vue_map,
> +
> BRW_VERT_RESULT_NDC)));
> +   }
>

Style nit-pick: this is a lot of indentation.  How about this instead:

   if (c->key.has_noperspective_shading) {
      unsigned offset =
         brw_vert_result_to_offset(&c->vue_map, BRW_VERT_RESULT_NDC);
      v0_ndc_copy = get_tmp(c);
      brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr, offset));
   }



> +
> +   /*
> +    * Compute the new 3D position
> +    */
> +
> +   delta = brw_vert_result_to_offset(&c->vue_map, VERT_RESULT_HPOS);
> +   tmp = get_tmp(c);
> +   brw_MUL(p,
> +           vec4(brw_null_reg()),
> +           deref_4f(v1_ptr, delta),
> +           t0);
> +
> +   brw_MAC(p,
> +           tmp,
> +           negate(deref_4f(v0_ptr, delta)),
> +           t0);
> +
> +   brw_ADD(p,
> +           deref_4f(dest_ptr, delta),
> +           deref_4f(v0_ptr, delta),
> +           tmp);
> +   release_tmp(c, tmp);
>

Since delta and tmp are used elsewhere in this function for other purposes,
I would feel more comfortable if we created a local scope for them by
enclosing this small chunk of code in curly braces, e.g.:

   {
      /*
       * Compute the new 3D position
       */

      GLuint delta = brw_vert_result_to_offset(&c->vue_map,
VERT_RESULT_HPOS);
      struct brw_reg tmp = get_tmp(c);
      brw_MUL(p,
              vec4(brw_null_reg()),
              deref_4f(v1_ptr, delta),
              t0);

      brw_MAC(p,
              tmp,
              negate(deref_4f(v0_ptr, delta)),
              t0);

      brw_ADD(p,
              deref_4f(dest_ptr, delta),
              deref_4f(v0_ptr, delta),
              tmp);
      release_tmp(c, tmp);
   }

Also, it would be nice to have a comment above each small group of
instructions showing what they compute as a formula.  For example, above
these three instructions we could say:

/* dest_hpos = v0_hpos * (1 - t0) + v1_hpos * t0 */


> +
> +   /* Then recreate the projected (NDC) coordinate in the new vertex
> +    * header
>      */
> +   brw_clip_project_vertex(c, dest_ptr);
> +
> +   /*
> +    * If we have noperspective attributes, we now need to compute the
> +    * screen-space t.
> +    */
> +   if (c->key.has_noperspective_shading) {
> +      delta = brw_vert_result_to_offset(&c->vue_map, BRW_VERT_RESULT_NDC);
> +      t_nopersp = get_tmp(c);
> +      tmp = get_tmp(c);
>

Along the same lines as my previous comment, I'd prefer to make these three
variables local to this scope, e.g.:

   if (c->key.has_noperspective_shading) {
      GLuint delta = brw_vert_result_to_offset(&c->vue_map,
BRW_VERT_RESULT_NDC);
      struct brw_reg t_nopersp = get_tmp(c);
      struct brw_reg tmp = get_tmp(c);



> +
> +      /* Build a register with coordinates from the second and new
> vertices */
>

In the code below, I could really use some comments to clarify the
computation you're doing.  I'll insert some suggestions inline:

      /* t_nopersp = vec4(v1_ndc.xy, dest_ndc.xy) */

> +      brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));
> +      brw_MOV(p, tmp, deref_4f(dest_ptr, delta));
> +      brw_set_access_mode(p, BRW_ALIGN_16);
> +      brw_MOV(p,
> +              brw_writemask(t_nopersp, WRITEMASK_ZW),
> +              brw_swizzle(tmp, 0,1,0,1));
> +
> +      /* Subtract the coordinates of the first vertex */
>
      /* t_nopersp -= v0_ndc_copy.xyxy
       * Therefore t_nopersp = vec4(v1_ndc.xy - v0_ndc.xy,
       *                            dest_ndc.xy - v0_ndc.xy)
       */

> +      brw_ADD(p, t_nopersp, t_nopersp, negate(brw_swizzle(v0_ndc_copy,
> 0,1,0,1)));
> +
> +      /* Add the absolute value of the X and Y deltas so that if the
> +       * points aren't in the same place on the screen we get non-zero
> +       * values to divide.
> +       *
> +       * After that we have vert1-vert0 in t_nopersp.x and vertnew-vert0
> in t_nopersp.y.
> +       */
>
      /* t_nopersp.xy = |t_nopersp.xz| + |t_nopersp.yw|
       * Therefore:
       * t_nopersp = vec4(|v1_ndc.x - v0_ndc.x| + |v1_ndc.y - v0_ndc.y|,
       *                  |dest_ndc.x - v0_ndc.x| + |dest_ndc.y - v0_ndc.y|,
       *                  <don't care>, <don't care>)
       */

> +      brw_ADD(p,
> +              brw_writemask(t_nopersp, WRITEMASK_XY),
> +              brw_abs(brw_swizzle(t_nopersp, 0,2,0,0)),
> +              brw_abs(brw_swizzle(t_nopersp, 1,3,0,0)));
> +      brw_set_access_mode(p, BRW_ALIGN_1);
> +
> +      /* If the points are in the same place (vert1-vert0 == 0), just
> +       * substitute a value that will ensure that we don't divide by
> +       * 0.
> +       */
> +      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,
> +              vec1(t_nopersp),
> +              brw_imm_f(1));
>

Shouldn't this be brw_imm_f(0)?


> +      brw_IF(p, BRW_EXECUTE_1);
> +      brw_MOV(p, t_nopersp, brw_imm_vf4(VF_ONE, VF_ZERO, VF_ZERO,
> VF_ZERO));
> +      brw_ENDIF(p);
> +
> +      /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it
> */
> +      brw_math_invert(p, get_element(t_nopersp, 0),
> get_element(t_nopersp, 0));
> +      brw_MUL(p,
> +              vec1(t_nopersp),
> +              vec1(t_nopersp),
> +              vec1(suboffset(t_nopersp, 1)));
> +      brw_set_access_mode(p, BRW_ALIGN_16);
> +      brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, 0,0,0,0));
> +      brw_set_access_mode(p, BRW_ALIGN_1);
>

That was a very clever computation.  Well done.


> +
> +      release_tmp(c, tmp);
> +      release_tmp(c, v0_ndc_copy);
> +   }
> +
> +   /* Now we can iterate over each attribute
> +    * (could be done in pairs?)
> +    */
> +   tmp = get_tmp(c);
>     for (slot = 0; slot < c->vue_map.num_slots; slot++) {
>        int vert_result = c->vue_map.slot_to_vert_result[slot];
>        GLuint delta = brw_vue_slot_to_offset(slot);
>
> +      /* HPOS is already handled */
> +      if(vert_result == VERT_RESULT_HPOS)
> +         continue;
> +
>        if (vert_result == VERT_RESULT_EDGE) {
>          if (force_edgeflag)
>             brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
> @@ -174,15 +283,20 @@ void brw_clip_interp_vertex( struct brw_clip_compile
> *c,
>           *
>           *        New = attr0 + t*attr1 - t*attr0
>           */
> +
> +         struct brw_reg t =
> +            brw_get_interpolation_mode(brw, slot) ==
> INTERP_QUALIFIER_NOPERSPECTIVE ?
> +            t_nopersp : t0;
> +
>          brw_MUL(p,
>                  vec4(brw_null_reg()),
>                  deref_4f(v1_ptr, delta),
> -                t0);
> +                t);
>
>          brw_MAC(p,
>                  tmp,
>                  negate(deref_4f(v0_ptr, delta)),
> -                t0);
> +                t);
>
>          brw_ADD(p,
>                  deref_4f(dest_ptr, delta),
> @@ -198,11 +312,8 @@ void brw_clip_interp_vertex( struct brw_clip_compile
> *c,
>     }
>
>     release_tmp(c, tmp);
> -
> -   /* Recreate the projected (NDC) coordinate in the new vertex
> -    * header:
> -    */
> -   brw_clip_project_vertex(c, dest_ptr );
> +   if (c->key.has_noperspective_shading)
> +      release_tmp(c, t_nopersp);
>  }
>
>  void brw_clip_emit_vue(struct brw_clip_compile *c,
> --
> 1.7.10.rc3.1.gb306
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>

With the exception of my question about brw_imm_f(0), all of my comments on
this patch are stylistic suggestions.  So assuming we get the brw_imm_f(0)
thing figured out, this patch is:

Reviewed-by: Paul Berry <stereotype441@gmail.com>
diff mbox

Patch

diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c
index 952eb4a..6bfdf24 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.c
+++ b/src/mesa/drivers/dri/i965/brw_clip.c
@@ -234,6 +234,15 @@  brw_upload_clip_prog(struct brw_context *brw)
          break;
       }
    }
+   key.has_noperspective_shading = 0;
+   for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
+      if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_NOPERSPECTIVE &&
+          brw->vs.prog_data->vue_map.slot_to_vert_result[i] != VERT_RESULT_HPOS) {
+         key.has_noperspective_shading = 1;
+         break;
+      }
+   }
+
    key.pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
    brw_copy_interpolation_modes(brw, key.interpolation_mode);
    /* _NEW_TRANSFORM (also part of VUE map)*/
diff --git a/src/mesa/drivers/dri/i965/brw_clip.h b/src/mesa/drivers/dri/i965/brw_clip.h
index 0ea0394..2a7245a 100644
--- a/src/mesa/drivers/dri/i965/brw_clip.h
+++ b/src/mesa/drivers/dri/i965/brw_clip.h
@@ -47,6 +47,7 @@  struct brw_clip_prog_key {
    GLuint primitive:4;
    GLuint nr_userclip:4;
    GLuint has_flat_shading:1;
+   GLuint has_noperspective_shading:1;
    GLuint pv_first:1;
    GLuint do_unfilled:1;
    GLuint fill_cw:2;		/* includes cull information */
diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 7b0205a..5bdcef8 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -129,6 +129,8 @@  static void brw_clip_project_vertex( struct brw_clip_compile *c,
 
 /* Interpolate between two vertices and put the result into a0.0.  
  * Increment a0.0 accordingly.
+ *
+ * Beware that dest_ptr can be equal to v0_ptr.
  */
 void brw_clip_interp_vertex( struct brw_clip_compile *c,
 			     struct brw_indirect dest_ptr,
@@ -138,8 +140,9 @@  void brw_clip_interp_vertex( struct brw_clip_compile *c,
 			     bool force_edgeflag)
 {
    struct brw_compile *p = &c->func;
-   struct brw_reg tmp = get_tmp(c);
-   GLuint slot;
+   struct brw_context *brw = p->brw;
+   struct brw_reg tmp, t_nopersp, v0_ndc_copy;
+   GLuint slot, delta;
 
    /* Just copy the vertex header:
     */
@@ -148,13 +151,119 @@  void brw_clip_interp_vertex( struct brw_clip_compile *c,
     * back on Ironlake, so needn't change it
     */
    brw_copy_indirect_to_indirect(p, dest_ptr, v0_ptr, 1);
-      
-   /* Iterate over each attribute (could be done in pairs?)
+
+   /*
+    * First handle the 3D and NDC positioning, in case we need
+    * noperspective interpolation.  Doing it early has no performance
+    * impact in any case.
+    */
+
+   /* Start by picking up the v0 NDC coordinates, because that vertex
+    * may be shared with the destination.
+    */
+   if (c->key.has_noperspective_shading) {
+      v0_ndc_copy = get_tmp(c);
+      brw_MOV(p, v0_ndc_copy, deref_4f(v0_ptr,
+                                       brw_vert_result_to_offset(&c->vue_map,
+                                                                 BRW_VERT_RESULT_NDC)));
+   }      
+
+   /*
+    * Compute the new 3D position
+    */
+
+   delta = brw_vert_result_to_offset(&c->vue_map, VERT_RESULT_HPOS);
+   tmp = get_tmp(c);
+   brw_MUL(p, 
+           vec4(brw_null_reg()),
+           deref_4f(v1_ptr, delta),
+           t0);
+
+   brw_MAC(p, 
+           tmp,	      
+           negate(deref_4f(v0_ptr, delta)),
+           t0); 
+	      
+   brw_ADD(p,
+           deref_4f(dest_ptr, delta), 
+           deref_4f(v0_ptr, delta),
+           tmp);
+   release_tmp(c, tmp);
+
+   /* Then recreate the projected (NDC) coordinate in the new vertex
+    * header
     */
+   brw_clip_project_vertex(c, dest_ptr);
+
+   /*
+    * If we have noperspective attributes, we now need to compute the
+    * screen-space t.
+    */
+   if (c->key.has_noperspective_shading) {
+      delta = brw_vert_result_to_offset(&c->vue_map, BRW_VERT_RESULT_NDC);
+      t_nopersp = get_tmp(c);
+      tmp = get_tmp(c);
+
+      /* Build a register with coordinates from the second and new vertices */
+      brw_MOV(p, t_nopersp, deref_4f(v1_ptr, delta));
+      brw_MOV(p, tmp, deref_4f(dest_ptr, delta));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      brw_MOV(p,
+              brw_writemask(t_nopersp, WRITEMASK_ZW),
+              brw_swizzle(tmp, 0,1,0,1));
+
+      /* Subtract the coordinates of the first vertex */
+      brw_ADD(p, t_nopersp, t_nopersp, negate(brw_swizzle(v0_ndc_copy, 0,1,0,1)));
+
+      /* Add the absolute value of the X and Y deltas so that if the
+       * points aren't in the same place on the screen we get non-zero
+       * values to divide.
+       *
+       * After that we have vert1-vert0 in t_nopersp.x and vertnew-vert0 in t_nopersp.y.
+       */
+      brw_ADD(p,
+              brw_writemask(t_nopersp, WRITEMASK_XY),
+              brw_abs(brw_swizzle(t_nopersp, 0,2,0,0)),
+              brw_abs(brw_swizzle(t_nopersp, 1,3,0,0)));
+      brw_set_access_mode(p, BRW_ALIGN_1);
+
+      /* If the points are in the same place (vert1-vert0 == 0), just
+       * substitute a value that will ensure that we don't divide by
+       * 0.
+       */
+      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,
+              vec1(t_nopersp),
+              brw_imm_f(1));
+      brw_IF(p, BRW_EXECUTE_1);
+      brw_MOV(p, t_nopersp, brw_imm_vf4(VF_ONE, VF_ZERO, VF_ZERO, VF_ZERO));
+      brw_ENDIF(p);
+
+      /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it */
+      brw_math_invert(p, get_element(t_nopersp, 0), get_element(t_nopersp, 0));
+      brw_MUL(p,
+              vec1(t_nopersp),
+              vec1(t_nopersp),
+              vec1(suboffset(t_nopersp, 1)));
+      brw_set_access_mode(p, BRW_ALIGN_16);
+      brw_MOV(p, t_nopersp, brw_swizzle(t_nopersp, 0,0,0,0));
+      brw_set_access_mode(p, BRW_ALIGN_1);
+
+      release_tmp(c, tmp);
+      release_tmp(c, v0_ndc_copy);
+   }
+
+   /* Now we can iterate over each attribute
+    * (could be done in pairs?)
+    */
+   tmp = get_tmp(c);
    for (slot = 0; slot < c->vue_map.num_slots; slot++) {
       int vert_result = c->vue_map.slot_to_vert_result[slot];
       GLuint delta = brw_vue_slot_to_offset(slot);
 
+      /* HPOS is already handled */
+      if(vert_result == VERT_RESULT_HPOS)
+         continue;
+
       if (vert_result == VERT_RESULT_EDGE) {
 	 if (force_edgeflag) 
 	    brw_MOV(p, deref_4f(dest_ptr, delta), brw_imm_f(1));
@@ -174,15 +283,20 @@  void brw_clip_interp_vertex( struct brw_clip_compile *c,
 	  *
 	  *        New = attr0 + t*attr1 - t*attr0
 	  */
+
+         struct brw_reg t =
+            brw_get_interpolation_mode(brw, slot) == INTERP_QUALIFIER_NOPERSPECTIVE ?
+            t_nopersp : t0;
+
 	 brw_MUL(p, 
 		 vec4(brw_null_reg()),
 		 deref_4f(v1_ptr, delta),
-		 t0);
+		 t);
 
 	 brw_MAC(p, 
 		 tmp,	      
 		 negate(deref_4f(v0_ptr, delta)),
-		 t0); 
+		 t); 
 	      
 	 brw_ADD(p,
 		 deref_4f(dest_ptr, delta), 
@@ -198,11 +312,8 @@  void brw_clip_interp_vertex( struct brw_clip_compile *c,
    }
 
    release_tmp(c, tmp);
-
-   /* Recreate the projected (NDC) coordinate in the new vertex
-    * header:
-    */
-   brw_clip_project_vertex(c, dest_ptr );
+   if (c->key.has_noperspective_shading)
+      release_tmp(c, t_nopersp);
 }
 
 void brw_clip_emit_vue(struct brw_clip_compile *c,