diff mbox

[v2] drm/i915: prevent out of range pt in the PDE macros (take 3)

Message ID 1443791813-30551-1-git-send-email-michel.thierry@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michel Thierry Oct. 2, 2015, 1:16 p.m. UTC
We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of
range pt in gen6_for_each_pde").

But the static analyzer still complains that, just before we break due
to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an
iter value that is bigger than I915_PDES. Of course, this isn't really
a problem since no one uses pt outside the macro. Still, every single
new usage of the macro will create a new issue for us to mark as a
false positive.

Also, Paulo re-started the discussion a while ago [1], but didn't end up
implemented.

In order to "solve" this "problem", this patch takes the ideas from
Chris and Dave, but that check would change the desired behavior of the
code, because the object (for example pdp->page_directory[iter]) can be
null during init/alloc, and C would take this as false, breaking the for
loop immediately.

This has been already verified with "static analysis tools".

[1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html

v2: Make it a single statement, while preventing the common subexpression
elimination (Chris)

Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Comments

Dave Gordon Oct. 5, 2015, 4:36 p.m. UTC | #1
On 02/10/15 14:16, Michel Thierry wrote:
> We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of
> range pt in gen6_for_each_pde").
>
> But the static analyzer still complains that, just before we break due
> to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an
> iter value that is bigger than I915_PDES. Of course, this isn't really
> a problem since no one uses pt outside the macro. Still, every single
> new usage of the macro will create a new issue for us to mark as a
> false positive.
>
> Also, Paulo re-started the discussion a while ago [1], but didn't end up
> implemented.
>
> In order to "solve" this "problem", this patch takes the ideas from
> Chris and Dave, but that check would change the desired behavior of the
> code, because the object (for example pdp->page_directory[iter]) can be
> null during init/alloc, and C would take this as false, breaking the for
> loop immediately.
>
> This has been already verified with "static analysis tools".
>
> [1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html
>
> v2: Make it a single statement, while preventing the common subexpression
> elimination (Chris)
>
> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Dave Gordon <david.s.gordon@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.h | 14 ++++++++------
>   1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 9fbb07d..a216397 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -394,7 +394,8 @@ struct i915_hw_ppgtt {
>    */
>   #define gen6_for_each_pde(pt, pd, start, length, temp, iter) \
>   	for (iter = gen6_pde_index(start); \
> -	     pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES; \
> +	     length > 0 && iter < I915_PDES ? \
> +			(pt = (pd)->page_table[iter]), 1 : 0; \
>   	     iter++, \
>   	     temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \
>   	     temp = min_t(unsigned, temp, length), \
> @@ -459,7 +460,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>    */
>   #define gen8_for_each_pde(pt, pd, start, length, temp, iter)		\
>   	for (iter = gen8_pde_index(start); \
> -	     pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES;	\
> +	     length > 0 && iter < I915_PDES ? \
> +			(pt = (pd)->page_table[iter]), 1 : 0; \
>   	     iter++,				\
>   	     temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start,	\
>   	     temp = min(temp, length),					\
> @@ -467,8 +469,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>
>   #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)	\
>   	for (iter = gen8_pdpe_index(start); \
> -	     pd = (pdp)->page_directory[iter], \
> -	     length > 0 && (iter < I915_PDPES_PER_PDP(dev)); \
> +	     length > 0 && (iter < I915_PDPES_PER_PDP(dev)) ? \
> +			(pd = (pdp)->page_directory[iter]), 1 : 0; \
>   	     iter++,				\
>   	     temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,	\
>   	     temp = min(temp, length),					\
> @@ -476,8 +478,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>
>   #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)	\
>   	for (iter = gen8_pml4e_index(start);	\
> -	     pdp = (pml4)->pdps[iter], \
> -	     length > 0 && iter < GEN8_PML4ES_PER_PML4; \
> +	     length > 0 && iter < GEN8_PML4ES_PER_PML4 ? \
> +			(pdp = (pml4)->pdps[iter]), 1 : 0; \

this won't compile -- see below

>   	     iter++,				\
>   	     temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,	\
>   	     temp = min(temp, length),					\

The man page for C operators tells us:

        Operator                             Associativity
        () [] -> .                           left to right
        ! ~ ++ -- + - (type) * & sizeof      right to left
        * / %                                left to right
        + -                                  left to right
        << >>                                left to right
        < <= > >=                            left to right
        == !=                                left to right
        &                                    left to right
        ^                                    left to right
        |                                    left to right
        &&                                   left to right
        ||                                   left to right
        ?:                                   right to left
        = += -= *= /= %= <<= >>= &= ^= |=    right to left
        ,                                    left to right

So there's a problem with the above code, because the comma operator is 
LOWER precedence than either assignment or ?: You'd need to put the 
parentheses around the (pdp = ... , 1) section, not just the assignment.

Or for yet another variation, how about:

#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)         \
         for (iter = gen8_pdpe_index(start);                            \
              iter < I915_PDPES_PER_PDP(dev) &&                         \
                 (pd = (pdp)->page_directory[iter], length > 0);        \
              iter++,                                                   \
              temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,      \
              temp = min(temp, length),                                 \
              start += temp, length -= temp)

#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
         for (iter = gen8_pml4e_index(start);                           \
              iter < GEN8_PML4ES_PER_PML4 &&                            \
                 (pdp = (pml4)->pdps[iter], length > 0);                \
              iter++,                                                   \
              temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,  \
              temp = min(temp, length),                                 \
              start += temp, length -= temp)

with no ugly ?: at all :)

.Dave.
Michel Thierry Oct. 5, 2015, 4:59 p.m. UTC | #2
On 10/5/2015 5:36 PM, Dave Gordon wrote:
> On 02/10/15 14:16, Michel Thierry wrote:
>> We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of
>> range pt in gen6_for_each_pde").
>>
>> But the static analyzer still complains that, just before we break due
>> to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an
>> iter value that is bigger than I915_PDES. Of course, this isn't really
>> a problem since no one uses pt outside the macro. Still, every single
>> new usage of the macro will create a new issue for us to mark as a
>> false positive.
>>
>> Also, Paulo re-started the discussion a while ago [1], but didn't end up
>> implemented.
>>
>> In order to "solve" this "problem", this patch takes the ideas from
>> Chris and Dave, but that check would change the desired behavior of the
>> code, because the object (for example pdp->page_directory[iter]) can be
>> null during init/alloc, and C would take this as false, breaking the for
>> loop immediately.
>>
>> This has been already verified with "static analysis tools".
>>
>> [1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html
>>
>> v2: Make it a single statement, while preventing the common subexpression
>> elimination (Chris)
>>
>> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Dave Gordon <david.s.gordon@intel.com>
>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_gtt.h | 14 ++++++++------
>>   1 file changed, 8 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> index 9fbb07d..a216397 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> @@ -394,7 +394,8 @@ struct i915_hw_ppgtt {
>>    */
>>   #define gen6_for_each_pde(pt, pd, start, length, temp, iter) \
>>       for (iter = gen6_pde_index(start); \
>> -         pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES; \
>> +         length > 0 && iter < I915_PDES ? \
>> +            (pt = (pd)->page_table[iter]), 1 : 0; \
>>            iter++, \
>>            temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \
>>            temp = min_t(unsigned, temp, length), \
>> @@ -459,7 +460,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>>    */
>>   #define gen8_for_each_pde(pt, pd, start, length, temp, iter)        \
>>       for (iter = gen8_pde_index(start); \
>> -         pt = (pd)->page_table[iter], length > 0 && iter <
>> I915_PDES;    \
>> +         length > 0 && iter < I915_PDES ? \
>> +            (pt = (pd)->page_table[iter]), 1 : 0; \
>>            iter++,                \
>>            temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start,    \
>>            temp = min(temp, length),                    \
>> @@ -467,8 +469,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>>
>>   #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)    \
>>       for (iter = gen8_pdpe_index(start); \
>> -         pd = (pdp)->page_directory[iter], \
>> -         length > 0 && (iter < I915_PDPES_PER_PDP(dev)); \
>> +         length > 0 && (iter < I915_PDPES_PER_PDP(dev)) ? \
>> +            (pd = (pdp)->page_directory[iter]), 1 : 0; \
>>            iter++,                \
>>            temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,    \
>>            temp = min(temp, length),                    \
>> @@ -476,8 +478,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>>
>>   #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)    \
>>       for (iter = gen8_pml4e_index(start);    \
>> -         pdp = (pml4)->pdps[iter], \
>> -         length > 0 && iter < GEN8_PML4ES_PER_PML4; \
>> +         length > 0 && iter < GEN8_PML4ES_PER_PML4 ? \
>> +            (pdp = (pml4)->pdps[iter]), 1 : 0; \
>
> this won't compile -- see below

Hmm, it compiled (also got rid of of the "analysis tool error" and 
didn't see any behavior change).

>
>>            iter++,                \
>>            temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,    \
>>            temp = min(temp, length),                    \
>
> The man page for C operators tells us:
>
>         Operator                             Associativity
>         () [] -> .                           left to right
>         ! ~ ++ -- + - (type) * & sizeof      right to left
>         * / %                                left to right
>         + -                                  left to right
>         << >>                                left to right
>         < <= > >=                            left to right
>         == !=                                left to right
>         &                                    left to right
>         ^                                    left to right
>         |                                    left to right
>         &&                                   left to right
>         ||                                   left to right
>         ?:                                   right to left
>         = += -= *= /= %= <<= >>= &= ^= |=    right to left
>         ,                                    left to right
>
> So there's a problem with the above code, because the comma operator is
> LOWER precedence than either assignment or ?: You'd need to put the
> parentheses around the (pdp = ... , 1) section, not just the assignment.
>
> Or for yet another variation, how about:
>
> #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)         \
>          for (iter = gen8_pdpe_index(start);                            \
>               iter < I915_PDPES_PER_PDP(dev) &&                         \
>                  (pd = (pdp)->page_directory[iter], length > 0);        \
>               iter++,                                                   \
>               temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,      \
>               temp = min(temp, length),                                 \
>               start += temp, length -= temp)
>
> #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
>          for (iter = gen8_pml4e_index(start);                           \
>               iter < GEN8_PML4ES_PER_PML4 &&                            \
>                  (pdp = (pml4)->pdps[iter], length > 0);                \
>               iter++,                                                   \
>               temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,  \
>               temp = min(temp, length),                                 \
>               start += temp, length -= temp)
>
> with no ugly ?: at all :)

This also works.

Since it's a change for not a real issue, I don't have a preference.
I can send either.

Thanks,

>
> .Dave.
Daniel Vetter Oct. 6, 2015, 8:38 a.m. UTC | #3
On Mon, Oct 05, 2015 at 05:59:50PM +0100, Michel Thierry wrote:
> On 10/5/2015 5:36 PM, Dave Gordon wrote:
> >On 02/10/15 14:16, Michel Thierry wrote:
> >>We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of
> >>range pt in gen6_for_each_pde").
> >>
> >>But the static analyzer still complains that, just before we break due
> >>to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an
> >>iter value that is bigger than I915_PDES. Of course, this isn't really
> >>a problem since no one uses pt outside the macro. Still, every single
> >>new usage of the macro will create a new issue for us to mark as a
> >>false positive.
> >>
> >>Also, Paulo re-started the discussion a while ago [1], but didn't end up
> >>implemented.
> >>
> >>In order to "solve" this "problem", this patch takes the ideas from
> >>Chris and Dave, but that check would change the desired behavior of the
> >>code, because the object (for example pdp->page_directory[iter]) can be
> >>null during init/alloc, and C would take this as false, breaking the for
> >>loop immediately.
> >>
> >>This has been already verified with "static analysis tools".
> >>
> >>[1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html
> >>
> >>v2: Make it a single statement, while preventing the common subexpression
> >>elimination (Chris)
> >>
> >>Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >>Cc: Dave Gordon <david.s.gordon@intel.com>
> >>Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> >>---
> >>  drivers/gpu/drm/i915/i915_gem_gtt.h | 14 ++++++++------
> >>  1 file changed, 8 insertions(+), 6 deletions(-)
> >>
> >>diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h
> >>b/drivers/gpu/drm/i915/i915_gem_gtt.h
> >>index 9fbb07d..a216397 100644
> >>--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> >>+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> >>@@ -394,7 +394,8 @@ struct i915_hw_ppgtt {
> >>   */
> >>  #define gen6_for_each_pde(pt, pd, start, length, temp, iter) \
> >>      for (iter = gen6_pde_index(start); \
> >>-         pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES; \
> >>+         length > 0 && iter < I915_PDES ? \
> >>+            (pt = (pd)->page_table[iter]), 1 : 0; \
> >>           iter++, \
> >>           temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \
> >>           temp = min_t(unsigned, temp, length), \
> >>@@ -459,7 +460,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
> >>   */
> >>  #define gen8_for_each_pde(pt, pd, start, length, temp, iter)        \
> >>      for (iter = gen8_pde_index(start); \
> >>-         pt = (pd)->page_table[iter], length > 0 && iter <
> >>I915_PDES;    \
> >>+         length > 0 && iter < I915_PDES ? \
> >>+            (pt = (pd)->page_table[iter]), 1 : 0; \
> >>           iter++,                \
> >>           temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start,    \
> >>           temp = min(temp, length),                    \
> >>@@ -467,8 +469,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
> >>
> >>  #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)    \
> >>      for (iter = gen8_pdpe_index(start); \
> >>-         pd = (pdp)->page_directory[iter], \
> >>-         length > 0 && (iter < I915_PDPES_PER_PDP(dev)); \
> >>+         length > 0 && (iter < I915_PDPES_PER_PDP(dev)) ? \
> >>+            (pd = (pdp)->page_directory[iter]), 1 : 0; \
> >>           iter++,                \
> >>           temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,    \
> >>           temp = min(temp, length),                    \
> >>@@ -476,8 +478,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
> >>
> >>  #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)    \
> >>      for (iter = gen8_pml4e_index(start);    \
> >>-         pdp = (pml4)->pdps[iter], \
> >>-         length > 0 && iter < GEN8_PML4ES_PER_PML4; \
> >>+         length > 0 && iter < GEN8_PML4ES_PER_PML4 ? \
> >>+            (pdp = (pml4)->pdps[iter]), 1 : 0; \
> >
> >this won't compile -- see below
> 
> Hmm, it compiled (also got rid of of the "analysis tool error" and didn't
> see any behavior change).
> 
> >
> >>           iter++,                \
> >>           temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,    \
> >>           temp = min(temp, length),                    \
> >
> >The man page for C operators tells us:
> >
> >        Operator                             Associativity
> >        () [] -> .                           left to right
> >        ! ~ ++ -- + - (type) * & sizeof      right to left
> >        * / %                                left to right
> >        + -                                  left to right
> >        << >>                                left to right
> >        < <= > >=                            left to right
> >        == !=                                left to right
> >        &                                    left to right
> >        ^                                    left to right
> >        |                                    left to right
> >        &&                                   left to right
> >        ||                                   left to right
> >        ?:                                   right to left
> >        = += -= *= /= %= <<= >>= &= ^= |=    right to left
> >        ,                                    left to right
> >
> >So there's a problem with the above code, because the comma operator is
> >LOWER precedence than either assignment or ?: You'd need to put the
> >parentheses around the (pdp = ... , 1) section, not just the assignment.
> >
> >Or for yet another variation, how about:
> >
> >#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)         \
> >         for (iter = gen8_pdpe_index(start);                            \
> >              iter < I915_PDPES_PER_PDP(dev) &&                         \
> >                 (pd = (pdp)->page_directory[iter], length > 0);        \
> >              iter++,                                                   \
> >              temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,      \
> >              temp = min(temp, length),                                 \
> >              start += temp, length -= temp)
> >
> >#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
> >         for (iter = gen8_pml4e_index(start);                           \
> >              iter < GEN8_PML4ES_PER_PML4 &&                            \
> >                 (pdp = (pml4)->pdps[iter], length > 0);                \
> >              iter++,                                                   \
> >              temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,  \
> >              temp = min(temp, length),                                 \
> >              start += temp, length -= temp)
> >
> >with no ugly ?: at all :)
> 
> This also works.
> 
> Since it's a change for not a real issue, I don't have a preference.
> I can send either.

Yeah, since ?: is a ternary operator parsing implicitly adds the () in the
middle and always parses it as a ? (b) : c. If lower-level operators in
the middle could split the ternary operator then it would result in
parsing fail (sinc ? without the : is just useless). So lgtm. Someone
willing to smack an r-b onto the patch?
-Daniel
Chris Wilson Oct. 6, 2015, 9:43 a.m. UTC | #4
On Tue, Oct 06, 2015 at 10:38:22AM +0200, Daniel Vetter wrote:
> On Mon, Oct 05, 2015 at 05:59:50PM +0100, Michel Thierry wrote:
> > On 10/5/2015 5:36 PM, Dave Gordon wrote:
> > >On 02/10/15 14:16, Michel Thierry wrote:
> > >>We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of
> > >>range pt in gen6_for_each_pde").
> > >>
> > >>But the static analyzer still complains that, just before we break due
> > >>to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an
> > >>iter value that is bigger than I915_PDES. Of course, this isn't really
> > >>a problem since no one uses pt outside the macro. Still, every single
> > >>new usage of the macro will create a new issue for us to mark as a
> > >>false positive.
> > >>
> > >>Also, Paulo re-started the discussion a while ago [1], but didn't end up
> > >>implemented.
> > >>
> > >>In order to "solve" this "problem", this patch takes the ideas from
> > >>Chris and Dave, but that check would change the desired behavior of the
> > >>code, because the object (for example pdp->page_directory[iter]) can be
> > >>null during init/alloc, and C would take this as false, breaking the for
> > >>loop immediately.
> > >>
> > >>This has been already verified with "static analysis tools".
> > >>
> > >>[1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html
> > >>
> > >>v2: Make it a single statement, while preventing the common subexpression
> > >>elimination (Chris)
> > >>
> > >>Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > >>Cc: Dave Gordon <david.s.gordon@intel.com>
> > >>Signed-off-by: Michel Thierry <michel.thierry@intel.com>

> Yeah, since ?: is a ternary operator parsing implicitly adds the () in the
> middle and always parses it as a ? (b) : c. If lower-level operators in
> the middle could split the ternary operator then it would result in
> parsing fail (sinc ? without the : is just useless). So lgtm. Someone
> willing to smack an r-b onto the patch?

I think it's good enough.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Something to consider is that ppgtt_insert is 10x slower than
ppgtt_clear, and that some workloads (admittedly not 48b!) spend a
disproportionate amount of time changing PTE. If you have ideas for
spending up insertion, feel free to experiment!
-Chris
Daniel Vetter Oct. 6, 2015, 10:09 a.m. UTC | #5
On Tue, Oct 06, 2015 at 10:43:48AM +0100, Chris Wilson wrote:
> On Tue, Oct 06, 2015 at 10:38:22AM +0200, Daniel Vetter wrote:
> > On Mon, Oct 05, 2015 at 05:59:50PM +0100, Michel Thierry wrote:
> > > On 10/5/2015 5:36 PM, Dave Gordon wrote:
> > > >On 02/10/15 14:16, Michel Thierry wrote:
> > > >>We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of
> > > >>range pt in gen6_for_each_pde").
> > > >>
> > > >>But the static analyzer still complains that, just before we break due
> > > >>to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an
> > > >>iter value that is bigger than I915_PDES. Of course, this isn't really
> > > >>a problem since no one uses pt outside the macro. Still, every single
> > > >>new usage of the macro will create a new issue for us to mark as a
> > > >>false positive.
> > > >>
> > > >>Also, Paulo re-started the discussion a while ago [1], but didn't end up
> > > >>implemented.
> > > >>
> > > >>In order to "solve" this "problem", this patch takes the ideas from
> > > >>Chris and Dave, but that check would change the desired behavior of the
> > > >>code, because the object (for example pdp->page_directory[iter]) can be
> > > >>null during init/alloc, and C would take this as false, breaking the for
> > > >>loop immediately.
> > > >>
> > > >>This has been already verified with "static analysis tools".
> > > >>
> > > >>[1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html
> > > >>
> > > >>v2: Make it a single statement, while preventing the common subexpression
> > > >>elimination (Chris)
> > > >>
> > > >>Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > >>Cc: Dave Gordon <david.s.gordon@intel.com>
> > > >>Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> 
> > Yeah, since ?: is a ternary operator parsing implicitly adds the () in the
> > middle and always parses it as a ? (b) : c. If lower-level operators in
> > the middle could split the ternary operator then it would result in
> > parsing fail (sinc ? without the : is just useless). So lgtm. Someone
> > willing to smack an r-b onto the patch?
> 
> I think it's good enough.
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Queued for -next, thanks for the patch.

> Something to consider is that ppgtt_insert is 10x slower than
> ppgtt_clear, and that some workloads (admittedly not 48b!) spend a
> disproportionate amount of time changing PTE. If you have ideas for
> spending up insertion, feel free to experiment!

Hm, where do we waste all that time? 10x slower is pretty impressive since
on a quick look I can only see the sg table walk as the additional bit of
memory traversals insert does on top of clear ...
-Daniel
Chris Wilson Oct. 6, 2015, 10:19 a.m. UTC | #6
On Tue, Oct 06, 2015 at 12:09:51PM +0200, Daniel Vetter wrote:
> On Tue, Oct 06, 2015 at 10:43:48AM +0100, Chris Wilson wrote:
> > On Tue, Oct 06, 2015 at 10:38:22AM +0200, Daniel Vetter wrote:
> > > On Mon, Oct 05, 2015 at 05:59:50PM +0100, Michel Thierry wrote:
> > > > On 10/5/2015 5:36 PM, Dave Gordon wrote:
> > > > >On 02/10/15 14:16, Michel Thierry wrote:
> > > > >>We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of
> > > > >>range pt in gen6_for_each_pde").
> > > > >>
> > > > >>But the static analyzer still complains that, just before we break due
> > > > >>to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an
> > > > >>iter value that is bigger than I915_PDES. Of course, this isn't really
> > > > >>a problem since no one uses pt outside the macro. Still, every single
> > > > >>new usage of the macro will create a new issue for us to mark as a
> > > > >>false positive.
> > > > >>
> > > > >>Also, Paulo re-started the discussion a while ago [1], but didn't end up
> > > > >>implemented.
> > > > >>
> > > > >>In order to "solve" this "problem", this patch takes the ideas from
> > > > >>Chris and Dave, but that check would change the desired behavior of the
> > > > >>code, because the object (for example pdp->page_directory[iter]) can be
> > > > >>null during init/alloc, and C would take this as false, breaking the for
> > > > >>loop immediately.
> > > > >>
> > > > >>This has been already verified with "static analysis tools".
> > > > >>
> > > > >>[1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html
> > > > >>
> > > > >>v2: Make it a single statement, while preventing the common subexpression
> > > > >>elimination (Chris)
> > > > >>
> > > > >>Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > > >>Cc: Chris Wilson <chris@chris-wilson.co.uk>
> > > > >>Cc: Dave Gordon <david.s.gordon@intel.com>
> > > > >>Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> > 
> > > Yeah, since ?: is a ternary operator parsing implicitly adds the () in the
> > > middle and always parses it as a ? (b) : c. If lower-level operators in
> > > the middle could split the ternary operator then it would result in
> > > parsing fail (sinc ? without the : is just useless). So lgtm. Someone
> > > willing to smack an r-b onto the patch?
> > 
> > I think it's good enough.
> > Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Queued for -next, thanks for the patch.
> 
> > Something to consider is that ppgtt_insert is 10x slower than
> > ppgtt_clear, and that some workloads (admittedly not 48b!) spend a
> > disproportionate amount of time changing PTE. If you have ideas for
> > spending up insertion, feel free to experiment!
> 
> Hm, where do we waste all that time? 10x slower is pretty impressive since
> on a quick look I can only see the sg table walk as the additional bit of
> memory traversals insert does on top of clear ...

The sg_page_iter claims top spot, followed by the memory dereferences
(at a guess, it doesn't seem the individual sg struct isn't dense enough
to be cache friendly or maybe we should just cachealign it?). We can make
substantial improvement by opencoding the page iter (under the assumption
that we do not have page coallescing) like:

http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=f9baf155c3096058ef9aeeb39b586713291efc56

with that and eliminating the ivb_pte_encode() indirection gets us to
only be ~5x slower than clearing. It is not pretty (and is rather
cavalier about the last page), but at that point it seems to be memory bound.

However, in this particular benchmark where inserting ppgtt dominates
the kernel profile, we are GPU bound and improving the insert is lost in
the noise. (Unless we disable the GPU load and assume an infinitely fast
GPU like Skylake!)
-Chris
Dave Gordon Oct. 6, 2015, 11:21 a.m. UTC | #7
On 06/10/15 09:38, Daniel Vetter wrote:
> On Mon, Oct 05, 2015 at 05:59:50PM +0100, Michel Thierry wrote:
>> On 10/5/2015 5:36 PM, Dave Gordon wrote:
>>> On 02/10/15 14:16, Michel Thierry wrote:
>>>> We tried to fix this in commit fdc454c1484a ("drm/i915: Prevent out of
>>>> range pt in gen6_for_each_pde").
>>>>
>>>> But the static analyzer still complains that, just before we break due
>>>> to "iter < I915_PDES", we do "pt = (pd)->page_table[iter]" with an
>>>> iter value that is bigger than I915_PDES. Of course, this isn't really
>>>> a problem since no one uses pt outside the macro. Still, every single
>>>> new usage of the macro will create a new issue for us to mark as a
>>>> false positive.
>>>>
>>>> Also, Paulo re-started the discussion a while ago [1], but didn't end up
>>>> implemented.
>>>>
>>>> In order to "solve" this "problem", this patch takes the ideas from
>>>> Chris and Dave, but that check would change the desired behavior of the
>>>> code, because the object (for example pdp->page_directory[iter]) can be
>>>> null during init/alloc, and C would take this as false, breaking the for
>>>> loop immediately.
>>>>
>>>> This has been already verified with "static analysis tools".
>>>>
>>>> [1]http://lists.freedesktop.org/archives/intel-gfx/2015-June/068548.html
>>>>
>>>> v2: Make it a single statement, while preventing the common subexpression
>>>> elimination (Chris)
>>>>
>>>> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>>>> Cc: Dave Gordon <david.s.gordon@intel.com>
>>>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_gem_gtt.h | 14 ++++++++------
>>>>   1 file changed, 8 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h
>>>> b/drivers/gpu/drm/i915/i915_gem_gtt.h
>>>> index 9fbb07d..a216397 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>>>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>>>> @@ -394,7 +394,8 @@ struct i915_hw_ppgtt {
>>>>    */
>>>>   #define gen6_for_each_pde(pt, pd, start, length, temp, iter) \
>>>>       for (iter = gen6_pde_index(start); \
>>>> -         pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES; \
>>>> +         length > 0 && iter < I915_PDES ? \
>>>> +            (pt = (pd)->page_table[iter]), 1 : 0; \
>>>>            iter++, \
>>>>            temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \
>>>>            temp = min_t(unsigned, temp, length), \
>>>> @@ -459,7 +460,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>>>>    */
>>>>   #define gen8_for_each_pde(pt, pd, start, length, temp, iter)        \
>>>>       for (iter = gen8_pde_index(start); \
>>>> -         pt = (pd)->page_table[iter], length > 0 && iter <
>>>> I915_PDES;    \
>>>> +         length > 0 && iter < I915_PDES ? \
>>>> +            (pt = (pd)->page_table[iter]), 1 : 0; \
>>>>            iter++,                \
>>>>            temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start,    \
>>>>            temp = min(temp, length),                    \
>>>> @@ -467,8 +469,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>>>>
>>>>   #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)    \
>>>>       for (iter = gen8_pdpe_index(start); \
>>>> -         pd = (pdp)->page_directory[iter], \
>>>> -         length > 0 && (iter < I915_PDPES_PER_PDP(dev)); \
>>>> +         length > 0 && (iter < I915_PDPES_PER_PDP(dev)) ? \
>>>> +            (pd = (pdp)->page_directory[iter]), 1 : 0; \
>>>>            iter++,                \
>>>>            temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,    \
>>>>            temp = min(temp, length),                    \
>>>> @@ -476,8 +478,8 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>>>>
>>>>   #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)    \
>>>>       for (iter = gen8_pml4e_index(start);    \
>>>> -         pdp = (pml4)->pdps[iter], \
>>>> -         length > 0 && iter < GEN8_PML4ES_PER_PML4; \
>>>> +         length > 0 && iter < GEN8_PML4ES_PER_PML4 ? \
>>>> +            (pdp = (pml4)->pdps[iter]), 1 : 0; \
>>>
>>> this won't compile -- see below
>>
>> Hmm, it compiled (also got rid of of the "analysis tool error" and didn't
>> see any behavior change).
>>
>>>
>>>>            iter++,                \
>>>>            temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,    \
>>>>            temp = min(temp, length),                    \
>>>
>>> The man page for C operators tells us:
>>>
>>>         Operator                             Associativity
>>>         () [] -> .                           left to right
>>>         ! ~ ++ -- + - (type) * & sizeof      right to left
>>>         * / %                                left to right
>>>         + -                                  left to right
>>>         << >>                                left to right
>>>         < <= > >=                            left to right
>>>         == !=                                left to right
>>>         &                                    left to right
>>>         ^                                    left to right
>>>         |                                    left to right
>>>         &&                                   left to right
>>>         ||                                   left to right
>>>         ?:                                   right to left
>>>         = += -= *= /= %= <<= >>= &= ^= |=    right to left
>>>         ,                                    left to right
>>>
>>> So there's a problem with the above code, because the comma operator is
>>> LOWER precedence than either assignment or ?: You'd need to put the
>>> parentheses around the (pdp = ... , 1) section, not just the assignment.
>>>
>>> Or for yet another variation, how about:
>>>
>>> #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)         \
>>>          for (iter = gen8_pdpe_index(start);                            \
>>>               iter < I915_PDPES_PER_PDP(dev) &&                         \
>>>                  (pd = (pdp)->page_directory[iter], length > 0);        \
>>>               iter++,                                                   \
>>>               temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,      \
>>>               temp = min(temp, length),                                 \
>>>               start += temp, length -= temp)
>>>
>>> #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
>>>          for (iter = gen8_pml4e_index(start);                           \
>>>               iter < GEN8_PML4ES_PER_PML4 &&                            \
>>>                  (pdp = (pml4)->pdps[iter], length > 0);                \
>>>               iter++,                                                   \
>>>               temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,  \
>>>               temp = min(temp, length),                                 \
>>>               start += temp, length -= temp)
>>>
>>> with no ugly ?: at all :)
>>
>> This also works.
>>
>> Since it's a change for not a real issue, I don't have a preference.
>> I can send either.
>
> Yeah, since ?: is a ternary operator parsing implicitly adds the () in the
> middle and always parses it as a ? (b) : c. If lower-level operators in
> the middle could split the ternary operator then it would result in
> parsing fail (sinc ? without the : is just useless). So lgtm. Someone
> willing to smack an r-b onto the patch?
> -Daniel

Hmmm ... it failed to compile when I tried it yesterday, but I think I 
must have bodged the copypaste from the email to the header file, and 
then just assumed the error message meant something was wrong with the 
way all those low-precedence operators were stacked together. Anyway I 
just did it again and it compiles OK now, so it gets my

Reviewed-by: Dave Gordon <david.s.gordon@intel.com>

... although I still think my version is (slightly) easier to read. Or 
it could be improved even more by moving the increment of 'iter' to the 
end, making it one line shorter and perhaps helping the compiler a little :)

#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
         for (iter = gen8_pml4e_index(start);                           \
              iter < GEN8_PML4ES_PER_PML4 &&                            \
                 (pdp = (pml4)->pdps[iter], length > 0);                \
              temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,  \
              temp = min(temp, length),                                 \
              start += temp, length -= temp, ++iter)

Or, noting that 'temp' is never used in the generated loop bodies, we 
could eliminate the parameter and make it local to the loop header :)

#define gen8_for_each_pml4e(pdp, pml4, start, length, iter)            \
         for (iter = gen8_pml4e_index(start);                           \
              iter < GEN8_PML4ES_PER_PML4 &&                            \
                 (pdp = (pml4)->pdps[iter], length > 0);                \
              ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);   \
                     temp = min(temp - start, length);                  \
                     start += temp, length -= temp; }),                 \
                 ++iter)

.Dave.
Chris Wilson Oct. 6, 2015, 11:53 a.m. UTC | #8
On Tue, Oct 06, 2015 at 12:21:05PM +0100, Dave Gordon wrote:
> ... although I still think my version is (slightly) easier to read.
> Or it could be improved even more by moving the increment of 'iter'
> to the end, making it one line shorter and perhaps helping the
> compiler a little :)
> 
> #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
>         for (iter = gen8_pml4e_index(start);                           \
>              iter < GEN8_PML4ES_PER_PML4 &&                            \
>                 (pdp = (pml4)->pdps[iter], length > 0);                \
>              temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,  \
>              temp = min(temp, length),                                 \
>              start += temp, length -= temp, ++iter)

Shorter, yes, but we may as well take advantage of not using [iter] if
length == 0, so meh.
 
> Or, noting that 'temp' is never used in the generated loop bodies,
> we could eliminate the parameter and make it local to the loop
> header :)
> 
> #define gen8_for_each_pml4e(pdp, pml4, start, length, iter)            \
>         for (iter = gen8_pml4e_index(start);                           \
>              iter < GEN8_PML4ES_PER_PML4 &&                            \
>                 (pdp = (pml4)->pdps[iter], length > 0);                \
>              ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT);   \
>                     temp = min(temp - start, length);                  \
>                     start += temp, length -= temp; }),                 \
>                 ++iter)

Removing extraneous parameters from macros is differently a usability win.
Care to spin a real patch so we can see how it looks in practice?
-Chris
Dave Gordon Dec. 8, 2015, 1:30 p.m. UTC | #9
Way back at the beginning of October, Chris Wilson suggested that cleaning
up these macros by removing the redundant 'temp' might be worthwhile. So
here's the patch.

There's one more thing that might be cleaned up here (but for which
I don't have a patch yet), which is that gen8_for_each_pdpe() still
references a non-parameter value named 'dev'. Bizarrely, this apparent
nonlocal reference is only used as the parameter to another macro -- which
then doesn't actually use it! Suggestions on how to tidy this up welcome!

One possibility was to define a common iterator (__gen8_for_each())
and then use it to create all the three level-specific macros, which
meant that the loop bounds were passed in rather than being part of the
iterator. But the common macro needed a LOT of parameters, and didn't
really look like an improvement. So I've gone with this simple rewrite
for now.
Daniel Vetter Dec. 10, 2015, 8:35 a.m. UTC | #10
On Tue, Dec 08, 2015 at 01:30:50PM +0000, Dave Gordon wrote:
> Way back at the beginning of October, Chris Wilson suggested that cleaning
> up these macros by removing the redundant 'temp' might be worthwhile. So
> here's the patch.
> 
> There's one more thing that might be cleaned up here (but for which
> I don't have a patch yet), which is that gen8_for_each_pdpe() still
> references a non-parameter value named 'dev'. Bizarrely, this apparent
> nonlocal reference is only used as the parameter to another macro -- which
> then doesn't actually use it! Suggestions on how to tidy this up welcome!

Iirc this is fallout from how we originally handled full ppgtt and
aliasing ppgtt. Old versions tried to transparently fall back to simpler
code, nowadays we just declare the gpu wedged. That's why there's no need
to look at dev any more. I think you can just remove that.
-Daniel

> 
> One possibility was to define a common iterator (__gen8_for_each())
> and then use it to create all the three level-specific macros, which
> meant that the loop bounds were passed in rather than being part of the
> iterator. But the common macro needed a LOT of parameters, and didn't
> really look like an improvement. So I've gone with this simple rewrite
> for now.
> 
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9fbb07d..a216397 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -394,7 +394,8 @@  struct i915_hw_ppgtt {
  */
 #define gen6_for_each_pde(pt, pd, start, length, temp, iter) \
 	for (iter = gen6_pde_index(start); \
-	     pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES; \
+	     length > 0 && iter < I915_PDES ? \
+			(pt = (pd)->page_table[iter]), 1 : 0; \
 	     iter++, \
 	     temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT) - start, \
 	     temp = min_t(unsigned, temp, length), \
@@ -459,7 +460,8 @@  static inline uint32_t gen6_pde_index(uint32_t addr)
  */
 #define gen8_for_each_pde(pt, pd, start, length, temp, iter)		\
 	for (iter = gen8_pde_index(start); \
-	     pt = (pd)->page_table[iter], length > 0 && iter < I915_PDES;	\
+	     length > 0 && iter < I915_PDES ? \
+			(pt = (pd)->page_table[iter]), 1 : 0; \
 	     iter++,				\
 	     temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT) - start,	\
 	     temp = min(temp, length),					\
@@ -467,8 +469,8 @@  static inline uint32_t gen6_pde_index(uint32_t addr)
 
 #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)	\
 	for (iter = gen8_pdpe_index(start); \
-	     pd = (pdp)->page_directory[iter], \
-	     length > 0 && (iter < I915_PDPES_PER_PDP(dev)); \
+	     length > 0 && (iter < I915_PDPES_PER_PDP(dev)) ? \
+			(pd = (pdp)->page_directory[iter]), 1 : 0; \
 	     iter++,				\
 	     temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,	\
 	     temp = min(temp, length),					\
@@ -476,8 +478,8 @@  static inline uint32_t gen6_pde_index(uint32_t addr)
 
 #define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)	\
 	for (iter = gen8_pml4e_index(start);	\
-	     pdp = (pml4)->pdps[iter], \
-	     length > 0 && iter < GEN8_PML4ES_PER_PML4; \
+	     length > 0 && iter < GEN8_PML4ES_PER_PML4 ? \
+			(pdp = (pml4)->pdps[iter]), 1 : 0; \
 	     iter++,				\
 	     temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,	\
 	     temp = min(temp, length),					\