diff mbox

[12/13] drm/i915/skl: Replace the HDMI DPLL divider computation algorithm

Message ID 1431020329-11414-13-git-send-email-damien.lespiau@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lespiau, Damien May 7, 2015, 5:38 p.m. UTC
The HW validation team came back from further testing with a slightly
changed constraint on the deviation between the DCO frequency and the
central frequency. Instead of +-4%, it's now +1%/-6%.

Unfortunately, the previous algorithm didn't quite cope with these new
constraints, the reason being that it wasn't thorough enough looking at
the possible divider candidates.

The new algorithm looks at all dividers, which is definitely a hammer
approach (we could reduce further the set of dividers to good ones as a
follow up, at the cost of a bit more complicated code). But, at least,
we can now satisfy the +1%/+6% rule for all the "Well known" HDMI
frequencies of my test set (373 entries).

On that subject, the new code is quite extensively tested in
intel-gpu-tools (tools/skl_compute_wrpll).

Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
---
 drivers/gpu/drm/i915/intel_ddi.c | 207 +++++++++++++++++++++++++--------------
 1 file changed, 133 insertions(+), 74 deletions(-)

Comments

Paulo Zanoni May 27, 2015, 9:28 p.m. UTC | #1
2015-05-07 14:38 GMT-03:00 Damien Lespiau <damien.lespiau@intel.com>:
> The HW validation team came back from further testing with a slightly
> changed constraint on the deviation between the DCO frequency and the
> central frequency. Instead of +-4%, it's now +1%/-6%.
>
> Unfortunately, the previous algorithm didn't quite cope with these new
> constraints, the reason being that it wasn't thorough enough looking at
> the possible divider candidates.
>
> The new algorithm looks at all dividers, which is definitely a hammer
> approach (we could reduce further the set of dividers to good ones as a
> follow up, at the cost of a bit more complicated code). But, at least,
> we can now satisfy the +1%/+6% rule for all the "Well known" HDMI
> frequencies of my test set (373 entries).
>
> On that subject, the new code is quite extensively tested in
> intel-gpu-tools (tools/skl_compute_wrpll).
>
> Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_ddi.c | 207 +++++++++++++++++++++++++--------------
>  1 file changed, 133 insertions(+), 74 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
> index a99fee1..381a8c9 100644
> --- a/drivers/gpu/drm/i915/intel_ddi.c
> +++ b/drivers/gpu/drm/i915/intel_ddi.c
> @@ -1100,6 +1100,99 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
>         return true;
>  }
>
> +struct skl_wrpll_context {
> +       uint32_t min_pdeviation;        /* record the minimum deviations to */
> +       uint32_t min_ndeviation;        /* compare candidates */
> +       uint64_t central_freq;          /* chosen central freq */
> +       uint64_t dco_freq;              /* chosen dco freq */
> +       unsigned int p;                 /* chosen divider */
> +};
> +
> +static void skl_wrpll_context_init(struct skl_wrpll_context *ctx)
> +{
> +       memset(ctx, 0, sizeof(*ctx));
> +
> +       /* DCO freq must be within +1%/-6%  of the DCO central freq */
> +       ctx->min_pdeviation = 100;
> +       ctx-> min_ndeviation = 600;

Since this is called only once, you could just have initialized the
struct by the usual way...

Also, there's a white space on the "ctx-> min_ndeviation" part.

> +}
> +
> +static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
> +                                 uint64_t central_freq,
> +                                 uint64_t dco_freq,
> +                                 unsigned int divider)
> +{
> +       uint64_t deviation;
> +
> +       deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq),
> +                             central_freq);
> +
> +       /* positive deviation */
> +       if (dco_freq >= central_freq) {
> +               if (deviation < ctx->min_pdeviation) {
> +                       ctx->min_pdeviation = deviation;
> +                       ctx->central_freq = central_freq;
> +                       ctx->dco_freq = dco_freq;
> +                       ctx->p = divider;
> +               }
> +       /* negative deviation */
> +       } else if (deviation < ctx->min_ndeviation) {
> +               ctx->min_ndeviation = deviation;
> +               ctx->central_freq = central_freq;
> +               ctx->dco_freq = dco_freq;
> +               ctx->p = divider;
> +       }
> +}
> +
> +static void skl_wrpll_get_multipliers(unsigned int p,
> +                                     unsigned int *p0 /* out */,
> +                                     unsigned int *p1 /* out */,
> +                                     unsigned int *p2 /* out */)
> +{
> +       /* even dividers */
> +       if (p % 2 == 0) {
> +               unsigned int half = p / 2;
> +
> +               if (half == 1 || half == 2 || half == 3 || half == 5) {
> +                       *p0 = 2;
> +                       *p1 = 1;
> +                       *p2 = half;
> +               } else if (half % 2 == 0) {
> +                       *p0 = 2;
> +                       *p1 = half / 2;
> +                       *p2 = 2;
> +               } else if (half % 3 == 0) {
> +                       *p0 = 3;
> +                       *p1 = half / 3;
> +                       *p2 = 2;
> +               } else if (half % 7 == 0) {
> +                       *p0 = 7;
> +                       *p1 = half / 7;
> +                       *p2 = 2;
> +               }
> +       } else if (p == 3 || p == 9) {  /* 3, 5, 7, 9, 15, 21, 35 */
> +               *p0 = 3;
> +               *p1 = 1;
> +               *p2 = p / 3;
> +       } else if (p == 5 || p == 7) {
> +               *p0 = p;
> +               *p1 = 1;
> +               *p2 = 1;
> +       } else if (p == 15) {
> +               *p0 = 3;
> +               *p1 = 1;
> +               *p2 = 5;
> +       } else if (p == 21) {
> +               *p0 = 7;
> +               *p1 = 1;
> +               *p2 = 3;
> +       } else if (p == 35) {
> +               *p0 = 7;
> +               *p1 = 1;
> +               *p2 = 5;
> +       }
> +}
> +
>  struct skl_wrpll_params {
>         uint32_t        dco_fraction;
>         uint32_t        dco_integer;
> @@ -1185,90 +1278,56 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */,
>         uint64_t dco_central_freq[3] = {8400000000ULL,
>                                         9000000000ULL,
>                                         9600000000ULL};
> -       uint32_t min_dco_deviation = 400;
> -       uint32_t min_dco_index = 3;
> -       uint32_t P0[4] = {1, 2, 3, 7};
> -       uint32_t P2[4] = {1, 2, 3, 5};
> -       bool found = false;
> -       uint32_t candidate_p = 0;
> -       uint32_t candidate_p0[3] = {0}, candidate_p1[3] = {0};
> -       uint32_t candidate_p2[3] = {0};
> -       uint32_t dco_central_freq_deviation[3];
> -       uint32_t i, P1, k, dco_count;
> -       bool retry_with_odd = false;
> -
> -       /* Determine P0, P1 or P2 */
> -       for (dco_count = 0; dco_count < 3; dco_count++) {
> -               found = false;
> -               candidate_p =
> -                       div64_u64(dco_central_freq[dco_count], afe_clock);
> -               if (retry_with_odd == false)
> -                       candidate_p = (candidate_p % 2 == 0 ?
> -                               candidate_p : candidate_p + 1);
> -
> -               for (P1 = 1; P1 < candidate_p; P1++) {
> -                       for (i = 0; i < 4; i++) {
> -                               if (!(P0[i] != 1 || P1 == 1))
> -                                       continue;
> -
> -                               for (k = 0; k < 4; k++) {
> -                                       if (P1 != 1 && P2[k] != 2)
> -                                               continue;
> -
> -                                       if (candidate_p == P0[i] * P1 * P2[k]) {
> -                                               /* Found possible P0, P1, P2 */
> -                                               found = true;
> -                                               candidate_p0[dco_count] = P0[i];
> -                                               candidate_p1[dco_count] = P1;
> -                                               candidate_p2[dco_count] = P2[k];
> -                                               goto found;
> -                                       }
> -
> -                               }
> +       static const int even_dividers[] = {  4,  6,  8, 10, 12, 14, 16, 18, 20,
> +                                            24, 28, 30, 32, 36, 40, 42, 44,
> +                                            48, 52, 54, 56, 60, 64, 66, 68,
> +                                            70, 72, 76, 78, 80, 84, 88, 90,
> +                                            92, 96, 98 };
> +       static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
> +       static const struct {
> +               const int *list;
> +               int n_dividers;
> +       } dividers[] = {
> +               { even_dividers, ARRAY_SIZE(even_dividers) },
> +               { odd_dividers, ARRAY_SIZE(odd_dividers) },
> +       };
> +       struct skl_wrpll_context ctx;
> +       unsigned int dco, d, i;
> +       unsigned int p0, p1, p2;
> +
> +       skl_wrpll_context_init(&ctx);
> +
> +       for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
> +               for (d = 0; d < ARRAY_SIZE(dividers); d++) {

From what I can tell by reading the documentation, it really seems
that the two lines above should be inverted. First we do everything
for the even (including all DCOs), and only then for the odd. It's
easier to realize this when you look at the part of the doc that says,
in order: "next candidate divider \n next DCO central frequency \n
next divider parity". Anyway, this is not exactly a problem right now
since we never break the loop, but you change this on the next patch,
so it's probably best to discuss this here.

> +                       for (i = 0; i < dividers[d].n_dividers; i++) {
> +                               unsigned int p = dividers[d].list[i];
> +                               uint64_t dco_freq = p * afe_clock;
> +
> +                               skl_wrpll_try_divider(&ctx,
> +                                                     dco_central_freq[dco],
> +                                                     dco_freq,
> +                                                     p);
>                         }
>                 }
> -
> -found:
> -               if (found) {
> -                       dco_central_freq_deviation[dco_count] =
> -                               div64_u64(10000 *
> -                                         abs_diff(candidate_p * afe_clock,
> -                                                  dco_central_freq[dco_count]),
> -                                         dco_central_freq[dco_count]);
> -
> -                       if (dco_central_freq_deviation[dco_count] <
> -                               min_dco_deviation) {
> -                               min_dco_deviation =
> -                                       dco_central_freq_deviation[dco_count];
> -                               min_dco_index = dco_count;
> -                       }
> -               }
> -
> -               if (min_dco_index > 2 && dco_count == 2) {
> -                        /* oh well, we tried... */
> -                        if (retry_with_odd)
> -                                break;
> -
> -                       retry_with_odd = true;
> -                       dco_count = 0;
> -               }
>         }
>
> -       if (WARN(min_dco_index > 2,
> -                "No valid parameters found for pixel clock: %dHz\n", clock))
> +       if (!ctx.p) {
> +               DRM_DEBUG_DRIVER("No valid divider found for %dHz\n", clock);
>                 return false;
> +       }
>
> -       skl_wrpll_params_populate(wrpll_params,
> -                                 afe_clock,
> -                                 dco_central_freq[min_dco_index],
> -                                 candidate_p0[min_dco_index],
> -                                 candidate_p1[min_dco_index],
> -                                 candidate_p2[min_dco_index]);
> +       /*
> +        * gcc incorrectly analyses that these can be used without being
> +        * initialized. To be fair, it's hard to guess.

Why didn't you just write the mathematical proof as a comment? I hope
it's not too big to fit on the 80 column margin limit.

Jokes aside, the only "blocker" would be the DCO first vs odd/even
first. I'd like to hear your interpretation on this.

> +        */
> +       p0 = p1 = p2 = 0;
> +       skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
> +       skl_wrpll_params_populate(wrpll_params, afe_clock, ctx.central_freq,
> +                                 p0, p1, p2);
>
>         return true;
>  }
>
> -
>  static bool
>  skl_ddi_pll_select(struct intel_crtc *intel_crtc,
>                    struct intel_crtc_state *crtc_state,
> --
> 2.1.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Paulo Zanoni May 27, 2015, 9:51 p.m. UTC | #2
2015-05-27 18:28 GMT-03:00 Paulo Zanoni <przanoni@gmail.com>:
> 2015-05-07 14:38 GMT-03:00 Damien Lespiau <damien.lespiau@intel.com>:
>> The HW validation team came back from further testing with a slightly
>> changed constraint on the deviation between the DCO frequency and the
>> central frequency. Instead of +-4%, it's now +1%/-6%.
>>
>> Unfortunately, the previous algorithm didn't quite cope with these new
>> constraints, the reason being that it wasn't thorough enough looking at
>> the possible divider candidates.
>>
>> The new algorithm looks at all dividers, which is definitely a hammer
>> approach (we could reduce further the set of dividers to good ones as a
>> follow up, at the cost of a bit more complicated code). But, at least,
>> we can now satisfy the +1%/+6% rule for all the "Well known" HDMI
>> frequencies of my test set (373 entries).
>>
>> On that subject, the new code is quite extensively tested in
>> intel-gpu-tools (tools/skl_compute_wrpll).
>>
>> Signed-off-by: Damien Lespiau <damien.lespiau@intel.com>
>> ---
>>  drivers/gpu/drm/i915/intel_ddi.c | 207 +++++++++++++++++++++++++--------------
>>  1 file changed, 133 insertions(+), 74 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
>> index a99fee1..381a8c9 100644
>> --- a/drivers/gpu/drm/i915/intel_ddi.c
>> +++ b/drivers/gpu/drm/i915/intel_ddi.c
>> @@ -1100,6 +1100,99 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
>>         return true;
>>  }
>>
>> +struct skl_wrpll_context {
>> +       uint32_t min_pdeviation;        /* record the minimum deviations to */
>> +       uint32_t min_ndeviation;        /* compare candidates */
>> +       uint64_t central_freq;          /* chosen central freq */
>> +       uint64_t dco_freq;              /* chosen dco freq */
>> +       unsigned int p;                 /* chosen divider */
>> +};
>> +
>> +static void skl_wrpll_context_init(struct skl_wrpll_context *ctx)
>> +{
>> +       memset(ctx, 0, sizeof(*ctx));
>> +
>> +       /* DCO freq must be within +1%/-6%  of the DCO central freq */
>> +       ctx->min_pdeviation = 100;
>> +       ctx-> min_ndeviation = 600;
>
> Since this is called only once, you could just have initialized the
> struct by the usual way...
>
> Also, there's a white space on the "ctx-> min_ndeviation" part.
>
>> +}
>> +
>> +static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
>> +                                 uint64_t central_freq,
>> +                                 uint64_t dco_freq,
>> +                                 unsigned int divider)
>> +{
>> +       uint64_t deviation;
>> +
>> +       deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq),
>> +                             central_freq);
>> +
>> +       /* positive deviation */
>> +       if (dco_freq >= central_freq) {
>> +               if (deviation < ctx->min_pdeviation) {
>> +                       ctx->min_pdeviation = deviation;
>> +                       ctx->central_freq = central_freq;
>> +                       ctx->dco_freq = dco_freq;
>> +                       ctx->p = divider;
>> +               }
>> +       /* negative deviation */
>> +       } else if (deviation < ctx->min_ndeviation) {
>> +               ctx->min_ndeviation = deviation;
>> +               ctx->central_freq = central_freq;
>> +               ctx->dco_freq = dco_freq;
>> +               ctx->p = divider;
>> +       }

Some additional comments:

Don't we want to break the loop in case we reach a point where any of
the deviations is zero?

Also notice that, due to the way we loop over the variables at the
main func, we will always pick the last deviation that was "improved"
(positive or negative), as opposed to the very minimal deviation. In
other words, if the last thing our algorithm did was move the
ndeviation from 600 to 599, we will pick this, even if, in previous
iterations, we moved the pdeviation from 100 to 1. Is this really what
we want? Maybe we want to compare min_pdeviation against
min_ndeviation before picking central_freq, dco_freq and p?

Also, if we loop over the *odd* dividers before looping over the
*even* divers, and change the comparisons to "<=" instead of just "<",
and don't "break the loop in case we reach 0 variation" for the odd
dividers, then our algorithm will give preference to even dividers in
case we find the same deviation on both odd and even dividers (in
other words, this will give you the implementation of the alternative
interpretation of the spec, which we're discussing on patch 13).

I know you probably don't have any of these answers, so I won't block
the patch review on the problems on this email. But I'd at least like
to know your opinions here.Maybe we could send some additional
questions to the spec writers.

>> +}
>> +
>> +static void skl_wrpll_get_multipliers(unsigned int p,
>> +                                     unsigned int *p0 /* out */,
>> +                                     unsigned int *p1 /* out */,
>> +                                     unsigned int *p2 /* out */)
>> +{
>> +       /* even dividers */
>> +       if (p % 2 == 0) {
>> +               unsigned int half = p / 2;
>> +
>> +               if (half == 1 || half == 2 || half == 3 || half == 5) {
>> +                       *p0 = 2;
>> +                       *p1 = 1;
>> +                       *p2 = half;
>> +               } else if (half % 2 == 0) {
>> +                       *p0 = 2;
>> +                       *p1 = half / 2;
>> +                       *p2 = 2;
>> +               } else if (half % 3 == 0) {
>> +                       *p0 = 3;
>> +                       *p1 = half / 3;
>> +                       *p2 = 2;
>> +               } else if (half % 7 == 0) {
>> +                       *p0 = 7;
>> +                       *p1 = half / 7;
>> +                       *p2 = 2;
>> +               }
>> +       } else if (p == 3 || p == 9) {  /* 3, 5, 7, 9, 15, 21, 35 */
>> +               *p0 = 3;
>> +               *p1 = 1;
>> +               *p2 = p / 3;
>> +       } else if (p == 5 || p == 7) {
>> +               *p0 = p;
>> +               *p1 = 1;
>> +               *p2 = 1;
>> +       } else if (p == 15) {
>> +               *p0 = 3;
>> +               *p1 = 1;
>> +               *p2 = 5;
>> +       } else if (p == 21) {
>> +               *p0 = 7;
>> +               *p1 = 1;
>> +               *p2 = 3;
>> +       } else if (p == 35) {
>> +               *p0 = 7;
>> +               *p1 = 1;
>> +               *p2 = 5;
>> +       }
>> +}
>> +
>>  struct skl_wrpll_params {
>>         uint32_t        dco_fraction;
>>         uint32_t        dco_integer;
>> @@ -1185,90 +1278,56 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */,
>>         uint64_t dco_central_freq[3] = {8400000000ULL,
>>                                         9000000000ULL,
>>                                         9600000000ULL};
>> -       uint32_t min_dco_deviation = 400;
>> -       uint32_t min_dco_index = 3;
>> -       uint32_t P0[4] = {1, 2, 3, 7};
>> -       uint32_t P2[4] = {1, 2, 3, 5};
>> -       bool found = false;
>> -       uint32_t candidate_p = 0;
>> -       uint32_t candidate_p0[3] = {0}, candidate_p1[3] = {0};
>> -       uint32_t candidate_p2[3] = {0};
>> -       uint32_t dco_central_freq_deviation[3];
>> -       uint32_t i, P1, k, dco_count;
>> -       bool retry_with_odd = false;
>> -
>> -       /* Determine P0, P1 or P2 */
>> -       for (dco_count = 0; dco_count < 3; dco_count++) {
>> -               found = false;
>> -               candidate_p =
>> -                       div64_u64(dco_central_freq[dco_count], afe_clock);
>> -               if (retry_with_odd == false)
>> -                       candidate_p = (candidate_p % 2 == 0 ?
>> -                               candidate_p : candidate_p + 1);
>> -
>> -               for (P1 = 1; P1 < candidate_p; P1++) {
>> -                       for (i = 0; i < 4; i++) {
>> -                               if (!(P0[i] != 1 || P1 == 1))
>> -                                       continue;
>> -
>> -                               for (k = 0; k < 4; k++) {
>> -                                       if (P1 != 1 && P2[k] != 2)
>> -                                               continue;
>> -
>> -                                       if (candidate_p == P0[i] * P1 * P2[k]) {
>> -                                               /* Found possible P0, P1, P2 */
>> -                                               found = true;
>> -                                               candidate_p0[dco_count] = P0[i];
>> -                                               candidate_p1[dco_count] = P1;
>> -                                               candidate_p2[dco_count] = P2[k];
>> -                                               goto found;
>> -                                       }
>> -
>> -                               }
>> +       static const int even_dividers[] = {  4,  6,  8, 10, 12, 14, 16, 18, 20,
>> +                                            24, 28, 30, 32, 36, 40, 42, 44,
>> +                                            48, 52, 54, 56, 60, 64, 66, 68,
>> +                                            70, 72, 76, 78, 80, 84, 88, 90,
>> +                                            92, 96, 98 };
>> +       static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
>> +       static const struct {
>> +               const int *list;
>> +               int n_dividers;
>> +       } dividers[] = {
>> +               { even_dividers, ARRAY_SIZE(even_dividers) },
>> +               { odd_dividers, ARRAY_SIZE(odd_dividers) },
>> +       };
>> +       struct skl_wrpll_context ctx;
>> +       unsigned int dco, d, i;
>> +       unsigned int p0, p1, p2;
>> +
>> +       skl_wrpll_context_init(&ctx);
>> +
>> +       for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
>> +               for (d = 0; d < ARRAY_SIZE(dividers); d++) {
>
> From what I can tell by reading the documentation, it really seems
> that the two lines above should be inverted. First we do everything
> for the even (including all DCOs), and only then for the odd. It's
> easier to realize this when you look at the part of the doc that says,
> in order: "next candidate divider \n next DCO central frequency \n
> next divider parity". Anyway, this is not exactly a problem right now
> since we never break the loop, but you change this on the next patch,
> so it's probably best to discuss this here.
>
>> +                       for (i = 0; i < dividers[d].n_dividers; i++) {
>> +                               unsigned int p = dividers[d].list[i];
>> +                               uint64_t dco_freq = p * afe_clock;
>> +
>> +                               skl_wrpll_try_divider(&ctx,
>> +                                                     dco_central_freq[dco],
>> +                                                     dco_freq,
>> +                                                     p);
>>                         }
>>                 }
>> -
>> -found:
>> -               if (found) {
>> -                       dco_central_freq_deviation[dco_count] =
>> -                               div64_u64(10000 *
>> -                                         abs_diff(candidate_p * afe_clock,
>> -                                                  dco_central_freq[dco_count]),
>> -                                         dco_central_freq[dco_count]);
>> -
>> -                       if (dco_central_freq_deviation[dco_count] <
>> -                               min_dco_deviation) {
>> -                               min_dco_deviation =
>> -                                       dco_central_freq_deviation[dco_count];
>> -                               min_dco_index = dco_count;
>> -                       }
>> -               }
>> -
>> -               if (min_dco_index > 2 && dco_count == 2) {
>> -                        /* oh well, we tried... */
>> -                        if (retry_with_odd)
>> -                                break;
>> -
>> -                       retry_with_odd = true;
>> -                       dco_count = 0;
>> -               }
>>         }
>>
>> -       if (WARN(min_dco_index > 2,
>> -                "No valid parameters found for pixel clock: %dHz\n", clock))
>> +       if (!ctx.p) {
>> +               DRM_DEBUG_DRIVER("No valid divider found for %dHz\n", clock);
>>                 return false;
>> +       }
>>
>> -       skl_wrpll_params_populate(wrpll_params,
>> -                                 afe_clock,
>> -                                 dco_central_freq[min_dco_index],
>> -                                 candidate_p0[min_dco_index],
>> -                                 candidate_p1[min_dco_index],
>> -                                 candidate_p2[min_dco_index]);
>> +       /*
>> +        * gcc incorrectly analyses that these can be used without being
>> +        * initialized. To be fair, it's hard to guess.
>
> Why didn't you just write the mathematical proof as a comment? I hope
> it's not too big to fit on the 80 column margin limit.
>
> Jokes aside, the only "blocker" would be the DCO first vs odd/even
> first. I'd like to hear your interpretation on this.
>
>> +        */
>> +       p0 = p1 = p2 = 0;
>> +       skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
>> +       skl_wrpll_params_populate(wrpll_params, afe_clock, ctx.central_freq,
>> +                                 p0, p1, p2);
>>
>>         return true;
>>  }
>>
>> -
>>  static bool
>>  skl_ddi_pll_select(struct intel_crtc *intel_crtc,
>>                    struct intel_crtc_state *crtc_state,
>> --
>> 2.1.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>
>
> --
> Paulo Zanoni
Lespiau, Damien June 25, 2015, 10:21 a.m. UTC | #3
On Wed, May 27, 2015 at 06:28:06PM -0300, Paulo Zanoni wrote:
> > @@ -1185,90 +1278,56 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */,
> >         uint64_t dco_central_freq[3] = {8400000000ULL,
> >                                         9000000000ULL,
> >                                         9600000000ULL};
> > +       static const int even_dividers[] = {  4,  6,  8, 10, 12, 14, 16, 18, 20,
> > +                                            24, 28, 30, 32, 36, 40, 42, 44,
> > +                                            48, 52, 54, 56, 60, 64, 66, 68,
> > +                                            70, 72, 76, 78, 80, 84, 88, 90,
> > +                                            92, 96, 98 };
> > +       static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
> > +       static const struct {
> > +               const int *list;
> > +               int n_dividers;
> > +       } dividers[] = {
> > +               { even_dividers, ARRAY_SIZE(even_dividers) },
> > +               { odd_dividers, ARRAY_SIZE(odd_dividers) },
> > +       };
> > +       struct skl_wrpll_context ctx;
> > +       unsigned int dco, d, i;
> > +       unsigned int p0, p1, p2;
> > +
> > +       skl_wrpll_context_init(&ctx);
> > +
> > +       for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
> > +               for (d = 0; d < ARRAY_SIZE(dividers); d++) {
> 
> From what I can tell by reading the documentation, it really seems
> that the two lines above should be inverted. First we do everything
> for the even (including all DCOs), and only then for the odd. It's
> easier to realize this when you look at the part of the doc that says,
> in order: "next candidate divider \n next DCO central frequency \n
> next divider parity". Anyway, this is not exactly a problem right now
> since we never break the loop, but you change this on the next patch,
> so it's probably best to discuss this here.

You're absolutely right, those 2 lines need to be inverted.

> > +       /*
> > +        * gcc incorrectly analyses that these can be used without being
> > +        * initialized. To be fair, it's hard to guess.
> 
> Why didn't you just write the mathematical proof as a comment? I hope
> it's not too big to fit on the 80 column margin limit.
> 
> Jokes aside, the only "blocker" would be the DCO first vs odd/even
> first. I'd like to hear your interpretation on this.
> 
> > +        */
> > +       p0 = p1 = p2 = 0;
> > +       skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
> > +       skl_wrpll_params_populate(wrpll_params, afe_clock, ctx.central_freq,
> > +                                 p0, p1, p2);

The "proof" is in tools/skl_compute_wrpll.c, in the test_multipliers()
function that unit-tests skl_wrpll_get_multipliers().
Lespiau, Damien June 25, 2015, 3 p.m. UTC | #4
On Wed, May 27, 2015 at 06:51:13PM -0300, Paulo Zanoni wrote:
> >> +static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
> >> +                                 uint64_t central_freq,
> >> +                                 uint64_t dco_freq,
> >> +                                 unsigned int divider)
> >> +{
> >> +       uint64_t deviation;
> >> +
> >> +       deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq),
> >> +                             central_freq);
> >> +
> >> +       /* positive deviation */
> >> +       if (dco_freq >= central_freq) {
> >> +               if (deviation < ctx->min_pdeviation) {
> >> +                       ctx->min_pdeviation = deviation;
> >> +                       ctx->central_freq = central_freq;
> >> +                       ctx->dco_freq = dco_freq;
> >> +                       ctx->p = divider;
> >> +               }
> >> +       /* negative deviation */
> >> +       } else if (deviation < ctx->min_ndeviation) {
> >> +               ctx->min_ndeviation = deviation;
> >> +               ctx->central_freq = central_freq;
> >> +               ctx->dco_freq = dco_freq;
> >> +               ctx->p = divider;
> >> +       }
> 
> Some additional comments:
> 
> Don't we want to break the loop in case we reach a point where any of
> the deviations is zero?

We could, haven't done it in the v2 of the patch, could be done as a
follow up.

> Also notice that, due to the way we loop over the variables at the
> main func, we will always pick the last deviation that was "improved"
> (positive or negative), as opposed to the very minimal deviation. In
> other words, if the last thing our algorithm did was move the
> ndeviation from 600 to 599, we will pick this, even if, in previous
> iterations, we moved the pdeviation from 100 to 1. Is this really what
> we want? Maybe we want to compare min_pdeviation against
> min_ndeviation before picking central_freq, dco_freq and p?

That seems to be a (pretty big!) deficiency of the documented algorithm,
that's definitely something improving the average deviation in the i-g-t
test (average deviation 206.52 Vs 194.47)

> Also, if we loop over the *odd* dividers before looping over the
> *even* divers, and change the comparisons to "<=" instead of just "<",
> and don't "break the loop in case we reach 0 variation" for the odd
> dividers, then our algorithm will give preference to even dividers in
> case we find the same deviation on both odd and even dividers (in
> other words, this will give you the implementation of the alternative
> interpretation of the spec, which we're discussing on patch 13).

But then it would not use an even divider that has a slightly worse
deviation. My reading of the specs is:

  - If there's an even divider with the deviation fitting in the +1%/-6%
    constraint, choose it. (if there are several such even dividers,
    choose the one minimizing the deviation)
  - If we can't find an even divider within +1%/-6%, settle for an odd
    divider that satisfies those constraints.

> I know you probably don't have any of these answers, so I won't block
> the patch review on the problems on this email. But I'd at least like
> to know your opinions here.Maybe we could send some additional
> questions to the spec writers.

I do have some answers :) at least assuming the interpreation above is
the correct one. The two big suggestions you have make quite a
difference in the 2 metrics I gather in the i-g-t test:

v1 of this series:
  even/odd dividers: 338/35
  average deviation: 206.52

v2 of this series:
  even/odd dividers: 363/10
  average deviation: 194.47

It's also easier to follow the changes as diffs in the corresponding
i-g-t series than the v2 of the kernel patches.

testdisplay still cycles through the modes on the HDMI displays I have.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index a99fee1..381a8c9 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1100,6 +1100,99 @@  hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
 	return true;
 }
 
+struct skl_wrpll_context {
+	uint32_t min_pdeviation;	/* record the minimum deviations to */
+	uint32_t min_ndeviation;	/* compare candidates */
+	uint64_t central_freq;		/* chosen central freq */
+	uint64_t dco_freq;		/* chosen dco freq */
+	unsigned int p;			/* chosen divider */
+};
+
+static void skl_wrpll_context_init(struct skl_wrpll_context *ctx)
+{
+	memset(ctx, 0, sizeof(*ctx));
+
+	/* DCO freq must be within +1%/-6%  of the DCO central freq */
+	ctx->min_pdeviation = 100;
+	ctx-> min_ndeviation = 600;
+}
+
+static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
+				  uint64_t central_freq,
+				  uint64_t dco_freq,
+				  unsigned int divider)
+{
+	uint64_t deviation;
+
+	deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq),
+			      central_freq);
+
+	/* positive deviation */
+	if (dco_freq >= central_freq) {
+		if (deviation < ctx->min_pdeviation) {
+			ctx->min_pdeviation = deviation;
+			ctx->central_freq = central_freq;
+			ctx->dco_freq = dco_freq;
+			ctx->p = divider;
+		}
+	/* negative deviation */
+	} else if (deviation < ctx->min_ndeviation) {
+		ctx->min_ndeviation = deviation;
+		ctx->central_freq = central_freq;
+		ctx->dco_freq = dco_freq;
+		ctx->p = divider;
+	}
+}
+
+static void skl_wrpll_get_multipliers(unsigned int p,
+				      unsigned int *p0 /* out */,
+				      unsigned int *p1 /* out */,
+				      unsigned int *p2 /* out */)
+{
+	/* even dividers */
+	if (p % 2 == 0) {
+		unsigned int half = p / 2;
+
+		if (half == 1 || half == 2 || half == 3 || half == 5) {
+			*p0 = 2;
+			*p1 = 1;
+			*p2 = half;
+		} else if (half % 2 == 0) {
+			*p0 = 2;
+			*p1 = half / 2;
+			*p2 = 2;
+		} else if (half % 3 == 0) {
+			*p0 = 3;
+			*p1 = half / 3;
+			*p2 = 2;
+		} else if (half % 7 == 0) {
+			*p0 = 7;
+			*p1 = half / 7;
+			*p2 = 2;
+		}
+	} else if (p == 3 || p == 9) {  /* 3, 5, 7, 9, 15, 21, 35 */
+		*p0 = 3;
+		*p1 = 1;
+		*p2 = p / 3;
+	} else if (p == 5 || p == 7) {
+		*p0 = p;
+		*p1 = 1;
+		*p2 = 1;
+	} else if (p == 15) {
+		*p0 = 3;
+		*p1 = 1;
+		*p2 = 5;
+	} else if (p == 21) {
+		*p0 = 7;
+		*p1 = 1;
+		*p2 = 3;
+	} else if (p == 35) {
+		*p0 = 7;
+		*p1 = 1;
+		*p2 = 5;
+	}
+}
+
 struct skl_wrpll_params {
 	uint32_t        dco_fraction;
 	uint32_t        dco_integer;
@@ -1185,90 +1278,56 @@  skl_ddi_calculate_wrpll(int clock /* in Hz */,
 	uint64_t dco_central_freq[3] = {8400000000ULL,
 					9000000000ULL,
 					9600000000ULL};
-	uint32_t min_dco_deviation = 400;
-	uint32_t min_dco_index = 3;
-	uint32_t P0[4] = {1, 2, 3, 7};
-	uint32_t P2[4] = {1, 2, 3, 5};
-	bool found = false;
-	uint32_t candidate_p = 0;
-	uint32_t candidate_p0[3] = {0}, candidate_p1[3] = {0};
-	uint32_t candidate_p2[3] = {0};
-	uint32_t dco_central_freq_deviation[3];
-	uint32_t i, P1, k, dco_count;
-	bool retry_with_odd = false;
-
-	/* Determine P0, P1 or P2 */
-	for (dco_count = 0; dco_count < 3; dco_count++) {
-		found = false;
-		candidate_p =
-			div64_u64(dco_central_freq[dco_count], afe_clock);
-		if (retry_with_odd == false)
-			candidate_p = (candidate_p % 2 == 0 ?
-				candidate_p : candidate_p + 1);
-
-		for (P1 = 1; P1 < candidate_p; P1++) {
-			for (i = 0; i < 4; i++) {
-				if (!(P0[i] != 1 || P1 == 1))
-					continue;
-
-				for (k = 0; k < 4; k++) {
-					if (P1 != 1 && P2[k] != 2)
-						continue;
-
-					if (candidate_p == P0[i] * P1 * P2[k]) {
-						/* Found possible P0, P1, P2 */
-						found = true;
-						candidate_p0[dco_count] = P0[i];
-						candidate_p1[dco_count] = P1;
-						candidate_p2[dco_count] = P2[k];
-						goto found;
-					}
-
-				}
+	static const int even_dividers[] = {  4,  6,  8, 10, 12, 14, 16, 18, 20,
+					     24, 28, 30, 32, 36, 40, 42, 44,
+					     48, 52, 54, 56, 60, 64, 66, 68,
+					     70, 72, 76, 78, 80, 84, 88, 90,
+					     92, 96, 98 };
+	static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
+	static const struct {
+		const int *list;
+		int n_dividers;
+	} dividers[] = {
+		{ even_dividers, ARRAY_SIZE(even_dividers) },
+		{ odd_dividers, ARRAY_SIZE(odd_dividers) },
+	};
+	struct skl_wrpll_context ctx;
+	unsigned int dco, d, i;
+	unsigned int p0, p1, p2;
+
+	skl_wrpll_context_init(&ctx);
+
+	for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
+		for (d = 0; d < ARRAY_SIZE(dividers); d++) {
+			for (i = 0; i < dividers[d].n_dividers; i++) {
+				unsigned int p = dividers[d].list[i];
+				uint64_t dco_freq = p * afe_clock;
+
+				skl_wrpll_try_divider(&ctx,
+						      dco_central_freq[dco],
+						      dco_freq,
+						      p);
 			}
 		}
-
-found:
-		if (found) {
-			dco_central_freq_deviation[dco_count] =
-				div64_u64(10000 *
-					  abs_diff(candidate_p * afe_clock,
-						   dco_central_freq[dco_count]),
-					  dco_central_freq[dco_count]);
-
-			if (dco_central_freq_deviation[dco_count] <
-				min_dco_deviation) {
-				min_dco_deviation =
-					dco_central_freq_deviation[dco_count];
-				min_dco_index = dco_count;
-			}
-		}
-
-		if (min_dco_index > 2 && dco_count == 2) {
-                        /* oh well, we tried... */
-                        if (retry_with_odd)
-                                break;
-
-			retry_with_odd = true;
-			dco_count = 0;
-		}
 	}
 
-	if (WARN(min_dco_index > 2,
-		 "No valid parameters found for pixel clock: %dHz\n", clock))
+	if (!ctx.p) {
+		DRM_DEBUG_DRIVER("No valid divider found for %dHz\n", clock);
 		return false;
+	}
 
-	skl_wrpll_params_populate(wrpll_params,
-				  afe_clock,
-				  dco_central_freq[min_dco_index],
-				  candidate_p0[min_dco_index],
-				  candidate_p1[min_dco_index],
-				  candidate_p2[min_dco_index]);
+	/*
+	 * gcc incorrectly analyses that these can be used without being
+	 * initialized. To be fair, it's hard to guess.
+	 */
+	p0 = p1 = p2 = 0;
+	skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
+	skl_wrpll_params_populate(wrpll_params, afe_clock, ctx.central_freq,
+				  p0, p1, p2);
 
 	return true;
 }
 
-
 static bool
 skl_ddi_pll_select(struct intel_crtc *intel_crtc,
 		   struct intel_crtc_state *crtc_state,