Message ID | 1431020329-11414-13-git-send-email-damien.lespiau@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
2015-05-07 14:38 GMT-03:00 Damien Lespiau <damien.lespiau@intel.com>: > The HW validation team came back from further testing with a slightly > changed constraint on the deviation between the DCO frequency and the > central frequency. Instead of +-4%, it's now +1%/-6%. > > Unfortunately, the previous algorithm didn't quite cope with these new > constraints, the reason being that it wasn't thorough enough looking at > the possible divider candidates. > > The new algorithm looks at all dividers, which is definitely a hammer > approach (we could reduce further the set of dividers to good ones as a > follow up, at the cost of a bit more complicated code). But, at least, > we can now satisfy the +1%/+6% rule for all the "Well known" HDMI > frequencies of my test set (373 entries). > > On that subject, the new code is quite extensively tested in > intel-gpu-tools (tools/skl_compute_wrpll). > > Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> > --- > drivers/gpu/drm/i915/intel_ddi.c | 207 +++++++++++++++++++++++++-------------- > 1 file changed, 133 insertions(+), 74 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c > index a99fee1..381a8c9 100644 > --- a/drivers/gpu/drm/i915/intel_ddi.c > +++ b/drivers/gpu/drm/i915/intel_ddi.c > @@ -1100,6 +1100,99 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc, > return true; > } > > +struct skl_wrpll_context { > + uint32_t min_pdeviation; /* record the minimum deviations to */ > + uint32_t min_ndeviation; /* compare candidates */ > + uint64_t central_freq; /* chosen central freq */ > + uint64_t dco_freq; /* chosen dco freq */ > + unsigned int p; /* chosen divider */ > +}; > + > +static void skl_wrpll_context_init(struct skl_wrpll_context *ctx) > +{ > + memset(ctx, 0, sizeof(*ctx)); > + > + /* DCO freq must be within +1%/-6% of the DCO central freq */ > + ctx->min_pdeviation = 100; > + ctx-> min_ndeviation = 600; Since this is called only once, you could just have initialized the struct by the usual way... Also, there's a white space on the "ctx-> min_ndeviation" part. > +} > + > +static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx, > + uint64_t central_freq, > + uint64_t dco_freq, > + unsigned int divider) > +{ > + uint64_t deviation; > + > + deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq), > + central_freq); > + > + /* positive deviation */ > + if (dco_freq >= central_freq) { > + if (deviation < ctx->min_pdeviation) { > + ctx->min_pdeviation = deviation; > + ctx->central_freq = central_freq; > + ctx->dco_freq = dco_freq; > + ctx->p = divider; > + } > + /* negative deviation */ > + } else if (deviation < ctx->min_ndeviation) { > + ctx->min_ndeviation = deviation; > + ctx->central_freq = central_freq; > + ctx->dco_freq = dco_freq; > + ctx->p = divider; > + } > +} > + > +static void skl_wrpll_get_multipliers(unsigned int p, > + unsigned int *p0 /* out */, > + unsigned int *p1 /* out */, > + unsigned int *p2 /* out */) > +{ > + /* even dividers */ > + if (p % 2 == 0) { > + unsigned int half = p / 2; > + > + if (half == 1 || half == 2 || half == 3 || half == 5) { > + *p0 = 2; > + *p1 = 1; > + *p2 = half; > + } else if (half % 2 == 0) { > + *p0 = 2; > + *p1 = half / 2; > + *p2 = 2; > + } else if (half % 3 == 0) { > + *p0 = 3; > + *p1 = half / 3; > + *p2 = 2; > + } else if (half % 7 == 0) { > + *p0 = 7; > + *p1 = half / 7; > + *p2 = 2; > + } > + } else if (p == 3 || p == 9) { /* 3, 5, 7, 9, 15, 21, 35 */ > + *p0 = 3; > + *p1 = 1; > + *p2 = p / 3; > + } else if (p == 5 || p == 7) { > + *p0 = p; > + *p1 = 1; > + *p2 = 1; > + } else if (p == 15) { > + *p0 = 3; > + *p1 = 1; > + *p2 = 5; > + } else if (p == 21) { > + *p0 = 7; > + *p1 = 1; > + *p2 = 3; > + } else if (p == 35) { > + *p0 = 7; > + *p1 = 1; > + *p2 = 5; > + } > +} > + > struct skl_wrpll_params { > uint32_t dco_fraction; > uint32_t dco_integer; > @@ -1185,90 +1278,56 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, > uint64_t dco_central_freq[3] = {8400000000ULL, > 9000000000ULL, > 9600000000ULL}; > - uint32_t min_dco_deviation = 400; > - uint32_t min_dco_index = 3; > - uint32_t P0[4] = {1, 2, 3, 7}; > - uint32_t P2[4] = {1, 2, 3, 5}; > - bool found = false; > - uint32_t candidate_p = 0; > - uint32_t candidate_p0[3] = {0}, candidate_p1[3] = {0}; > - uint32_t candidate_p2[3] = {0}; > - uint32_t dco_central_freq_deviation[3]; > - uint32_t i, P1, k, dco_count; > - bool retry_with_odd = false; > - > - /* Determine P0, P1 or P2 */ > - for (dco_count = 0; dco_count < 3; dco_count++) { > - found = false; > - candidate_p = > - div64_u64(dco_central_freq[dco_count], afe_clock); > - if (retry_with_odd == false) > - candidate_p = (candidate_p % 2 == 0 ? > - candidate_p : candidate_p + 1); > - > - for (P1 = 1; P1 < candidate_p; P1++) { > - for (i = 0; i < 4; i++) { > - if (!(P0[i] != 1 || P1 == 1)) > - continue; > - > - for (k = 0; k < 4; k++) { > - if (P1 != 1 && P2[k] != 2) > - continue; > - > - if (candidate_p == P0[i] * P1 * P2[k]) { > - /* Found possible P0, P1, P2 */ > - found = true; > - candidate_p0[dco_count] = P0[i]; > - candidate_p1[dco_count] = P1; > - candidate_p2[dco_count] = P2[k]; > - goto found; > - } > - > - } > + static const int even_dividers[] = { 4, 6, 8, 10, 12, 14, 16, 18, 20, > + 24, 28, 30, 32, 36, 40, 42, 44, > + 48, 52, 54, 56, 60, 64, 66, 68, > + 70, 72, 76, 78, 80, 84, 88, 90, > + 92, 96, 98 }; > + static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 }; > + static const struct { > + const int *list; > + int n_dividers; > + } dividers[] = { > + { even_dividers, ARRAY_SIZE(even_dividers) }, > + { odd_dividers, ARRAY_SIZE(odd_dividers) }, > + }; > + struct skl_wrpll_context ctx; > + unsigned int dco, d, i; > + unsigned int p0, p1, p2; > + > + skl_wrpll_context_init(&ctx); > + > + for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { > + for (d = 0; d < ARRAY_SIZE(dividers); d++) { From what I can tell by reading the documentation, it really seems that the two lines above should be inverted. First we do everything for the even (including all DCOs), and only then for the odd. It's easier to realize this when you look at the part of the doc that says, in order: "next candidate divider \n next DCO central frequency \n next divider parity". Anyway, this is not exactly a problem right now since we never break the loop, but you change this on the next patch, so it's probably best to discuss this here. > + for (i = 0; i < dividers[d].n_dividers; i++) { > + unsigned int p = dividers[d].list[i]; > + uint64_t dco_freq = p * afe_clock; > + > + skl_wrpll_try_divider(&ctx, > + dco_central_freq[dco], > + dco_freq, > + p); > } > } > - > -found: > - if (found) { > - dco_central_freq_deviation[dco_count] = > - div64_u64(10000 * > - abs_diff(candidate_p * afe_clock, > - dco_central_freq[dco_count]), > - dco_central_freq[dco_count]); > - > - if (dco_central_freq_deviation[dco_count] < > - min_dco_deviation) { > - min_dco_deviation = > - dco_central_freq_deviation[dco_count]; > - min_dco_index = dco_count; > - } > - } > - > - if (min_dco_index > 2 && dco_count == 2) { > - /* oh well, we tried... */ > - if (retry_with_odd) > - break; > - > - retry_with_odd = true; > - dco_count = 0; > - } > } > > - if (WARN(min_dco_index > 2, > - "No valid parameters found for pixel clock: %dHz\n", clock)) > + if (!ctx.p) { > + DRM_DEBUG_DRIVER("No valid divider found for %dHz\n", clock); > return false; > + } > > - skl_wrpll_params_populate(wrpll_params, > - afe_clock, > - dco_central_freq[min_dco_index], > - candidate_p0[min_dco_index], > - candidate_p1[min_dco_index], > - candidate_p2[min_dco_index]); > + /* > + * gcc incorrectly analyses that these can be used without being > + * initialized. To be fair, it's hard to guess. Why didn't you just write the mathematical proof as a comment? I hope it's not too big to fit on the 80 column margin limit. Jokes aside, the only "blocker" would be the DCO first vs odd/even first. I'd like to hear your interpretation on this. > + */ > + p0 = p1 = p2 = 0; > + skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2); > + skl_wrpll_params_populate(wrpll_params, afe_clock, ctx.central_freq, > + p0, p1, p2); > > return true; > } > > - > static bool > skl_ddi_pll_select(struct intel_crtc *intel_crtc, > struct intel_crtc_state *crtc_state, > -- > 2.1.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
2015-05-27 18:28 GMT-03:00 Paulo Zanoni <przanoni@gmail.com>: > 2015-05-07 14:38 GMT-03:00 Damien Lespiau <damien.lespiau@intel.com>: >> The HW validation team came back from further testing with a slightly >> changed constraint on the deviation between the DCO frequency and the >> central frequency. Instead of +-4%, it's now +1%/-6%. >> >> Unfortunately, the previous algorithm didn't quite cope with these new >> constraints, the reason being that it wasn't thorough enough looking at >> the possible divider candidates. >> >> The new algorithm looks at all dividers, which is definitely a hammer >> approach (we could reduce further the set of dividers to good ones as a >> follow up, at the cost of a bit more complicated code). But, at least, >> we can now satisfy the +1%/+6% rule for all the "Well known" HDMI >> frequencies of my test set (373 entries). >> >> On that subject, the new code is quite extensively tested in >> intel-gpu-tools (tools/skl_compute_wrpll). >> >> Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> >> --- >> drivers/gpu/drm/i915/intel_ddi.c | 207 +++++++++++++++++++++++++-------------- >> 1 file changed, 133 insertions(+), 74 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c >> index a99fee1..381a8c9 100644 >> --- a/drivers/gpu/drm/i915/intel_ddi.c >> +++ b/drivers/gpu/drm/i915/intel_ddi.c >> @@ -1100,6 +1100,99 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc, >> return true; >> } >> >> +struct skl_wrpll_context { >> + uint32_t min_pdeviation; /* record the minimum deviations to */ >> + uint32_t min_ndeviation; /* compare candidates */ >> + uint64_t central_freq; /* chosen central freq */ >> + uint64_t dco_freq; /* chosen dco freq */ >> + unsigned int p; /* chosen divider */ >> +}; >> + >> +static void skl_wrpll_context_init(struct skl_wrpll_context *ctx) >> +{ >> + memset(ctx, 0, sizeof(*ctx)); >> + >> + /* DCO freq must be within +1%/-6% of the DCO central freq */ >> + ctx->min_pdeviation = 100; >> + ctx-> min_ndeviation = 600; > > Since this is called only once, you could just have initialized the > struct by the usual way... > > Also, there's a white space on the "ctx-> min_ndeviation" part. > >> +} >> + >> +static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx, >> + uint64_t central_freq, >> + uint64_t dco_freq, >> + unsigned int divider) >> +{ >> + uint64_t deviation; >> + >> + deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq), >> + central_freq); >> + >> + /* positive deviation */ >> + if (dco_freq >= central_freq) { >> + if (deviation < ctx->min_pdeviation) { >> + ctx->min_pdeviation = deviation; >> + ctx->central_freq = central_freq; >> + ctx->dco_freq = dco_freq; >> + ctx->p = divider; >> + } >> + /* negative deviation */ >> + } else if (deviation < ctx->min_ndeviation) { >> + ctx->min_ndeviation = deviation; >> + ctx->central_freq = central_freq; >> + ctx->dco_freq = dco_freq; >> + ctx->p = divider; >> + } Some additional comments: Don't we want to break the loop in case we reach a point where any of the deviations is zero? Also notice that, due to the way we loop over the variables at the main func, we will always pick the last deviation that was "improved" (positive or negative), as opposed to the very minimal deviation. In other words, if the last thing our algorithm did was move the ndeviation from 600 to 599, we will pick this, even if, in previous iterations, we moved the pdeviation from 100 to 1. Is this really what we want? Maybe we want to compare min_pdeviation against min_ndeviation before picking central_freq, dco_freq and p? Also, if we loop over the *odd* dividers before looping over the *even* divers, and change the comparisons to "<=" instead of just "<", and don't "break the loop in case we reach 0 variation" for the odd dividers, then our algorithm will give preference to even dividers in case we find the same deviation on both odd and even dividers (in other words, this will give you the implementation of the alternative interpretation of the spec, which we're discussing on patch 13). I know you probably don't have any of these answers, so I won't block the patch review on the problems on this email. But I'd at least like to know your opinions here.Maybe we could send some additional questions to the spec writers. >> +} >> + >> +static void skl_wrpll_get_multipliers(unsigned int p, >> + unsigned int *p0 /* out */, >> + unsigned int *p1 /* out */, >> + unsigned int *p2 /* out */) >> +{ >> + /* even dividers */ >> + if (p % 2 == 0) { >> + unsigned int half = p / 2; >> + >> + if (half == 1 || half == 2 || half == 3 || half == 5) { >> + *p0 = 2; >> + *p1 = 1; >> + *p2 = half; >> + } else if (half % 2 == 0) { >> + *p0 = 2; >> + *p1 = half / 2; >> + *p2 = 2; >> + } else if (half % 3 == 0) { >> + *p0 = 3; >> + *p1 = half / 3; >> + *p2 = 2; >> + } else if (half % 7 == 0) { >> + *p0 = 7; >> + *p1 = half / 7; >> + *p2 = 2; >> + } >> + } else if (p == 3 || p == 9) { /* 3, 5, 7, 9, 15, 21, 35 */ >> + *p0 = 3; >> + *p1 = 1; >> + *p2 = p / 3; >> + } else if (p == 5 || p == 7) { >> + *p0 = p; >> + *p1 = 1; >> + *p2 = 1; >> + } else if (p == 15) { >> + *p0 = 3; >> + *p1 = 1; >> + *p2 = 5; >> + } else if (p == 21) { >> + *p0 = 7; >> + *p1 = 1; >> + *p2 = 3; >> + } else if (p == 35) { >> + *p0 = 7; >> + *p1 = 1; >> + *p2 = 5; >> + } >> +} >> + >> struct skl_wrpll_params { >> uint32_t dco_fraction; >> uint32_t dco_integer; >> @@ -1185,90 +1278,56 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, >> uint64_t dco_central_freq[3] = {8400000000ULL, >> 9000000000ULL, >> 9600000000ULL}; >> - uint32_t min_dco_deviation = 400; >> - uint32_t min_dco_index = 3; >> - uint32_t P0[4] = {1, 2, 3, 7}; >> - uint32_t P2[4] = {1, 2, 3, 5}; >> - bool found = false; >> - uint32_t candidate_p = 0; >> - uint32_t candidate_p0[3] = {0}, candidate_p1[3] = {0}; >> - uint32_t candidate_p2[3] = {0}; >> - uint32_t dco_central_freq_deviation[3]; >> - uint32_t i, P1, k, dco_count; >> - bool retry_with_odd = false; >> - >> - /* Determine P0, P1 or P2 */ >> - for (dco_count = 0; dco_count < 3; dco_count++) { >> - found = false; >> - candidate_p = >> - div64_u64(dco_central_freq[dco_count], afe_clock); >> - if (retry_with_odd == false) >> - candidate_p = (candidate_p % 2 == 0 ? >> - candidate_p : candidate_p + 1); >> - >> - for (P1 = 1; P1 < candidate_p; P1++) { >> - for (i = 0; i < 4; i++) { >> - if (!(P0[i] != 1 || P1 == 1)) >> - continue; >> - >> - for (k = 0; k < 4; k++) { >> - if (P1 != 1 && P2[k] != 2) >> - continue; >> - >> - if (candidate_p == P0[i] * P1 * P2[k]) { >> - /* Found possible P0, P1, P2 */ >> - found = true; >> - candidate_p0[dco_count] = P0[i]; >> - candidate_p1[dco_count] = P1; >> - candidate_p2[dco_count] = P2[k]; >> - goto found; >> - } >> - >> - } >> + static const int even_dividers[] = { 4, 6, 8, 10, 12, 14, 16, 18, 20, >> + 24, 28, 30, 32, 36, 40, 42, 44, >> + 48, 52, 54, 56, 60, 64, 66, 68, >> + 70, 72, 76, 78, 80, 84, 88, 90, >> + 92, 96, 98 }; >> + static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 }; >> + static const struct { >> + const int *list; >> + int n_dividers; >> + } dividers[] = { >> + { even_dividers, ARRAY_SIZE(even_dividers) }, >> + { odd_dividers, ARRAY_SIZE(odd_dividers) }, >> + }; >> + struct skl_wrpll_context ctx; >> + unsigned int dco, d, i; >> + unsigned int p0, p1, p2; >> + >> + skl_wrpll_context_init(&ctx); >> + >> + for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { >> + for (d = 0; d < ARRAY_SIZE(dividers); d++) { > > From what I can tell by reading the documentation, it really seems > that the two lines above should be inverted. First we do everything > for the even (including all DCOs), and only then for the odd. It's > easier to realize this when you look at the part of the doc that says, > in order: "next candidate divider \n next DCO central frequency \n > next divider parity". Anyway, this is not exactly a problem right now > since we never break the loop, but you change this on the next patch, > so it's probably best to discuss this here. > >> + for (i = 0; i < dividers[d].n_dividers; i++) { >> + unsigned int p = dividers[d].list[i]; >> + uint64_t dco_freq = p * afe_clock; >> + >> + skl_wrpll_try_divider(&ctx, >> + dco_central_freq[dco], >> + dco_freq, >> + p); >> } >> } >> - >> -found: >> - if (found) { >> - dco_central_freq_deviation[dco_count] = >> - div64_u64(10000 * >> - abs_diff(candidate_p * afe_clock, >> - dco_central_freq[dco_count]), >> - dco_central_freq[dco_count]); >> - >> - if (dco_central_freq_deviation[dco_count] < >> - min_dco_deviation) { >> - min_dco_deviation = >> - dco_central_freq_deviation[dco_count]; >> - min_dco_index = dco_count; >> - } >> - } >> - >> - if (min_dco_index > 2 && dco_count == 2) { >> - /* oh well, we tried... */ >> - if (retry_with_odd) >> - break; >> - >> - retry_with_odd = true; >> - dco_count = 0; >> - } >> } >> >> - if (WARN(min_dco_index > 2, >> - "No valid parameters found for pixel clock: %dHz\n", clock)) >> + if (!ctx.p) { >> + DRM_DEBUG_DRIVER("No valid divider found for %dHz\n", clock); >> return false; >> + } >> >> - skl_wrpll_params_populate(wrpll_params, >> - afe_clock, >> - dco_central_freq[min_dco_index], >> - candidate_p0[min_dco_index], >> - candidate_p1[min_dco_index], >> - candidate_p2[min_dco_index]); >> + /* >> + * gcc incorrectly analyses that these can be used without being >> + * initialized. To be fair, it's hard to guess. > > Why didn't you just write the mathematical proof as a comment? I hope > it's not too big to fit on the 80 column margin limit. > > Jokes aside, the only "blocker" would be the DCO first vs odd/even > first. I'd like to hear your interpretation on this. > >> + */ >> + p0 = p1 = p2 = 0; >> + skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2); >> + skl_wrpll_params_populate(wrpll_params, afe_clock, ctx.central_freq, >> + p0, p1, p2); >> >> return true; >> } >> >> - >> static bool >> skl_ddi_pll_select(struct intel_crtc *intel_crtc, >> struct intel_crtc_state *crtc_state, >> -- >> 2.1.0 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx > > > > -- > Paulo Zanoni
On Wed, May 27, 2015 at 06:28:06PM -0300, Paulo Zanoni wrote: > > @@ -1185,90 +1278,56 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, > > uint64_t dco_central_freq[3] = {8400000000ULL, > > 9000000000ULL, > > 9600000000ULL}; > > + static const int even_dividers[] = { 4, 6, 8, 10, 12, 14, 16, 18, 20, > > + 24, 28, 30, 32, 36, 40, 42, 44, > > + 48, 52, 54, 56, 60, 64, 66, 68, > > + 70, 72, 76, 78, 80, 84, 88, 90, > > + 92, 96, 98 }; > > + static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 }; > > + static const struct { > > + const int *list; > > + int n_dividers; > > + } dividers[] = { > > + { even_dividers, ARRAY_SIZE(even_dividers) }, > > + { odd_dividers, ARRAY_SIZE(odd_dividers) }, > > + }; > > + struct skl_wrpll_context ctx; > > + unsigned int dco, d, i; > > + unsigned int p0, p1, p2; > > + > > + skl_wrpll_context_init(&ctx); > > + > > + for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { > > + for (d = 0; d < ARRAY_SIZE(dividers); d++) { > > From what I can tell by reading the documentation, it really seems > that the two lines above should be inverted. First we do everything > for the even (including all DCOs), and only then for the odd. It's > easier to realize this when you look at the part of the doc that says, > in order: "next candidate divider \n next DCO central frequency \n > next divider parity". Anyway, this is not exactly a problem right now > since we never break the loop, but you change this on the next patch, > so it's probably best to discuss this here. You're absolutely right, those 2 lines need to be inverted. > > + /* > > + * gcc incorrectly analyses that these can be used without being > > + * initialized. To be fair, it's hard to guess. > > Why didn't you just write the mathematical proof as a comment? I hope > it's not too big to fit on the 80 column margin limit. > > Jokes aside, the only "blocker" would be the DCO first vs odd/even > first. I'd like to hear your interpretation on this. > > > + */ > > + p0 = p1 = p2 = 0; > > + skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2); > > + skl_wrpll_params_populate(wrpll_params, afe_clock, ctx.central_freq, > > + p0, p1, p2); The "proof" is in tools/skl_compute_wrpll.c, in the test_multipliers() function that unit-tests skl_wrpll_get_multipliers().
On Wed, May 27, 2015 at 06:51:13PM -0300, Paulo Zanoni wrote: > >> +static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx, > >> + uint64_t central_freq, > >> + uint64_t dco_freq, > >> + unsigned int divider) > >> +{ > >> + uint64_t deviation; > >> + > >> + deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq), > >> + central_freq); > >> + > >> + /* positive deviation */ > >> + if (dco_freq >= central_freq) { > >> + if (deviation < ctx->min_pdeviation) { > >> + ctx->min_pdeviation = deviation; > >> + ctx->central_freq = central_freq; > >> + ctx->dco_freq = dco_freq; > >> + ctx->p = divider; > >> + } > >> + /* negative deviation */ > >> + } else if (deviation < ctx->min_ndeviation) { > >> + ctx->min_ndeviation = deviation; > >> + ctx->central_freq = central_freq; > >> + ctx->dco_freq = dco_freq; > >> + ctx->p = divider; > >> + } > > Some additional comments: > > Don't we want to break the loop in case we reach a point where any of > the deviations is zero? We could, haven't done it in the v2 of the patch, could be done as a follow up. > Also notice that, due to the way we loop over the variables at the > main func, we will always pick the last deviation that was "improved" > (positive or negative), as opposed to the very minimal deviation. In > other words, if the last thing our algorithm did was move the > ndeviation from 600 to 599, we will pick this, even if, in previous > iterations, we moved the pdeviation from 100 to 1. Is this really what > we want? Maybe we want to compare min_pdeviation against > min_ndeviation before picking central_freq, dco_freq and p? That seems to be a (pretty big!) deficiency of the documented algorithm, that's definitely something improving the average deviation in the i-g-t test (average deviation 206.52 Vs 194.47) > Also, if we loop over the *odd* dividers before looping over the > *even* divers, and change the comparisons to "<=" instead of just "<", > and don't "break the loop in case we reach 0 variation" for the odd > dividers, then our algorithm will give preference to even dividers in > case we find the same deviation on both odd and even dividers (in > other words, this will give you the implementation of the alternative > interpretation of the spec, which we're discussing on patch 13). But then it would not use an even divider that has a slightly worse deviation. My reading of the specs is: - If there's an even divider with the deviation fitting in the +1%/-6% constraint, choose it. (if there are several such even dividers, choose the one minimizing the deviation) - If we can't find an even divider within +1%/-6%, settle for an odd divider that satisfies those constraints. > I know you probably don't have any of these answers, so I won't block > the patch review on the problems on this email. But I'd at least like > to know your opinions here.Maybe we could send some additional > questions to the spec writers. I do have some answers :) at least assuming the interpreation above is the correct one. The two big suggestions you have make quite a difference in the 2 metrics I gather in the i-g-t test: v1 of this series: even/odd dividers: 338/35 average deviation: 206.52 v2 of this series: even/odd dividers: 363/10 average deviation: 194.47 It's also easier to follow the changes as diffs in the corresponding i-g-t series than the v2 of the kernel patches. testdisplay still cycles through the modes on the HDMI displays I have.
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index a99fee1..381a8c9 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1100,6 +1100,99 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc, return true; } +struct skl_wrpll_context { + uint32_t min_pdeviation; /* record the minimum deviations to */ + uint32_t min_ndeviation; /* compare candidates */ + uint64_t central_freq; /* chosen central freq */ + uint64_t dco_freq; /* chosen dco freq */ + unsigned int p; /* chosen divider */ +}; + +static void skl_wrpll_context_init(struct skl_wrpll_context *ctx) +{ + memset(ctx, 0, sizeof(*ctx)); + + /* DCO freq must be within +1%/-6% of the DCO central freq */ + ctx->min_pdeviation = 100; + ctx-> min_ndeviation = 600; +} + +static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx, + uint64_t central_freq, + uint64_t dco_freq, + unsigned int divider) +{ + uint64_t deviation; + + deviation = div64_u64(10000 * abs_diff(dco_freq, central_freq), + central_freq); + + /* positive deviation */ + if (dco_freq >= central_freq) { + if (deviation < ctx->min_pdeviation) { + ctx->min_pdeviation = deviation; + ctx->central_freq = central_freq; + ctx->dco_freq = dco_freq; + ctx->p = divider; + } + /* negative deviation */ + } else if (deviation < ctx->min_ndeviation) { + ctx->min_ndeviation = deviation; + ctx->central_freq = central_freq; + ctx->dco_freq = dco_freq; + ctx->p = divider; + } +} + +static void skl_wrpll_get_multipliers(unsigned int p, + unsigned int *p0 /* out */, + unsigned int *p1 /* out */, + unsigned int *p2 /* out */) +{ + /* even dividers */ + if (p % 2 == 0) { + unsigned int half = p / 2; + + if (half == 1 || half == 2 || half == 3 || half == 5) { + *p0 = 2; + *p1 = 1; + *p2 = half; + } else if (half % 2 == 0) { + *p0 = 2; + *p1 = half / 2; + *p2 = 2; + } else if (half % 3 == 0) { + *p0 = 3; + *p1 = half / 3; + *p2 = 2; + } else if (half % 7 == 0) { + *p0 = 7; + *p1 = half / 7; + *p2 = 2; + } + } else if (p == 3 || p == 9) { /* 3, 5, 7, 9, 15, 21, 35 */ + *p0 = 3; + *p1 = 1; + *p2 = p / 3; + } else if (p == 5 || p == 7) { + *p0 = p; + *p1 = 1; + *p2 = 1; + } else if (p == 15) { + *p0 = 3; + *p1 = 1; + *p2 = 5; + } else if (p == 21) { + *p0 = 7; + *p1 = 1; + *p2 = 3; + } else if (p == 35) { + *p0 = 7; + *p1 = 1; + *p2 = 5; + } +} + struct skl_wrpll_params { uint32_t dco_fraction; uint32_t dco_integer; @@ -1185,90 +1278,56 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, uint64_t dco_central_freq[3] = {8400000000ULL, 9000000000ULL, 9600000000ULL}; - uint32_t min_dco_deviation = 400; - uint32_t min_dco_index = 3; - uint32_t P0[4] = {1, 2, 3, 7}; - uint32_t P2[4] = {1, 2, 3, 5}; - bool found = false; - uint32_t candidate_p = 0; - uint32_t candidate_p0[3] = {0}, candidate_p1[3] = {0}; - uint32_t candidate_p2[3] = {0}; - uint32_t dco_central_freq_deviation[3]; - uint32_t i, P1, k, dco_count; - bool retry_with_odd = false; - - /* Determine P0, P1 or P2 */ - for (dco_count = 0; dco_count < 3; dco_count++) { - found = false; - candidate_p = - div64_u64(dco_central_freq[dco_count], afe_clock); - if (retry_with_odd == false) - candidate_p = (candidate_p % 2 == 0 ? - candidate_p : candidate_p + 1); - - for (P1 = 1; P1 < candidate_p; P1++) { - for (i = 0; i < 4; i++) { - if (!(P0[i] != 1 || P1 == 1)) - continue; - - for (k = 0; k < 4; k++) { - if (P1 != 1 && P2[k] != 2) - continue; - - if (candidate_p == P0[i] * P1 * P2[k]) { - /* Found possible P0, P1, P2 */ - found = true; - candidate_p0[dco_count] = P0[i]; - candidate_p1[dco_count] = P1; - candidate_p2[dco_count] = P2[k]; - goto found; - } - - } + static const int even_dividers[] = { 4, 6, 8, 10, 12, 14, 16, 18, 20, + 24, 28, 30, 32, 36, 40, 42, 44, + 48, 52, 54, 56, 60, 64, 66, 68, + 70, 72, 76, 78, 80, 84, 88, 90, + 92, 96, 98 }; + static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 }; + static const struct { + const int *list; + int n_dividers; + } dividers[] = { + { even_dividers, ARRAY_SIZE(even_dividers) }, + { odd_dividers, ARRAY_SIZE(odd_dividers) }, + }; + struct skl_wrpll_context ctx; + unsigned int dco, d, i; + unsigned int p0, p1, p2; + + skl_wrpll_context_init(&ctx); + + for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { + for (d = 0; d < ARRAY_SIZE(dividers); d++) { + for (i = 0; i < dividers[d].n_dividers; i++) { + unsigned int p = dividers[d].list[i]; + uint64_t dco_freq = p * afe_clock; + + skl_wrpll_try_divider(&ctx, + dco_central_freq[dco], + dco_freq, + p); } } - -found: - if (found) { - dco_central_freq_deviation[dco_count] = - div64_u64(10000 * - abs_diff(candidate_p * afe_clock, - dco_central_freq[dco_count]), - dco_central_freq[dco_count]); - - if (dco_central_freq_deviation[dco_count] < - min_dco_deviation) { - min_dco_deviation = - dco_central_freq_deviation[dco_count]; - min_dco_index = dco_count; - } - } - - if (min_dco_index > 2 && dco_count == 2) { - /* oh well, we tried... */ - if (retry_with_odd) - break; - - retry_with_odd = true; - dco_count = 0; - } } - if (WARN(min_dco_index > 2, - "No valid parameters found for pixel clock: %dHz\n", clock)) + if (!ctx.p) { + DRM_DEBUG_DRIVER("No valid divider found for %dHz\n", clock); return false; + } - skl_wrpll_params_populate(wrpll_params, - afe_clock, - dco_central_freq[min_dco_index], - candidate_p0[min_dco_index], - candidate_p1[min_dco_index], - candidate_p2[min_dco_index]); + /* + * gcc incorrectly analyses that these can be used without being + * initialized. To be fair, it's hard to guess. + */ + p0 = p1 = p2 = 0; + skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2); + skl_wrpll_params_populate(wrpll_params, afe_clock, ctx.central_freq, + p0, p1, p2); return true; } - static bool skl_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state,
The HW validation team came back from further testing with a slightly changed constraint on the deviation between the DCO frequency and the central frequency. Instead of +-4%, it's now +1%/-6%. Unfortunately, the previous algorithm didn't quite cope with these new constraints, the reason being that it wasn't thorough enough looking at the possible divider candidates. The new algorithm looks at all dividers, which is definitely a hammer approach (we could reduce further the set of dividers to good ones as a follow up, at the cost of a bit more complicated code). But, at least, we can now satisfy the +1%/+6% rule for all the "Well known" HDMI frequencies of my test set (373 entries). On that subject, the new code is quite extensively tested in intel-gpu-tools (tools/skl_compute_wrpll). Signed-off-by: Damien Lespiau <damien.lespiau@intel.com> --- drivers/gpu/drm/i915/intel_ddi.c | 207 +++++++++++++++++++++++++-------------- 1 file changed, 133 insertions(+), 74 deletions(-)