Message ID | 20190430152108.31814-5-shashank.sharma@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Enable Multi-segmented-gamma for ICL | expand |
On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote: > ICL introduces a new gamma correction mode in display engine, called > multi-segmented-gamma mode. This mode allows users to program the > darker region of the gamma curve with sueprfine precision. An > example use case for this is HDR curves (like PQ ST-2084). > > If we plot a gamma correction curve from value range between 0.0 to 1.0, > ICL's multi-segment has 3 different sections: > - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) > - fine segment: 257 values, ranges between 0 - 1/(128) > - corase segment: 257 values, ranges between 0 - 1 > > This patch: > - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), > so that userspace can program with highest precision supported. > - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. > - Adds functions to program/detect multi-segment gamma. > > V2: Addressed review comments from Ville > - separate function for superfine and fine segments. > - remove enum for segments. > - reuse last entry of the LUT as gc_max value. > - replace if() ....cond with switch...case in icl_load_luts. > - add an entry variable, instead of 'word' > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > > Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> > Signed-off-by: Shashank Sharma <shashank.sharma@intel.com> > Signed-off-by: Uma Shankar <uma.shankar@intel.com> > --- > drivers/gpu/drm/i915/i915_pci.c | 3 +- > drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++- > 2 files changed, 123 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index ffa2ee70a03d..83698951760b 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { > GEN(11), \ > .ddb_size = 2048, \ > .has_logical_ring_elsq = 1, \ > - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } > + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } Ugh. Thats one big LUT. But looks correct. > + Bogus newline. > > static const struct intel_device_info intel_icelake_11_info = { > GEN11_FEATURES, > diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c > index 6c341bea514c..49831e8d02fb 100644 > --- a/drivers/gpu/drm/i915/intel_color.c > +++ b/drivers/gpu/drm/i915/intel_color.c > @@ -41,6 +41,9 @@ > #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) > > #define LEGACY_LUT_LENGTH 256 > +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) > +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 > + > /* > * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point > * format). This macro takes the coefficient we want transformed and the > @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) > } > } > > +/* ilk+ "12.4" interpolated format (high 10 bits) */ > +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) > +{ > + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | > + (color->blue >> 6); > +} > + > +/* ilk+ "12.4" interpolated format (low 6 bits) */ > +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) > +{ > + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | > + (color->blue & 0x3f); > +} > + > +static void > +icl_load_gcmax(const struct intel_crtc_state *crtc_state, > + const struct drm_color_lut *entry) Indentation looks off. Also s/entry/color/ to match the other similarish funcs maybe? > +{ > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > + enum pipe pipe = crtc->pipe; > + > + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); > +} > + > +static void > +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) > +{ > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > + const struct drm_color_lut *lut = blob->data; > + enum pipe pipe = crtc->pipe; > + u32 i; > + > + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) > + return; These checks aren't needed. Just dead code. > + > + /* > + * Every entry in the multi-segment LUT is corresponding to a superfine > + * segment step which is 1/(8 * 128 * 256). > + * > + * Superfine segment has 9 entries, corresponding to values > + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). > + */ > + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > + > + for (i = 0; i < 9; i++) { > + const struct drm_color_lut *entry = &lut[i]; > + > + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > + ilk_lut_12p4_udw(entry)); ldw should come before udw. > + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > + ilk_lut_12p4_ldw(entry)); > + } > +} > + > +static void > +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) > +{ > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > + const struct drm_color_lut *lut = blob->data; > + const struct drm_color_lut *entry; 'entry' declaration can be moved into the loops. > + enum pipe pipe = crtc->pipe; > + u32 i; > + > + if (!lut || (drm_color_lut_size(blob) < ICL_GAMMA_MULTISEG_LUT_LENGTH)) > + return; More checks that aren't needed. > + > + /* > + * Every entry in the multi-segment LUT is corresponding to a superfine > + * segment step which is 1/(8*128*256). > + * > + * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) > + * ... 256/(128*256). So in order to program fine segment of LUT we > + * need to pick every 8'th entry in LUT, and program 256 indexes. > + * Fine segment's index 0 is programmed in HW, and it starts from > + * index 1. The wording here is a bit confusing. I guess the problem is what to call things. PAL_PREC_INDEX[0/1] is what we program, but that maps to the point seg2[1] with seg2[0] being unused by the hw. Well, the spec says it's implicit but IIRC I was told long ago that it's not actually used. Not sure how to word that in the best way. Maybe something like? /* * Fine segment (seg2) ... * * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], * with seg2[0] being unused by the hardware. */ Not sure that's any clearer. > + */ > + I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > + for (i = 1; i < 257; i++) { > + entry = &lut[i * 8]; > + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); > + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); > + } > + > + /* > + * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, > + * 1/256, 2/256 ...256/256. As per the description of each entry in LUT > + * above, we need to pick every 8 * 128 = 1024th entry in LUT, and > + * program 256 of those. > + */ Could make a note here stating that seg3[0] and seg3[1] are also unused by the hardware, even though we have to program them to advance the index. I don't see it mentioned in the spec, but this one I definitely remember confirming from Art way back when. However I never verified that on actual hw. We could also consider just programming those two entries to 0 and start the actual coarse segment programming from index 2. Or we could skip them by reprogramming the index directly. > + for (i = 0; i < 256; i++) { > + entry = &lut[i * 1024]; s/1024/8 * 128/ maybe? > + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); > + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); > + } > + > + icl_load_gcmax(crtc_state, entry); > + ivb_load_lut_ext_max(crtc); > +} > + > static void icl_load_luts(const struct intel_crtc_state *crtc_state) > { > const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; > @@ -775,10 +885,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) > if (crtc_state->base.degamma_lut) > glk_load_degamma_lut(crtc_state); > > - if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == > - GAMMA_MODE_MODE_8BIT) { > + switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { > + case GAMMA_MODE_MODE_8BIT: > i9xx_load_luts(crtc_state); > - } else { > + break; > + > + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: > + icl_program_gamma_superfine_segment(crtc_state); > + icl_program_gamma_multi_segment(crtc_state); > + break; > + > + default: > bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); > ivb_load_lut_ext_max(crtc); > } > @@ -1209,7 +1326,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) > crtc_state_is_legacy_gamma(crtc_state)) > gamma_mode |= GAMMA_MODE_MODE_8BIT; > else > - gamma_mode |= GAMMA_MODE_MODE_10BIT; > + gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED; > > return gamma_mode; > } > -- > 2.17.1
Regards Shashank On 5/3/2019 9:20 PM, Ville Syrjälä wrote: > On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote: >> ICL introduces a new gamma correction mode in display engine, called >> multi-segmented-gamma mode. This mode allows users to program the >> darker region of the gamma curve with sueprfine precision. An >> example use case for this is HDR curves (like PQ ST-2084). >> >> If we plot a gamma correction curve from value range between 0.0 to 1.0, >> ICL's multi-segment has 3 different sections: >> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) >> - fine segment: 257 values, ranges between 0 - 1/(128) >> - corase segment: 257 values, ranges between 0 - 1 >> >> This patch: >> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), >> so that userspace can program with highest precision supported. >> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. >> - Adds functions to program/detect multi-segment gamma. >> >> V2: Addressed review comments from Ville >> - separate function for superfine and fine segments. >> - remove enum for segments. >> - reuse last entry of the LUT as gc_max value. >> - replace if() ....cond with switch...case in icl_load_luts. >> - add an entry variable, instead of 'word' >> >> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> >> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> >> >> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> >> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com> >> Signed-off-by: Uma Shankar <uma.shankar@intel.com> >> --- >> drivers/gpu/drm/i915/i915_pci.c | 3 +- >> drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++- >> 2 files changed, 123 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c >> index ffa2ee70a03d..83698951760b 100644 >> --- a/drivers/gpu/drm/i915/i915_pci.c >> +++ b/drivers/gpu/drm/i915/i915_pci.c >> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { >> GEN(11), \ >> .ddb_size = 2048, \ >> .has_logical_ring_elsq = 1, \ >> - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } >> + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } > Ugh. Thats one big LUT. But looks correct. > >> + > Bogus newline. Got it. >> >> static const struct intel_device_info intel_icelake_11_info = { >> GEN11_FEATURES, >> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c >> index 6c341bea514c..49831e8d02fb 100644 >> --- a/drivers/gpu/drm/i915/intel_color.c >> +++ b/drivers/gpu/drm/i915/intel_color.c >> @@ -41,6 +41,9 @@ >> #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) >> >> #define LEGACY_LUT_LENGTH 256 >> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) >> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 >> + >> /* >> * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point >> * format). This macro takes the coefficient we want transformed and the >> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) >> } >> } >> >> +/* ilk+ "12.4" interpolated format (high 10 bits) */ >> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) >> +{ >> + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | >> + (color->blue >> 6); >> +} >> + >> +/* ilk+ "12.4" interpolated format (low 6 bits) */ >> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) >> +{ >> + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | >> + (color->blue & 0x3f); >> +} >> + >> +static void >> +icl_load_gcmax(const struct intel_crtc_state *crtc_state, >> + const struct drm_color_lut *entry) > Indentation looks off. Also s/entry/color/ to match the other similarish > funcs maybe? Sure. >> +{ >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >> + enum pipe pipe = crtc->pipe; >> + >> + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); >> +} >> + >> +static void >> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) >> +{ >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; >> + const struct drm_color_lut *lut = blob->data; >> + enum pipe pipe = crtc->pipe; >> + u32 i; >> + >> + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) >> + return; > These checks aren't needed. Just dead code. Will remove this and similars. >> + >> + /* >> + * Every entry in the multi-segment LUT is corresponding to a superfine >> + * segment step which is 1/(8 * 128 * 256). >> + * >> + * Superfine segment has 9 entries, corresponding to values >> + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). >> + */ >> + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); >> + >> + for (i = 0; i < 9; i++) { >> + const struct drm_color_lut *entry = &lut[i]; >> + >> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), >> + ilk_lut_12p4_udw(entry)); > ldw should come before udw. Got it. >> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), >> + ilk_lut_12p4_ldw(entry)); >> + } >> +} >> + >> +static void >> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) >> +{ >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; >> + const struct drm_color_lut *lut = blob->data; >> + const struct drm_color_lut *entry; > 'entry' declaration can be moved into the loops. Its being used in multiple loops, also being used for GCMax outside the loop. >> + enum pipe pipe = crtc->pipe; >> + u32 i; >> + >> + if (!lut || (drm_color_lut_size(blob) < ICL_GAMMA_MULTISEG_LUT_LENGTH)) >> + return; > More checks that aren't needed. Got it. >> + >> + /* >> + * Every entry in the multi-segment LUT is corresponding to a superfine >> + * segment step which is 1/(8*128*256). >> + * >> + * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) >> + * ... 256/(128*256). So in order to program fine segment of LUT we >> + * need to pick every 8'th entry in LUT, and program 256 indexes. >> + * Fine segment's index 0 is programmed in HW, and it starts from >> + * index 1. > The wording here is a bit confusing. I guess the problem is what to call > things. PAL_PREC_INDEX[0/1] is what we program, but that maps to the point > seg2[1] with seg2[0] being unused by the hw. Well, the spec says it's > implicit but IIRC I was told long ago that it's not actually used. > > Not sure how to word that in the best way. Maybe something like? > > /* > * Fine segment (seg2) ... > * > * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], > * with seg2[0] being unused by the hardware. > */ > > Not sure that's any clearer. Ok, will try to come up with something in similar lines. >> + */ >> + I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); >> + for (i = 1; i < 257; i++) { >> + entry = &lut[i * 8]; >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); >> + } >> + >> + /* >> + * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, >> + * 1/256, 2/256 ...256/256. As per the description of each entry in LUT >> + * above, we need to pick every 8 * 128 = 1024th entry in LUT, and >> + * program 256 of those. >> + */ > Could make a note here stating that seg3[0] and seg3[1] are also unused > by the hardware, even though we have to program them to advance the > index. I don't see it mentioned in the spec, but this one I definitely > remember confirming from Art way back when. However I never verified > that on actual hw. We could also consider just programming those two > entries to 0 and start the actual coarse segment programming from index 2. > Or we could skip them by reprogramming the index directly. If they are not being used, does it matter what and if we program into them ? We can add a note though, mentioning this. >> + for (i = 0; i < 256; i++) { >> + entry = &lut[i * 1024]; > s/1024/8 * 128/ maybe? Sure. Regards Shashank > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); >> + } >> + >> + icl_load_gcmax(crtc_state, entry); >> + ivb_load_lut_ext_max(crtc); >> +} >> + >> static void icl_load_luts(const struct intel_crtc_state *crtc_state) >> { >> const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; >> @@ -775,10 +885,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) >> if (crtc_state->base.degamma_lut) >> glk_load_degamma_lut(crtc_state); >> >> - if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == >> - GAMMA_MODE_MODE_8BIT) { >> + switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { >> + case GAMMA_MODE_MODE_8BIT: >> i9xx_load_luts(crtc_state); >> - } else { >> + break; >> + >> + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: >> + icl_program_gamma_superfine_segment(crtc_state); >> + icl_program_gamma_multi_segment(crtc_state); >> + break; >> + >> + default: >> bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); >> ivb_load_lut_ext_max(crtc); >> } >> @@ -1209,7 +1326,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) >> crtc_state_is_legacy_gamma(crtc_state)) >> gamma_mode |= GAMMA_MODE_MODE_8BIT; >> else >> - gamma_mode |= GAMMA_MODE_MODE_10BIT; >> + gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED; >> >> return gamma_mode; >> } >> -- >> 2.17.1
On Mon, May 06, 2019 at 04:09:33PM +0530, Sharma, Shashank wrote: > Regards > > Shashank > > On 5/3/2019 9:20 PM, Ville Syrjälä wrote: > > On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote: > >> ICL introduces a new gamma correction mode in display engine, called > >> multi-segmented-gamma mode. This mode allows users to program the > >> darker region of the gamma curve with sueprfine precision. An > >> example use case for this is HDR curves (like PQ ST-2084). > >> > >> If we plot a gamma correction curve from value range between 0.0 to 1.0, > >> ICL's multi-segment has 3 different sections: > >> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) > >> - fine segment: 257 values, ranges between 0 - 1/(128) > >> - corase segment: 257 values, ranges between 0 - 1 > >> > >> This patch: > >> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), > >> so that userspace can program with highest precision supported. > >> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. > >> - Adds functions to program/detect multi-segment gamma. > >> > >> V2: Addressed review comments from Ville > >> - separate function for superfine and fine segments. > >> - remove enum for segments. > >> - reuse last entry of the LUT as gc_max value. > >> - replace if() ....cond with switch...case in icl_load_luts. > >> - add an entry variable, instead of 'word' > >> > >> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > >> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> > >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > >> > >> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> > >> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com> > >> Signed-off-by: Uma Shankar <uma.shankar@intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_pci.c | 3 +- > >> drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++- > >> 2 files changed, 123 insertions(+), 5 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > >> index ffa2ee70a03d..83698951760b 100644 > >> --- a/drivers/gpu/drm/i915/i915_pci.c > >> +++ b/drivers/gpu/drm/i915/i915_pci.c > >> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { > >> GEN(11), \ > >> .ddb_size = 2048, \ > >> .has_logical_ring_elsq = 1, \ > >> - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } > >> + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } > > Ugh. Thats one big LUT. But looks correct. > > > >> + > > Bogus newline. > Got it. > >> > >> static const struct intel_device_info intel_icelake_11_info = { > >> GEN11_FEATURES, > >> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c > >> index 6c341bea514c..49831e8d02fb 100644 > >> --- a/drivers/gpu/drm/i915/intel_color.c > >> +++ b/drivers/gpu/drm/i915/intel_color.c > >> @@ -41,6 +41,9 @@ > >> #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) > >> > >> #define LEGACY_LUT_LENGTH 256 > >> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) > >> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 > >> + > >> /* > >> * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point > >> * format). This macro takes the coefficient we want transformed and the > >> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) > >> } > >> } > >> > >> +/* ilk+ "12.4" interpolated format (high 10 bits) */ > >> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) > >> +{ > >> + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | > >> + (color->blue >> 6); > >> +} > >> + > >> +/* ilk+ "12.4" interpolated format (low 6 bits) */ > >> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) > >> +{ > >> + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | > >> + (color->blue & 0x3f); > >> +} > >> + > >> +static void > >> +icl_load_gcmax(const struct intel_crtc_state *crtc_state, > >> + const struct drm_color_lut *entry) > > Indentation looks off. Also s/entry/color/ to match the other similarish > > funcs maybe? > Sure. > >> +{ > >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >> + enum pipe pipe = crtc->pipe; > >> + > >> + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ > >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); > >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); > >> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); > >> +} > >> + > >> +static void > >> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) > >> +{ > >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > >> + const struct drm_color_lut *lut = blob->data; > >> + enum pipe pipe = crtc->pipe; > >> + u32 i; > >> + > >> + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) > >> + return; > > These checks aren't needed. Just dead code. > Will remove this and similars. > >> + > >> + /* > >> + * Every entry in the multi-segment LUT is corresponding to a superfine > >> + * segment step which is 1/(8 * 128 * 256). > >> + * > >> + * Superfine segment has 9 entries, corresponding to values > >> + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). > >> + */ > >> + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > >> + > >> + for (i = 0; i < 9; i++) { > >> + const struct drm_color_lut *entry = &lut[i]; > >> + > >> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > >> + ilk_lut_12p4_udw(entry)); > > ldw should come before udw. > Got it. > >> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > >> + ilk_lut_12p4_ldw(entry)); > >> + } > >> +} > >> + > >> +static void > >> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) > >> +{ > >> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > >> + const struct drm_color_lut *lut = blob->data; > >> + const struct drm_color_lut *entry; > > 'entry' declaration can be moved into the loops. > Its being used in multiple loops, IMO still better to move into the loops. We don't want to pass any information between the loops. > also being used for GCMax outside the > loop. Hmm. That might be an arguemnt for keeping it out. But the current gcmax usage looks broken. You're programming the same value into the last PAL_PREC index and GCMAX. > >> + enum pipe pipe = crtc->pipe; > >> + u32 i; > >> + > >> + if (!lut || (drm_color_lut_size(blob) < ICL_GAMMA_MULTISEG_LUT_LENGTH)) > >> + return; > > More checks that aren't needed. > Got it. > >> + > >> + /* > >> + * Every entry in the multi-segment LUT is corresponding to a superfine > >> + * segment step which is 1/(8*128*256). > >> + * > >> + * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) > >> + * ... 256/(128*256). So in order to program fine segment of LUT we > >> + * need to pick every 8'th entry in LUT, and program 256 indexes. > >> + * Fine segment's index 0 is programmed in HW, and it starts from > >> + * index 1. > > The wording here is a bit confusing. I guess the problem is what to call > > things. PAL_PREC_INDEX[0/1] is what we program, but that maps to the point > > seg2[1] with seg2[0] being unused by the hw. Well, the spec says it's > > implicit but IIRC I was told long ago that it's not actually used. > > > > Not sure how to word that in the best way. Maybe something like? > > > > /* > > * Fine segment (seg2) ... > > * > > * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], > > * with seg2[0] being unused by the hardware. > > */ > > > > Not sure that's any clearer. > Ok, will try to come up with something in similar lines. > >> + */ > >> + I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > >> + for (i = 1; i < 257; i++) { > >> + entry = &lut[i * 8]; > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); > >> + } > >> + > >> + /* > >> + * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, > >> + * 1/256, 2/256 ...256/256. As per the description of each entry in LUT > >> + * above, we need to pick every 8 * 128 = 1024th entry in LUT, and > >> + * program 256 of those. > >> + */ > > Could make a note here stating that seg3[0] and seg3[1] are also unused > > by the hardware, even though we have to program them to advance the > > index. I don't see it mentioned in the spec, but this one I definitely > > remember confirming from Art way back when. However I never verified > > that on actual hw. We could also consider just programming those two > > entries to 0 and start the actual coarse segment programming from index 2. > > Or we could skip them by reprogramming the index directly. > If they are not being used, does it matter what and if we program into > them ? We can add a note though, mentioning this. It shouldn't matter what we programing into them. But as mentioned I never actually confirmed this on actual hardware. Would be nice to double check that so we don't end up with incorrect comment. > >> + for (i = 0; i < 256; i++) { > >> + entry = &lut[i * 1024]; > > s/1024/8 * 128/ maybe? > > Sure. > > Regards > > Shashank > > > > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); > >> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); > >> + } > >> + > >> + icl_load_gcmax(crtc_state, entry); > >> + ivb_load_lut_ext_max(crtc); > >> +} > >> + > >> static void icl_load_luts(const struct intel_crtc_state *crtc_state) > >> { > >> const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; > >> @@ -775,10 +885,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) > >> if (crtc_state->base.degamma_lut) > >> glk_load_degamma_lut(crtc_state); > >> > >> - if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == > >> - GAMMA_MODE_MODE_8BIT) { > >> + switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { > >> + case GAMMA_MODE_MODE_8BIT: > >> i9xx_load_luts(crtc_state); > >> - } else { > >> + break; > >> + > >> + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: > >> + icl_program_gamma_superfine_segment(crtc_state); > >> + icl_program_gamma_multi_segment(crtc_state); > >> + break; > >> + > >> + default: > >> bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); > >> ivb_load_lut_ext_max(crtc); > >> } > >> @@ -1209,7 +1326,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) > >> crtc_state_is_legacy_gamma(crtc_state)) > >> gamma_mode |= GAMMA_MODE_MODE_8BIT; > >> else > >> - gamma_mode |= GAMMA_MODE_MODE_10BIT; > >> + gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED; > >> > >> return gamma_mode; > >> } > >> -- > >> 2.17.1
On 5/6/2019 5:55 PM, Ville Syrjälä wrote: > On Mon, May 06, 2019 at 04:09:33PM +0530, Sharma, Shashank wrote: >> Regards >> >> Shashank >> >> On 5/3/2019 9:20 PM, Ville Syrjälä wrote: >>> On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote: >>>> ICL introduces a new gamma correction mode in display engine, called >>>> multi-segmented-gamma mode. This mode allows users to program the >>>> darker region of the gamma curve with sueprfine precision. An >>>> example use case for this is HDR curves (like PQ ST-2084). >>>> >>>> If we plot a gamma correction curve from value range between 0.0 to 1.0, >>>> ICL's multi-segment has 3 different sections: >>>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) >>>> - fine segment: 257 values, ranges between 0 - 1/(128) >>>> - corase segment: 257 values, ranges between 0 - 1 >>>> >>>> This patch: >>>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), >>>> so that userspace can program with highest precision supported. >>>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. >>>> - Adds functions to program/detect multi-segment gamma. >>>> >>>> V2: Addressed review comments from Ville >>>> - separate function for superfine and fine segments. >>>> - remove enum for segments. >>>> - reuse last entry of the LUT as gc_max value. >>>> - replace if() ....cond with switch...case in icl_load_luts. >>>> - add an entry variable, instead of 'word' >>>> >>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> >>>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> >>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> >>>> >>>> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> >>>> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com> >>>> Signed-off-by: Uma Shankar <uma.shankar@intel.com> >>>> --- >>>> drivers/gpu/drm/i915/i915_pci.c | 3 +- >>>> drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++- >>>> 2 files changed, 123 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c >>>> index ffa2ee70a03d..83698951760b 100644 >>>> --- a/drivers/gpu/drm/i915/i915_pci.c >>>> +++ b/drivers/gpu/drm/i915/i915_pci.c >>>> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { >>>> GEN(11), \ >>>> .ddb_size = 2048, \ >>>> .has_logical_ring_elsq = 1, \ >>>> - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } >>>> + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } >>> Ugh. Thats one big LUT. But looks correct. >>> >>>> + >>> Bogus newline. >> Got it. >>>> >>>> static const struct intel_device_info intel_icelake_11_info = { >>>> GEN11_FEATURES, >>>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c >>>> index 6c341bea514c..49831e8d02fb 100644 >>>> --- a/drivers/gpu/drm/i915/intel_color.c >>>> +++ b/drivers/gpu/drm/i915/intel_color.c >>>> @@ -41,6 +41,9 @@ >>>> #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) >>>> >>>> #define LEGACY_LUT_LENGTH 256 >>>> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) >>>> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 >>>> + >>>> /* >>>> * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point >>>> * format). This macro takes the coefficient we want transformed and the >>>> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) >>>> } >>>> } >>>> >>>> +/* ilk+ "12.4" interpolated format (high 10 bits) */ >>>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) >>>> +{ >>>> + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | >>>> + (color->blue >> 6); >>>> +} >>>> + >>>> +/* ilk+ "12.4" interpolated format (low 6 bits) */ >>>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) >>>> +{ >>>> + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | >>>> + (color->blue & 0x3f); >>>> +} >>>> + >>>> +static void >>>> +icl_load_gcmax(const struct intel_crtc_state *crtc_state, >>>> + const struct drm_color_lut *entry) >>> Indentation looks off. Also s/entry/color/ to match the other similarish >>> funcs maybe? >> Sure. >>>> +{ >>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >>>> + enum pipe pipe = crtc->pipe; >>>> + >>>> + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ >>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); >>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); >>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); >>>> +} >>>> + >>>> +static void >>>> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) >>>> +{ >>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >>>> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; >>>> + const struct drm_color_lut *lut = blob->data; >>>> + enum pipe pipe = crtc->pipe; >>>> + u32 i; >>>> + >>>> + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) >>>> + return; >>> These checks aren't needed. Just dead code. >> Will remove this and similars. >>>> + >>>> + /* >>>> + * Every entry in the multi-segment LUT is corresponding to a superfine >>>> + * segment step which is 1/(8 * 128 * 256). >>>> + * >>>> + * Superfine segment has 9 entries, corresponding to values >>>> + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). >>>> + */ >>>> + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); >>>> + >>>> + for (i = 0; i < 9; i++) { >>>> + const struct drm_color_lut *entry = &lut[i]; >>>> + >>>> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), >>>> + ilk_lut_12p4_udw(entry)); >>> ldw should come before udw. >> Got it. >>>> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), >>>> + ilk_lut_12p4_ldw(entry)); >>>> + } >>>> +} >>>> + >>>> +static void >>>> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) >>>> +{ >>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >>>> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; >>>> + const struct drm_color_lut *lut = blob->data; >>>> + const struct drm_color_lut *entry; >>> 'entry' declaration can be moved into the loops. >> Its being used in multiple loops, > IMO still better to move into the loops. We don't want to pass any > information between the loops. > >> also being used for GCMax outside the >> loop. > Hmm. That might be an arguemnt for keeping it out. But the current > gcmax usage looks broken. You're programming the same value into > the last PAL_PREC index and GCMAX. Isn't this what you wanted ? IIRC, your recommendation was to program the highest values user wants to program in GCMAX reg. We also had a discussion on how user cant program 1.0, as we have LUT depth on 16 bit only, and we decided to use the last value of the LUT as GCMAX. Did I misunderstand anything there ? - Shashank >>>> + enum pipe pipe = crtc->pipe; >>>> + u32 i; >>>> + >>>> + if (!lut || (drm_color_lut_size(blob) < ICL_GAMMA_MULTISEG_LUT_LENGTH)) >>>> + return; >>> More checks that aren't needed. >> Got it. >>>> + >>>> + /* >>>> + * Every entry in the multi-segment LUT is corresponding to a superfine >>>> + * segment step which is 1/(8*128*256). >>>> + * >>>> + * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) >>>> + * ... 256/(128*256). So in order to program fine segment of LUT we >>>> + * need to pick every 8'th entry in LUT, and program 256 indexes. >>>> + * Fine segment's index 0 is programmed in HW, and it starts from >>>> + * index 1. >>> The wording here is a bit confusing. I guess the problem is what to call >>> things. PAL_PREC_INDEX[0/1] is what we program, but that maps to the point >>> seg2[1] with seg2[0] being unused by the hw. Well, the spec says it's >>> implicit but IIRC I was told long ago that it's not actually used. >>> >>> Not sure how to word that in the best way. Maybe something like? >>> >>> /* >>> * Fine segment (seg2) ... >>> * >>> * PAL_PREC_INDEX[0] and PAL_PREC_INDEX[1] map to seg2[1], >>> * with seg2[0] being unused by the hardware. >>> */ >>> >>> Not sure that's any clearer. >> Ok, will try to come up with something in similar lines. >>>> + */ >>>> + I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); >>>> + for (i = 1; i < 257; i++) { >>>> + entry = &lut[i * 8]; >>>> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); >>>> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); >>>> + } >>>> + >>>> + /* >>>> + * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, >>>> + * 1/256, 2/256 ...256/256. As per the description of each entry in LUT >>>> + * above, we need to pick every 8 * 128 = 1024th entry in LUT, and >>>> + * program 256 of those. >>>> + */ >>> Could make a note here stating that seg3[0] and seg3[1] are also unused >>> by the hardware, even though we have to program them to advance the >>> index. I don't see it mentioned in the spec, but this one I definitely >>> remember confirming from Art way back when. However I never verified >>> that on actual hw. We could also consider just programming those two >>> entries to 0 and start the actual coarse segment programming from index 2. >>> Or we could skip them by reprogramming the index directly. >> If they are not being used, does it matter what and if we program into >> them ? We can add a note though, mentioning this. > It shouldn't matter what we programing into them. But as mentioned I > never actually confirmed this on actual hardware. Would be nice to > double check that so we don't end up with incorrect comment. > >>>> + for (i = 0; i < 256; i++) { >>>> + entry = &lut[i * 1024]; >>> s/1024/8 * 128/ maybe? >> Sure. >> >> Regards >> >> Shashank >> >>>> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); >>>> + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); >>>> + } >>>> + >>>> + icl_load_gcmax(crtc_state, entry); >>>> + ivb_load_lut_ext_max(crtc); >>>> +} >>>> + >>>> static void icl_load_luts(const struct intel_crtc_state *crtc_state) >>>> { >>>> const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; >>>> @@ -775,10 +885,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) >>>> if (crtc_state->base.degamma_lut) >>>> glk_load_degamma_lut(crtc_state); >>>> >>>> - if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == >>>> - GAMMA_MODE_MODE_8BIT) { >>>> + switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { >>>> + case GAMMA_MODE_MODE_8BIT: >>>> i9xx_load_luts(crtc_state); >>>> - } else { >>>> + break; >>>> + >>>> + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: >>>> + icl_program_gamma_superfine_segment(crtc_state); >>>> + icl_program_gamma_multi_segment(crtc_state); >>>> + break; >>>> + >>>> + default: >>>> bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); >>>> ivb_load_lut_ext_max(crtc); >>>> } >>>> @@ -1209,7 +1326,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) >>>> crtc_state_is_legacy_gamma(crtc_state)) >>>> gamma_mode |= GAMMA_MODE_MODE_8BIT; >>>> else >>>> - gamma_mode |= GAMMA_MODE_MODE_10BIT; >>>> + gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED; >>>> >>>> return gamma_mode; >>>> } >>>> -- >>>> 2.17.1
On Mon, May 06, 2019 at 06:25:19PM +0530, Sharma, Shashank wrote: > > On 5/6/2019 5:55 PM, Ville Syrjälä wrote: > > On Mon, May 06, 2019 at 04:09:33PM +0530, Sharma, Shashank wrote: > >> Regards > >> > >> Shashank > >> > >> On 5/3/2019 9:20 PM, Ville Syrjälä wrote: > >>> On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote: > >>>> ICL introduces a new gamma correction mode in display engine, called > >>>> multi-segmented-gamma mode. This mode allows users to program the > >>>> darker region of the gamma curve with sueprfine precision. An > >>>> example use case for this is HDR curves (like PQ ST-2084). > >>>> > >>>> If we plot a gamma correction curve from value range between 0.0 to 1.0, > >>>> ICL's multi-segment has 3 different sections: > >>>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) > >>>> - fine segment: 257 values, ranges between 0 - 1/(128) > >>>> - corase segment: 257 values, ranges between 0 - 1 > >>>> > >>>> This patch: > >>>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), > >>>> so that userspace can program with highest precision supported. > >>>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. > >>>> - Adds functions to program/detect multi-segment gamma. > >>>> > >>>> V2: Addressed review comments from Ville > >>>> - separate function for superfine and fine segments. > >>>> - remove enum for segments. > >>>> - reuse last entry of the LUT as gc_max value. > >>>> - replace if() ....cond with switch...case in icl_load_luts. > >>>> - add an entry variable, instead of 'word' > >>>> > >>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > >>>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> > >>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > >>>> > >>>> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> > >>>> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com> > >>>> Signed-off-by: Uma Shankar <uma.shankar@intel.com> > >>>> --- > >>>> drivers/gpu/drm/i915/i915_pci.c | 3 +- > >>>> drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++- > >>>> 2 files changed, 123 insertions(+), 5 deletions(-) > >>>> > >>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > >>>> index ffa2ee70a03d..83698951760b 100644 > >>>> --- a/drivers/gpu/drm/i915/i915_pci.c > >>>> +++ b/drivers/gpu/drm/i915/i915_pci.c > >>>> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { > >>>> GEN(11), \ > >>>> .ddb_size = 2048, \ > >>>> .has_logical_ring_elsq = 1, \ > >>>> - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } > >>>> + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } > >>> Ugh. Thats one big LUT. But looks correct. > >>> > >>>> + > >>> Bogus newline. > >> Got it. > >>>> > >>>> static const struct intel_device_info intel_icelake_11_info = { > >>>> GEN11_FEATURES, > >>>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c > >>>> index 6c341bea514c..49831e8d02fb 100644 > >>>> --- a/drivers/gpu/drm/i915/intel_color.c > >>>> +++ b/drivers/gpu/drm/i915/intel_color.c > >>>> @@ -41,6 +41,9 @@ > >>>> #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) > >>>> > >>>> #define LEGACY_LUT_LENGTH 256 > >>>> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) > >>>> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 > >>>> + > >>>> /* > >>>> * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point > >>>> * format). This macro takes the coefficient we want transformed and the > >>>> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) > >>>> } > >>>> } > >>>> > >>>> +/* ilk+ "12.4" interpolated format (high 10 bits) */ > >>>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) > >>>> +{ > >>>> + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | > >>>> + (color->blue >> 6); > >>>> +} > >>>> + > >>>> +/* ilk+ "12.4" interpolated format (low 6 bits) */ > >>>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) > >>>> +{ > >>>> + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | > >>>> + (color->blue & 0x3f); > >>>> +} > >>>> + > >>>> +static void > >>>> +icl_load_gcmax(const struct intel_crtc_state *crtc_state, > >>>> + const struct drm_color_lut *entry) > >>> Indentation looks off. Also s/entry/color/ to match the other similarish > >>> funcs maybe? > >> Sure. > >>>> +{ > >>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >>>> + enum pipe pipe = crtc->pipe; > >>>> + > >>>> + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ > >>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); > >>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); > >>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); > >>>> +} > >>>> + > >>>> +static void > >>>> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) > >>>> +{ > >>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >>>> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > >>>> + const struct drm_color_lut *lut = blob->data; > >>>> + enum pipe pipe = crtc->pipe; > >>>> + u32 i; > >>>> + > >>>> + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) > >>>> + return; > >>> These checks aren't needed. Just dead code. > >> Will remove this and similars. > >>>> + > >>>> + /* > >>>> + * Every entry in the multi-segment LUT is corresponding to a superfine > >>>> + * segment step which is 1/(8 * 128 * 256). > >>>> + * > >>>> + * Superfine segment has 9 entries, corresponding to values > >>>> + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). > >>>> + */ > >>>> + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > >>>> + > >>>> + for (i = 0; i < 9; i++) { > >>>> + const struct drm_color_lut *entry = &lut[i]; > >>>> + > >>>> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > >>>> + ilk_lut_12p4_udw(entry)); > >>> ldw should come before udw. > >> Got it. > >>>> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > >>>> + ilk_lut_12p4_ldw(entry)); > >>>> + } > >>>> +} > >>>> + > >>>> +static void > >>>> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) > >>>> +{ > >>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >>>> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > >>>> + const struct drm_color_lut *lut = blob->data; > >>>> + const struct drm_color_lut *entry; > >>> 'entry' declaration can be moved into the loops. > >> Its being used in multiple loops, > > IMO still better to move into the loops. We don't want to pass any > > information between the loops. > > > >> also being used for GCMax outside the > >> loop. > > Hmm. That might be an arguemnt for keeping it out. But the current > > gcmax usage looks broken. You're programming the same value into > > the last PAL_PREC index and GCMAX. > > Isn't this what you wanted ? IIRC, your recommendation was to program > the highest values user wants to program in GCMAX reg. We also had a > discussion on how user cant program 1.0, as we have LUT depth on 16 bit > only, and we decided to use the last value of the LUT as GCMAX. Did I > misunderstand anything there ? It should be: PAL_PREC[max] = lut[size-2]; GCMAX = lut[size-1]; Oh, we also need to increase the LUT size by one to make that work correctly.
Regards Shashank On 5/6/2019 6:41 PM, Ville Syrjälä wrote: > On Mon, May 06, 2019 at 06:25:19PM +0530, Sharma, Shashank wrote: >> On 5/6/2019 5:55 PM, Ville Syrjälä wrote: >>> On Mon, May 06, 2019 at 04:09:33PM +0530, Sharma, Shashank wrote: >>>> Regards >>>> >>>> Shashank >>>> >>>> On 5/3/2019 9:20 PM, Ville Syrjälä wrote: >>>>> On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote: >>>>>> ICL introduces a new gamma correction mode in display engine, called >>>>>> multi-segmented-gamma mode. This mode allows users to program the >>>>>> darker region of the gamma curve with sueprfine precision. An >>>>>> example use case for this is HDR curves (like PQ ST-2084). >>>>>> >>>>>> If we plot a gamma correction curve from value range between 0.0 to 1.0, >>>>>> ICL's multi-segment has 3 different sections: >>>>>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) >>>>>> - fine segment: 257 values, ranges between 0 - 1/(128) >>>>>> - corase segment: 257 values, ranges between 0 - 1 >>>>>> >>>>>> This patch: >>>>>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), >>>>>> so that userspace can program with highest precision supported. >>>>>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. >>>>>> - Adds functions to program/detect multi-segment gamma. >>>>>> >>>>>> V2: Addressed review comments from Ville >>>>>> - separate function for superfine and fine segments. >>>>>> - remove enum for segments. >>>>>> - reuse last entry of the LUT as gc_max value. >>>>>> - replace if() ....cond with switch...case in icl_load_luts. >>>>>> - add an entry variable, instead of 'word' >>>>>> >>>>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> >>>>>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> >>>>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> >>>>>> >>>>>> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> >>>>>> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com> >>>>>> Signed-off-by: Uma Shankar <uma.shankar@intel.com> >>>>>> --- >>>>>> drivers/gpu/drm/i915/i915_pci.c | 3 +- >>>>>> drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++- >>>>>> 2 files changed, 123 insertions(+), 5 deletions(-) >>>>>> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c >>>>>> index ffa2ee70a03d..83698951760b 100644 >>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c >>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c >>>>>> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { >>>>>> GEN(11), \ >>>>>> .ddb_size = 2048, \ >>>>>> .has_logical_ring_elsq = 1, \ >>>>>> - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } >>>>>> + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } >>>>> Ugh. Thats one big LUT. But looks correct. >>>>> >>>>>> + >>>>> Bogus newline. >>>> Got it. >>>>>> >>>>>> static const struct intel_device_info intel_icelake_11_info = { >>>>>> GEN11_FEATURES, >>>>>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c >>>>>> index 6c341bea514c..49831e8d02fb 100644 >>>>>> --- a/drivers/gpu/drm/i915/intel_color.c >>>>>> +++ b/drivers/gpu/drm/i915/intel_color.c >>>>>> @@ -41,6 +41,9 @@ >>>>>> #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) >>>>>> >>>>>> #define LEGACY_LUT_LENGTH 256 >>>>>> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) >>>>>> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 >>>>>> + >>>>>> /* >>>>>> * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point >>>>>> * format). This macro takes the coefficient we want transformed and the >>>>>> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) >>>>>> } >>>>>> } >>>>>> >>>>>> +/* ilk+ "12.4" interpolated format (high 10 bits) */ >>>>>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) >>>>>> +{ >>>>>> + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | >>>>>> + (color->blue >> 6); >>>>>> +} >>>>>> + >>>>>> +/* ilk+ "12.4" interpolated format (low 6 bits) */ >>>>>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) >>>>>> +{ >>>>>> + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | >>>>>> + (color->blue & 0x3f); >>>>>> +} >>>>>> + >>>>>> +static void >>>>>> +icl_load_gcmax(const struct intel_crtc_state *crtc_state, >>>>>> + const struct drm_color_lut *entry) >>>>> Indentation looks off. Also s/entry/color/ to match the other similarish >>>>> funcs maybe? >>>> Sure. >>>>>> +{ >>>>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >>>>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >>>>>> + enum pipe pipe = crtc->pipe; >>>>>> + >>>>>> + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ >>>>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); >>>>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); >>>>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); >>>>>> +} >>>>>> + >>>>>> +static void >>>>>> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) >>>>>> +{ >>>>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >>>>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >>>>>> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; >>>>>> + const struct drm_color_lut *lut = blob->data; >>>>>> + enum pipe pipe = crtc->pipe; >>>>>> + u32 i; >>>>>> + >>>>>> + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) >>>>>> + return; >>>>> These checks aren't needed. Just dead code. >>>> Will remove this and similars. >>>>>> + >>>>>> + /* >>>>>> + * Every entry in the multi-segment LUT is corresponding to a superfine >>>>>> + * segment step which is 1/(8 * 128 * 256). >>>>>> + * >>>>>> + * Superfine segment has 9 entries, corresponding to values >>>>>> + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). >>>>>> + */ >>>>>> + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); >>>>>> + >>>>>> + for (i = 0; i < 9; i++) { >>>>>> + const struct drm_color_lut *entry = &lut[i]; >>>>>> + >>>>>> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), >>>>>> + ilk_lut_12p4_udw(entry)); >>>>> ldw should come before udw. >>>> Got it. >>>>>> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), >>>>>> + ilk_lut_12p4_ldw(entry)); >>>>>> + } >>>>>> +} >>>>>> + >>>>>> +static void >>>>>> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) >>>>>> +{ >>>>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); >>>>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); >>>>>> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; >>>>>> + const struct drm_color_lut *lut = blob->data; >>>>>> + const struct drm_color_lut *entry; >>>>> 'entry' declaration can be moved into the loops. >>>> Its being used in multiple loops, >>> IMO still better to move into the loops. We don't want to pass any >>> information between the loops. >>> >>>> also being used for GCMax outside the >>>> loop. >>> Hmm. That might be an arguemnt for keeping it out. But the current >>> gcmax usage looks broken. You're programming the same value into >>> the last PAL_PREC index and GCMAX. >> Isn't this what you wanted ? IIRC, your recommendation was to program >> the highest values user wants to program in GCMAX reg. We also had a >> discussion on how user cant program 1.0, as we have LUT depth on 16 bit >> only, and we decided to use the last value of the LUT as GCMAX. Did I >> misunderstand anything there ? > It should be: > PAL_PREC[max] = lut[size-2]; > GCMAX = lut[size-1]; > > Oh, we also need to increase the LUT size by one to make that > work correctly. Yep, that's why I was not so sure about this, and thought programming a 1.0 is a better idea. So now the new size would be 257 * 256 * 128 = 263,168 entries.I will do changes and send V3. Regards Shashank >
On Mon, May 06, 2019 at 06:44:43PM +0530, Sharma, Shashank wrote: > Regards > > Shashank > > On 5/6/2019 6:41 PM, Ville Syrjälä wrote: > > On Mon, May 06, 2019 at 06:25:19PM +0530, Sharma, Shashank wrote: > >> On 5/6/2019 5:55 PM, Ville Syrjälä wrote: > >>> On Mon, May 06, 2019 at 04:09:33PM +0530, Sharma, Shashank wrote: > >>>> Regards > >>>> > >>>> Shashank > >>>> > >>>> On 5/3/2019 9:20 PM, Ville Syrjälä wrote: > >>>>> On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote: > >>>>>> ICL introduces a new gamma correction mode in display engine, called > >>>>>> multi-segmented-gamma mode. This mode allows users to program the > >>>>>> darker region of the gamma curve with sueprfine precision. An > >>>>>> example use case for this is HDR curves (like PQ ST-2084). > >>>>>> > >>>>>> If we plot a gamma correction curve from value range between 0.0 to 1.0, > >>>>>> ICL's multi-segment has 3 different sections: > >>>>>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256) > >>>>>> - fine segment: 257 values, ranges between 0 - 1/(128) > >>>>>> - corase segment: 257 values, ranges between 0 - 1 > >>>>>> > >>>>>> This patch: > >>>>>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256), > >>>>>> so that userspace can program with highest precision supported. > >>>>>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode. > >>>>>> - Adds functions to program/detect multi-segment gamma. > >>>>>> > >>>>>> V2: Addressed review comments from Ville > >>>>>> - separate function for superfine and fine segments. > >>>>>> - remove enum for segments. > >>>>>> - reuse last entry of the LUT as gc_max value. > >>>>>> - replace if() ....cond with switch...case in icl_load_luts. > >>>>>> - add an entry variable, instead of 'word' > >>>>>> > >>>>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> > >>>>>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> > >>>>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> > >>>>>> > >>>>>> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> > >>>>>> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com> > >>>>>> Signed-off-by: Uma Shankar <uma.shankar@intel.com> > >>>>>> --- > >>>>>> drivers/gpu/drm/i915/i915_pci.c | 3 +- > >>>>>> drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++- > >>>>>> 2 files changed, 123 insertions(+), 5 deletions(-) > >>>>>> > >>>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > >>>>>> index ffa2ee70a03d..83698951760b 100644 > >>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c > >>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c > >>>>>> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { > >>>>>> GEN(11), \ > >>>>>> .ddb_size = 2048, \ > >>>>>> .has_logical_ring_elsq = 1, \ > >>>>>> - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } > >>>>>> + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } > >>>>> Ugh. Thats one big LUT. But looks correct. > >>>>> > >>>>>> + > >>>>> Bogus newline. > >>>> Got it. > >>>>>> > >>>>>> static const struct intel_device_info intel_icelake_11_info = { > >>>>>> GEN11_FEATURES, > >>>>>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c > >>>>>> index 6c341bea514c..49831e8d02fb 100644 > >>>>>> --- a/drivers/gpu/drm/i915/intel_color.c > >>>>>> +++ b/drivers/gpu/drm/i915/intel_color.c > >>>>>> @@ -41,6 +41,9 @@ > >>>>>> #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) > >>>>>> > >>>>>> #define LEGACY_LUT_LENGTH 256 > >>>>>> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) > >>>>>> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 > >>>>>> + > >>>>>> /* > >>>>>> * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point > >>>>>> * format). This macro takes the coefficient we want transformed and the > >>>>>> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) > >>>>>> } > >>>>>> } > >>>>>> > >>>>>> +/* ilk+ "12.4" interpolated format (high 10 bits) */ > >>>>>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) > >>>>>> +{ > >>>>>> + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | > >>>>>> + (color->blue >> 6); > >>>>>> +} > >>>>>> + > >>>>>> +/* ilk+ "12.4" interpolated format (low 6 bits) */ > >>>>>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) > >>>>>> +{ > >>>>>> + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | > >>>>>> + (color->blue & 0x3f); > >>>>>> +} > >>>>>> + > >>>>>> +static void > >>>>>> +icl_load_gcmax(const struct intel_crtc_state *crtc_state, > >>>>>> + const struct drm_color_lut *entry) > >>>>> Indentation looks off. Also s/entry/color/ to match the other similarish > >>>>> funcs maybe? > >>>> Sure. > >>>>>> +{ > >>>>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >>>>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >>>>>> + enum pipe pipe = crtc->pipe; > >>>>>> + > >>>>>> + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ > >>>>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); > >>>>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); > >>>>>> + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); > >>>>>> +} > >>>>>> + > >>>>>> +static void > >>>>>> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) > >>>>>> +{ > >>>>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >>>>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >>>>>> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > >>>>>> + const struct drm_color_lut *lut = blob->data; > >>>>>> + enum pipe pipe = crtc->pipe; > >>>>>> + u32 i; > >>>>>> + > >>>>>> + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) > >>>>>> + return; > >>>>> These checks aren't needed. Just dead code. > >>>> Will remove this and similars. > >>>>>> + > >>>>>> + /* > >>>>>> + * Every entry in the multi-segment LUT is corresponding to a superfine > >>>>>> + * segment step which is 1/(8 * 128 * 256). > >>>>>> + * > >>>>>> + * Superfine segment has 9 entries, corresponding to values > >>>>>> + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). > >>>>>> + */ > >>>>>> + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); > >>>>>> + > >>>>>> + for (i = 0; i < 9; i++) { > >>>>>> + const struct drm_color_lut *entry = &lut[i]; > >>>>>> + > >>>>>> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > >>>>>> + ilk_lut_12p4_udw(entry)); > >>>>> ldw should come before udw. > >>>> Got it. > >>>>>> + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), > >>>>>> + ilk_lut_12p4_ldw(entry)); > >>>>>> + } > >>>>>> +} > >>>>>> + > >>>>>> +static void > >>>>>> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) > >>>>>> +{ > >>>>>> + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); > >>>>>> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); > >>>>>> + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; > >>>>>> + const struct drm_color_lut *lut = blob->data; > >>>>>> + const struct drm_color_lut *entry; > >>>>> 'entry' declaration can be moved into the loops. > >>>> Its being used in multiple loops, > >>> IMO still better to move into the loops. We don't want to pass any > >>> information between the loops. > >>> > >>>> also being used for GCMax outside the > >>>> loop. > >>> Hmm. That might be an arguemnt for keeping it out. But the current > >>> gcmax usage looks broken. You're programming the same value into > >>> the last PAL_PREC index and GCMAX. > >> Isn't this what you wanted ? IIRC, your recommendation was to program > >> the highest values user wants to program in GCMAX reg. We also had a > >> discussion on how user cant program 1.0, as we have LUT depth on 16 bit > >> only, and we decided to use the last value of the LUT as GCMAX. Did I > >> misunderstand anything there ? > > It should be: > > PAL_PREC[max] = lut[size-2]; > > GCMAX = lut[size-1]; > > > > Oh, we also need to increase the LUT size by one to make that > > work correctly. > > Yep, that's why I was not so sure about this, and thought programming a > 1.0 is a better idea. > > So now the new size would be 257 * 256 * 128 = 263,168 entries.I will do > changes and send V3. Should be just 256*128*8+1=262145
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ffa2ee70a03d..83698951760b 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = { GEN(11), \ .ddb_size = 2048, \ .has_logical_ring_elsq = 1, \ - .color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 } + .color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 } + static const struct intel_device_info intel_icelake_11_info = { GEN11_FEATURES, diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 6c341bea514c..49831e8d02fb 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -41,6 +41,9 @@ #define CTM_COEFF_ABS(coeff) ((coeff) & (CTM_COEFF_SIGN - 1)) #define LEGACY_LUT_LENGTH 256 +#define ICL_GAMMA_MULTISEG_LUT_LENGTH (256 * 128 * 8) +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH 9 + /* * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point * format). This macro takes the coefficient we want transformed and the @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state) } } +/* ilk+ "12.4" interpolated format (high 10 bits) */ +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color) +{ + return (color->red >> 6) << 20 | (color->green >> 6) << 10 | + (color->blue >> 6); +} + +/* ilk+ "12.4" interpolated format (low 6 bits) */ +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color) +{ + return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 | + (color->blue & 0x3f); +} + +static void +icl_load_gcmax(const struct intel_crtc_state *crtc_state, + const struct drm_color_lut *entry) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + + /* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */ + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red); + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green); + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue); +} + +static void +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; + const struct drm_color_lut *lut = blob->data; + enum pipe pipe = crtc->pipe; + u32 i; + + if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH) + return; + + /* + * Every entry in the multi-segment LUT is corresponding to a superfine + * segment step which is 1/(8 * 128 * 256). + * + * Superfine segment has 9 entries, corresponding to values + * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256). + */ + I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + + for (i = 0; i < 9; i++) { + const struct drm_color_lut *entry = &lut[i]; + + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_udw(entry)); + I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe), + ilk_lut_12p4_ldw(entry)); + } +} + +static void +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct drm_property_blob *blob = crtc_state->base.gamma_lut; + const struct drm_color_lut *lut = blob->data; + const struct drm_color_lut *entry; + enum pipe pipe = crtc->pipe; + u32 i; + + if (!lut || (drm_color_lut_size(blob) < ICL_GAMMA_MULTISEG_LUT_LENGTH)) + return; + + /* + * Every entry in the multi-segment LUT is corresponding to a superfine + * segment step which is 1/(8*128*256). + * + * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256), 2/(128*256) + * ... 256/(128*256). So in order to program fine segment of LUT we + * need to pick every 8'th entry in LUT, and program 256 indexes. + * Fine segment's index 0 is programmed in HW, and it starts from + * index 1. + */ + I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_AUTO_INCREMENT); + for (i = 1; i < 257; i++) { + entry = &lut[i * 8]; + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); + } + + /* + * Coarse segment's starts from index 0 and it's step is 1/256 ie 0, + * 1/256, 2/256 ...256/256. As per the description of each entry in LUT + * above, we need to pick every 8 * 128 = 1024th entry in LUT, and + * program 256 of those. + */ + for (i = 0; i < 256; i++) { + entry = &lut[i * 1024]; + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_udw(entry)); + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_12p4_ldw(entry)); + } + + icl_load_gcmax(crtc_state, entry); + ivb_load_lut_ext_max(crtc); +} + static void icl_load_luts(const struct intel_crtc_state *crtc_state) { const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut; @@ -775,10 +885,17 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state) if (crtc_state->base.degamma_lut) glk_load_degamma_lut(crtc_state); - if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) == - GAMMA_MODE_MODE_8BIT) { + switch (crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) { + case GAMMA_MODE_MODE_8BIT: i9xx_load_luts(crtc_state); - } else { + break; + + case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED: + icl_program_gamma_superfine_segment(crtc_state); + icl_program_gamma_multi_segment(crtc_state); + break; + + default: bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0)); ivb_load_lut_ext_max(crtc); } @@ -1209,7 +1326,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state) crtc_state_is_legacy_gamma(crtc_state)) gamma_mode |= GAMMA_MODE_MODE_8BIT; else - gamma_mode |= GAMMA_MODE_MODE_10BIT; + gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED; return gamma_mode; }