diff mbox series

[3/3] drm/i915/icl: Add Multi-segmented gamma support

Message ID 20190426180151.30036-3-shashank.sharma@intel.com (mailing list archive)
State New, archived
Headers show
Series [1/3] drm/i915: Change gamma/degamma_lut_size data type to u32 | expand

Commit Message

Sharma, Shashank April 26, 2019, 6:01 p.m. UTC
ICL introduces a new gamma correction mode in display engine, called
multi-segmented-gamma mode. This mode allows users to program the
darker region of the gamma curve with sueprfine precision. An
example use case for this is HDR curves (like PQ ST-2084).

If we plot a gamma correction curve from value range between 0.0 to 1.0,
ICL's multi-segment has 3 different sections:
- superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
- fine segment: 257 values, ranges between 0 - 1/(128)
- corase segment: 257 values, ranges between 0 - 1

This patch:
- Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
  so that userspace can program with highest precision supported.
- Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
- Adds functions to program/detect multi-segment gamma.

Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
Signed-off-by: Uma Shankar <uma.shankar@intel.com>
---
 drivers/gpu/drm/i915/i915_pci.c    |   3 +-
 drivers/gpu/drm/i915/intel_color.c | 155 ++++++++++++++++++++++++++++-
 2 files changed, 156 insertions(+), 2 deletions(-)

Comments

Ville Syrjala April 26, 2019, 6:29 p.m. UTC | #1
On Fri, Apr 26, 2019 at 11:31:51PM +0530, Shashank Sharma wrote:
> ICL introduces a new gamma correction mode in display engine, called
> multi-segmented-gamma mode. This mode allows users to program the
> darker region of the gamma curve with sueprfine precision. An
> example use case for this is HDR curves (like PQ ST-2084).
> 
> If we plot a gamma correction curve from value range between 0.0 to 1.0,
> ICL's multi-segment has 3 different sections:
> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
> - fine segment: 257 values, ranges between 0 - 1/(128)
> - corase segment: 257 values, ranges between 0 - 1
> 
> This patch:
> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>   so that userspace can program with highest precision supported.
> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
> - Adds functions to program/detect multi-segment gamma.
> 
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
> Signed-off-by: Uma Shankar <uma.shankar@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_pci.c    |   3 +-
>  drivers/gpu/drm/i915/intel_color.c | 155 ++++++++++++++++++++++++++++-
>  2 files changed, 156 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index ffa2ee70a03d..83698951760b 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>  	GEN(11), \
>  	.ddb_size = 2048, \
>  	.has_logical_ring_elsq = 1, \
> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
> +
>  
>  static const struct intel_device_info intel_icelake_11_info = {
>  	GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
> index ca341a9e47e6..d1fb79a5d764 100644
> --- a/drivers/gpu/drm/i915/intel_color.c
> +++ b/drivers/gpu/drm/i915/intel_color.c
> @@ -41,6 +41,7 @@
>  #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
>  
>  #define LEGACY_LUT_LENGTH		256
> +#define ICL_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
>  /*
>   * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>   * format). This macro takes the coefficient we want transformed and the
> @@ -58,6 +59,12 @@
>  
>  #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255)
>  
> +enum icl_ms_gamma_segments {
> +	ICL_MS_GAMMA_SEG_SUPERFINE,
> +	ICL_MS_GAMMA_SEG_FINE,
> +	ICL_MS_GAMMA_SEG_COARSE,
> +};
> +
>  static const u16 ilk_csc_off_zero[3] = {};
>  
>  static const u16 ilk_csc_coeff_identity[9] = {
> @@ -767,6 +774,149 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
>  	}
>  }
>  
> +/* ilk+ "12.4" interpolated format (high 10 bits) */
> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
> +{
> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
> +		(color->blue >> 6);
> +}
> +
> +/* ilk+ "12.4" interpolated format (low 6 bits) */
> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
> +{
> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
> +		(color->blue & 0x3f);
> +}
> +
> +static void
> +icl_program_gamma_gcmax(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +	enum pipe pipe = crtc->pipe;
> +
> +	/*
> +	 * Program the max register to clamp values > 1.0.
> +	 * ToDo: Extend the ABI to be able to program values
> +	 * from 1.0
> +	 */
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16));
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16));
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16));

This one I think we want to program based on the provide LUT. It's the
last entry that still gets used in interpolation for <1.0 values.
Or at least that's the way it works with the 12p4 mode IIRC. I don't
actually remember how it goes with the multi segment mode.

> +
> +	/*
> +	 * Program the max register to clamp values > 1.0.
> +	 * ToDo: Extend the ABI to be able to program values
> +	 * from 1.0 to 3.0
> +	 */
> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), (1 << 16));
> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), (1 << 16));
> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), (1 << 16));
> +
> +	/*
> +	 * Program the gc max 2 register to clamp values > 1.0.
> +	 * ToDo: Extend the ABI to be able to program values
> +	 * from 3.0 to 7.0
> +	 */
> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), (1 << 16));
> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), (1 << 16));
> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), (1 << 16));

And these are just ivb_load_lut_10_max(). Might need to just
s/10/ext/ in the name since I guess they apply to the 12p4 and
multi segmment modes too.

> +}
> +
> +static void
> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state,
> +				const struct drm_property_blob *blob,
> +				enum icl_ms_gamma_segments segment)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +	const struct drm_color_lut *lut = blob->data;
> +	enum pipe pipe = crtc->pipe;
> +	u32 i, word, seg_size, seg_step, seg_start = 0;
> +	i915_reg_t index_reg, data_reg;
> +
> +	if (!lut || (drm_color_lut_size(blob) < ICL_MULTISEG_LUT_LENGTH)) {
> +		DRM_DEBUG_KMS("Not programming gamma segment, invalid input\n");
> +		return;
> +	}
> +
> +	/*
> +	 * Every entry in the multi-segment LUT is corresponding to a superfine
> +	 * segment step which is 1/(8*128*256).
> +	 *
> +	 * Superfine segment has 9 entries, corresponding to values
> +	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
> +	 *
> +	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
> +	 * ... 256/(128*256). So in order to program fine segment of LUT we
> +	 * need to pick every 8'th entry in LUT, and program 256 indexes.
> +	 * Fine segment's index 0 is programmed in HW, and it starts from
> +	 * index 1.
> +	 *
> +	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
> +	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
> +	 * above, we need to pick every 8 * 128 = 1024th entry in LUT, and
> +	 * program 256 of those.
> +	 */
> +
> +	switch (segment) {
> +	case ICL_MS_GAMMA_SEG_SUPERFINE:
> +		seg_size = 9;
> +		seg_step = 1;
> +		index_reg = PREC_PAL_MULTI_SEG_INDEX(pipe);
> +		data_reg = PREC_PAL_MULTI_SEG_DATA(pipe);
> +		break;
> +
> +	case ICL_MS_GAMMA_SEG_FINE:
> +		seg_start = 1;
> +		seg_size = 256;
> +		seg_step = 8;
> +		index_reg = PREC_PAL_INDEX(pipe);
> +		data_reg = PREC_PAL_DATA(pipe);
> +		break;
> +
> +	case ICL_MS_GAMMA_SEG_COARSE:
> +		seg_size = 256;
> +		seg_step = 1024;
> +		index_reg = PREC_PAL_INDEX(pipe);
> +		data_reg = PREC_PAL_DATA(pipe);
> +		break;

I'm not convinced this this enum is actually helpful. I would
probably just try to provide separate functions for the segments.
Not sure if we could share the same code for the fine/coarse
segments and just pass in the differences as parameters.

> +
> +	default:
> +		DRM_ERROR("Invalid gamma segment %d to program\n", segment);
> +		return;
> +	}
> +
> +	I915_WRITE(index_reg, PAL_PREC_AUTO_INCREMENT);
> +
> +	for (i = seg_start; i < seg_size; i++) {
> +		/* Even Index */

These comments don't really provide any value.

> +		word = ilk_lut_12p4_udw(&lut[i * seg_step]);

and the 'word' variable isn't particularly useful either.
We might want a local varianble for the lut entry like we
have in the ivb/bdw functions, if only to avoid repeating the
arithmetic.

> +		I915_WRITE(data_reg, word);
> +
> +		/* Odd index */
> +		word = ilk_lut_12p4_ldw(&lut[i * seg_step]);
> +		I915_WRITE(data_reg, word);
> +	}
> +}
> +
> +static void
> +icl_load_gamma_multi_seg_lut(const struct intel_crtc_state *crtc_state)
> +{
> +	const struct drm_property_blob *lut_blob = crtc_state->base.gamma_lut;
> +
> +	if (!lut_blob)
> +		return;
> +
> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
> +					ICL_MS_GAMMA_SEG_SUPERFINE);
> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
> +					ICL_MS_GAMMA_SEG_FINE);
> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
> +					ICL_MS_GAMMA_SEG_COARSE);
> +	icl_program_gamma_gcmax(crtc_state);
> +}

And this I would just inline into icl_load_luts() just like the rest.

> +
>  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>  {
>  	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
> @@ -776,6 +926,9 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>  		glk_load_degamma_lut(crtc_state);
>  
>  	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
> +		GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) {
> +		icl_load_gamma_multi_seg_lut(crtc_state);

I'd put this last to keep things in consistent order. And probably want
to use a switch statement now that we have three options.

> +	} else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
>  	    GAMMA_MODE_MODE_8BIT) {
>  		i9xx_load_luts(crtc_state);
>  	} else {
> @@ -1187,7 +1340,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
>  	    crtc_state_is_legacy_gamma(crtc_state))
>  		gamma_mode |= GAMMA_MODE_MODE_8BIT;
>  	else
> -		gamma_mode |= GAMMA_MODE_MODE_10BIT;
> +		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
>  
>  	return gamma_mode;
>  }
> -- 
> 2.17.1
Sharma, Shashank April 29, 2019, 11:30 a.m. UTC | #2
On 4/26/2019 11:59 PM, Ville Syrjälä wrote:
> On Fri, Apr 26, 2019 at 11:31:51PM +0530, Shashank Sharma wrote:
>> ICL introduces a new gamma correction mode in display engine, called
>> multi-segmented-gamma mode. This mode allows users to program the
>> darker region of the gamma curve with sueprfine precision. An
>> example use case for this is HDR curves (like PQ ST-2084).
>>
>> If we plot a gamma correction curve from value range between 0.0 to 1.0,
>> ICL's multi-segment has 3 different sections:
>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
>> - fine segment: 257 values, ranges between 0 - 1/(128)
>> - corase segment: 257 values, ranges between 0 - 1
>>
>> This patch:
>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>>    so that userspace can program with highest precision supported.
>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
>> - Adds functions to program/detect multi-segment gamma.
>>
>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>
>> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
>> Signed-off-by: Uma Shankar <uma.shankar@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_pci.c    |   3 +-
>>   drivers/gpu/drm/i915/intel_color.c | 155 ++++++++++++++++++++++++++++-
>>   2 files changed, 156 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index ffa2ee70a03d..83698951760b 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>   	GEN(11), \
>>   	.ddb_size = 2048, \
>>   	.has_logical_ring_elsq = 1, \
>> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
>> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
>> +
>>   
>>   static const struct intel_device_info intel_icelake_11_info = {
>>   	GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
>> index ca341a9e47e6..d1fb79a5d764 100644
>> --- a/drivers/gpu/drm/i915/intel_color.c
>> +++ b/drivers/gpu/drm/i915/intel_color.c
>> @@ -41,6 +41,7 @@
>>   #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
>>   
>>   #define LEGACY_LUT_LENGTH		256
>> +#define ICL_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
>>   /*
>>    * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>>    * format). This macro takes the coefficient we want transformed and the
>> @@ -58,6 +59,12 @@
>>   
>>   #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255)
>>   
>> +enum icl_ms_gamma_segments {
>> +	ICL_MS_GAMMA_SEG_SUPERFINE,
>> +	ICL_MS_GAMMA_SEG_FINE,
>> +	ICL_MS_GAMMA_SEG_COARSE,
>> +};
>> +
>>   static const u16 ilk_csc_off_zero[3] = {};
>>   
>>   static const u16 ilk_csc_coeff_identity[9] = {
>> @@ -767,6 +774,149 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
>>   	}
>>   }
>>   
>> +/* ilk+ "12.4" interpolated format (high 10 bits) */
>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
>> +{
>> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
>> +		(color->blue >> 6);
>> +}
>> +
>> +/* ilk+ "12.4" interpolated format (low 6 bits) */
>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
>> +{
>> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
>> +		(color->blue & 0x3f);
>> +}
>> +
>> +static void
>> +icl_program_gamma_gcmax(const struct intel_crtc_state *crtc_state)
>> +{
>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> +	enum pipe pipe = crtc->pipe;
>> +
>> +	/*
>> +	 * Program the max register to clamp values > 1.0.
>> +	 * ToDo: Extend the ABI to be able to program values
>> +	 * from 1.0
>> +	 */
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16));
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16));
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16));
> This one I think we want to program based on the provide LUT. It's the
> last entry that still gets used in interpolation for <1.0 values.
> Or at least that's the way it works with the 12p4 mode IIRC. I don't
> actually remember how it goes with the multi segment mode.
I am a bit skeptical if its a good idea to program from the LUT, as you 
know, the drm_color_lut elements are 16bit per channel, and in order to 
program 1.0 we need 17 bits. So even if userspace wants, it cant set the 
value of 1.0 as of now. Also, wouldn't it would be good for the 
interpolation to have the accurate max value (1.0) of the curve ?
>> +
>> +	/*
>> +	 * Program the max register to clamp values > 1.0.
>> +	 * ToDo: Extend the ABI to be able to program values
>> +	 * from 1.0 to 3.0
>> +	 */
>> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), (1 << 16));
>> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), (1 << 16));
>> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), (1 << 16));
>> +
>> +	/*
>> +	 * Program the gc max 2 register to clamp values > 1.0.
>> +	 * ToDo: Extend the ABI to be able to program values
>> +	 * from 3.0 to 7.0
>> +	 */
>> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), (1 << 16));
>> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), (1 << 16));
>> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), (1 << 16));
> And these are just ivb_load_lut_10_max(). Might need to just
> s/10/ext/ in the name since I guess they apply to the 12p4 and
> multi segmment modes too.
Got it !
>
>> +}
>> +
>> +static void
>> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state,
>> +				const struct drm_property_blob *blob,
>> +				enum icl_ms_gamma_segments segment)
>> +{
>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> +	const struct drm_color_lut *lut = blob->data;
>> +	enum pipe pipe = crtc->pipe;
>> +	u32 i, word, seg_size, seg_step, seg_start = 0;
>> +	i915_reg_t index_reg, data_reg;
>> +
>> +	if (!lut || (drm_color_lut_size(blob) < ICL_MULTISEG_LUT_LENGTH)) {
>> +		DRM_DEBUG_KMS("Not programming gamma segment, invalid input\n");
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * Every entry in the multi-segment LUT is corresponding to a superfine
>> +	 * segment step which is 1/(8*128*256).
>> +	 *
>> +	 * Superfine segment has 9 entries, corresponding to values
>> +	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
>> +	 *
>> +	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
>> +	 * ... 256/(128*256). So in order to program fine segment of LUT we
>> +	 * need to pick every 8'th entry in LUT, and program 256 indexes.
>> +	 * Fine segment's index 0 is programmed in HW, and it starts from
>> +	 * index 1.
>> +	 *
>> +	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
>> +	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
>> +	 * above, we need to pick every 8 * 128 = 1024th entry in LUT, and
>> +	 * program 256 of those.
>> +	 */
>> +
>> +	switch (segment) {
>> +	case ICL_MS_GAMMA_SEG_SUPERFINE:
>> +		seg_size = 9;
>> +		seg_step = 1;
>> +		index_reg = PREC_PAL_MULTI_SEG_INDEX(pipe);
>> +		data_reg = PREC_PAL_MULTI_SEG_DATA(pipe);
>> +		break;
>> +
>> +	case ICL_MS_GAMMA_SEG_FINE:
>> +		seg_start = 1;
>> +		seg_size = 256;
>> +		seg_step = 8;
>> +		index_reg = PREC_PAL_INDEX(pipe);
>> +		data_reg = PREC_PAL_DATA(pipe);
>> +		break;
>> +
>> +	case ICL_MS_GAMMA_SEG_COARSE:
>> +		seg_size = 256;
>> +		seg_step = 1024;
>> +		index_reg = PREC_PAL_INDEX(pipe);
>> +		data_reg = PREC_PAL_DATA(pipe);
>> +		break;
> I'm not convinced this this enum is actually helpful. I would
> probably just try to provide separate functions for the segments.
> Not sure if we could share the same code for the fine/coarse
> segments and just pass in the differences as parameters.
Ah, I started just like that, having two different functions, one for 
fine + coarse segment, other one for superfile segment, and then I saw 
much of code duplication, so I changed it this way. But if you think it 
would have been better that way, I can switch to the previous version.
>> +
>> +	default:
>> +		DRM_ERROR("Invalid gamma segment %d to program\n", segment);
>> +		return;
>> +	}
>> +
>> +	I915_WRITE(index_reg, PAL_PREC_AUTO_INCREMENT);
>> +
>> +	for (i = seg_start; i < seg_size; i++) {
>> +		/* Even Index */
> These comments don't really provide any value.
Ok
>
>> +		word = ilk_lut_12p4_udw(&lut[i * seg_step]);
> and the 'word' variable isn't particularly useful either.
> We might want a local varianble for the lut entry like we
> have in the ivb/bdw functions, if only to avoid repeating the
> arithmetic.
Sure, will do that.
>> +		I915_WRITE(data_reg, word);
>> +
>> +		/* Odd index */
>> +		word = ilk_lut_12p4_ldw(&lut[i * seg_step]);
>> +		I915_WRITE(data_reg, word);
>> +	}
>> +}
>> +
>> +static void
>> +icl_load_gamma_multi_seg_lut(const struct intel_crtc_state *crtc_state)
>> +{
>> +	const struct drm_property_blob *lut_blob = crtc_state->base.gamma_lut;
>> +
>> +	if (!lut_blob)
>> +		return;
>> +
>> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
>> +					ICL_MS_GAMMA_SEG_SUPERFINE);
>> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
>> +					ICL_MS_GAMMA_SEG_FINE);
>> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
>> +					ICL_MS_GAMMA_SEG_COARSE);
>> +	icl_program_gamma_gcmax(crtc_state);
>> +}
> And this I would just inline into icl_load_luts() just like the rest.
Got it !
>> +
>>   static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>>   {
>>   	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
>> @@ -776,6 +926,9 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>>   		glk_load_degamma_lut(crtc_state);
>>   
>>   	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
>> +		GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) {
>> +		icl_load_gamma_multi_seg_lut(crtc_state);
> I'd put this last to keep things in consistent order. And probably want
> to use a switch statement now that we have three options.

Sure !

- Shashank

>> +	} else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
>>   	    GAMMA_MODE_MODE_8BIT) {
>>   		i9xx_load_luts(crtc_state);
>>   	} else {
>> @@ -1187,7 +1340,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
>>   	    crtc_state_is_legacy_gamma(crtc_state))
>>   		gamma_mode |= GAMMA_MODE_MODE_8BIT;
>>   	else
>> -		gamma_mode |= GAMMA_MODE_MODE_10BIT;
>> +		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
>>   
>>   	return gamma_mode;
>>   }
>> -- 
>> 2.17.1
Ville Syrjala April 29, 2019, 11:49 a.m. UTC | #3
On Mon, Apr 29, 2019 at 05:00:21PM +0530, Sharma, Shashank wrote:
> 
> On 4/26/2019 11:59 PM, Ville Syrjälä wrote:
> > On Fri, Apr 26, 2019 at 11:31:51PM +0530, Shashank Sharma wrote:
> >> ICL introduces a new gamma correction mode in display engine, called
> >> multi-segmented-gamma mode. This mode allows users to program the
> >> darker region of the gamma curve with sueprfine precision. An
> >> example use case for this is HDR curves (like PQ ST-2084).
> >>
> >> If we plot a gamma correction curve from value range between 0.0 to 1.0,
> >> ICL's multi-segment has 3 different sections:
> >> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
> >> - fine segment: 257 values, ranges between 0 - 1/(128)
> >> - corase segment: 257 values, ranges between 0 - 1
> >>
> >> This patch:
> >> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
> >>    so that userspace can program with highest precision supported.
> >> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
> >> - Adds functions to program/detect multi-segment gamma.
> >>
> >> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> >>
> >> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
> >> Signed-off-by: Uma Shankar <uma.shankar@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_pci.c    |   3 +-
> >>   drivers/gpu/drm/i915/intel_color.c | 155 ++++++++++++++++++++++++++++-
> >>   2 files changed, 156 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> >> index ffa2ee70a03d..83698951760b 100644
> >> --- a/drivers/gpu/drm/i915/i915_pci.c
> >> +++ b/drivers/gpu/drm/i915/i915_pci.c
> >> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = {
> >>   	GEN(11), \
> >>   	.ddb_size = 2048, \
> >>   	.has_logical_ring_elsq = 1, \
> >> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
> >> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
> >> +
> >>   
> >>   static const struct intel_device_info intel_icelake_11_info = {
> >>   	GEN11_FEATURES,
> >> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
> >> index ca341a9e47e6..d1fb79a5d764 100644
> >> --- a/drivers/gpu/drm/i915/intel_color.c
> >> +++ b/drivers/gpu/drm/i915/intel_color.c
> >> @@ -41,6 +41,7 @@
> >>   #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
> >>   
> >>   #define LEGACY_LUT_LENGTH		256
> >> +#define ICL_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
> >>   /*
> >>    * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
> >>    * format). This macro takes the coefficient we want transformed and the
> >> @@ -58,6 +59,12 @@
> >>   
> >>   #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255)
> >>   
> >> +enum icl_ms_gamma_segments {
> >> +	ICL_MS_GAMMA_SEG_SUPERFINE,
> >> +	ICL_MS_GAMMA_SEG_FINE,
> >> +	ICL_MS_GAMMA_SEG_COARSE,
> >> +};
> >> +
> >>   static const u16 ilk_csc_off_zero[3] = {};
> >>   
> >>   static const u16 ilk_csc_coeff_identity[9] = {
> >> @@ -767,6 +774,149 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
> >>   	}
> >>   }
> >>   
> >> +/* ilk+ "12.4" interpolated format (high 10 bits) */
> >> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
> >> +{
> >> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
> >> +		(color->blue >> 6);
> >> +}
> >> +
> >> +/* ilk+ "12.4" interpolated format (low 6 bits) */
> >> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
> >> +{
> >> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
> >> +		(color->blue & 0x3f);
> >> +}
> >> +
> >> +static void
> >> +icl_program_gamma_gcmax(const struct intel_crtc_state *crtc_state)
> >> +{
> >> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> >> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> >> +	enum pipe pipe = crtc->pipe;
> >> +
> >> +	/*
> >> +	 * Program the max register to clamp values > 1.0.
> >> +	 * ToDo: Extend the ABI to be able to program values
> >> +	 * from 1.0
> >> +	 */
> >> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16));
> >> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16));
> >> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16));
> > This one I think we want to program based on the provide LUT. It's the
> > last entry that still gets used in interpolation for <1.0 values.
> > Or at least that's the way it works with the 12p4 mode IIRC. I don't
> > actually remember how it goes with the multi segment mode.
> I am a bit skeptical if its a good idea to program from the LUT, as you 
> know, the drm_color_lut elements are 16bit per channel, and in order to 
> program 1.0 we need 17 bits. So even if userspace wants, it cant set the 
> value of 1.0 as of now. Also, wouldn't it would be good for the 
> interpolation to have the accurate max value (1.0) of the curve ?

If userspace doesn't want the max to be 1.0 then we will do the wrong
thing here. But I suppose that wouldn't be a very common thing to do.
The better arguemnt for doing what I suggest is that we already do it
like that on the other platforms with interpolated gamma (gen4/vlv/chv).
Sharma, Shashank April 29, 2019, 12:41 p.m. UTC | #4
On 4/29/2019 5:19 PM, Ville Syrjälä wrote:
> On Mon, Apr 29, 2019 at 05:00:21PM +0530, Sharma, Shashank wrote:
>> On 4/26/2019 11:59 PM, Ville Syrjälä wrote:
>>> On Fri, Apr 26, 2019 at 11:31:51PM +0530, Shashank Sharma wrote:
>>>> ICL introduces a new gamma correction mode in display engine, called
>>>> multi-segmented-gamma mode. This mode allows users to program the
>>>> darker region of the gamma curve with sueprfine precision. An
>>>> example use case for this is HDR curves (like PQ ST-2084).
>>>>
>>>> If we plot a gamma correction curve from value range between 0.0 to 1.0,
>>>> ICL's multi-segment has 3 different sections:
>>>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
>>>> - fine segment: 257 values, ranges between 0 - 1/(128)
>>>> - corase segment: 257 values, ranges between 0 - 1
>>>>
>>>> This patch:
>>>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>>>>     so that userspace can program with highest precision supported.
>>>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
>>>> - Adds functions to program/detect multi-segment gamma.
>>>>
>>>> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>>> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
>>>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>>
>>>> Suggested-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>>> Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
>>>> Signed-off-by: Uma Shankar <uma.shankar@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/i915_pci.c    |   3 +-
>>>>    drivers/gpu/drm/i915/intel_color.c | 155 ++++++++++++++++++++++++++++-
>>>>    2 files changed, 156 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>>>> index ffa2ee70a03d..83698951760b 100644
>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>>>    	GEN(11), \
>>>>    	.ddb_size = 2048, \
>>>>    	.has_logical_ring_elsq = 1, \
>>>> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
>>>> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
>>>> +
>>>>    
>>>>    static const struct intel_device_info intel_icelake_11_info = {
>>>>    	GEN11_FEATURES,
>>>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
>>>> index ca341a9e47e6..d1fb79a5d764 100644
>>>> --- a/drivers/gpu/drm/i915/intel_color.c
>>>> +++ b/drivers/gpu/drm/i915/intel_color.c
>>>> @@ -41,6 +41,7 @@
>>>>    #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
>>>>    
>>>>    #define LEGACY_LUT_LENGTH		256
>>>> +#define ICL_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
>>>>    /*
>>>>     * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>>>>     * format). This macro takes the coefficient we want transformed and the
>>>> @@ -58,6 +59,12 @@
>>>>    
>>>>    #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255)
>>>>    
>>>> +enum icl_ms_gamma_segments {
>>>> +	ICL_MS_GAMMA_SEG_SUPERFINE,
>>>> +	ICL_MS_GAMMA_SEG_FINE,
>>>> +	ICL_MS_GAMMA_SEG_COARSE,
>>>> +};
>>>> +
>>>>    static const u16 ilk_csc_off_zero[3] = {};
>>>>    
>>>>    static const u16 ilk_csc_coeff_identity[9] = {
>>>> @@ -767,6 +774,149 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
>>>>    	}
>>>>    }
>>>>    
>>>> +/* ilk+ "12.4" interpolated format (high 10 bits) */
>>>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
>>>> +{
>>>> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
>>>> +		(color->blue >> 6);
>>>> +}
>>>> +
>>>> +/* ilk+ "12.4" interpolated format (low 6 bits) */
>>>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
>>>> +{
>>>> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
>>>> +		(color->blue & 0x3f);
>>>> +}
>>>> +
>>>> +static void
>>>> +icl_program_gamma_gcmax(const struct intel_crtc_state *crtc_state)
>>>> +{
>>>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>>>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>>>> +	enum pipe pipe = crtc->pipe;
>>>> +
>>>> +	/*
>>>> +	 * Program the max register to clamp values > 1.0.
>>>> +	 * ToDo: Extend the ABI to be able to program values
>>>> +	 * from 1.0
>>>> +	 */
>>>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16));
>>>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16));
>>>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16));
>>> This one I think we want to program based on the provide LUT. It's the
>>> last entry that still gets used in interpolation for <1.0 values.
>>> Or at least that's the way it works with the 12p4 mode IIRC. I don't
>>> actually remember how it goes with the multi segment mode.
>> I am a bit skeptical if its a good idea to program from the LUT, as you
>> know, the drm_color_lut elements are 16bit per channel, and in order to
>> program 1.0 we need 17 bits. So even if userspace wants, it cant set the
>> value of 1.0 as of now. Also, wouldn't it would be good for the
>> interpolation to have the accurate max value (1.0) of the curve ?
> If userspace doesn't want the max to be 1.0 then we will do the wrong
> thing here. But I suppose that wouldn't be a very common thing to do.
My point exactly. Most of the time userspace wants to apply a known curve.
> The better arguemnt for doing what I suggest is that we already do it
> like that on the other platforms with interpolated gamma (gen4/vlv/chv).

Ok, lets keep it the last value of the LUT then.

- Shashank
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ffa2ee70a03d..83698951760b 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -749,7 +749,8 @@  static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_logical_ring_elsq = 1, \
-	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
+	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
+
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index ca341a9e47e6..d1fb79a5d764 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -41,6 +41,7 @@ 
 #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
 
 #define LEGACY_LUT_LENGTH		256
+#define ICL_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
 /*
  * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
  * format). This macro takes the coefficient we want transformed and the
@@ -58,6 +59,12 @@ 
 
 #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255)
 
+enum icl_ms_gamma_segments {
+	ICL_MS_GAMMA_SEG_SUPERFINE,
+	ICL_MS_GAMMA_SEG_FINE,
+	ICL_MS_GAMMA_SEG_COARSE,
+};
+
 static const u16 ilk_csc_off_zero[3] = {};
 
 static const u16 ilk_csc_coeff_identity[9] = {
@@ -767,6 +774,149 @@  static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 	}
 }
 
+/* ilk+ "12.4" interpolated format (high 10 bits) */
+static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
+{
+	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
+		(color->blue >> 6);
+}
+
+/* ilk+ "12.4" interpolated format (low 6 bits) */
+static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
+{
+	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
+		(color->blue & 0x3f);
+}
+
+static void
+icl_program_gamma_gcmax(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	enum pipe pipe = crtc->pipe;
+
+	/*
+	 * Program the max register to clamp values > 1.0.
+	 * ToDo: Extend the ABI to be able to program values
+	 * from 1.0
+	 */
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16));
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16));
+	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16));
+
+	/*
+	 * Program the max register to clamp values > 1.0.
+	 * ToDo: Extend the ABI to be able to program values
+	 * from 1.0 to 3.0
+	 */
+	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), (1 << 16));
+	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), (1 << 16));
+	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), (1 << 16));
+
+	/*
+	 * Program the gc max 2 register to clamp values > 1.0.
+	 * ToDo: Extend the ABI to be able to program values
+	 * from 3.0 to 7.0
+	 */
+	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), (1 << 16));
+	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), (1 << 16));
+	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), (1 << 16));
+}
+
+static void
+icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state,
+				const struct drm_property_blob *blob,
+				enum icl_ms_gamma_segments segment)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	const struct drm_color_lut *lut = blob->data;
+	enum pipe pipe = crtc->pipe;
+	u32 i, word, seg_size, seg_step, seg_start = 0;
+	i915_reg_t index_reg, data_reg;
+
+	if (!lut || (drm_color_lut_size(blob) < ICL_MULTISEG_LUT_LENGTH)) {
+		DRM_DEBUG_KMS("Not programming gamma segment, invalid input\n");
+		return;
+	}
+
+	/*
+	 * Every entry in the multi-segment LUT is corresponding to a superfine
+	 * segment step which is 1/(8*128*256).
+	 *
+	 * Superfine segment has 9 entries, corresponding to values
+	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
+	 *
+	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
+	 * ... 256/(128*256). So in order to program fine segment of LUT we
+	 * need to pick every 8'th entry in LUT, and program 256 indexes.
+	 * Fine segment's index 0 is programmed in HW, and it starts from
+	 * index 1.
+	 *
+	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
+	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
+	 * above, we need to pick every 8 * 128 = 1024th entry in LUT, and
+	 * program 256 of those.
+	 */
+
+	switch (segment) {
+	case ICL_MS_GAMMA_SEG_SUPERFINE:
+		seg_size = 9;
+		seg_step = 1;
+		index_reg = PREC_PAL_MULTI_SEG_INDEX(pipe);
+		data_reg = PREC_PAL_MULTI_SEG_DATA(pipe);
+		break;
+
+	case ICL_MS_GAMMA_SEG_FINE:
+		seg_start = 1;
+		seg_size = 256;
+		seg_step = 8;
+		index_reg = PREC_PAL_INDEX(pipe);
+		data_reg = PREC_PAL_DATA(pipe);
+		break;
+
+	case ICL_MS_GAMMA_SEG_COARSE:
+		seg_size = 256;
+		seg_step = 1024;
+		index_reg = PREC_PAL_INDEX(pipe);
+		data_reg = PREC_PAL_DATA(pipe);
+		break;
+
+	default:
+		DRM_ERROR("Invalid gamma segment %d to program\n", segment);
+		return;
+	}
+
+	I915_WRITE(index_reg, PAL_PREC_AUTO_INCREMENT);
+
+	for (i = seg_start; i < seg_size; i++) {
+		/* Even Index */
+		word = ilk_lut_12p4_udw(&lut[i * seg_step]);
+		I915_WRITE(data_reg, word);
+
+		/* Odd index */
+		word = ilk_lut_12p4_ldw(&lut[i * seg_step]);
+		I915_WRITE(data_reg, word);
+	}
+}
+
+static void
+icl_load_gamma_multi_seg_lut(const struct intel_crtc_state *crtc_state)
+{
+	const struct drm_property_blob *lut_blob = crtc_state->base.gamma_lut;
+
+	if (!lut_blob)
+		return;
+
+	icl_program_gamma_multi_segment(crtc_state, lut_blob,
+					ICL_MS_GAMMA_SEG_SUPERFINE);
+	icl_program_gamma_multi_segment(crtc_state, lut_blob,
+					ICL_MS_GAMMA_SEG_FINE);
+	icl_program_gamma_multi_segment(crtc_state, lut_blob,
+					ICL_MS_GAMMA_SEG_COARSE);
+	icl_program_gamma_gcmax(crtc_state);
+}
+
 static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
@@ -776,6 +926,9 @@  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 		glk_load_degamma_lut(crtc_state);
 
 	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
+		GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) {
+		icl_load_gamma_multi_seg_lut(crtc_state);
+	} else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
 	    GAMMA_MODE_MODE_8BIT) {
 		i9xx_load_luts(crtc_state);
 	} else {
@@ -1187,7 +1340,7 @@  static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
 	    crtc_state_is_legacy_gamma(crtc_state))
 		gamma_mode |= GAMMA_MODE_MODE_8BIT;
 	else
-		gamma_mode |= GAMMA_MODE_MODE_10BIT;
+		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
 
 	return gamma_mode;
 }