diff mbox

[RFC] drm/i915: Render decompression support for Gen9 and above

Message ID 1441395770-31339-1-git-send-email-vandana.kannan@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

vandana.kannan@intel.com Sept. 4, 2015, 7:42 p.m. UTC
This patch includes enabling render decompression after checking all the
requirements (format, tiling, rotation etc.). Along with this, the WAs
mentioned in BSpec Workaround page have been implemented.

This patch has been implemented on top of Nabendu/Chandra's NV12 patches.

TODO:
1. Disable stereo 3D when render decomp is enabled (bit 7:6)
2. Render decompression must not be used in VTd pass-through mode
3. Program hashing select CHICKEN_MISC1 bit 15
4. For Gen10, add support for RGB 1010102

Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
---
 drivers/gpu/drm/drm_atomic.c         |   4 +
 drivers/gpu/drm/drm_crtc.c           |  16 ++++
 drivers/gpu/drm/i915/intel_display.c | 174 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |   7 ++
 drivers/gpu/drm/i915/intel_sprite.c  |  35 +++++++
 include/drm/drm_crtc.h               |  11 +++
 6 files changed, 247 insertions(+)

Comments

Daniel Vetter Sept. 7, 2015, 4:35 p.m. UTC | #1
On Sat, Sep 05, 2015 at 01:12:50AM +0530, Vandana Kannan wrote:
> This patch includes enabling render decompression after checking all the
> requirements (format, tiling, rotation etc.). Along with this, the WAs
> mentioned in BSpec Workaround page have been implemented.
> 
> This patch has been implemented on top of Nabendu/Chandra's NV12 patches.
> 
> TODO:
> 1. Disable stereo 3D when render decomp is enabled (bit 7:6)
> 2. Render decompression must not be used in VTd pass-through mode
> 3. Program hashing select CHICKEN_MISC1 bit 15
> 4. For Gen10, add support for RGB 1010102
> 
> Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
> ---
>  drivers/gpu/drm/drm_atomic.c         |   4 +
>  drivers/gpu/drm/drm_crtc.c           |  16 ++++
>  drivers/gpu/drm/i915/intel_display.c | 174 +++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_drv.h     |   7 ++
>  drivers/gpu/drm/i915/intel_sprite.c  |  35 +++++++
>  include/drm/drm_crtc.h               |  11 +++
>  6 files changed, 247 insertions(+)
> 
> diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
> index 940f80b..d9004e8 100644
> --- a/drivers/gpu/drm/drm_atomic.c
> +++ b/drivers/gpu/drm/drm_atomic.c
> @@ -607,6 +607,8 @@ int drm_atomic_plane_set_property(struct drm_plane *plane,
>  		state->src_h = val;
>  	} else if (property == config->rotation_property) {
>  		state->rotation = val;
> +	} else if (property == config->compression_property) {
> +		state->compression = val;

Please use a framebuffer modifier instead. Also this needs userspace.
-Daniel

>  	} else if (plane->funcs->atomic_set_property) {
>  		return plane->funcs->atomic_set_property(plane, state,
>  				property, val);
> @@ -654,6 +656,8 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
>  		*val = state->src_h;
>  	} else if (property == config->rotation_property) {
>  		*val = state->rotation;
> +	} else if (property == config->compression_property) {
> +		*val = state->compression;
>  	} else if (plane->funcs->atomic_get_property) {
>  		return plane->funcs->atomic_get_property(plane, state, property, val);
>  	} else {
> diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
> index 474f328..66f817b 100644
> --- a/drivers/gpu/drm/drm_crtc.c
> +++ b/drivers/gpu/drm/drm_crtc.c
> @@ -5778,6 +5778,22 @@ struct drm_property *drm_mode_create_rotation_property(struct drm_device *dev,
>  }
>  EXPORT_SYMBOL(drm_mode_create_rotation_property);
>  
> +struct drm_property *drm_mode_create_compression_property(
> +		struct drm_device *dev,
> +		unsigned int comp_caps)
> +{
> +	static const struct drm_prop_enum_list props[] = {
> +		{ DRM_COMP_NONE,   "Not compression capable" },
> +		{ DRM_COMP_RENDER,  "Only render decompression" },
> +	};
> +
> +	return drm_property_create_bitmask(dev, 0, "render compression",
> +			props, ARRAY_SIZE(props),
> +			comp_caps);
> +}
> +EXPORT_SYMBOL(drm_mode_create_compression_property);
> +
> +
>  /**
>   * DOC: Tile group
>   *
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 59d6736..115736f 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -3120,8 +3120,47 @@ static void skylake_update_primary_plane(struct drm_crtc *crtc,
>  
>  			hphase = 0x00010001;  /* use trip for both Y and UV */
>  			vphase = 0x00012000;  /* use trip for Y and phase 0.5 for UV */
> +
> +			/*
> +			 * On SKL-C and BXT-A,
> +			 * when the plane source pixel format is NV12,
> +			 * the CHICKEN_PIPESL_* register bit 22 must be
> +			 * set to 1
> +			 */
> +			if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
> +					INTEL_REVID(dev) == SKL_REVID_C0))
> +				I915_WRITE(CHICKEN_PIPESL_1(pipe),
> +					I915_READ(CHICKEN_PIPESL_1(pipe)) |
> +					HSW_FBCQ_DIS);
>  		}
>  	}
> +
> +	if (plane->state->compression) {
> +		/*
> +		 * FIXME: Check if aux_dist ad aux_stride can be taken as it
> +		 * is from userspace.
> +		 */
> +		aux_dist = fb->offsets[1];
> +		aux_stride = fb->pitches[1];
> +		/*
> +		 * For SKL and BXT,
> +		 * When the render compression is enabled with plane
> +		 * width greater than 3840 and horizontal panning,
> +		 * the stride programmed in the PLANE_STRIDE register
> +		 * must be multiple of 4.
> +		 */
> +		if (x > 3840 && x_offset != 0)
> +			stride = stride - (stride % 4);
> +		if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
> +					INTEL_REVID(dev) == SKL_REVID_C0))
> +			I915_WRITE(CHICKEN_PIPESL_1(pipe),
> +					I915_READ(CHICKEN_PIPESL_1(pipe)) &
> +					~HSW_FBCQ_DIS);
> +		plane_ctl |= PLANE_CTL_DECOMPRESSION_ENABLE;
> +	} else {
> +		plane_ctl &= ~PLANE_CTL_DECOMPRESSION_ENABLE;
> +	}
> +
>  	plane_offset = y_offset << 16 | x_offset;
>  
>  	I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
> @@ -11760,6 +11799,29 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
>  		    plane_state->rotation != BIT(DRM_ROTATE_0))
>  			intel_crtc->atomic.disable_fbc = true;
>  
> +		if (INTEL_INFO(dev)->gen >= 9) {
> +			if (plane_state->compression) {
> +				ret = skl_check_compression(dev,
> +					to_intel_plane(plane),
> +					to_intel_plane_state(plane_state),
> +					intel_crtc->pipe);
> +				if (ret)
> +					return ret;
> +			}
> +		}
> +
> +		/*
> +		 * Disable FBC if render decompression has to be enabled.
> +		 * FIXME: If FBC is disabled here because render decomp
> +		 * has to be enabled, then in update_primary_plane(), if
> +		 * render decomp is disabled for some reason, we need to
> +		 * enable FBC ?
> +		 */
> +		if (IS_SKYLAKE(dev) && dev_priv->fbc.crtc == intel_crtc &&
> +				plane_state->compression) {
> +			intel_crtc->atomic.disable_fbc = true;
> +		}
> +
>  		/*
>  		 * BDW signals flip done immediately if the plane
>  		 * is disabled, even if the plane enable is already
> @@ -11778,6 +11840,16 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
>  			intel_crtc->atomic.update_sprite_watermarks |=
>  				1 << i;
>  		}
> +		if (INTEL_INFO(dev)->gen >= 9) {
> +			if (plane_state->compression) {
> +				ret = skl_check_compression(dev,
> +					to_intel_plane(plane),
> +					to_intel_plane_state(plane_state),
> +					intel_crtc->pipe);
> +				if (ret)
> +					return ret;
> +			}
> +		}
>  	}
>  	return 0;
>  }
> @@ -13495,6 +13567,88 @@ skl_max_scale(struct intel_crtc *intel_crtc,
>  	return max_scale;
>  }
>  
> +int skl_check_compression(struct drm_device *dev,
> +		struct intel_plane *intel_plane,
> +		struct intel_plane_state *plane_state,
> +		enum pipe pipe)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_rect *src = &plane_state->src;
> +	struct drm_framebuffer *fb;
> +	int i;
> +
> +	fb = intel_plane ? plane_state->base.fb : NULL;
> +
> +	/*
> +	 * TODO:
> +	 * 1. Disable stereo 3D when render decomp is enabled (bit 7:6)
> +	 * 2. Render decompression must not be used in VTd pass-through mode
> +	 * 3. Program hashing select CHICKEN_MISC1 bit 15
> +	 */
> +
> +	/*
> +	 * On SKL A and SKL B,
> +	 * Do not enable render decompression when the plane
> +	 * width is smaller than 32 pixels or greater than
> +	 * 2048 pixels
> +	 */
> +	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) < SKL_REVID_C0)
> +			&& ((src->x1 >> 16) > 2048))
> +		plane_state->base.compression = DRM_COMP_NONE;
> +
> +	if (!plane_state->base.compression)
> +		return -EINVAL;
> +
> +	/*
> +	 * On SKL:*:C and BXT:*:A, there is a possible hang with NV12 format.
> +	 * WA: When the plane source pixel format is NV12,
> +	 * the CHICKEN_PIPESL_* register bit 22 must be set to 1 and the
> +	 * render decompression must not be enabled on any of the planes in
> +	 * that pipe.
> +	 */
> +	if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
> +				INTEL_REVID(dev) == SKL_REVID_C0)) {
> +		struct drm_plane *drm_plane;
> +		struct drm_plane_state *state;
> +
> +		for_each_plane(dev_priv, pipe, i) {
> +			drm_plane = drm_plane_from_index(dev, i);
> +			if (drm_plane) {
> +				state = drm_plane->state;
> +				if (state) {
> +					if (state->fb &&
> +							state->fb->pixel_format
> +							== DRM_FORMAT_NV12) {
> +						plane_state->base.compression =
> +							DRM_COMP_NONE;
> +					}
> +				}
> +			}
> +		}
> +	}
> +
> +	if (!plane_state->base.compression)
> +		return -EINVAL;
> +
> +	/*
> +	 * Conditions to satisfy before enabling render decomp.
> +	 * SKL+
> +	 * Pipe A & B, Planes 1 & 2
> +	 * RGB8888 Tile-Y format
> +	 * 0/180 rotation
> +	 */
> +	if ((pipe == PIPE_C) ||
> +			((fb->pixel_format != DRM_FORMAT_XRGB8888) &&
> +			 (fb->pixel_format != DRM_FORMAT_ARGB8888)) ||
> +			intel_rotation_90_or_270(plane_state->base.rotation) ||
> +			((fb->modifier[0] == DRM_FORMAT_MOD_NONE) ||
> +			 (fb->modifier[0] == I915_FORMAT_MOD_X_TILED))) {
> +		plane_state->base.compression = DRM_COMP_NONE;
> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
>  static int
>  intel_check_primary_plane(struct drm_plane *plane,
>  			  struct intel_crtc_state *crtc_state,
> @@ -13670,11 +13824,31 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
>  	if (INTEL_INFO(dev)->gen >= 4)
>  		intel_create_rotation_property(dev, primary);
>  
> +	intel_create_compression_property(dev, primary);
> +
>  	drm_plane_helper_add(&primary->base, &intel_plane_helper_funcs);
>  
>  	return &primary->base;
>  }
>  
> +void intel_create_compression_property(struct drm_device *dev,
> +		struct intel_plane *plane)
> +{
> +	/* Render compression is applicable only for plane 1 & 2 */
> +	if (INTEL_INFO(dev)->gen >= 9 && (plane->plane <= 1)) {
> +		if (!dev->mode_config.compression_property)
> +			dev->mode_config.compression_property =
> +				drm_mode_create_compression_property(dev,
> +						BIT(DRM_COMP_NONE) |
> +						BIT(DRM_COMP_RENDER));
> +
> +		if (dev->mode_config.compression_property)
> +			drm_object_attach_property(&plane->base.base,
> +					dev->mode_config.compression_property,
> +					plane->base.state->compression);
> +	}
> +}
> +
>  void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane)
>  {
>  	if (!dev->mode_config.rotation_property) {
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 6cd6cb0..bae22eb 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1090,6 +1090,13 @@ intel_rotation_90_or_270(unsigned int rotation)
>  void intel_create_rotation_property(struct drm_device *dev,
>  					struct intel_plane *plane);
>  
> +void intel_create_compression_property(struct drm_device *dev,
> +					struct intel_plane *plane);
> +int skl_check_compression(struct drm_device *dev,
> +			struct intel_plane *intel_plane,
> +			struct intel_plane_state *plane_state,
> +			enum pipe pipe);
> +
>  /* shared dpll functions */
>  struct intel_shared_dpll *intel_crtc_to_shared_dpll(struct intel_crtc *crtc);
>  void assert_shared_dpll(struct drm_i915_private *dev_priv,
> diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
> index 2f9f856..c6cf4ad 100644
> --- a/drivers/gpu/drm/i915/intel_sprite.c
> +++ b/drivers/gpu/drm/i915/intel_sprite.c
> @@ -275,8 +275,41 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
>  
>  			hphase = 0x00010001;  /* use trip for both Y and UV */
>  			vphase = 0x00012000;  /* use trip for Y and phase 0.5 for UV */
> +
> +			if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
> +					INTEL_REVID(dev) == SKL_REVID_C0))
> +				I915_WRITE(CHICKEN_PIPESL_1(pipe),
> +					I915_READ(CHICKEN_PIPESL_1(pipe)) |
> +					HSW_FBCQ_DIS);
>  		}
>  	}
> +
> +	if (drm_plane->state->compression) {
> +		/*
> +		 * FIXME: Check if aux_dist ad aux_stride can be taken as it
> +		 * is from userspace.
> +		 */
> +		aux_dist = fb->offsets[1];
> +		aux_stride = fb->pitches[1];
> +		/*
> +		 * For SKL and BXT,
> +		 * When the render compression is enabled with plane
> +		 * width greater than 3840 and horizontal panning,
> +		 * the stride programmed in the PLANE_STRIDE register
> +		 * must be multiple of 4.
> +		 */
> +		if (x > 3840 && x_offset != 0)
> +			stride = stride - (stride % 4);
> +		if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
> +					INTEL_REVID(dev) == SKL_REVID_C0))
> +			I915_WRITE(CHICKEN_PIPESL_1(pipe),
> +					I915_READ(CHICKEN_PIPESL_1(pipe)) &
> +					~HSW_FBCQ_DIS);
> +		plane_ctl |= PLANE_CTL_DECOMPRESSION_ENABLE;
> +	} else {
> +		plane_ctl &= ~PLANE_CTL_DECOMPRESSION_ENABLE;
> +	}
> +
>  	plane_offset = y_offset << 16 | x_offset;
>  
>  	I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset);
> @@ -1217,6 +1250,8 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane)
>  
>  	intel_create_rotation_property(dev, intel_plane);
>  
> +	intel_create_compression_property(dev, intel_plane);
> +
>  	drm_plane_helper_add(&intel_plane->base, &intel_plane_helper_funcs);
>  
>  out:
> diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
> index 75f49c1..02a9aaa 100644
> --- a/include/drm/drm_crtc.h
> +++ b/include/drm/drm_crtc.h
> @@ -93,6 +93,10 @@ static inline uint64_t I642U64(int64_t val)
>  #define DRM_REFLECT_X	4
>  #define DRM_REFLECT_Y	5
>  
> +/* render compression property bits */
> +#define DRM_COMP_NONE		0
> +#define DRM_COMP_RENDER		1
> +
>  enum drm_connector_force {
>  	DRM_FORCE_UNSPECIFIED,
>  	DRM_FORCE_OFF,
> @@ -783,6 +787,9 @@ struct drm_plane_state {
>  	/* Plane rotation */
>  	unsigned int rotation;
>  
> +	/* Render compression */
> +	unsigned int compression;
> +
>  	struct drm_atomic_state *state;
>  };
>  
> @@ -1114,6 +1121,7 @@ struct drm_mode_config {
>  	struct drm_property *tile_property;
>  	struct drm_property *plane_type_property;
>  	struct drm_property *rotation_property;
> +	struct drm_property *compression_property;
>  	struct drm_property *prop_src_x;
>  	struct drm_property *prop_src_y;
>  	struct drm_property *prop_src_w;
> @@ -1507,6 +1515,9 @@ extern int drm_format_vert_chroma_subsampling(uint32_t format);
>  extern const char *drm_get_format_name(uint32_t format);
>  extern struct drm_property *drm_mode_create_rotation_property(struct drm_device *dev,
>  							      unsigned int supported_rotations);
> +extern struct drm_property *drm_mode_create_compression_property(
> +				struct drm_device *dev,
> +				unsigned int comp_caps);
>  extern unsigned int drm_rotation_simplify(unsigned int rotation,
>  					  unsigned int supported_rotations);
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Jesse Barnes Sept. 8, 2015, 10:07 p.m. UTC | #2
On 09/07/2015 09:35 AM, Daniel Vetter wrote:
> On Sat, Sep 05, 2015 at 01:12:50AM +0530, Vandana Kannan wrote:
>> This patch includes enabling render decompression after checking all the
>> requirements (format, tiling, rotation etc.). Along with this, the WAs
>> mentioned in BSpec Workaround page have been implemented.
>>
>> This patch has been implemented on top of Nabendu/Chandra's NV12 patches.
>>
>> TODO:
>> 1. Disable stereo 3D when render decomp is enabled (bit 7:6)
>> 2. Render decompression must not be used in VTd pass-through mode
>> 3. Program hashing select CHICKEN_MISC1 bit 15
>> 4. For Gen10, add support for RGB 1010102
>>
>> Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
>> ---
>>  drivers/gpu/drm/drm_atomic.c         |   4 +
>>  drivers/gpu/drm/drm_crtc.c           |  16 ++++
>>  drivers/gpu/drm/i915/intel_display.c | 174 +++++++++++++++++++++++++++++++++++
>>  drivers/gpu/drm/i915/intel_drv.h     |   7 ++
>>  drivers/gpu/drm/i915/intel_sprite.c  |  35 +++++++
>>  include/drm/drm_crtc.h               |  11 +++
>>  6 files changed, 247 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
>> index 940f80b..d9004e8 100644
>> --- a/drivers/gpu/drm/drm_atomic.c
>> +++ b/drivers/gpu/drm/drm_atomic.c
>> @@ -607,6 +607,8 @@ int drm_atomic_plane_set_property(struct drm_plane *plane,
>>  		state->src_h = val;
>>  	} else if (property == config->rotation_property) {
>>  		state->rotation = val;
>> +	} else if (property == config->compression_property) {
>> +		state->compression = val;
> 
> Please use a framebuffer modifier instead. Also this needs userspace.

I thought we already agreed, based on feedback from the userspace guys,
that a property was easier to use and therefore the way to go?

Jesse
Daniel Vetter Sept. 9, 2015, 3:23 p.m. UTC | #3
On Tue, Sep 08, 2015 at 03:07:40PM -0700, Jesse Barnes wrote:
> On 09/07/2015 09:35 AM, Daniel Vetter wrote:
> > On Sat, Sep 05, 2015 at 01:12:50AM +0530, Vandana Kannan wrote:
> >> This patch includes enabling render decompression after checking all the
> >> requirements (format, tiling, rotation etc.). Along with this, the WAs
> >> mentioned in BSpec Workaround page have been implemented.
> >>
> >> This patch has been implemented on top of Nabendu/Chandra's NV12 patches.
> >>
> >> TODO:
> >> 1. Disable stereo 3D when render decomp is enabled (bit 7:6)
> >> 2. Render decompression must not be used in VTd pass-through mode
> >> 3. Program hashing select CHICKEN_MISC1 bit 15
> >> 4. For Gen10, add support for RGB 1010102
> >>
> >> Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
> >> ---
> >>  drivers/gpu/drm/drm_atomic.c         |   4 +
> >>  drivers/gpu/drm/drm_crtc.c           |  16 ++++
> >>  drivers/gpu/drm/i915/intel_display.c | 174 +++++++++++++++++++++++++++++++++++
> >>  drivers/gpu/drm/i915/intel_drv.h     |   7 ++
> >>  drivers/gpu/drm/i915/intel_sprite.c  |  35 +++++++
> >>  include/drm/drm_crtc.h               |  11 +++
> >>  6 files changed, 247 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
> >> index 940f80b..d9004e8 100644
> >> --- a/drivers/gpu/drm/drm_atomic.c
> >> +++ b/drivers/gpu/drm/drm_atomic.c
> >> @@ -607,6 +607,8 @@ int drm_atomic_plane_set_property(struct drm_plane *plane,
> >>  		state->src_h = val;
> >>  	} else if (property == config->rotation_property) {
> >>  		state->rotation = val;
> >> +	} else if (property == config->compression_property) {
> >> +		state->compression = val;
> > 
> > Please use a framebuffer modifier instead. Also this needs userspace.
> 
> I thought we already agreed, based on feedback from the userspace guys,
> that a property was easier to use and therefore the way to go?

Blob hwc want a property because they're afraid of the overhead of
creating an additional drm fb object. Until I see data that that overhead
is real I see no reason at all to have something else than what the
community consensus for these features from 1 year ago at xdc bordeaux.

If someone disagrees please convince Rob Clark and Thierry Redding (and
whomever else took part in that discussion) that we need to change this, I
personally don't see the value in this particular bikeshed.
-Daniel
Jesse Barnes Sept. 9, 2015, 3:24 p.m. UTC | #4
On 09/09/2015 08:23 AM, Daniel Vetter wrote:
> On Tue, Sep 08, 2015 at 03:07:40PM -0700, Jesse Barnes wrote:
>> On 09/07/2015 09:35 AM, Daniel Vetter wrote:
>>> On Sat, Sep 05, 2015 at 01:12:50AM +0530, Vandana Kannan wrote:
>>>> This patch includes enabling render decompression after checking all the
>>>> requirements (format, tiling, rotation etc.). Along with this, the WAs
>>>> mentioned in BSpec Workaround page have been implemented.
>>>>
>>>> This patch has been implemented on top of Nabendu/Chandra's NV12 patches.
>>>>
>>>> TODO:
>>>> 1. Disable stereo 3D when render decomp is enabled (bit 7:6)
>>>> 2. Render decompression must not be used in VTd pass-through mode
>>>> 3. Program hashing select CHICKEN_MISC1 bit 15
>>>> 4. For Gen10, add support for RGB 1010102
>>>>
>>>> Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
>>>> ---
>>>>  drivers/gpu/drm/drm_atomic.c         |   4 +
>>>>  drivers/gpu/drm/drm_crtc.c           |  16 ++++
>>>>  drivers/gpu/drm/i915/intel_display.c | 174 +++++++++++++++++++++++++++++++++++
>>>>  drivers/gpu/drm/i915/intel_drv.h     |   7 ++
>>>>  drivers/gpu/drm/i915/intel_sprite.c  |  35 +++++++
>>>>  include/drm/drm_crtc.h               |  11 +++
>>>>  6 files changed, 247 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
>>>> index 940f80b..d9004e8 100644
>>>> --- a/drivers/gpu/drm/drm_atomic.c
>>>> +++ b/drivers/gpu/drm/drm_atomic.c
>>>> @@ -607,6 +607,8 @@ int drm_atomic_plane_set_property(struct drm_plane *plane,
>>>>  		state->src_h = val;
>>>>  	} else if (property == config->rotation_property) {
>>>>  		state->rotation = val;
>>>> +	} else if (property == config->compression_property) {
>>>> +		state->compression = val;
>>>
>>> Please use a framebuffer modifier instead. Also this needs userspace.
>>
>> I thought we already agreed, based on feedback from the userspace guys,
>> that a property was easier to use and therefore the way to go?
> 
> Blob hwc want a property because they're afraid of the overhead of
> creating an additional drm fb object. Until I see data that that overhead
> is real I see no reason at all to have something else than what the
> community consensus for these features from 1 year ago at xdc bordeaux.
> 
> If someone disagrees please convince Rob Clark and Thierry Redding (and
> whomever else took part in that discussion) that we need to change this, I
> personally don't see the value in this particular bikeshed.

I don't think it was overhead, just convenience and reasoning about how
the feature is used.  Cc'ing Gary for more background.

Jesse
Smith, Gary K Sept. 9, 2015, 4:36 p.m. UTC | #5
I don't understand why this is an issue. Surely the fb is to describe static state about the buffer, not dynamic state. The fb should be created with the compressed modifier. The compressed property is just a hint to the kernel that the buffer has been completely resolved, hence currently it can be treated as an uncompressed fb (and the aux buffer can be ignored). This is dynamic state that may well change very regularly over the lifetime of the buffer.

It's still a compressed fb, it contains a aux buffer and had to be created with the compressed fb modifier. However, once the userspace has fully resolved the buffer, the aux buffer can be ignored and the compressed fb can be used in any situation where an uncompressed fb would normally be required. This is dynamic state that may well change very regularly over the lifetime of the buffer.

I could allocate two fbs always, and use the appropriate one. We already do this in order to indicate whether a RGBA buffer currently needs to be considered as opaque (RGBX) or blended. Experience has shown that it makes it very complex to debug when the fb keeps on changing its value. However, because we now have 4 different states (Blended/Opaque and Compressed or Resolved), we will now end up with up to 4 fbs per buffer.

We aren't just talking about a few fbs here, we already see more than 100 fbs active during complex situations. Potentially doubling this number is surely a significant increase in memory usage, both from the management side in userspace and the kernel side.

Thanks
Gary



-----Original Message-----
From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org] 
Sent: Wednesday, September 9, 2015 4:25 PM
To: Daniel Vetter
Cc: Kannan, Vandana; intel-gfx@lists.freedesktop.org; Smith, Gary K
Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support for Gen9 and above

On 09/09/2015 08:23 AM, Daniel Vetter wrote:
> On Tue, Sep 08, 2015 at 03:07:40PM -0700, Jesse Barnes wrote:
>> On 09/07/2015 09:35 AM, Daniel Vetter wrote:
>>> On Sat, Sep 05, 2015 at 01:12:50AM +0530, Vandana Kannan wrote:
>>>> This patch includes enabling render decompression after checking 
>>>> all the requirements (format, tiling, rotation etc.). Along with 
>>>> this, the WAs mentioned in BSpec Workaround page have been implemented.
>>>>
>>>> This patch has been implemented on top of Nabendu/Chandra's NV12 patches.
>>>>
>>>> TODO:
>>>> 1. Disable stereo 3D when render decomp is enabled (bit 7:6) 2. 
>>>> Render decompression must not be used in VTd pass-through mode 3. 
>>>> Program hashing select CHICKEN_MISC1 bit 15 4. For Gen10, add 
>>>> support for RGB 1010102
>>>>
>>>> Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
>>>> ---
>>>>  drivers/gpu/drm/drm_atomic.c         |   4 +
>>>>  drivers/gpu/drm/drm_crtc.c           |  16 ++++
>>>>  drivers/gpu/drm/i915/intel_display.c | 174 +++++++++++++++++++++++++++++++++++
>>>>  drivers/gpu/drm/i915/intel_drv.h     |   7 ++
>>>>  drivers/gpu/drm/i915/intel_sprite.c  |  35 +++++++
>>>>  include/drm/drm_crtc.h               |  11 +++
>>>>  6 files changed, 247 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_atomic.c 
>>>> b/drivers/gpu/drm/drm_atomic.c index 940f80b..d9004e8 100644
>>>> --- a/drivers/gpu/drm/drm_atomic.c
>>>> +++ b/drivers/gpu/drm/drm_atomic.c
>>>> @@ -607,6 +607,8 @@ int drm_atomic_plane_set_property(struct drm_plane *plane,
>>>>  		state->src_h = val;
>>>>  	} else if (property == config->rotation_property) {
>>>>  		state->rotation = val;
>>>> +	} else if (property == config->compression_property) {
>>>> +		state->compression = val;
>>>
>>> Please use a framebuffer modifier instead. Also this needs userspace.
>>
>> I thought we already agreed, based on feedback from the userspace 
>> guys, that a property was easier to use and therefore the way to go?
> 
> Blob hwc want a property because they're afraid of the overhead of 
> creating an additional drm fb object. Until I see data that that 
> overhead is real I see no reason at all to have something else than 
> what the community consensus for these features from 1 year ago at xdc bordeaux.
> 
> If someone disagrees please convince Rob Clark and Thierry Redding 
> (and whomever else took part in that discussion) that we need to 
> change this, I personally don't see the value in this particular bikeshed.

I don't think it was overhead, just convenience and reasoning about how the feature is used.  Cc'ing Gary for more background.

Jesse

---------------------------------------------------------------------
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Jesse Barnes Sept. 9, 2015, 5:04 p.m. UTC | #6
[Adding Rob & Thierry]

On 09/09/2015 09:36 AM, Smith, Gary K wrote:
> I don't understand why this is an issue. Surely the fb is to describe static state about the buffer, not dynamic state. The fb should be created with the compressed modifier. The compressed property is just a hint to the kernel that the buffer has been completely resolved, hence currently it can be treated as an uncompressed fb (and the aux buffer can be ignored). This is dynamic state that may well change very regularly over the lifetime of the buffer.
> 
> It's still a compressed fb, it contains a aux buffer and had to be created with the compressed fb modifier. However, once the userspace has fully resolved the buffer, the aux buffer can be ignored and the compressed fb can be used in any situation where an uncompressed fb would normally be required. This is dynamic state that may well change very regularly over the lifetime of the buffer.
> 
> I could allocate two fbs always, and use the appropriate one. We already do this in order to indicate whether a RGBA buffer currently needs to be considered as opaque (RGBX) or blended. Experience has shown that it makes it very complex to debug when the fb keeps on changing its value. However, because we now have 4 different states (Blended/Opaque and Compressed or Resolved), we will now end up with up to 4 fbs per buffer.
> 
> We aren't just talking about a few fbs here, we already see more than 100 fbs active during complex situations. Potentially doubling this number is surely a significant increase in memory usage, both from the management side in userspace and the kernel side.
> 
> Thanks
> Gary
> 
> 
> 
> -----Original Message-----
> From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org] 
> Sent: Wednesday, September 9, 2015 4:25 PM
> To: Daniel Vetter
> Cc: Kannan, Vandana; intel-gfx@lists.freedesktop.org; Smith, Gary K
> Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support for Gen9 and above
> 
> On 09/09/2015 08:23 AM, Daniel Vetter wrote:
>> On Tue, Sep 08, 2015 at 03:07:40PM -0700, Jesse Barnes wrote:
>>> On 09/07/2015 09:35 AM, Daniel Vetter wrote:
>>>> On Sat, Sep 05, 2015 at 01:12:50AM +0530, Vandana Kannan wrote:
>>>>> This patch includes enabling render decompression after checking 
>>>>> all the requirements (format, tiling, rotation etc.). Along with 
>>>>> this, the WAs mentioned in BSpec Workaround page have been implemented.
>>>>>
>>>>> This patch has been implemented on top of Nabendu/Chandra's NV12 patches.
>>>>>
>>>>> TODO:
>>>>> 1. Disable stereo 3D when render decomp is enabled (bit 7:6) 2. 
>>>>> Render decompression must not be used in VTd pass-through mode 3. 
>>>>> Program hashing select CHICKEN_MISC1 bit 15 4. For Gen10, add 
>>>>> support for RGB 1010102
>>>>>
>>>>> Signed-off-by: Vandana Kannan <vandana.kannan@intel.com>
>>>>> ---
>>>>>  drivers/gpu/drm/drm_atomic.c         |   4 +
>>>>>  drivers/gpu/drm/drm_crtc.c           |  16 ++++
>>>>>  drivers/gpu/drm/i915/intel_display.c | 174 +++++++++++++++++++++++++++++++++++
>>>>>  drivers/gpu/drm/i915/intel_drv.h     |   7 ++
>>>>>  drivers/gpu/drm/i915/intel_sprite.c  |  35 +++++++
>>>>>  include/drm/drm_crtc.h               |  11 +++
>>>>>  6 files changed, 247 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_atomic.c 
>>>>> b/drivers/gpu/drm/drm_atomic.c index 940f80b..d9004e8 100644
>>>>> --- a/drivers/gpu/drm/drm_atomic.c
>>>>> +++ b/drivers/gpu/drm/drm_atomic.c
>>>>> @@ -607,6 +607,8 @@ int drm_atomic_plane_set_property(struct drm_plane *plane,
>>>>>  		state->src_h = val;
>>>>>  	} else if (property == config->rotation_property) {
>>>>>  		state->rotation = val;
>>>>> +	} else if (property == config->compression_property) {
>>>>> +		state->compression = val;
>>>>
>>>> Please use a framebuffer modifier instead. Also this needs userspace.
>>>
>>> I thought we already agreed, based on feedback from the userspace 
>>> guys, that a property was easier to use and therefore the way to go?
>>
>> Blob hwc want a property because they're afraid of the overhead of 
>> creating an additional drm fb object. Until I see data that that 
>> overhead is real I see no reason at all to have something else than 
>> what the community consensus for these features from 1 year ago at xdc bordeaux.
>>
>> If someone disagrees please convince Rob Clark and Thierry Redding 
>> (and whomever else took part in that discussion) that we need to 
>> change this, I personally don't see the value in this particular bikeshed.
> 
> I don't think it was overhead, just convenience and reasoning about how the feature is used.  Cc'ing Gary for more background.
> 
> Jesse
> 
> ---------------------------------------------------------------------
> Intel Corporation (UK) Limited
> Registered No. 1134945 (England)
> Registered Office: Pipers Way, Swindon SN3 1RJ
> VAT No: 860 2173 47
> 
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
> 
>
Daniel Vetter Sept. 10, 2015, 3:02 p.m. UTC | #7
On Wed, Sep 09, 2015 at 10:04:23AM -0700, Jesse Barnes wrote:
> [Adding Rob & Thierry]
> 
> On 09/09/2015 09:36 AM, Smith, Gary K wrote:
> > I don't understand why this is an issue. Surely the fb is to describe
> > static state about the buffer, not dynamic state. The fb should be
> > created with the compressed modifier. The compressed property is just
> > a hint to the kernel that the buffer has been completely resolved,
> > hence currently it can be treated as an uncompressed fb (and the aux
> > buffer can be ignored). This is dynamic state that may well change
> > very regularly over the lifetime of the buffer.

There's fb modifier at all in this patch set, not just a static modifier
to be able to check the compression data fits plus a "ignore compression"
runtime knob.

> > It's still a compressed fb, it contains a aux buffer and had to be
> > created with the compressed fb modifier. However, once the userspace
> > has fully resolved the buffer, the aux buffer can be ignored and the
> > compressed fb can be used in any situation where an uncompressed fb
> > would normally be required. This is dynamic state that may well change
> > very regularly over the lifetime of the buffer.
> > 
> > I could allocate two fbs always, and use the appropriate one. We
> > already do this in order to indicate whether a RGBA buffer currently
> > needs to be considered as opaque (RGBX) or blended. Experience has
> > shown that it makes it very complex to debug when the fb keeps on
> > changing its value. However, because we now have 4 different states
> > (Blended/Opaque and Compressed or Resolved), we will now end up with
> > up to 4 fbs per buffer.
> > 
> > We aren't just talking about a few fbs here, we already see more than
> > 100 fbs active during complex situations. Potentially doubling this
> > number is surely a significant increase in memory usage, both from the
> > management side in userspace and the kernel side.

8kb kernel memory for the additional 2 copies of drm_framebuffer structs
for 100 buffers. That's about as much as the minimal overhead for just 1
underlying gem object (counting the sg table, vma, gtt pte tracking, gem
object and shmem backing node and pagecache entries). 2 integers in userspace.

Do you have some data to show that overhead?
-Daniel
Daniel Stone Jan. 19, 2016, 10:28 a.m. UTC | #8
Hi,

On 10 September 2015 at 16:02, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Wed, Sep 09, 2015 at 10:04:23AM -0700, Jesse Barnes wrote:
>> On 09/09/2015 09:36 AM, Smith, Gary K wrote:
>> > I don't understand why this is an issue. Surely the fb is to describe
>> > static state about the buffer, not dynamic state. The fb should be
>> > created with the compressed modifier. The compressed property is just
>> > a hint to the kernel that the buffer has been completely resolved,
>> > hence currently it can be treated as an uncompressed fb (and the aux
>> > buffer can be ignored). This is dynamic state that may well change
>> > very regularly over the lifetime of the buffer.
>
> There's fb modifier at all in this patch set, not just a static modifier
> to be able to check the compression data fits plus a "ignore compression"
> runtime knob.
>
>> > It's still a compressed fb, it contains a aux buffer and had to be
>> > created with the compressed fb modifier. However, once the userspace
>> > has fully resolved the buffer, the aux buffer can be ignored and the
>> > compressed fb can be used in any situation where an uncompressed fb
>> > would normally be required. This is dynamic state that may well change
>> > very regularly over the lifetime of the buffer.
>> >
>> > I could allocate two fbs always, and use the appropriate one. We
>> > already do this in order to indicate whether a RGBA buffer currently
>> > needs to be considered as opaque (RGBX) or blended. Experience has
>> > shown that it makes it very complex to debug when the fb keeps on
>> > changing its value. However, because we now have 4 different states
>> > (Blended/Opaque and Compressed or Resolved), we will now end up with
>> > up to 4 fbs per buffer.
>> >
>> > We aren't just talking about a few fbs here, we already see more than
>> > 100 fbs active during complex situations. Potentially doubling this
>> > number is surely a significant increase in memory usage, both from the
>> > management side in userspace and the kernel side.
>
> 8kb kernel memory for the additional 2 copies of drm_framebuffer structs
> for 100 buffers. That's about as much as the minimal overhead for just 1
> underlying gem object (counting the sg table, vma, gtt pte tracking, gem
> object and shmem backing node and pagecache entries). 2 integers in userspace.
>
> Do you have some data to show that overhead?

I agree with this view as well, and it does seem to be the way chosen
for generic userspace on other drivers.

For context, the way ChromeOS and Wayland compositors (Weston, Mutter,
Enlightenment) work is that a userspace library called GBM is
distributed as part of EGL, which is the native EGL platform/winsys
for rendering on KMS. The major difference with GBM, however, is that
it does _not_ do presentation: presentation is explicitly controlled
by the compositor itself.

In order to use this new property, we would have to add API to EGL/GBM
to extract a list of property names to set, which wouldn't really make
for great API. It'd be much cleaner for these users to stick with FB
modifiers, especially as they destroy and recreate the FB objects
(something we've not seen have any performance impact) for every flip
anyway. From my side, I'd be much happier using generically-applicable
FB modifiers, than continuing along the property explosion.

The other sticking point is that if I go from flipping GPU buffers
with render compression enabled to software buffers, from userspace
that means I then need to explicitly go unset the render decompression
flag before I can display software buffers, else the flips just get
rejected; something which isn't the case with FB modifiers. One more
thing to go wrong ...

Cheers,
Daniel
Jesse Barnes Jan. 25, 2016, 5:38 p.m. UTC | #9
On 01/19/2016 02:28 AM, Daniel Stone wrote:
>>>> >> > We aren't just talking about a few fbs here, we already see more than
>>>> >> > 100 fbs active during complex situations. Potentially doubling this
>>>> >> > number is surely a significant increase in memory usage, both from the
>>>> >> > management side in userspace and the kernel side.
>> >
>> > 8kb kernel memory for the additional 2 copies of drm_framebuffer structs
>> > for 100 buffers. That's about as much as the minimal overhead for just 1
>> > underlying gem object (counting the sg table, vma, gtt pte tracking, gem
>> > object and shmem backing node and pagecache entries). 2 integers in userspace.
>> >
>> > Do you have some data to show that overhead?
> I agree with this view as well, and it does seem to be the way chosen
> for generic userspace on other drivers.
> 
> For context, the way ChromeOS and Wayland compositors (Weston, Mutter,
> Enlightenment) work is that a userspace library called GBM is
> distributed as part of EGL, which is the native EGL platform/winsys
> for rendering on KMS. The major difference with GBM, however, is that
> it does _not_ do presentation: presentation is explicitly controlled
> by the compositor itself.
> 
> In order to use this new property, we would have to add API to EGL/GBM
> to extract a list of property names to set, which wouldn't really make
> for great API. It'd be much cleaner for these users to stick with FB
> modifiers, especially as they destroy and recreate the FB objects
> (something we've not seen have any performance impact) for every flip
> anyway. From my side, I'd be much happier using generically-applicable
> FB modifiers, than continuing along the property explosion.
> 
> The other sticking point is that if I go from flipping GPU buffers
> with render compression enabled to software buffers, from userspace
> that means I then need to explicitly go unset the render decompression
> flag before I can display software buffers, else the flips just get
> rejected; something which isn't the case with FB modifiers. One more
> thing to go wrong ...

Just for background, we ended up with a property for this attribute due to a request from the only userland folks we had at the time (our hwcomposer team).  They felt it would be simpler to use a property in this specific case, though they already do have a number of fb objects to deal with.  Really I can make an argument either way for how well each matches hardware behavior, so figured we'd just go with a property due to someone expressing a preference.

This has probably already been changed in an updated patch (still catching up on mail), but I thought I'd at least chime in on the thinking on this from way back (around a year ago now I think).

Cc'ing Gary in case he has further comment.

Jesse
Smith, Gary K Jan. 25, 2016, 6:15 p.m. UTC | #10
I really do not understand what the issue is here. It’s a hint provided by the user to indicate that the current fb (which was allocated with the appropriate aux buffer/compression fb modifiers) has been entirely resolved (uncompressed) and hence can be used anywhere an uncompressed fb would normally be required.

Creating a fb that says that there is no aux buffer is entirely wrong in this case as there is an aux buffer, it just happens to be in a particular known state for this particular flip.

If GBM doesn’t want to optimize its power usage when(if) it knows the buffer is uncompressed, then it doesn’t have to set the property at all. 

Thanks
Gary 




-----Original Message-----
From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org] 

Sent: Monday, January 25, 2016 5:39 PM
To: Daniel Stone <daniel@fooishbar.org>; Daniel Vetter <daniel@ffwll.ch>
Cc: intel-gfx@lists.freedesktop.org; Thierry Reding <thierry.reding@avionic-desi.gn.de>; Smith, Gary K <gary.k.smith@intel.com>
Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support for Gen9 and above

On 01/19/2016 02:28 AM, Daniel Stone wrote:
>>>> >> > We aren't just talking about a few fbs here, we already see 

>>>> >> > more than

>>>> >> > 100 fbs active during complex situations. Potentially doubling 

>>>> >> > this number is surely a significant increase in memory usage, 

>>>> >> > both from the management side in userspace and the kernel side.

>> >

>> > 8kb kernel memory for the additional 2 copies of drm_framebuffer 

>> > structs for 100 buffers. That's about as much as the minimal 

>> > overhead for just 1 underlying gem object (counting the sg table, 

>> > vma, gtt pte tracking, gem object and shmem backing node and pagecache entries). 2 integers in userspace.

>> >

>> > Do you have some data to show that overhead?

> I agree with this view as well, and it does seem to be the way chosen 

> for generic userspace on other drivers.

> 

> For context, the way ChromeOS and Wayland compositors (Weston, Mutter,

> Enlightenment) work is that a userspace library called GBM is 

> distributed as part of EGL, which is the native EGL platform/winsys 

> for rendering on KMS. The major difference with GBM, however, is that 

> it does _not_ do presentation: presentation is explicitly controlled 

> by the compositor itself.

> 

> In order to use this new property, we would have to add API to EGL/GBM 

> to extract a list of property names to set, which wouldn't really make 

> for great API. It'd be much cleaner for these users to stick with FB 

> modifiers, especially as they destroy and recreate the FB objects 

> (something we've not seen have any performance impact) for every flip 

> anyway. From my side, I'd be much happier using generically-applicable 

> FB modifiers, than continuing along the property explosion.

> 

> The other sticking point is that if I go from flipping GPU buffers 

> with render compression enabled to software buffers, from userspace 

> that means I then need to explicitly go unset the render decompression 

> flag before I can display software buffers, else the flips just get 

> rejected; something which isn't the case with FB modifiers. One more 

> thing to go wrong ...


Just for background, we ended up with a property for this attribute due to a request from the only userland folks we had at the time (our hwcomposer team).  They felt it would be simpler to use a property in this specific case, though they already do have a number of fb objects to deal with.  Really I can make an argument either way for how well each matches hardware behavior, so figured we'd just go with a property due to someone expressing a preference.

This has probably already been changed in an updated patch (still catching up on mail), but I thought I'd at least chime in on the thinking on this from way back (around a year ago now I think).

Cc'ing Gary in case he has further comment.

Jesse
---------------------------------------------------------------------
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Daniel Stone Jan. 25, 2016, 6:37 p.m. UTC | #11
Hi,

On 25 January 2016 at 18:15, Smith, Gary K <gary.k.smith@intel.com> wrote:
> I really do not understand what the issue is here. It’s a hint provided by the user to indicate that the current fb (which was allocated with the appropriate aux buffer/compression fb modifiers) has been entirely resolved (uncompressed) and hence can be used anywhere an uncompressed fb would normally be required.
>
> Creating a fb that says that there is no aux buffer is entirely wrong in this case as there is an aux buffer, it just happens to be in a particular known state for this particular flip.
>
> If GBM doesn’t want to optimize its power usage when(if) it knows the buffer is uncompressed, then it doesn’t have to set the property at all.

GBM cannot set the property, because GBM is not the element in charge
of presenting the buffers. The property-based design precludes optimal
use of current open-source userspace.

The flipside here is a minor inconvenience (having to create multiple
framebuffers) to closed userspace; last time this came up, when it was
asked if this was actually a serious memory concern or not, there was
no response.

Cheers,
Daniel

> Thanks
> Gary
>
>
>
>
> -----Original Message-----
> From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org]
> Sent: Monday, January 25, 2016 5:39 PM
> To: Daniel Stone <daniel@fooishbar.org>; Daniel Vetter <daniel@ffwll.ch>
> Cc: intel-gfx@lists.freedesktop.org; Thierry Reding <thierry.reding@avionic-desi.gn.de>; Smith, Gary K <gary.k.smith@intel.com>
> Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support for Gen9 and above
>
> On 01/19/2016 02:28 AM, Daniel Stone wrote:
>>>>> >> > We aren't just talking about a few fbs here, we already see
>>>>> >> > more than
>>>>> >> > 100 fbs active during complex situations. Potentially doubling
>>>>> >> > this number is surely a significant increase in memory usage,
>>>>> >> > both from the management side in userspace and the kernel side.
>>> >
>>> > 8kb kernel memory for the additional 2 copies of drm_framebuffer
>>> > structs for 100 buffers. That's about as much as the minimal
>>> > overhead for just 1 underlying gem object (counting the sg table,
>>> > vma, gtt pte tracking, gem object and shmem backing node and pagecache entries). 2 integers in userspace.
>>> >
>>> > Do you have some data to show that overhead?
>> I agree with this view as well, and it does seem to be the way chosen
>> for generic userspace on other drivers.
>>
>> For context, the way ChromeOS and Wayland compositors (Weston, Mutter,
>> Enlightenment) work is that a userspace library called GBM is
>> distributed as part of EGL, which is the native EGL platform/winsys
>> for rendering on KMS. The major difference with GBM, however, is that
>> it does _not_ do presentation: presentation is explicitly controlled
>> by the compositor itself.
>>
>> In order to use this new property, we would have to add API to EGL/GBM
>> to extract a list of property names to set, which wouldn't really make
>> for great API. It'd be much cleaner for these users to stick with FB
>> modifiers, especially as they destroy and recreate the FB objects
>> (something we've not seen have any performance impact) for every flip
>> anyway. From my side, I'd be much happier using generically-applicable
>> FB modifiers, than continuing along the property explosion.
>>
>> The other sticking point is that if I go from flipping GPU buffers
>> with render compression enabled to software buffers, from userspace
>> that means I then need to explicitly go unset the render decompression
>> flag before I can display software buffers, else the flips just get
>> rejected; something which isn't the case with FB modifiers. One more
>> thing to go wrong ...
>
> Just for background, we ended up with a property for this attribute due to a request from the only userland folks we had at the time (our hwcomposer team).  They felt it would be simpler to use a property in this specific case, though they already do have a number of fb objects to deal with.  Really I can make an argument either way for how well each matches hardware behavior, so figured we'd just go with a property due to someone expressing a preference.
>
> This has probably already been changed in an updated patch (still catching up on mail), but I thought I'd at least chime in on the thinking on this from way back (around a year ago now I think).
>
> Cc'ing Gary in case he has further comment.
>
> Jesse
> ---------------------------------------------------------------------
> Intel Corporation (UK) Limited
> Registered No. 1134945 (England)
> Registered Office: Pipers Way, Swindon SN3 1RJ
> VAT No: 860 2173 47
>
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
Smith, Gary K Jan. 25, 2016, 7:04 p.m. UTC | #12
It’s a few hundred lines of extra code and additional complexity in open-source userspace to track the additional fb's in flight in order to save the i915 code from performing a couple of extra if's during a flip. It's not a good tradeoff at all. We already have the experience of requiring to change the format of fb's dynamically between opaque and transparent as an example of what has to happen. Its also much harder to debug as there is now no fixed mapping between the buffer and the fb.

> The property-based design precludes optimal use of current open-source userspace.

I don’t see how it even effects open-source userspace. It can just ignore it entirely.

Thanks
Gary


-----Original Message-----
From: Daniel Stone [mailto:daniel@fooishbar.org] 

Sent: Monday, January 25, 2016 6:37 PM
To: Smith, Gary K <gary.k.smith@intel.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>; Daniel Vetter <daniel@ffwll.ch>; intel-gfx@lists.freedesktop.org; Thierry Reding <thierry.reding@avionic-desi.gn.de>
Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support for Gen9 and above

Hi,

On 25 January 2016 at 18:15, Smith, Gary K <gary.k.smith@intel.com> wrote:
> I really do not understand what the issue is here. It’s a hint provided by the user to indicate that the current fb (which was allocated with the appropriate aux buffer/compression fb modifiers) has been entirely resolved (uncompressed) and hence can be used anywhere an uncompressed fb would normally be required.

>

> Creating a fb that says that there is no aux buffer is entirely wrong in this case as there is an aux buffer, it just happens to be in a particular known state for this particular flip.

>

> If GBM doesn’t want to optimize its power usage when(if) it knows the buffer is uncompressed, then it doesn’t have to set the property at all.


GBM cannot set the property, because GBM is not the element in charge of presenting the buffers. The property-based design precludes optimal use of current open-source userspace.

The flipside here is a minor inconvenience (having to create multiple
framebuffers) to closed userspace; last time this came up, when it was asked if this was actually a serious memory concern or not, there was no response.

Cheers,
Daniel

> Thanks

> Gary

>

>

>

>

> -----Original Message-----

> From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org]

> Sent: Monday, January 25, 2016 5:39 PM

> To: Daniel Stone <daniel@fooishbar.org>; Daniel Vetter 

> <daniel@ffwll.ch>

> Cc: intel-gfx@lists.freedesktop.org; Thierry Reding 

> <thierry.reding@avionic-desi.gn.de>; Smith, Gary K 

> <gary.k.smith@intel.com>

> Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support 

> for Gen9 and above

>

> On 01/19/2016 02:28 AM, Daniel Stone wrote:

>>>>> >> > We aren't just talking about a few fbs here, we already see 

>>>>> >> > more than

>>>>> >> > 100 fbs active during complex situations. Potentially 

>>>>> >> > doubling this number is surely a significant increase in 

>>>>> >> > memory usage, both from the management side in userspace and the kernel side.

>>> >

>>> > 8kb kernel memory for the additional 2 copies of drm_framebuffer 

>>> > structs for 100 buffers. That's about as much as the minimal 

>>> > overhead for just 1 underlying gem object (counting the sg table, 

>>> > vma, gtt pte tracking, gem object and shmem backing node and pagecache entries). 2 integers in userspace.

>>> >

>>> > Do you have some data to show that overhead?

>> I agree with this view as well, and it does seem to be the way chosen 

>> for generic userspace on other drivers.

>>

>> For context, the way ChromeOS and Wayland compositors (Weston, 

>> Mutter,

>> Enlightenment) work is that a userspace library called GBM is 

>> distributed as part of EGL, which is the native EGL platform/winsys 

>> for rendering on KMS. The major difference with GBM, however, is that 

>> it does _not_ do presentation: presentation is explicitly controlled 

>> by the compositor itself.

>>

>> In order to use this new property, we would have to add API to 

>> EGL/GBM to extract a list of property names to set, which wouldn't 

>> really make for great API. It'd be much cleaner for these users to 

>> stick with FB modifiers, especially as they destroy and recreate the 

>> FB objects (something we've not seen have any performance impact) for 

>> every flip anyway. From my side, I'd be much happier using 

>> generically-applicable FB modifiers, than continuing along the property explosion.

>>

>> The other sticking point is that if I go from flipping GPU buffers 

>> with render compression enabled to software buffers, from userspace 

>> that means I then need to explicitly go unset the render 

>> decompression flag before I can display software buffers, else the 

>> flips just get rejected; something which isn't the case with FB 

>> modifiers. One more thing to go wrong ...

>

> Just for background, we ended up with a property for this attribute due to a request from the only userland folks we had at the time (our hwcomposer team).  They felt it would be simpler to use a property in this specific case, though they already do have a number of fb objects to deal with.  Really I can make an argument either way for how well each matches hardware behavior, so figured we'd just go with a property due to someone expressing a preference.

>

> This has probably already been changed in an updated patch (still catching up on mail), but I thought I'd at least chime in on the thinking on this from way back (around a year ago now I think).

>

> Cc'ing Gary in case he has further comment.

>

> Jesse

> ---------------------------------------------------------------------

> Intel Corporation (UK) Limited

> Registered No. 1134945 (England)

> Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47

>

> This e-mail and any attachments may contain confidential material for 

> the sole use of the intended recipient(s). Any review or distribution 

> by others is strictly prohibited. If you are not the intended 

> recipient, please contact the sender and delete all copies.

---------------------------------------------------------------------
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Smith, Gary K Jan. 25, 2016, 7:08 p.m. UTC | #13
Corrected the first line below.

-----Original Message-----
From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of Smith, Gary K

Sent: Monday, January 25, 2016 7:04 PM
To: Daniel Stone <daniel@fooishbar.org>
Cc: intel-gfx@lists.freedesktop.org; Thierry Reding <thierry.reding@avionic-desi.gn.de>
Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support for Gen9 and above

It’s a few hundred lines of extra code and additional complexity in userspace to track the additional fb's in flight in order to save the i915 code from performing a couple of extra if's during a flip. It's not a good tradeoff at all. We already have the experience of requiring to change the format of fb's dynamically between opaque and transparent as an example of what has to happen. Its also much harder to debug as there is now no fixed mapping between the buffer and the fb.

> The property-based design precludes optimal use of current open-source userspace.

I don’t see how it even effects open-source userspace. It can just ignore it entirely.

Thanks
Gary


-----Original Message-----
From: Daniel Stone [mailto:daniel@fooishbar.org]

Sent: Monday, January 25, 2016 6:37 PM
To: Smith, Gary K <gary.k.smith@intel.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>; Daniel Vetter <daniel@ffwll.ch>; intel-gfx@lists.freedesktop.org; Thierry Reding <thierry.reding@avionic-desi.gn.de>
Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support for Gen9 and above

Hi,

On 25 January 2016 at 18:15, Smith, Gary K <gary.k.smith@intel.com> wrote:
> I really do not understand what the issue is here. It’s a hint provided by the user to indicate that the current fb (which was allocated with the appropriate aux buffer/compression fb modifiers) has been entirely resolved (uncompressed) and hence can be used anywhere an uncompressed fb would normally be required.

>

> Creating a fb that says that there is no aux buffer is entirely wrong in this case as there is an aux buffer, it just happens to be in a particular known state for this particular flip.

>

> If GBM doesn’t want to optimize its power usage when(if) it knows the buffer is uncompressed, then it doesn’t have to set the property at all.


GBM cannot set the property, because GBM is not the element in charge of presenting the buffers. The property-based design precludes optimal use of current open-source userspace.

The flipside here is a minor inconvenience (having to create multiple
framebuffers) to closed userspace; last time this came up, when it was asked if this was actually a serious memory concern or not, there was no response.

Cheers,
Daniel

> Thanks

> Gary

>

>

>

>

> -----Original Message-----

> From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org]

> Sent: Monday, January 25, 2016 5:39 PM

> To: Daniel Stone <daniel@fooishbar.org>; Daniel Vetter 

> <daniel@ffwll.ch>

> Cc: intel-gfx@lists.freedesktop.org; Thierry Reding 

> <thierry.reding@avionic-desi.gn.de>; Smith, Gary K 

> <gary.k.smith@intel.com>

> Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support 

> for Gen9 and above

>

> On 01/19/2016 02:28 AM, Daniel Stone wrote:

>>>>> >> > We aren't just talking about a few fbs here, we already see 

>>>>> >> > more than

>>>>> >> > 100 fbs active during complex situations. Potentially 

>>>>> >> > doubling this number is surely a significant increase in 

>>>>> >> > memory usage, both from the management side in userspace and the kernel side.

>>> >

>>> > 8kb kernel memory for the additional 2 copies of drm_framebuffer 

>>> > structs for 100 buffers. That's about as much as the minimal 

>>> > overhead for just 1 underlying gem object (counting the sg table, 

>>> > vma, gtt pte tracking, gem object and shmem backing node and pagecache entries). 2 integers in userspace.

>>> >

>>> > Do you have some data to show that overhead?

>> I agree with this view as well, and it does seem to be the way chosen 

>> for generic userspace on other drivers.

>>

>> For context, the way ChromeOS and Wayland compositors (Weston, 

>> Mutter,

>> Enlightenment) work is that a userspace library called GBM is 

>> distributed as part of EGL, which is the native EGL platform/winsys 

>> for rendering on KMS. The major difference with GBM, however, is that 

>> it does _not_ do presentation: presentation is explicitly controlled 

>> by the compositor itself.

>>

>> In order to use this new property, we would have to add API to 

>> EGL/GBM to extract a list of property names to set, which wouldn't 

>> really make for great API. It'd be much cleaner for these users to 

>> stick with FB modifiers, especially as they destroy and recreate the 

>> FB objects (something we've not seen have any performance impact) for 

>> every flip anyway. From my side, I'd be much happier using 

>> generically-applicable FB modifiers, than continuing along the property explosion.

>>

>> The other sticking point is that if I go from flipping GPU buffers 

>> with render compression enabled to software buffers, from userspace 

>> that means I then need to explicitly go unset the render 

>> decompression flag before I can display software buffers, else the 

>> flips just get rejected; something which isn't the case with FB 

>> modifiers. One more thing to go wrong ...

>

> Just for background, we ended up with a property for this attribute due to a request from the only userland folks we had at the time (our hwcomposer team).  They felt it would be simpler to use a property in this specific case, though they already do have a number of fb objects to deal with.  Really I can make an argument either way for how well each matches hardware behavior, so figured we'd just go with a property due to someone expressing a preference.

>

> This has probably already been changed in an updated patch (still catching up on mail), but I thought I'd at least chime in on the thinking on this from way back (around a year ago now I think).

>

> Cc'ing Gary in case he has further comment.

>

> Jesse

> ---------------------------------------------------------------------

> Intel Corporation (UK) Limited

> Registered No. 1134945 (England)

> Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47

>

> This e-mail and any attachments may contain confidential material for 

> the sole use of the intended recipient(s). Any review or distribution 

> by others is strictly prohibited. If you are not the intended 

> recipient, please contact the sender and delete all copies.

---------------------------------------------------------------------
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
---------------------------------------------------------------------
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Daniel Stone Jan. 25, 2016, 7:09 p.m. UTC | #14
Hi,

On 25 January 2016 at 19:04, Smith, Gary K <gary.k.smith@intel.com> wrote:
> It’s a few hundred lines of extra code and additional complexity in open-source userspace to track the additional fb's in flight in order to save the i915 code from performing a couple of extra if's during a flip. It's not a good tradeoff at all. We already have the experience of requiring to change the format of fb's dynamically between opaque and transparent as an example of what has to happen. Its also much harder to debug as there is now no fixed mapping between the buffer and the fb.

I don't see how it really ends up being a few hundred extra lines, but
OK. It's also really - really - not about extra if's during the flip,
but about the design of _userspace_ components which are not able to
use property-based interfaces because of the separation of concerns
between rendering and display. It also goes against what other drivers
have been working with.

>> The property-based design precludes optimal use of current open-source userspace.
> I don’t see how it even effects open-source userspace. It can just ignore it entirely.

i.e. this property is useless for open-source userspace, which can
never hint that render compression has been resolved. So fine by me if
you want to merge it I guess, and we'll just ignore it.

Cheers,
Daniel

> Thanks
> Gary
>
>
> -----Original Message-----
> From: Daniel Stone [mailto:daniel@fooishbar.org]
> Sent: Monday, January 25, 2016 6:37 PM
> To: Smith, Gary K <gary.k.smith@intel.com>
> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>; Daniel Vetter <daniel@ffwll.ch>; intel-gfx@lists.freedesktop.org; Thierry Reding <thierry.reding@avionic-desi.gn.de>
> Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support for Gen9 and above
>
> Hi,
>
> On 25 January 2016 at 18:15, Smith, Gary K <gary.k.smith@intel.com> wrote:
>> I really do not understand what the issue is here. It’s a hint provided by the user to indicate that the current fb (which was allocated with the appropriate aux buffer/compression fb modifiers) has been entirely resolved (uncompressed) and hence can be used anywhere an uncompressed fb would normally be required.
>>
>> Creating a fb that says that there is no aux buffer is entirely wrong in this case as there is an aux buffer, it just happens to be in a particular known state for this particular flip.
>>
>> If GBM doesn’t want to optimize its power usage when(if) it knows the buffer is uncompressed, then it doesn’t have to set the property at all.
>
> GBM cannot set the property, because GBM is not the element in charge of presenting the buffers. The property-based design precludes optimal use of current open-source userspace.
>
> The flipside here is a minor inconvenience (having to create multiple
> framebuffers) to closed userspace; last time this came up, when it was asked if this was actually a serious memory concern or not, there was no response.
>
> Cheers,
> Daniel
>
>> Thanks
>> Gary
>>
>>
>>
>>
>> -----Original Message-----
>> From: Jesse Barnes [mailto:jbarnes@virtuousgeek.org]
>> Sent: Monday, January 25, 2016 5:39 PM
>> To: Daniel Stone <daniel@fooishbar.org>; Daniel Vetter
>> <daniel@ffwll.ch>
>> Cc: intel-gfx@lists.freedesktop.org; Thierry Reding
>> <thierry.reding@avionic-desi.gn.de>; Smith, Gary K
>> <gary.k.smith@intel.com>
>> Subject: Re: [Intel-gfx] [RFC] drm/i915: Render decompression support
>> for Gen9 and above
>>
>> On 01/19/2016 02:28 AM, Daniel Stone wrote:
>>>>>> >> > We aren't just talking about a few fbs here, we already see
>>>>>> >> > more than
>>>>>> >> > 100 fbs active during complex situations. Potentially
>>>>>> >> > doubling this number is surely a significant increase in
>>>>>> >> > memory usage, both from the management side in userspace and the kernel side.
>>>> >
>>>> > 8kb kernel memory for the additional 2 copies of drm_framebuffer
>>>> > structs for 100 buffers. That's about as much as the minimal
>>>> > overhead for just 1 underlying gem object (counting the sg table,
>>>> > vma, gtt pte tracking, gem object and shmem backing node and pagecache entries). 2 integers in userspace.
>>>> >
>>>> > Do you have some data to show that overhead?
>>> I agree with this view as well, and it does seem to be the way chosen
>>> for generic userspace on other drivers.
>>>
>>> For context, the way ChromeOS and Wayland compositors (Weston,
>>> Mutter,
>>> Enlightenment) work is that a userspace library called GBM is
>>> distributed as part of EGL, which is the native EGL platform/winsys
>>> for rendering on KMS. The major difference with GBM, however, is that
>>> it does _not_ do presentation: presentation is explicitly controlled
>>> by the compositor itself.
>>>
>>> In order to use this new property, we would have to add API to
>>> EGL/GBM to extract a list of property names to set, which wouldn't
>>> really make for great API. It'd be much cleaner for these users to
>>> stick with FB modifiers, especially as they destroy and recreate the
>>> FB objects (something we've not seen have any performance impact) for
>>> every flip anyway. From my side, I'd be much happier using
>>> generically-applicable FB modifiers, than continuing along the property explosion.
>>>
>>> The other sticking point is that if I go from flipping GPU buffers
>>> with render compression enabled to software buffers, from userspace
>>> that means I then need to explicitly go unset the render
>>> decompression flag before I can display software buffers, else the
>>> flips just get rejected; something which isn't the case with FB
>>> modifiers. One more thing to go wrong ...
>>
>> Just for background, we ended up with a property for this attribute due to a request from the only userland folks we had at the time (our hwcomposer team).  They felt it would be simpler to use a property in this specific case, though they already do have a number of fb objects to deal with.  Really I can make an argument either way for how well each matches hardware behavior, so figured we'd just go with a property due to someone expressing a preference.
>>
>> This has probably already been changed in an updated patch (still catching up on mail), but I thought I'd at least chime in on the thinking on this from way back (around a year ago now I think).
>>
>> Cc'ing Gary in case he has further comment.
>>
>> Jesse
>> ---------------------------------------------------------------------
>> Intel Corporation (UK) Limited
>> Registered No. 1134945 (England)
>> Registered Office: Pipers Way, Swindon SN3 1RJ VAT No: 860 2173 47
>>
>> This e-mail and any attachments may contain confidential material for
>> the sole use of the intended recipient(s). Any review or distribution
>> by others is strictly prohibited. If you are not the intended
>> recipient, please contact the sender and delete all copies.
> ---------------------------------------------------------------------
> Intel Corporation (UK) Limited
> Registered No. 1134945 (England)
> Registered Office: Pipers Way, Swindon SN3 1RJ
> VAT No: 860 2173 47
>
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
Daniel Vetter Jan. 26, 2016, 9:43 a.m. UTC | #15
On Mon, Jan 25, 2016 at 8:09 PM, Daniel Stone <daniel@fooishbar.org> wrote:
>>> The property-based design precludes optimal use of current open-source userspace.
>> I don’t see how it even effects open-source userspace. It can just ignore it entirely.
>
> i.e. this property is useless for open-source userspace, which can
> never hint that render compression has been resolved. So fine by me if
> you want to merge it I guess, and we'll just ignore it.

New ABI requires open-source userspace. If it's not suitable for that
it won't land.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 940f80b..d9004e8 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -607,6 +607,8 @@  int drm_atomic_plane_set_property(struct drm_plane *plane,
 		state->src_h = val;
 	} else if (property == config->rotation_property) {
 		state->rotation = val;
+	} else if (property == config->compression_property) {
+		state->compression = val;
 	} else if (plane->funcs->atomic_set_property) {
 		return plane->funcs->atomic_set_property(plane, state,
 				property, val);
@@ -654,6 +656,8 @@  drm_atomic_plane_get_property(struct drm_plane *plane,
 		*val = state->src_h;
 	} else if (property == config->rotation_property) {
 		*val = state->rotation;
+	} else if (property == config->compression_property) {
+		*val = state->compression;
 	} else if (plane->funcs->atomic_get_property) {
 		return plane->funcs->atomic_get_property(plane, state, property, val);
 	} else {
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 474f328..66f817b 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -5778,6 +5778,22 @@  struct drm_property *drm_mode_create_rotation_property(struct drm_device *dev,
 }
 EXPORT_SYMBOL(drm_mode_create_rotation_property);
 
+struct drm_property *drm_mode_create_compression_property(
+		struct drm_device *dev,
+		unsigned int comp_caps)
+{
+	static const struct drm_prop_enum_list props[] = {
+		{ DRM_COMP_NONE,   "Not compression capable" },
+		{ DRM_COMP_RENDER,  "Only render decompression" },
+	};
+
+	return drm_property_create_bitmask(dev, 0, "render compression",
+			props, ARRAY_SIZE(props),
+			comp_caps);
+}
+EXPORT_SYMBOL(drm_mode_create_compression_property);
+
+
 /**
  * DOC: Tile group
  *
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 59d6736..115736f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3120,8 +3120,47 @@  static void skylake_update_primary_plane(struct drm_crtc *crtc,
 
 			hphase = 0x00010001;  /* use trip for both Y and UV */
 			vphase = 0x00012000;  /* use trip for Y and phase 0.5 for UV */
+
+			/*
+			 * On SKL-C and BXT-A,
+			 * when the plane source pixel format is NV12,
+			 * the CHICKEN_PIPESL_* register bit 22 must be
+			 * set to 1
+			 */
+			if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
+					INTEL_REVID(dev) == SKL_REVID_C0))
+				I915_WRITE(CHICKEN_PIPESL_1(pipe),
+					I915_READ(CHICKEN_PIPESL_1(pipe)) |
+					HSW_FBCQ_DIS);
 		}
 	}
+
+	if (plane->state->compression) {
+		/*
+		 * FIXME: Check if aux_dist ad aux_stride can be taken as it
+		 * is from userspace.
+		 */
+		aux_dist = fb->offsets[1];
+		aux_stride = fb->pitches[1];
+		/*
+		 * For SKL and BXT,
+		 * When the render compression is enabled with plane
+		 * width greater than 3840 and horizontal panning,
+		 * the stride programmed in the PLANE_STRIDE register
+		 * must be multiple of 4.
+		 */
+		if (x > 3840 && x_offset != 0)
+			stride = stride - (stride % 4);
+		if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
+					INTEL_REVID(dev) == SKL_REVID_C0))
+			I915_WRITE(CHICKEN_PIPESL_1(pipe),
+					I915_READ(CHICKEN_PIPESL_1(pipe)) &
+					~HSW_FBCQ_DIS);
+		plane_ctl |= PLANE_CTL_DECOMPRESSION_ENABLE;
+	} else {
+		plane_ctl &= ~PLANE_CTL_DECOMPRESSION_ENABLE;
+	}
+
 	plane_offset = y_offset << 16 | x_offset;
 
 	I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl);
@@ -11760,6 +11799,29 @@  int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 		    plane_state->rotation != BIT(DRM_ROTATE_0))
 			intel_crtc->atomic.disable_fbc = true;
 
+		if (INTEL_INFO(dev)->gen >= 9) {
+			if (plane_state->compression) {
+				ret = skl_check_compression(dev,
+					to_intel_plane(plane),
+					to_intel_plane_state(plane_state),
+					intel_crtc->pipe);
+				if (ret)
+					return ret;
+			}
+		}
+
+		/*
+		 * Disable FBC if render decompression has to be enabled.
+		 * FIXME: If FBC is disabled here because render decomp
+		 * has to be enabled, then in update_primary_plane(), if
+		 * render decomp is disabled for some reason, we need to
+		 * enable FBC ?
+		 */
+		if (IS_SKYLAKE(dev) && dev_priv->fbc.crtc == intel_crtc &&
+				plane_state->compression) {
+			intel_crtc->atomic.disable_fbc = true;
+		}
+
 		/*
 		 * BDW signals flip done immediately if the plane
 		 * is disabled, even if the plane enable is already
@@ -11778,6 +11840,16 @@  int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state,
 			intel_crtc->atomic.update_sprite_watermarks |=
 				1 << i;
 		}
+		if (INTEL_INFO(dev)->gen >= 9) {
+			if (plane_state->compression) {
+				ret = skl_check_compression(dev,
+					to_intel_plane(plane),
+					to_intel_plane_state(plane_state),
+					intel_crtc->pipe);
+				if (ret)
+					return ret;
+			}
+		}
 	}
 	return 0;
 }
@@ -13495,6 +13567,88 @@  skl_max_scale(struct intel_crtc *intel_crtc,
 	return max_scale;
 }
 
+int skl_check_compression(struct drm_device *dev,
+		struct intel_plane *intel_plane,
+		struct intel_plane_state *plane_state,
+		enum pipe pipe)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_rect *src = &plane_state->src;
+	struct drm_framebuffer *fb;
+	int i;
+
+	fb = intel_plane ? plane_state->base.fb : NULL;
+
+	/*
+	 * TODO:
+	 * 1. Disable stereo 3D when render decomp is enabled (bit 7:6)
+	 * 2. Render decompression must not be used in VTd pass-through mode
+	 * 3. Program hashing select CHICKEN_MISC1 bit 15
+	 */
+
+	/*
+	 * On SKL A and SKL B,
+	 * Do not enable render decompression when the plane
+	 * width is smaller than 32 pixels or greater than
+	 * 2048 pixels
+	 */
+	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) < SKL_REVID_C0)
+			&& ((src->x1 >> 16) > 2048))
+		plane_state->base.compression = DRM_COMP_NONE;
+
+	if (!plane_state->base.compression)
+		return -EINVAL;
+
+	/*
+	 * On SKL:*:C and BXT:*:A, there is a possible hang with NV12 format.
+	 * WA: When the plane source pixel format is NV12,
+	 * the CHICKEN_PIPESL_* register bit 22 must be set to 1 and the
+	 * render decompression must not be enabled on any of the planes in
+	 * that pipe.
+	 */
+	if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
+				INTEL_REVID(dev) == SKL_REVID_C0)) {
+		struct drm_plane *drm_plane;
+		struct drm_plane_state *state;
+
+		for_each_plane(dev_priv, pipe, i) {
+			drm_plane = drm_plane_from_index(dev, i);
+			if (drm_plane) {
+				state = drm_plane->state;
+				if (state) {
+					if (state->fb &&
+							state->fb->pixel_format
+							== DRM_FORMAT_NV12) {
+						plane_state->base.compression =
+							DRM_COMP_NONE;
+					}
+				}
+			}
+		}
+	}
+
+	if (!plane_state->base.compression)
+		return -EINVAL;
+
+	/*
+	 * Conditions to satisfy before enabling render decomp.
+	 * SKL+
+	 * Pipe A & B, Planes 1 & 2
+	 * RGB8888 Tile-Y format
+	 * 0/180 rotation
+	 */
+	if ((pipe == PIPE_C) ||
+			((fb->pixel_format != DRM_FORMAT_XRGB8888) &&
+			 (fb->pixel_format != DRM_FORMAT_ARGB8888)) ||
+			intel_rotation_90_or_270(plane_state->base.rotation) ||
+			((fb->modifier[0] == DRM_FORMAT_MOD_NONE) ||
+			 (fb->modifier[0] == I915_FORMAT_MOD_X_TILED))) {
+		plane_state->base.compression = DRM_COMP_NONE;
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static int
 intel_check_primary_plane(struct drm_plane *plane,
 			  struct intel_crtc_state *crtc_state,
@@ -13670,11 +13824,31 @@  static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
 	if (INTEL_INFO(dev)->gen >= 4)
 		intel_create_rotation_property(dev, primary);
 
+	intel_create_compression_property(dev, primary);
+
 	drm_plane_helper_add(&primary->base, &intel_plane_helper_funcs);
 
 	return &primary->base;
 }
 
+void intel_create_compression_property(struct drm_device *dev,
+		struct intel_plane *plane)
+{
+	/* Render compression is applicable only for plane 1 & 2 */
+	if (INTEL_INFO(dev)->gen >= 9 && (plane->plane <= 1)) {
+		if (!dev->mode_config.compression_property)
+			dev->mode_config.compression_property =
+				drm_mode_create_compression_property(dev,
+						BIT(DRM_COMP_NONE) |
+						BIT(DRM_COMP_RENDER));
+
+		if (dev->mode_config.compression_property)
+			drm_object_attach_property(&plane->base.base,
+					dev->mode_config.compression_property,
+					plane->base.state->compression);
+	}
+}
+
 void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane)
 {
 	if (!dev->mode_config.rotation_property) {
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 6cd6cb0..bae22eb 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1090,6 +1090,13 @@  intel_rotation_90_or_270(unsigned int rotation)
 void intel_create_rotation_property(struct drm_device *dev,
 					struct intel_plane *plane);
 
+void intel_create_compression_property(struct drm_device *dev,
+					struct intel_plane *plane);
+int skl_check_compression(struct drm_device *dev,
+			struct intel_plane *intel_plane,
+			struct intel_plane_state *plane_state,
+			enum pipe pipe);
+
 /* shared dpll functions */
 struct intel_shared_dpll *intel_crtc_to_shared_dpll(struct intel_crtc *crtc);
 void assert_shared_dpll(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 2f9f856..c6cf4ad 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -275,8 +275,41 @@  skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
 
 			hphase = 0x00010001;  /* use trip for both Y and UV */
 			vphase = 0x00012000;  /* use trip for Y and phase 0.5 for UV */
+
+			if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
+					INTEL_REVID(dev) == SKL_REVID_C0))
+				I915_WRITE(CHICKEN_PIPESL_1(pipe),
+					I915_READ(CHICKEN_PIPESL_1(pipe)) |
+					HSW_FBCQ_DIS);
 		}
 	}
+
+	if (drm_plane->state->compression) {
+		/*
+		 * FIXME: Check if aux_dist ad aux_stride can be taken as it
+		 * is from userspace.
+		 */
+		aux_dist = fb->offsets[1];
+		aux_stride = fb->pitches[1];
+		/*
+		 * For SKL and BXT,
+		 * When the render compression is enabled with plane
+		 * width greater than 3840 and horizontal panning,
+		 * the stride programmed in the PLANE_STRIDE register
+		 * must be multiple of 4.
+		 */
+		if (x > 3840 && x_offset != 0)
+			stride = stride - (stride % 4);
+		if (IS_BROXTON(dev) || (IS_SKYLAKE(dev) &&
+					INTEL_REVID(dev) == SKL_REVID_C0))
+			I915_WRITE(CHICKEN_PIPESL_1(pipe),
+					I915_READ(CHICKEN_PIPESL_1(pipe)) &
+					~HSW_FBCQ_DIS);
+		plane_ctl |= PLANE_CTL_DECOMPRESSION_ENABLE;
+	} else {
+		plane_ctl &= ~PLANE_CTL_DECOMPRESSION_ENABLE;
+	}
+
 	plane_offset = y_offset << 16 | x_offset;
 
 	I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset);
@@ -1217,6 +1250,8 @@  intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane)
 
 	intel_create_rotation_property(dev, intel_plane);
 
+	intel_create_compression_property(dev, intel_plane);
+
 	drm_plane_helper_add(&intel_plane->base, &intel_plane_helper_funcs);
 
 out:
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 75f49c1..02a9aaa 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -93,6 +93,10 @@  static inline uint64_t I642U64(int64_t val)
 #define DRM_REFLECT_X	4
 #define DRM_REFLECT_Y	5
 
+/* render compression property bits */
+#define DRM_COMP_NONE		0
+#define DRM_COMP_RENDER		1
+
 enum drm_connector_force {
 	DRM_FORCE_UNSPECIFIED,
 	DRM_FORCE_OFF,
@@ -783,6 +787,9 @@  struct drm_plane_state {
 	/* Plane rotation */
 	unsigned int rotation;
 
+	/* Render compression */
+	unsigned int compression;
+
 	struct drm_atomic_state *state;
 };
 
@@ -1114,6 +1121,7 @@  struct drm_mode_config {
 	struct drm_property *tile_property;
 	struct drm_property *plane_type_property;
 	struct drm_property *rotation_property;
+	struct drm_property *compression_property;
 	struct drm_property *prop_src_x;
 	struct drm_property *prop_src_y;
 	struct drm_property *prop_src_w;
@@ -1507,6 +1515,9 @@  extern int drm_format_vert_chroma_subsampling(uint32_t format);
 extern const char *drm_get_format_name(uint32_t format);
 extern struct drm_property *drm_mode_create_rotation_property(struct drm_device *dev,
 							      unsigned int supported_rotations);
+extern struct drm_property *drm_mode_create_compression_property(
+				struct drm_device *dev,
+				unsigned int comp_caps);
 extern unsigned int drm_rotation_simplify(unsigned int rotation,
 					  unsigned int supported_rotations);