diff mbox

[1/8] drm/i915: Implement .get_format_info() hook for CCS

Message ID 1496835664-13656-2-git-send-email-vidya.srinivas@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Srinivas, Vidya June 7, 2017, 11:40 a.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

SKL+ display engine can scan out certain kinds of compressed surfaces
produced by the render engine. This involved telling the display engine
the location of the color control surfae (CCS) which describes which
parts of the main surface are compressed and which are not. The location
of CCS is provided by userspace as just another plane with its own offset.

By providing our own format information for the CCS formats, we should
be able to make framebuffer_check() do the right thing for the CCS
surface as well.

Note that we'll return the same format info for both Y and Yf tiled
format as that's what happens with the non-CCS Y vs. Yf as well. If
desired, we could potentially return a unique pointer for each
pixel_format+tiling+ccs combination, in which case we immediately be
able to tell if any of that stuff changed by just comparing the
pointers. But that does sound a bit wasteful space wise.

v2: Drop the 'dev' argument from the hook
	v3: Include the description of the CCS surface layout
v4: Pretend CCS tiles are regular 128 byte wide Y tiles (Jason)

Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ben Widawsky <ben@bwidawsk.net>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Ben Widawsky <ben@bwidawsk.net> (v3)
Link: https://patchwork.kernel.org/patch/9637253/
Signed-off-by: Ville Syrjä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/drm_fourcc.c         |  2 +-
 drivers/gpu/drm/i915/intel_display.c | 37 ++++++++++++++++++++++++++++++++++++
 include/drm/drm_mode_config.h        |  3 ++-
 include/uapi/drm/drm_fourcc.h        |  3 +++
 4 files changed, 43 insertions(+), 2 deletions(-)

Comments

Daniel Stone June 7, 2017, 11:44 a.m. UTC | #1
Hi Vidya,

On 7 June 2017 at 12:40, Vidya Srinivas <vidya.srinivas@intel.com> wrote:
> +static const struct drm_format_info ccs_formats[] = {
> +       { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
> +       { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
> +       { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
> +       { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
> +};

I notice that here hsub/vsub are declared as 16x8. In Ville's tree
which I pulled my submission from, this is 8x16, which aligns with
this comment (missing from your submission):
/*
 * 1 byte of CCS actually corresponds to 16x8 pixels on the main
 * surface, and the memory layout for the CCS tile us 64x64 bytes.
 * But since we're pretending the CCS tile is 128 bytes wide we
 * adjust hsub/vsub here accordingly to 8x16 so that the
 * bytes<->x/y conversions come out correct.
 */

If the hsub/vsub is inverted to match the comment, trying to add a
3200x1800 (for example) framebuffer will fail, because (1800 % 16) !=
0. This is true even if the allocation is correct (i.e. the buffer
contains an even number of tiles for both width and height). Generic
userspace cannot know that it should try to create a larger
framebuffer (in this case 3200x1808) and only show a smaller region of
that framebuffer.

This is the reason for the halign/valign patch mentioned earlier, as
well as the additional part of the comment which was also present in
my submission:
/*
 * We don't require any
 * CCS block size alignment of the fb under the assumption that the
 * hardware will handle things correctly of only a single pixel
 * gets touched. The compression should be lossless so any garbage
 * pixels as part of the same block shouldn't cause visual artifacts.
 */

Cheers,
Daniel
Ville Syrjala June 7, 2017, 12:53 p.m. UTC | #2
On Wed, Jun 07, 2017 at 12:44:47PM +0100, Daniel Stone wrote:
> Hi Vidya,
> 
> On 7 June 2017 at 12:40, Vidya Srinivas <vidya.srinivas@intel.com> wrote:
> > +static const struct drm_format_info ccs_formats[] = {
> > +       { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
> > +       { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
> > +       { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
> > +       { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
> > +};
> 
> I notice that here hsub/vsub are declared as 16x8. In Ville's tree
> which I pulled my submission from, this is 8x16, which aligns with
> this comment (missing from your submission):
> /*
>  * 1 byte of CCS actually corresponds to 16x8 pixels on the main
>  * surface, and the memory layout for the CCS tile us 64x64 bytes.
>  * But since we're pretending the CCS tile is 128 bytes wide we
>  * adjust hsub/vsub here accordingly to 8x16 so that the
>  * bytes<->x/y conversions come out correct.
>  */
> 
> If the hsub/vsub is inverted to match the comment, trying to add a
> 3200x1800 (for example) framebuffer will fail, because (1800 % 16) !=
> 0. This is true even if the allocation is correct (i.e. the buffer
> contains an even number of tiles for both width and height). Generic
> userspace cannot know that it should try to create a larger
> framebuffer (in this case 3200x1808) and only show a smaller region of
> that framebuffer.
> 
> This is the reason for the halign/valign patch mentioned earlier, as
> well as the additional part of the comment which was also present in
> my submission:
> /*
>  * We don't require any
>  * CCS block size alignment of the fb under the assumption that the
>  * hardware will handle things correctly of only a single pixel
>  * gets touched. The compression should be lossless so any garbage
>  * pixels as part of the same block shouldn't cause visual artifacts.
>  */

The alignment requirement is gone in upstream, hence my latest CCS
stuff doesn't have the valign/halign stuff anymore.

Anyways, I'll have to revisit the the offsets[] thing because people
didn't like my original linear offset idea, and it doesn't match what
userspace already does.

And I still need to convince myself that the ccs hash mode won't be
an issue, which I think I'm close to doing since I managed to trick
igt rendercopy to do ccs.
Daniel Stone June 7, 2017, 2:24 p.m. UTC | #3
Hi,

On 7 June 2017 at 13:53, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> On Wed, Jun 07, 2017 at 12:44:47PM +0100, Daniel Stone wrote:
>> /*
>>  * We don't require any
>>  * CCS block size alignment of the fb under the assumption that the
>>  * hardware will handle things correctly of only a single pixel
>>  * gets touched. The compression should be lossless so any garbage
>>  * pixels as part of the same block shouldn't cause visual artifacts.
>>  */
>
> The alignment requirement is gone in upstream, hence my latest CCS
> stuff doesn't have the valign/halign stuff anymore.

Oh sorry, I'd missed the hsub requirement dropping out. That's fine then.

> Anyways, I'll have to revisit the the offsets[] thing because people
> didn't like my original linear offset idea, and it doesn't match what
> userspace already does.

I'm still really confused about this. Your patches implement a linear
byte offset. The last time it came up on IRC, all four of myself, Ben,
Jason, and you, agreed that linear byte offsets were the only thing
which made sense. The Mesa patchset that's been sent out a couple of
times and is now in Jason's hands use linear offsets. If everything
(kernel, Mesa) uses linear offsets, and everyone (the four of us in
the discussion) wants linear offsets - why revisit?

Cheers,
Daniel
Ville Syrjala June 7, 2017, 3:33 p.m. UTC | #4
On Wed, Jun 07, 2017 at 03:24:58PM +0100, Daniel Stone wrote:
> Hi,
> 
> On 7 June 2017 at 13:53, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> > On Wed, Jun 07, 2017 at 12:44:47PM +0100, Daniel Stone wrote:
> >> /*
> >>  * We don't require any
> >>  * CCS block size alignment of the fb under the assumption that the
> >>  * hardware will handle things correctly of only a single pixel
> >>  * gets touched. The compression should be lossless so any garbage
> >>  * pixels as part of the same block shouldn't cause visual artifacts.
> >>  */
> >
> > The alignment requirement is gone in upstream, hence my latest CCS
> > stuff doesn't have the valign/halign stuff anymore.
> 
> Oh sorry, I'd missed the hsub requirement dropping out. That's fine then.
> 
> > Anyways, I'll have to revisit the the offsets[] thing because people
> > didn't like my original linear offset idea, and it doesn't match what
> > userspace already does.
> 
> I'm still really confused about this. Your patches implement a linear
> byte offset. The last time it came up on IRC, all four of myself, Ben,
> Jason, and you, agreed that linear byte offsets were the only thing
> which made sense. The Mesa patchset that's been sent out a couple of
> times and is now in Jason's hands use linear offsets. If everything
> (kernel, Mesa) uses linear offsets, and everyone (the four of us in
> the discussion) wants linear offsets - why revisit?

Mesa doesn't use linear offsets. Or at least it didn't when I last
looked.
Daniel Stone June 7, 2017, 3:48 p.m. UTC | #5
Hi,

On 7 June 2017 at 16:33, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> On Wed, Jun 07, 2017 at 03:24:58PM +0100, Daniel Stone wrote:
>> On 7 June 2017 at 13:53, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
>> > Anyways, I'll have to revisit the the offsets[] thing because people
>> > didn't like my original linear offset idea, and it doesn't match what
>> > userspace already does.
>>
>> I'm still really confused about this. Your patches implement a linear
>> byte offset. The last time it came up on IRC, all four of myself, Ben,
>> Jason, and you, agreed that linear byte offsets were the only thing
>> which made sense. The Mesa patchset that's been sent out a couple of
>> times and is now in Jason's hands use linear offsets. If everything
>> (kernel, Mesa) uses linear offsets, and everyone (the four of us in
>> the discussion) wants linear offsets - why revisit?
>
> Mesa doesn't use linear offsets. Or at least it didn't when I last
> looked.

It does, and I have correct CCS output (tested by displaying frames
either as Y_CCS, or as plain Y; correct display with the former and
visibly showing an incomplete primary surface for the latter) with the
last set of Mesa patches I submitted, using Weston. It's been that way
for a couple of months (?) now, since the stride handling was fixed
too.

Cheers,
Daniel
Ville Syrjala June 7, 2017, 4:28 p.m. UTC | #6
On Wed, Jun 07, 2017 at 04:48:06PM +0100, Daniel Stone wrote:
> Hi,
> 
> On 7 June 2017 at 16:33, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> > On Wed, Jun 07, 2017 at 03:24:58PM +0100, Daniel Stone wrote:
> >> On 7 June 2017 at 13:53, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> >> > Anyways, I'll have to revisit the the offsets[] thing because people
> >> > didn't like my original linear offset idea, and it doesn't match what
> >> > userspace already does.
> >>
> >> I'm still really confused about this. Your patches implement a linear
> >> byte offset. The last time it came up on IRC, all four of myself, Ben,
> >> Jason, and you, agreed that linear byte offsets were the only thing
> >> which made sense. The Mesa patchset that's been sent out a couple of
> >> times and is now in Jason's hands use linear offsets. If everything
> >> (kernel, Mesa) uses linear offsets, and everyone (the four of us in
> >> the discussion) wants linear offsets - why revisit?
> >
> > Mesa doesn't use linear offsets. Or at least it didn't when I last
> > looked.
> 
> It does, and I have correct CCS output (tested by displaying frames
> either as Y_CCS, or as plain Y; correct display with the former and
> visibly showing an incomplete primary surface for the latter) with the
> last set of Mesa patches I submitted, using Weston. It's been that way
> for a couple of months (?) now, since the stride handling was fixed
> too.

I still see stuff like

intel_setup_image_from_mipmap_tree()
-> intel_miptree_get_tile_offsets()
   -> intel_miptree_get_aligned_offset()

which doesn't return a linear offset.
Daniel Stone June 7, 2017, 5:14 p.m. UTC | #7
Hi,

On 7 June 2017 at 17:28, Ville Syrjälä <ville.syrjala@linux.intel.com> wrote:
> On Wed, Jun 07, 2017 at 04:48:06PM +0100, Daniel Stone wrote:
>> It does, and I have correct CCS output (tested by displaying frames
>> either as Y_CCS, or as plain Y; correct display with the former and
>> visibly showing an incomplete primary surface for the latter) with the
>> last set of Mesa patches I submitted, using Weston. It's been that way
>> for a couple of months (?) now, since the stride handling was fixed
>> too.
>
> I still see stuff like
>
> intel_setup_image_from_mipmap_tree()
> -> intel_miptree_get_tile_offsets()
>    -> intel_miptree_get_aligned_offset()
>
> which doesn't return a linear offset.

That's only used when creating a DRIimage from a GL texture.

The (slightly simplified) allocation path for GBM creating a buffer
and then extracting the information to pass to AddFB2 is (assuming an
aux buffer is present):
gbm_surface_create_with_modifiers()
  -> intel_create_image_with_modifiers (as
DRIimageExtension->createImageWithModifiers)
    -> image->aux_offset = ALIGN(height, tile_height) * image->pitch;

gbm_surface_lock_front_buffer()
  -> return gbm_bo wrapping DRIImage created above

gbm_bo_get_stride_for_plane()
  -> gbm_dri_bo_get_stride()
    -> intel_query_image (via DRIimageExtension->queryImage)
      -> return image->pitch

gbm_bo_get_offset()
  -> gbm_dri_bo_get_offset()
    -> plane==0: (intel_from_planar via DRIimageExtension->fromPlanar
returns false)
      -> intel_query_image
        -> return image->offset (hardcoded to 0 at alloc)
    -> else plane==1: (intel_from_planar returns new DRIimage)
      -> intel_query_image
        -> return image->offset (set to image->aux_offset inside
intel_create_image_with_modifiers)

For 3200x1800 with XRGB8888 + CCS, running the actual Mesa patchset
submitted under Weston on the patchset I sent in May which has no
difference in offset handling to this one, this callchain results in:
  offset[0] == 0
  stride[0] == 12800 (== 3200 * 4)
  offset[1] == 23040000 (== 12800 * 1800)

(I hadn't logged what stride[1] was and don't have the kernel to run
it right this second, but given I get a very sparse 'dotty' display
when I just pass the primary buffer as Y_TILED with no aux buffer, and
a completely correct display when I pass it as Y_CCS with the aux
buffer, I'm pretty confident the stride is correct.)

Either I'm seriously hallucinating or it is very definitely linear.
The comments above intel_miptree_get_aligned_offset ('Compute the
offset (in bytes) from the start of the BO to the given x and y
coordinate.') also suggest it's working in linear space. Manually
feeding x==256,y==0 into intel_miptree_get_aligned_offset gives me an
offset of 32768, i.e. two 128x32 tiles, so again that seems right to
me.

Cheers,
Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index 9c0152d..50da618 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -222,7 +222,7 @@  const struct drm_format_info *drm_format_info(u32 format)
 	const struct drm_format_info *info = NULL;
 
 	if (dev->mode_config.funcs->get_format_info)
-		info = dev->mode_config.funcs->get_format_info(mode_cmd);
+		info = dev->mode_config.funcs->get_format_info(dev, mode_cmd);
 
 	if (!info)
 		info = drm_format_info(mode_cmd->pixel_format);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 85ac325..a3fdba2 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2438,6 +2438,42 @@  static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier)
 	}
 }
 
+static const struct drm_format_info ccs_formats[] = {
+	{ .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
+	{ .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
+	{ .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
+	{ .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, .cpp = { 4, 1, }, .hsub = 16, .vsub = 8, },
+};
+
+static const struct drm_format_info *
+lookup_format_info(const struct drm_format_info formats[],
+		   int num_formats, u32 format)
+{
+	int i;
+
+	for (i = 0; i < num_formats; i++) {
+		if (formats[i].format == format)
+			return &formats[i];
+	}
+
+	return NULL;
+}
+
+static const struct drm_format_info *
+intel_get_format_info(struct drm_device *dev,
+		      const struct drm_mode_fb_cmd2 *cmd)
+{
+	switch (cmd->modifier[0]) {
+	case I915_FORMAT_MOD_Y_TILED_CCS:
+	case I915_FORMAT_MOD_Yf_TILED_CCS:
+		return lookup_format_info(ccs_formats,
+					  ARRAY_SIZE(ccs_formats),
+					  cmd->pixel_format);
+	default:
+		return NULL;
+	}
+}
+
 static int
 intel_fill_fb_info(struct drm_i915_private *dev_priv,
 		   struct drm_framebuffer *fb)
@@ -14595,6 +14631,7 @@  static void intel_atomic_state_free(struct drm_atomic_state *state)
 
 static const struct drm_mode_config_funcs intel_mode_funcs = {
 	.fb_create = intel_user_framebuffer_create,
+	.get_format_info = intel_get_format_info,
 	.output_poll_changed = intel_fbdev_output_poll_changed,
 	.atomic_check = intel_atomic_check,
 	.atomic_commit = intel_atomic_commit,
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 4298171..f0d3d38 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -81,7 +81,8 @@  struct drm_mode_config_funcs {
 	 * The format information specific to the given fb metadata, or
 	 * NULL if none is found.
 	 */
-	const struct drm_format_info *(*get_format_info)(const struct drm_mode_fb_cmd2 *mode_cmd);
+	const struct drm_format_info *(*get_format_info)(struct drm_device *dev,
+		const struct drm_mode_fb_cmd2 *mode_cmd);
 
 	/**
 	 * @output_poll_changed:
diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
index 55e3010..58ee031 100644
--- a/include/uapi/drm/drm_fourcc.h
+++ b/include/uapi/drm/drm_fourcc.h
@@ -251,6 +251,9 @@ 
  */
 #define I915_FORMAT_MOD_Yf_TILED fourcc_mod_code(INTEL, 3)
 
+#define I915_FORMAT_MOD_Y_TILED_CCS	fourcc_mod_code(INTEL, 4)
+#define I915_FORMAT_MOD_Yf_TILED_CCS	fourcc_mod_code(INTEL, 5)
+
 /*
  * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks
  *