Message ID | 1501807961-23186-1-git-send-email-jason.ekstrand@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 17-08-03 17:52:41, Jason Ekstrand wrote: >Previously, the test used the old 64x64 convention that Ville introduced >for CCS tiles and not the current 128x32 Y-tile convention. Also, the >original scheme for generating the CCS data was over-complicated and >didn't work correctly because it assumed you could cut the main surface >at an arbitrary Y coordinate. While you clearly *can* do this (the >hardware does), it's not a good idea for a generator in a test. The new >scheme, introduced here, is entirely based on the relationship between >cache-lines in the main surface and the CCS that's documented in the >PRM. By keeping everything CCS cache-line aligned, our chances of >generating correct data for an arbitrary-size surface are much higher. > >Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com> >Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> >Cc: Ben Widawsky <benjamin.widawsky@intel.com> >Cc: Daniel Stone <daniels@collabora.com> >Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Reviewed-by: Ben Widawsky <ben@bwidawsk.net> >--- > tests/kms_ccs.c | 91 ++++++++++++++++++++++++++++++++++++++++----------------- > 1 file changed, 65 insertions(+), 26 deletions(-) > >diff --git a/tests/kms_ccs.c b/tests/kms_ccs.c >index 29d676a..ef952f2 100644 >--- a/tests/kms_ccs.c >+++ b/tests/kms_ccs.c >@@ -48,12 +48,17 @@ typedef struct { > #define COMPRESSED_GREEN 0x000ff00f > #define COMPRESSED_BLUE 0x00000fff > >+#define CCS_UNCOMPRESSED 0x0 >+#define CCS_COMPRESSED 0x55 >+ > #define RED 0x00ff0000 > >-static void render_fb(data_t *data, bool compressed) >+static void render_fb(data_t *data, bool compressed, >+ int height, unsigned int stride) > { > struct igt_fb *fb = &data->fb; > uint32_t *ptr; >+ unsigned int half_height, half_size; > int i; > > igt_assert(fb->fb_id); >@@ -62,43 +67,63 @@ static void render_fb(data_t *data, bool compressed) > 0, fb->size, > PROT_READ | PROT_WRITE); > >- for (i = 0; i < fb->size / 4; i++) { >- /* Fill upper half as compressed */ >- if (compressed && i < fb->size / 4 / 2) >- ptr[i] = COMPRESSED_RED; >- else >+ if (compressed) { >+ /* In the compressed case, we want the top half of the >+ * surface to be uncompressed and the bottom half to be >+ * compressed. >+ * >+ * We need to cut the surface on a CCS cache-line boundary, >+ * otherwise, we're going to be in trouble when we try to >+ * generate CCS data for the surface. A cache line in the >+ * CCS is 16x16 cache-line-pairs in the main surface. 16 >+ * cache lines is 64 rows high. >+ */ >+ half_height = ALIGN(height, 128) / 2; >+ half_size = half_height * stride; >+ for (i = 0; i < fb->size / 4; i++) { >+ if (i < half_size / 4) >+ ptr[i] = RED; >+ else >+ ptr[i] = COMPRESSED_RED; >+ } >+ } else { >+ for (i = 0; i < fb->size / 4; i++) > ptr[i] = RED; > } > > munmap(ptr, fb->size); > } > >-static uint8_t *ccs_ptr(uint8_t *ptr, >- unsigned int x, unsigned int y, >- unsigned int stride) >+static unsigned int >+y_tile_y_pos(unsigned int offset, unsigned int stride) > { >- return ptr + >- ((y & ~0x3f) * stride) + >- ((x & ~0x7) * 64) + >- ((y & 0x3f) * 8) + >- (x & 7); >+ unsigned int y_tiles, y; >+ y_tiles = (offset / 4096) / (stride / 128); >+ y = y_tiles * 32 + ((offset & 0x1f0) >> 4); >+ return y; > } > > static void render_ccs(data_t *data, uint32_t gem_handle, > uint32_t offset, uint32_t size, >- int w, int h, unsigned int stride) >+ int height, unsigned int ccs_stride) > { >+ unsigned int half_height, ccs_half_height; > uint8_t *ptr; >- int x, y; >+ int i; >+ >+ half_height = ALIGN(height, 128) / 2; >+ ccs_half_height = half_height / 16; > > ptr = gem_mmap__cpu(data->drm_fd, gem_handle, > offset, size, > PROT_READ | PROT_WRITE); > >- /* Mark upper half as compressed */ >- for (x = 0 ; x < w; x++) >- for (y = 0 ; y <= h / 2; y++) >- *ccs_ptr(ptr, x, y, stride) = 0x55; >+ for (i = 0; i < size; i++) { >+ if (y_tile_y_pos(i, ccs_stride) < ccs_half_height) >+ ptr[i] = CCS_UNCOMPRESSED; >+ else >+ ptr[i] = CCS_COMPRESSED; >+ } > > munmap(ptr, size); > } >@@ -143,12 +168,26 @@ static void display_fb(data_t *data, int compressed) > size[0] = f.pitches[0] * ALIGN(height, 32); > > if (compressed) { >- width = ALIGN(f.width, 16) / 16; >- height = ALIGN(f.height, 8) / 8; >- f.pitches[1] = ALIGN(width * 1, 64); >+ /* From the Sky Lake PRM, Vol 12, "Color Control Surface": >+ * >+ * "The compression state of the cache-line pair is >+ * specified by 2 bits in the CCS. Each CCS cache-line >+ * represents an area on the main surface of 16x16 sets >+ * of 128 byte Y-tiled cache-line-pairs. CCS is always Y >+ * tiled." >+ * >+ * A "cache-line-pair" for a Y-tiled surface is two >+ * horizontally adjacent cache lines. When operating in >+ * bytes and rows, this gives us a scale-down factor of >+ * 32x16. Since the main surface has a 32-bit format, we >+ * need to multiply width by 4 to get bytes. >+ */ >+ width = ALIGN(f.width * 4, 32) / 32; >+ height = ALIGN(f.height, 16) / 16; >+ f.pitches[1] = ALIGN(width * 1, 128); > f.modifier[1] = modifier; > f.offsets[1] = size[0]; >- size[1] = f.pitches[1] * ALIGN(height, 64); >+ size[1] = f.pitches[1] * ALIGN(height, 32); > > f.handles[0] = gem_create(data->drm_fd, size[0] + size[1]); > f.handles[1] = f.handles[0]; >@@ -176,11 +215,11 @@ static void display_fb(data_t *data, int compressed) > fb->cairo_surface = NULL; > fb->domain = 0; > >- render_fb(data, compressed); >+ render_fb(data, compressed, f.height, f.pitches[0]); > > if (compressed) > render_ccs(data, f.handles[0], f.offsets[1], size[1], >- f.width/16, f.height/8, f.pitches[1]); >+ f.height, f.pitches[1]); > > primary = igt_output_get_plane_type(data->output, DRM_PLANE_TYPE_PRIMARY); > igt_plane_set_fb(primary, fb); >-- >2.5.0.400.gff86faf > >_______________________________________________ >Intel-gfx mailing list >Intel-gfx@lists.freedesktop.org >https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Hi Jason, On 4 August 2017 at 01:52, Jason Ekstrand <jason@jlekstrand.net> wrote: > Previously, the test used the old 64x64 convention that Ville introduced > for CCS tiles and not the current 128x32 Y-tile convention. Also, the > original scheme for generating the CCS data was over-complicated and > didn't work correctly because it assumed you could cut the main surface > at an arbitrary Y coordinate. While you clearly *can* do this (the > hardware does), it's not a good idea for a generator in a test. The new > scheme, introduced here, is entirely based on the relationship between > cache-lines in the main surface and the CCS that's documented in the > PRM. By keeping everything CCS cache-line aligned, our chances of > generating correct data for an arbitrary-size surface are much higher. Thanks for fixing this! > + * We need to cut the surface on a CCS cache-line boundary, > + * otherwise, we're going to be in trouble when we try to > + * generate CCS data for the surface. A cache line in the > + * CCS is 16x16 cache-line-pairs in the main surface. 16 > + * cache lines is 64 rows high. > + */ > + half_height = ALIGN(height, 128) / 2; > + half_size = half_height * stride; > + for (i = 0; i < fb->size / 4; i++) { > + if (i < half_size / 4) > + ptr[i] = RED; > + else > + ptr[i] = COMPRESSED_RED; I toyed with: else if (!(i & 0xe)) ptr[i] = COMPRESSED_RED; else ptr[i] = BLACK; ... to make the failure mode even more obvious. But that still writes in (far) more compressed-red pixels than we strictly need for CCS, right? Anyway, follow-up patch. > +static unsigned int > +y_tile_y_pos(unsigned int offset, unsigned int stride) > { > - return ptr + > - ((y & ~0x3f) * stride) + > - ((x & ~0x7) * 64) + > - ((y & 0x3f) * 8) + > - (x & 7); > + unsigned int y_tiles, y; > + y_tiles = (offset / 4096) / (stride / 128); > + y = y_tiles * 32 + ((offset & 0x1f0) >> 4); > + return y; > } > > @@ -143,12 +168,26 @@ static void display_fb(data_t *data, int compressed) > size[0] = f.pitches[0] * ALIGN(height, 32); > > if (compressed) { > - width = ALIGN(f.width, 16) / 16; > - height = ALIGN(f.height, 8) / 8; > - f.pitches[1] = ALIGN(width * 1, 64); > + /* From the Sky Lake PRM, Vol 12, "Color Control Surface": > + * > + * "The compression state of the cache-line pair is > + * specified by 2 bits in the CCS. Each CCS cache-line > + * represents an area on the main surface of 16x16 sets > + * of 128 byte Y-tiled cache-line-pairs. CCS is always Y > + * tiled." > + * > + * A "cache-line-pair" for a Y-tiled surface is two > + * horizontally adjacent cache lines. When operating in > + * bytes and rows, this gives us a scale-down factor of > + * 32x16. Since the main surface has a 32-bit format, we > + * need to multiply width by 4 to get bytes. > + */ Yeah, this code and comment match better (& helped clarify) my understanding of how it works. But given my understanding was mostly gleaned from other, borderline incomprehensible, comments, as well as manually poking bits and observing the result, maybe that's not a ringing endorsement. > + width = ALIGN(f.width * 4, 32) / 32; > + height = ALIGN(f.height, 16) / 16; > + f.pitches[1] = ALIGN(width * 1, 128); > f.modifier[1] = modifier; > f.offsets[1] = size[0]; > - size[1] = f.pitches[1] * ALIGN(height, 64); > + size[1] = f.pitches[1] * ALIGN(height, 32); I changed this to f.height rather than height, because otherwise the kernel was rejecting the aux buffer for being too small. With that, it now passes on SKL + APL for me, so I've pushed with my review. Thanks! Cheers, Daniel
On August 4, 2017 2:59:56 AM Daniel Stone <daniel@fooishbar.org> wrote: > Hi Jason, > > On 4 August 2017 at 01:52, Jason Ekstrand <jason@jlekstrand.net> wrote: >> Previously, the test used the old 64x64 convention that Ville introduced >> for CCS tiles and not the current 128x32 Y-tile convention. Also, the >> original scheme for generating the CCS data was over-complicated and >> didn't work correctly because it assumed you could cut the main surface >> at an arbitrary Y coordinate. While you clearly *can* do this (the >> hardware does), it's not a good idea for a generator in a test. The new >> scheme, introduced here, is entirely based on the relationship between >> cache-lines in the main surface and the CCS that's documented in the >> PRM. By keeping everything CCS cache-line aligned, our chances of >> generating correct data for an arbitrary-size surface are much higher. > > Thanks for fixing this! > >> + * We need to cut the surface on a CCS cache-line boundary, >> + * otherwise, we're going to be in trouble when we try to >> + * generate CCS data for the surface. A cache line in the >> + * CCS is 16x16 cache-line-pairs in the main surface. 16 >> + * cache lines is 64 rows high. >> + */ >> + half_height = ALIGN(height, 128) / 2; >> + half_size = half_height * stride; >> + for (i = 0; i < fb->size / 4; i++) { >> + if (i < half_size / 4) >> + ptr[i] = RED; >> + else >> + ptr[i] = COMPRESSED_RED; > > I toyed with: > else if (!(i & 0xe)) > ptr[i] = COMPRESSED_RED; > else > ptr[i] = BLACK; > > ... to make the failure mode even more obvious. But that still writes > in (far) more compressed-red pixels than we strictly need for CCS, > right? Anyway, follow-up patch. Yeah, we can probably do quite a bit of patterning so long as we keep the compressed/uncompressed split simple and make sure our pattern works in whole cache lines. >> +static unsigned int >> +y_tile_y_pos(unsigned int offset, unsigned int stride) >> { >> - return ptr + >> - ((y & ~0x3f) * stride) + >> - ((x & ~0x7) * 64) + >> - ((y & 0x3f) * 8) + >> - (x & 7); >> + unsigned int y_tiles, y; >> + y_tiles = (offset / 4096) / (stride / 128); >> + y = y_tiles * 32 + ((offset & 0x1f0) >> 4); >> + return y; >> } >> >> @@ -143,12 +168,26 @@ static void display_fb(data_t *data, int compressed) >> size[0] = f.pitches[0] * ALIGN(height, 32); >> >> if (compressed) { >> - width = ALIGN(f.width, 16) / 16; >> - height = ALIGN(f.height, 8) / 8; >> - f.pitches[1] = ALIGN(width * 1, 64); >> + /* From the Sky Lake PRM, Vol 12, "Color Control Surface": >> + * >> + * "The compression state of the cache-line pair is >> + * specified by 2 bits in the CCS. Each CCS cache-line >> + * represents an area on the main surface of 16x16 sets >> + * of 128 byte Y-tiled cache-line-pairs. CCS is always Y >> + * tiled." >> + * >> + * A "cache-line-pair" for a Y-tiled surface is two >> + * horizontally adjacent cache lines. When operating in >> + * bytes and rows, this gives us a scale-down factor of >> + * 32x16. Since the main surface has a 32-bit format, we >> + * need to multiply width by 4 to get bytes. >> + */ > > Yeah, this code and comment match better (& helped clarify) my > understanding of how it works. But given my understanding was mostly > gleaned from other, borderline incomprehensible, comments, as well as > manually poking bits and observing the result, maybe that's not a > ringing endorsement. > >> + width = ALIGN(f.width * 4, 32) / 32; >> + height = ALIGN(f.height, 16) / 16; >> + f.pitches[1] = ALIGN(width * 1, 128); >> f.modifier[1] = modifier; >> f.offsets[1] = size[0]; >> - size[1] = f.pitches[1] * ALIGN(height, 64); >> + size[1] = f.pitches[1] * ALIGN(height, 32); > > I changed this to f.height rather than height, because otherwise the > kernel was rejecting the aux buffer for being too small. Congratulations, you found a bug in the kernel branch you're running. The downsized height is definitely what we want and it works fine with my kernel branch. > With that, it now passes on SKL + APL for me, so I've pushed with my > review. Thanks! > > Cheers, > Daniel
On 4 August 2017 at 15:56, Jason Ekstrand <jason@jlekstrand.net> wrote: > On August 4, 2017 2:59:56 AM Daniel Stone <daniel@fooishbar.org> wrote: >>> + width = ALIGN(f.width * 4, 32) / 32; >>> + height = ALIGN(f.height, 16) / 16; >>> + f.pitches[1] = ALIGN(width * 1, 128); >>> f.modifier[1] = modifier; >>> f.offsets[1] = size[0]; >>> - size[1] = f.pitches[1] * ALIGN(height, 64); >>> + size[1] = f.pitches[1] * ALIGN(height, 32); >> >> >> I changed this to f.height rather than height, because otherwise the >> kernel was rejecting the aux buffer for being too small. > > Congratulations, you found a bug in the kernel branch you're running. The > downsized height is definitely what we want and it works fine with my kernel > branch. Great. Which kernel are you running then? I'm running from here: https://git.collabora.com/cgit/user/daniels/linux.git/refs/heads Currently we have hsub/vsub defined as 16/8 (Vidya inverted this, but I never got a clear answer on why), tile_width as 128, and tile_height comes out as 32. Given the calculations in intel_fill_fb_info, I come out with the kernel demanding either 34816 bytes for CCS (using 16/8 hsub/vsub), or 20480 bytes (8/16) for a 1920x1080 framebuffer. Which kernel do you have, and how are you coming out with that calculation? Do we need to go back and re-figure out what pitch is? FWIW, ISL seems to get it right, according to the kernel. Cheers, Daniel
On Fri, Aug 4, 2017 at 8:42 AM, Daniel Stone <daniel@fooishbar.org> wrote: > On 4 August 2017 at 15:56, Jason Ekstrand <jason@jlekstrand.net> wrote: > > On August 4, 2017 2:59:56 AM Daniel Stone <daniel@fooishbar.org> wrote: > >>> + width = ALIGN(f.width * 4, 32) / 32; > >>> + height = ALIGN(f.height, 16) / 16; > >>> + f.pitches[1] = ALIGN(width * 1, 128); > >>> f.modifier[1] = modifier; > >>> f.offsets[1] = size[0]; > >>> - size[1] = f.pitches[1] * ALIGN(height, 64); > >>> + size[1] = f.pitches[1] * ALIGN(height, 32); > >> > >> > >> I changed this to f.height rather than height, because otherwise the > >> kernel was rejecting the aux buffer for being too small. > > > > Congratulations, you found a bug in the kernel branch you're running. > The > > downsized height is definitely what we want and it works fine with my > kernel > > branch. > > Great. Which kernel are you running then? I'm running from here: > https://git.collabora.com/cgit/user/daniels/linux.git/refs/heads > I'm working from some branch I got from Ville a couple months ago. > Currently we have hsub/vsub defined as 16/8 (Vidya inverted this, but > I never got a clear answer on why), Here's my comment in the IGT test: /* From the Sky Lake PRM, Vol 12, "Color Control Surface": * * "The compression state of the cache-line pair is * specified by 2 bits in the CCS. Each CCS cache-line * represents an area on the main surface of 16x16 sets * of 128 byte Y-tiled cache-line-pairs. CCS is always Y * tiled." * * A "cache-line-pair" for a Y-tiled surface is two * horizontally adjacent cache lines. When operating in * bytes and rows, this gives us a scale-down factor of * 32x16. Since the main surface has a 32-bit format, we * need to multiply width by 4 to get bytes. */ We have a scaling factor, in bytes, of 32x16. However, the main surface uses 4 byes per pixel so we need to account for that. In the IGT test, we multiply the width of the main surface by 4 to get bytes. Alternatively, you can adjust the scale factor to 8x16 so long as you align things correctly. tile_width as 128, and tile_height > comes out as 32. Yes, that's a correct Y-tile. > Given the calculations in intel_fill_fb_info, I come > out with the kernel demanding either 34816 bytes for CCS (using 16/8 > hsub/vsub), or 20480 bytes (8/16) for a 1920x1080 framebuffer. Neither of those look right. I'm getting 6 pages, or 24576B when I run the test which should be correct. > Which > kernel do you have, and how are you coming out with that calculation? > Do we need to go back and re-figure out what pitch is? > > FWIW, ISL seems to get it right, according to the kernel. > Weird...
On Fri, Aug 04, 2017 at 07:56:11AM -0700, Jason Ekstrand wrote: > On August 4, 2017 2:59:56 AM Daniel Stone <daniel@fooishbar.org> wrote: > > > Hi Jason, > > > > On 4 August 2017 at 01:52, Jason Ekstrand <jason@jlekstrand.net> wrote: > > > Previously, the test used the old 64x64 convention that Ville introduced > > > for CCS tiles and not the current 128x32 Y-tile convention. Also, the > > > original scheme for generating the CCS data was over-complicated and > > > didn't work correctly because it assumed you could cut the main surface > > > at an arbitrary Y coordinate. While you clearly *can* do this (the > > > hardware does), it's not a good idea for a generator in a test. The new > > > scheme, introduced here, is entirely based on the relationship between > > > cache-lines in the main surface and the CCS that's documented in the > > > PRM. By keeping everything CCS cache-line aligned, our chances of > > > generating correct data for an arbitrary-size surface are much higher. > > > > Thanks for fixing this! > > > > > + * We need to cut the surface on a CCS cache-line boundary, > > > + * otherwise, we're going to be in trouble when we try to > > > + * generate CCS data for the surface. A cache line in the > > > + * CCS is 16x16 cache-line-pairs in the main surface. 16 > > > + * cache lines is 64 rows high. > > > + */ > > > + half_height = ALIGN(height, 128) / 2; > > > + half_size = half_height * stride; > > > + for (i = 0; i < fb->size / 4; i++) { > > > + if (i < half_size / 4) > > > + ptr[i] = RED; > > > + else > > > + ptr[i] = COMPRESSED_RED; > > > > I toyed with: > > else if (!(i & 0xe)) > > ptr[i] = COMPRESSED_RED; > > else > > ptr[i] = BLACK; > > > > ... to make the failure mode even more obvious. But that still writes > > in (far) more compressed-red pixels than we strictly need for CCS, > > right? Anyway, follow-up patch. > > Yeah, we can probably do quite a bit of patterning so long as we keep the > compressed/uncompressed split simple and make sure our pattern works in > whole cache lines. > > > > +static unsigned int > > > +y_tile_y_pos(unsigned int offset, unsigned int stride) > > > { > > > - return ptr + > > > - ((y & ~0x3f) * stride) + > > > - ((x & ~0x7) * 64) + > > > - ((y & 0x3f) * 8) + > > > - (x & 7); > > > + unsigned int y_tiles, y; > > > + y_tiles = (offset / 4096) / (stride / 128); > > > + y = y_tiles * 32 + ((offset & 0x1f0) >> 4); > > > + return y; > > > } > > > > > > @@ -143,12 +168,26 @@ static void display_fb(data_t *data, int compressed) > > > size[0] = f.pitches[0] * ALIGN(height, 32); > > > > > > if (compressed) { > > > - width = ALIGN(f.width, 16) / 16; > > > - height = ALIGN(f.height, 8) / 8; > > > - f.pitches[1] = ALIGN(width * 1, 64); > > > + /* From the Sky Lake PRM, Vol 12, "Color Control Surface": > > > + * > > > + * "The compression state of the cache-line pair is > > > + * specified by 2 bits in the CCS. Each CCS cache-line > > > + * represents an area on the main surface of 16x16 sets > > > + * of 128 byte Y-tiled cache-line-pairs. CCS is always Y > > > + * tiled." > > > + * > > > + * A "cache-line-pair" for a Y-tiled surface is two > > > + * horizontally adjacent cache lines. When operating in > > > + * bytes and rows, this gives us a scale-down factor of > > > + * 32x16. Since the main surface has a 32-bit format, we > > > + * need to multiply width by 4 to get bytes. > > > + */ > > > > Yeah, this code and comment match better (& helped clarify) my > > understanding of how it works. But given my understanding was mostly > > gleaned from other, borderline incomprehensible, comments, as well as > > manually poking bits and observing the result, maybe that's not a > > ringing endorsement. > > > > > + width = ALIGN(f.width * 4, 32) / 32; > > > + height = ALIGN(f.height, 16) / 16; > > > + f.pitches[1] = ALIGN(width * 1, 128); > > > f.modifier[1] = modifier; > > > f.offsets[1] = size[0]; > > > - size[1] = f.pitches[1] * ALIGN(height, 64); > > > + size[1] = f.pitches[1] * ALIGN(height, 32); > > > > I changed this to f.height rather than height, because otherwise the > > kernel was rejecting the aux buffer for being too small. > > Congratulations, you found a bug in the kernel branch you're running. The > downsized height is definitely what we want and it works fine with my kernel > branch. This is why I'd like someone (post or pre merging the kernel patches) to add a pile of subtests here (in the spirit of kms_addfb_basic) which goes through all kinds of invalid alignment/settings and makes sure the kernel catches them all. The current single invalid testcase we have is imo way too trustful of kernel hacker's ability to get this right :-) Thanks, Daniel
diff --git a/tests/kms_ccs.c b/tests/kms_ccs.c index 29d676a..ef952f2 100644 --- a/tests/kms_ccs.c +++ b/tests/kms_ccs.c @@ -48,12 +48,17 @@ typedef struct { #define COMPRESSED_GREEN 0x000ff00f #define COMPRESSED_BLUE 0x00000fff +#define CCS_UNCOMPRESSED 0x0 +#define CCS_COMPRESSED 0x55 + #define RED 0x00ff0000 -static void render_fb(data_t *data, bool compressed) +static void render_fb(data_t *data, bool compressed, + int height, unsigned int stride) { struct igt_fb *fb = &data->fb; uint32_t *ptr; + unsigned int half_height, half_size; int i; igt_assert(fb->fb_id); @@ -62,43 +67,63 @@ static void render_fb(data_t *data, bool compressed) 0, fb->size, PROT_READ | PROT_WRITE); - for (i = 0; i < fb->size / 4; i++) { - /* Fill upper half as compressed */ - if (compressed && i < fb->size / 4 / 2) - ptr[i] = COMPRESSED_RED; - else + if (compressed) { + /* In the compressed case, we want the top half of the + * surface to be uncompressed and the bottom half to be + * compressed. + * + * We need to cut the surface on a CCS cache-line boundary, + * otherwise, we're going to be in trouble when we try to + * generate CCS data for the surface. A cache line in the + * CCS is 16x16 cache-line-pairs in the main surface. 16 + * cache lines is 64 rows high. + */ + half_height = ALIGN(height, 128) / 2; + half_size = half_height * stride; + for (i = 0; i < fb->size / 4; i++) { + if (i < half_size / 4) + ptr[i] = RED; + else + ptr[i] = COMPRESSED_RED; + } + } else { + for (i = 0; i < fb->size / 4; i++) ptr[i] = RED; } munmap(ptr, fb->size); } -static uint8_t *ccs_ptr(uint8_t *ptr, - unsigned int x, unsigned int y, - unsigned int stride) +static unsigned int +y_tile_y_pos(unsigned int offset, unsigned int stride) { - return ptr + - ((y & ~0x3f) * stride) + - ((x & ~0x7) * 64) + - ((y & 0x3f) * 8) + - (x & 7); + unsigned int y_tiles, y; + y_tiles = (offset / 4096) / (stride / 128); + y = y_tiles * 32 + ((offset & 0x1f0) >> 4); + return y; } static void render_ccs(data_t *data, uint32_t gem_handle, uint32_t offset, uint32_t size, - int w, int h, unsigned int stride) + int height, unsigned int ccs_stride) { + unsigned int half_height, ccs_half_height; uint8_t *ptr; - int x, y; + int i; + + half_height = ALIGN(height, 128) / 2; + ccs_half_height = half_height / 16; ptr = gem_mmap__cpu(data->drm_fd, gem_handle, offset, size, PROT_READ | PROT_WRITE); - /* Mark upper half as compressed */ - for (x = 0 ; x < w; x++) - for (y = 0 ; y <= h / 2; y++) - *ccs_ptr(ptr, x, y, stride) = 0x55; + for (i = 0; i < size; i++) { + if (y_tile_y_pos(i, ccs_stride) < ccs_half_height) + ptr[i] = CCS_UNCOMPRESSED; + else + ptr[i] = CCS_COMPRESSED; + } munmap(ptr, size); } @@ -143,12 +168,26 @@ static void display_fb(data_t *data, int compressed) size[0] = f.pitches[0] * ALIGN(height, 32); if (compressed) { - width = ALIGN(f.width, 16) / 16; - height = ALIGN(f.height, 8) / 8; - f.pitches[1] = ALIGN(width * 1, 64); + /* From the Sky Lake PRM, Vol 12, "Color Control Surface": + * + * "The compression state of the cache-line pair is + * specified by 2 bits in the CCS. Each CCS cache-line + * represents an area on the main surface of 16x16 sets + * of 128 byte Y-tiled cache-line-pairs. CCS is always Y + * tiled." + * + * A "cache-line-pair" for a Y-tiled surface is two + * horizontally adjacent cache lines. When operating in + * bytes and rows, this gives us a scale-down factor of + * 32x16. Since the main surface has a 32-bit format, we + * need to multiply width by 4 to get bytes. + */ + width = ALIGN(f.width * 4, 32) / 32; + height = ALIGN(f.height, 16) / 16; + f.pitches[1] = ALIGN(width * 1, 128); f.modifier[1] = modifier; f.offsets[1] = size[0]; - size[1] = f.pitches[1] * ALIGN(height, 64); + size[1] = f.pitches[1] * ALIGN(height, 32); f.handles[0] = gem_create(data->drm_fd, size[0] + size[1]); f.handles[1] = f.handles[0]; @@ -176,11 +215,11 @@ static void display_fb(data_t *data, int compressed) fb->cairo_surface = NULL; fb->domain = 0; - render_fb(data, compressed); + render_fb(data, compressed, f.height, f.pitches[0]); if (compressed) render_ccs(data, f.handles[0], f.offsets[1], size[1], - f.width/16, f.height/8, f.pitches[1]); + f.height, f.pitches[1]); primary = igt_output_get_plane_type(data->output, DRM_PLANE_TYPE_PRIMARY); igt_plane_set_fb(primary, fb);
Previously, the test used the old 64x64 convention that Ville introduced for CCS tiles and not the current 128x32 Y-tile convention. Also, the original scheme for generating the CCS data was over-complicated and didn't work correctly because it assumed you could cut the main surface at an arbitrary Y coordinate. While you clearly *can* do this (the hardware does), it's not a good idea for a generator in a test. The new scheme, introduced here, is entirely based on the relationship between cache-lines in the main surface and the CCS that's documented in the PRM. By keeping everything CCS cache-line aligned, our chances of generating correct data for an arbitrary-size surface are much higher. Signed-off-by: Jason Ekstrand <jason.ekstrand@intel.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Ben Widawsky <benjamin.widawsky@intel.com> Cc: Daniel Stone <daniels@collabora.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> --- tests/kms_ccs.c | 91 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 26 deletions(-)