Message ID | 20230621194121.184552-1-arthurgrillo@riseup.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4] drm/vkms: Add support to 1D gamma LUT | expand |
On 6/21/23 15:41, Arthur Grillo wrote: > Support a 1D gamma LUT with interpolation for each color channel on the > VKMS driver. Add a check for the LUT length by creating > vkms_atomic_check(). > > Tested with: > igt@kms_color@gamma > igt@kms_color@legacy-gamma > igt@kms_color@invalid-gamma-lut-sizes > > v2: > - Add interpolation between the values of the LUT (Simon Ser) > > v3: > - s/ratio/delta (Pekka) > - s/color_channel/channel_value (Pekka) > - s/lut_area/lut_channel > - Store the `drm_color_lut`, `lut_length`, and > `channel_value2index_ratio` inside a struct called `vkms_lut` > (Pekka) > - Pre-compute some constants values used through the LUT procedure > (Pekka) > - Change the switch statement to a cast to __u16* (Pekka) > - Make the apply_lut_to_channel_value return the computation result > (Pekka) > > v4: > - Add a comment explaining that `enum lut_area` depends on the > layout of `struct drm_color_lut` (Pekka) > - Remove unused variable (kernel test robot) > > Signed-off-by: Arthur Grillo <arthurgrillo@riseup.net> > Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com> Thanks for writing this and thanks, Pekka and Simon, for doing the heavy lifting with reviews. This will be helpful while I'm writing a sketch of a drm_plane color pipeline API with VKMS. This patch looks good to me and is Reviewed-by: Harry Wentland <harry.wentland@amd.com> Harry > --- > drivers/gpu/drm/vkms/vkms_composer.c | 86 ++++++++++++++++++++++++++++ > drivers/gpu/drm/vkms/vkms_crtc.c | 3 + > drivers/gpu/drm/vkms/vkms_drv.c | 20 ++++++- > drivers/gpu/drm/vkms/vkms_drv.h | 9 +++ > 4 files changed, 117 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c > index 906d3df40cdb..620c0bafbe56 100644 > --- a/drivers/gpu/drm/vkms/vkms_composer.c > +++ b/drivers/gpu/drm/vkms/vkms_composer.c > @@ -6,6 +6,7 @@ > #include <drm/drm_atomic_helper.h> > #include <drm/drm_blend.h> > #include <drm/drm_fourcc.h> > +#include <drm/drm_fixed.h> > #include <drm/drm_gem_framebuffer_helper.h> > #include <drm/drm_vblank.h> > #include <linux/minmax.h> > @@ -89,6 +90,73 @@ static void fill_background(const struct pixel_argb_u16 *background_color, > output_buffer->pixels[i] = *background_color; > } > > +// lerp(a, b, t) = a + (b - a) * t > +static u16 lerp_u16(u16 a, u16 b, s64 t) > +{ > + s64 a_fp = drm_int2fixp(a); > + s64 b_fp = drm_int2fixp(b); > + > + s64 delta = drm_fixp_mul(b_fp - a_fp, t); > + > + return drm_fixp2int(a_fp + delta); > +} > + > +static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) > +{ > + s64 color_channel_fp = drm_int2fixp(channel_value); > + > + return drm_fixp_mul(color_channel_fp, lut->channel_value2index_ratio); > +} > + > +/* > + * This enum is related to the positions of the variables inside > + * `struct drm_color_lut`, so the order of both needs to be the same. > + */ > +enum lut_channel { > + LUT_RED = 0, > + LUT_GREEN, > + LUT_BLUE, > + LUT_RESERVED > +}; > + > +static u16 apply_lut_to_channel_value(const struct vkms_color_lut *lut, u16 channel_value, > + enum lut_channel channel) > +{ > + s64 lut_index = get_lut_index(lut, channel_value); > + > + /* > + * This checks if `struct drm_color_lut` had any gap added by the compiler > + * between the struct fields. > + */ > + static_assert(sizeof(struct drm_color_lut) == sizeof(__u16) * 4); > + > + u16 *floor_lut_value = (__u16 *)&lut->base[drm_fixp2int(lut_index)]; > + u16 *ceil_lut_value = (__u16 *)&lut->base[drm_fixp2int_ceil(lut_index)]; > + > + u16 floor_channel_value = floor_lut_value[channel]; > + u16 ceil_channel_value = ceil_lut_value[channel]; > + > + return lerp_u16(floor_channel_value, ceil_channel_value, > + lut_index & DRM_FIXED_DECIMAL_MASK); > +} > + > +static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buffer *output_buffer) > +{ > + if (!crtc_state->gamma_lut.base) > + return; > + > + if (!crtc_state->gamma_lut.lut_length) > + return; > + > + for (size_t x = 0; x < output_buffer->n_pixels; x++) { > + struct pixel_argb_u16 *pixel = &output_buffer->pixels[x]; > + > + pixel->r = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->r, LUT_RED); > + pixel->g = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->g, LUT_GREEN); > + pixel->b = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->b, LUT_BLUE); > + } > +} > + > /** > * @wb_frame_info: The writeback frame buffer metadata > * @crtc_state: The crtc state > @@ -128,6 +196,8 @@ static void blend(struct vkms_writeback_job *wb, > output_buffer); > } > > + apply_lut(crtc_state, output_buffer); > + > *crc32 = crc32_le(*crc32, (void *)output_buffer->pixels, row_size); > > if (wb) > @@ -242,6 +312,22 @@ void vkms_composer_worker(struct work_struct *work) > crtc_state->frame_start = 0; > crtc_state->frame_end = 0; > crtc_state->crc_pending = false; > + > + if (crtc->state->gamma_lut) { > + s64 max_lut_index_fp; > + s64 u16_max_fp = drm_int2fixp(0xffff); > + > + crtc_state->gamma_lut.base = (struct drm_color_lut *)crtc->state->gamma_lut->data; > + crtc_state->gamma_lut.lut_length = > + crtc->state->gamma_lut->length / sizeof(struct drm_color_lut); > + max_lut_index_fp = drm_int2fixp(crtc_state->gamma_lut.lut_length - 1); > + crtc_state->gamma_lut.channel_value2index_ratio = drm_fixp_div(max_lut_index_fp, > + u16_max_fp); > + > + } else { > + crtc_state->gamma_lut.base = NULL; > + } > + > spin_unlock_irq(&out->composer_lock); > > /* > diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c > index 515f6772b866..61e500b8c9da 100644 > --- a/drivers/gpu/drm/vkms/vkms_crtc.c > +++ b/drivers/gpu/drm/vkms/vkms_crtc.c > @@ -290,6 +290,9 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, > > drm_crtc_helper_add(crtc, &vkms_crtc_helper_funcs); > > + drm_mode_crtc_set_gamma_size(crtc, VKMS_LUT_SIZE); > + drm_crtc_enable_color_mgmt(crtc, 0, false, VKMS_LUT_SIZE); > + > spin_lock_init(&vkms_out->lock); > spin_lock_init(&vkms_out->composer_lock); > > diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c > index e3c9c9571c8d..dd0af086e7fa 100644 > --- a/drivers/gpu/drm/vkms/vkms_drv.c > +++ b/drivers/gpu/drm/vkms/vkms_drv.c > @@ -120,9 +120,27 @@ static const struct drm_driver vkms_driver = { > .minor = DRIVER_MINOR, > }; > > +static int vkms_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) > +{ > + struct drm_crtc *crtc; > + struct drm_crtc_state *new_crtc_state; > + int i; > + > + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { > + if (!new_crtc_state->gamma_lut || !new_crtc_state->color_mgmt_changed) > + continue; > + > + if (new_crtc_state->gamma_lut->length / sizeof(struct drm_color_lut *) > + > VKMS_LUT_SIZE) > + return -EINVAL; > + } > + > + return drm_atomic_helper_check(dev, state); > +} > + > static const struct drm_mode_config_funcs vkms_mode_funcs = { > .fb_create = drm_gem_fb_create, > - .atomic_check = drm_atomic_helper_check, > + .atomic_check = vkms_atomic_check, > .atomic_commit = drm_atomic_helper_commit, > }; > > diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h > index 5f1a0a44a78c..f16b5d7b81ef 100644 > --- a/drivers/gpu/drm/vkms/vkms_drv.h > +++ b/drivers/gpu/drm/vkms/vkms_drv.h > @@ -23,6 +23,8 @@ > > #define NUM_OVERLAY_PLANES 8 > > +#define VKMS_LUT_SIZE 256 > + > struct vkms_frame_info { > struct drm_framebuffer *fb; > struct drm_rect src, dst; > @@ -65,6 +67,12 @@ struct vkms_plane { > struct drm_plane base; > }; > > +struct vkms_color_lut { > + struct drm_color_lut *base; > + size_t lut_length; > + s64 channel_value2index_ratio; > +}; > + > /** > * vkms_crtc_state - Driver specific CRTC state > * @base: base CRTC state > @@ -80,6 +88,7 @@ struct vkms_crtc_state { > /* stack of active planes for crc computation, should be in z order */ > struct vkms_plane_state **active_planes; > struct vkms_writeback_job *active_writeback; > + struct vkms_color_lut gamma_lut; > > /* below four are protected by vkms_output.composer_lock */ > bool crc_pending;
Hi Arthur, Thanks for working on this feature for the VKMS! On 6/21/23 16:41, Arthur Grillo wrote: > Support a 1D gamma LUT with interpolation for each color channel on the > VKMS driver. Add a check for the LUT length by creating > vkms_atomic_check(). > > Tested with: > igt@kms_color@gamma > igt@kms_color@legacy-gamma > igt@kms_color@invalid-gamma-lut-sizes Could you also mention that this will make it possible to run the test igt@kms_plane@pixel-format? Also, you mentioned to me that the performance degraded with this new feature, but I wasn't able to see it while running the VKMS CI. I performed a couple of tests and I didn't see any significant performance issue. Could you please run a benchmark and share the results with us? This way we can atest that this new feature will not affect significantly the VKMS performance. It would be nice to have a small brief of this benchmark on the commit message as well. Attesting that there isn't a performance issue and adding those nits to the commit message, you can add my Reviewed-by: Maíra Canal <mairacanal@riseup.net> on the next version. > > v2: > - Add interpolation between the values of the LUT (Simon Ser) > > v3: > - s/ratio/delta (Pekka) > - s/color_channel/channel_value (Pekka) > - s/lut_area/lut_channel > - Store the `drm_color_lut`, `lut_length`, and > `channel_value2index_ratio` inside a struct called `vkms_lut` > (Pekka) > - Pre-compute some constants values used through the LUT procedure > (Pekka) > - Change the switch statement to a cast to __u16* (Pekka) > - Make the apply_lut_to_channel_value return the computation result > (Pekka) > > v4: > - Add a comment explaining that `enum lut_area` depends on the > layout of `struct drm_color_lut` (Pekka) > - Remove unused variable (kernel test robot) > > Signed-off-by: Arthur Grillo <arthurgrillo@riseup.net> > Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com> > --- > drivers/gpu/drm/vkms/vkms_composer.c | 86 ++++++++++++++++++++++++++++ > drivers/gpu/drm/vkms/vkms_crtc.c | 3 + > drivers/gpu/drm/vkms/vkms_drv.c | 20 ++++++- > drivers/gpu/drm/vkms/vkms_drv.h | 9 +++ > 4 files changed, 117 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c > index 906d3df40cdb..620c0bafbe56 100644 > --- a/drivers/gpu/drm/vkms/vkms_composer.c > +++ b/drivers/gpu/drm/vkms/vkms_composer.c > @@ -6,6 +6,7 @@ > #include <drm/drm_atomic_helper.h> > #include <drm/drm_blend.h> > #include <drm/drm_fourcc.h> > +#include <drm/drm_fixed.h> > #include <drm/drm_gem_framebuffer_helper.h> > #include <drm/drm_vblank.h> > #include <linux/minmax.h> > @@ -89,6 +90,73 @@ static void fill_background(const struct pixel_argb_u16 *background_color, > output_buffer->pixels[i] = *background_color; > } > > +// lerp(a, b, t) = a + (b - a) * t > +static u16 lerp_u16(u16 a, u16 b, s64 t) > +{ > + s64 a_fp = drm_int2fixp(a); > + s64 b_fp = drm_int2fixp(b); > + > + s64 delta = drm_fixp_mul(b_fp - a_fp, t); > + > + return drm_fixp2int(a_fp + delta); > +} > + > +static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) > +{ > + s64 color_channel_fp = drm_int2fixp(channel_value); > + > + return drm_fixp_mul(color_channel_fp, lut->channel_value2index_ratio); > +} > + > +/* > + * This enum is related to the positions of the variables inside > + * `struct drm_color_lut`, so the order of both needs to be the same. > + */ > +enum lut_channel { > + LUT_RED = 0, > + LUT_GREEN, > + LUT_BLUE, > + LUT_RESERVED > +}; > + > +static u16 apply_lut_to_channel_value(const struct vkms_color_lut *lut, u16 channel_value, > + enum lut_channel channel) > +{ > + s64 lut_index = get_lut_index(lut, channel_value); > + > + /* > + * This checks if `struct drm_color_lut` had any gap added by the compiler s/had/has Best Regards, - Maíra > + * between the struct fields. > + */ > + static_assert(sizeof(struct drm_color_lut) == sizeof(__u16) * 4); > + > + u16 *floor_lut_value = (__u16 *)&lut->base[drm_fixp2int(lut_index)]; > + u16 *ceil_lut_value = (__u16 *)&lut->base[drm_fixp2int_ceil(lut_index)]; > + > + u16 floor_channel_value = floor_lut_value[channel]; > + u16 ceil_channel_value = ceil_lut_value[channel]; > + > + return lerp_u16(floor_channel_value, ceil_channel_value, > + lut_index & DRM_FIXED_DECIMAL_MASK); > +} > + > +static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buffer *output_buffer) > +{ > + if (!crtc_state->gamma_lut.base) > + return; > + > + if (!crtc_state->gamma_lut.lut_length) > + return; > + > + for (size_t x = 0; x < output_buffer->n_pixels; x++) { > + struct pixel_argb_u16 *pixel = &output_buffer->pixels[x]; > + > + pixel->r = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->r, LUT_RED); > + pixel->g = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->g, LUT_GREEN); > + pixel->b = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->b, LUT_BLUE); > + } > +} > + > /** > * @wb_frame_info: The writeback frame buffer metadata > * @crtc_state: The crtc state > @@ -128,6 +196,8 @@ static void blend(struct vkms_writeback_job *wb, > output_buffer); > } > > + apply_lut(crtc_state, output_buffer); > + > *crc32 = crc32_le(*crc32, (void *)output_buffer->pixels, row_size); > > if (wb) > @@ -242,6 +312,22 @@ void vkms_composer_worker(struct work_struct *work) > crtc_state->frame_start = 0; > crtc_state->frame_end = 0; > crtc_state->crc_pending = false; > + > + if (crtc->state->gamma_lut) { > + s64 max_lut_index_fp; > + s64 u16_max_fp = drm_int2fixp(0xffff); > + > + crtc_state->gamma_lut.base = (struct drm_color_lut *)crtc->state->gamma_lut->data; > + crtc_state->gamma_lut.lut_length = > + crtc->state->gamma_lut->length / sizeof(struct drm_color_lut); > + max_lut_index_fp = drm_int2fixp(crtc_state->gamma_lut.lut_length - 1); > + crtc_state->gamma_lut.channel_value2index_ratio = drm_fixp_div(max_lut_index_fp, > + u16_max_fp); > + > + } else { > + crtc_state->gamma_lut.base = NULL; > + } > + > spin_unlock_irq(&out->composer_lock); > > /* > diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c > index 515f6772b866..61e500b8c9da 100644 > --- a/drivers/gpu/drm/vkms/vkms_crtc.c > +++ b/drivers/gpu/drm/vkms/vkms_crtc.c > @@ -290,6 +290,9 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, > > drm_crtc_helper_add(crtc, &vkms_crtc_helper_funcs); > > + drm_mode_crtc_set_gamma_size(crtc, VKMS_LUT_SIZE); > + drm_crtc_enable_color_mgmt(crtc, 0, false, VKMS_LUT_SIZE); > + > spin_lock_init(&vkms_out->lock); > spin_lock_init(&vkms_out->composer_lock); > > diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c > index e3c9c9571c8d..dd0af086e7fa 100644 > --- a/drivers/gpu/drm/vkms/vkms_drv.c > +++ b/drivers/gpu/drm/vkms/vkms_drv.c > @@ -120,9 +120,27 @@ static const struct drm_driver vkms_driver = { > .minor = DRIVER_MINOR, > }; > > +static int vkms_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) > +{ > + struct drm_crtc *crtc; > + struct drm_crtc_state *new_crtc_state; > + int i; > + > + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { > + if (!new_crtc_state->gamma_lut || !new_crtc_state->color_mgmt_changed) > + continue; > + > + if (new_crtc_state->gamma_lut->length / sizeof(struct drm_color_lut *) > + > VKMS_LUT_SIZE) > + return -EINVAL; > + } > + > + return drm_atomic_helper_check(dev, state); > +} > + > static const struct drm_mode_config_funcs vkms_mode_funcs = { > .fb_create = drm_gem_fb_create, > - .atomic_check = drm_atomic_helper_check, > + .atomic_check = vkms_atomic_check, > .atomic_commit = drm_atomic_helper_commit, > }; > > diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h > index 5f1a0a44a78c..f16b5d7b81ef 100644 > --- a/drivers/gpu/drm/vkms/vkms_drv.h > +++ b/drivers/gpu/drm/vkms/vkms_drv.h > @@ -23,6 +23,8 @@ > > #define NUM_OVERLAY_PLANES 8 > > +#define VKMS_LUT_SIZE 256 > + > struct vkms_frame_info { > struct drm_framebuffer *fb; > struct drm_rect src, dst; > @@ -65,6 +67,12 @@ struct vkms_plane { > struct drm_plane base; > }; > > +struct vkms_color_lut { > + struct drm_color_lut *base; > + size_t lut_length; > + s64 channel_value2index_ratio; > +}; > + > /** > * vkms_crtc_state - Driver specific CRTC state > * @base: base CRTC state > @@ -80,6 +88,7 @@ struct vkms_crtc_state { > /* stack of active planes for crc computation, should be in z order */ > struct vkms_plane_state **active_planes; > struct vkms_writeback_job *active_writeback; > + struct vkms_color_lut gamma_lut; > > /* below four are protected by vkms_output.composer_lock */ > bool crc_pending;
On Sat, 24 Jun 2023 18:48:08 -0300 Maira Canal <mairacanal@riseup.net> wrote: > Hi Arthur, > > Thanks for working on this feature for the VKMS! > > On 6/21/23 16:41, Arthur Grillo wrote: > > Support a 1D gamma LUT with interpolation for each color channel on the > > VKMS driver. Add a check for the LUT length by creating > > vkms_atomic_check(). > > > > Tested with: > > igt@kms_color@gamma > > igt@kms_color@legacy-gamma > > igt@kms_color@invalid-gamma-lut-sizes > > Could you also mention that this will make it possible to run the test > igt@kms_plane@pixel-format? > > Also, you mentioned to me that the performance degraded with this new > feature, but I wasn't able to see it while running the VKMS CI. I > performed a couple of tests and I didn't see any significant performance > issue. > > Could you please run a benchmark and share the results with us? This way > we can atest that this new feature will not affect significantly the > VKMS performance. It would be nice to have a small brief of this > benchmark on the commit message as well. > > Attesting that there isn't a performance issue and adding those nits to > the commit message, you can add my > > Reviewed-by: Maíra Canal <mairacanal@riseup.net> > > on the next version. Hi, perfomance testing is good indeed. As future work, could there be a document describing how to test VKMS performance? "I ran IGT@blah 100 times and it took xx seconds before and yy seconds after" does not really give someone like me an idea of what was actually measured. For example blending overhead increase could be completely lost in opaque pixel copying noise if the test case has only few pixels to blend, e.g. a cursor plane, not to mention the overhead of launching an IGT test in the first place. Something that would guide new developers in running meaningful benchmarks would be nice. Should e.g. IGT have explicit (VKMS) performance tests that need to be run manually, since evaluation of the result is not feasible automatically? Or a benchmark mode in correctness tests that would run the identical operation N times and measure the time before checking for correctness? The correctness verification in IGT tests, if done by image comparison which they undoubtedly will need to be in the future, may dominate the CPU run time measurements if included. Thanks, pq
Hi Pekka, On 6/26/23 05:17, Pekka Paalanen wrote: > On Sat, 24 Jun 2023 18:48:08 -0300 > Maira Canal <mairacanal@riseup.net> wrote: > >> Hi Arthur, >> >> Thanks for working on this feature for the VKMS! >> >> On 6/21/23 16:41, Arthur Grillo wrote: >>> Support a 1D gamma LUT with interpolation for each color channel on the >>> VKMS driver. Add a check for the LUT length by creating >>> vkms_atomic_check(). >>> >>> Tested with: >>> igt@kms_color@gamma >>> igt@kms_color@legacy-gamma >>> igt@kms_color@invalid-gamma-lut-sizes >> >> Could you also mention that this will make it possible to run the test >> igt@kms_plane@pixel-format? >> >> Also, you mentioned to me that the performance degraded with this new >> feature, but I wasn't able to see it while running the VKMS CI. I >> performed a couple of tests and I didn't see any significant performance >> issue. >> >> Could you please run a benchmark and share the results with us? This way >> we can atest that this new feature will not affect significantly the >> VKMS performance. It would be nice to have a small brief of this >> benchmark on the commit message as well. >> >> Attesting that there isn't a performance issue and adding those nits to >> the commit message, you can add my >> >> Reviewed-by: Maíra Canal <mairacanal@riseup.net> >> >> on the next version. > > Hi, > > perfomance testing is good indeed. As future work, could there be a > document describing how to test VKMS performance? I'll try to select a couple of more meaningful IGT tests to describe how to test the VKMS performance and also add a document to describe how to run this tests. Recently, I added a VKMS must-pass testlist to IGT. This testlist tries to assure that regressions will not be introduced into VKMS. But, I failed to introduce a documentation on the kernel side pointing to this new testlist... I'll also work on it. > > "I ran IGT@blah 100 times and it took xx seconds before and yy seconds > after" does not really give someone like me an idea of what was > actually measured. For example blending overhead increase could be > completely lost in opaque pixel copying noise if the test case has only > few pixels to blend, e.g. a cursor plane, not to mention the overhead > of launching an IGT test in the first place. About the IGT overhead, I don't know exactly how we could escape from it. Maybe writing KUnit tests to the VKMS's composition functions, such as blend(). Anyway, we would have the overhead of the KUnit framework. I mean, for whatever framework we choose, there'll be an overhead... Do you have any other ideas on how to test VKMS with less overhead? Best Regards, - Maíra > > Something that would guide new developers in running meaningful > benchmarks would be nice. > > Should e.g. IGT have explicit (VKMS) performance tests that need to be > run manually, since evaluation of the result is not feasible > automatically? Or a benchmark mode in correctness tests that would run > the identical operation N times and measure the time before checking > for correctness? > > The correctness verification in IGT tests, if done by image comparison > which they undoubtedly will need to be in the future, may dominate the > CPU run time measurements if included. > > > Thanks, > pq
On Mon, 26 Jun 2023 14:35:25 -0300 Maira Canal <mairacanal@riseup.net> wrote: > Hi Pekka, > > On 6/26/23 05:17, Pekka Paalanen wrote: > > On Sat, 24 Jun 2023 18:48:08 -0300 > > Maira Canal <mairacanal@riseup.net> wrote: > > > >> Hi Arthur, > >> > >> Thanks for working on this feature for the VKMS! > >> > >> On 6/21/23 16:41, Arthur Grillo wrote: > >>> Support a 1D gamma LUT with interpolation for each color channel on the > >>> VKMS driver. Add a check for the LUT length by creating > >>> vkms_atomic_check(). > >>> > >>> Tested with: > >>> igt@kms_color@gamma > >>> igt@kms_color@legacy-gamma > >>> igt@kms_color@invalid-gamma-lut-sizes > >> > >> Could you also mention that this will make it possible to run the test > >> igt@kms_plane@pixel-format? > >> > >> Also, you mentioned to me that the performance degraded with this new > >> feature, but I wasn't able to see it while running the VKMS CI. I > >> performed a couple of tests and I didn't see any significant performance > >> issue. > >> > >> Could you please run a benchmark and share the results with us? This way > >> we can atest that this new feature will not affect significantly the > >> VKMS performance. It would be nice to have a small brief of this > >> benchmark on the commit message as well. > >> > >> Attesting that there isn't a performance issue and adding those nits to > >> the commit message, you can add my > >> > >> Reviewed-by: Maíra Canal <mairacanal@riseup.net> > >> > >> on the next version. > > > > Hi, > > > > perfomance testing is good indeed. As future work, could there be a > > document describing how to test VKMS performance? > > I'll try to select a couple of more meaningful IGT tests to describe how > to test the VKMS performance and also add a document to describe how to > run this tests. > > Recently, I added a VKMS must-pass testlist to IGT. This testlist > tries to assure that regressions will not be introduced into VKMS. But, > I failed to introduce a documentation on the kernel side pointing to > this new testlist... I'll also work on it. > > > > > "I ran IGT@blah 100 times and it took xx seconds before and yy seconds > > after" does not really give someone like me an idea of what was > > actually measured. For example blending overhead increase could be > > completely lost in opaque pixel copying noise if the test case has only > > few pixels to blend, e.g. a cursor plane, not to mention the overhead > > of launching an IGT test in the first place. > > About the IGT overhead, I don't know exactly how we could escape from > it. Maybe writing KUnit tests to the VKMS's composition functions, such > as blend(). Anyway, we would have the overhead of the KUnit framework. > I mean, for whatever framework we choose, there'll be an overhead... > > Do you have any other ideas on how to test VKMS with less overhead? Maybe put the repeat loop and time measurement inside the code of a few chosen IGT tests? So that it loops only the KMS programming and somehow ensures VKMS has finished processing each update before doing the next cycle. I presume VKMS does not have a timer-based refresh cycle that might add CPU idle time? Writeback should be included in the measurement too, but inspecting writeback results should not. Once all that is in place, then each performance test needs to use appropriate operations. E.g. if testing blending performance, use almost full-screen planes. What's the overhead of KUnit framework? Can you not do the same there, put the repeat loop and time measurement inside the test to cover only the interesting code? Unit-testing the composition function performance might be ideal. Depending on the type of test, if the CRTC mode and planes are big enough, maybe there is no need to repeat even. But testing presumably fast things like moving a cursor plane will likely need repeating in order to produce stable numbers. Thanks, pq > > Best Regards, > - Maíra > > > > > Something that would guide new developers in running meaningful > > benchmarks would be nice. > > > > Should e.g. IGT have explicit (VKMS) performance tests that need to be > > run manually, since evaluation of the result is not feasible > > automatically? Or a benchmark mode in correctness tests that would run > > the identical operation N times and measure the time before checking > > for correctness? > > > > The correctness verification in IGT tests, if done by image comparison > > which they undoubtedly will need to be in the future, may dominate the > > CPU run time measurements if included. > > > > > > Thanks, > > pq
On 6/27/23 05:12, Pekka Paalanen wrote: > On Mon, 26 Jun 2023 14:35:25 -0300 > Maira Canal <mairacanal@riseup.net> wrote: > >> Hi Pekka, >> >> On 6/26/23 05:17, Pekka Paalanen wrote: >>> On Sat, 24 Jun 2023 18:48:08 -0300 >>> Maira Canal <mairacanal@riseup.net> wrote: >>> >>>> Hi Arthur, >>>> >>>> Thanks for working on this feature for the VKMS! >>>> >>>> On 6/21/23 16:41, Arthur Grillo wrote: >>>>> Support a 1D gamma LUT with interpolation for each color channel on the >>>>> VKMS driver. Add a check for the LUT length by creating >>>>> vkms_atomic_check(). >>>>> >>>>> Tested with: >>>>> igt@kms_color@gamma >>>>> igt@kms_color@legacy-gamma >>>>> igt@kms_color@invalid-gamma-lut-sizes >>>> >>>> Could you also mention that this will make it possible to run the test >>>> igt@kms_plane@pixel-format? >>>> >>>> Also, you mentioned to me that the performance degraded with this new >>>> feature, but I wasn't able to see it while running the VKMS CI. I >>>> performed a couple of tests and I didn't see any significant performance >>>> issue. >>>> >>>> Could you please run a benchmark and share the results with us? This way >>>> we can atest that this new feature will not affect significantly the >>>> VKMS performance. It would be nice to have a small brief of this >>>> benchmark on the commit message as well. >>>> >>>> Attesting that there isn't a performance issue and adding those nits to >>>> the commit message, you can add my >>>> >>>> Reviewed-by: Maíra Canal <mairacanal@riseup.net> >>>> >>>> on the next version. >>> >>> Hi, >>> >>> perfomance testing is good indeed. As future work, could there be a >>> document describing how to test VKMS performance? >> >> I'll try to select a couple of more meaningful IGT tests to describe how >> to test the VKMS performance and also add a document to describe how to >> run this tests. >> >> Recently, I added a VKMS must-pass testlist to IGT. This testlist >> tries to assure that regressions will not be introduced into VKMS. But, >> I failed to introduce a documentation on the kernel side pointing to >> this new testlist... I'll also work on it. >> >>> >>> "I ran IGT@blah 100 times and it took xx seconds before and yy seconds >>> after" does not really give someone like me an idea of what was >>> actually measured. For example blending overhead increase could be >>> completely lost in opaque pixel copying noise if the test case has only >>> few pixels to blend, e.g. a cursor plane, not to mention the overhead >>> of launching an IGT test in the first place. >> >> About the IGT overhead, I don't know exactly how we could escape from >> it. Maybe writing KUnit tests to the VKMS's composition functions, such >> as blend(). Anyway, we would have the overhead of the KUnit framework. >> I mean, for whatever framework we choose, there'll be an overhead... >> >> Do you have any other ideas on how to test VKMS with less overhead? > > Maybe put the repeat loop and time measurement inside the code of a few > chosen IGT tests? > > So that it loops only the KMS programming and somehow ensures VKMS has > finished processing each update before doing the next cycle. I presume > VKMS does not have a timer-based refresh cycle that might add CPU idle > time? Writeback should be included in the measurement too, but inspecting > writeback results should not. > > Once all that is in place, then each performance test needs to use > appropriate operations. E.g. if testing blending performance, use > almost full-screen planes. ^ Grillo, any chance you could work on something like this for the performance measurements? > > What's the overhead of KUnit framework? Can you not do the same there, > put the repeat loop and time measurement inside the test to cover only > the interesting code? > > Unit-testing the composition function performance might be ideal. > I'll try to work on some unit tests for, at least, the composition section of VKMS. I believe that they will be very valuable for the maintenance and performance evaluation. Thanks for your valuable inputs on the VKMS! Best Regards, - Maíra > Depending on the type of test, if the CRTC mode and planes are big > enough, maybe there is no need to repeat even. But testing presumably > fast things like moving a cursor plane will likely need repeating in > order to produce stable numbers. > > > Thanks, > pq > >> >> Best Regards, >> - Maíra >> >>> >>> Something that would guide new developers in running meaningful >>> benchmarks would be nice. >>> >>> Should e.g. IGT have explicit (VKMS) performance tests that need to be >>> run manually, since evaluation of the result is not feasible >>> automatically? Or a benchmark mode in correctness tests that would run >>> the identical operation N times and measure the time before checking >>> for correctness? >>> >>> The correctness verification in IGT tests, if done by image comparison >>> which they undoubtedly will need to be in the future, may dominate the >>> CPU run time measurements if included. >>> >>> >>> Thanks, >>> pq >
On 02/07/23 18:37, Maira Canal wrote: > On 6/27/23 05:12, Pekka Paalanen wrote: >> On Mon, 26 Jun 2023 14:35:25 -0300 >> Maira Canal <mairacanal@riseup.net> wrote: >> >>> Hi Pekka, >>> >>> On 6/26/23 05:17, Pekka Paalanen wrote: >>>> On Sat, 24 Jun 2023 18:48:08 -0300 >>>> Maira Canal <mairacanal@riseup.net> wrote: >>>> >>>>> Hi Arthur, >>>>> >>>>> Thanks for working on this feature for the VKMS! >>>>> >>>>> On 6/21/23 16:41, Arthur Grillo wrote: >>>>>> Support a 1D gamma LUT with interpolation for each color channel on the >>>>>> VKMS driver. Add a check for the LUT length by creating >>>>>> vkms_atomic_check(). >>>>>> >>>>>> Tested with: >>>>>> igt@kms_color@gamma >>>>>> igt@kms_color@legacy-gamma >>>>>> igt@kms_color@invalid-gamma-lut-sizes >>>>> >>>>> Could you also mention that this will make it possible to run the test >>>>> igt@kms_plane@pixel-format? >>>>> >>>>> Also, you mentioned to me that the performance degraded with this new >>>>> feature, but I wasn't able to see it while running the VKMS CI. I >>>>> performed a couple of tests and I didn't see any significant performance >>>>> issue. >>>>> >>>>> Could you please run a benchmark and share the results with us? This way >>>>> we can atest that this new feature will not affect significantly the >>>>> VKMS performance. It would be nice to have a small brief of this >>>>> benchmark on the commit message as well. >>>>> >>>>> Attesting that there isn't a performance issue and adding those nits to >>>>> the commit message, you can add my >>>>> >>>>> Reviewed-by: Maíra Canal <mairacanal@riseup.net> >>>>> >>>>> on the next version. >>>> >>>> Hi, >>>> >>>> perfomance testing is good indeed. As future work, could there be a >>>> document describing how to test VKMS performance? >>> >>> I'll try to select a couple of more meaningful IGT tests to describe how >>> to test the VKMS performance and also add a document to describe how to >>> run this tests. >>> >>> Recently, I added a VKMS must-pass testlist to IGT. This testlist >>> tries to assure that regressions will not be introduced into VKMS. But, >>> I failed to introduce a documentation on the kernel side pointing to >>> this new testlist... I'll also work on it. >>> >>>> >>>> "I ran IGT@blah 100 times and it took xx seconds before and yy seconds >>>> after" does not really give someone like me an idea of what was >>>> actually measured. For example blending overhead increase could be >>>> completely lost in opaque pixel copying noise if the test case has only >>>> few pixels to blend, e.g. a cursor plane, not to mention the overhead >>>> of launching an IGT test in the first place. >>> >>> About the IGT overhead, I don't know exactly how we could escape from >>> it. Maybe writing KUnit tests to the VKMS's composition functions, such >>> as blend(). Anyway, we would have the overhead of the KUnit framework. >>> I mean, for whatever framework we choose, there'll be an overhead... >>> >>> Do you have any other ideas on how to test VKMS with less overhead? >> >> Maybe put the repeat loop and time measurement inside the code of a few >> chosen IGT tests? >> >> So that it loops only the KMS programming and somehow ensures VKMS has >> finished processing each update before doing the next cycle. I presume >> VKMS does not have a timer-based refresh cycle that might add CPU idle >> time? Writeback should be included in the measurement too, but inspecting >> writeback results should not. >> >> Once all that is in place, then each performance test needs to use >> appropriate operations. E.g. if testing blending performance, use >> almost full-screen planes. > > ^ Grillo, any chance you could work on something like this for the > performance measurements? > Yeah, I can do something like this. Sorry for the delay I think I will have those measurements on this week. ~Grillo >> >> What's the overhead of KUnit framework? Can you not do the same there, >> put the repeat loop and time measurement inside the test to cover only >> the interesting code? >> >> Unit-testing the composition function performance might be ideal. >> > > I'll try to work on some unit tests for, at least, the composition > section of VKMS. I believe that they will be very valuable for the > maintenance and performance evaluation. > > Thanks for your valuable inputs on the VKMS! > > Best Regards, > - Maíra > >> Depending on the type of test, if the CRTC mode and planes are big >> enough, maybe there is no need to repeat even. But testing presumably >> fast things like moving a cursor plane will likely need repeating in >> order to produce stable numbers. >> >> >> Thanks, >> pq >> >>> >>> Best Regards, >>> - Maíra >>> >>>> >>>> Something that would guide new developers in running meaningful >>>> benchmarks would be nice. >>>> >>>> Should e.g. IGT have explicit (VKMS) performance tests that need to be >>>> run manually, since evaluation of the result is not feasible >>>> automatically? Or a benchmark mode in correctness tests that would run >>>> the identical operation N times and measure the time before checking >>>> for correctness? >>>> >>>> The correctness verification in IGT tests, if done by image comparison >>>> which they undoubtedly will need to be in the future, may dominate the >>>> CPU run time measurements if included. >>>> >>>> >>>> Thanks, >>>> pq >>
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 906d3df40cdb..620c0bafbe56 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -6,6 +6,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> #include <drm/drm_fourcc.h> +#include <drm/drm_fixed.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_vblank.h> #include <linux/minmax.h> @@ -89,6 +90,73 @@ static void fill_background(const struct pixel_argb_u16 *background_color, output_buffer->pixels[i] = *background_color; } +// lerp(a, b, t) = a + (b - a) * t +static u16 lerp_u16(u16 a, u16 b, s64 t) +{ + s64 a_fp = drm_int2fixp(a); + s64 b_fp = drm_int2fixp(b); + + s64 delta = drm_fixp_mul(b_fp - a_fp, t); + + return drm_fixp2int(a_fp + delta); +} + +static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) +{ + s64 color_channel_fp = drm_int2fixp(channel_value); + + return drm_fixp_mul(color_channel_fp, lut->channel_value2index_ratio); +} + +/* + * This enum is related to the positions of the variables inside + * `struct drm_color_lut`, so the order of both needs to be the same. + */ +enum lut_channel { + LUT_RED = 0, + LUT_GREEN, + LUT_BLUE, + LUT_RESERVED +}; + +static u16 apply_lut_to_channel_value(const struct vkms_color_lut *lut, u16 channel_value, + enum lut_channel channel) +{ + s64 lut_index = get_lut_index(lut, channel_value); + + /* + * This checks if `struct drm_color_lut` had any gap added by the compiler + * between the struct fields. + */ + static_assert(sizeof(struct drm_color_lut) == sizeof(__u16) * 4); + + u16 *floor_lut_value = (__u16 *)&lut->base[drm_fixp2int(lut_index)]; + u16 *ceil_lut_value = (__u16 *)&lut->base[drm_fixp2int_ceil(lut_index)]; + + u16 floor_channel_value = floor_lut_value[channel]; + u16 ceil_channel_value = ceil_lut_value[channel]; + + return lerp_u16(floor_channel_value, ceil_channel_value, + lut_index & DRM_FIXED_DECIMAL_MASK); +} + +static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buffer *output_buffer) +{ + if (!crtc_state->gamma_lut.base) + return; + + if (!crtc_state->gamma_lut.lut_length) + return; + + for (size_t x = 0; x < output_buffer->n_pixels; x++) { + struct pixel_argb_u16 *pixel = &output_buffer->pixels[x]; + + pixel->r = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->r, LUT_RED); + pixel->g = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->g, LUT_GREEN); + pixel->b = apply_lut_to_channel_value(&crtc_state->gamma_lut, pixel->b, LUT_BLUE); + } +} + /** * @wb_frame_info: The writeback frame buffer metadata * @crtc_state: The crtc state @@ -128,6 +196,8 @@ static void blend(struct vkms_writeback_job *wb, output_buffer); } + apply_lut(crtc_state, output_buffer); + *crc32 = crc32_le(*crc32, (void *)output_buffer->pixels, row_size); if (wb) @@ -242,6 +312,22 @@ void vkms_composer_worker(struct work_struct *work) crtc_state->frame_start = 0; crtc_state->frame_end = 0; crtc_state->crc_pending = false; + + if (crtc->state->gamma_lut) { + s64 max_lut_index_fp; + s64 u16_max_fp = drm_int2fixp(0xffff); + + crtc_state->gamma_lut.base = (struct drm_color_lut *)crtc->state->gamma_lut->data; + crtc_state->gamma_lut.lut_length = + crtc->state->gamma_lut->length / sizeof(struct drm_color_lut); + max_lut_index_fp = drm_int2fixp(crtc_state->gamma_lut.lut_length - 1); + crtc_state->gamma_lut.channel_value2index_ratio = drm_fixp_div(max_lut_index_fp, + u16_max_fp); + + } else { + crtc_state->gamma_lut.base = NULL; + } + spin_unlock_irq(&out->composer_lock); /* diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index 515f6772b866..61e500b8c9da 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -290,6 +290,9 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, drm_crtc_helper_add(crtc, &vkms_crtc_helper_funcs); + drm_mode_crtc_set_gamma_size(crtc, VKMS_LUT_SIZE); + drm_crtc_enable_color_mgmt(crtc, 0, false, VKMS_LUT_SIZE); + spin_lock_init(&vkms_out->lock); spin_lock_init(&vkms_out->composer_lock); diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index e3c9c9571c8d..dd0af086e7fa 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -120,9 +120,27 @@ static const struct drm_driver vkms_driver = { .minor = DRIVER_MINOR, }; +static int vkms_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *new_crtc_state; + int i; + + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (!new_crtc_state->gamma_lut || !new_crtc_state->color_mgmt_changed) + continue; + + if (new_crtc_state->gamma_lut->length / sizeof(struct drm_color_lut *) + > VKMS_LUT_SIZE) + return -EINVAL; + } + + return drm_atomic_helper_check(dev, state); +} + static const struct drm_mode_config_funcs vkms_mode_funcs = { .fb_create = drm_gem_fb_create, - .atomic_check = drm_atomic_helper_check, + .atomic_check = vkms_atomic_check, .atomic_commit = drm_atomic_helper_commit, }; diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index 5f1a0a44a78c..f16b5d7b81ef 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -23,6 +23,8 @@ #define NUM_OVERLAY_PLANES 8 +#define VKMS_LUT_SIZE 256 + struct vkms_frame_info { struct drm_framebuffer *fb; struct drm_rect src, dst; @@ -65,6 +67,12 @@ struct vkms_plane { struct drm_plane base; }; +struct vkms_color_lut { + struct drm_color_lut *base; + size_t lut_length; + s64 channel_value2index_ratio; +}; + /** * vkms_crtc_state - Driver specific CRTC state * @base: base CRTC state @@ -80,6 +88,7 @@ struct vkms_crtc_state { /* stack of active planes for crc computation, should be in z order */ struct vkms_plane_state **active_planes; struct vkms_writeback_job *active_writeback; + struct vkms_color_lut gamma_lut; /* below four are protected by vkms_output.composer_lock */ bool crc_pending;