From patchwork Fri Oct 19 12:15:28 2018
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Philipp Zabel
X-Patchwork-Id: 10649177
Return-Path:
Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org
[172.30.200.125])
by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 58C36181D
for ;
Fri, 19 Oct 2018 12:15:58 +0000 (UTC)
Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1])
by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 47C9D28AB1
for ;
Fri, 19 Oct 2018 12:15:58 +0000 (UTC)
Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486)
id 3BE2328AB6; Fri, 19 Oct 2018 12:15:58 +0000 (UTC)
X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on
pdx-wl-mail.web.codeaurora.org
X-Spam-Level:
X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI,
RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1
Received: from vger.kernel.org (vger.kernel.org [209.132.180.67])
by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 77C7328AB2
for ;
Fri, 19 Oct 2018 12:15:57 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
id S1727550AbeJSUVp (ORCPT
);
Fri, 19 Oct 2018 16:21:45 -0400
Received: from metis.ext.pengutronix.de ([85.220.165.71]:33863 "EHLO
metis.ext.pengutronix.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
with ESMTP id S1727539AbeJSUVe (ORCPT
);
Fri, 19 Oct 2018 16:21:34 -0400
Received: from lupine.hi.pengutronix.de
([2001:67c:670:100:3ad5:47ff:feaf:1a17] helo=lupine.pengutronix.de.)
by metis.ext.pengutronix.de with esmtp (Exim 4.89)
(envelope-from )
id 1gDTgt-0007Oc-1c; Fri, 19 Oct 2018 14:15:43 +0200
From: Philipp Zabel
To: linux-media@vger.kernel.org,
Steve Longerbeam
Cc: Nicolas Dufresne ,
Tim Harvey , kernel@pengutronix.de
Subject: [PATCH v4 11/22] gpu: ipu-v3: image-convert: calculate per-tile
resize coefficients
Date: Fri, 19 Oct 2018 14:15:28 +0200
Message-Id: <20181019121539.12778-12-p.zabel@pengutronix.de>
X-Mailer: git-send-email 2.11.0
In-Reply-To: <20181019121539.12778-1-p.zabel@pengutronix.de>
References: <20181019121539.12778-1-p.zabel@pengutronix.de>
X-SA-Exim-Connect-IP: 2001:67c:670:100:3ad5:47ff:feaf:1a17
X-SA-Exim-Mail-From: p.zabel@pengutronix.de
X-SA-Exim-Scanned: No (on metis.ext.pengutronix.de);
SAEximRunCond expanded to false
X-PTX-Original-Recipient: linux-media@vger.kernel.org
Sender: linux-media-owner@vger.kernel.org
Precedence: bulk
List-ID:
X-Mailing-List: linux-media@vger.kernel.org
X-Virus-Scanned: ClamAV using ClamSMTP
Slightly modifying resize coefficients per-tile allows to completely
hide the seams between tiles and to sample the correct input pixels at
the bottom and right edges of the image.
Tiling requires a bilinear interpolator reset at each tile start, which
causes the image to be slightly shifted if the starting pixel should not
have been sampled from an integer pixel position in the source image
according to the full image resizing ratio. To work around this
hardware limitation, calculate per-tile resizing coefficients that make
sure that the correct input pixels are sampled at the tile end.
Signed-off-by: Philipp Zabel
---
No changes since v3.
---
drivers/gpu/ipu-v3/ipu-image-convert.c | 236 ++++++++++++++++++++++++-
1 file changed, 234 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c
index c2f82d681c48..31e7186bcc00 100644
--- a/drivers/gpu/ipu-v3/ipu-image-convert.c
+++ b/drivers/gpu/ipu-v3/ipu-image-convert.c
@@ -135,6 +135,12 @@ struct ipu_image_convert_ctx {
struct ipu_image_convert_image in;
struct ipu_image_convert_image out;
enum ipu_rotate_mode rot_mode;
+ u32 downsize_coeff_h;
+ u32 downsize_coeff_v;
+ u32 image_resize_coeff_h;
+ u32 image_resize_coeff_v;
+ u32 resize_coeffs_h[MAX_STRIPES_W];
+ u32 resize_coeffs_v[MAX_STRIPES_H];
/* intermediate buffer for rotation */
struct ipu_image_convert_dma_buf rot_intermediate[2];
@@ -361,6 +367,69 @@ static inline int num_stripes(int dim)
return 4;
}
+/*
+ * Calculate downsizing coefficients, which are the same for all tiles,
+ * and bilinear resizing coefficients, which are used to find the best
+ * seam positions.
+ */
+static int calc_image_resize_coefficients(struct ipu_image_convert_ctx *ctx,
+ struct ipu_image *in,
+ struct ipu_image *out)
+{
+ u32 downsized_width = in->rect.width;
+ u32 downsized_height = in->rect.height;
+ u32 downsize_coeff_v = 0;
+ u32 downsize_coeff_h = 0;
+ u32 resized_width = out->rect.width;
+ u32 resized_height = out->rect.height;
+ u32 resize_coeff_h;
+ u32 resize_coeff_v;
+
+ if (ipu_rot_mode_is_irt(ctx->rot_mode)) {
+ resized_width = out->rect.height;
+ resized_height = out->rect.width;
+ }
+
+ /* Do not let invalid input lead to an endless loop below */
+ if (WARN_ON(resized_width == 0 || resized_height == 0))
+ return -EINVAL;
+
+ while (downsized_width >= resized_width * 2) {
+ downsized_width >>= 1;
+ downsize_coeff_h++;
+ }
+
+ while (downsized_height >= resized_height * 2) {
+ downsized_height >>= 1;
+ downsize_coeff_v++;
+ }
+
+ /*
+ * Calculate the bilinear resizing coefficients that could be used if
+ * we were converting with a single tile. The bottom right output pixel
+ * should sample as close as possible to the bottom right input pixel
+ * out of the decimator, but not overshoot it:
+ */
+ resize_coeff_h = 8192 * (downsized_width - 1) / (resized_width - 1);
+ resize_coeff_v = 8192 * (downsized_height - 1) / (resized_height - 1);
+
+ dev_dbg(ctx->chan->priv->ipu->dev,
+ "%s: hscale: >>%u, *8192/%u vscale: >>%u, *8192/%u, %ux%u tiles\n",
+ __func__, downsize_coeff_h, resize_coeff_h, downsize_coeff_v,
+ resize_coeff_v, ctx->in.num_cols, ctx->in.num_rows);
+
+ if (downsize_coeff_h > 2 || downsize_coeff_v > 2 ||
+ resize_coeff_h > 0x3fff || resize_coeff_v > 0x3fff)
+ return -EINVAL;
+
+ ctx->downsize_coeff_h = downsize_coeff_h;
+ ctx->downsize_coeff_v = downsize_coeff_v;
+ ctx->image_resize_coeff_h = resize_coeff_h;
+ ctx->image_resize_coeff_v = resize_coeff_v;
+
+ return 0;
+}
+
static void calc_tile_dimensions(struct ipu_image_convert_ctx *ctx,
struct ipu_image_convert_image *image)
{
@@ -578,6 +647,149 @@ static int calc_tile_offsets(struct ipu_image_convert_ctx *ctx,
return calc_tile_offsets_packed(ctx, image);
}
+/*
+ * Calculate the resizing ratio for the IC main processing section given input
+ * size, fixed downsizing coefficient, and output size.
+ * Either round to closest for the next tile's first pixel to minimize seams
+ * and distortion (for all but right column / bottom row), or round down to
+ * avoid sampling beyond the edges of the input image for this tile's last
+ * pixel.
+ * Returns the resizing coefficient, resizing ratio is 8192.0 / resize_coeff.
+ */
+static u32 calc_resize_coeff(u32 input_size, u32 downsize_coeff,
+ u32 output_size, bool allow_overshoot)
+{
+ u32 downsized = input_size >> downsize_coeff;
+
+ if (allow_overshoot)
+ return DIV_ROUND_CLOSEST(8192 * downsized, output_size);
+ else
+ return 8192 * (downsized - 1) / (output_size - 1);
+}
+
+/*
+ * Slightly modify resize coefficients per tile to hide the bilinear
+ * interpolator reset at tile borders, shifting the right / bottom edge
+ * by up to a half input pixel. This removes noticeable seams between
+ * tiles at higher upscaling factors.
+ */
+static void calc_tile_resize_coefficients(struct ipu_image_convert_ctx *ctx)
+{
+ struct ipu_image_convert_chan *chan = ctx->chan;
+ struct ipu_image_convert_priv *priv = chan->priv;
+ struct ipu_image_tile *in_tile, *out_tile;
+ unsigned int col, row, tile_idx;
+ unsigned int last_output;
+
+ for (col = 0; col < ctx->in.num_cols; col++) {
+ bool closest = (col < ctx->in.num_cols - 1) &&
+ !(ctx->rot_mode & IPU_ROT_BIT_HFLIP);
+ u32 resized_width;
+ u32 resize_coeff_h;
+
+ tile_idx = col;
+ in_tile = &ctx->in.tile[tile_idx];
+ out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
+
+ if (ipu_rot_mode_is_irt(ctx->rot_mode))
+ resized_width = out_tile->height;
+ else
+ resized_width = out_tile->width;
+
+ resize_coeff_h = calc_resize_coeff(in_tile->width,
+ ctx->downsize_coeff_h,
+ resized_width, closest);
+
+ dev_dbg(priv->ipu->dev, "%s: column %u hscale: *8192/%u\n",
+ __func__, col, resize_coeff_h);
+
+
+ for (row = 0; row < ctx->in.num_rows; row++) {
+ tile_idx = row * ctx->in.num_cols + col;
+ in_tile = &ctx->in.tile[tile_idx];
+ out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
+
+ /*
+ * With the horizontal scaling factor known, round up
+ * resized width (output width or height) to burst size.
+ */
+ if (ipu_rot_mode_is_irt(ctx->rot_mode))
+ out_tile->height = round_up(resized_width, 8);
+ else
+ out_tile->width = round_up(resized_width, 8);
+
+ /*
+ * Calculate input width from the last accessed input
+ * pixel given resized width and scaling coefficients.
+ * Round up to burst size.
+ */
+ last_output = round_up(resized_width, 8) - 1;
+ if (closest)
+ last_output++;
+ in_tile->width = round_up(
+ (DIV_ROUND_UP(last_output * resize_coeff_h,
+ 8192) + 1)
+ << ctx->downsize_coeff_h, 8);
+ }
+
+ ctx->resize_coeffs_h[col] = resize_coeff_h;
+ }
+
+ for (row = 0; row < ctx->in.num_rows; row++) {
+ bool closest = (row < ctx->in.num_rows - 1) &&
+ !(ctx->rot_mode & IPU_ROT_BIT_VFLIP);
+ u32 resized_height;
+ u32 resize_coeff_v;
+
+ tile_idx = row * ctx->in.num_cols;
+ in_tile = &ctx->in.tile[tile_idx];
+ out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
+
+ if (ipu_rot_mode_is_irt(ctx->rot_mode))
+ resized_height = out_tile->width;
+ else
+ resized_height = out_tile->height;
+
+ resize_coeff_v = calc_resize_coeff(in_tile->height,
+ ctx->downsize_coeff_v,
+ resized_height, closest);
+
+ dev_dbg(priv->ipu->dev, "%s: row %u vscale: *8192/%u\n",
+ __func__, row, resize_coeff_v);
+
+ for (col = 0; col < ctx->in.num_cols; col++) {
+ tile_idx = row * ctx->in.num_cols + col;
+ in_tile = &ctx->in.tile[tile_idx];
+ out_tile = &ctx->out.tile[ctx->out_tile_map[tile_idx]];
+
+ /*
+ * With the vertical scaling factor known, round up
+ * resized height (output width or height) to IDMAC
+ * limitations.
+ */
+ if (ipu_rot_mode_is_irt(ctx->rot_mode))
+ out_tile->width = round_up(resized_height, 2);
+ else
+ out_tile->height = round_up(resized_height, 2);
+
+ /*
+ * Calculate input width from the last accessed input
+ * pixel given resized height and scaling coefficients.
+ * Align to IDMAC restrictions.
+ */
+ last_output = round_up(resized_height, 2) - 1;
+ if (closest)
+ last_output++;
+ in_tile->height = round_up(
+ (DIV_ROUND_UP(last_output * resize_coeff_v,
+ 8192) + 1)
+ << ctx->downsize_coeff_v, 2);
+ }
+
+ ctx->resize_coeffs_v[row] = resize_coeff_v;
+ }
+}
+
/*
* return the number of runs in given queue (pending_q or done_q)
* for this context. hold irqlock when calling.
@@ -712,6 +924,8 @@ static int convert_start(struct ipu_image_convert_run *run, unsigned int tile)
enum ipu_color_space src_cs, dest_cs;
unsigned int dst_tile = ctx->out_tile_map[tile];
unsigned int dest_width, dest_height;
+ unsigned int col, row;
+ u32 rsc;
int ret;
dev_dbg(priv->ipu->dev, "%s: task %u: starting ctx %p run %p tile %u -> %u\n",
@@ -729,13 +943,26 @@ static int convert_start(struct ipu_image_convert_run *run, unsigned int tile)
dest_height = d_image->tile[dst_tile].height;
}
+ row = tile / s_image->num_cols;
+ col = tile % s_image->num_cols;
+
+ rsc = (ctx->downsize_coeff_v << 30) |
+ (ctx->resize_coeffs_v[row] << 16) |
+ (ctx->downsize_coeff_h << 14) |
+ (ctx->resize_coeffs_h[col]);
+
+ dev_dbg(priv->ipu->dev, "%s: %ux%u -> %ux%u (rsc = 0x%x)\n",
+ __func__, s_image->tile[tile].width,
+ s_image->tile[tile].height, dest_width, dest_height, rsc);
+
/* setup the IC resizer and CSC */
- ret = ipu_ic_task_init(chan->ic,
+ ret = ipu_ic_task_init_rsc(chan->ic,
s_image->tile[tile].width,
s_image->tile[tile].height,
dest_width,
dest_height,
- src_cs, dest_cs);
+ src_cs, dest_cs,
+ rsc);
if (ret) {
dev_err(priv->ipu->dev, "ipu_ic_task_init failed, %d\n", ret);
return ret;
@@ -1420,6 +1647,10 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
ctx->num_tiles = d_image->num_cols * d_image->num_rows;
ctx->rot_mode = rot_mode;
+ ret = calc_image_resize_coefficients(ctx, in, out);
+ if (ret)
+ goto out_free;
+
ret = fill_image(ctx, s_image, in, IMAGE_CONVERT_IN);
if (ret)
goto out_free;
@@ -1428,6 +1659,7 @@ ipu_image_convert_prepare(struct ipu_soc *ipu, enum ipu_ic_task ic_task,
goto out_free;
calc_out_tile_map(ctx);
+ calc_tile_resize_coefficients(ctx);
dump_format(ctx, s_image);
dump_format(ctx, d_image);