@@ -20,6 +20,14 @@ config VIDEO_HANTRO
To compile this driver as a module, choose M here: the module
will be called hantro-vpu.
+config VIDEO_HANTRO_HEVC_RFC
+ bool "Use reference frame compression for HEVC"
+ depends on VIDEO_HANTRO
+ default n
+ help
+ Enable reference frame compression feature for HEVC codec.
+ It will use more memory but save bandwidth on memory bus.
+
config VIDEO_HANTRO_IMX8M
bool "Hantro VPU i.MX8M support"
depends on VIDEO_HANTRO
@@ -9,6 +9,12 @@
#include "hantro_g2_regs.h"
#define G2_ALIGN 16
+#define CBS_SIZE 16 /* compression table size in bytes */
+#define CBS_LUMA 8 /* luminance CBS is composed of 1 8x8 coded block */
+#define CBS_CHROMA_W (8 * 2) /* chrominance CBS is composed of two 8x4 coded
+ * blocks, with Cb CB first then Cr CB following
+ */
+#define CBS_CHROMA_H 4
void hantro_g2_check_idle(struct hantro_dev *vpu)
{
@@ -56,3 +62,32 @@ size_t hantro_g2_motion_vectors_offset(struct hantro_ctx *ctx)
return ALIGN((cr_offset * 3) / 2, G2_ALIGN);
}
+
+static size_t hantro_g2_mv_size(struct hantro_ctx *ctx)
+{
+ const struct hantro_hevc_dec_ctrls *ctrls = &ctx->hevc_dec.ctrls;
+ const struct v4l2_ctrl_hevc_sps *sps = ctrls->sps;
+ unsigned int pic_width_in_ctbs, pic_height_in_ctbs;
+ unsigned int max_log2_ctb_size;
+
+ max_log2_ctb_size = sps->log2_min_luma_coding_block_size_minus3 + 3 +
+ sps->log2_diff_max_min_luma_coding_block_size;
+ pic_width_in_ctbs = (sps->pic_width_in_luma_samples +
+ (1 << max_log2_ctb_size) - 1) >> max_log2_ctb_size;
+ pic_height_in_ctbs = (sps->pic_height_in_luma_samples + (1 << max_log2_ctb_size) - 1)
+ >> max_log2_ctb_size;
+
+ return pic_width_in_ctbs * pic_height_in_ctbs * (1 << (2 * (max_log2_ctb_size - 4))) * 16;
+}
+
+size_t hantro_g2_luma_compress_offset(struct hantro_ctx *ctx)
+{
+ return hantro_g2_motion_vectors_offset(ctx) +
+ hantro_g2_mv_size(ctx);
+}
+
+size_t hantro_g2_chroma_compress_offset(struct hantro_ctx *ctx)
+{
+ return hantro_g2_luma_compress_offset(ctx) +
+ hantro_hevc_luma_compressed_size(ctx->dst_fmt.width, ctx->dst_fmt.height);
+}
@@ -367,11 +367,14 @@ static int set_ref(struct hantro_ctx *ctx)
const struct v4l2_ctrl_hevc_decode_params *decode_params = ctrls->decode_params;
const struct v4l2_hevc_dpb_entry *dpb = decode_params->dpb;
dma_addr_t luma_addr, chroma_addr, mv_addr = 0;
+ dma_addr_t compress_luma_addr, compress_chroma_addr = 0;
struct hantro_dev *vpu = ctx->dev;
struct vb2_v4l2_buffer *vb2_dst;
struct hantro_decoded_buffer *dst;
size_t cr_offset = hantro_g2_chroma_offset(ctx);
size_t mv_offset = hantro_g2_motion_vectors_offset(ctx);
+ size_t compress_luma_offset = hantro_g2_luma_compress_offset(ctx);
+ size_t compress_chroma_offset = hantro_g2_chroma_compress_offset(ctx);
u32 max_ref_frames;
u16 dpb_longterm_e;
static const struct hantro_reg cur_poc[] = {
@@ -445,6 +448,8 @@ static int set_ref(struct hantro_ctx *ctx)
chroma_addr = luma_addr + cr_offset;
mv_addr = luma_addr + mv_offset;
+ compress_luma_addr = luma_addr + compress_luma_offset;
+ compress_chroma_addr = luma_addr + compress_chroma_offset;
if (dpb[i].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE)
dpb_longterm_e |= BIT(V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1 - i);
@@ -452,6 +457,8 @@ static int set_ref(struct hantro_ctx *ctx)
hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr);
hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr);
hantro_write_addr(vpu, G2_REF_MV_ADDR(i), mv_addr);
+ hantro_write_addr(vpu, G2_REF_COMP_LUMA_ADDR(i), compress_luma_addr);
+ hantro_write_addr(vpu, G2_REF_COMP_CHROMA_ADDR(i), compress_chroma_addr);
}
vb2_dst = hantro_get_dst_buf(ctx);
@@ -465,19 +472,27 @@ static int set_ref(struct hantro_ctx *ctx)
chroma_addr = luma_addr + cr_offset;
mv_addr = luma_addr + mv_offset;
+ compress_luma_addr = luma_addr + compress_luma_offset;
+ compress_chroma_addr = luma_addr + compress_chroma_offset;
hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), luma_addr);
hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), chroma_addr);
- hantro_write_addr(vpu, G2_REF_MV_ADDR(i++), mv_addr);
+ hantro_write_addr(vpu, G2_REF_MV_ADDR(i), mv_addr);
+ hantro_write_addr(vpu, G2_REF_COMP_LUMA_ADDR(i), compress_luma_addr);
+ hantro_write_addr(vpu, G2_REF_COMP_CHROMA_ADDR(i++), compress_chroma_addr);
hantro_write_addr(vpu, G2_OUT_LUMA_ADDR, luma_addr);
hantro_write_addr(vpu, G2_OUT_CHROMA_ADDR, chroma_addr);
hantro_write_addr(vpu, G2_OUT_MV_ADDR, mv_addr);
+ hantro_write_addr(vpu, G2_OUT_COMP_LUMA_ADDR, compress_luma_addr);
+ hantro_write_addr(vpu, G2_OUT_COMP_CHROMA_ADDR, compress_chroma_addr);
for (; i < V4L2_HEVC_DPB_ENTRIES_NUM_MAX; i++) {
hantro_write_addr(vpu, G2_REF_LUMA_ADDR(i), 0);
hantro_write_addr(vpu, G2_REF_CHROMA_ADDR(i), 0);
hantro_write_addr(vpu, G2_REF_MV_ADDR(i), 0);
+ hantro_write_addr(vpu, G2_REF_COMP_LUMA_ADDR(i), 0);
+ hantro_write_addr(vpu, G2_REF_COMP_CHROMA_ADDR(i), 0);
}
hantro_reg_write(vpu, &g2_refer_lterm_e, dpb_longterm_e);
@@ -594,8 +609,7 @@ int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx)
/* Don't disable output */
hantro_reg_write(vpu, &g2_out_dis, 0);
- /* Don't compress buffers */
- hantro_reg_write(vpu, &g2_ref_compress_bypass, 1);
+ hantro_reg_write(vpu, &g2_ref_compress_bypass, !ctx->hevc_dec.use_compression);
/* Bus width and max burst */
hantro_reg_write(vpu, &g2_buswidth, BUS_WIDTH_128);
@@ -318,6 +318,10 @@
#define G2_TILE_BSD_ADDR (G2_SWREG(183))
#define G2_DS_DST (G2_SWREG(186))
#define G2_DS_DST_CHR (G2_SWREG(188))
+#define G2_OUT_COMP_LUMA_ADDR (G2_SWREG(190))
+#define G2_REF_COMP_LUMA_ADDR(i) (G2_SWREG(192) + ((i) * 0x8))
+#define G2_OUT_COMP_CHROMA_ADDR (G2_SWREG(224))
+#define G2_REF_COMP_CHROMA_ADDR(i) (G2_SWREG(226) + ((i) * 0x8))
#define g2_strm_buffer_len G2_DEC_REG(258, 0, 0xffffffff)
#define g2_strm_start_offset G2_DEC_REG(259, 0, 0xffffffff)
@@ -25,6 +25,11 @@
#define MAX_TILE_COLS 20
#define MAX_TILE_ROWS 22
+static bool hevc_use_compression = IS_ENABLED(CONFIG_VIDEO_HANTRO_HEVC_RFC);
+module_param_named(hevc_use_compression, hevc_use_compression, bool, 0644);
+MODULE_PARM_DESC(hevc_use_compression,
+ "Use reference frame compression for HEVC");
+
void hantro_hevc_ref_init(struct hantro_ctx *ctx)
{
struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec;
@@ -275,5 +280,8 @@ int hantro_hevc_dec_init(struct hantro_ctx *ctx)
hantro_hevc_ref_init(ctx);
+ hevc_dec->use_compression =
+ hevc_use_compression & hantro_needs_postproc(ctx, ctx->vpu_dst_fmt);
+
return 0;
}
@@ -42,6 +42,14 @@
#define MAX_POSTPROC_BUFFERS 64
+#define G2_ALIGN 16
+#define CBS_SIZE 16 /* compression table size in bytes */
+#define CBS_LUMA 8 /* luminance CBS is composed of 1 8x8 coded block */
+#define CBS_CHROMA_W (8 * 2) /* chrominance CBS is composed of two 8x4 coded
+ * blocks, with Cb CB first then Cr CB following
+ */
+#define CBS_CHROMA_H 4
+
struct hantro_dev;
struct hantro_ctx;
struct hantro_buf;
@@ -144,6 +152,7 @@ struct hantro_hevc_dec_ctrls {
* @ref_bufs_used: Bitfield of used reference buffers
* @ctrls: V4L2 controls attached to a run
* @num_tile_cols_allocated: number of allocated tiles
+ * @use_compression: use reference buffer compression
*/
struct hantro_hevc_dec_hw_ctx {
struct hantro_aux_buf tile_sizes;
@@ -156,6 +165,7 @@ struct hantro_hevc_dec_hw_ctx {
u32 ref_bufs_used;
struct hantro_hevc_dec_ctrls ctrls;
unsigned int num_tile_cols_allocated;
+ bool use_compression;
};
/**
@@ -510,6 +520,33 @@ hantro_hevc_mv_size(unsigned int width, unsigned int height)
return width * height / 16;
}
+static inline size_t
+hantro_hevc_luma_compressed_size(unsigned int width, unsigned int height)
+{
+ u32 pic_width_in_cbsy =
+ round_up((width + CBS_LUMA - 1) / CBS_LUMA, CBS_SIZE);
+ u32 pic_height_in_cbsy = (height + CBS_LUMA - 1) / CBS_LUMA;
+
+ return round_up(pic_width_in_cbsy * pic_height_in_cbsy, CBS_SIZE);
+}
+
+static inline size_t
+hantro_hevc_chroma_compressed_size(unsigned int width, unsigned int height)
+{
+ u32 pic_width_in_cbsc =
+ round_up((width + CBS_CHROMA_W - 1) / CBS_CHROMA_W, CBS_SIZE);
+ u32 pic_height_in_cbsc = (height / 2 + CBS_CHROMA_H - 1) / CBS_CHROMA_H;
+
+ return round_up(pic_width_in_cbsc * pic_height_in_cbsc, CBS_SIZE);
+}
+
+static inline size_t
+hantro_hevc_compressed_size(unsigned int width, unsigned int height)
+{
+ return hantro_hevc_luma_compressed_size(width, height) +
+ hantro_hevc_chroma_compressed_size(width, height);
+}
+
static inline unsigned short hantro_av1_num_sbs(unsigned short dimension)
{
return DIV_ROUND_UP(dimension, 64);
@@ -525,6 +562,8 @@ hantro_av1_mv_size(unsigned int width, unsigned int height)
size_t hantro_g2_chroma_offset(struct hantro_ctx *ctx);
size_t hantro_g2_motion_vectors_offset(struct hantro_ctx *ctx);
+size_t hantro_g2_luma_compress_offset(struct hantro_ctx *ctx);
+size_t hantro_g2_chroma_compress_offset(struct hantro_ctx *ctx);
int hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx);
int rockchip_vpu2_mpeg2_dec_run(struct hantro_ctx *ctx);
@@ -213,9 +213,13 @@ static unsigned int hantro_postproc_buffer_size(struct hantro_ctx *ctx)
else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_VP9_FRAME)
buf_size += hantro_vp9_mv_size(pix_mp.width,
pix_mp.height);
- else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE)
+ else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_HEVC_SLICE) {
buf_size += hantro_hevc_mv_size(pix_mp.width,
pix_mp.height);
+ if (ctx->hevc_dec.use_compression)
+ buf_size += hantro_hevc_compressed_size(pix_mp.width,
+ pix_mp.height);
+ }
else if (ctx->vpu_src_fmt->fourcc == V4L2_PIX_FMT_AV1_FRAME)
buf_size += hantro_av1_mv_size(pix_mp.width,
pix_mp.height);