diff mbox series

[4/5] media: meson: vdec: add support for compressed output for VP9 decoder

Message ID 20200604135317.9235-5-narmstrong@baylibre.com (mailing list archive)
State Not Applicable
Headers show
Series media: meson: vdec: Add support for compressed framebuffer | expand

Commit Message

Neil Armstrong June 4, 2020, 1:53 p.m. UTC
From: Maxime Jourdan <mjourdan@baylibre.com>

Add the necessary changes to decode VP9 8bit and 10bit streams into
compressed buffers to be imported back into DRM/KMS using a modifier.

On GXL/GXM platforms, the VP9 decoder will output a basic Framebuffer
Compressed layout, with a memory saving option when decoding 8bit to
better align the compressed macroblocks. This layout includes the buffer
content and an header desscribing the compressed buffer.

On G12A and later, the VP9 decoder will output "Scatter" layout, meaning
the header buffer will contain references to the internal memory decoder
workspace and frame memory to construct a compressed framebuffer.

The compressed layout has been described in the DRM Modifier patchset
at [1].

[1] https://patchwork.freedesktop.org/series/73722/#rev7

Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
---
 .../media/meson/vdec/codec_hevc_common.c      | 112 ++++++------------
 .../media/meson/vdec/codec_hevc_common.h      |  13 +-
 drivers/staging/media/meson/vdec/codec_vp9.c  |  19 +--
 3 files changed, 47 insertions(+), 97 deletions(-)
diff mbox series

Patch

diff --git a/drivers/staging/media/meson/vdec/codec_hevc_common.c b/drivers/staging/media/meson/vdec/codec_hevc_common.c
index c9bf67aa2668..73dae40b3319 100644
--- a/drivers/staging/media/meson/vdec/codec_hevc_common.c
+++ b/drivers/staging/media/meson/vdec/codec_hevc_common.c
@@ -42,9 +42,17 @@  void codec_hevc_setup_decode_head(struct amvdec_session *sess, int is_10bit)
 		return;
 	}
 
+	/* enable mem saving mode for 8-bit */
+	if (!is_10bit)
+		amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(9));
+	else
+		amvdec_clear_dos_bits(core, HEVC_SAO_CTRL5, BIT(9));
+
 	if (codec_hevc_use_mmu(core->platform->revision,
 			       sess->pixfmt_cap, is_10bit))
 		amvdec_write_dos(core, HEVCD_MPP_DECOMP_CTL1, BIT(4));
+	else if (!is_10bit)
+		amvdec_write_dos(core, HEVCD_MPP_DECOMP_CTL1, BIT(3));
 	else
 		amvdec_write_dos(core, HEVCD_MPP_DECOMP_CTL1, 0);
 
@@ -76,7 +84,7 @@  static void codec_hevc_setup_buffers_gxbb(struct amvdec_session *sess,
 
 		idx = vb->index;
 
-		if (codec_hevc_use_downsample(sess->pixfmt_cap, is_10bit))
+		if (codec_hevc_use_fbc(sess->pixfmt_cap, is_10bit))
 			buf_y_paddr = comm->fbc_buffer_paddr[idx];
 		else
 			buf_y_paddr = vb2_dma_contig_plane_dma_addr(vb, 0);
@@ -117,7 +125,6 @@  static void codec_hevc_setup_buffers_gxl(struct amvdec_session *sess,
 {
 	struct amvdec_core *core = sess->core;
 	struct v4l2_m2m_buffer *buf;
-	u32 revision = core->platform->revision;
 	u32 pixfmt_cap = sess->pixfmt_cap;
 	int i;
 
@@ -130,9 +137,7 @@  static void codec_hevc_setup_buffers_gxl(struct amvdec_session *sess,
 		dma_addr_t buf_uv_paddr = 0;
 		u32 idx = vb->index;
 
-		if (codec_hevc_use_mmu(revision, pixfmt_cap, is_10bit))
-			buf_y_paddr = comm->mmu_header_paddr[idx];
-		else if (codec_hevc_use_downsample(pixfmt_cap, is_10bit))
+		if (codec_hevc_use_downsample(pixfmt_cap, is_10bit))
 			buf_y_paddr = comm->fbc_buffer_paddr[idx];
 		else
 			buf_y_paddr = vb2_dma_contig_plane_dma_addr(vb, 0);
@@ -173,6 +178,14 @@  void codec_hevc_free_fbc_buffers(struct amvdec_session *sess,
 			comm->fbc_buffer_vaddr[i] = NULL;
 		}
 	}
+
+	if (comm->mmu_map_vaddr) {
+		dma_free_coherent(dev, MMU_MAP_SIZE,
+				  comm->mmu_map_vaddr,
+				  comm->mmu_map_paddr);
+		comm->mmu_map_vaddr = NULL;
+	}
+
 }
 EXPORT_SYMBOL_GPL(codec_hevc_free_fbc_buffers);
 
@@ -205,79 +218,29 @@  static int codec_hevc_alloc_fbc_buffers(struct amvdec_session *sess,
 	return 0;
 }
 
-void codec_hevc_free_mmu_headers(struct amvdec_session *sess,
-				 struct codec_hevc_common *comm)
-{
-	struct device *dev = sess->core->dev;
-	int i;
-
-	for (i = 0; i < MAX_REF_PIC_NUM; ++i) {
-		if (comm->mmu_header_vaddr[i]) {
-			dma_free_coherent(dev, MMU_COMPRESS_HEADER_SIZE,
-					  comm->mmu_header_vaddr[i],
-					  comm->mmu_header_paddr[i]);
-			comm->mmu_header_vaddr[i] = NULL;
-		}
-	}
-
-	if (comm->mmu_map_vaddr) {
-		dma_free_coherent(dev, MMU_MAP_SIZE,
-				  comm->mmu_map_vaddr,
-				  comm->mmu_map_paddr);
-		comm->mmu_map_vaddr = NULL;
-	}
-}
-EXPORT_SYMBOL_GPL(codec_hevc_free_mmu_headers);
-
-static int codec_hevc_alloc_mmu_headers(struct amvdec_session *sess,
-					struct codec_hevc_common *comm)
-{
-	struct device *dev = sess->core->dev;
-	struct v4l2_m2m_buffer *buf;
-
-	comm->mmu_map_vaddr = dma_alloc_coherent(dev, MMU_MAP_SIZE,
-						 &comm->mmu_map_paddr,
-						 GFP_KERNEL);
-	if (!comm->mmu_map_vaddr)
-		return -ENOMEM;
-
-	v4l2_m2m_for_each_dst_buf(sess->m2m_ctx, buf) {
-		u32 idx = buf->vb.vb2_buf.index;
-		dma_addr_t paddr;
-		void *vaddr = dma_alloc_coherent(dev, MMU_COMPRESS_HEADER_SIZE,
-						 &paddr, GFP_KERNEL);
-		if (!vaddr) {
-			codec_hevc_free_mmu_headers(sess, comm);
-			return -ENOMEM;
-		}
-
-		comm->mmu_header_vaddr[idx] = vaddr;
-		comm->mmu_header_paddr[idx] = paddr;
-	}
-
-	return 0;
-}
-
 int codec_hevc_setup_buffers(struct amvdec_session *sess,
 			     struct codec_hevc_common *comm,
 			     int is_10bit)
 {
 	struct amvdec_core *core = sess->core;
+	struct device *dev = core->dev;
 	int ret;
 
-	if (codec_hevc_use_downsample(sess->pixfmt_cap, is_10bit)) {
-		ret = codec_hevc_alloc_fbc_buffers(sess, comm);
-		if (ret)
-			return ret;
+	if (codec_hevc_use_mmu(core->platform->revision,
+			       sess->pixfmt_cap, is_10bit)) {
+		comm->mmu_map_vaddr = dma_alloc_coherent(dev, MMU_MAP_SIZE,
+							 &comm->mmu_map_paddr,
+							 GFP_KERNEL);
+		if (!comm->mmu_map_vaddr)
+			return -ENOMEM;
 	}
 
 	if (codec_hevc_use_mmu(core->platform->revision,
-			       sess->pixfmt_cap, is_10bit)) {
-		ret = codec_hevc_alloc_mmu_headers(sess, comm);
-		if (ret) {
-			codec_hevc_free_fbc_buffers(sess, comm);
+			       sess->pixfmt_cap, is_10bit) ||
+	    codec_hevc_use_downsample(sess->pixfmt_cap, is_10bit)) {
+		ret = codec_hevc_alloc_fbc_buffers(sess, comm);
+		if (ret)
 			return ret;
-		}
 	}
 
 	if (core->platform->revision == VDEC_REVISION_GXBB)
@@ -291,24 +254,19 @@  EXPORT_SYMBOL_GPL(codec_hevc_setup_buffers);
 
 void codec_hevc_fill_mmu_map(struct amvdec_session *sess,
 			     struct codec_hevc_common *comm,
-			     struct vb2_buffer *vb)
+			     struct vb2_buffer *vb,
+			     u32 is_10bit)
 {
 	u32 use_mmu = codec_hevc_use_mmu(sess->core->platform->revision,
-					 sess->pixfmt_cap,
-					 sess->bitdepth == 10 ? 1 : 0);
-	u32 size = amvdec_amfbc_size(sess->width, sess->height,
-				     sess->bitdepth == 10 ? 1 : 0,
+					 sess->pixfmt_cap, is_10bit);
+	u32 size = amvdec_amfbc_size(sess->width, sess->height, is_10bit,
 				     use_mmu);
 	u32 nb_pages = size / PAGE_SIZE;
 	u32 *mmu_map = comm->mmu_map_vaddr;
 	u32 first_page;
 	u32 i;
 
-	if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M)
-		first_page = comm->fbc_buffer_paddr[vb->index] >> PAGE_SHIFT;
-	else
-		first_page = vb2_dma_contig_plane_dma_addr(vb, 0) >> PAGE_SHIFT;
-
+	first_page = comm->fbc_buffer_paddr[vb->index] >> PAGE_SHIFT;
 	for (i = 0; i < nb_pages; ++i)
 		mmu_map[i] = first_page + i;
 }
diff --git a/drivers/staging/media/meson/vdec/codec_hevc_common.h b/drivers/staging/media/meson/vdec/codec_hevc_common.h
index 88e4379ba1ee..5a3c6520940f 100644
--- a/drivers/staging/media/meson/vdec/codec_hevc_common.h
+++ b/drivers/staging/media/meson/vdec/codec_hevc_common.h
@@ -22,9 +22,6 @@  struct codec_hevc_common {
 	void      *fbc_buffer_vaddr[MAX_REF_PIC_NUM];
 	dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM];
 
-	void      *mmu_header_vaddr[MAX_REF_PIC_NUM];
-	dma_addr_t mmu_header_paddr[MAX_REF_PIC_NUM];
-
 	void      *mmu_map_vaddr;
 	dma_addr_t mmu_map_paddr;
 };
@@ -32,14 +29,15 @@  struct codec_hevc_common {
 /* Returns 1 if we must use framebuffer compression */
 static inline int codec_hevc_use_fbc(u32 pixfmt, int is_10bit)
 {
-	/* TOFIX: Handle Amlogic Compressed buffer for 8bit also */
-	return is_10bit;
+	return pixfmt == V4L2_PIX_FMT_YUV420_8BIT ||
+	       pixfmt == V4L2_PIX_FMT_YUV420_10BIT ||
+	       is_10bit;
 }
 
 /* Returns 1 if we are decoding 10-bit but outputting 8-bit NV12 */
 static inline int codec_hevc_use_downsample(u32 pixfmt, int is_10bit)
 {
-	return is_10bit;
+	return pixfmt == V4L2_PIX_FMT_NV12M && is_10bit;
 }
 
 /* Returns 1 if we are decoding using the IOMMU */
@@ -66,6 +64,7 @@  int codec_hevc_setup_buffers(struct amvdec_session *sess,
 
 void codec_hevc_fill_mmu_map(struct amvdec_session *sess,
 			     struct codec_hevc_common *comm,
-			     struct vb2_buffer *vb);
+			     struct vb2_buffer *vb,
+			     u32 is_10bit);
 
 #endif
diff --git a/drivers/staging/media/meson/vdec/codec_vp9.c b/drivers/staging/media/meson/vdec/codec_vp9.c
index a810c50f3a39..e193a2f9de9f 100644
--- a/drivers/staging/media/meson/vdec/codec_vp9.c
+++ b/drivers/staging/media/meson/vdec/codec_vp9.c
@@ -458,12 +458,6 @@  struct codec_vp9 {
 	struct list_head ref_frames_list;
 	u32 frames_num;
 
-	/* In case of downsampling (decoding with FBC but outputting in NV12M),
-	 * we need to allocate additional buffers for FBC.
-	 */
-	void      *fbc_buffer_vaddr[MAX_REF_PIC_NUM];
-	dma_addr_t fbc_buffer_paddr[MAX_REF_PIC_NUM];
-
 	int ref_frame_map[REF_FRAMES];
 	int next_ref_frame_map[REF_FRAMES];
 	struct vp9_frame *frame_refs[REFS_PER_FRAME];
@@ -901,11 +895,8 @@  static void codec_vp9_set_sao(struct amvdec_session *sess,
 		buf_y_paddr =
 		       vb2_dma_contig_plane_dma_addr(vb, 0);
 
-	if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit)) {
-		val = amvdec_read_dos(core, HEVC_SAO_CTRL5) & ~0xff0200;
-		amvdec_write_dos(core, HEVC_SAO_CTRL5, val);
+	if (codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit))
 		amvdec_write_dos(core, HEVC_CM_BODY_START_ADDR, buf_y_paddr);
-	}
 
 	if (sess->pixfmt_cap == V4L2_PIX_FMT_NV12M) {
 		buf_y_paddr =
@@ -921,7 +912,7 @@  static void codec_vp9_set_sao(struct amvdec_session *sess,
 	if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap,
 			       vp9->is_10bit)) {
 		amvdec_write_dos(core, HEVC_CM_HEADER_START_ADDR,
-				 vp9->common.mmu_header_paddr[vb->index]);
+				 vb2_dma_contig_plane_dma_addr(vb, 0));
 		/* use HEVC_CM_HEADER_START_ADDR */
 		amvdec_write_dos_bits(core, HEVC_SAO_CTRL5, BIT(10));
 	}
@@ -956,7 +947,8 @@  static void codec_vp9_set_sao(struct amvdec_session *sess,
 		val &= ~0x3;
 		if (!codec_hevc_use_fbc(sess->pixfmt_cap, vp9->is_10bit))
 			val |= BIT(0); /* disable cm compression */
-		/* TOFIX: Handle Amlogic Framebuffer compression */
+		else if (amvdec_is_dst_fbc(sess))
+			val |= BIT(1); /* Disable double write */
 	}
 
 	amvdec_write_dos(core, HEVC_SAO_CTRL1, val);
@@ -1286,7 +1278,8 @@  static void codec_vp9_process_frame(struct amvdec_session *sess)
 	if (codec_hevc_use_mmu(core->platform->revision, sess->pixfmt_cap,
 			       vp9->is_10bit))
 		codec_hevc_fill_mmu_map(sess, &vp9->common,
-					&vp9->cur_frame->vbuf->vb2_buf);
+					&vp9->cur_frame->vbuf->vb2_buf,
+					vp9->is_10bit);
 
 	intra_only = param->p.show_frame ? 0 : param->p.intra_only;