Message ID | 20190603110946.4952-3-boris.brezillon@collabora.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | media: uapi: h264: First batch of adjusments | expand |
On Mon, Jun 03, 2019 at 01:09:42PM +0200, Boris Brezillon wrote: > Some stateless decoders don't support per-slice decoding (or at least > not in a way that would make them efficient or easy to use). > Let's expose a menu to control and expose the supported decoding modes. > Drivers are allowed to support only one decoding but they can support > both too. > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > --- > .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- > drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ > include/media/h264-ctrls.h | 13 ++++++ > 3 files changed, 63 insertions(+), 1 deletion(-) > > diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > index 82547d5de250..188f625acb7c 100644 > --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > * - __u32 > - ``size`` > - > + * - __u32 > + - ``start_byte_offset`` > + - Where the slice payload starts in the output buffer. Useful when > + operating in per frame decoding mode and decoding multi-slice content. > + In this case, the output buffer will contain more than one slice and > + some codecs need to know where each slice starts. Note that this > + offsets points to the beginning of the slice which is supposed to > + contain an ANNEX B start code > * - __u32 > - ``header_bit_size`` > - > @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > - > * - __u16 > - ``num_slices`` > - - Number of slices needed to decode the current frame > + - Number of slices needed to decode the current frame/field. When > + operating in per-slice decoding mode (see > + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field > + should always be set to one > * - __u16 > - ``nal_ref_idc`` > - NAL reference ID value coming from the NAL Unit header > @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > - 0x00000004 > - The DPB entry is a long term reference frame > > +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` > + Specifies the decoding mode to use. Currently exposes per slice and per > + frame decoding but new modes might be added later on. > + > + .. note:: > + > + This menu control is not yet part of the public kernel API and > + it is expected to change. > + > +.. c:type:: v4l2_mpeg_video_h264_decoding_mode > + > +.. cssclass:: longtable > + > +.. flat-table:: > + :header-rows: 0 > + :stub-columns: 0 > + :widths: 1 1 2 > + > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` > + - 0 > + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices > + must be set to 1 and the output buffer should contain only one slice. I wonder if we need to be that strict. Wouldn't it be possible for drivers to just iterate over a number of slices and decode each in turn if userspace passed more than one? Or perhaps a decoder can batch queue a couple of slices. I'm not sure how frequent this is, but consider something like a spike in activity on your system, causing some slices to get delayed so you actually get a few buffered up before you get a chance to hand them to the V4L2 device. Processing them all at once may help conceal the lag. Thierry > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_FRAME`` > + - 1 > + - The decoding is done per frame. v4l2_ctrl_h264_decode_params->num_slices > + can be > 1. When that happens, the output buffer should contain all > + slices needed to decode a frame/field, each slice being prefixed by an > + Annex B NAL header/start-code. > + > .. _v4l2-mpeg-mpeg2: > > ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS (struct)`` > diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c > index 1217d38ea394..72bb3c8882f5 100644 > --- a/drivers/media/v4l2-core/v4l2-ctrls.c > +++ b/drivers/media/v4l2-core/v4l2-ctrls.c > @@ -406,6 +406,11 @@ const char * const *v4l2_ctrl_get_menu(u32 id) > "Explicit", > NULL, > }; > + static const char * const h264_decoding_mode[] = { > + "Per Slice", > + "Per Frame", > + NULL, > + }; > static const char * const mpeg_mpeg2_level[] = { > "Low", > "Main", > @@ -637,6 +642,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id) > return h264_fp_arrangement_type; > case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: > return h264_fmo_map_type; > + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: > + return h264_decoding_mode; > case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: > return mpeg_mpeg2_level; > case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: > @@ -856,6 +863,7 @@ const char *v4l2_ctrl_get_name(u32 id) > case V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX: return "H264 Scaling Matrix"; > case V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS: return "H264 Slice Parameters"; > case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS: return "H264 Decode Parameters"; > + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: return "H264 Decoding Mode"; > case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: return "MPEG2 Level"; > case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: return "MPEG2 Profile"; > case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP: return "MPEG4 I-Frame QP Value"; > @@ -1224,6 +1232,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, > case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC: > case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE: > case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: > + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: > case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: > case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: > case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL: > diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h > index e1404d78d6ff..26de2243f6f5 100644 > --- a/include/media/h264-ctrls.h > +++ b/include/media/h264-ctrls.h > @@ -26,6 +26,7 @@ > #define V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX (V4L2_CID_MPEG_BASE+1002) > #define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS (V4L2_CID_MPEG_BASE+1003) > #define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS (V4L2_CID_MPEG_BASE+1004) > +#define V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (V4L2_CID_MPEG_BASE+1005) > > /* enum v4l2_ctrl_type type values */ > #define V4L2_CTRL_TYPE_H264_SPS 0x0110 > @@ -33,6 +34,12 @@ > #define V4L2_CTRL_TYPE_H264_SCALING_MATRIX 0x0112 > #define V4L2_CTRL_TYPE_H264_SLICE_PARAMS 0x0113 > #define V4L2_CTRL_TYPE_H264_DECODE_PARAMS 0x0114 > +#define V4L2_CTRL_TYPE_H264_DECODING_MODE 0x0115 > + > +enum v4l2_mpeg_video_h264_decoding_mode { > + V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE, > + V4L2_MPEG_VIDEO_H264_DECODING_PER_FRAME, > +}; > > #define V4L2_H264_SPS_CONSTRAINT_SET0_FLAG 0x01 > #define V4L2_H264_SPS_CONSTRAINT_SET1_FLAG 0x02 > @@ -111,6 +118,8 @@ struct v4l2_h264_pred_weight_table { > struct v4l2_h264_weight_factors weight_factors[2]; > }; > > +#define V4L2_H264_MAX_SLICES_PER_FRAME 16 > + > #define V4L2_H264_SLICE_TYPE_P 0 > #define V4L2_H264_SLICE_TYPE_B 1 > #define V4L2_H264_SLICE_TYPE_I 2 > @@ -125,6 +134,10 @@ struct v4l2_h264_pred_weight_table { > struct v4l2_ctrl_h264_slice_params { > /* Size in bytes, including header */ > __u32 size; > + > + /* Where the slice starts in the output buffer (expressed in bytes). */ > + __u32 start_byte_offset; > + > /* Offset in bits to slice_data() from the beginning of this slice. */ > __u32 header_bit_size; > > -- > 2.20.1 >
+Maxime Oops, just realized Maxime was not Cc-ed on this series. On Mon, 3 Jun 2019 14:30:20 +0200 Thierry Reding <thierry.reding@gmail.com> wrote: > On Mon, Jun 03, 2019 at 01:09:42PM +0200, Boris Brezillon wrote: > > Some stateless decoders don't support per-slice decoding (or at least > > not in a way that would make them efficient or easy to use). > > Let's expose a menu to control and expose the supported decoding modes. > > Drivers are allowed to support only one decoding but they can support > > both too. > > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > --- > > .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- > > drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ > > include/media/h264-ctrls.h | 13 ++++++ > > 3 files changed, 63 insertions(+), 1 deletion(-) > > > > diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > index 82547d5de250..188f625acb7c 100644 > > --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > * - __u32 > > - ``size`` > > - > > + * - __u32 > > + - ``start_byte_offset`` > > + - Where the slice payload starts in the output buffer. Useful when > > + operating in per frame decoding mode and decoding multi-slice content. > > + In this case, the output buffer will contain more than one slice and > > + some codecs need to know where each slice starts. Note that this > > + offsets points to the beginning of the slice which is supposed to > > + contain an ANNEX B start code > > * - __u32 > > - ``header_bit_size`` > > - > > @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > - > > * - __u16 > > - ``num_slices`` > > - - Number of slices needed to decode the current frame > > + - Number of slices needed to decode the current frame/field. When > > + operating in per-slice decoding mode (see > > + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field > > + should always be set to one > > * - __u16 > > - ``nal_ref_idc`` > > - NAL reference ID value coming from the NAL Unit header > > @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > - 0x00000004 > > - The DPB entry is a long term reference frame > > > > +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` > > + Specifies the decoding mode to use. Currently exposes per slice and per > > + frame decoding but new modes might be added later on. > > + > > + .. note:: > > + > > + This menu control is not yet part of the public kernel API and > > + it is expected to change. > > + > > +.. c:type:: v4l2_mpeg_video_h264_decoding_mode > > + > > +.. cssclass:: longtable > > + > > +.. flat-table:: > > + :header-rows: 0 > > + :stub-columns: 0 > > + :widths: 1 1 2 > > + > > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` > > + - 0 > > + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices > > + must be set to 1 and the output buffer should contain only one slice. > > I wonder if we need to be that strict. Wouldn't it be possible for > drivers to just iterate over a number of slices and decode each in turn > if userspace passed more than one? > > Or perhaps a decoder can batch queue a couple of slices. I'm not sure > how frequent this is, but consider something like a spike in activity on > your system, causing some slices to get delayed so you actually get a > few buffered up before you get a chance to hand them to the V4L2 device. > Processing them all at once may help conceal the lag. Hm, so we'd be in some kind of slice-batch mode, which means we could trigger a decode operation with more than one slice, but not necessarily all the slices needed to decode a frame. TBH, supporting per-frame (or the batch mode you suggest) on a HW that supports per-slice decoding should be pretty simple and has not real impact on perfs (as you said, it's just a matter of iterating over all the slices attached to a decode operation), so I'm fine relaxing the rule here and patching the cedrus driver accordingly (I can't really test the changes though). Paul, Maxime, what's your opinion?
On Mon, Jun 03, 2019 at 02:51:13PM +0200, Boris Brezillon wrote: > +Maxime > > Oops, just realized Maxime was not Cc-ed on this series. > > On Mon, 3 Jun 2019 14:30:20 +0200 > Thierry Reding <thierry.reding@gmail.com> wrote: > > > On Mon, Jun 03, 2019 at 01:09:42PM +0200, Boris Brezillon wrote: > > > Some stateless decoders don't support per-slice decoding (or at least > > > not in a way that would make them efficient or easy to use). > > > Let's expose a menu to control and expose the supported decoding modes. > > > Drivers are allowed to support only one decoding but they can support > > > both too. > > > > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > > --- > > > .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- > > > drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ > > > include/media/h264-ctrls.h | 13 ++++++ > > > 3 files changed, 63 insertions(+), 1 deletion(-) > > > > > > diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > index 82547d5de250..188f625acb7c 100644 > > > --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > * - __u32 > > > - ``size`` > > > - > > > + * - __u32 > > > + - ``start_byte_offset`` > > > + - Where the slice payload starts in the output buffer. Useful when > > > + operating in per frame decoding mode and decoding multi-slice content. > > > + In this case, the output buffer will contain more than one slice and > > > + some codecs need to know where each slice starts. Note that this > > > + offsets points to the beginning of the slice which is supposed to > > > + contain an ANNEX B start code > > > * - __u32 > > > - ``header_bit_size`` > > > - > > > @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > - > > > * - __u16 > > > - ``num_slices`` > > > - - Number of slices needed to decode the current frame > > > + - Number of slices needed to decode the current frame/field. When > > > + operating in per-slice decoding mode (see > > > + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field > > > + should always be set to one > > > * - __u16 > > > - ``nal_ref_idc`` > > > - NAL reference ID value coming from the NAL Unit header > > > @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > - 0x00000004 > > > - The DPB entry is a long term reference frame > > > > > > +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` > > > + Specifies the decoding mode to use. Currently exposes per slice and per > > > + frame decoding but new modes might be added later on. > > > + > > > + .. note:: > > > + > > > + This menu control is not yet part of the public kernel API and > > > + it is expected to change. > > > + > > > +.. c:type:: v4l2_mpeg_video_h264_decoding_mode > > > + > > > +.. cssclass:: longtable > > > + > > > +.. flat-table:: > > > + :header-rows: 0 > > > + :stub-columns: 0 > > > + :widths: 1 1 2 > > > + > > > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` > > > + - 0 > > > + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices > > > + must be set to 1 and the output buffer should contain only one slice. > > > > I wonder if we need to be that strict. Wouldn't it be possible for > > drivers to just iterate over a number of slices and decode each in turn > > if userspace passed more than one? > > > > Or perhaps a decoder can batch queue a couple of slices. I'm not sure > > how frequent this is, but consider something like a spike in activity on > > your system, causing some slices to get delayed so you actually get a > > few buffered up before you get a chance to hand them to the V4L2 device. > > Processing them all at once may help conceal the lag. > > Hm, so we'd be in some kind of slice-batch mode, which means we could > trigger a decode operation with more than one slice, but not > necessarily all the slices needed to decode a frame. TBH, supporting > per-frame (or the batch mode you suggest) on a HW that supports > per-slice decoding should be pretty simple and has not real impact on > perfs (as you said, it's just a matter of iterating over all the slices > attached to a decode operation), so I'm fine relaxing the rule here and > patching the cedrus driver accordingly (I can't really test the > changes though). Paul, Maxime, what's your opinion? We could perhaps have a test program to orchestrate such a scenario. I think the assumption should obviously still be that we don't cross the frame boundary using slices in one batch. Just that if a frame was made up of, say, 4 slices and you first pass 3 slices, then 1, that it'd be nice if the driver would be able to cope with that. It's something that could probably even be implemented in the framework as a helper, though I suspect it'd be just a couple of lines of extra code to wrap a loop around everything. Thierry
On Mon, 3 Jun 2019 16:05:26 +0200 Thierry Reding <thierry.reding@gmail.com> wrote: > On Mon, Jun 03, 2019 at 02:51:13PM +0200, Boris Brezillon wrote: > > +Maxime > > > > Oops, just realized Maxime was not Cc-ed on this series. > > > > On Mon, 3 Jun 2019 14:30:20 +0200 > > Thierry Reding <thierry.reding@gmail.com> wrote: > > > > > On Mon, Jun 03, 2019 at 01:09:42PM +0200, Boris Brezillon wrote: > > > > Some stateless decoders don't support per-slice decoding (or at least > > > > not in a way that would make them efficient or easy to use). > > > > Let's expose a menu to control and expose the supported decoding modes. > > > > Drivers are allowed to support only one decoding but they can support > > > > both too. > > > > > > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > > > --- > > > > .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ > > > > include/media/h264-ctrls.h | 13 ++++++ > > > > 3 files changed, 63 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > index 82547d5de250..188f625acb7c 100644 > > > > --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > * - __u32 > > > > - ``size`` > > > > - > > > > + * - __u32 > > > > + - ``start_byte_offset`` > > > > + - Where the slice payload starts in the output buffer. Useful when > > > > + operating in per frame decoding mode and decoding multi-slice content. > > > > + In this case, the output buffer will contain more than one slice and > > > > + some codecs need to know where each slice starts. Note that this > > > > + offsets points to the beginning of the slice which is supposed to > > > > + contain an ANNEX B start code > > > > * - __u32 > > > > - ``header_bit_size`` > > > > - > > > > @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > - > > > > * - __u16 > > > > - ``num_slices`` > > > > - - Number of slices needed to decode the current frame > > > > + - Number of slices needed to decode the current frame/field. When > > > > + operating in per-slice decoding mode (see > > > > + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field > > > > + should always be set to one > > > > * - __u16 > > > > - ``nal_ref_idc`` > > > > - NAL reference ID value coming from the NAL Unit header > > > > @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > - 0x00000004 > > > > - The DPB entry is a long term reference frame > > > > > > > > +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` > > > > + Specifies the decoding mode to use. Currently exposes per slice and per > > > > + frame decoding but new modes might be added later on. > > > > + > > > > + .. note:: > > > > + > > > > + This menu control is not yet part of the public kernel API and > > > > + it is expected to change. > > > > + > > > > +.. c:type:: v4l2_mpeg_video_h264_decoding_mode > > > > + > > > > +.. cssclass:: longtable > > > > + > > > > +.. flat-table:: > > > > + :header-rows: 0 > > > > + :stub-columns: 0 > > > > + :widths: 1 1 2 > > > > + > > > > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` > > > > + - 0 > > > > + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices > > > > + must be set to 1 and the output buffer should contain only one slice. > > > > > > I wonder if we need to be that strict. Wouldn't it be possible for > > > drivers to just iterate over a number of slices and decode each in turn > > > if userspace passed more than one? > > > > > > Or perhaps a decoder can batch queue a couple of slices. I'm not sure > > > how frequent this is, but consider something like a spike in activity on > > > your system, causing some slices to get delayed so you actually get a > > > few buffered up before you get a chance to hand them to the V4L2 device. > > > Processing them all at once may help conceal the lag. > > > > Hm, so we'd be in some kind of slice-batch mode, which means we could > > trigger a decode operation with more than one slice, but not > > necessarily all the slices needed to decode a frame. TBH, supporting > > per-frame (or the batch mode you suggest) on a HW that supports > > per-slice decoding should be pretty simple and has not real impact on > > perfs (as you said, it's just a matter of iterating over all the slices > > attached to a decode operation), so I'm fine relaxing the rule here and > > patching the cedrus driver accordingly (I can't really test the > > changes though). Paul, Maxime, what's your opinion? > > We could perhaps have a test program to orchestrate such a scenario. I > think the assumption should obviously still be that we don't cross the > frame boundary using slices in one batch. We should definitely forbid mixing slices of different frames in the same decode operation, since each decode operation is targeting a single capture buffer. > Just that if a frame was made > up of, say, 4 slices and you first pass 3 slices, then 1, that it'd be > nice if the driver would be able to cope with that. Yep, that makes sense. > It's something that > could probably even be implemented in the framework as a helper, though > I suspect it'd be just a couple of lines of extra code to wrap a loop > around everything. I also thought about providing generic wrappers, both for this case and the per-slice -> per-frame case (this one would be a bit more complicated as it implies queuing slices in a bounce buffer and triggering the decode operation only when we have all slices of a frame).
On Mon, Jun 03, 2019 at 05:37:11PM +0200, Boris Brezillon wrote: > On Mon, 3 Jun 2019 16:05:26 +0200 > Thierry Reding <thierry.reding@gmail.com> wrote: > > > On Mon, Jun 03, 2019 at 02:51:13PM +0200, Boris Brezillon wrote: > > > +Maxime > > > > > > Oops, just realized Maxime was not Cc-ed on this series. > > > > > > On Mon, 3 Jun 2019 14:30:20 +0200 > > > Thierry Reding <thierry.reding@gmail.com> wrote: > > > > > > > On Mon, Jun 03, 2019 at 01:09:42PM +0200, Boris Brezillon wrote: > > > > > Some stateless decoders don't support per-slice decoding (or at least > > > > > not in a way that would make them efficient or easy to use). > > > > > Let's expose a menu to control and expose the supported decoding modes. > > > > > Drivers are allowed to support only one decoding but they can support > > > > > both too. > > > > > > > > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > > > > --- > > > > > .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- > > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ > > > > > include/media/h264-ctrls.h | 13 ++++++ > > > > > 3 files changed, 63 insertions(+), 1 deletion(-) > > > > > > > > > > diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > index 82547d5de250..188f625acb7c 100644 > > > > > --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > * - __u32 > > > > > - ``size`` > > > > > - > > > > > + * - __u32 > > > > > + - ``start_byte_offset`` > > > > > + - Where the slice payload starts in the output buffer. Useful when > > > > > + operating in per frame decoding mode and decoding multi-slice content. > > > > > + In this case, the output buffer will contain more than one slice and > > > > > + some codecs need to know where each slice starts. Note that this > > > > > + offsets points to the beginning of the slice which is supposed to > > > > > + contain an ANNEX B start code > > > > > * - __u32 > > > > > - ``header_bit_size`` > > > > > - > > > > > @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > - > > > > > * - __u16 > > > > > - ``num_slices`` > > > > > - - Number of slices needed to decode the current frame > > > > > + - Number of slices needed to decode the current frame/field. When > > > > > + operating in per-slice decoding mode (see > > > > > + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field > > > > > + should always be set to one > > > > > * - __u16 > > > > > - ``nal_ref_idc`` > > > > > - NAL reference ID value coming from the NAL Unit header > > > > > @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > - 0x00000004 > > > > > - The DPB entry is a long term reference frame > > > > > > > > > > +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` > > > > > + Specifies the decoding mode to use. Currently exposes per slice and per > > > > > + frame decoding but new modes might be added later on. > > > > > + > > > > > + .. note:: > > > > > + > > > > > + This menu control is not yet part of the public kernel API and > > > > > + it is expected to change. > > > > > + > > > > > +.. c:type:: v4l2_mpeg_video_h264_decoding_mode > > > > > + > > > > > +.. cssclass:: longtable > > > > > + > > > > > +.. flat-table:: > > > > > + :header-rows: 0 > > > > > + :stub-columns: 0 > > > > > + :widths: 1 1 2 > > > > > + > > > > > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` > > > > > + - 0 > > > > > + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices > > > > > + must be set to 1 and the output buffer should contain only one slice. > > > > > > > > I wonder if we need to be that strict. Wouldn't it be possible for > > > > drivers to just iterate over a number of slices and decode each in turn > > > > if userspace passed more than one? > > > > > > > > Or perhaps a decoder can batch queue a couple of slices. I'm not sure > > > > how frequent this is, but consider something like a spike in activity on > > > > your system, causing some slices to get delayed so you actually get a > > > > few buffered up before you get a chance to hand them to the V4L2 device. > > > > Processing them all at once may help conceal the lag. > > > > > > Hm, so we'd be in some kind of slice-batch mode, which means we could > > > trigger a decode operation with more than one slice, but not > > > necessarily all the slices needed to decode a frame. TBH, supporting > > > per-frame (or the batch mode you suggest) on a HW that supports > > > per-slice decoding should be pretty simple and has not real impact on > > > perfs (as you said, it's just a matter of iterating over all the slices > > > attached to a decode operation), so I'm fine relaxing the rule here and > > > patching the cedrus driver accordingly (I can't really test the > > > changes though). Paul, Maxime, what's your opinion? > > > > We could perhaps have a test program to orchestrate such a scenario. I > > think the assumption should obviously still be that we don't cross the > > frame boundary using slices in one batch. > > We should definitely forbid mixing slices of different frames in the > same decode operation, since each decode operation is targeting a > single capture buffer. > > > Just that if a frame was made > > up of, say, 4 slices and you first pass 3 slices, then 1, that it'd be > > nice if the driver would be able to cope with that. > > Yep, that makes sense. > > > It's something that > > could probably even be implemented in the framework as a helper, though > > I suspect it'd be just a couple of lines of extra code to wrap a loop > > around everything. > > I also thought about providing generic wrappers, both for this case and > the per-slice -> per-frame case (this one would be a bit more > complicated as it implies queuing slices in a bounce buffer and > triggering the decode operation only when we have all slices of a > frame). I like deferring the addition of that kind of helper until a clear pattern emerges out of the drivers that need this, just because that gives us real examples on which to model those helpers. But yeah, I think it should be possible to have helpers for these for most cases. Thierry
Hi, Le mardi 04 juin 2019 à 10:16 +0200, Thierry Reding a écrit : > On Mon, Jun 03, 2019 at 05:37:11PM +0200, Boris Brezillon wrote: > > On Mon, 3 Jun 2019 16:05:26 +0200 > > Thierry Reding <thierry.reding@gmail.com> wrote: > > > > > On Mon, Jun 03, 2019 at 02:51:13PM +0200, Boris Brezillon wrote: > > > > +Maxime > > > > > > > > Oops, just realized Maxime was not Cc-ed on this series. > > > > > > > > On Mon, 3 Jun 2019 14:30:20 +0200 > > > > Thierry Reding <thierry.reding@gmail.com> wrote: > > > > > > > > > On Mon, Jun 03, 2019 at 01:09:42PM +0200, Boris Brezillon wrote: > > > > > > Some stateless decoders don't support per-slice decoding (or at least > > > > > > not in a way that would make them efficient or easy to use). > > > > > > Let's expose a menu to control and expose the supported decoding modes. > > > > > > Drivers are allowed to support only one decoding but they can support > > > > > > both too. > > > > > > > > > > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > > > > > --- > > > > > > .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- > > > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ > > > > > > include/media/h264-ctrls.h | 13 ++++++ > > > > > > 3 files changed, 63 insertions(+), 1 deletion(-) > > > > > > > > > > > > diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > > index 82547d5de250..188f625acb7c 100644 > > > > > > --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > > +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > > @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > > * - __u32 > > > > > > - ``size`` > > > > > > - > > > > > > + * - __u32 > > > > > > + - ``start_byte_offset`` > > > > > > + - Where the slice payload starts in the output buffer. Useful when > > > > > > + operating in per frame decoding mode and decoding multi-slice content. > > > > > > + In this case, the output buffer will contain more than one slice and > > > > > > + some codecs need to know where each slice starts. Note that this > > > > > > + offsets points to the beginning of the slice which is supposed to > > > > > > + contain an ANNEX B start code > > > > > > * - __u32 > > > > > > - ``header_bit_size`` > > > > > > - > > > > > > @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > > - > > > > > > * - __u16 > > > > > > - ``num_slices`` > > > > > > - - Number of slices needed to decode the current frame > > > > > > + - Number of slices needed to decode the current frame/field. When > > > > > > + operating in per-slice decoding mode (see > > > > > > + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field > > > > > > + should always be set to one > > > > > > * - __u16 > > > > > > - ``nal_ref_idc`` > > > > > > - NAL reference ID value coming from the NAL Unit header > > > > > > @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > > - 0x00000004 > > > > > > - The DPB entry is a long term reference frame > > > > > > > > > > > > +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` > > > > > > + Specifies the decoding mode to use. Currently exposes per slice and per > > > > > > + frame decoding but new modes might be added later on. > > > > > > + > > > > > > + .. note:: > > > > > > + > > > > > > + This menu control is not yet part of the public kernel API and > > > > > > + it is expected to change. > > > > > > + > > > > > > +.. c:type:: v4l2_mpeg_video_h264_decoding_mode > > > > > > + > > > > > > +.. cssclass:: longtable > > > > > > + > > > > > > +.. flat-table:: > > > > > > + :header-rows: 0 > > > > > > + :stub-columns: 0 > > > > > > + :widths: 1 1 2 > > > > > > + > > > > > > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` > > > > > > + - 0 > > > > > > + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices > > > > > > + must be set to 1 and the output buffer should contain only one slice. > > > > > > > > > > I wonder if we need to be that strict. Wouldn't it be possible for > > > > > drivers to just iterate over a number of slices and decode each in turn > > > > > if userspace passed more than one? > > > > > > > > > > Or perhaps a decoder can batch queue a couple of slices. I'm not sure > > > > > how frequent this is, but consider something like a spike in activity on > > > > > your system, causing some slices to get delayed so you actually get a > > > > > few buffered up before you get a chance to hand them to the V4L2 device. > > > > > Processing them all at once may help conceal the lag. > > > > > > > > Hm, so we'd be in some kind of slice-batch mode, which means we could > > > > trigger a decode operation with more than one slice, but not > > > > necessarily all the slices needed to decode a frame. TBH, supporting > > > > per-frame (or the batch mode you suggest) on a HW that supports > > > > per-slice decoding should be pretty simple and has not real impact on > > > > perfs (as you said, it's just a matter of iterating over all the slices > > > > attached to a decode operation), so I'm fine relaxing the rule here and > > > > patching the cedrus driver accordingly (I can't really test the > > > > changes though). Paul, Maxime, what's your opinion? So perhaps we could just allow passing any number of slices with each request within a frame boundary and have userspace set the V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF flag accordingly. I'm not totally sure we need a batching method beyond that, just submitting requests with groups of slices and the same destination buffer should work out fine. We can leave it up to the drivers to handle multiple slices submitted at once. > > > We could perhaps have a test program to orchestrate such a scenario. I > > > think the assumption should obviously still be that we don't cross the > > > frame boundary using slices in one batch. > > > > We should definitely forbid mixing slices of different frames in the > > same decode operation, since each decode operation is targeting a > > single capture buffer. Agreed. > > > Just that if a frame was made > > > up of, say, 4 slices and you first pass 3 slices, then 1, that it'd be > > > nice if the driver would be able to cope with that. > > > > Yep, that makes sense. > > > > > It's something that > > > could probably even be implemented in the framework as a helper, though > > > I suspect it'd be just a couple of lines of extra code to wrap a loop > > > around everything. > > > > I also thought about providing generic wrappers, both for this case and > > the per-slice -> per-frame case (this one would be a bit more > > complicated as it implies queuing slices in a bounce buffer and > > triggering the decode operation only when we have all slices of a > > frame). > > I like deferring the addition of that kind of helper until a clear > pattern emerges out of the drivers that need this, just because that > gives us real examples on which to model those helpers. But yeah, I > think it should be possible to have helpers for these for most cases. So it seems that we need some convenient way to iterate over each slice and configure registers accordingly. Perhaps we could have some common per-codec helpers with callbacks to set the common controls and iterate over the per-slice controls. That would help keep our drivers tidy and understandable, and maybe we could have start/stop steps too, pretty much like we do in cedrus. What do you think? Cheers, Paul
Hi, Le lundi 03 juin 2019 à 13:09 +0200, Boris Brezillon a écrit : > Some stateless decoders don't support per-slice decoding (or at least > not in a way that would make them efficient or easy to use). > Let's expose a menu to control and expose the supported decoding modes. > Drivers are allowed to support only one decoding but they can support > both too. > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > --- > .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- > drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ > include/media/h264-ctrls.h | 13 ++++++ > 3 files changed, 63 insertions(+), 1 deletion(-) > > diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > index 82547d5de250..188f625acb7c 100644 > --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > * - __u32 > - ``size`` > - > + * - __u32 > + - ``start_byte_offset`` > + - Where the slice payload starts in the output buffer. Useful when > + operating in per frame decoding mode and decoding multi-slice content. > + In this case, the output buffer will contain more than one slice and > + some codecs need to know where each slice starts. Note that this > + offsets points to the beginning of the slice which is supposed to > + contain an ANNEX B start code Looks good, maybe add a terminating dot. > * - __u32 > - ``header_bit_size`` > - > @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > - > * - __u16 > - ``num_slices`` > - - Number of slices needed to decode the current frame > + - Number of slices needed to decode the current frame/field. When > + operating in per-slice decoding mode (see > + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field > + should always be set to one So maybe we should allow an arbitrary number of slices unless only per- frame decoding is selected and we need all the slices at once. > * - __u16 > - ``nal_ref_idc`` > - NAL reference ID value coming from the NAL Unit header > @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > - 0x00000004 > - The DPB entry is a long term reference frame > > +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` > + Specifies the decoding mode to use. Currently exposes per slice and per > + frame decoding but new modes might be added later on. I think it definitey makes sense to have this per-codec. > + > + .. note:: > + > + This menu control is not yet part of the public kernel API and > + it is expected to change. > + > +.. c:type:: v4l2_mpeg_video_h264_decoding_mode > + > +.. cssclass:: longtable > + > +.. flat-table:: > + :header-rows: 0 > + :stub-columns: 0 > + :widths: 1 1 2 > + > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` > + - 0 > + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices > + must be set to 1 and the output buffer should contain only one slice. See above about having arbitrary numbers of slices (within frame boundary). > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_FRAME`` > + - 1 > + - The decoding is done per frame. v4l2_ctrl_h264_decode_params->num_slices > + can be > 1. When that happens, the output buffer should contain all > + slices needed to decode a frame/field, each slice being prefixed by an > + Annex B NAL header/start-code. Looks good! > + > .. _v4l2-mpeg-mpeg2: > > ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS (struct)`` > diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c > index 1217d38ea394..72bb3c8882f5 100644 > --- a/drivers/media/v4l2-core/v4l2-ctrls.c > +++ b/drivers/media/v4l2-core/v4l2-ctrls.c > @@ -406,6 +406,11 @@ const char * const *v4l2_ctrl_get_menu(u32 id) > "Explicit", > NULL, > }; > + static const char * const h264_decoding_mode[] = { > + "Per Slice", > + "Per Frame", > + NULL, > + }; > static const char * const mpeg_mpeg2_level[] = { > "Low", > "Main", > @@ -637,6 +642,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id) > return h264_fp_arrangement_type; > case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: > return h264_fmo_map_type; > + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: > + return h264_decoding_mode; > case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: > return mpeg_mpeg2_level; > case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: > @@ -856,6 +863,7 @@ const char *v4l2_ctrl_get_name(u32 id) > case V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX: return "H264 Scaling Matrix"; > case V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS: return "H264 Slice Parameters"; > case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS: return "H264 Decode Parameters"; > + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: return "H264 Decoding Mode"; > case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: return "MPEG2 Level"; > case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: return "MPEG2 Profile"; > case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP: return "MPEG4 I-Frame QP Value"; > @@ -1224,6 +1232,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, > case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC: > case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE: > case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: > + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: > case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: > case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: > case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL: > diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h > index e1404d78d6ff..26de2243f6f5 100644 > --- a/include/media/h264-ctrls.h > +++ b/include/media/h264-ctrls.h > @@ -26,6 +26,7 @@ > #define V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX (V4L2_CID_MPEG_BASE+1002) > #define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS (V4L2_CID_MPEG_BASE+1003) > #define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS (V4L2_CID_MPEG_BASE+1004) > +#define V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (V4L2_CID_MPEG_BASE+1005) > > /* enum v4l2_ctrl_type type values */ > #define V4L2_CTRL_TYPE_H264_SPS 0x0110 > @@ -33,6 +34,12 @@ > #define V4L2_CTRL_TYPE_H264_SCALING_MATRIX 0x0112 > #define V4L2_CTRL_TYPE_H264_SLICE_PARAMS 0x0113 > #define V4L2_CTRL_TYPE_H264_DECODE_PARAMS 0x0114 > +#define V4L2_CTRL_TYPE_H264_DECODING_MODE 0x0115 > + > +enum v4l2_mpeg_video_h264_decoding_mode { > + V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE, > + V4L2_MPEG_VIDEO_H264_DECODING_PER_FRAME, > +}; > > #define V4L2_H264_SPS_CONSTRAINT_SET0_FLAG 0x01 > #define V4L2_H264_SPS_CONSTRAINT_SET1_FLAG 0x02 > @@ -111,6 +118,8 @@ struct v4l2_h264_pred_weight_table { > struct v4l2_h264_weight_factors weight_factors[2]; > }; > > +#define V4L2_H264_MAX_SLICES_PER_FRAME 16 > + > #define V4L2_H264_SLICE_TYPE_P 0 > #define V4L2_H264_SLICE_TYPE_B 1 > #define V4L2_H264_SLICE_TYPE_I 2 > @@ -125,6 +134,10 @@ struct v4l2_h264_pred_weight_table { > struct v4l2_ctrl_h264_slice_params { > /* Size in bytes, including header */ > __u32 size; > + > + /* Where the slice starts in the output buffer (expressed in bytes). */ Maybe call it "beginning of the slice" or adapt the following comment to use the same terminology. Looks good otherwise, thanks! Cheers, Paul > + __u32 start_byte_offset; > + > /* Offset in bits to slice_data() from the beginning of this slice. */ > __u32 header_bit_size; >
On Wed, 05 Jun 2019 22:48:08 +0200 Paul Kocialkowski <paul.kocialkowski@bootlin.com> wrote: > Hi, > > Le mardi 04 juin 2019 à 10:16 +0200, Thierry Reding a écrit : > > On Mon, Jun 03, 2019 at 05:37:11PM +0200, Boris Brezillon wrote: > > > On Mon, 3 Jun 2019 16:05:26 +0200 > > > Thierry Reding <thierry.reding@gmail.com> wrote: > > > > > > > On Mon, Jun 03, 2019 at 02:51:13PM +0200, Boris Brezillon wrote: > > > > > +Maxime > > > > > > > > > > Oops, just realized Maxime was not Cc-ed on this series. > > > > > > > > > > On Mon, 3 Jun 2019 14:30:20 +0200 > > > > > Thierry Reding <thierry.reding@gmail.com> wrote: > > > > > > > > > > > On Mon, Jun 03, 2019 at 01:09:42PM +0200, Boris Brezillon wrote: > > > > > > > Some stateless decoders don't support per-slice decoding (or at least > > > > > > > not in a way that would make them efficient or easy to use). > > > > > > > Let's expose a menu to control and expose the supported decoding modes. > > > > > > > Drivers are allowed to support only one decoding but they can support > > > > > > > both too. > > > > > > > > > > > > > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > > > > > > > --- > > > > > > > .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- > > > > > > > drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ > > > > > > > include/media/h264-ctrls.h | 13 ++++++ > > > > > > > 3 files changed, 63 insertions(+), 1 deletion(-) > > > > > > > > > > > > > > diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > > > index 82547d5de250..188f625acb7c 100644 > > > > > > > --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > > > +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst > > > > > > > @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > > > * - __u32 > > > > > > > - ``size`` > > > > > > > - > > > > > > > + * - __u32 > > > > > > > + - ``start_byte_offset`` > > > > > > > + - Where the slice payload starts in the output buffer. Useful when > > > > > > > + operating in per frame decoding mode and decoding multi-slice content. > > > > > > > + In this case, the output buffer will contain more than one slice and > > > > > > > + some codecs need to know where each slice starts. Note that this > > > > > > > + offsets points to the beginning of the slice which is supposed to > > > > > > > + contain an ANNEX B start code > > > > > > > * - __u32 > > > > > > > - ``header_bit_size`` > > > > > > > - > > > > > > > @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > > > - > > > > > > > * - __u16 > > > > > > > - ``num_slices`` > > > > > > > - - Number of slices needed to decode the current frame > > > > > > > + - Number of slices needed to decode the current frame/field. When > > > > > > > + operating in per-slice decoding mode (see > > > > > > > + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field > > > > > > > + should always be set to one > > > > > > > * - __u16 > > > > > > > - ``nal_ref_idc`` > > > > > > > - NAL reference ID value coming from the NAL Unit header > > > > > > > @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - > > > > > > > - 0x00000004 > > > > > > > - The DPB entry is a long term reference frame > > > > > > > > > > > > > > +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` > > > > > > > + Specifies the decoding mode to use. Currently exposes per slice and per > > > > > > > + frame decoding but new modes might be added later on. > > > > > > > + > > > > > > > + .. note:: > > > > > > > + > > > > > > > + This menu control is not yet part of the public kernel API and > > > > > > > + it is expected to change. > > > > > > > + > > > > > > > +.. c:type:: v4l2_mpeg_video_h264_decoding_mode > > > > > > > + > > > > > > > +.. cssclass:: longtable > > > > > > > + > > > > > > > +.. flat-table:: > > > > > > > + :header-rows: 0 > > > > > > > + :stub-columns: 0 > > > > > > > + :widths: 1 1 2 > > > > > > > + > > > > > > > + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` > > > > > > > + - 0 > > > > > > > + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices > > > > > > > + must be set to 1 and the output buffer should contain only one slice. > > > > > > > > > > > > I wonder if we need to be that strict. Wouldn't it be possible for > > > > > > drivers to just iterate over a number of slices and decode each in turn > > > > > > if userspace passed more than one? > > > > > > > > > > > > Or perhaps a decoder can batch queue a couple of slices. I'm not sure > > > > > > how frequent this is, but consider something like a spike in activity on > > > > > > your system, causing some slices to get delayed so you actually get a > > > > > > few buffered up before you get a chance to hand them to the V4L2 device. > > > > > > Processing them all at once may help conceal the lag. > > > > > > > > > > Hm, so we'd be in some kind of slice-batch mode, which means we could > > > > > trigger a decode operation with more than one slice, but not > > > > > necessarily all the slices needed to decode a frame. TBH, supporting > > > > > per-frame (or the batch mode you suggest) on a HW that supports > > > > > per-slice decoding should be pretty simple and has not real impact on > > > > > perfs (as you said, it's just a matter of iterating over all the slices > > > > > attached to a decode operation), so I'm fine relaxing the rule here and > > > > > patching the cedrus driver accordingly (I can't really test the > > > > > changes though). Paul, Maxime, what's your opinion? > > So perhaps we could just allow passing any number of slices with each > request within a frame boundary and have userspace set the > V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF flag accordingly. > > I'm not totally sure we need a batching method beyond that, just > submitting requests with groups of slices and the same destination > buffer should work out fine. We can leave it up to the drivers to > handle multiple slices submitted at once. The batching mode we're talking about is exactly that: we let drivers collect slices and send them through a single decode operation. > > > > > We could perhaps have a test program to orchestrate such a scenario. I > > > > think the assumption should obviously still be that we don't cross the > > > > frame boundary using slices in one batch. > > > > > > We should definitely forbid mixing slices of different frames in the > > > same decode operation, since each decode operation is targeting a > > > single capture buffer. > > Agreed. > > > > > Just that if a frame was made > > > > up of, say, 4 slices and you first pass 3 slices, then 1, that it'd be > > > > nice if the driver would be able to cope with that. > > > > > > Yep, that makes sense. > > > > > > > It's something that > > > > could probably even be implemented in the framework as a helper, though > > > > I suspect it'd be just a couple of lines of extra code to wrap a loop > > > > around everything. > > > > > > I also thought about providing generic wrappers, both for this case and > > > the per-slice -> per-frame case (this one would be a bit more > > > complicated as it implies queuing slices in a bounce buffer and > > > triggering the decode operation only when we have all slices of a > > > frame). > > > > I like deferring the addition of that kind of helper until a clear > > pattern emerges out of the drivers that need this, just because that > > gives us real examples on which to model those helpers. But yeah, I > > think it should be possible to have helpers for these for most cases. > > So it seems that we need some convenient way to iterate over each slice > and configure registers accordingly. Perhaps we could have some common > per-codec helpers with callbacks to set the common controls and iterate > over the per-slice controls. Yep, I'm working on extracting common bits out of cedrus/hantro drivers, that new helper (to iterate over all slices) can be part of this series. > > That would help keep our drivers tidy and understandable, and maybe we > could have start/stop steps too, pretty much like we do in cedrus. > > What do you think? Makes total sense, and I'm all for having generic code provided as vb2-m2m/codec helpers.
diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst index 82547d5de250..188f625acb7c 100644 --- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst +++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst @@ -1748,6 +1748,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - * - __u32 - ``size`` - + * - __u32 + - ``start_byte_offset`` + - Where the slice payload starts in the output buffer. Useful when + operating in per frame decoding mode and decoding multi-slice content. + In this case, the output buffer will contain more than one slice and + some codecs need to know where each slice starts. Note that this + offsets points to the beginning of the slice which is supposed to + contain an ANNEX B start code * - __u32 - ``header_bit_size`` - @@ -1931,7 +1939,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - - * - __u16 - ``num_slices`` - - Number of slices needed to decode the current frame + - Number of slices needed to decode the current frame/field. When + operating in per-slice decoding mode (see + :c:type:`v4l2_mpeg_video_h264_decoding_mode`), this field + should always be set to one * - __u16 - ``nal_ref_idc`` - NAL reference ID value coming from the NAL Unit header @@ -2022,6 +2033,35 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - - 0x00000004 - The DPB entry is a long term reference frame +``V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (enum)`` + Specifies the decoding mode to use. Currently exposes per slice and per + frame decoding but new modes might be added later on. + + .. note:: + + This menu control is not yet part of the public kernel API and + it is expected to change. + +.. c:type:: v4l2_mpeg_video_h264_decoding_mode + +.. cssclass:: longtable + +.. flat-table:: + :header-rows: 0 + :stub-columns: 0 + :widths: 1 1 2 + + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE`` + - 0 + - The decoding is done per slice. v4l2_ctrl_h264_decode_params->num_slices + must be set to 1 and the output buffer should contain only one slice. + * - ``V4L2_MPEG_VIDEO_H264_DECODING_PER_FRAME`` + - 1 + - The decoding is done per frame. v4l2_ctrl_h264_decode_params->num_slices + can be > 1. When that happens, the output buffer should contain all + slices needed to decode a frame/field, each slice being prefixed by an + Annex B NAL header/start-code. + .. _v4l2-mpeg-mpeg2: ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS (struct)`` diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 1217d38ea394..72bb3c8882f5 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -406,6 +406,11 @@ const char * const *v4l2_ctrl_get_menu(u32 id) "Explicit", NULL, }; + static const char * const h264_decoding_mode[] = { + "Per Slice", + "Per Frame", + NULL, + }; static const char * const mpeg_mpeg2_level[] = { "Low", "Main", @@ -637,6 +642,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id) return h264_fp_arrangement_type; case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: return h264_fmo_map_type; + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: + return h264_decoding_mode; case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: return mpeg_mpeg2_level; case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: @@ -856,6 +863,7 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX: return "H264 Scaling Matrix"; case V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS: return "H264 Slice Parameters"; case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS: return "H264 Decode Parameters"; + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: return "H264 Decoding Mode"; case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: return "MPEG2 Level"; case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: return "MPEG2 Profile"; case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP: return "MPEG4 I-Frame QP Value"; @@ -1224,6 +1232,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC: case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE: case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE: + case V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE: case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL: case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE: case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL: diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h index e1404d78d6ff..26de2243f6f5 100644 --- a/include/media/h264-ctrls.h +++ b/include/media/h264-ctrls.h @@ -26,6 +26,7 @@ #define V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX (V4L2_CID_MPEG_BASE+1002) #define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS (V4L2_CID_MPEG_BASE+1003) #define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS (V4L2_CID_MPEG_BASE+1004) +#define V4L2_CID_MPEG_VIDEO_H264_DECODING_MODE (V4L2_CID_MPEG_BASE+1005) /* enum v4l2_ctrl_type type values */ #define V4L2_CTRL_TYPE_H264_SPS 0x0110 @@ -33,6 +34,12 @@ #define V4L2_CTRL_TYPE_H264_SCALING_MATRIX 0x0112 #define V4L2_CTRL_TYPE_H264_SLICE_PARAMS 0x0113 #define V4L2_CTRL_TYPE_H264_DECODE_PARAMS 0x0114 +#define V4L2_CTRL_TYPE_H264_DECODING_MODE 0x0115 + +enum v4l2_mpeg_video_h264_decoding_mode { + V4L2_MPEG_VIDEO_H264_DECODING_PER_SLICE, + V4L2_MPEG_VIDEO_H264_DECODING_PER_FRAME, +}; #define V4L2_H264_SPS_CONSTRAINT_SET0_FLAG 0x01 #define V4L2_H264_SPS_CONSTRAINT_SET1_FLAG 0x02 @@ -111,6 +118,8 @@ struct v4l2_h264_pred_weight_table { struct v4l2_h264_weight_factors weight_factors[2]; }; +#define V4L2_H264_MAX_SLICES_PER_FRAME 16 + #define V4L2_H264_SLICE_TYPE_P 0 #define V4L2_H264_SLICE_TYPE_B 1 #define V4L2_H264_SLICE_TYPE_I 2 @@ -125,6 +134,10 @@ struct v4l2_h264_pred_weight_table { struct v4l2_ctrl_h264_slice_params { /* Size in bytes, including header */ __u32 size; + + /* Where the slice starts in the output buffer (expressed in bytes). */ + __u32 start_byte_offset; + /* Offset in bits to slice_data() from the beginning of this slice. */ __u32 header_bit_size;
Some stateless decoders don't support per-slice decoding (or at least not in a way that would make them efficient or easy to use). Let's expose a menu to control and expose the supported decoding modes. Drivers are allowed to support only one decoding but they can support both too. Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> --- .../media/uapi/v4l/ext-ctrls-codec.rst | 42 ++++++++++++++++++- drivers/media/v4l2-core/v4l2-ctrls.c | 9 ++++ include/media/h264-ctrls.h | 13 ++++++ 3 files changed, 63 insertions(+), 1 deletion(-)