diff mbox series

[RFC,v2,5/6] media: v4l2-ctrls: Add video roi ctrls

Message ID 20241018054448.3190423-6-ming.qian@nxp.com (mailing list archive)
State New, archived
Headers show
Series Add video encoder ROI ctrls | expand

Commit Message

Ming Qian Oct. 18, 2024, 5:44 a.m. UTC
Add some ctrls to support the video encoder ROI feature.
Support 2 encoder ROI configurations that are rectangular region and
QP map

Signed-off-by: Ming Qian <ming.qian@nxp.com>
Signed-off-by: TaoJiang <tao.jiang_2@nxp.com>
---
 .../media/v4l/ext-ctrls-codec.rst             | 73 +++++++++++++++++++
 drivers/media/v4l2-core/v4l2-ctrls-defs.c     | 29 ++++++++
 include/uapi/linux/v4l2-controls.h            | 11 +++
 3 files changed, 113 insertions(+)

Comments

Hans Verkuil Oct. 18, 2024, 6:27 a.m. UTC | #1
On 18/10/2024 07:44, Ming Qian wrote:
> Add some ctrls to support the video encoder ROI feature.
> Support 2 encoder ROI configurations that are rectangular region and
> QP map
> 
> Signed-off-by: Ming Qian <ming.qian@nxp.com>
> Signed-off-by: TaoJiang <tao.jiang_2@nxp.com>
> ---
>  .../media/v4l/ext-ctrls-codec.rst             | 73 +++++++++++++++++++
>  drivers/media/v4l2-core/v4l2-ctrls-defs.c     | 29 ++++++++
>  include/uapi/linux/v4l2-controls.h            | 11 +++
>  3 files changed, 113 insertions(+)
> 
> diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
> index 4a379bd9e3fb..6b972247778c 100644
> --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
> +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
> @@ -1667,6 +1667,79 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
>      Codecs need to always use the specified range, rather then a HW custom range.
>      Applicable to encoders
>  
> +``V4L2_CID_MPEG_VIDEO_ROI_MODE``
> +    (enum)
> +
> +enum v4l2_mpeg_video_roi_mode -
> +    Video roi mode. Possible values are:
> +
> +
> +
> +.. flat-table::
> +    :header-rows:  0
> +    :stub-columns: 0
> +
> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_NONE``
> +      - No ROI in the MPEG stream
> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``
> +      - Rectangle ROI mode
> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``
> +      - Map ROI mode
> +
> +``V4L2_CID_MPEG_VIDEO_ROI_RECT (struct)``
> +    Select rectangular regions and specify the QP offset. The
> +    struct :c:type:`v4l2_ctrl_video_region_param` provides the
> +    rectangular region and the parameter to describe QP offset.
> +    The maximum number of rectangular regions depends on the
> +    hardware.  This control is a dynamically sized array. This
> +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
> +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``. Applicable to
> +    encoders.
> +
> +.. c:type:: v4l2_ctrl_video_region_param
> +
> +.. raw:: latex
> +
> +    \small
> +
> +.. tabularcolumns:: |p{4.0cm}|p{4.0cm}|p{4.0cm}|
> +
> +.. flat-table:: struct v4l2_ctrl_video_region_param
> +    :header-rows:  0
> +    :stub-columns: 0
> +    :widths:       1 1 1
> +
> +    * - struct :c:type:`v4l2_rect`
> +      - ``rect``
> +      - The rectangular region

What is the unit? I assume pixels. And inside what larger area is this
rectangle located? It probably needs to refer to one of the SEL_TGT targets as
described here:

https://hverkuil.home.xs4all.nl/spec/userspace-api/v4l/v4l2-selection-targets.html

> +    * - __s32
> +      - ``parameter``
> +      -

So what is the parameter? It has no description.

> +    * - __u32
> +      - ``reserved[2]``
> +      -

Add "Applications and drivers must set this to zero."

> +
> +.. raw:: latex
> +
> +    \normalsize
> +
> +``V4L2_CID_MPEG_VIDEO_ROI_MAP (integer)``
> +    Specifies the QP offset for each block. This control is a
> +    dynamically sized array. The array size can be calculated
> +    from video resolution and the roi map block size which can
> +    be got from ``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE``. This
> +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
> +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. Applicable to
> +    encoders.
> +
> +``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE (struct)``
> +    This control returns the roi block size in pixels. The struct
> +    :c:type:`v4l2_area` provides the width and height in separate
> +    fields. This control is applicable when
> +    ``V4L2_CID_MPEG_VIDEO_ROI_MODE`` value is
> +    ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. This control depends on the
> +    encoding format. Applicable to encoders.
> +
>  .. raw:: latex
>  
>      \normalsize
> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> index 1ea52011247a..54219a3b215a 100644
> --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> @@ -612,6 +612,13 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
>  		NULL,
>  	};
>  
> +	static const char * const mpeg_video_roi_mode[] = {
> +		"None",
> +		"Rectangle",
> +		"Map",
> +		NULL,
> +	};
> +
>  	switch (id) {
>  	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
>  		return mpeg_audio_sampling_freq;
> @@ -750,6 +757,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
>  		return camera_orientation;
>  	case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE:
>  		return intra_refresh_period_type;
> +	case V4L2_CID_MPEG_VIDEO_ROI_MODE:
> +		return mpeg_video_roi_mode;
>  	default:
>  		return NULL;
>  	}
> @@ -971,6 +980,10 @@ const char *v4l2_ctrl_get_name(u32 id)
>  	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:		return "Frame LTR Index";
>  	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:		return "Use LTR Frames";
>  	case V4L2_CID_MPEG_VIDEO_AVERAGE_QP:			return "Average QP Value";
> +	case V4L2_CID_MPEG_VIDEO_ROI_MODE:			return "Video ROI Mode";
> +	case V4L2_CID_MPEG_VIDEO_ROI_RECT:			return "Video ROI Rectangle";
> +	case V4L2_CID_MPEG_VIDEO_ROI_MAP:			return "Video ROI Map";
> +	case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:		return "Video ROI Map Block Size";
>  	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
>  	case V4L2_CID_FWHT_P_FRAME_QP:				return "FWHT P-Frame QP Value";
>  
> @@ -1512,6 +1525,22 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
>  		*type = V4L2_CTRL_TYPE_INTEGER;
>  		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
>  		break;
> +	case V4L2_CID_MPEG_VIDEO_ROI_MODE:
> +		*type = V4L2_CTRL_TYPE_MENU;
> +		*flags |= V4L2_CTRL_FLAG_UPDATE;
> +		break;
> +	case V4L2_CID_MPEG_VIDEO_ROI_RECT:
> +		*type =	V4L2_CTRL_TYPE_REGION;
> +		*flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY | V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
> +		break;
> +	case V4L2_CID_MPEG_VIDEO_ROI_MAP:
> +		*type = V4L2_CTRL_TYPE_INTEGER;
> +		*flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY | V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
> +		break;
> +	case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:
> +		*type = V4L2_CTRL_TYPE_AREA;
> +		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
> +		break;
>  	case V4L2_CID_PIXEL_RATE:
>  		*type = V4L2_CTRL_TYPE_INTEGER64;
>  		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
> diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
> index 974fd254e573..169a676fd64c 100644
> --- a/include/uapi/linux/v4l2-controls.h
> +++ b/include/uapi/linux/v4l2-controls.h
> @@ -900,6 +900,17 @@ enum v4l2_mpeg_video_av1_level {
>  
>  #define V4L2_CID_MPEG_VIDEO_AVERAGE_QP  (V4L2_CID_CODEC_BASE + 657)
>  
> +enum v4l2_mpeg_video_roi_mode {
> +	V4L2_MPEG_VIDEO_ROI_MODE_NONE,
> +	V4L2_MPEG_VIDEO_ROI_MODE_RECT,
> +	V4L2_MPEG_VIDEO_ROI_MODE_MAP
> +};
> +
> +#define V4L2_CID_MPEG_VIDEO_ROI_MODE		(V4L2_CID_CODEC_BASE + 658)
> +#define V4L2_CID_MPEG_VIDEO_ROI_RECT		(V4L2_CID_CODEC_BASE + 659)
> +#define V4L2_CID_MPEG_VIDEO_ROI_MAP		(V4L2_CID_CODEC_BASE + 660)
> +#define V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE	(V4L2_CID_CODEC_BASE + 661)
> +
>  /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
>  #define V4L2_CID_CODEC_CX2341X_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1000)
>  #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE		(V4L2_CID_CODEC_CX2341X_BASE+0)
Ming Qian Oct. 18, 2024, 8:20 a.m. UTC | #2
Hi Hans,

>-----Original Message-----
>From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
>Sent: Friday, October 18, 2024 2:28 PM
>To: Ming Qian <ming.qian@nxp.com>; mchehab@kernel.org
>Cc: yunkec@google.com; nicolas@ndufresne.ca; s.hauer@pengutronix.de;
>kernel@pengutronix.de; festevam@gmail.com; dl-linux-imx <linux-
>imx@nxp.com>; X.H. Bao <xiahong.bao@nxp.com>; Ming Zhou
><ming.zhou@nxp.com>; Eagle Zhou <eagle.zhou@nxp.com>; Tao Jiang
><tao.jiang_2@nxp.com>; Ming Qian (OSS) <ming.qian@oss.nxp.com>;
>imx@lists.linux.dev; linux-media@vger.kernel.org; linux-
>kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org
>Subject: [EXT] Re: [RFC v2 5/6] media: v4l2-ctrls: Add video roi ctrls
>
>Caution: This is an external email. Please take care when clicking links or
>opening attachments. When in doubt, report the message using the 'Report
>this email' button
>
>
>On 18/10/2024 07:44, Ming Qian wrote:
>> Add some ctrls to support the video encoder ROI feature.
>> Support 2 encoder ROI configurations that are rectangular region and
>> QP map
>>
>> Signed-off-by: Ming Qian <ming.qian@nxp.com>
>> Signed-off-by: TaoJiang <tao.jiang_2@nxp.com>
>> ---
>>  .../media/v4l/ext-ctrls-codec.rst             | 73 +++++++++++++++++++
>>  drivers/media/v4l2-core/v4l2-ctrls-defs.c     | 29 ++++++++
>>  include/uapi/linux/v4l2-controls.h            | 11 +++
>>  3 files changed, 113 insertions(+)
>>
>> diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>> b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>> index 4a379bd9e3fb..6b972247778c 100644
>> --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>> +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>> @@ -1667,6 +1667,79 @@ enum
>v4l2_mpeg_video_h264_hierarchical_coding_type -
>>      Codecs need to always use the specified range, rather then a HW custom
>range.
>>      Applicable to encoders
>>
>> +``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>> +    (enum)
>> +
>> +enum v4l2_mpeg_video_roi_mode -
>> +    Video roi mode. Possible values are:
>> +
>> +
>> +
>> +.. flat-table::
>> +    :header-rows:  0
>> +    :stub-columns: 0
>> +
>> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_NONE``
>> +      - No ROI in the MPEG stream
>> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``
>> +      - Rectangle ROI mode
>> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``
>> +      - Map ROI mode
>> +
>> +``V4L2_CID_MPEG_VIDEO_ROI_RECT (struct)``
>> +    Select rectangular regions and specify the QP offset. The
>> +    struct :c:type:`v4l2_ctrl_video_region_param` provides the
>> +    rectangular region and the parameter to describe QP offset.
>> +    The maximum number of rectangular regions depends on the
>> +    hardware.  This control is a dynamically sized array. This
>> +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>> +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``. Applicable to
>> +    encoders.
>> +
>> +.. c:type:: v4l2_ctrl_video_region_param
>> +
>> +.. raw:: latex
>> +
>> +    \small
>> +
>> +.. tabularcolumns:: |p{4.0cm}|p{4.0cm}|p{4.0cm}|
>> +
>> +.. flat-table:: struct v4l2_ctrl_video_region_param
>> +    :header-rows:  0
>> +    :stub-columns: 0
>> +    :widths:       1 1 1
>> +
>> +    * - struct :c:type:`v4l2_rect`
>> +      - ``rect``
>> +      - The rectangular region
>
>What is the unit? I assume pixels. And inside what larger area is this rectangle
>located? It probably needs to refer to one of the SEL_TGT targets as described
>here:
>
>https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fhverkuil.
>home.xs4all.nl%2Fspec%2Fuserspace-api%2Fv4l%2Fv4l2-selection-
>targets.html&data=05%7C02%7Cming.qian%40nxp.com%7Cfe9348ba24504eb
>d98f608dcef3dffcf%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63
>8648296786960098%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAi
>LCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata
>=cTXaNWLZs4l6IytSu9TWmEb7OyvF4viby9IjpOJXvmE%3D&reserved=0
>

We want to use pixels as the unit, but for some detailed encoder, there
may be alignment constraints, and the rectangular area should be inside
the encoded picture size, for example, we encode a 720P H.264 stream,
the largest area is 1280x720@(0,0). This doesn't involve scaling up or
down. I'm not sure if it's possible to align to crop or compose.

Currently, we want to choose an area and increase or decrease the image
quality. so we want to use a parameter to set the qp offset.

>> +    * - __s32
>> +      - ``parameter``
>> +      -
>
>So what is the parameter? It has no description.
>

I newly add a ctrl type V4L2_CTRL_TYPE_REGION, and this struct is
related to the type, so I thought I need to define a general argument to
meet different needs, then this type can support a series of controls.
For this patch, it's qp offset.
I thought if I name it as qp_offset, the ctrl type can't be used on
other similar controls.
Is it better to rename it or add more description and keep the name?

>> +    * - __u32
>> +      - ``reserved[2]``
>> +      -
>
>Add "Applications and drivers must set this to zero."
>

Yes, I missed it

>> +
>> +.. raw:: latex
>> +
>> +    \normalsize
>> +
>> +``V4L2_CID_MPEG_VIDEO_ROI_MAP (integer)``
>> +    Specifies the QP offset for each block. This control is a
>> +    dynamically sized array. The array size can be calculated
>> +    from video resolution and the roi map block size which can
>> +    be got from ``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE``. This
>> +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>> +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. Applicable to
>> +    encoders.
>> +
>> +``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE (struct)``
>> +    This control returns the roi block size in pixels. The struct
>> +    :c:type:`v4l2_area` provides the width and height in separate
>> +    fields. This control is applicable when
>> +    ``V4L2_CID_MPEG_VIDEO_ROI_MODE`` value is
>> +    ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. This control depends on the
>> +    encoding format. Applicable to encoders.
>> +
>>  .. raw:: latex
>>
>>      \normalsize
>> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>> b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>> index 1ea52011247a..54219a3b215a 100644
>> --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>> +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>> @@ -612,6 +612,13 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
>>               NULL,
>>       };
>>
>> +     static const char * const mpeg_video_roi_mode[] = {
>> +             "None",
>> +             "Rectangle",
>> +             "Map",
>> +             NULL,
>> +     };
>> +
>>       switch (id) {
>>       case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
>>               return mpeg_audio_sampling_freq; @@ -750,6 +757,8 @@
>> const char * const *v4l2_ctrl_get_menu(u32 id)
>>               return camera_orientation;
>>       case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE:
>>               return intra_refresh_period_type;
>> +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:
>> +             return mpeg_video_roi_mode;
>>       default:
>>               return NULL;
>>       }
>> @@ -971,6 +980,10 @@ const char *v4l2_ctrl_get_name(u32 id)
>>       case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:               return "Frame
>LTR Index";
>>       case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:                return "Use LTR
>Frames";
>>       case V4L2_CID_MPEG_VIDEO_AVERAGE_QP:                    return "Average
>QP Value";
>> +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:                      return "Video ROI
>Mode";
>> +     case V4L2_CID_MPEG_VIDEO_ROI_RECT:                      return "Video ROI
>Rectangle";
>> +     case V4L2_CID_MPEG_VIDEO_ROI_MAP:                       return "Video ROI
>Map";
>> +     case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:            return
>"Video ROI Map Block Size";
>>       case V4L2_CID_FWHT_I_FRAME_QP:                          return "FWHT I-Frame
>QP Value";
>>       case V4L2_CID_FWHT_P_FRAME_QP:                          return "FWHT P-
>Frame QP Value";
>>
>> @@ -1512,6 +1525,22 @@ void v4l2_ctrl_fill(u32 id, const char **name,
>enum v4l2_ctrl_type *type,
>>               *type = V4L2_CTRL_TYPE_INTEGER;
>>               *flags |= V4L2_CTRL_FLAG_READ_ONLY;
>>               break;
>> +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:
>> +             *type = V4L2_CTRL_TYPE_MENU;
>> +             *flags |= V4L2_CTRL_FLAG_UPDATE;
>> +             break;
>> +     case V4L2_CID_MPEG_VIDEO_ROI_RECT:
>> +             *type = V4L2_CTRL_TYPE_REGION;
>> +             *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY |
>V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
>> +             break;
>> +     case V4L2_CID_MPEG_VIDEO_ROI_MAP:
>> +             *type = V4L2_CTRL_TYPE_INTEGER;
>> +             *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY |
>V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
>> +             break;
>> +     case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:
>> +             *type = V4L2_CTRL_TYPE_AREA;
>> +             *flags |= V4L2_CTRL_FLAG_READ_ONLY;
>> +             break;
>>       case V4L2_CID_PIXEL_RATE:
>>               *type = V4L2_CTRL_TYPE_INTEGER64;
>>               *flags |= V4L2_CTRL_FLAG_READ_ONLY; diff --git
>> a/include/uapi/linux/v4l2-controls.h
>> b/include/uapi/linux/v4l2-controls.h
>> index 974fd254e573..169a676fd64c 100644
>> --- a/include/uapi/linux/v4l2-controls.h
>> +++ b/include/uapi/linux/v4l2-controls.h
>> @@ -900,6 +900,17 @@ enum v4l2_mpeg_video_av1_level {
>>
>>  #define V4L2_CID_MPEG_VIDEO_AVERAGE_QP  (V4L2_CID_CODEC_BASE +
>657)
>>
>> +enum v4l2_mpeg_video_roi_mode {
>> +     V4L2_MPEG_VIDEO_ROI_MODE_NONE,
>> +     V4L2_MPEG_VIDEO_ROI_MODE_RECT,
>> +     V4L2_MPEG_VIDEO_ROI_MODE_MAP
>> +};
>> +
>> +#define V4L2_CID_MPEG_VIDEO_ROI_MODE         (V4L2_CID_CODEC_BASE
>+ 658)
>> +#define V4L2_CID_MPEG_VIDEO_ROI_RECT         (V4L2_CID_CODEC_BASE +
>659)
>> +#define V4L2_CID_MPEG_VIDEO_ROI_MAP          (V4L2_CID_CODEC_BASE +
>660)
>> +#define V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE
>(V4L2_CID_CODEC_BASE + 661)
>> +
>>  /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2
>*/
>>  #define V4L2_CID_CODEC_CX2341X_BASE
>(V4L2_CTRL_CLASS_CODEC | 0x1000)
>>  #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE
>(V4L2_CID_CODEC_CX2341X_BASE+0)
Hans Verkuil Oct. 18, 2024, 9:54 a.m. UTC | #3
Hi Ming Qian,

On 18/10/2024 10:20, Ming Qian wrote:
> Hi Hans,
> 
>> -----Original Message-----
>> From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
>> Sent: Friday, October 18, 2024 2:28 PM
>> To: Ming Qian <ming.qian@nxp.com>; mchehab@kernel.org
>> Cc: yunkec@google.com; nicolas@ndufresne.ca; s.hauer@pengutronix.de;
>> kernel@pengutronix.de; festevam@gmail.com; dl-linux-imx <linux-
>> imx@nxp.com>; X.H. Bao <xiahong.bao@nxp.com>; Ming Zhou
>> <ming.zhou@nxp.com>; Eagle Zhou <eagle.zhou@nxp.com>; Tao Jiang
>> <tao.jiang_2@nxp.com>; Ming Qian (OSS) <ming.qian@oss.nxp.com>;
>> imx@lists.linux.dev; linux-media@vger.kernel.org; linux-
>> kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org
>> Subject: [EXT] Re: [RFC v2 5/6] media: v4l2-ctrls: Add video roi ctrls
>>
>> Caution: This is an external email. Please take care when clicking links or
>> opening attachments. When in doubt, report the message using the 'Report
>> this email' button
>>
>>
>> On 18/10/2024 07:44, Ming Qian wrote:
>>> Add some ctrls to support the video encoder ROI feature.
>>> Support 2 encoder ROI configurations that are rectangular region and
>>> QP map
>>>
>>> Signed-off-by: Ming Qian <ming.qian@nxp.com>
>>> Signed-off-by: TaoJiang <tao.jiang_2@nxp.com>
>>> ---
>>>  .../media/v4l/ext-ctrls-codec.rst             | 73 +++++++++++++++++++
>>>  drivers/media/v4l2-core/v4l2-ctrls-defs.c     | 29 ++++++++
>>>  include/uapi/linux/v4l2-controls.h            | 11 +++
>>>  3 files changed, 113 insertions(+)
>>>
>>> diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>>> b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>>> index 4a379bd9e3fb..6b972247778c 100644
>>> --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>>> +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>>> @@ -1667,6 +1667,79 @@ enum
>> v4l2_mpeg_video_h264_hierarchical_coding_type -
>>>      Codecs need to always use the specified range, rather then a HW custom
>> range.
>>>      Applicable to encoders
>>>
>>> +``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>>> +    (enum)
>>> +
>>> +enum v4l2_mpeg_video_roi_mode -
>>> +    Video roi mode. Possible values are:
>>> +
>>> +
>>> +
>>> +.. flat-table::
>>> +    :header-rows:  0
>>> +    :stub-columns: 0
>>> +
>>> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_NONE``
>>> +      - No ROI in the MPEG stream
>>> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``
>>> +      - Rectangle ROI mode
>>> +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``
>>> +      - Map ROI mode
>>> +
>>> +``V4L2_CID_MPEG_VIDEO_ROI_RECT (struct)``
>>> +    Select rectangular regions and specify the QP offset. The
>>> +    struct :c:type:`v4l2_ctrl_video_region_param` provides the
>>> +    rectangular region and the parameter to describe QP offset.
>>> +    The maximum number of rectangular regions depends on the
>>> +    hardware.  This control is a dynamically sized array. This
>>> +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>>> +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``. Applicable to
>>> +    encoders.
>>> +
>>> +.. c:type:: v4l2_ctrl_video_region_param
>>> +
>>> +.. raw:: latex
>>> +
>>> +    \small
>>> +
>>> +.. tabularcolumns:: |p{4.0cm}|p{4.0cm}|p{4.0cm}|
>>> +
>>> +.. flat-table:: struct v4l2_ctrl_video_region_param
>>> +    :header-rows:  0
>>> +    :stub-columns: 0
>>> +    :widths:       1 1 1
>>> +
>>> +    * - struct :c:type:`v4l2_rect`
>>> +      - ``rect``
>>> +      - The rectangular region
>>
>> What is the unit? I assume pixels. And inside what larger area is this rectangle
>> located? It probably needs to refer to one of the SEL_TGT targets as described
>> here:
>>
>> https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fhverkuil.
>> home.xs4all.nl%2Fspec%2Fuserspace-api%2Fv4l%2Fv4l2-selection-
>> targets.html&data=05%7C02%7Cming.qian%40nxp.com%7Cfe9348ba24504eb
>> d98f608dcef3dffcf%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63
>> 8648296786960098%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAi
>> LCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata
>> =cTXaNWLZs4l6IytSu9TWmEb7OyvF4viby9IjpOJXvmE%3D&reserved=0
>>
> 
> We want to use pixels as the unit, but for some detailed encoder, there
> may be alignment constraints, and the rectangular area should be inside
> the encoded picture size, for example, we encode a 720P H.264 stream,
> the largest area is 1280x720@(0,0). This doesn't involve scaling up or
> down. I'm not sure if it's possible to align to crop or compose.
> 
> Currently, we want to choose an area and increase or decrease the image
> quality. so we want to use a parameter to set the qp offset.
> 
>>> +    * - __s32
>>> +      - ``parameter``
>>> +      -
>>
>> So what is the parameter? It has no description.
>>
> 
> I newly add a ctrl type V4L2_CTRL_TYPE_REGION, and this struct is
> related to the type, so I thought I need to define a general argument to
> meet different needs, then this type can support a series of controls.
> For this patch, it's qp offset.
> I thought if I name it as qp_offset, the ctrl type can't be used on
> other similar controls.
> Is it better to rename it or add more description and keep the name?

This seems very specific to this use-case, so it makes sense if this is
defined as such.

If you want use generic types, then you would need two controls: one
to define the regions (using type v4l2_rect), and one to set the
QP offset (type integer). This assumes that both arrays are set to the
same size.

If you want to keep them together, then just make a new type for this.

But a more general question regarding this feature: is this hardware
specific? Or is this defined in some codec standard?

And to be clear, this has nothing to do with the UVC ROI patch series that
you linked to, right? You just reused some of the work that was done there.

I am leaning to splitting this up in two controls: one defines the ROIs, and
one defines the parameter for each ROI. This makes it very easy to reuse in
other scenarios (such as UVC).

But I really need to know a bit more about this feature.

Regards,

	Hans

> 
>>> +    * - __u32
>>> +      - ``reserved[2]``
>>> +      -
>>
>> Add "Applications and drivers must set this to zero."
>>
> 
> Yes, I missed it
> 
>>> +
>>> +.. raw:: latex
>>> +
>>> +    \normalsize
>>> +
>>> +``V4L2_CID_MPEG_VIDEO_ROI_MAP (integer)``
>>> +    Specifies the QP offset for each block. This control is a
>>> +    dynamically sized array. The array size can be calculated
>>> +    from video resolution and the roi map block size which can
>>> +    be got from ``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE``. This
>>> +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>>> +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. Applicable to
>>> +    encoders.
>>> +
>>> +``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE (struct)``
>>> +    This control returns the roi block size in pixels. The struct
>>> +    :c:type:`v4l2_area` provides the width and height in separate
>>> +    fields. This control is applicable when
>>> +    ``V4L2_CID_MPEG_VIDEO_ROI_MODE`` value is
>>> +    ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. This control depends on the
>>> +    encoding format. Applicable to encoders.
>>> +
>>>  .. raw:: latex
>>>
>>>      \normalsize
>>> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>>> b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>>> index 1ea52011247a..54219a3b215a 100644
>>> --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>>> +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>>> @@ -612,6 +612,13 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
>>>               NULL,
>>>       };
>>>
>>> +     static const char * const mpeg_video_roi_mode[] = {
>>> +             "None",
>>> +             "Rectangle",
>>> +             "Map",
>>> +             NULL,
>>> +     };
>>> +
>>>       switch (id) {
>>>       case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
>>>               return mpeg_audio_sampling_freq; @@ -750,6 +757,8 @@
>>> const char * const *v4l2_ctrl_get_menu(u32 id)
>>>               return camera_orientation;
>>>       case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE:
>>>               return intra_refresh_period_type;
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:
>>> +             return mpeg_video_roi_mode;
>>>       default:
>>>               return NULL;
>>>       }
>>> @@ -971,6 +980,10 @@ const char *v4l2_ctrl_get_name(u32 id)
>>>       case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:               return "Frame
>> LTR Index";
>>>       case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:                return "Use LTR
>> Frames";
>>>       case V4L2_CID_MPEG_VIDEO_AVERAGE_QP:                    return "Average
>> QP Value";
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:                      return "Video ROI
>> Mode";
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_RECT:                      return "Video ROI
>> Rectangle";
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_MAP:                       return "Video ROI
>> Map";
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:            return
>> "Video ROI Map Block Size";
>>>       case V4L2_CID_FWHT_I_FRAME_QP:                          return "FWHT I-Frame
>> QP Value";
>>>       case V4L2_CID_FWHT_P_FRAME_QP:                          return "FWHT P-
>> Frame QP Value";
>>>
>>> @@ -1512,6 +1525,22 @@ void v4l2_ctrl_fill(u32 id, const char **name,
>> enum v4l2_ctrl_type *type,
>>>               *type = V4L2_CTRL_TYPE_INTEGER;
>>>               *flags |= V4L2_CTRL_FLAG_READ_ONLY;
>>>               break;
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:
>>> +             *type = V4L2_CTRL_TYPE_MENU;
>>> +             *flags |= V4L2_CTRL_FLAG_UPDATE;
>>> +             break;
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_RECT:
>>> +             *type = V4L2_CTRL_TYPE_REGION;
>>> +             *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY |
>> V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
>>> +             break;
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_MAP:
>>> +             *type = V4L2_CTRL_TYPE_INTEGER;
>>> +             *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY |
>> V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
>>> +             break;
>>> +     case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:
>>> +             *type = V4L2_CTRL_TYPE_AREA;
>>> +             *flags |= V4L2_CTRL_FLAG_READ_ONLY;
>>> +             break;
>>>       case V4L2_CID_PIXEL_RATE:
>>>               *type = V4L2_CTRL_TYPE_INTEGER64;
>>>               *flags |= V4L2_CTRL_FLAG_READ_ONLY; diff --git
>>> a/include/uapi/linux/v4l2-controls.h
>>> b/include/uapi/linux/v4l2-controls.h
>>> index 974fd254e573..169a676fd64c 100644
>>> --- a/include/uapi/linux/v4l2-controls.h
>>> +++ b/include/uapi/linux/v4l2-controls.h
>>> @@ -900,6 +900,17 @@ enum v4l2_mpeg_video_av1_level {
>>>
>>>  #define V4L2_CID_MPEG_VIDEO_AVERAGE_QP  (V4L2_CID_CODEC_BASE +
>> 657)
>>>
>>> +enum v4l2_mpeg_video_roi_mode {
>>> +     V4L2_MPEG_VIDEO_ROI_MODE_NONE,
>>> +     V4L2_MPEG_VIDEO_ROI_MODE_RECT,
>>> +     V4L2_MPEG_VIDEO_ROI_MODE_MAP
>>> +};
>>> +
>>> +#define V4L2_CID_MPEG_VIDEO_ROI_MODE         (V4L2_CID_CODEC_BASE
>> + 658)
>>> +#define V4L2_CID_MPEG_VIDEO_ROI_RECT         (V4L2_CID_CODEC_BASE +
>> 659)
>>> +#define V4L2_CID_MPEG_VIDEO_ROI_MAP          (V4L2_CID_CODEC_BASE +
>> 660)
>>> +#define V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE
>> (V4L2_CID_CODEC_BASE + 661)
>>> +
>>>  /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2
>> */
>>>  #define V4L2_CID_CODEC_CX2341X_BASE
>> (V4L2_CTRL_CLASS_CODEC | 0x1000)
>>>  #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE
>> (V4L2_CID_CODEC_CX2341X_BASE+0)
>
Nicolas Dufresne Oct. 18, 2024, 2:40 p.m. UTC | #4
Hi Ming and Hans,

adding my two cents...

Le vendredi 18 octobre 2024 à 11:54 +0200, Hans Verkuil a écrit :
> Hi Ming Qian,
> 
> On 18/10/2024 10:20, Ming Qian wrote:
> > Hi Hans,
> > 
> > > -----Original Message-----
> > > From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
> > > Sent: Friday, October 18, 2024 2:28 PM
> > > To: Ming Qian <ming.qian@nxp.com>; mchehab@kernel.org
> > > Cc: yunkec@google.com; nicolas@ndufresne.ca; s.hauer@pengutronix.de;
> > > kernel@pengutronix.de; festevam@gmail.com; dl-linux-imx <linux-
> > > imx@nxp.com>; X.H. Bao <xiahong.bao@nxp.com>; Ming Zhou
> > > <ming.zhou@nxp.com>; Eagle Zhou <eagle.zhou@nxp.com>; Tao Jiang
> > > <tao.jiang_2@nxp.com>; Ming Qian (OSS) <ming.qian@oss.nxp.com>;
> > > imx@lists.linux.dev; linux-media@vger.kernel.org; linux-
> > > kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org
> > > Subject: [EXT] Re: [RFC v2 5/6] media: v4l2-ctrls: Add video roi ctrls
> > > 
> > > Caution: This is an external email. Please take care when clicking links or
> > > opening attachments. When in doubt, report the message using the 'Report
> > > this email' button
> > > 
> > > 
> > > On 18/10/2024 07:44, Ming Qian wrote:
> > > > Add some ctrls to support the video encoder ROI feature.
> > > > Support 2 encoder ROI configurations that are rectangular region and
> > > > QP map
> > > > 
> > > > Signed-off-by: Ming Qian <ming.qian@nxp.com>
> > > > Signed-off-by: TaoJiang <tao.jiang_2@nxp.com>
> > > > ---
> > > >  .../media/v4l/ext-ctrls-codec.rst             | 73 +++++++++++++++++++
> > > >  drivers/media/v4l2-core/v4l2-ctrls-defs.c     | 29 ++++++++
> > > >  include/uapi/linux/v4l2-controls.h            | 11 +++
> > > >  3 files changed, 113 insertions(+)
> > > > 
> > > > diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
> > > > b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
> > > > index 4a379bd9e3fb..6b972247778c 100644
> > > > --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
> > > > +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
> > > > @@ -1667,6 +1667,79 @@ enum
> > > v4l2_mpeg_video_h264_hierarchical_coding_type -
> > > >      Codecs need to always use the specified range, rather then a HW custom
> > > range.
> > > >      Applicable to encoders
> > > > 
> > > > +``V4L2_CID_MPEG_VIDEO_ROI_MODE``
> > > > +    (enum)
> > > > +
> > > > +enum v4l2_mpeg_video_roi_mode -
> > > > +    Video roi mode. Possible values are:
> > > > +
> > > > +
> > > > +
> > > > +.. flat-table::
> > > > +    :header-rows:  0
> > > > +    :stub-columns: 0
> > > > +
> > > > +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_NONE``
> > > > +      - No ROI in the MPEG stream
> > > > +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``
> > > > +      - Rectangle ROI mode
> > > > +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``
> > > > +      - Map ROI mode
> > > > +
> > > > +``V4L2_CID_MPEG_VIDEO_ROI_RECT (struct)``
> > > > +    Select rectangular regions and specify the QP offset. The
> > > > +    struct :c:type:`v4l2_ctrl_video_region_param` provides the
> > > > +    rectangular region and the parameter to describe QP offset.
> > > > +    The maximum number of rectangular regions depends on the
> > > > +    hardware.  This control is a dynamically sized array. This
> > > > +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
> > > > +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``. Applicable to
> > > > +    encoders.
> > > > +
> > > > +.. c:type:: v4l2_ctrl_video_region_param
> > > > +
> > > > +.. raw:: latex
> > > > +
> > > > +    \small
> > > > +
> > > > +.. tabularcolumns:: |p{4.0cm}|p{4.0cm}|p{4.0cm}|
> > > > +
> > > > +.. flat-table:: struct v4l2_ctrl_video_region_param
> > > > +    :header-rows:  0
> > > > +    :stub-columns: 0
> > > > +    :widths:       1 1 1
> > > > +
> > > > +    * - struct :c:type:`v4l2_rect`
> > > > +      - ``rect``
> > > > +      - The rectangular region
> > > 
> > > What is the unit? I assume pixels. And inside what larger area is this rectangle
> > > located? It probably needs to refer to one of the SEL_TGT targets as described
> > > here:
> > > 
> > > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fhverkuil.
> > > home.xs4all.nl%2Fspec%2Fuserspace-api%2Fv4l%2Fv4l2-selection-
> > > targets.html&data=05%7C02%7Cming.qian%40nxp.com%7Cfe9348ba24504eb
> > > d98f608dcef3dffcf%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63
> > > 8648296786960098%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAi
> > > LCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata
> > > =cTXaNWLZs4l6IytSu9TWmEb7OyvF4viby9IjpOJXvmE%3D&reserved=0
> > > 
> > 
> > We want to use pixels as the unit, but for some detailed encoder, there
> > may be alignment constraints, and the rectangular area should be inside
> > the encoded picture size, for example, we encode a 720P H.264 stream,
> > the largest area is 1280x720@(0,0). This doesn't involve scaling up or
> > down. I'm not sure if it's possible to align to crop or compose.
> > 
> > Currently, we want to choose an area and increase or decrease the image
> > quality. so we want to use a parameter to set the qp offset.

In existing codec, the quantization factor is global to the macroblock being
encoded (not to the image). So for H.264, it will be aligned to 16 pixels in
both directions. The documentation should be clear on that and perhaps we should
go further and defined the direction this should be rounded, which should be to
round toward the edges, so X,Y gets rounded down, and width/height up.

> > 
> > > > +    * - __s32
> > > > +      - ``parameter``
> > > > +      -
> > > 
> > > So what is the parameter? It has no description.
> > > 
> > 
> > I newly add a ctrl type V4L2_CTRL_TYPE_REGION, and this struct is
> > related to the type, so I thought I need to define a general argument to
> > meet different needs, then this type can support a series of controls.
> > For this patch, it's qp offset.
> > I thought if I name it as qp_offset, the ctrl type can't be used on
> > other similar controls.
> > Is it better to rename it or add more description and keep the name?
> 
> This seems very specific to this use-case, so it makes sense if this is
> defined as such.
> 
> If you want use generic types, then you would need two controls: one
> to define the regions (using type v4l2_rect), and one to set the
> QP offset (type integer). This assumes that both arrays are set to the
> same size.
> 
> If you want to keep them together, then just make a new type for this.
> 
> But a more general question regarding this feature: is this hardware
> specific? Or is this defined in some codec standard?

Most CODEC don't require a flat quantization. Each macroblock can be quantized
separately. There is multiple approach to give users control on what QP to use
for which macro-blocks. The ROI approach is notably found in VA API [0]. I've
seen ROI support left unimplemented in many of our stateful encoders, so this is
common enough approach.

[0] https://intel.github.io/libva/structVAEncROI.html

More recent APIs, notably DX3D12 and Vulkan Video, have adopted the QP Map
approach. They offer both absolute and offset QP, placed in a 2D map, with the
number of effective pixels per map entry defined. Not much details are known
about VKV, but on DX3D12, you can find some details in the AV1 encoder
documentation. While the ROI approach can easily be translate to QP Maps, the
other direction may not be possible. Usually, the maximum size of a QP map is
way larger then the maximum number of ROI.

https://microsoft.github.io/DirectX-Specs/d3d/D3D12_Video_Encoding_AV1.html

We should also document what happens when ROI overlap, if that supported ? are
the offset summed ? I don't think absolute QP is widely supported, but to be
verified. A split approach would make adding support for absolute a lot easier.

For a practical use of ROI, see:

https://obsproject.com/forum/resources/encoder-region-of-interest-editor.1904/

> 
> 
> And to be clear, this has nothing to do with the UVC ROI patch series that
> you linked to, right? You just reused some of the work that was done there

I believe the UVC ROI refers to rectangles used in 3A. Usually user or AI
driven, these will tell the camera the location of the interesting object. This
reflects on Mobile to tapping on an object in order to focus on it.

ROI can also be used to communicate back the bounding box of objects in an
image. Though, on modern system we tend to need more then that, as we want to
put these objects in relation to each other, and want to use other shapes. Think
of your eyes that are in relation to your face.

> .
> 
> I am leaning to splitting this up in two controls: one defines the ROIs, and
> one defines the parameter for each ROI. This makes it very easy to reuse in
> other scenarios (such as UVC).
> 
> But I really need to know a bit more about this feature.

Hope this drop of info helped a bit. I don't have a very strong opinion between
using a compound control vs two array control. Though your proposal makes the
built-in min/max/def feature usable, where with compound controls we don't
really have an API to expose that, and need to hand-write validation. It also
enables possible sharing a bit with future QP Maps implementation.

Perhaps good to know that GStreamer do have a metadata representation of ROI. It
is generic with the rectangle and location as base value and additional type and
arbitrary parameters. That is used for VA-API encoders. Before AI boom, this is
what we'd use to place objects detected with libraries like OpenCV. Typically,
an app would translate let's say faces ROI to an encoder QP offset based on its
own logic. AI systems in GStreamer are now moving to the GstAnalyticMeta, which
is more powerful and allow for relations to be expressed.

https://gitlab.freedesktop.org/gstreamer/gstreamer/-/blob/main/subprojects/gst-plugins-base/gst-libs/gst/video/gstvideometa.h?ref_type=heads#L308

https://gitlab.freedesktop.org/gstreamer/gstreamer/-/blob/main/subprojects/gst-plugins-bad/gst-libs/gst/analytics/gstanalyticsmeta.h?ref_type=heads

regards,
Nicolas

> 
> Regards,
> 
> 	Hans
> 
> > 
> > > > +    * - __u32
> > > > +      - ``reserved[2]``
> > > > +      -
> > > 
> > > Add "Applications and drivers must set this to zero."
> > > 
> > 
> > Yes, I missed it
> > 
> > > > +
> > > > +.. raw:: latex
> > > > +
> > > > +    \normalsize
> > > > +
> > > > +``V4L2_CID_MPEG_VIDEO_ROI_MAP (integer)``
> > > > +    Specifies the QP offset for each block. This control is a
> > > > +    dynamically sized array. The array size can be calculated
> > > > +    from video resolution and the roi map block size which can
> > > > +    be got from ``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE``. This
> > > > +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
> > > > +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. Applicable to
> > > > +    encoders.
> > > > +
> > > > +``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE (struct)``
> > > > +    This control returns the roi block size in pixels. The struct
> > > > +    :c:type:`v4l2_area` provides the width and height in separate
> > > > +    fields. This control is applicable when
> > > > +    ``V4L2_CID_MPEG_VIDEO_ROI_MODE`` value is
> > > > +    ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. This control depends on the
> > > > +    encoding format. Applicable to encoders.
> > > > +
> > > >  .. raw:: latex
> > > > 
> > > >      \normalsize
> > > > diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> > > > b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> > > > index 1ea52011247a..54219a3b215a 100644
> > > > --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> > > > +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
> > > > @@ -612,6 +612,13 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
> > > >               NULL,
> > > >       };
> > > > 
> > > > +     static const char * const mpeg_video_roi_mode[] = {
> > > > +             "None",
> > > > +             "Rectangle",
> > > > +             "Map",
> > > > +             NULL,
> > > > +     };
> > > > +
> > > >       switch (id) {
> > > >       case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
> > > >               return mpeg_audio_sampling_freq; @@ -750,6 +757,8 @@
> > > > const char * const *v4l2_ctrl_get_menu(u32 id)
> > > >               return camera_orientation;
> > > >       case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE:
> > > >               return intra_refresh_period_type;
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:
> > > > +             return mpeg_video_roi_mode;
> > > >       default:
> > > >               return NULL;
> > > >       }
> > > > @@ -971,6 +980,10 @@ const char *v4l2_ctrl_get_name(u32 id)
> > > >       case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:               return "Frame
> > > LTR Index";
> > > >       case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:                return "Use LTR
> > > Frames";
> > > >       case V4L2_CID_MPEG_VIDEO_AVERAGE_QP:                    return "Average
> > > QP Value";
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:                      return "Video ROI
> > > Mode";
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_RECT:                      return "Video ROI
> > > Rectangle";
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MAP:                       return "Video ROI
> > > Map";
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:            return
> > > "Video ROI Map Block Size";
> > > >       case V4L2_CID_FWHT_I_FRAME_QP:                          return "FWHT I-Frame
> > > QP Value";
> > > >       case V4L2_CID_FWHT_P_FRAME_QP:                          return "FWHT P-
> > > Frame QP Value";
> > > > 
> > > > @@ -1512,6 +1525,22 @@ void v4l2_ctrl_fill(u32 id, const char **name,
> > > enum v4l2_ctrl_type *type,
> > > >               *type = V4L2_CTRL_TYPE_INTEGER;
> > > >               *flags |= V4L2_CTRL_FLAG_READ_ONLY;
> > > >               break;
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:
> > > > +             *type = V4L2_CTRL_TYPE_MENU;
> > > > +             *flags |= V4L2_CTRL_FLAG_UPDATE;
> > > > +             break;
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_RECT:
> > > > +             *type = V4L2_CTRL_TYPE_REGION;
> > > > +             *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY |
> > > V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
> > > > +             break;
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MAP:
> > > > +             *type = V4L2_CTRL_TYPE_INTEGER;
> > > > +             *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY |
> > > V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
> > > > +             break;
> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:
> > > > +             *type = V4L2_CTRL_TYPE_AREA;
> > > > +             *flags |= V4L2_CTRL_FLAG_READ_ONLY;
> > > > +             break;
> > > >       case V4L2_CID_PIXEL_RATE:
> > > >               *type = V4L2_CTRL_TYPE_INTEGER64;
> > > >               *flags |= V4L2_CTRL_FLAG_READ_ONLY; diff --git
> > > > a/include/uapi/linux/v4l2-controls.h
> > > > b/include/uapi/linux/v4l2-controls.h
> > > > index 974fd254e573..169a676fd64c 100644
> > > > --- a/include/uapi/linux/v4l2-controls.h
> > > > +++ b/include/uapi/linux/v4l2-controls.h
> > > > @@ -900,6 +900,17 @@ enum v4l2_mpeg_video_av1_level {
> > > > 
> > > >  #define V4L2_CID_MPEG_VIDEO_AVERAGE_QP  (V4L2_CID_CODEC_BASE +
> > > 657)
> > > > 
> > > > +enum v4l2_mpeg_video_roi_mode {
> > > > +     V4L2_MPEG_VIDEO_ROI_MODE_NONE,
> > > > +     V4L2_MPEG_VIDEO_ROI_MODE_RECT,
> > > > +     V4L2_MPEG_VIDEO_ROI_MODE_MAP
> > > > +};
> > > > +
> > > > +#define V4L2_CID_MPEG_VIDEO_ROI_MODE         (V4L2_CID_CODEC_BASE
> > > + 658)
> > > > +#define V4L2_CID_MPEG_VIDEO_ROI_RECT         (V4L2_CID_CODEC_BASE +
> > > 659)
> > > > +#define V4L2_CID_MPEG_VIDEO_ROI_MAP          (V4L2_CID_CODEC_BASE +
> > > 660)
> > > > +#define V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE
> > > (V4L2_CID_CODEC_BASE + 661)
> > > > +
> > > >  /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2
> > > */
> > > >  #define V4L2_CID_CODEC_CX2341X_BASE
> > > (V4L2_CTRL_CLASS_CODEC | 0x1000)
> > > >  #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE
> > > (V4L2_CID_CODEC_CX2341X_BASE+0)
> > 
>
Ming Qian Oct. 21, 2024, 6:05 a.m. UTC | #5
Hi Hans and Nicolas,

>-----Original Message-----
>From: Nicolas Dufresne <nicolas@ndufresne.ca>
>Sent: Friday, October 18, 2024 10:41 PM
>To: Hans Verkuil <hverkuil-cisco@xs4all.nl>; Ming Qian <ming.qian@nxp.com>;
>mchehab@kernel.org
>Cc: yunkec@google.com; s.hauer@pengutronix.de; kernel@pengutronix.de;
>festevam@gmail.com; dl-linux-imx <linux-imx@nxp.com>; X.H. Bao
><xiahong.bao@nxp.com>; Ming Zhou <ming.zhou@nxp.com>; Eagle Zhou
><eagle.zhou@nxp.com>; Tao Jiang <tao.jiang_2@nxp.com>; Ming Qian (OSS)
><ming.qian@oss.nxp.com>; imx@lists.linux.dev; linux-media@vger.kernel.org;
>linux-kernel@vger.kernel.org; linux-arm-kernel@lists.infradead.org
>Subject: Re: [EXT] Re: [RFC v2 5/6] media: v4l2-ctrls: Add video roi ctrls
>
>Caution: This is an external email. Please take care when clicking links or
>opening attachments. When in doubt, report the message using the 'Report
>this email' button
>
>
>Hi Ming and Hans,
>
>adding my two cents...
>
>Le vendredi 18 octobre 2024 à 11:54 +0200, Hans Verkuil a écrit :
>> Hi Ming Qian,
>>
>> On 18/10/2024 10:20, Ming Qian wrote:
>> > Hi Hans,
>> >
>> > > -----Original Message-----
>> > > From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
>> > > Sent: Friday, October 18, 2024 2:28 PM
>> > > To: Ming Qian <ming.qian@nxp.com>; mchehab@kernel.org
>> > > Cc: yunkec@google.com; nicolas@ndufresne.ca;
>> > > s.hauer@pengutronix.de; kernel@pengutronix.de; festevam@gmail.com;
>> > > dl-linux-imx <linux- imx@nxp.com>; X.H. Bao <xiahong.bao@nxp.com>;
>> > > Ming Zhou <ming.zhou@nxp.com>; Eagle Zhou <eagle.zhou@nxp.com>;
>> > > Tao Jiang <tao.jiang_2@nxp.com>; Ming Qian (OSS)
>> > > <ming.qian@oss.nxp.com>; imx@lists.linux.dev;
>> > > linux-media@vger.kernel.org; linux- kernel@vger.kernel.org;
>> > > linux-arm-kernel@lists.infradead.org
>> > > Subject: [EXT] Re: [RFC v2 5/6] media: v4l2-ctrls: Add video roi
>> > > ctrls
>> > >
>> > > Caution: This is an external email. Please take care when clicking
>> > > links or opening attachments. When in doubt, report the message
>> > > using the 'Report this email' button
>> > >
>> > >
>> > > On 18/10/2024 07:44, Ming Qian wrote:
>> > > > Add some ctrls to support the video encoder ROI feature.
>> > > > Support 2 encoder ROI configurations that are rectangular region
>> > > > and QP map
>> > > >
>> > > > Signed-off-by: Ming Qian <ming.qian@nxp.com>
>> > > > Signed-off-by: TaoJiang <tao.jiang_2@nxp.com>
>> > > > ---
>> > > >  .../media/v4l/ext-ctrls-codec.rst             | 73 +++++++++++++++++++
>> > > >  drivers/media/v4l2-core/v4l2-ctrls-defs.c     | 29 ++++++++
>> > > >  include/uapi/linux/v4l2-controls.h            | 11 +++
>> > > >  3 files changed, 113 insertions(+)
>> > > >
>> > > > diff --git
>> > > > a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>> > > > b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>> > > > index 4a379bd9e3fb..6b972247778c 100644
>> > > > --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>> > > > +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
>> > > > @@ -1667,6 +1667,79 @@ enum
>> > > v4l2_mpeg_video_h264_hierarchical_coding_type -
>> > > >      Codecs need to always use the specified range, rather then
>> > > > a HW custom
>> > > range.
>> > > >      Applicable to encoders
>> > > >
>> > > > +``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>> > > > +    (enum)
>> > > > +
>> > > > +enum v4l2_mpeg_video_roi_mode -
>> > > > +    Video roi mode. Possible values are:
>> > > > +
>> > > > +
>> > > > +
>> > > > +.. flat-table::
>> > > > +    :header-rows:  0
>> > > > +    :stub-columns: 0
>> > > > +
>> > > > +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_NONE``
>> > > > +      - No ROI in the MPEG stream
>> > > > +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``
>> > > > +      - Rectangle ROI mode
>> > > > +    * - ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``
>> > > > +      - Map ROI mode
>> > > > +
>> > > > +``V4L2_CID_MPEG_VIDEO_ROI_RECT (struct)``
>> > > > +    Select rectangular regions and specify the QP offset. The
>> > > > +    struct :c:type:`v4l2_ctrl_video_region_param` provides the
>> > > > +    rectangular region and the parameter to describe QP offset.
>> > > > +    The maximum number of rectangular regions depends on the
>> > > > +    hardware.  This control is a dynamically sized array. This
>> > > > +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>> > > > +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``. Applicable to
>> > > > +    encoders.
>> > > > +
>> > > > +.. c:type:: v4l2_ctrl_video_region_param
>> > > > +
>> > > > +.. raw:: latex
>> > > > +
>> > > > +    \small
>> > > > +
>> > > > +.. tabularcolumns:: |p{4.0cm}|p{4.0cm}|p{4.0cm}|
>> > > > +
>> > > > +.. flat-table:: struct v4l2_ctrl_video_region_param
>> > > > +    :header-rows:  0
>> > > > +    :stub-columns: 0
>> > > > +    :widths:       1 1 1
>> > > > +
>> > > > +    * - struct :c:type:`v4l2_rect`
>> > > > +      - ``rect``
>> > > > +      - The rectangular region
>> > >
>> > > What is the unit? I assume pixels. And inside what larger area is
>> > > this rectangle located? It probably needs to refer to one of the
>> > > SEL_TGT targets as described
>> > > here:
>> > >
>> > >
>https://hverkuil/
>%2F&data=05%7C02%7Cming.qian%40nxp.com%7C7acc80c8f68449c85a0408d
>cef82ded1%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C638648592
>606059405%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoi
>V2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=niwtmJ
>HAhKS%2FI2swfrjvmcRw9oUPs4xXz5EY93B4mRQ%3D&reserved=0.
>> > > home.xs4all.nl%2Fspec%2Fuserspace-api%2Fv4l%2Fv4l2-selection-
>> > >
>targets.html&data=05%7C02%7Cming.qian%40nxp.com%7Cfe9348ba24504eb
>> > >
>d98f608dcef3dffcf%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63
>> > >
>8648296786960098%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAi
>> > >
>LCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata
>> > > =cTXaNWLZs4l6IytSu9TWmEb7OyvF4viby9IjpOJXvmE%3D&reserved=0
>> > >
>> >
>> > We want to use pixels as the unit, but for some detailed encoder,
>> > there may be alignment constraints, and the rectangular area should
>> > be inside the encoded picture size, for example, we encode a 720P
>> > H.264 stream, the largest area is 1280x720@(0,0). This doesn't
>> > involve scaling up or down. I'm not sure if it's possible to align to crop or
>compose.
>> >
>> > Currently, we want to choose an area and increase or decrease the
>> > image quality. so we want to use a parameter to set the qp offset.
>
>In existing codec, the quantization factor is global to the macroblock being
>encoded (not to the image). So for H.264, it will be aligned to 16 pixels in both
>directions. The documentation should be clear on that and perhaps we should
>go further and defined the direction this should be rounded, which should be
>to round toward the edges, so X,Y gets rounded down, and width/height up.
>

You're right, my expression here is not appropriate, what I wanted to
say at first was changing the quality of the chosen area. And the area
should be aligned to the ROI block size.
I tried to add a new ctrl V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE to
query the ROI block size for the map type ROI, I think it should also
work for the rectangle type ROI. The X, Y and width and height should be
aligned, just like you said.

>> >
>> > > > +    * - __s32
>> > > > +      - ``parameter``
>> > > > +      -
>> > >
>> > > So what is the parameter? It has no description.
>> > >
>> >
>> > I newly add a ctrl type V4L2_CTRL_TYPE_REGION, and this struct is
>> > related to the type, so I thought I need to define a general
>> > argument to meet different needs, then this type can support a series of
>controls.
>> > For this patch, it's qp offset.
>> > I thought if I name it as qp_offset, the ctrl type can't be used on
>> > other similar controls.
>> > Is it better to rename it or add more description and keep the name?
>>
>> This seems very specific to this use-case, so it makes sense if this
>> is defined as such.
>>
>> If you want use generic types, then you would need two controls: one
>> to define the regions (using type v4l2_rect), and one to set the QP
>> offset (type integer). This assumes that both arrays are set to the
>> same size.
>>
>> If you want to keep them together, then just make a new type for this.
>>
>> But a more general question regarding this feature: is this hardware
>> specific? Or is this defined in some codec standard?

I initially considered using 2 controls, as you said, one to define
rectangle area, and one to set the QP offset.
What bothers me is how to ensure that two array type controls have the same array size.
So I tried a compound control.

I'll try two controls in the next version patch.

>
>Most CODEC don't require a flat quantization. Each macroblock can be
>quantized separately. There is multiple approach to give users control on what
>QP to use for which macro-blocks. The ROI approach is notably found in VA API
>[0]. I've seen ROI support left unimplemented in many of our stateful
>encoders, so this is common enough approach.
>
>[0]
>https://intel.git/
>hub.io%2Flibva%2FstructVAEncROI.html&data=05%7C02%7Cming.qian%40nxp
>.com%7C7acc80c8f68449c85a0408dcef82ded1%7C686ea1d3bc2b4c6fa92cd99c
>5c301635%7C0%7C0%7C638648592606098413%7CUnknown%7CTWFpbGZsb3
>d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3
>D%7C0%7C%7C%7C&sdata=BFiZ3RcyBYsiV0BwXdXp4ubpiyL1B21%2F3Vq3UFHP
>HOE%3D&reserved=0
>
>More recent APIs, notably DX3D12 and Vulkan Video, have adopted the QP
>Map approach. They offer both absolute and offset QP, placed in a 2D map,
>with the number of effective pixels per map entry defined. Not much details
>are known about VKV, but on DX3D12, you can find some details in the AV1
>encoder documentation. While the ROI approach can easily be translate to QP
>Maps, the other direction may not be possible. Usually, the maximum size of a
>QP map is way larger then the maximum number of ROI.
>
>https://microso/
>ft.github.io%2FDirectX-
>Specs%2Fd3d%2FD3D12_Video_Encoding_AV1.html&data=05%7C02%7Cming.
>qian%40nxp.com%7C7acc80c8f68449c85a0408dcef82ded1%7C686ea1d3bc2b4
>c6fa92cd99c5c301635%7C0%7C0%7C638648592606121020%7CUnknown%7CT
>WFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJX
>VCI6Mn0%3D%7C0%7C%7C%7C&sdata=tpT8VZ9ajW%2B132dsb6WiYlg3Pnqg7
>MXIZraiP6HkZpg%3D&reserved=0
>
>We should also document what happens when ROI overlap, if that
>supported ? are the offset summed ? I don't think absolute QP is widely
>supported, but to be verified. A split approach would make adding support for
>absolute a lot easier.
>
>For a practical use of ROI, see:
>
>https://obsproj/
>ect.com%2Fforum%2Fresources%2Fencoder-region-of-interest-
>editor.1904%2F&data=05%7C02%7Cming.qian%40nxp.com%7C7acc80c8f6844
>9c85a0408dcef82ded1%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7
>C638648592606142443%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAw
>MDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&
>sdata=zORnx0LUkOBkq3sgEAK55oaWM%2F2S5Igr6NxdgkBdwMs%3D&reserve
>d=0
>

Thank you for your additional explanation.

>>
>>
>> And to be clear, this has nothing to do with the UVC ROI patch series
>> that you linked to, right? You just reused some of the work that was
>> done there

Yes, I just want to reuse some work in the UVC ROI patchset and pick
some patches from it. Sorry for the inconvenience.

>
>I believe the UVC ROI refers to rectangles used in 3A. Usually user or AI driven,
>these will tell the camera the location of the interesting object. This reflects on
>Mobile to tapping on an object in order to focus on it.
>
>ROI can also be used to communicate back the bounding box of objects in an
>image. Though, on modern system we tend to need more then that, as we
>want to put these objects in relation to each other, and want to use other
>shapes. Think of your eyes that are in relation to your face.
>

Thanks your for your detailed explanation and additions.

>> .
>>
>> I am leaning to splitting this up in two controls: one defines the
>> ROIs, and one defines the parameter for each ROI. This makes it very
>> easy to reuse in other scenarios (such as UVC).
>>
>> But I really need to know a bit more about this feature.
>
>Hope this drop of info helped a bit. I don't have a very strong opinion between
>using a compound control vs two array control. Though your proposal makes
>the built-in min/max/def feature usable, where with compound controls we
>don't really have an API to expose that, and need to hand-write validation. It
>also enables possible sharing a bit with future QP Maps implementation.
>
>Perhaps good to know that GStreamer do have a metadata representation of
>ROI. It is generic with the rectangle and location as base value and additional
>type and arbitrary parameters. That is used for VA-API encoders. Before AI
>boom, this is what we'd use to place objects detected with libraries like
>OpenCV. Typically, an app would translate let's say faces ROI to an encoder QP
>offset based on its own logic. AI systems in GStreamer are now moving to the
>GstAnalyticMeta, which is more powerful and allow for relations to be
>expressed.
>
>https://gitlab.fr/
>eedesktop.org%2Fgstreamer%2Fgstreamer%2F-
>%2Fblob%2Fmain%2Fsubprojects%2Fgst-plugins-base%2Fgst-
>libs%2Fgst%2Fvideo%2Fgstvideometa.h%3Fref_type%3Dheads%23L308&data=
>05%7C02%7Cming.qian%40nxp.com%7C7acc80c8f68449c85a0408dcef82ded1
>%7C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C63864859260616379
>3%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiL
>CJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=BuCdWrWyHr0n
>GE9dqTpPhfuaTQEpTQuIHjrauv4hKc0%3D&reserved=0
>
>https://gitlab.fr/
>eedesktop.org%2Fgstreamer%2Fgstreamer%2F-
>%2Fblob%2Fmain%2Fsubprojects%2Fgst-plugins-bad%2Fgst-
>libs%2Fgst%2Fanalytics%2Fgstanalyticsmeta.h%3Fref_type%3Dheads&data=05
>%7C02%7Cming.qian%40nxp.com%7C7acc80c8f68449c85a0408dcef82ded1%7
>C686ea1d3bc2b4c6fa92cd99c5c301635%7C0%7C0%7C638648592606188142%
>7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJB
>TiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0%7C%7C%7C&sdata=QCCZ9aMArrRrediss
>kTQXBZRCmNXUbu%2B1rUwB5F%2FDO4%3D&reserved=0
>
>regards,
>Nicolas
>

I'm preparing the V3 patchset, and I will try two array controls.

Thank you both for your comments.

Ming

>>
>> Regards,
>>
>>       Hans
>>
>> >
>> > > > +    * - __u32
>> > > > +      - ``reserved[2]``
>> > > > +      -
>> > >
>> > > Add "Applications and drivers must set this to zero."
>> > >
>> >
>> > Yes, I missed it
>> >
>> > > > +
>> > > > +.. raw:: latex
>> > > > +
>> > > > +    \normalsize
>> > > > +
>> > > > +``V4L2_CID_MPEG_VIDEO_ROI_MAP (integer)``
>> > > > +    Specifies the QP offset for each block. This control is a
>> > > > +    dynamically sized array. The array size can be calculated
>> > > > +    from video resolution and the roi map block size which can
>> > > > +    be got from ``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE``. This
>> > > > +    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
>> > > > +    value is ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. Applicable to
>> > > > +    encoders.
>> > > > +
>> > > > +``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE (struct)``
>> > > > +    This control returns the roi block size in pixels. The struct
>> > > > +    :c:type:`v4l2_area` provides the width and height in separate
>> > > > +    fields. This control is applicable when
>> > > > +    ``V4L2_CID_MPEG_VIDEO_ROI_MODE`` value is
>> > > > +    ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. This control depends on
>the
>> > > > +    encoding format. Applicable to encoders.
>> > > > +
>> > > >  .. raw:: latex
>> > > >
>> > > >      \normalsize
>> > > > diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>> > > > b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>> > > > index 1ea52011247a..54219a3b215a 100644
>> > > > --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>> > > > +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
>> > > > @@ -612,6 +612,13 @@ const char * const *v4l2_ctrl_get_menu(u32
>id)
>> > > >               NULL,
>> > > >       };
>> > > >
>> > > > +     static const char * const mpeg_video_roi_mode[] = {
>> > > > +             "None",
>> > > > +             "Rectangle",
>> > > > +             "Map",
>> > > > +             NULL,
>> > > > +     };
>> > > > +
>> > > >       switch (id) {
>> > > >       case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
>> > > >               return mpeg_audio_sampling_freq; @@ -750,6 +757,8
>> > > > @@ const char * const *v4l2_ctrl_get_menu(u32 id)
>> > > >               return camera_orientation;
>> > > >       case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE:
>> > > >               return intra_refresh_period_type;
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:
>> > > > +             return mpeg_video_roi_mode;
>> > > >       default:
>> > > >               return NULL;
>> > > >       }
>> > > > @@ -971,6 +980,10 @@ const char *v4l2_ctrl_get_name(u32 id)
>> > > >       case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:               return
>"Frame
>> > > LTR Index";
>> > > >       case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:                return
>"Use LTR
>> > > Frames";
>> > > >       case V4L2_CID_MPEG_VIDEO_AVERAGE_QP:                    return
>"Average
>> > > QP Value";
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:                      return
>"Video ROI
>> > > Mode";
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_RECT:                      return "Video
>ROI
>> > > Rectangle";
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MAP:                       return "Video
>ROI
>> > > Map";
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:            return
>> > > "Video ROI Map Block Size";
>> > > >       case V4L2_CID_FWHT_I_FRAME_QP:                          return "FWHT I-
>Frame
>> > > QP Value";
>> > > >       case V4L2_CID_FWHT_P_FRAME_QP:                          return "FWHT P-
>> > > Frame QP Value";
>> > > >
>> > > > @@ -1512,6 +1525,22 @@ void v4l2_ctrl_fill(u32 id, const char
>> > > > **name,
>> > > enum v4l2_ctrl_type *type,
>> > > >               *type = V4L2_CTRL_TYPE_INTEGER;
>> > > >               *flags |= V4L2_CTRL_FLAG_READ_ONLY;
>> > > >               break;
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MODE:
>> > > > +             *type = V4L2_CTRL_TYPE_MENU;
>> > > > +             *flags |= V4L2_CTRL_FLAG_UPDATE;
>> > > > +             break;
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_RECT:
>> > > > +             *type = V4L2_CTRL_TYPE_REGION;
>> > > > +             *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY |
>> > > V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
>> > > > +             break;
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MAP:
>> > > > +             *type = V4L2_CTRL_TYPE_INTEGER;
>> > > > +             *flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY |
>> > > V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
>> > > > +             break;
>> > > > +     case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:
>> > > > +             *type = V4L2_CTRL_TYPE_AREA;
>> > > > +             *flags |= V4L2_CTRL_FLAG_READ_ONLY;
>> > > > +             break;
>> > > >       case V4L2_CID_PIXEL_RATE:
>> > > >               *type = V4L2_CTRL_TYPE_INTEGER64;
>> > > >               *flags |= V4L2_CTRL_FLAG_READ_ONLY; diff --git
>> > > > a/include/uapi/linux/v4l2-controls.h
>> > > > b/include/uapi/linux/v4l2-controls.h
>> > > > index 974fd254e573..169a676fd64c 100644
>> > > > --- a/include/uapi/linux/v4l2-controls.h
>> > > > +++ b/include/uapi/linux/v4l2-controls.h
>> > > > @@ -900,6 +900,17 @@ enum v4l2_mpeg_video_av1_level {
>> > > >
>> > > >  #define V4L2_CID_MPEG_VIDEO_AVERAGE_QP
>(V4L2_CID_CODEC_BASE +
>> > > 657)
>> > > >
>> > > > +enum v4l2_mpeg_video_roi_mode {
>> > > > +     V4L2_MPEG_VIDEO_ROI_MODE_NONE,
>> > > > +     V4L2_MPEG_VIDEO_ROI_MODE_RECT,
>> > > > +     V4L2_MPEG_VIDEO_ROI_MODE_MAP };
>> > > > +
>> > > > +#define V4L2_CID_MPEG_VIDEO_ROI_MODE
>(V4L2_CID_CODEC_BASE
>> > > + 658)
>> > > > +#define V4L2_CID_MPEG_VIDEO_ROI_RECT
>(V4L2_CID_CODEC_BASE +
>> > > 659)
>> > > > +#define V4L2_CID_MPEG_VIDEO_ROI_MAP
>(V4L2_CID_CODEC_BASE +
>> > > 660)
>> > > > +#define V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE
>> > > (V4L2_CID_CODEC_BASE + 661)
>> > > > +
>> > > >  /*  MPEG-class control IDs specific to the CX2341x driver as
>> > > > defined by V4L2
>> > > */
>> > > >  #define V4L2_CID_CODEC_CX2341X_BASE
>> > > (V4L2_CTRL_CLASS_CODEC | 0x1000)
>> > > >  #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE
>> > > (V4L2_CID_CODEC_CX2341X_BASE+0)
>> >
>>
diff mbox series

Patch

diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
index 4a379bd9e3fb..6b972247778c 100644
--- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
+++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst
@@ -1667,6 +1667,79 @@  enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     Codecs need to always use the specified range, rather then a HW custom range.
     Applicable to encoders
 
+``V4L2_CID_MPEG_VIDEO_ROI_MODE``
+    (enum)
+
+enum v4l2_mpeg_video_roi_mode -
+    Video roi mode. Possible values are:
+
+
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+
+    * - ``V4L2_MPEG_VIDEO_ROI_MODE_NONE``
+      - No ROI in the MPEG stream
+    * - ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``
+      - Rectangle ROI mode
+    * - ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``
+      - Map ROI mode
+
+``V4L2_CID_MPEG_VIDEO_ROI_RECT (struct)``
+    Select rectangular regions and specify the QP offset. The
+    struct :c:type:`v4l2_ctrl_video_region_param` provides the
+    rectangular region and the parameter to describe QP offset.
+    The maximum number of rectangular regions depends on the
+    hardware.  This control is a dynamically sized array. This
+    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
+    value is ``V4L2_MPEG_VIDEO_ROI_MODE_RECT``. Applicable to
+    encoders.
+
+.. c:type:: v4l2_ctrl_video_region_param
+
+.. raw:: latex
+
+    \small
+
+.. tabularcolumns:: |p{4.0cm}|p{4.0cm}|p{4.0cm}|
+
+.. flat-table:: struct v4l2_ctrl_video_region_param
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 1
+
+    * - struct :c:type:`v4l2_rect`
+      - ``rect``
+      - The rectangular region
+    * - __s32
+      - ``parameter``
+      -
+    * - __u32
+      - ``reserved[2]``
+      -
+
+.. raw:: latex
+
+    \normalsize
+
+``V4L2_CID_MPEG_VIDEO_ROI_MAP (integer)``
+    Specifies the QP offset for each block. This control is a
+    dynamically sized array. The array size can be calculated
+    from video resolution and the roi map block size which can
+    be got from ``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE``. This
+    control is applicable when ``V4L2_CID_MPEG_VIDEO_ROI_MODE``
+    value is ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. Applicable to
+    encoders.
+
+``V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE (struct)``
+    This control returns the roi block size in pixels. The struct
+    :c:type:`v4l2_area` provides the width and height in separate
+    fields. This control is applicable when
+    ``V4L2_CID_MPEG_VIDEO_ROI_MODE`` value is
+    ``V4L2_MPEG_VIDEO_ROI_MODE_MAP``. This control depends on the
+    encoding format. Applicable to encoders.
+
 .. raw:: latex
 
     \normalsize
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
index 1ea52011247a..54219a3b215a 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c
@@ -612,6 +612,13 @@  const char * const *v4l2_ctrl_get_menu(u32 id)
 		NULL,
 	};
 
+	static const char * const mpeg_video_roi_mode[] = {
+		"None",
+		"Rectangle",
+		"Map",
+		NULL,
+	};
+
 	switch (id) {
 	case V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ:
 		return mpeg_audio_sampling_freq;
@@ -750,6 +757,8 @@  const char * const *v4l2_ctrl_get_menu(u32 id)
 		return camera_orientation;
 	case V4L2_CID_MPEG_VIDEO_INTRA_REFRESH_PERIOD_TYPE:
 		return intra_refresh_period_type;
+	case V4L2_CID_MPEG_VIDEO_ROI_MODE:
+		return mpeg_video_roi_mode;
 	default:
 		return NULL;
 	}
@@ -971,6 +980,10 @@  const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX:		return "Frame LTR Index";
 	case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES:		return "Use LTR Frames";
 	case V4L2_CID_MPEG_VIDEO_AVERAGE_QP:			return "Average QP Value";
+	case V4L2_CID_MPEG_VIDEO_ROI_MODE:			return "Video ROI Mode";
+	case V4L2_CID_MPEG_VIDEO_ROI_RECT:			return "Video ROI Rectangle";
+	case V4L2_CID_MPEG_VIDEO_ROI_MAP:			return "Video ROI Map";
+	case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:		return "Video ROI Map Block Size";
 	case V4L2_CID_FWHT_I_FRAME_QP:				return "FWHT I-Frame QP Value";
 	case V4L2_CID_FWHT_P_FRAME_QP:				return "FWHT P-Frame QP Value";
 
@@ -1512,6 +1525,22 @@  void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 		*type = V4L2_CTRL_TYPE_INTEGER;
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
 		break;
+	case V4L2_CID_MPEG_VIDEO_ROI_MODE:
+		*type = V4L2_CTRL_TYPE_MENU;
+		*flags |= V4L2_CTRL_FLAG_UPDATE;
+		break;
+	case V4L2_CID_MPEG_VIDEO_ROI_RECT:
+		*type =	V4L2_CTRL_TYPE_REGION;
+		*flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY | V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
+		break;
+	case V4L2_CID_MPEG_VIDEO_ROI_MAP:
+		*type = V4L2_CTRL_TYPE_INTEGER;
+		*flags |= V4L2_CTRL_FLAG_DYNAMIC_ARRAY | V4L2_CTRL_FLAG_HAS_WHICH_MIN_MAX;
+		break;
+	case V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE:
+		*type = V4L2_CTRL_TYPE_AREA;
+		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
+		break;
 	case V4L2_CID_PIXEL_RATE:
 		*type = V4L2_CTRL_TYPE_INTEGER64;
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY;
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 974fd254e573..169a676fd64c 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -900,6 +900,17 @@  enum v4l2_mpeg_video_av1_level {
 
 #define V4L2_CID_MPEG_VIDEO_AVERAGE_QP  (V4L2_CID_CODEC_BASE + 657)
 
+enum v4l2_mpeg_video_roi_mode {
+	V4L2_MPEG_VIDEO_ROI_MODE_NONE,
+	V4L2_MPEG_VIDEO_ROI_MODE_RECT,
+	V4L2_MPEG_VIDEO_ROI_MODE_MAP
+};
+
+#define V4L2_CID_MPEG_VIDEO_ROI_MODE		(V4L2_CID_CODEC_BASE + 658)
+#define V4L2_CID_MPEG_VIDEO_ROI_RECT		(V4L2_CID_CODEC_BASE + 659)
+#define V4L2_CID_MPEG_VIDEO_ROI_MAP		(V4L2_CID_CODEC_BASE + 660)
+#define V4L2_CID_MPEG_VIDEO_ROI_MAP_BLOCK_SIZE	(V4L2_CID_CODEC_BASE + 661)
+
 /*  MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */
 #define V4L2_CID_CODEC_CX2341X_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1000)
 #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE		(V4L2_CID_CODEC_CX2341X_BASE+0)