diff mbox

[RFC,RESEND,04/11] v4l: Unify cache management hint buffer flags

Message ID 1441972234-8643-5-git-send-email-sakari.ailus@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sakari Ailus Sept. 11, 2015, 11:50 a.m. UTC
The V4L2_BUF_FLAG_NO_CACHE_INVALIDATE and V4L2_BUF_FLAG_NO_CACHE_CLEAN
buffer flags are currently not used by the kernel. Replace the definitions
by a single V4L2_BUF_FLAG_NO_CACHE_SYNC flag to be used by further
patches.

Different cache architectures should not be visible to the user space
which can make no meaningful use of the differences anyway. In case a
device can make use of non-coherent memory accesses, the necessary cache
operations depend on the CPU architecture and the buffer type, not the
requests of the user. The cache operation itself may be skipped on the
user's request which was the purpose of the two flags.

On ARM the invalidate and clean are separate operations whereas on
x86(-64) the two are a single operation (flush). Whether the hardware uses
the buffer for reading (V4L2_BUF_TYPE_*_OUTPUT*) or writing
(V4L2_BUF_TYPE_*CAPTURE*) already defines the required cache operation
(clean and invalidate, respectively). No user input is required.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
---
 Documentation/DocBook/media/v4l/io.xml | 25 +++++++++++--------------
 include/trace/events/v4l2.h            |  3 +--
 include/uapi/linux/videodev2.h         |  7 +++++--
 3 files changed, 17 insertions(+), 18 deletions(-)

Comments

Hans Verkuil Sept. 11, 2015, 4:26 p.m. UTC | #1
On 09/11/2015 01:50 PM, Sakari Ailus wrote:
> The V4L2_BUF_FLAG_NO_CACHE_INVALIDATE and V4L2_BUF_FLAG_NO_CACHE_CLEAN
> buffer flags are currently not used by the kernel. Replace the definitions
> by a single V4L2_BUF_FLAG_NO_CACHE_SYNC flag to be used by further
> patches.
> 
> Different cache architectures should not be visible to the user space
> which can make no meaningful use of the differences anyway. In case a
> device can make use of non-coherent memory accesses, the necessary cache
> operations depend on the CPU architecture and the buffer type, not the
> requests of the user. The cache operation itself may be skipped on the
> user's request which was the purpose of the two flags.
> 
> On ARM the invalidate and clean are separate operations whereas on
> x86(-64) the two are a single operation (flush). Whether the hardware uses
> the buffer for reading (V4L2_BUF_TYPE_*_OUTPUT*) or writing
> (V4L2_BUF_TYPE_*CAPTURE*) already defines the required cache operation
> (clean and invalidate, respectively). No user input is required.
> 
> Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>

Acked-by: Hans Verkuil <hans.verkuil@cisco.com>

> ---
>  Documentation/DocBook/media/v4l/io.xml | 25 +++++++++++--------------
>  include/trace/events/v4l2.h            |  3 +--
>  include/uapi/linux/videodev2.h         |  7 +++++--
>  3 files changed, 17 insertions(+), 18 deletions(-)
> 
> diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml
> index 7bbc2a4..4facd63 100644
> --- a/Documentation/DocBook/media/v4l/io.xml
> +++ b/Documentation/DocBook/media/v4l/io.xml
> @@ -1112,21 +1112,18 @@ application. Drivers set or clear this flag when the
>  	  linkend="vidioc-qbuf">VIDIOC_DQBUF</link> ioctl is called.</entry>
>  	  </row>
>  	  <row>
> -	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_INVALIDATE</constant></entry>
> +	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_SYNC</constant></entry>
>  	    <entry>0x00000800</entry>
> -	    <entry>Caches do not have to be invalidated for this buffer.
> -Typically applications shall use this flag if the data captured in the buffer
> -is not going to be touched by the CPU, instead the buffer will, probably, be
> -passed on to a DMA-capable hardware unit for further processing or output.
> -</entry>
> -	  </row>
> -	  <row>
> -	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_CLEAN</constant></entry>
> -	    <entry>0x00001000</entry>
> -	    <entry>Caches do not have to be cleaned for this buffer.
> -Typically applications shall use this flag for output buffers if the data
> -in this buffer has not been created by the CPU but by some DMA-capable unit,
> -in which case caches have not been used.</entry>
> +	    <entry>Do not perform CPU cache synchronisation operations
> +	    when the buffer is queued or dequeued. The user is
> +	    responsible for the correct use of this flag. It should be
> +	    only used when the buffer is not accessed using the CPU,
> +	    e.g. the buffer is written to by a hardware block and then
> +	    read by another one, in which case the flag should be set
> +	    in both <link linkend="vidioc-qbuf">VIDIOC_DQBUF</link>
> +	    and <link linkend="vidioc-qbuf">VIDIOC_QBUF</link> IOCTLs.
> +	    The flag has no effect on some devices / architectures.
> +	    </entry>
>  	  </row>
>  	  <row>
>  	    <entry><constant>V4L2_BUF_FLAG_LAST</constant></entry>
> diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h
> index dbf017b..4cee91d 100644
> --- a/include/trace/events/v4l2.h
> +++ b/include/trace/events/v4l2.h
> @@ -78,8 +78,7 @@ SHOW_FIELD
>  		{ V4L2_BUF_FLAG_ERROR,		     "ERROR" },		      \
>  		{ V4L2_BUF_FLAG_TIMECODE,	     "TIMECODE" },	      \
>  		{ V4L2_BUF_FLAG_PREPARED,	     "PREPARED" },	      \
> -		{ V4L2_BUF_FLAG_NO_CACHE_INVALIDATE, "NO_CACHE_INVALIDATE" }, \
> -		{ V4L2_BUF_FLAG_NO_CACHE_CLEAN,	     "NO_CACHE_CLEAN" },      \
> +		{ V4L2_BUF_FLAG_NO_CACHE_SYNC,	     "NO_CACHE_SYNC" },	      \
>  		{ V4L2_BUF_FLAG_TIMESTAMP_MASK,	     "TIMESTAMP_MASK" },      \
>  		{ V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN,   "TIMESTAMP_UNKNOWN" },   \
>  		{ V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, "TIMESTAMP_MONOTONIC" }, \
> diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
> index 3228fbe..8d85aac 100644
> --- a/include/uapi/linux/videodev2.h
> +++ b/include/uapi/linux/videodev2.h
> @@ -875,8 +875,11 @@ struct v4l2_buffer {
>  #define V4L2_BUF_FLAG_TIMECODE			0x00000100
>  /* Buffer is prepared for queuing */
>  #define V4L2_BUF_FLAG_PREPARED			0x00000400
> -/* Cache handling flags */
> -#define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE	0x00000800
> +/* Cache sync hint */
> +#define V4L2_BUF_FLAG_NO_CACHE_SYNC		0x00000800
> +/* DEPRECATED. THIS WILL BE REMOVED IN THE FUTURE! */
> +#define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE	V4L2_BUF_FLAG_NO_CACHE_SYNC
> +/* DEPRECATED. THIS WILL BE REMOVED IN THE FUTURE! */
>  #define V4L2_BUF_FLAG_NO_CACHE_CLEAN		0x00001000
>  /* Timestamp type */
>  #define V4L2_BUF_FLAG_TIMESTAMP_MASK		0x0000e000
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hans Verkuil Sept. 11, 2015, 4:44 p.m. UTC | #2
On 09/11/2015 06:26 PM, Hans Verkuil wrote:
> On 09/11/2015 01:50 PM, Sakari Ailus wrote:
>> The V4L2_BUF_FLAG_NO_CACHE_INVALIDATE and V4L2_BUF_FLAG_NO_CACHE_CLEAN
>> buffer flags are currently not used by the kernel. Replace the definitions
>> by a single V4L2_BUF_FLAG_NO_CACHE_SYNC flag to be used by further
>> patches.
>>
>> Different cache architectures should not be visible to the user space
>> which can make no meaningful use of the differences anyway. In case a
>> device can make use of non-coherent memory accesses, the necessary cache
>> operations depend on the CPU architecture and the buffer type, not the
>> requests of the user. The cache operation itself may be skipped on the
>> user's request which was the purpose of the two flags.
>>
>> On ARM the invalidate and clean are separate operations whereas on
>> x86(-64) the two are a single operation (flush). Whether the hardware uses
>> the buffer for reading (V4L2_BUF_TYPE_*_OUTPUT*) or writing
>> (V4L2_BUF_TYPE_*CAPTURE*) already defines the required cache operation
>> (clean and invalidate, respectively). No user input is required.
>>
>> Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
> 
> Acked-by: Hans Verkuil <hans.verkuil@cisco.com>

Let me add something here: drivers should be able to override this flag.

There are drivers that need to do fixups in buf_finish (a typical example is to
fill in some JPEG or MPEG header). Obviously this does require syncing to the
cpu.

This is actually a bigger problem since trying to do this with dmabuf can fail
as well. I made patches for that, but I never got around to finishing it:

http://git.linuxtv.org/cgit.cgi/hverkuil/media_tree.git/log/?h=vb2-prep5

Basically this changes drivers so they are explicit when they need cpu access
to a buffer.

By using these begin/end_cpu_access functions you can still implement fixups
correctly, although it does mean that the NO_CACHE_SYNC is effectively (but
unavoidably) ignored.

I think my patch series should be merged with this patch series 

The cover letter I wrote for the RFC patch series containing these cpu access
functions is here:

http://comments.gmane.org/gmane.linux.drivers.video-input-infrastructure/84399

I quote:

"The last 5 patches make this more strict by requiring all cpu access to
be bracketed by calls to vb2_plane_begin/end_cpu_access() which replaces
the old vb2_plane_vaddr() call.

Note: two drivers still use the vb2_plane_vaddr() call: coda and
exynos4-is/fimc-capture.c. For both drivers I will need some help since
I am not sure where to put the begin/end calls. Patch 15 removes
the vb2_plane_vaddr call, so obviously those two drivers won't compile
after that."

So that's the missing part that I never got around to.

Of course, there are new drivers as well since the last time I posted it,
so there may be more to do now.

I also said:

"The issues that patches 12-16 address are [...] only an issue when using
dmabuf with drivers that need cpu access."

But it will *also* be an issue once this new NO_CACHE_SYNC flag is honored.

Regards,

	Hans

> 
>> ---
>>  Documentation/DocBook/media/v4l/io.xml | 25 +++++++++++--------------
>>  include/trace/events/v4l2.h            |  3 +--
>>  include/uapi/linux/videodev2.h         |  7 +++++--
>>  3 files changed, 17 insertions(+), 18 deletions(-)
>>
>> diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml
>> index 7bbc2a4..4facd63 100644
>> --- a/Documentation/DocBook/media/v4l/io.xml
>> +++ b/Documentation/DocBook/media/v4l/io.xml
>> @@ -1112,21 +1112,18 @@ application. Drivers set or clear this flag when the
>>  	  linkend="vidioc-qbuf">VIDIOC_DQBUF</link> ioctl is called.</entry>
>>  	  </row>
>>  	  <row>
>> -	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_INVALIDATE</constant></entry>
>> +	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_SYNC</constant></entry>
>>  	    <entry>0x00000800</entry>
>> -	    <entry>Caches do not have to be invalidated for this buffer.
>> -Typically applications shall use this flag if the data captured in the buffer
>> -is not going to be touched by the CPU, instead the buffer will, probably, be
>> -passed on to a DMA-capable hardware unit for further processing or output.
>> -</entry>
>> -	  </row>
>> -	  <row>
>> -	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_CLEAN</constant></entry>
>> -	    <entry>0x00001000</entry>
>> -	    <entry>Caches do not have to be cleaned for this buffer.
>> -Typically applications shall use this flag for output buffers if the data
>> -in this buffer has not been created by the CPU but by some DMA-capable unit,
>> -in which case caches have not been used.</entry>
>> +	    <entry>Do not perform CPU cache synchronisation operations
>> +	    when the buffer is queued or dequeued. The user is
>> +	    responsible for the correct use of this flag. It should be
>> +	    only used when the buffer is not accessed using the CPU,
>> +	    e.g. the buffer is written to by a hardware block and then
>> +	    read by another one, in which case the flag should be set
>> +	    in both <link linkend="vidioc-qbuf">VIDIOC_DQBUF</link>
>> +	    and <link linkend="vidioc-qbuf">VIDIOC_QBUF</link> IOCTLs.
>> +	    The flag has no effect on some devices / architectures.
>> +	    </entry>
>>  	  </row>
>>  	  <row>
>>  	    <entry><constant>V4L2_BUF_FLAG_LAST</constant></entry>
>> diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h
>> index dbf017b..4cee91d 100644
>> --- a/include/trace/events/v4l2.h
>> +++ b/include/trace/events/v4l2.h
>> @@ -78,8 +78,7 @@ SHOW_FIELD
>>  		{ V4L2_BUF_FLAG_ERROR,		     "ERROR" },		      \
>>  		{ V4L2_BUF_FLAG_TIMECODE,	     "TIMECODE" },	      \
>>  		{ V4L2_BUF_FLAG_PREPARED,	     "PREPARED" },	      \
>> -		{ V4L2_BUF_FLAG_NO_CACHE_INVALIDATE, "NO_CACHE_INVALIDATE" }, \
>> -		{ V4L2_BUF_FLAG_NO_CACHE_CLEAN,	     "NO_CACHE_CLEAN" },      \
>> +		{ V4L2_BUF_FLAG_NO_CACHE_SYNC,	     "NO_CACHE_SYNC" },	      \
>>  		{ V4L2_BUF_FLAG_TIMESTAMP_MASK,	     "TIMESTAMP_MASK" },      \
>>  		{ V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN,   "TIMESTAMP_UNKNOWN" },   \
>>  		{ V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, "TIMESTAMP_MONOTONIC" }, \
>> diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
>> index 3228fbe..8d85aac 100644
>> --- a/include/uapi/linux/videodev2.h
>> +++ b/include/uapi/linux/videodev2.h
>> @@ -875,8 +875,11 @@ struct v4l2_buffer {
>>  #define V4L2_BUF_FLAG_TIMECODE			0x00000100
>>  /* Buffer is prepared for queuing */
>>  #define V4L2_BUF_FLAG_PREPARED			0x00000400
>> -/* Cache handling flags */
>> -#define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE	0x00000800
>> +/* Cache sync hint */
>> +#define V4L2_BUF_FLAG_NO_CACHE_SYNC		0x00000800
>> +/* DEPRECATED. THIS WILL BE REMOVED IN THE FUTURE! */
>> +#define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE	V4L2_BUF_FLAG_NO_CACHE_SYNC
>> +/* DEPRECATED. THIS WILL BE REMOVED IN THE FUTURE! */
>>  #define V4L2_BUF_FLAG_NO_CACHE_CLEAN		0x00001000
>>  /* Timestamp type */
>>  #define V4L2_BUF_FLAG_TIMESTAMP_MASK		0x0000e000
>>
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-media" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Laurent Pinchart Dec. 15, 2016, 8:15 p.m. UTC | #3
Hi Sakari,

Thank you for the patch.

On Friday 11 Sep 2015 14:50:27 Sakari Ailus wrote:
> The V4L2_BUF_FLAG_NO_CACHE_INVALIDATE and V4L2_BUF_FLAG_NO_CACHE_CLEAN
> buffer flags are currently not used by the kernel. Replace the definitions
> by a single V4L2_BUF_FLAG_NO_CACHE_SYNC flag to be used by further
> patches.
> 
> Different cache architectures should not be visible to the user space
> which can make no meaningful use of the differences anyway. In case a
> device can make use of non-coherent memory accesses, the necessary cache
> operations depend on the CPU architecture and the buffer type, not the
> requests of the user. The cache operation itself may be skipped on the
> user's request which was the purpose of the two flags.
> 
> On ARM the invalidate and clean are separate operations whereas on
> x86(-64) the two are a single operation (flush). Whether the hardware uses
> the buffer for reading (V4L2_BUF_TYPE_*_OUTPUT*) or writing
> (V4L2_BUF_TYPE_*CAPTURE*) already defines the required cache operation
> (clean and invalidate, respectively). No user input is required.

We need to perform the following operations.

	| QBUF		| DQBUF
-----------------------------------------------
CAPTURE	| Invalidate	| Invalidate (*)
OUTPUT	| Clean		| -

(*) for systems using speculative pre-fetching only.

The following optimizations are possible:

1. CAPTURE, the CPU has not written to the buffer before QBUF

Cache invalidation can be skipped at QBUF time, but becomes required at DQBUF 
time on all systems, regardless of whether they use speculative prefetching.

2. CAPTURE, the CPU will not read from the buffer after DQBUF

Cache invalidation can be skipped at DQBUF time.

3. CAPTURE, combination of (1) and (2)

Cache invalidation can be skipped at both QBUF and DQBUF time.

4. OUTPUT, the CPU has not written to the buffer before QBUF

Cache clean can be skipped at QBUF time.


A single flag can cover all cases, provided we keep track of the flag being 
set at QBUF time to force cache invalidation at DQBUF time for case (1) if the 
flag isn't set at DQBUF time.

One issue is that cache invalidation at DQBUF time for CAPTURE buffers isn't 
fully under the control of videobuf. We can instruct the DMA mapping API to 
skip cache handling, but we can't ask it to force cache invalidation in the 
sync_for_cpu operation for non speculative prefetching systems. On ARM32 the 
implementation currently always invalidates the cache in 
__dma_page_dev_to_cpu() for CAPTURE buffers so we're currently safe, but 
there's a FIXME comment that might lead to someone fixing the implementation 
in the future. I believe we'll have to fix this in the DMA mapping level, 
userspace shouldn't be affected.

Feel free to capture (part of) this explanation in the commit message to 
clarify your last paragraph.

> Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
> ---
>  Documentation/DocBook/media/v4l/io.xml | 25 +++++++++++--------------
>  include/trace/events/v4l2.h            |  3 +--
>  include/uapi/linux/videodev2.h         |  7 +++++--
>  3 files changed, 17 insertions(+), 18 deletions(-)
> 
> diff --git a/Documentation/DocBook/media/v4l/io.xml
> b/Documentation/DocBook/media/v4l/io.xml index 7bbc2a4..4facd63 100644
> --- a/Documentation/DocBook/media/v4l/io.xml
> +++ b/Documentation/DocBook/media/v4l/io.xml
> @@ -1112,21 +1112,18 @@ application. Drivers set or clear this flag when the
> linkend="vidioc-qbuf">VIDIOC_DQBUF</link> ioctl is called.</entry> </row>
>  	  <row>
> -	    
<entry><constant>V4L2_BUF_FLAG_NO_CACHE_INVALIDATE</constant></entry>
> +	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_SYNC</constant></entry>
>  	    <entry>0x00000800</entry>
> -	    <entry>Caches do not have to be invalidated for this buffer.
> -Typically applications shall use this flag if the data captured in the
> buffer -is not going to be touched by the CPU, instead the buffer will,
> probably, be -passed on to a DMA-capable hardware unit for further
> processing or output. -</entry>
> -	  </row>
> -	  <row>
> -	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_CLEAN</constant></entry>
> -	    <entry>0x00001000</entry>
> -	    <entry>Caches do not have to be cleaned for this buffer.
> -Typically applications shall use this flag for output buffers if the data
> -in this buffer has not been created by the CPU but by some DMA-capable
> unit, -in which case caches have not been used.</entry>
> +	    <entry>Do not perform CPU cache synchronisation operations
> +	    when the buffer is queued or dequeued. The user is
> +	    responsible for the correct use of this flag. It should be
> +	    only used when the buffer is not accessed using the CPU,
> +	    e.g. the buffer is written to by a hardware block and then
> +	    read by another one, in which case the flag should be set
> +	    in both <link linkend="vidioc-qbuf">VIDIOC_DQBUF</link>
> +	    and <link linkend="vidioc-qbuf">VIDIOC_QBUF</link> IOCTLs.

I'd like to word this differently. As explained above, there can be cases 
where the flag would only be set in either QBUF or DQBUF. I would prefer 
documenting the flag as a hint for the kernel that userspace has not written 
to the buffer before QBUF (when the flag is set for VIDIOC_QBUF) or will not 
read from the buffer after DQBUF (when the flag is set for VIDIOC_DQBUF) and 
that the kernel is free to perform the appropriate cache optimizations 
(without any guarantee).

> +	    The flag has no effect on some devices / architectures.
> +	    </entry>
>  	  </row>
>  	  <row>
>  	    <entry><constant>V4L2_BUF_FLAG_LAST</constant></entry>

[snip]
Sakari Ailus Dec. 17, 2016, 12:35 a.m. UTC | #4
Hi Laurent,

Thank you for the review.

On Thu, Dec 15, 2016 at 10:15:39PM +0200, Laurent Pinchart wrote:
> Hi Sakari,
> 
> Thank you for the patch.
> 
> On Friday 11 Sep 2015 14:50:27 Sakari Ailus wrote:
> > The V4L2_BUF_FLAG_NO_CACHE_INVALIDATE and V4L2_BUF_FLAG_NO_CACHE_CLEAN
> > buffer flags are currently not used by the kernel. Replace the definitions
> > by a single V4L2_BUF_FLAG_NO_CACHE_SYNC flag to be used by further
> > patches.
> > 
> > Different cache architectures should not be visible to the user space
> > which can make no meaningful use of the differences anyway. In case a
> > device can make use of non-coherent memory accesses, the necessary cache
> > operations depend on the CPU architecture and the buffer type, not the
> > requests of the user. The cache operation itself may be skipped on the
> > user's request which was the purpose of the two flags.
> > 
> > On ARM the invalidate and clean are separate operations whereas on
> > x86(-64) the two are a single operation (flush). Whether the hardware uses
> > the buffer for reading (V4L2_BUF_TYPE_*_OUTPUT*) or writing
> > (V4L2_BUF_TYPE_*CAPTURE*) already defines the required cache operation
> > (clean and invalidate, respectively). No user input is required.
> 
> We need to perform the following operations.
> 
> 	| QBUF		| DQBUF
> -----------------------------------------------
> CAPTURE	| Invalidate	| Invalidate (*)
> OUTPUT	| Clean		| -
> 
> (*) for systems using speculative pre-fetching only.
> 
> The following optimizations are possible:
> 
> 1. CAPTURE, the CPU has not written to the buffer before QBUF
> 
> Cache invalidation can be skipped at QBUF time, but becomes required at DQBUF 
> time on all systems, regardless of whether they use speculative prefetching.
> 
> 2. CAPTURE, the CPU will not read from the buffer after DQBUF
> 
> Cache invalidation can be skipped at DQBUF time.
> 
> 3. CAPTURE, combination of (1) and (2)
> 
> Cache invalidation can be skipped at both QBUF and DQBUF time.
> 
> 4. OUTPUT, the CPU has not written to the buffer before QBUF
> 
> Cache clean can be skipped at QBUF time.

Ack.

> 
> 
> A single flag can cover all cases, provided we keep track of the flag being 
> set at QBUF time to force cache invalidation at DQBUF time for case (1) if the 
> flag isn't set at DQBUF time.
> 
> One issue is that cache invalidation at DQBUF time for CAPTURE buffers isn't 
> fully under the control of videobuf. We can instruct the DMA mapping API to 
> skip cache handling, but we can't ask it to force cache invalidation in the 
> sync_for_cpu operation for non speculative prefetching systems. On ARM32 the 
> implementation currently always invalidates the cache in 
> __dma_page_dev_to_cpu() for CAPTURE buffers so we're currently safe, but 
> there's a FIXME comment that might lead to someone fixing the implementation 
> in the future. I believe we'll have to fix this in the DMA mapping level, 
> userspace shouldn't be affected.
> 
> Feel free to capture (part of) this explanation in the commit message to 
> clarify your last paragraph.
> 
> > Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
> > ---
> >  Documentation/DocBook/media/v4l/io.xml | 25 +++++++++++--------------
> >  include/trace/events/v4l2.h            |  3 +--
> >  include/uapi/linux/videodev2.h         |  7 +++++--
> >  3 files changed, 17 insertions(+), 18 deletions(-)
> > 
> > diff --git a/Documentation/DocBook/media/v4l/io.xml
> > b/Documentation/DocBook/media/v4l/io.xml index 7bbc2a4..4facd63 100644
> > --- a/Documentation/DocBook/media/v4l/io.xml
> > +++ b/Documentation/DocBook/media/v4l/io.xml
> > @@ -1112,21 +1112,18 @@ application. Drivers set or clear this flag when the
> > linkend="vidioc-qbuf">VIDIOC_DQBUF</link> ioctl is called.</entry> </row>
> >  	  <row>
> > -	    
> <entry><constant>V4L2_BUF_FLAG_NO_CACHE_INVALIDATE</constant></entry>
> > +	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_SYNC</constant></entry>
> >  	    <entry>0x00000800</entry>
> > -	    <entry>Caches do not have to be invalidated for this buffer.
> > -Typically applications shall use this flag if the data captured in the
> > buffer -is not going to be touched by the CPU, instead the buffer will,
> > probably, be -passed on to a DMA-capable hardware unit for further
> > processing or output. -</entry>
> > -	  </row>
> > -	  <row>
> > -	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_CLEAN</constant></entry>
> > -	    <entry>0x00001000</entry>
> > -	    <entry>Caches do not have to be cleaned for this buffer.
> > -Typically applications shall use this flag for output buffers if the data
> > -in this buffer has not been created by the CPU but by some DMA-capable
> > unit, -in which case caches have not been used.</entry>
> > +	    <entry>Do not perform CPU cache synchronisation operations
> > +	    when the buffer is queued or dequeued. The user is
> > +	    responsible for the correct use of this flag. It should be
> > +	    only used when the buffer is not accessed using the CPU,
> > +	    e.g. the buffer is written to by a hardware block and then
> > +	    read by another one, in which case the flag should be set
> > +	    in both <link linkend="vidioc-qbuf">VIDIOC_DQBUF</link>
> > +	    and <link linkend="vidioc-qbuf">VIDIOC_QBUF</link> IOCTLs.
> 
> I'd like to word this differently. As explained above, there can be cases 
> where the flag would only be set in either QBUF or DQBUF. I would prefer 
> documenting the flag as a hint for the kernel that userspace has not written 
> to the buffer before QBUF (when the flag is set for VIDIOC_QBUF) or will not 
> read from the buffer after DQBUF (when the flag is set for VIDIOC_DQBUF) and 
> that the kernel is free to perform the appropriate cache optimizations 
> (without any guarantee).

AFAIR that was my intention but I don't seem to have written that in a way
that could be deciphered later on. Sounds good to me. :-)

> 
> > +	    The flag has no effect on some devices / architectures.
> > +	    </entry>
> >  	  </row>
> >  	  <row>
> >  	    <entry><constant>V4L2_BUF_FLAG_LAST</constant></entry>
> 
> [snip]
>
diff mbox

Patch

diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml
index 7bbc2a4..4facd63 100644
--- a/Documentation/DocBook/media/v4l/io.xml
+++ b/Documentation/DocBook/media/v4l/io.xml
@@ -1112,21 +1112,18 @@  application. Drivers set or clear this flag when the
 	  linkend="vidioc-qbuf">VIDIOC_DQBUF</link> ioctl is called.</entry>
 	  </row>
 	  <row>
-	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_INVALIDATE</constant></entry>
+	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_SYNC</constant></entry>
 	    <entry>0x00000800</entry>
-	    <entry>Caches do not have to be invalidated for this buffer.
-Typically applications shall use this flag if the data captured in the buffer
-is not going to be touched by the CPU, instead the buffer will, probably, be
-passed on to a DMA-capable hardware unit for further processing or output.
-</entry>
-	  </row>
-	  <row>
-	    <entry><constant>V4L2_BUF_FLAG_NO_CACHE_CLEAN</constant></entry>
-	    <entry>0x00001000</entry>
-	    <entry>Caches do not have to be cleaned for this buffer.
-Typically applications shall use this flag for output buffers if the data
-in this buffer has not been created by the CPU but by some DMA-capable unit,
-in which case caches have not been used.</entry>
+	    <entry>Do not perform CPU cache synchronisation operations
+	    when the buffer is queued or dequeued. The user is
+	    responsible for the correct use of this flag. It should be
+	    only used when the buffer is not accessed using the CPU,
+	    e.g. the buffer is written to by a hardware block and then
+	    read by another one, in which case the flag should be set
+	    in both <link linkend="vidioc-qbuf">VIDIOC_DQBUF</link>
+	    and <link linkend="vidioc-qbuf">VIDIOC_QBUF</link> IOCTLs.
+	    The flag has no effect on some devices / architectures.
+	    </entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_BUF_FLAG_LAST</constant></entry>
diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h
index dbf017b..4cee91d 100644
--- a/include/trace/events/v4l2.h
+++ b/include/trace/events/v4l2.h
@@ -78,8 +78,7 @@  SHOW_FIELD
 		{ V4L2_BUF_FLAG_ERROR,		     "ERROR" },		      \
 		{ V4L2_BUF_FLAG_TIMECODE,	     "TIMECODE" },	      \
 		{ V4L2_BUF_FLAG_PREPARED,	     "PREPARED" },	      \
-		{ V4L2_BUF_FLAG_NO_CACHE_INVALIDATE, "NO_CACHE_INVALIDATE" }, \
-		{ V4L2_BUF_FLAG_NO_CACHE_CLEAN,	     "NO_CACHE_CLEAN" },      \
+		{ V4L2_BUF_FLAG_NO_CACHE_SYNC,	     "NO_CACHE_SYNC" },	      \
 		{ V4L2_BUF_FLAG_TIMESTAMP_MASK,	     "TIMESTAMP_MASK" },      \
 		{ V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN,   "TIMESTAMP_UNKNOWN" },   \
 		{ V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, "TIMESTAMP_MONOTONIC" }, \
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 3228fbe..8d85aac 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -875,8 +875,11 @@  struct v4l2_buffer {
 #define V4L2_BUF_FLAG_TIMECODE			0x00000100
 /* Buffer is prepared for queuing */
 #define V4L2_BUF_FLAG_PREPARED			0x00000400
-/* Cache handling flags */
-#define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE	0x00000800
+/* Cache sync hint */
+#define V4L2_BUF_FLAG_NO_CACHE_SYNC		0x00000800
+/* DEPRECATED. THIS WILL BE REMOVED IN THE FUTURE! */
+#define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE	V4L2_BUF_FLAG_NO_CACHE_SYNC
+/* DEPRECATED. THIS WILL BE REMOVED IN THE FUTURE! */
 #define V4L2_BUF_FLAG_NO_CACHE_CLEAN		0x00001000
 /* Timestamp type */
 #define V4L2_BUF_FLAG_TIMESTAMP_MASK		0x0000e000