diff mbox

[RFC,RESEND,03/11] vb2: Move cache synchronisation from buffer done to dqbuf handler

Message ID 1441972234-8643-4-git-send-email-sakari.ailus@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sakari Ailus Sept. 11, 2015, 11:50 a.m. UTC
The cache synchronisation may be a time consuming operation and thus not
best performed in an interrupt which is a typical context for
vb2_buffer_done() calls. This may consume up to tens of ms on some
machines, depending on the buffer size.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
---
 drivers/media/v4l2-core/videobuf2-core.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

Comments

Hans Verkuil Sept. 11, 2015, 4:25 p.m. UTC | #1
On 09/11/2015 01:50 PM, Sakari Ailus wrote:
> The cache synchronisation may be a time consuming operation and thus not
> best performed in an interrupt which is a typical context for
> vb2_buffer_done() calls. This may consume up to tens of ms on some
> machines, depending on the buffer size.
> 
> Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
> ---
>  drivers/media/v4l2-core/videobuf2-core.c | 20 ++++++++++----------
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
> index 64fce4d..c5c0707a 100644
> --- a/drivers/media/v4l2-core/videobuf2-core.c
> +++ b/drivers/media/v4l2-core/videobuf2-core.c
> @@ -1177,7 +1177,6 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
>  {
>  	struct vb2_queue *q = vb->vb2_queue;
>  	unsigned long flags;
> -	unsigned int plane;
>  
>  	if (WARN_ON(vb->state != VB2_BUF_STATE_ACTIVE))
>  		return;
> @@ -1197,10 +1196,6 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
>  	dprintk(4, "done processing on buffer %d, state: %d\n",
>  			vb->v4l2_buf.index, state);
>  
> -	/* sync buffers */
> -	for (plane = 0; plane < vb->num_planes; ++plane)
> -		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
> -

Ah, OK, so it is removed here,

>  	/* Add the buffer to the done buffers list */
>  	spin_lock_irqsave(&q->done_lock, flags);
>  	vb->state = state;
> @@ -2086,7 +2081,7 @@ EXPORT_SYMBOL_GPL(vb2_wait_for_all_buffers);
>  static void __vb2_dqbuf(struct vb2_buffer *vb)
>  {
>  	struct vb2_queue *q = vb->vb2_queue;
> -	unsigned int i;
> +	unsigned int plane;
>  
>  	/* nothing to do if the buffer is already dequeued */
>  	if (vb->state == VB2_BUF_STATE_DEQUEUED)
> @@ -2094,13 +2089,18 @@ static void __vb2_dqbuf(struct vb2_buffer *vb)
>  
>  	vb->state = VB2_BUF_STATE_DEQUEUED;
>  
> +	/* sync buffers */
> +	for (plane = 0; plane < vb->num_planes; plane++)
> +		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
> +

to here.

I'm not sure if this is correct... So __vb2_dqbuf is called from __vb2_queue_cancel(),
but now the buf_finish() callback is called *before* the memop finish() callback,
where this was the other way around in __vb2_queue_cancel(). I don't think that is
right since buf_finish() expects that the buffer is synced for the cpu.

Was this tested with CONFIG_VIDEO_ADV_DEBUG set and with 'v4l2-compliance -s'?
Not that that would help if things are done in the wrong order...

Regards,

	Hans

>  	/* unmap DMABUF buffer */
>  	if (q->memory == V4L2_MEMORY_DMABUF)
> -		for (i = 0; i < vb->num_planes; ++i) {
> -			if (!vb->planes[i].dbuf_mapped)
> +		for (plane = 0; plane < vb->num_planes; ++plane) {
> +			if (!vb->planes[plane].dbuf_mapped)
>  				continue;
> -			call_void_memop(vb, unmap_dmabuf, vb->planes[i].mem_priv);
> -			vb->planes[i].dbuf_mapped = 0;
> +			call_void_memop(vb, unmap_dmabuf,
> +					vb->planes[plane].mem_priv);
> +			vb->planes[plane].dbuf_mapped = 0;
>  		}
>  }
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-media" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sakari Ailus Sept. 15, 2015, 7:51 a.m. UTC | #2
Hi Hans,

Thank you for the review!

Hans Verkuil wrote:
> On 09/11/2015 01:50 PM, Sakari Ailus wrote:
>> The cache synchronisation may be a time consuming operation and thus not
>> best performed in an interrupt which is a typical context for
>> vb2_buffer_done() calls. This may consume up to tens of ms on some
>> machines, depending on the buffer size.
>>
>> Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
>> ---
>>  drivers/media/v4l2-core/videobuf2-core.c | 20 ++++++++++----------
>>  1 file changed, 10 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
>> index 64fce4d..c5c0707a 100644
>> --- a/drivers/media/v4l2-core/videobuf2-core.c
>> +++ b/drivers/media/v4l2-core/videobuf2-core.c
>> @@ -1177,7 +1177,6 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
>>  {
>>  	struct vb2_queue *q = vb->vb2_queue;
>>  	unsigned long flags;
>> -	unsigned int plane;
>>  
>>  	if (WARN_ON(vb->state != VB2_BUF_STATE_ACTIVE))
>>  		return;
>> @@ -1197,10 +1196,6 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
>>  	dprintk(4, "done processing on buffer %d, state: %d\n",
>>  			vb->v4l2_buf.index, state);
>>  
>> -	/* sync buffers */
>> -	for (plane = 0; plane < vb->num_planes; ++plane)
>> -		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
>> -
> 
> Ah, OK, so it is removed here,

I can merge the two patches for the next version if you prefer that.

> 
>>  	/* Add the buffer to the done buffers list */
>>  	spin_lock_irqsave(&q->done_lock, flags);
>>  	vb->state = state;
>> @@ -2086,7 +2081,7 @@ EXPORT_SYMBOL_GPL(vb2_wait_for_all_buffers);
>>  static void __vb2_dqbuf(struct vb2_buffer *vb)
>>  {
>>  	struct vb2_queue *q = vb->vb2_queue;
>> -	unsigned int i;
>> +	unsigned int plane;
>>  
>>  	/* nothing to do if the buffer is already dequeued */
>>  	if (vb->state == VB2_BUF_STATE_DEQUEUED)
>> @@ -2094,13 +2089,18 @@ static void __vb2_dqbuf(struct vb2_buffer *vb)
>>  
>>  	vb->state = VB2_BUF_STATE_DEQUEUED;
>>  
>> +	/* sync buffers */
>> +	for (plane = 0; plane < vb->num_planes; plane++)
>> +		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
>> +
> 
> to here.
> 
> I'm not sure if this is correct... So __vb2_dqbuf is called from __vb2_queue_cancel(),
> but now the buf_finish() callback is called *before* the memop finish() callback,
> where this was the other way around in __vb2_queue_cancel(). I don't think that is
> right since buf_finish() expects that the buffer is synced for the cpu.

I don't mind reordering them. The vb->state will be different as
__vb2_dqbuf() has already been called, there's at least one buffer state
check in a driver. However, __vb2_dqbuf() unconditionally sets the
buffer state to DEQUEUED, overriding e.g. ERROR which a driver would be
interested in.

I think the cache sync needs to be moved out of __vb2_dqbuf() to the
same level where it's called so proper ordering can be maintained while
still flushing cache before buf_finish() is called.

> 
> Was this tested with CONFIG_VIDEO_ADV_DEBUG set and with 'v4l2-compliance -s'?
> Not that that would help if things are done in the wrong order...

I'll do that the next time.
Laurent Pinchart Dec. 15, 2016, 11:47 p.m. UTC | #3
Hi Sakari,

Thank you for the patch.

On Tuesday 15 Sep 2015 10:51:10 Sakari Ailus wrote:
> Hans Verkuil wrote:
> > On 09/11/2015 01:50 PM, Sakari Ailus wrote:
> >> The cache synchronisation may be a time consuming operation and thus not
> >> best performed in an interrupt which is a typical context for
> >> vb2_buffer_done() calls. This may consume up to tens of ms on some
> >> machines, depending on the buffer size.
> >> 
> >> Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
> >> ---
> >> 
> >>  drivers/media/v4l2-core/videobuf2-core.c | 20 ++++++++++----------
> >>  1 file changed, 10 insertions(+), 10 deletions(-)
> >> 
> >> diff --git a/drivers/media/v4l2-core/videobuf2-core.c
> >> b/drivers/media/v4l2-core/videobuf2-core.c index 64fce4d..c5c0707a
> >> 100644
> >> --- a/drivers/media/v4l2-core/videobuf2-core.c
> >> +++ b/drivers/media/v4l2-core/videobuf2-core.c
> >> @@ -1177,7 +1177,6 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum
> >> vb2_buffer_state state)
> >>  {
> >>  	struct vb2_queue *q = vb->vb2_queue;
> >>  	unsigned long flags;
> >> -	unsigned int plane;
> >> 
> >>  	if (WARN_ON(vb->state != VB2_BUF_STATE_ACTIVE))
> >>  		return;
> >> @@ -1197,10 +1196,6 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum
> >> vb2_buffer_state state)
> >>  	dprintk(4, "done processing on buffer %d, state: %d\n",
> >>  			vb->v4l2_buf.index, state);
> >> 
> >> -	/* sync buffers */
> >> -	for (plane = 0; plane < vb->num_planes; ++plane)
> >> -		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
> >> -
> > 
> > Ah, OK, so it is removed here,
> 
> I can merge the two patches for the next version if you prefer that.

I think that would be a good idea. They're both pretty small, and they're 
related. Merging them will make review easier.

> >>  	/* Add the buffer to the done buffers list */
> >>  	spin_lock_irqsave(&q->done_lock, flags);
> >>  	vb->state = state;
> >> 
> >> @@ -2086,7 +2081,7 @@ EXPORT_SYMBOL_GPL(vb2_wait_for_all_buffers);
> >>  static void __vb2_dqbuf(struct vb2_buffer *vb)
> >>  {
> >>  	struct vb2_queue *q = vb->vb2_queue;
> >> -	unsigned int i;
> >> +	unsigned int plane;
> >> 
> >>  	/* nothing to do if the buffer is already dequeued */
> >>  	if (vb->state == VB2_BUF_STATE_DEQUEUED)
> >> @@ -2094,13 +2089,18 @@ static void __vb2_dqbuf(struct vb2_buffer *vb)
> >> 
> >>  	vb->state = VB2_BUF_STATE_DEQUEUED;
> >> 
> >> +	/* sync buffers */
> >> +	for (plane = 0; plane < vb->num_planes; plane++)
> >> +		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
> >> +
> > 
> > to here.
> > 
> > I'm not sure if this is correct... So __vb2_dqbuf is called from
> > __vb2_queue_cancel(), but now the buf_finish() callback is called
> > *before* the memop finish() callback, where this was the other way around
> > in __vb2_queue_cancel(). I don't think that is right since buf_finish()
> > expects that the buffer is synced for the cpu.
>
> I don't mind reordering them.

The .buf_finish() driver callback could touch the contents of the buffer using 
the CPU, so I agree with Hans that we need to reorder the calls.

> The vb->state will be different as __vb2_dqbuf() has already been called,
> there's at least one buffer state check in a driver. However, __vb2_dqbuf()
> unconditionally sets the buffer state to DEQUEUED, overriding e.g. ERROR
> which a driver would be interested in.
> 
> I think the cache sync needs to be moved out of __vb2_dqbuf() to the
> same level where it's called so proper ordering can be maintained while
> still flushing cache before buf_finish() is called.

I think that would be the easiest option. It's a bit of a shame to duplicate 
the call, but I don't see any other easy way. A comment that states that cache 
sync needs to occur before .buf_finish() would probably be useful.

> > Was this tested with CONFIG_VIDEO_ADV_DEBUG set and with 'v4l2-compliance
> > -s'? Not that that would help if things are done in the wrong order...
> 
> I'll do that the next time.
diff mbox

Patch

diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 64fce4d..c5c0707a 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1177,7 +1177,6 @@  void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
 {
 	struct vb2_queue *q = vb->vb2_queue;
 	unsigned long flags;
-	unsigned int plane;
 
 	if (WARN_ON(vb->state != VB2_BUF_STATE_ACTIVE))
 		return;
@@ -1197,10 +1196,6 @@  void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
 	dprintk(4, "done processing on buffer %d, state: %d\n",
 			vb->v4l2_buf.index, state);
 
-	/* sync buffers */
-	for (plane = 0; plane < vb->num_planes; ++plane)
-		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
-
 	/* Add the buffer to the done buffers list */
 	spin_lock_irqsave(&q->done_lock, flags);
 	vb->state = state;
@@ -2086,7 +2081,7 @@  EXPORT_SYMBOL_GPL(vb2_wait_for_all_buffers);
 static void __vb2_dqbuf(struct vb2_buffer *vb)
 {
 	struct vb2_queue *q = vb->vb2_queue;
-	unsigned int i;
+	unsigned int plane;
 
 	/* nothing to do if the buffer is already dequeued */
 	if (vb->state == VB2_BUF_STATE_DEQUEUED)
@@ -2094,13 +2089,18 @@  static void __vb2_dqbuf(struct vb2_buffer *vb)
 
 	vb->state = VB2_BUF_STATE_DEQUEUED;
 
+	/* sync buffers */
+	for (plane = 0; plane < vb->num_planes; plane++)
+		call_void_memop(vb, finish, vb->planes[plane].mem_priv);
+
 	/* unmap DMABUF buffer */
 	if (q->memory == V4L2_MEMORY_DMABUF)
-		for (i = 0; i < vb->num_planes; ++i) {
-			if (!vb->planes[i].dbuf_mapped)
+		for (plane = 0; plane < vb->num_planes; ++plane) {
+			if (!vb->planes[plane].dbuf_mapped)
 				continue;
-			call_void_memop(vb, unmap_dmabuf, vb->planes[i].mem_priv);
-			vb->planes[i].dbuf_mapped = 0;
+			call_void_memop(vb, unmap_dmabuf,
+					vb->planes[plane].mem_priv);
+			vb->planes[plane].dbuf_mapped = 0;
 		}
 }