diff mbox series

[2/3] media: vim2m: use per-file handler work queue

Message ID 7ff2d5c791473c746ae07c012d1890c6bdd08f6d.1548776693.git.mchehab+samsung@kernel.org (mailing list archive)
State New, archived
Headers show
Series vim2m: make it work properly | expand

Commit Message

Mauro Carvalho Chehab Jan. 29, 2019, 4 p.m. UTC
It doesn't make sense to have a per-device work queue, as the
scheduler should be called per file handler. Having a single
one causes failures if multiple streams are filtered by vim2m.

So, move it to be inside the context structure.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/vim2m.c | 38 +++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 19 deletions(-)

Comments

Ezequiel Garcia Jan. 30, 2019, 12:41 p.m. UTC | #1
On Tue, 2019-01-29 at 14:00 -0200, Mauro Carvalho Chehab wrote:
> It doesn't make sense to have a per-device work queue, as the
> scheduler should be called per file handler. Having a single
> one causes failures if multiple streams are filtered by vim2m.
> 

Having a per-device workqueue should emulate a real device more closely.

But more importantly, why would having a single workqeueue fail if multiple
streams are run? The m2m should take care of the proper serialization
between multiple contexts, unless I am missing something here.

Thanks,
Eze

> So, move it to be inside the context structure.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
> ---
>  drivers/media/platform/vim2m.c | 38 +++++++++++++++++-----------------
>  1 file changed, 19 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c
> index ccd0576c766e..a9e43070567e 100644
> --- a/drivers/media/platform/vim2m.c
> +++ b/drivers/media/platform/vim2m.c
> @@ -146,9 +146,6 @@ struct vim2m_dev {
>  
>  	atomic_t		num_inst;
>  	struct mutex		dev_mutex;
> -	spinlock_t		irqlock;
> -
> -	struct delayed_work	work_run;
>  
>  	struct v4l2_m2m_dev	*m2m_dev;
>  };
> @@ -167,6 +164,10 @@ struct vim2m_ctx {
>  	/* Transaction time (i.e. simulated processing time) in milliseconds */
>  	u32			transtime;
>  
> +	struct mutex		vb_mutex;
> +	struct delayed_work	work_run;
> +	spinlock_t		irqlock;
> +
>  	/* Abort requested by m2m */
>  	int			aborting;
>  
> @@ -490,7 +491,6 @@ static void job_abort(void *priv)
>  static void device_run(void *priv)
>  {
>  	struct vim2m_ctx *ctx = priv;
> -	struct vim2m_dev *dev = ctx->dev;
>  	struct vb2_v4l2_buffer *src_buf, *dst_buf;
>  
>  	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> @@ -507,18 +507,18 @@ static void device_run(void *priv)
>  				   &ctx->hdl);
>  
>  	/* Run delayed work, which simulates a hardware irq  */
> -	schedule_delayed_work(&dev->work_run, msecs_to_jiffies(ctx->transtime));
> +	schedule_delayed_work(&ctx->work_run, msecs_to_jiffies(ctx->transtime));
>  }
>  
>  static void device_work(struct work_struct *w)
>  {
> -	struct vim2m_dev *vim2m_dev =
> -		container_of(w, struct vim2m_dev, work_run.work);
>  	struct vim2m_ctx *curr_ctx;
> +	struct vim2m_dev *vim2m_dev;
>  	struct vb2_v4l2_buffer *src_vb, *dst_vb;
>  	unsigned long flags;
>  
> -	curr_ctx = v4l2_m2m_get_curr_priv(vim2m_dev->m2m_dev);
> +	curr_ctx = container_of(w, struct vim2m_ctx, work_run.work);
> +	vim2m_dev = curr_ctx->dev;
>  
>  	if (NULL == curr_ctx) {
>  		pr_err("Instance released before the end of transaction\n");
> @@ -530,10 +530,10 @@ static void device_work(struct work_struct *w)
>  
>  	curr_ctx->num_processed++;
>  
> -	spin_lock_irqsave(&vim2m_dev->irqlock, flags);
> +	spin_lock_irqsave(&curr_ctx->irqlock, flags);
>  	v4l2_m2m_buf_done(src_vb, VB2_BUF_STATE_DONE);
>  	v4l2_m2m_buf_done(dst_vb, VB2_BUF_STATE_DONE);
> -	spin_unlock_irqrestore(&vim2m_dev->irqlock, flags);
> +	spin_unlock_irqrestore(&curr_ctx->irqlock, flags);
>  
>  	if (curr_ctx->num_processed == curr_ctx->translen
>  	    || curr_ctx->aborting) {
> @@ -893,11 +893,10 @@ static int vim2m_start_streaming(struct vb2_queue *q, unsigned count)
>  static void vim2m_stop_streaming(struct vb2_queue *q)
>  {
>  	struct vim2m_ctx *ctx = vb2_get_drv_priv(q);
> -	struct vim2m_dev *dev = ctx->dev;
>  	struct vb2_v4l2_buffer *vbuf;
>  	unsigned long flags;
>  
> -	cancel_delayed_work_sync(&dev->work_run);
> +	cancel_delayed_work_sync(&ctx->work_run);
>  	for (;;) {
>  		if (V4L2_TYPE_IS_OUTPUT(q->type))
>  			vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> @@ -907,9 +906,9 @@ static void vim2m_stop_streaming(struct vb2_queue *q)
>  			return;
>  		v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req,
>  					   &ctx->hdl);
> -		spin_lock_irqsave(&ctx->dev->irqlock, flags);
> +		spin_lock_irqsave(&ctx->irqlock, flags);
>  		v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
> -		spin_unlock_irqrestore(&ctx->dev->irqlock, flags);
> +		spin_unlock_irqrestore(&ctx->irqlock, flags);
>  	}
>  }
>  
> @@ -943,7 +942,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *ds
>  	src_vq->ops = &vim2m_qops;
>  	src_vq->mem_ops = &vb2_vmalloc_memops;
>  	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> -	src_vq->lock = &ctx->dev->dev_mutex;
> +	src_vq->lock = &ctx->vb_mutex;
>  	src_vq->supports_requests = true;
>  
>  	ret = vb2_queue_init(src_vq);
> @@ -957,7 +956,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *ds
>  	dst_vq->ops = &vim2m_qops;
>  	dst_vq->mem_ops = &vb2_vmalloc_memops;
>  	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> -	dst_vq->lock = &ctx->dev->dev_mutex;
> +	dst_vq->lock = &ctx->vb_mutex;
>  
>  	return vb2_queue_init(dst_vq);
>  }
> @@ -1032,6 +1031,10 @@ static int vim2m_open(struct file *file)
>  
>  	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, &queue_init);
>  
> +	mutex_init(&ctx->vb_mutex);
> +	spin_lock_init(&ctx->irqlock);
> +	INIT_DELAYED_WORK(&ctx->work_run, device_work);
> +
>  	if (IS_ERR(ctx->fh.m2m_ctx)) {
>  		rc = PTR_ERR(ctx->fh.m2m_ctx);
>  
> @@ -1112,8 +1115,6 @@ static int vim2m_probe(struct platform_device *pdev)
>  	if (!dev)
>  		return -ENOMEM;
>  
> -	spin_lock_init(&dev->irqlock);
> -
>  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
>  	if (ret)
>  		return ret;
> @@ -1125,7 +1126,6 @@ static int vim2m_probe(struct platform_device *pdev)
>  	vfd = &dev->vfd;
>  	vfd->lock = &dev->dev_mutex;
>  	vfd->v4l2_dev = &dev->v4l2_dev;
> -	INIT_DELAYED_WORK(&dev->work_run, device_work);
>  
>  	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
>  	if (ret) {
Mauro Carvalho Chehab Jan. 30, 2019, 1:19 p.m. UTC | #2
Em Wed, 30 Jan 2019 09:41:44 -0300
Ezequiel Garcia <ezequiel@collabora.com> escreveu:

> On Tue, 2019-01-29 at 14:00 -0200, Mauro Carvalho Chehab wrote:
> > It doesn't make sense to have a per-device work queue, as the
> > scheduler should be called per file handler. Having a single
> > one causes failures if multiple streams are filtered by vim2m.
> >   
> 
> Having a per-device workqueue should emulate a real device more closely.

Yes.

> But more importantly, why would having a single workqeueue fail if multiple
> streams are run? The m2m should take care of the proper serialization
> between multiple contexts, unless I am missing something here.

Yes, the m2m core serializes the access to m2m src/dest buffer per device.

So, two instances can't access the buffer at the same time. That makes
sense for a real physical hardware, although specifically for the virtual
one, it doesn't (any may not make sense for some stateless codec, if
the codec would internally be able to handle multiple requests at the same
time).

Without this patch, when multiple instances are used, sometimes it ends 
into a dead lock preventing to stop all of them.

I didn't have time to debug where exactly it happens, but I suspect that
the issue is related to using the same mutex for both VB and open/release
instances.

Yet, I ended by opting to move all queue-specific mutex/semaphore to be
instance-based, as this makes a lot more sense, IMHO. Also, if some day
we end by allowing support for some hardware that would have support to
run multiple m2m instances in parallel, vim2m would already be ready.


> 
> Thanks,
> Eze
> 
> > So, move it to be inside the context structure.
> > 
> > Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
> > ---
> >  drivers/media/platform/vim2m.c | 38 +++++++++++++++++-----------------
> >  1 file changed, 19 insertions(+), 19 deletions(-)
> > 
> > diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c
> > index ccd0576c766e..a9e43070567e 100644
> > --- a/drivers/media/platform/vim2m.c
> > +++ b/drivers/media/platform/vim2m.c
> > @@ -146,9 +146,6 @@ struct vim2m_dev {
> >  
> >  	atomic_t		num_inst;
> >  	struct mutex		dev_mutex;
> > -	spinlock_t		irqlock;
> > -
> > -	struct delayed_work	work_run;
> >  
> >  	struct v4l2_m2m_dev	*m2m_dev;
> >  };
> > @@ -167,6 +164,10 @@ struct vim2m_ctx {
> >  	/* Transaction time (i.e. simulated processing time) in milliseconds */
> >  	u32			transtime;
> >  
> > +	struct mutex		vb_mutex;
> > +	struct delayed_work	work_run;
> > +	spinlock_t		irqlock;
> > +
> >  	/* Abort requested by m2m */
> >  	int			aborting;
> >  
> > @@ -490,7 +491,6 @@ static void job_abort(void *priv)
> >  static void device_run(void *priv)
> >  {
> >  	struct vim2m_ctx *ctx = priv;
> > -	struct vim2m_dev *dev = ctx->dev;
> >  	struct vb2_v4l2_buffer *src_buf, *dst_buf;
> >  
> >  	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
> > @@ -507,18 +507,18 @@ static void device_run(void *priv)
> >  				   &ctx->hdl);
> >  
> >  	/* Run delayed work, which simulates a hardware irq  */
> > -	schedule_delayed_work(&dev->work_run, msecs_to_jiffies(ctx->transtime));
> > +	schedule_delayed_work(&ctx->work_run, msecs_to_jiffies(ctx->transtime));
> >  }
> >  
> >  static void device_work(struct work_struct *w)
> >  {
> > -	struct vim2m_dev *vim2m_dev =
> > -		container_of(w, struct vim2m_dev, work_run.work);
> >  	struct vim2m_ctx *curr_ctx;
> > +	struct vim2m_dev *vim2m_dev;
> >  	struct vb2_v4l2_buffer *src_vb, *dst_vb;
> >  	unsigned long flags;
> >  
> > -	curr_ctx = v4l2_m2m_get_curr_priv(vim2m_dev->m2m_dev);
> > +	curr_ctx = container_of(w, struct vim2m_ctx, work_run.work);
> > +	vim2m_dev = curr_ctx->dev;
> >  
> >  	if (NULL == curr_ctx) {
> >  		pr_err("Instance released before the end of transaction\n");
> > @@ -530,10 +530,10 @@ static void device_work(struct work_struct *w)
> >  
> >  	curr_ctx->num_processed++;
> >  
> > -	spin_lock_irqsave(&vim2m_dev->irqlock, flags);
> > +	spin_lock_irqsave(&curr_ctx->irqlock, flags);
> >  	v4l2_m2m_buf_done(src_vb, VB2_BUF_STATE_DONE);
> >  	v4l2_m2m_buf_done(dst_vb, VB2_BUF_STATE_DONE);
> > -	spin_unlock_irqrestore(&vim2m_dev->irqlock, flags);
> > +	spin_unlock_irqrestore(&curr_ctx->irqlock, flags);
> >  
> >  	if (curr_ctx->num_processed == curr_ctx->translen
> >  	    || curr_ctx->aborting) {
> > @@ -893,11 +893,10 @@ static int vim2m_start_streaming(struct vb2_queue *q, unsigned count)
> >  static void vim2m_stop_streaming(struct vb2_queue *q)
> >  {
> >  	struct vim2m_ctx *ctx = vb2_get_drv_priv(q);
> > -	struct vim2m_dev *dev = ctx->dev;
> >  	struct vb2_v4l2_buffer *vbuf;
> >  	unsigned long flags;
> >  
> > -	cancel_delayed_work_sync(&dev->work_run);
> > +	cancel_delayed_work_sync(&ctx->work_run);
> >  	for (;;) {
> >  		if (V4L2_TYPE_IS_OUTPUT(q->type))
> >  			vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
> > @@ -907,9 +906,9 @@ static void vim2m_stop_streaming(struct vb2_queue *q)
> >  			return;
> >  		v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req,
> >  					   &ctx->hdl);
> > -		spin_lock_irqsave(&ctx->dev->irqlock, flags);
> > +		spin_lock_irqsave(&ctx->irqlock, flags);
> >  		v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
> > -		spin_unlock_irqrestore(&ctx->dev->irqlock, flags);
> > +		spin_unlock_irqrestore(&ctx->irqlock, flags);
> >  	}
> >  }
> >  
> > @@ -943,7 +942,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *ds
> >  	src_vq->ops = &vim2m_qops;
> >  	src_vq->mem_ops = &vb2_vmalloc_memops;
> >  	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > -	src_vq->lock = &ctx->dev->dev_mutex;
> > +	src_vq->lock = &ctx->vb_mutex;
> >  	src_vq->supports_requests = true;
> >  
> >  	ret = vb2_queue_init(src_vq);
> > @@ -957,7 +956,7 @@ static int queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *ds
> >  	dst_vq->ops = &vim2m_qops;
> >  	dst_vq->mem_ops = &vb2_vmalloc_memops;
> >  	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
> > -	dst_vq->lock = &ctx->dev->dev_mutex;
> > +	dst_vq->lock = &ctx->vb_mutex;
> >  
> >  	return vb2_queue_init(dst_vq);
> >  }
> > @@ -1032,6 +1031,10 @@ static int vim2m_open(struct file *file)
> >  
> >  	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, &queue_init);
> >  
> > +	mutex_init(&ctx->vb_mutex);
> > +	spin_lock_init(&ctx->irqlock);
> > +	INIT_DELAYED_WORK(&ctx->work_run, device_work);
> > +
> >  	if (IS_ERR(ctx->fh.m2m_ctx)) {
> >  		rc = PTR_ERR(ctx->fh.m2m_ctx);
> >  
> > @@ -1112,8 +1115,6 @@ static int vim2m_probe(struct platform_device *pdev)
> >  	if (!dev)
> >  		return -ENOMEM;
> >  
> > -	spin_lock_init(&dev->irqlock);
> > -
> >  	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
> >  	if (ret)
> >  		return ret;
> > @@ -1125,7 +1126,6 @@ static int vim2m_probe(struct platform_device *pdev)
> >  	vfd = &dev->vfd;
> >  	vfd->lock = &dev->dev_mutex;
> >  	vfd->v4l2_dev = &dev->v4l2_dev;
> > -	INIT_DELAYED_WORK(&dev->work_run, device_work);
> >  
> >  	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
> >  	if (ret) {  
> 
> 




Cheers,
Mauro
Ezequiel Garcia Jan. 30, 2019, 2:56 p.m. UTC | #3
Hey Mauro,

On Wed, 2019-01-30 at 11:19 -0200, Mauro Carvalho Chehab wrote:
> Em Wed, 30 Jan 2019 09:41:44 -0300
> Ezequiel Garcia <ezequiel@collabora.com> escreveu:
> 
> > On Tue, 2019-01-29 at 14:00 -0200, Mauro Carvalho Chehab wrote:
> > > It doesn't make sense to have a per-device work queue, as the
> > > scheduler should be called per file handler. Having a single
> > > one causes failures if multiple streams are filtered by vim2m.
> > >   
> > 
> > Having a per-device workqueue should emulate a real device more closely.
> 
> Yes.
> 
> > But more importantly, why would having a single workqeueue fail if multiple
> > streams are run? The m2m should take care of the proper serialization
> > between multiple contexts, unless I am missing something here.
> 
> Yes, the m2m core serializes the access to m2m src/dest buffer per device.
> 
> So, two instances can't access the buffer at the same time. That makes
> sense for a real physical hardware, although specifically for the virtual
> one, it doesn't (any may not make sense for some stateless codec, if
> the codec would internally be able to handle multiple requests at the same
> time).
> 
> Without this patch, when multiple instances are used, sometimes it ends 
> into a dead lock preventing to stop all of them.
> 
> I didn't have time to debug where exactly it happens, but I suspect that
> the issue is related to using the same mutex for both VB and open/release
> instances.
> 
> Yet, I ended by opting to move all queue-specific mutex/semaphore to be
> instance-based, as this makes a lot more sense, IMHO. Also, if some day
> we end by allowing support for some hardware that would have support to
> run multiple m2m instances in parallel, vim2m would already be ready.
> 

I don't oppose to the idea of having a per-context workqueue.

However, I'm not too comfortable with having a bug somewhere (and not knowing
whert) and instead of fixing it, working around it. I'd rather
fix the bug instead, then decide about the workqueue.

Thanks,
Eze
Mauro Carvalho Chehab Jan. 30, 2019, 6 p.m. UTC | #4
Em Wed, 30 Jan 2019 11:56:58 -0300
Ezequiel Garcia <ezequiel@collabora.com> escreveu:

> Hey Mauro,
> 
> On Wed, 2019-01-30 at 11:19 -0200, Mauro Carvalho Chehab wrote:
> > Em Wed, 30 Jan 2019 09:41:44 -0300
> > Ezequiel Garcia <ezequiel@collabora.com> escreveu:
> >   
> > > On Tue, 2019-01-29 at 14:00 -0200, Mauro Carvalho Chehab wrote:  
> > > > It doesn't make sense to have a per-device work queue, as the
> > > > scheduler should be called per file handler. Having a single
> > > > one causes failures if multiple streams are filtered by vim2m.
> > > >     
> > > 
> > > Having a per-device workqueue should emulate a real device more closely.  
> > 
> > Yes.
> >   
> > > But more importantly, why would having a single workqeueue fail if multiple
> > > streams are run? The m2m should take care of the proper serialization
> > > between multiple contexts, unless I am missing something here.  
> > 
> > Yes, the m2m core serializes the access to m2m src/dest buffer per device.
> > 
> > So, two instances can't access the buffer at the same time. That makes
> > sense for a real physical hardware, although specifically for the virtual
> > one, it doesn't (any may not make sense for some stateless codec, if
> > the codec would internally be able to handle multiple requests at the same
> > time).
> > 
> > Without this patch, when multiple instances are used, sometimes it ends 
> > into a dead lock preventing to stop all of them.
> > 
> > I didn't have time to debug where exactly it happens, but I suspect that
> > the issue is related to using the same mutex for both VB and open/release
> > instances.
> > 
> > Yet, I ended by opting to move all queue-specific mutex/semaphore to be
> > instance-based, as this makes a lot more sense, IMHO. Also, if some day
> > we end by allowing support for some hardware that would have support to
> > run multiple m2m instances in parallel, vim2m would already be ready.
> >   
> 
> I don't oppose to the idea of having a per-context workqueue.
> 
> However, I'm not too comfortable with having a bug somewhere (and not knowing
> whert) and instead of fixing it, working around it. I'd rather
> fix the bug instead, then decide about the workqueue.

I suspect that just using a separate mutex for open/release could
be enough to make the upstream version stable, when multiple instances
are running.


However, it has been a challenge for me to debug it here, as I'm traveling
with limited resources. I'm using the same machine for both desktop, to run
a VM to access some corp resources and to do Kernel devel.

That's usually a very bad idea. Yet, I'm pretty sure that after this patch,
things are stable, as I've been able to test it without any issues and
without needing to reboot my machine.

If you have enough time, feel free to test. Otherwise, I intend to just
apply this patch series, as it fixes a few bugs and make vim2m to
actually display what it would be expected, instead of plotting just some
weird image that the current version shows.

Cheers,
Mauro
diff mbox series

Patch

diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c
index ccd0576c766e..a9e43070567e 100644
--- a/drivers/media/platform/vim2m.c
+++ b/drivers/media/platform/vim2m.c
@@ -146,9 +146,6 @@  struct vim2m_dev {
 
 	atomic_t		num_inst;
 	struct mutex		dev_mutex;
-	spinlock_t		irqlock;
-
-	struct delayed_work	work_run;
 
 	struct v4l2_m2m_dev	*m2m_dev;
 };
@@ -167,6 +164,10 @@  struct vim2m_ctx {
 	/* Transaction time (i.e. simulated processing time) in milliseconds */
 	u32			transtime;
 
+	struct mutex		vb_mutex;
+	struct delayed_work	work_run;
+	spinlock_t		irqlock;
+
 	/* Abort requested by m2m */
 	int			aborting;
 
@@ -490,7 +491,6 @@  static void job_abort(void *priv)
 static void device_run(void *priv)
 {
 	struct vim2m_ctx *ctx = priv;
-	struct vim2m_dev *dev = ctx->dev;
 	struct vb2_v4l2_buffer *src_buf, *dst_buf;
 
 	src_buf = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx);
@@ -507,18 +507,18 @@  static void device_run(void *priv)
 				   &ctx->hdl);
 
 	/* Run delayed work, which simulates a hardware irq  */
-	schedule_delayed_work(&dev->work_run, msecs_to_jiffies(ctx->transtime));
+	schedule_delayed_work(&ctx->work_run, msecs_to_jiffies(ctx->transtime));
 }
 
 static void device_work(struct work_struct *w)
 {
-	struct vim2m_dev *vim2m_dev =
-		container_of(w, struct vim2m_dev, work_run.work);
 	struct vim2m_ctx *curr_ctx;
+	struct vim2m_dev *vim2m_dev;
 	struct vb2_v4l2_buffer *src_vb, *dst_vb;
 	unsigned long flags;
 
-	curr_ctx = v4l2_m2m_get_curr_priv(vim2m_dev->m2m_dev);
+	curr_ctx = container_of(w, struct vim2m_ctx, work_run.work);
+	vim2m_dev = curr_ctx->dev;
 
 	if (NULL == curr_ctx) {
 		pr_err("Instance released before the end of transaction\n");
@@ -530,10 +530,10 @@  static void device_work(struct work_struct *w)
 
 	curr_ctx->num_processed++;
 
-	spin_lock_irqsave(&vim2m_dev->irqlock, flags);
+	spin_lock_irqsave(&curr_ctx->irqlock, flags);
 	v4l2_m2m_buf_done(src_vb, VB2_BUF_STATE_DONE);
 	v4l2_m2m_buf_done(dst_vb, VB2_BUF_STATE_DONE);
-	spin_unlock_irqrestore(&vim2m_dev->irqlock, flags);
+	spin_unlock_irqrestore(&curr_ctx->irqlock, flags);
 
 	if (curr_ctx->num_processed == curr_ctx->translen
 	    || curr_ctx->aborting) {
@@ -893,11 +893,10 @@  static int vim2m_start_streaming(struct vb2_queue *q, unsigned count)
 static void vim2m_stop_streaming(struct vb2_queue *q)
 {
 	struct vim2m_ctx *ctx = vb2_get_drv_priv(q);
-	struct vim2m_dev *dev = ctx->dev;
 	struct vb2_v4l2_buffer *vbuf;
 	unsigned long flags;
 
-	cancel_delayed_work_sync(&dev->work_run);
+	cancel_delayed_work_sync(&ctx->work_run);
 	for (;;) {
 		if (V4L2_TYPE_IS_OUTPUT(q->type))
 			vbuf = v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx);
@@ -907,9 +906,9 @@  static void vim2m_stop_streaming(struct vb2_queue *q)
 			return;
 		v4l2_ctrl_request_complete(vbuf->vb2_buf.req_obj.req,
 					   &ctx->hdl);
-		spin_lock_irqsave(&ctx->dev->irqlock, flags);
+		spin_lock_irqsave(&ctx->irqlock, flags);
 		v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR);
-		spin_unlock_irqrestore(&ctx->dev->irqlock, flags);
+		spin_unlock_irqrestore(&ctx->irqlock, flags);
 	}
 }
 
@@ -943,7 +942,7 @@  static int queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *ds
 	src_vq->ops = &vim2m_qops;
 	src_vq->mem_ops = &vb2_vmalloc_memops;
 	src_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
-	src_vq->lock = &ctx->dev->dev_mutex;
+	src_vq->lock = &ctx->vb_mutex;
 	src_vq->supports_requests = true;
 
 	ret = vb2_queue_init(src_vq);
@@ -957,7 +956,7 @@  static int queue_init(void *priv, struct vb2_queue *src_vq, struct vb2_queue *ds
 	dst_vq->ops = &vim2m_qops;
 	dst_vq->mem_ops = &vb2_vmalloc_memops;
 	dst_vq->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_COPY;
-	dst_vq->lock = &ctx->dev->dev_mutex;
+	dst_vq->lock = &ctx->vb_mutex;
 
 	return vb2_queue_init(dst_vq);
 }
@@ -1032,6 +1031,10 @@  static int vim2m_open(struct file *file)
 
 	ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, ctx, &queue_init);
 
+	mutex_init(&ctx->vb_mutex);
+	spin_lock_init(&ctx->irqlock);
+	INIT_DELAYED_WORK(&ctx->work_run, device_work);
+
 	if (IS_ERR(ctx->fh.m2m_ctx)) {
 		rc = PTR_ERR(ctx->fh.m2m_ctx);
 
@@ -1112,8 +1115,6 @@  static int vim2m_probe(struct platform_device *pdev)
 	if (!dev)
 		return -ENOMEM;
 
-	spin_lock_init(&dev->irqlock);
-
 	ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev);
 	if (ret)
 		return ret;
@@ -1125,7 +1126,6 @@  static int vim2m_probe(struct platform_device *pdev)
 	vfd = &dev->vfd;
 	vfd->lock = &dev->dev_mutex;
 	vfd->v4l2_dev = &dev->v4l2_dev;
-	INIT_DELAYED_WORK(&dev->work_run, device_work);
 
 	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
 	if (ret) {