diff mbox series

[v4,4/4] drm/vc4: Allocate binner bo when starting to use the V3D

Message ID 20190403154856.9470-5-paul.kocialkowski@bootlin.com (mailing list archive)
State New, archived
Headers show
Series drm/vc4: Binner BO management improvements | expand

Commit Message

Paul Kocialkowski April 3, 2019, 3:48 p.m. UTC
The binner bo is not required until the V3D is in use, so avoid
allocating it at probe and do it on the first non-dumb BO allocation.
Keep track of which clients are using the V3D and release the buffer
when no client is left.

We also want to keep it alive during runtime suspend/resume to avoid
failing to allocate it at resume. That failure happens when the CMA pool
is full at that point and results in a hard crash.

Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
---
 drivers/gpu/drm/vc4/vc4_bo.c  | 32 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/vc4/vc4_drv.c |  9 +++++++++
 drivers/gpu/drm/vc4/vc4_drv.h |  4 ++++
 drivers/gpu/drm/vc4/vc4_v3d.c | 13 -------------
 4 files changed, 45 insertions(+), 13 deletions(-)

Comments

Eric Anholt April 3, 2019, 6:53 p.m. UTC | #1
Paul Kocialkowski <paul.kocialkowski@bootlin.com> writes:

> The binner bo is not required until the V3D is in use, so avoid
> allocating it at probe and do it on the first non-dumb BO allocation.
> Keep track of which clients are using the V3D and liberate the buffer
> when there is none left.
>
> We also want to keep it alive during runtime suspend/resume to avoid
> failing to allocate it at resume. This happens when the CMA pool is
> full at that point and results in a hard crash.
>
> Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
> ---
>  drivers/gpu/drm/vc4/vc4_bo.c  | 32 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/vc4/vc4_drv.c |  9 +++++++++
>  drivers/gpu/drm/vc4/vc4_drv.h |  4 ++++
>  drivers/gpu/drm/vc4/vc4_v3d.c | 13 -------------
>  4 files changed, 45 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
> index 88ebd681d7eb..b941f09b9378 100644
> --- a/drivers/gpu/drm/vc4/vc4_bo.c
> +++ b/drivers/gpu/drm/vc4/vc4_bo.c
> @@ -799,6 +799,30 @@ vc4_prime_import_sg_table(struct drm_device *dev,
>  	return obj;
>  }
>  
> +static int vc4_prepare_bin_bo(struct drm_device *dev,
> +			      struct drm_file *file_priv)
> +{
> +	struct vc4_file *vc4file = file_priv->driver_priv;
> +	struct vc4_dev *vc4 = to_vc4_dev(dev);
> +	int ret;
> +
> +	if (!vc4->v3d)
> +		return -ENODEV;
> +
> +	if (!vc4file->needs_bin_bo) {
> +		atomic_inc(&vc4->bin_bo_usecnt);
> +		vc4file->needs_bin_bo = true;
> +	}
> +
> +	if (!vc4->bin_bo) {
> +		ret = vc4_v3d_allocate_bin_bo(vc4);
> +		if (ret)
> +			return ret;
> +	}
> +

This atomic usage looks really racy.  For example, multiple clients
could call allocate at the same time and leak one.  Or this timeline:

us           them
             dec count to 0
inc count
check bin_bo
             free bin_bo

vc4_v3d_allocate_bin_bo should probably be a vc4_v3d_bin_bo_get()
returning a kref on the BO, called under a lock protecting both one
file_priv being dereferenced by multiple threads in the kernel at the
same time (so file_priv doesn't try to double-get its ref) and multiple
file_privs trying to get the bin_bo at once.
Paul Kocialkowski April 4, 2019, 12:38 p.m. UTC | #2
Hi,

Le mercredi 03 avril 2019 à 11:53 -0700, Eric Anholt a écrit :
> Paul Kocialkowski <paul.kocialkowski@bootlin.com> writes:
> 
> > The binner bo is not required until the V3D is in use, so avoid
> > allocating it at probe and do it on the first non-dumb BO allocation.
> > Keep track of which clients are using the V3D and liberate the buffer
> > when there is none left.
> > 
> > We also want to keep it alive during runtime suspend/resume to avoid
> > failing to allocate it at resume. This happens when the CMA pool is
> > full at that point and results in a hard crash.
> > 
> > Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
> > ---
> >  drivers/gpu/drm/vc4/vc4_bo.c  | 32 ++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/vc4/vc4_drv.c |  9 +++++++++
> >  drivers/gpu/drm/vc4/vc4_drv.h |  4 ++++
> >  drivers/gpu/drm/vc4/vc4_v3d.c | 13 -------------
> >  4 files changed, 45 insertions(+), 13 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
> > index 88ebd681d7eb..b941f09b9378 100644
> > --- a/drivers/gpu/drm/vc4/vc4_bo.c
> > +++ b/drivers/gpu/drm/vc4/vc4_bo.c
> > @@ -799,6 +799,30 @@ vc4_prime_import_sg_table(struct drm_device *dev,
> >  	return obj;
> >  }
> >  
> > +static int vc4_prepare_bin_bo(struct drm_device *dev,
> > +			      struct drm_file *file_priv)
> > +{
> > +	struct vc4_file *vc4file = file_priv->driver_priv;
> > +	struct vc4_dev *vc4 = to_vc4_dev(dev);
> > +	int ret;
> > +
> > +	if (!vc4->v3d)
> > +		return -ENODEV;
> > +
> > +	if (!vc4file->needs_bin_bo) {
> > +		atomic_inc(&vc4->bin_bo_usecnt);
> > +		vc4file->needs_bin_bo = true;
> > +	}
> > +
> > +	if (!vc4->bin_bo) {
> > +		ret = vc4_v3d_allocate_bin_bo(vc4);
> > +		if (ret)
> > +			return ret;
> > +	}
> > +
> 
> This atomic usage looks really racy.  For example, multiple clients
> could call allocate at the same time and leak one.  Or this timeline:
> 
> us           them
>              dec count to 0
> inc count
> check bin_bo
>              free bin_bo

Oh, you're definitely right. Sorry I missed that.

> vc4_v3d_allocate_bin_bo should probably be a vc4_v3d_bin_bo_get()
> returning a kref on the BO, called under a lock protecting both one
> file_priv being dereferenced by multiple threads in the kernel at the
> same time (so file_priv doesn't try to double-get its ref) and multiple
> file_privs trying to get the bin_bo at once.

Sounds good, I'll look into it and spin up a new revision soon.

Cheers,

Paul
diff mbox series

Patch

diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 88ebd681d7eb..b941f09b9378 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -799,6 +799,30 @@  vc4_prime_import_sg_table(struct drm_device *dev,
 	return obj;
 }
 
+static int vc4_prepare_bin_bo(struct drm_device *dev,
+			      struct drm_file *file_priv)
+{
+	struct vc4_file *vc4file = file_priv->driver_priv;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	int ret;
+
+	if (!vc4->v3d)
+		return -ENODEV;
+
+	if (!vc4file->needs_bin_bo) {
+		atomic_inc(&vc4->bin_bo_usecnt);
+		vc4file->needs_bin_bo = true;
+	}
+
+	if (!vc4->bin_bo) {
+		ret = vc4_v3d_allocate_bin_bo(vc4);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
@@ -806,6 +830,10 @@  int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
 	struct vc4_bo *bo = NULL;
 	int ret;
 
+	ret = vc4_prepare_bin_bo(dev, file_priv);
+	if (ret)
+		return ret;
+
 	/*
 	 * We can't allocate from the BO cache, because the BOs don't
 	 * get zeroed, and that might leak data between users.
@@ -865,6 +893,10 @@  vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	ret = vc4_prepare_bin_bo(dev, file_priv);
+	if (ret)
+		return ret;
+
 	bo = vc4_bo_create(dev, args->size, true, VC4_BO_TYPE_V3D_SHADER);
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index d840b52b9805..4db937601638 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -128,8 +128,15 @@  static int vc4_open(struct drm_device *dev, struct drm_file *file)
 
 static void vc4_close(struct drm_device *dev, struct drm_file *file)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_file *vc4file = file->driver_priv;
 
+	if (vc4file->needs_bin_bo && atomic_dec_and_test(&vc4->bin_bo_usecnt) &&
+	    vc4->bin_bo) {
+		drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
+		vc4->bin_bo = NULL;
+	}
+
 	vc4_perfmon_close_file(vc4file);
 	kfree(vc4file);
 }
@@ -274,6 +281,8 @@  static int vc4_drm_bind(struct device *dev)
 	drm->dev_private = vc4;
 	INIT_LIST_HEAD(&vc4->debugfs_list);
 
+	atomic_set(&vc4->bin_bo_usecnt, 0);
+
 	ret = vc4_bo_cache_init(drm);
 	if (ret)
 		goto dev_put;
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 37941f0e212a..7d49452f04fe 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -216,6 +216,8 @@  struct vc4_dev {
 	 * the minor is available (after drm_dev_register()).
 	 */
 	struct list_head debugfs_list;
+
+	atomic_t bin_bo_usecnt;
 };
 
 static inline struct vc4_dev *
@@ -594,6 +596,8 @@  struct vc4_file {
 		struct idr idr;
 		struct mutex lock;
 	} perfmon;
+
+	bool needs_bin_bo;
 };
 
 static inline struct vc4_exec_info *
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 70204c38074a..1cc366d76f2a 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -321,9 +321,6 @@  static int vc4_v3d_runtime_suspend(struct device *dev)
 
 	vc4_irq_uninstall(vc4->dev);
 
-	drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
-	vc4->bin_bo = NULL;
-
 	clk_disable_unprepare(v3d->clk);
 
 	return 0;
@@ -335,10 +332,6 @@  static int vc4_v3d_runtime_resume(struct device *dev)
 	struct vc4_dev *vc4 = v3d->vc4;
 	int ret;
 
-	ret = vc4_v3d_allocate_bin_bo(vc4);
-	if (ret)
-		return ret;
-
 	ret = clk_prepare_enable(v3d->clk);
 	if (ret != 0)
 		return ret;
@@ -405,12 +398,6 @@  static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
 	if (ret != 0)
 		return ret;
 
-	ret = vc4_v3d_allocate_bin_bo(vc4);
-	if (ret) {
-		clk_disable_unprepare(v3d->clk);
-		return ret;
-	}
-
 	/* Reset the binner overflow address/size at setup, to be sure
 	 * we don't reuse an old one.
 	 */