diff mbox

[08/16] drm/i915/error: Do a better job of disambiguating VMAs

Message ID 1404238671-18760-9-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky July 1, 2014, 6:17 p.m. UTC
Some of the original PPGTT patches in this area were unmerged, and this
left a lot of confusion in our error capture with regard to which vm/obj
we want to capture. There have been at least a couple of patches from
Chris and myself to try to fix this up, so here is another shot. Nobody
running without full PPGTT is affected by this, and that is probably why
nobody has bothered to fix it yet.

Instead of using any of the global lists to find the VMAs we want to
capture, we use the union of the active and inactive lists in the
VM. This allows us to replace our capture_bo with capture_vma, and know
all the VMAs we want to capture are valid.

I could have probably figured out a way to reuse mm_list. As we've had
bugs here before in the shrinker, I think the best way forward is to get
it working, and then optimize it later.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.h   |  2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c | 39 ++++++++++++++++++++++-------------
 3 files changed, 28 insertions(+), 14 deletions(-)

Comments

Chris Wilson July 4, 2014, 7:57 a.m. UTC | #1
On Tue, Jul 01, 2014 at 11:17:43AM -0700, Ben Widawsky wrote:
> Some of the original PPGTT patches in this area where unmerged, and this
> left a lot of confusion in our error capture with regard to which vm/obj
> we want to capture. There have been at least a couple of patches from
> Chris, and myself to try to fix this up; so here is another shot. Nobody
> running without full PPGTT is effected by this, and that is probably why
> nobody has bothered to fix it yet.
> 
> Instead of using any of the global lists to find the VMAs we want to
> capture, we use the union of the active, and the inactive list in the
> VM. This allows us to replace our capture_bo with capture_vma, and know
> all the VMAs we want to capture are valid.
> 
> I could have probably figured out a way to reuse mm_list. As we've had
> bugs here before in the shrinker, I think the best way forward is to get
> it working, and then optimize it later.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c   |  1 +
>  drivers/gpu/drm/i915/i915_gem_gtt.h   |  2 ++
>  drivers/gpu/drm/i915/i915_gpu_error.c | 39 ++++++++++++++++++++++-------------
>  3 files changed, 28 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index a4153ee..88451dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2114,6 +2114,7 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
>  		return ERR_PTR(-ENOMEM);
>  
>  	INIT_LIST_HEAD(&vma->vma_link);
> +	INIT_LIST_HEAD(&vma->pin_capture_link);
>  	INIT_LIST_HEAD(&vma->mm_list);
>  	INIT_LIST_HEAD(&vma->exec_list);
>  	vma->vm = vm;
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 8d6f7c1..1d75801 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -126,6 +126,8 @@ struct i915_vma {
>  
>  	struct list_head vma_link; /* Link in the object's VMA list */
>  
> +	struct list_head pin_capture_link; /* Link in the error capture */
> +
>  	/** This vma's place in the batchbuffer or on the eviction list */
>  	struct list_head exec_list;

We already have a slot for temporary lists...
-Chris
Ben Widawsky July 4, 2014, 4:56 p.m. UTC | #2
On Fri, Jul 04, 2014 at 08:57:08AM +0100, Chris Wilson wrote:
> On Tue, Jul 01, 2014 at 11:17:43AM -0700, Ben Widawsky wrote:
> > Some of the original PPGTT patches in this area where unmerged, and this
> > left a lot of confusion in our error capture with regard to which vm/obj
> > we want to capture. There have been at least a couple of patches from
> > Chris, and myself to try to fix this up; so here is another shot. Nobody
> > running without full PPGTT is effected by this, and that is probably why
> > nobody has bothered to fix it yet.
> > 
> > Instead of using any of the global lists to find the VMAs we want to
> > capture, we use the union of the active, and the inactive list in the
> > VM. This allows us to replace our capture_bo with capture_vma, and know
> > all the VMAs we want to capture are valid.
> > 
> > I could have probably figured out a way to reuse mm_list. As we've had
> > bugs here before in the shrinker, I think the best way forward is to get
> > it working, and then optimize it later.
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c   |  1 +
> >  drivers/gpu/drm/i915/i915_gem_gtt.h   |  2 ++
> >  drivers/gpu/drm/i915/i915_gpu_error.c | 39 ++++++++++++++++++++++-------------
> >  3 files changed, 28 insertions(+), 14 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index a4153ee..88451dc 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -2114,6 +2114,7 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
> >  		return ERR_PTR(-ENOMEM);
> >  
> >  	INIT_LIST_HEAD(&vma->vma_link);
> > +	INIT_LIST_HEAD(&vma->pin_capture_link);
> >  	INIT_LIST_HEAD(&vma->mm_list);
> >  	INIT_LIST_HEAD(&vma->exec_list);
> >  	vma->vm = vm;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > index 8d6f7c1..1d75801 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -126,6 +126,8 @@ struct i915_vma {
> >  
> >  	struct list_head vma_link; /* Link in the object's VMA list */
> >  
> > +	struct list_head pin_capture_link; /* Link in the error capture */
> > +
> >  	/** This vma's place in the batchbuffer or on the eviction list */
> >  	struct list_head exec_list;
> 
> We already have a slot for temporary lists...
> -Chris
> 

I did mention that in the commit message, if I caught your meaning.
Daniel Vetter July 17, 2014, 8:51 a.m. UTC | #3
On Fri, Jul 04, 2014 at 09:56:54AM -0700, Ben Widawsky wrote:
> On Fri, Jul 04, 2014 at 08:57:08AM +0100, Chris Wilson wrote:
> > On Tue, Jul 01, 2014 at 11:17:43AM -0700, Ben Widawsky wrote:
> > > Some of the original PPGTT patches in this area where unmerged, and this
> > > left a lot of confusion in our error capture with regard to which vm/obj
> > > we want to capture. There have been at least a couple of patches from
> > > Chris, and myself to try to fix this up; so here is another shot. Nobody
> > > running without full PPGTT is effected by this, and that is probably why
> > > nobody has bothered to fix it yet.
> > > 
> > > Instead of using any of the global lists to find the VMAs we want to
> > > capture, we use the union of the active, and the inactive list in the
> > > VM. This allows us to replace our capture_bo with capture_vma, and know
> > > all the VMAs we want to capture are valid.
> > > 
> > > I could have probably figured out a way to reuse mm_list. As we've had
> > > bugs here before in the shrinker, I think the best way forward is to get
> > > it working, and then optimize it later.
> > > 
> > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem_gtt.c   |  1 +
> > >  drivers/gpu/drm/i915/i915_gem_gtt.h   |  2 ++
> > >  drivers/gpu/drm/i915/i915_gpu_error.c | 39 ++++++++++++++++++++++-------------
> > >  3 files changed, 28 insertions(+), 14 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > index a4153ee..88451dc 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > @@ -2114,6 +2114,7 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
> > >  		return ERR_PTR(-ENOMEM);
> > >  
> > >  	INIT_LIST_HEAD(&vma->vma_link);
> > > +	INIT_LIST_HEAD(&vma->pin_capture_link);
> > >  	INIT_LIST_HEAD(&vma->mm_list);
> > >  	INIT_LIST_HEAD(&vma->exec_list);
> > >  	vma->vm = vm;
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > index 8d6f7c1..1d75801 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > @@ -126,6 +126,8 @@ struct i915_vma {
> > >  
> > >  	struct list_head vma_link; /* Link in the object's VMA list */
> > >  
> > > +	struct list_head pin_capture_link; /* Link in the error capture */
> > > +
> > >  	/** This vma's place in the batchbuffer or on the eviction list */
> > >  	struct list_head exec_list;
> > 
> > We already have a slot for temporary lists...
> > -Chris
> > 
> 
> I did mention that in the commit message, if I caught your meaning.

Chris is probably talking about exec_list which is our canonical temporary
list, mostly used by execbuf. But also in other places.
-Daniel
Ben Widawsky July 20, 2014, 11:49 p.m. UTC | #4
On Thu, Jul 17, 2014 at 10:51:23AM +0200, Daniel Vetter wrote:
> On Fri, Jul 04, 2014 at 09:56:54AM -0700, Ben Widawsky wrote:
> > On Fri, Jul 04, 2014 at 08:57:08AM +0100, Chris Wilson wrote:
> > > On Tue, Jul 01, 2014 at 11:17:43AM -0700, Ben Widawsky wrote:
> > > > Some of the original PPGTT patches in this area where unmerged, and this
> > > > left a lot of confusion in our error capture with regard to which vm/obj
> > > > we want to capture. There have been at least a couple of patches from
> > > > Chris, and myself to try to fix this up; so here is another shot. Nobody
> > > > running without full PPGTT is effected by this, and that is probably why
> > > > nobody has bothered to fix it yet.
> > > > 
> > > > Instead of using any of the global lists to find the VMAs we want to
> > > > capture, we use the union of the active, and the inactive list in the
> > > > VM. This allows us to replace our capture_bo with capture_vma, and know
> > > > all the VMAs we want to capture are valid.
> > > > 
> > > > I could have probably figured out a way to reuse mm_list. As we've had
> > > > bugs here before in the shrinker, I think the best way forward is to get
> > > > it working, and then optimize it later.
> > > > 
> > > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_gem_gtt.c   |  1 +
> > > >  drivers/gpu/drm/i915/i915_gem_gtt.h   |  2 ++
> > > >  drivers/gpu/drm/i915/i915_gpu_error.c | 39 ++++++++++++++++++++++-------------
> > > >  3 files changed, 28 insertions(+), 14 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > index a4153ee..88451dc 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > > @@ -2114,6 +2114,7 @@ static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
> > > >  		return ERR_PTR(-ENOMEM);
> > > >  
> > > >  	INIT_LIST_HEAD(&vma->vma_link);
> > > > +	INIT_LIST_HEAD(&vma->pin_capture_link);
> > > >  	INIT_LIST_HEAD(&vma->mm_list);
> > > >  	INIT_LIST_HEAD(&vma->exec_list);
> > > >  	vma->vm = vm;
> > > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > > index 8d6f7c1..1d75801 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > > @@ -126,6 +126,8 @@ struct i915_vma {
> > > >  
> > > >  	struct list_head vma_link; /* Link in the object's VMA list */
> > > >  
> > > > +	struct list_head pin_capture_link; /* Link in the error capture */
> > > > +
> > > >  	/** This vma's place in the batchbuffer or on the eviction list */
> > > >  	struct list_head exec_list;
> > > 
> > > We already have a slot for temporary lists...
> > > -Chris
> > > 
> > 
> > I did mention that in the commit message, if I caught your meaning.
> 
> Chris is probably talking about exec_list which is our canonical temporary
> list, mostly used by execbuf. But also in other places.
> -Daniel

I think that was a typo on my part, I meant exec_list. In either case, I
think doing it this way and merging it later is the safest path.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a4153ee..88451dc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2114,6 +2114,7 @@  static struct i915_vma *__i915_gem_vma_create(struct drm_i915_gem_object *obj,
 		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&vma->vma_link);
+	INIT_LIST_HEAD(&vma->pin_capture_link);
 	INIT_LIST_HEAD(&vma->mm_list);
 	INIT_LIST_HEAD(&vma->exec_list);
 	vma->vm = vm;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8d6f7c1..1d75801 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -126,6 +126,8 @@  struct i915_vma {
 
 	struct list_head vma_link; /* Link in the object's VMA list */
 
+	struct list_head pin_capture_link; /* Link in the error capture */
+
 	/** This vma's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ebe2904..123a4fc 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -665,14 +665,14 @@  static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
 			     int count, struct list_head *head)
 {
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 	int i = 0;
 
-	list_for_each_entry(obj, head, global_list) {
-		if (!i915_gem_obj_is_pinned(obj))
+	list_for_each_entry(vma, head, pin_capture_link) {
+		if (!i915_gem_obj_is_pinned(vma->obj))
 			continue;
 
-		capture_bo(err++, obj);
+		capture_bo(err++, vma->obj);
 		if (++i == count)
 			break;
 	}
@@ -982,21 +982,32 @@  static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 				const int vm_ndx)
 {
 	struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
-	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	int active_vma_count = 0;
+	int vma_pin_count = 0;
+	LIST_HEAD(pinned_vma);
 
-	list_for_each_entry(vma, &vm->active_list, mm_list)
+	list_for_each_entry(vma, &vm->active_list, mm_list) {
 		active_vma_count++;
+		if (vma->pin_count) {
+			vma_pin_count++;
+			list_move_tail(&vma->pin_capture_link, &pinned_vma);
+		}
+	}
 
-	error->active_bo_count[vm_ndx] = active_vma_count;
-
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		if (i915_gem_obj_is_pinned(obj))
-			active_vma_count++;
+	list_for_each_entry(vma, &vm->inactive_list, mm_list) {
+		/* Certain objects may be on the inactive list, but pinned, when
+		 * in the global GGTT. */
+		if (WARN_ON(!i915_is_ggtt(vm) &&
+			    vma->pin_count &&
+			    !(vma->exec_entry->flags & (1<<31)))) { /* FIXME: need the actual flag */
+			vma_pin_count++;
+			list_move_tail(&vma->pin_capture_link, &pinned_vma);
+		}
+	}
 
-	/* XXX: this is an incorrect measurement of pinned BOs */
-	error->pinned_bo_count[vm_ndx] = active_vma_count - error->active_bo_count[vm_ndx];
+	error->active_bo_count[vm_ndx] = active_vma_count;
+	error->pinned_bo_count[vm_ndx] = vma_pin_count;
 
 	if (active_vma_count) {
 		active_bo = kcalloc(active_vma_count, sizeof(*active_bo), GFP_ATOMIC);
@@ -1014,7 +1025,7 @@  static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 		error->pinned_bo_count[vm_ndx] =
 			capture_pinned_bo(pinned_bo,
 					  error->pinned_bo_count[vm_ndx],
-					  &dev_priv->mm.bound_list);
+					  &pinned_vma);
 	error->active_bo[vm_ndx] = active_bo;
 	error->pinned_bo[vm_ndx] = pinned_bo;
 }