Message ID | 1363276337-12509-11-git-send-email-mika.kuoppala@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Mar 14, 2013 at 05:52:11PM +0200, Mika Kuoppala wrote: > To count context losses, add struct ctx_reset_state for > both i915_hw_context and drm_i915_file_private. > drm_i915_file_private is used when there is no context. Being really picky, but can we devise a better name than reset_state? I keep reading 'reset' as a verb and get very confused... Even just gpu_reset reads better. Suggestions? -Chris
On Fri, Mar 15, 2013 at 09:52:43AM +0000, Chris Wilson wrote: > On Thu, Mar 14, 2013 at 05:52:11PM +0200, Mika Kuoppala wrote: > > To count context losses, add struct ctx_reset_state for > > both i915_hw_context and drm_i915_file_private. > > drm_i915_file_private is used when there is no context. > > Being really picky, but can we device a better name than reset_state. I > keep reading 'reset' as a verb and get very confused... > > Even just gpu_reset reads better. Suggestions? hang_stats? failure_stats? In any case the struct definition itself needs an i915_ prefix. -Daniel
On 03/14/2013 08:52 AM, Mika Kuoppala wrote: > To count context losses, add struct ctx_reset_state for > both i915_hw_context and drm_i915_file_private. > drm_i915_file_private is used when there is no context. > > Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> > --- > drivers/gpu/drm/i915/i915_dma.c | 4 +++- > drivers/gpu/drm/i915/i915_drv.h | 19 +++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_context.c | 11 +++++++++++ > 3 files changed, 33 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index e16099b..7902d97 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1792,7 +1792,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file) > struct drm_i915_file_private *file_priv; > > DRM_DEBUG_DRIVER("\n"); > - file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL); > + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); > if (!file_priv) > return -ENOMEM; > > @@ -1801,6 +1801,8 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file) > spin_lock_init(&file_priv->mm.lock); > INIT_LIST_HEAD(&file_priv->mm.request_list); > > + i915_gem_context_init_reset_state(dev, &file_priv->reset_state); > + > idr_init(&file_priv->context_idr); > > return 0; > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index a54c507..d004548 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -433,6 +433,19 @@ struct i915_hw_ppgtt { > void (*cleanup)(struct i915_hw_ppgtt *ppgtt); > }; > > +struct ctx_reset_state { > + /* guilty and reset counts when context initialized */ > + unsigned long guilty_cnt; > + unsigned long reset_cnt; I think we can afford to spell out "count." The first time I saw cnt, it looked like a dirty word. :) I think this structure could use some better description of the overall architecture. It's not completely obvious from the individual pieces... 
and that makes it really hard to evaluate. reset_cnt is the number of resets since start-up. What is guilty_cnt? What are innocent and guilty (below)? All of this makes it difficult for me to tell whether or not the logic in patch 16 is correct... and I don't think it is. > + > + unsigned innocent; > + unsigned guilty; > + /* Time when this context was last blamed for a GPU reset. */ > + unsigned long last_guilty_reset; > + > + /* banned to submit more work */ > + bool banned; > +}; > > /* This must match up with the value previously used for execbuf2.rsvd1. */ > #define DEFAULT_CONTEXT_ID 0 > @@ -443,6 +456,7 @@ struct i915_hw_context { > struct drm_i915_file_private *file_priv; > struct intel_ring_buffer *ring; > struct drm_i915_gem_object *obj; > + struct ctx_reset_state reset_state; > }; > > enum no_fbc_reason { > @@ -805,6 +819,7 @@ struct i915_gpu_error { > > unsigned long last_reset; > > + unsigned long guilty_cnt; > /** > * State variable and reset counter controlling the reset flow > * > @@ -1257,6 +1272,8 @@ struct drm_i915_file_private { > struct list_head request_list; > } mm; > struct idr context_idr; > + > + struct ctx_reset_state reset_state; > }; > > #define INTEL_INFO(dev) (((struct drm_i915_private *) (dev)->dev_private)->info) > @@ -1677,6 +1694,8 @@ struct i915_hw_context * __must_check > i915_switch_context(struct intel_ring_buffer *ring, > struct drm_file *file, int to_id); > void i915_gem_context_free(struct kref *ctx_ref); > +void i915_gem_context_init_reset_state(struct drm_device *dev, > + struct ctx_reset_state *rs); > int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 8fb4d3c..dbd14b8 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -145,6 +145,15 @@ static 
void do_destroy(struct i915_hw_context *ctx) > kfree(ctx); > } > > +void i915_gem_context_init_reset_state(struct drm_device *dev, > + struct ctx_reset_state *rs) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + > + rs->reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter); > + rs->guilty_cnt = dev_priv->gpu_error.guilty_cnt; > +} > + > static struct i915_hw_context * > create_hw_context(struct drm_device *dev, > struct drm_i915_file_private *file_priv) > @@ -177,6 +186,8 @@ create_hw_context(struct drm_device *dev, > > ctx->file_priv = file_priv; > > + i915_gem_context_init_reset_state(dev, &ctx->reset_state); > + > again: > if (idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) { > ret = -ENOMEM; >
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e16099b..7902d97 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1792,7 +1792,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file) struct drm_i915_file_private *file_priv; DRM_DEBUG_DRIVER("\n"); - file_priv = kmalloc(sizeof(*file_priv), GFP_KERNEL); + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); if (!file_priv) return -ENOMEM; @@ -1801,6 +1801,8 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file) spin_lock_init(&file_priv->mm.lock); INIT_LIST_HEAD(&file_priv->mm.request_list); + i915_gem_context_init_reset_state(dev, &file_priv->reset_state); + idr_init(&file_priv->context_idr); return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a54c507..d004548 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -433,6 +433,19 @@ struct i915_hw_ppgtt { void (*cleanup)(struct i915_hw_ppgtt *ppgtt); }; +struct ctx_reset_state { + /* guilty and reset counts when context initialized */ + unsigned long guilty_cnt; + unsigned long reset_cnt; + + unsigned innocent; + unsigned guilty; + + unsigned long last_guilty_reset; + + /* banned to submit more work */ + bool banned; +}; /* This must match up with the value previously used for execbuf2.rsvd1. 
*/ #define DEFAULT_CONTEXT_ID 0 @@ -443,6 +456,7 @@ struct i915_hw_context { struct drm_i915_file_private *file_priv; struct intel_ring_buffer *ring; struct drm_i915_gem_object *obj; + struct ctx_reset_state reset_state; }; enum no_fbc_reason { @@ -805,6 +819,7 @@ struct i915_gpu_error { unsigned long last_reset; + unsigned long guilty_cnt; /** * State variable and reset counter controlling the reset flow * @@ -1257,6 +1272,8 @@ struct drm_i915_file_private { struct list_head request_list; } mm; struct idr context_idr; + + struct ctx_reset_state reset_state; }; #define INTEL_INFO(dev) (((struct drm_i915_private *) (dev)->dev_private)->info) @@ -1677,6 +1694,8 @@ struct i915_hw_context * __must_check i915_switch_context(struct intel_ring_buffer *ring, struct drm_file *file, int to_id); void i915_gem_context_free(struct kref *ctx_ref); +void i915_gem_context_init_reset_state(struct drm_device *dev, + struct ctx_reset_state *rs); int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8fb4d3c..dbd14b8 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -145,6 +145,15 @@ static void do_destroy(struct i915_hw_context *ctx) kfree(ctx); } +void i915_gem_context_init_reset_state(struct drm_device *dev, + struct ctx_reset_state *rs) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + rs->reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter); + rs->guilty_cnt = dev_priv->gpu_error.guilty_cnt; +} + static struct i915_hw_context * create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) @@ -177,6 +186,8 @@ create_hw_context(struct drm_device *dev, ctx->file_priv = file_priv; + i915_gem_context_init_reset_state(dev, &ctx->reset_state); + again: if 
(idr_pre_get(&file_priv->context_idr, GFP_KERNEL) == 0) { ret = -ENOMEM;
To count context losses, add struct ctx_reset_state for both i915_hw_context and drm_i915_file_private. drm_i915_file_private is used when there is no context. Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> --- drivers/gpu/drm/i915/i915_dma.c | 4 +++- drivers/gpu/drm/i915/i915_drv.h | 19 +++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 11 +++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-)