From patchwork Mon Jan 4 18:57:58 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 70698 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by demeter.kernel.org (8.14.3/8.14.2) with ESMTP id o04IwBdQ002829 for ; Mon, 4 Jan 2010 18:58:11 GMT Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id EA3059EB5F; Mon, 4 Jan 2010 10:58:10 -0800 (PST) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fmsmga101.fm.intel.com (mga05.intel.com [192.55.52.89]) by gabe.freedesktop.org (Postfix) with ESMTP id E581F9EB53 for ; Mon, 4 Jan 2010 10:58:07 -0800 (PST) Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga101.fm.intel.com with ESMTP; 04 Jan 2010 10:57:41 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.47,499,1257148800"; d="scan'208";a="761197626" Received: from unknown (HELO localhost.localdomain) ([10.255.17.78]) by fmsmga001.fm.intel.com with ESMTP; 04 Jan 2010 10:57:57 -0800 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Mon, 4 Jan 2010 18:57:58 +0000 Message-Id: <1262631479-10949-3-git-send-email-chris@chris-wilson.co.uk> X-Mailer: git-send-email 1.6.5.7 In-Reply-To: <1262631479-10949-1-git-send-email-chris@chris-wilson.co.uk> References: <1262631479-10949-1-git-send-email-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 3/4] drm/i915: Record batch buffer following GPU error X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.9 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: intel-gfx-bounces@lists.freedesktop.org Errors-To: intel-gfx-bounces@lists.freedesktop.org diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 
463e8d0..6521c83 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -380,6 +380,38 @@ static int i915_error_state(struct seq_file *m, void *unused) seq_printf(m, " INSTDONE1: 0x%08x\n", error->instdone1); } + if (error->active_bo_count) { + int i; + + seq_printf(m, "Buffers [%d]:\n", error->active_bo_count); + + for (i = 0; i < error->active_bo_count; i++) { + seq_printf(m, " %08x %8zd %08x %08x", + error->active_bo[i].gtt_offset, + error->active_bo[i].size, + error->active_bo[i].read_domains, + error->active_bo[i].write_domain); + + if (error->active_bo[i].name) + seq_printf(m, " (name: %d)", error->active_bo[i].name); + if (error->active_bo[i].fence_reg != I915_FENCE_REG_NONE) + seq_printf(m, " (fence: %d)", error->active_bo[i].fence_reg); + + seq_printf(m, "\n"); + } + } + + if (error->batchbuffer && + i915_gem_object_get_pages(error->batchbuffer) == 0) { + struct drm_gem_object *obj = error->batchbuffer; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + seq_printf(m, "--- gtt_offset = 0x%08x\n", obj_priv->gtt_offset); + i915_dump_pages(m, obj_priv->pages, obj->size / PAGE_SIZE); + + i915_gem_object_put_pages(obj); + } + out: spin_unlock_irqrestore(&dev_priv->error_lock, flags); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 29dd676..7b7ea9e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -150,7 +150,18 @@ struct drm_i915_error_state { u32 instps; u32 instdone1; u32 seqno; + u64 bbaddr; struct timeval time; + struct drm_gem_object *batchbuffer; + struct drm_i915_error_buffer { + size_t size; + u32 name; + u32 gtt_offset; + u32 read_domains; + u32 write_domain; + u32 fence_reg; + } *active_bo; + u32 active_bo_count; }; struct drm_i915_display_funcs { diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7cd8110..86b2f53 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ 
b/drivers/gpu/drm/i915/i915_irq.c
@@ -370,6 +370,69 @@ static void i915_error_work_func(struct work_struct *work)
 	}
 }
 
+/**
+ * clone_obj - copy a GEM object into a newly allocated one
+ * @dev: drm device
+ * @src: object whose pages are copied
+ *
+ * Allocates an object of the same size as @src and copies @src into it one
+ * page at a time.  The copy also records @src's gtt_offset.
+ *
+ * NOTE(review): the caller added by this patch runs with spinlocks held and
+ * interrupts disabled; confirm that drm_gem_object_alloc() and
+ * i915_gem_object_get_pages() may be called from atomic context.
+ *
+ * Returns the copy, or NULL if the allocation or either
+ * i915_gem_object_get_pages() call fails.
+ */
+static struct drm_gem_object *
+clone_obj(struct drm_device *dev, struct drm_gem_object *src)
+{
+	struct drm_gem_object *dst;
+	struct drm_i915_gem_object *src_priv, *dst_priv;
+	int page, page_count;
+
+	dst = drm_gem_object_alloc(dev, src->size);
+	if (dst == NULL)
+		return NULL;
+
+	if (i915_gem_object_get_pages(src))
+		goto error_unref;
+
+	if (i915_gem_object_get_pages(dst))
+		goto error_src;
+
+	src_priv = src->driver_private;
+	dst_priv = dst->driver_private;
+
+	page_count = src->size / PAGE_SIZE;
+	for (page = 0; page < page_count; page++) {
+		void *s, *d;
+
+		s = kmap_atomic(src_priv->pages[page], KM_USER0);
+		d = kmap_atomic(dst_priv->pages[page], KM_USER1);
+		memcpy(d, s, PAGE_SIZE);
+
+		/* kunmap_atomic() wants the mapped address, not the page. */
+		kunmap_atomic(d, KM_USER1);
+		kunmap_atomic(s, KM_USER0);
+	}
+
+	i915_gem_object_put_pages(dst);
+	i915_gem_object_put_pages(src);
+
+	/* We lie here, but it makes later analysis easier. */
+	dst_priv->gtt_offset = src_priv->gtt_offset;
+
+	return dst;
+
+error_src:
+	i915_gem_object_put_pages(src);
+error_unref:
+	drm_gem_object_unreference(dst);
+	return NULL;
+}
+
 /**
  * i915_capture_error_state - capture an error record for later analysis
  * @dev: drm device
@@ -382,8 +427,10 @@ static void i915_error_work_func(struct work_struct *work)
 static void i915_capture_error_state(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj_priv;
 	struct drm_i915_error_state *error;
 	unsigned long flags;
+	int count;
 
 	spin_lock_irqsave(&dev_priv->error_lock, flags);
 	if (dev_priv->first_error)
@@ -405,6 +452,7 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->ipehr = I915_READ(IPEHR);
 		error->instdone = I915_READ(INSTDONE);
 		error->acthd = I915_READ(ACTHD);
+		error->bbaddr = 0; /* XXX ? 
*/
 	} else {
 		error->ipeir = I915_READ(IPEIR_I965);
 		error->ipehr = I915_READ(IPEHR_I965);
@@ -412,7 +460,53 @@ static void i915_capture_error_state(struct drm_device *dev)
 		error->instps = I915_READ(INSTPS);
 		error->instdone1 = I915_READ(INSTDONE1);
 		error->acthd = I915_READ(ACTHD_I965);
+		error->bbaddr = I915_READ64(BB_ADDR);
+	}
+
+	/* Grab the current batchbuffer, most likely to have crashed. */
+	error->batchbuffer = NULL;
+	spin_lock(&dev_priv->mm.active_list_lock);
+	list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+		struct drm_gem_object *obj = obj_priv->obj;
+
+		if (error->bbaddr >= obj_priv->gtt_offset &&
+		    error->bbaddr < obj_priv->gtt_offset + obj->size) {
+			/* We need to copy this to an anonymous buffer as
+			 * the simplest method to avoid being overwritten
+			 * by userspace.
+			 */
+			error->batchbuffer = clone_obj(dev, obj);
+			break;
+		}
+	}
+
+	/* Record buffers on the active list. */
+	error->active_bo = NULL;
+	error->active_bo_count = 0;
+
+	count = 0;
+	list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list)
+		count++;
+
+	if (count)
+		error->active_bo = kmalloc(sizeof(*error->active_bo)*count, GFP_ATOMIC);
+
+	if (error->active_bo) {
+		count = 0;
+		list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+			struct drm_gem_object *obj = obj_priv->obj;
+
+			error->active_bo[count].size = obj->size;
+			error->active_bo[count].name = obj->name;
+			error->active_bo[count].gtt_offset = obj_priv->gtt_offset;
+			error->active_bo[count].read_domains = obj->read_domains;
+			error->active_bo[count].write_domain = obj->write_domain;
+			error->active_bo[count].fence_reg = obj_priv->fence_reg;
+			count++;
+		}
+		error->active_bo_count = count;
 	}
+	spin_unlock(&dev_priv->mm.active_list_lock);
 
 	do_gettimeofday(&error->time);
 
@@ -422,6 +516,40 @@ out:
 	spin_unlock_irqrestore(&dev_priv->error_lock, flags);
 }
 
+/**
+ * i915_destroy_error_state - free the recorded error state, if any
+ * @dev: drm device
+ *
+ * Detaches dev_priv->first_error while holding error_lock, then drops the
+ * reference on the saved batchbuffer (under struct_mutex) and frees the
+ * active buffer array and the record itself.  Does nothing when no error
+ * state is recorded.
+ */
+void i915_destroy_error_state(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_error_state *stale;
+	unsigned long irqflags;
+
+	/* Detach the record from dev_priv while holding the lock. */
+	spin_lock_irqsave(&dev_priv->error_lock, irqflags);
+	stale = dev_priv->first_error;
+	dev_priv->first_error = NULL;
+	spin_unlock_irqrestore(&dev_priv->error_lock, irqflags);
+
+	if (!stale)
+		return;
+
+	if (stale->batchbuffer != NULL) {
+		mutex_lock(&dev->struct_mutex);
+		drm_gem_object_unreference(stale->batchbuffer);
+		mutex_unlock(&dev->struct_mutex);
+	}
+
+	kfree(stale->active_bo);
+	kfree(stale);
+}
+
 /**
  * i915_handle_error - handle an error interrupt
  * @dev: drm device
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f79b133..e0ee576 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -324,6 +324,7 @@
 #define CM0_COLOR_EVICT_DISABLE (1<<3)
 #define CM0_DEPTH_WRITE_DISABLE (1<<1)
 #define CM0_RC_OP_FLUSH_DISABLE (1<<0)
+#define BB_ADDR 0x02140 /* 8 bytes */
 #define GFX_FLSH_CNTL 0x02170 /* 915+ only */