From patchwork Wed Feb 16 06:07:38 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Zhenyu Wang X-Patchwork-Id: 565781 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p1G69PBa030600 for ; Wed, 16 Feb 2011 06:09:45 GMT Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id B0A079EB3B for ; Tue, 15 Feb 2011 22:09:25 -0800 (PST) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by gabe.freedesktop.org (Postfix) with ESMTP id 1241E9E7E2 for ; Tue, 15 Feb 2011 22:08:14 -0800 (PST) Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga101.jf.intel.com with ESMTP; 15 Feb 2011 22:07:55 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.60,479,1291622400"; d="scan'208";a="603105402" Received: from ubuntu-hp.sh.intel.com ([10.239.36.56]) by orsmga002.jf.intel.com with ESMTP; 15 Feb 2011 22:07:43 -0800 From: Zhenyu Wang To: intel-gfx@lists.freedesktop.org Date: Wed, 16 Feb 2011 14:07:38 +0800 Message-Id: <1297836461-31543-2-git-send-email-zhenyuw@linux.intel.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1297836461-31543-1-git-send-email-zhenyuw@linux.intel.com> References: <1297836461-31543-1-git-send-email-zhenyuw@linux.intel.com> Cc: yuanhan.liu@intel.com Subject: [Intel-gfx] [PATCH] intel: Add AUB file dump support X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.11 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org X-Greylist: IP, sender 
and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Wed, 16 Feb 2011 06:09:45 +0000 (UTC)

diff --git a/intel/Makefile.am b/intel/Makefile.am
index 1ae92f8..398cd2f 100644
--- a/intel/Makefile.am
+++ b/intel/Makefile.am
@@ -41,7 +41,8 @@ libdrm_intel_la_SOURCES = \
 	intel_bufmgr_gem.c \
 	intel_chipset.h \
 	mm.c \
-	mm.h
+	mm.h \
+	intel_aub.h
 
 libdrm_intelincludedir = ${includedir}/libdrm
 libdrm_intelinclude_HEADERS = intel_bufmgr.h
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index daa18b4..bb4158a 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -35,6 +35,7 @@
 #define INTEL_BUFMGR_H
 
 #include <stdint.h>
+#include <stdio.h>
 
 struct drm_clip_rect;
 
@@ -83,6 +84,39 @@ struct _drm_intel_bo {
 	int handle;
 };
 
+enum drm_intel_aub_bmp_format {
+	AUB_DUMP_BMP_LEGACY,
+	AUB_DUMP_BMP_8BIT,
+	AUB_DUMP_BMP_ARGB_0555,
+	AUB_DUMP_BMP_ARGB_0565,
+	AUB_DUMP_BMP_ARGB_4444,
+	AUB_DUMP_BMP_ARGB_1555,
+	AUB_DUMP_BMP_ARGB_0888,
+	AUB_DUMP_BMP_ARGB_8888,
+	AUB_DUMP_BMP_YCRCB_SWAPY,
+	AUB_DUMP_BMP_YCRCB_NORMAL,
+	AUB_DUMP_BMP_YCRCB_SWAPUV,
+	AUB_DUMP_BMP_YCRCB_SWAPUVY,
+	AUB_DUMP_BMP_ABGR_8888,
+};
+
+/*
+ * surface info needed by aub DUMP_BMP block
+ */
+struct drm_intel_aub_surface_bmp {
+	uint16_t x_offset;
+	uint16_t y_offset;
+	uint16_t pitch;
+	uint8_t bits_per_pixel;
+	uint8_t format;
+	uint16_t width;
+	uint16_t height;
+	uint32_t tiling_walk_y:1;
+	uint32_t tiling:1;
+	uint32_t pad:30;
+};
+
+
 #define BO_ALLOC_FOR_RENDER (1<<0)
 
 drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
@@ -150,6 +184,10 @@ int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
 void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
 int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
 
+void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr *bufmgr, FILE *file);
+void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr *bufmgr);
+int drm_intel_gem_aub_dump_bmp(drm_intel_bufmgr *bufmgr,
drm_intel_bo *bo,
+			       unsigned int offset,
+			       struct drm_intel_aub_surface_bmp *bmp);
 
 /* drm_intel_bufmgr_fake.c */
 drm_intel_bufmgr *drm_intel_bufmgr_fake_init(int fd,
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 3cdffce..654bc31 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -57,6 +57,7 @@
 #include "intel_bufmgr.h"
 #include "intel_bufmgr_priv.h"
 #include "intel_chipset.h"
+#include "intel_aub.h"
 #include "string.h"
 #include "i915_drm.h"
 
@@ -75,6 +76,13 @@ struct drm_intel_gem_bo_bucket {
 	unsigned long size;
 };
 
+struct drm_intel_aub_bmp {
+	drm_intel_bo *bo; /* surface bo */
+	unsigned int offset;
+	struct drm_intel_aub_surface_bmp bmp;
+	struct drm_intel_aub_bmp *next;
+};
+
 typedef struct _drm_intel_bufmgr_gem {
 	drm_intel_bufmgr bufmgr;
 
@@ -106,6 +114,10 @@ typedef struct _drm_intel_bufmgr_gem {
 	unsigned int has_relaxed_fencing : 1;
 	unsigned int bo_reuse : 1;
 	char fenced_relocs;
+
+	FILE *aub_file;
+	uint32_t aub_offset;
+	struct drm_intel_aub_bmp *aub_bmp;
 } drm_intel_bufmgr_gem;
 
 #define DRM_INTEL_RELOC_FENCE (1<<0)
 
@@ -195,8 +207,396 @@ struct _drm_intel_bo_gem {
 	 * relocations.
*/ int reloc_tree_fences; + + uint32_t aub_offset; }; +/* AUB trace dump support */ + +static void +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) +{ + fwrite(&data, 1, 4, bufmgr_gem->aub_file); +} + +static void +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) +{ + fwrite(data, 1, size, bufmgr_gem->aub_file); +} + +static void +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + uint32_t *data; + unsigned int i; + + data = malloc(bo->size); + drm_intel_bo_get_subdata(bo, offset, size, data); + + /* Easy mode: write out bo with no relocations */ + if (!bo_gem->reloc_count) { + aub_out_data(bufmgr_gem, data, size); + free(data); + return; + } + + /* Otherwise, handle the relocations while writing. */ + for (i = 0; i < size / 4; i++) { + int r; + for (r = 0; r < bo_gem->reloc_count; r++) { + struct drm_i915_gem_relocation_entry *reloc; + drm_intel_reloc_target *info; + + reloc = &bo_gem->relocs[r]; + info = &bo_gem->reloc_target_info[r]; + + if (reloc->offset == offset + i * 4) { + drm_intel_bo_gem *target_gem; + uint32_t val; + + target_gem = (drm_intel_bo_gem *)info->bo; + + val = reloc->delta; + val += target_gem->aub_offset; + + aub_out(bufmgr_gem, val); + data[i] = val; + break; + } + } + if (r == bo_gem->reloc_count) { + /* no relocation, just the data */ + aub_out(bufmgr_gem, data[i]); + } + } +} + +static void +aub_bo_get_address(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + /* Give the object a graphics address in the AUB file. 
We
+	 * don't just use the GEM object address because we do AUB
+	 * dumping before execution -- we want to successfully log
+	 * when the hardware might hang, and we might even want to aub
+	 * capture for a driver trying to execute on a different
+	 * generation of hardware by disabling the actual kernel exec
+	 * call.
+	 */
+	bo_gem->aub_offset = bufmgr_gem->aub_offset;
+	bufmgr_gem->aub_offset += bo->size;
+	/* XXX: Handle aperture overflow. */
+	assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
+}
+
+/* Map well-known bo debug names onto AUB trace type/subtype codes. */
+static const struct {
+	const char *name;
+	uint32_t type;
+	uint32_t subtype;
+} name_to_type_mapping[] = {
+	{ "VS_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VS_STATE},
+	{ "GS_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_GS_STATE},
+	{ "CLIP_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CL_STATE},
+	{ "SF_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_STATE},
+	{ "WM_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_WM_STATE},
+	{ "CC_UNIT", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_STATE},
+	{ "CLIP_VP", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CL_VP},
+	{ "SF_VP", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_VP},
+	{ "SF_SCISSOR_UNIT",
+	  AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SF_SCISSOR_RECT},
+	{ "CC_VP", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_VP},
+	{ "SAMPLER", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SAMPLER_STATE},
+	{ "SAMPLER_DEFAULT_COLOR", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_SDC},
+	{ "VS_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+	{ "GS_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+	{ "CLIP_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+	{ "SF_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+	{ "WM_PROG", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_KERNEL},
+	{ "BLEND_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_BLEND_STATE},
+	{ "DEPTH_STENCIL_STATE",
+	  AUB_TRACE_TYPE_GENERAL, AUB_TRACE_DEPTH_STENCIL_STATE},
+	{ "COLOR_CALC_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_CC_STATE},
+	{ "SS_SURF_BIND", AUB_TRACE_TYPE_SURFACE, AUB_TRACE_BINDING_TABLE},
+	{ "SS_SURFACE",
AUB_TRACE_SURFACE_STATE},
+	{ "temporary VBO", AUB_TRACE_TYPE_VERTEX_BUFFER, 0},
+	{ "CURBE", AUB_TRACE_TYPE_CONSTANT_URB, 0},
+	{ "VS constant_bo", AUB_TRACE_TYPE_CONSTANT_BUFFER, 0},
+	{ "WM constant_bo", AUB_TRACE_TYPE_CONSTANT_BUFFER, 0},
+	{ "INTERFACE_DESC", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_INTERFACE_DESC},
+	{ "VLD_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VLD_STATE},
+	{ "VFE_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_VFE_STATE},
+	{ "IT_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_IT_STATE},
+	{ "DI_SAMPLE_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_DI_SAMPLE_STATE},
+	{ "IEF_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_IEF_STATE},
+	{ "AVS_STATE", AUB_TRACE_TYPE_GENERAL, AUB_TRACE_AVS_STATE},
+};
+
+/* Emit a TRACE_HEADER_BLOCK describing one chunk of a bo, followed by
+ * the (relocation-patched) chunk contents.
+ */
+static void
+aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
+		      uint32_t offset, uint32_t size)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+	aub_out(bufmgr_gem,
+		CMD_AUB_TRACE_HEADER_BLOCK |
+		(5 - 2));
+	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
+	aub_out(bufmgr_gem, subtype);
+	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
+	aub_out(bufmgr_gem, size);
+	aub_write_bo_data(bo, offset, size);
+}
+
+/* Assign a bo its AUB address and dump its whole contents, typed by the
+ * bo's debug name when it matches the table above.
+ */
+static void
+aub_write_bo(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	uint32_t type = AUB_TRACE_TYPE_NOTYPE;
+	uint32_t subtype = 0;
+	uint32_t block_size;
+	uint32_t offset;
+	unsigned int i;
+
+	aub_bo_get_address(bo);
+
+	for (i = 0; i < ARRAY_SIZE(name_to_type_mapping); i++) {
+		if (strcmp(bo_gem->name,
+			   name_to_type_mapping[i].name) == 0) {
+			type = name_to_type_mapping[i].type;
+			subtype = name_to_type_mapping[i].subtype;
+			break;
+		}
+	}
+
+	/* Fix: compare against the sentinel "type" was initialized with,
+	 * not the magic literal 0.
+	 */
+	if (type == AUB_TRACE_TYPE_NOTYPE) {
+		DBG("Failed to find type for object %s(size: 0x%lx, aub_offset: 0x%08x)\n",
+		    bo_gem->name, bo->size, bo_gem->aub_offset);
+	}
+
+	/* Break up large
objects into multiple writes.  Otherwise a
+	 * 128kb VBO would overflow the 16 bits of size field in the
+	 * packet header and everything goes badly after that.
+	 */
+	for (offset = 0; offset < bo->size; offset += block_size) {
+		block_size = bo->size - offset;
+
+		/* Cap each trace block at 8kB of payload. */
+		if (block_size > 2 * 4096)
+			block_size = 2 * 4096;
+
+		aub_write_trace_block(bo, type, subtype,
+				      offset, block_size);
+	}
+}
+
+/*
+ * Make a ringbuffer on fly and dump it
+ */
+static void
+aub_generate_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
+			uint32_t batch_buffer, unsigned int flags)
+{
+	uint32_t ringbuffer[1024];
+	int ring = 0;
+
+	/* Pick the AUB ring matching the execbuffer ring selector. */
+	switch (flags) {
+	case I915_EXEC_RENDER:
+	case I915_EXEC_DEFAULT:
+		ring = AUB_TRACE_TYPE_RING_PRB0;
+		break;
+	case I915_EXEC_BSD:
+		ring = AUB_TRACE_TYPE_RING_PRB1;
+		break;
+	case I915_EXEC_BLT:
+		ring = AUB_TRACE_TYPE_RING_PRB2;
+		break;
+	}
+
+	aub_out(bufmgr_gem,
+		CMD_AUB_TRACE_HEADER_BLOCK |
+		(5 - 2));
+	aub_out(bufmgr_gem,
+		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
+	aub_out(bufmgr_gem, 0); /* general/surface subtype */
+	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
+	aub_out(bufmgr_gem, 4096);
+
+	/* Do make a ring buffer here.
+	 * NOTE(review): memset() fills bytes, so this is only a correct
+	 * MI_NOOP fill because AUB_MI_NOOP is 0 -- confirm in intel_aub.h.
+	 */
+	memset(ringbuffer, AUB_MI_NOOP, sizeof(ringbuffer));
+	ringbuffer[0] = AUB_MI_BATCH_BUFFER_START;
+	ringbuffer[1] = batch_buffer;
+
+	/* FIXME: Need some flush operations here?
*/
+
+	aub_out_data(bufmgr_gem, ringbuffer, 4096);
+
+	/* Update offset pointer */
+	bufmgr_gem->aub_offset += 4096;
+}
+
+/* Emit a DUMP_BMP packet for every surface queued via
+ * drm_intel_gem_aub_dump_bmp(), consuming and freeing the list.
+ */
+static void
+aub_dump_bmp(drm_intel_bufmgr_gem *bufmgr_gem)
+{
+	struct drm_intel_aub_bmp *p = bufmgr_gem->aub_bmp;
+
+	while (p) {
+		aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
+		aub_out(bufmgr_gem, (p->bmp.y_offset << 16) | p->bmp.x_offset);
+		aub_out(bufmgr_gem, (p->bmp.format << 24) |
+			(p->bmp.bits_per_pixel << 16) | p->bmp.pitch);
+		aub_out(bufmgr_gem, (p->bmp.height << 16) | p->bmp.width);
+		/* surface bo should already be written out */
+		assert(((drm_intel_bo_gem *)p->bo)->aub_offset != 0);
+		aub_out(bufmgr_gem, ((drm_intel_bo_gem *)p->bo)->aub_offset + p->offset);
+		aub_out(bufmgr_gem, (p->bmp.tiling << 2) | (p->bmp.tiling_walk_y << 3));
+
+		bufmgr_gem->aub_bmp = p->next;
+		free(p);
+		p = bufmgr_gem->aub_bmp;
+	}
+}
+
+/* Dump one execbuffer submission (all bos, batch, ring, BMPs) to the
+ * AUB file, if dumping is enabled.
+ */
+static void
+aub_exec(drm_intel_bo *bo, unsigned int flags)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	int i;
+
+	if (!bufmgr_gem->aub_file)
+		return;
+
+	/* Write out all but the batchbuffer to AUB memory.  Fix: walk the
+	 * whole exec list and skip the batch by identity, instead of
+	 * assuming the batch is the last entry -- the old "exec_count - 1"
+	 * bound silently dropped the final bo whenever the batch was not
+	 * last.
+	 */
+	for (i = 0; i < bufmgr_gem->exec_count; i++) {
+		if (bufmgr_gem->exec_bos[i] != bo)
+			aub_write_bo(bufmgr_gem->exec_bos[i]);
+	}
+
+	aub_bo_get_address(bo);
+
+	/* Dump the batchbuffer. */
+	aub_out(bufmgr_gem,
+		CMD_AUB_TRACE_HEADER_BLOCK |
+		(5 - 2));
+	aub_out(bufmgr_gem,
+		AUB_TRACE_MEMTYPE_GTT | AUB_TRACE_TYPE_BATCH | AUB_TRACE_OP_DATA_WRITE);
+	aub_out(bufmgr_gem, 0); /* general/surface subtype */
+	aub_out(bufmgr_gem, bo_gem->aub_offset);
+	aub_out(bufmgr_gem, bo_gem->bo.size);
+	aub_write_bo_data(bo, 0, bo_gem->bo.size);
+
+	/* Dump ring buffer */
+	aub_generate_ringbuffer(bufmgr_gem, bo_gem->aub_offset, flags);
+
+	/* Dump BMP file for any requested surface */
+	aub_dump_bmp(bufmgr_gem);
+
+	fflush(bufmgr_gem->aub_file);
+
+	/*
+	 * One frame has been dumped. So reset the aub_offset for the next frame.
+	 *
+	 * FIXME: Can we do this?
+	 */
+	bufmgr_gem->aub_offset = 0x10000;
+}
+
+/*
+ * Stop dumping data to aub file
+ */
+void drm_intel_bufmgr_gem_stop_aubfile(drm_intel_bufmgr *bufmgr)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+	bufmgr_gem->aub_file = NULL;
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+}
+
+/* Begin dumping to "file": emit the AUB version header and a full
+ * identity-mapped GTT, then leave the bufmgr ready for aub_exec().
+ */
+void drm_intel_bufmgr_gem_set_aubfile(drm_intel_bufmgr *bufmgr, FILE *file)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+	int entry = 0x3; /* uc/valid GTT */
+	int i;
+
+	if (!file)
+		return;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	bufmgr_gem->aub_file = file;
+
+	/* Start from 0x10000, since the address below is used for GTT entry building */
+	bufmgr_gem->aub_offset = 0x10000;
+
+	/* Start with a (required) version packet. */
+	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
+	aub_out(bufmgr_gem,
+		(4 << AUB_HEADER_MAJOR_SHIFT) |
+		(0 << AUB_HEADER_MINOR_SHIFT));
+	for (i = 0; i < 8; i++) {
+		aub_out(bufmgr_gem, 0); /* app name */
+	}
+	aub_out(bufmgr_gem, 0); /* timestamp */
+	aub_out(bufmgr_gem, 0); /* timestamp */
+	aub_out(bufmgr_gem, 0); /* comment len */
+
+	/* Set up the GTT. The max we can handle is 256M.
+	 * Need improvement, dynamically alloc/write GTT entry
+	 * block for each bo, so AubList output won't contain
+	 * whole GTT entry block in the first, easier for parse.
+	 */
+	for (i = 0x000; i < 0x10000; i += 4, entry += 0x1000) {
+		aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
+		aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
+		aub_out(bufmgr_gem, 0);
+		aub_out(bufmgr_gem, i);
+		aub_out(bufmgr_gem, 4);
+		aub_out(bufmgr_gem, entry);
+	}
+
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+}
+
+/* Queue a surface for DUMP_BMP emission at the end of the next
+ * aub_exec().  Returns 0 on success, negative errno on failure.
+ */
+int drm_intel_gem_aub_dump_bmp(drm_intel_bufmgr *bufmgr,
+			       drm_intel_bo *bo, unsigned int offset,
+			       struct drm_intel_aub_surface_bmp *bmp)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+	struct drm_intel_aub_bmp *aub_bmp, *p;
+
+	aub_bmp = malloc(sizeof(*aub_bmp));
+	if (aub_bmp == NULL)
+		return -ENOMEM;
+
+	aub_bmp->bo = bo;
+	aub_bmp->offset = offset;
+	memcpy(&aub_bmp->bmp, bmp, sizeof(*bmp));
+	aub_bmp->next = NULL;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	/* Insert last.  Fix: the old "last == head" test replaced (and
+	 * leaked) the head node whenever the list held exactly one entry;
+	 * only an empty list may have its head pointer rewritten.
+	 */
+	if (bufmgr_gem->aub_bmp == NULL) {
+		bufmgr_gem->aub_bmp = aub_bmp;
+	} else {
+		p = bufmgr_gem->aub_bmp;
+		while (p->next)
+			p = p->next;
+		p->next = aub_bmp;
+	}
+
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return 0;
+}
+
 static unsigned int
 drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
 
@@ -1624,6 +2024,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
 	execbuf.rsvd1 = 0;
 	execbuf.rsvd2 = 0;
 
+	aub_exec(bo, flags);
+
 	ret = drmIoctl(bufmgr_gem->fd,
 		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
 		       &execbuf);