From patchwork Tue Oct 26 07:33:16 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Zou, Nanhai" X-Patchwork-Id: 281632 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id o9Q7Ya9V032607 for ; Tue, 26 Oct 2010 07:34:57 GMT Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 085449EFFA for ; Tue, 26 Oct 2010 00:34:36 -0700 (PDT) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by gabe.freedesktop.org (Postfix) with ESMTP id 8ADB99EB0A for ; Tue, 26 Oct 2010 00:33:46 -0700 (PDT) Received: from fmsmga001.fm.intel.com ([10.253.24.23]) by fmsmga101.fm.intel.com with ESMTP; 26 Oct 2010 00:33:46 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.58,239,1286175600"; d="scan'208";a="851052435" Received: from linux-g45.sh.intel.com (HELO hdmi.sh.intel.com) ([10.239.13.42]) by fmsmga001.fm.intel.com with ESMTP; 26 Oct 2010 00:33:45 -0700 From: Zou Nan hai To: intel-gfx@lists.freedesktop.org Date: Tue, 26 Oct 2010 15:33:16 +0800 Message-Id: <1288078396-6002-2-git-send-email-nanhai.zou@intel.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1288078396-6002-1-git-send-email-nanhai.zou@intel.com> References: <1288078396-6002-1-git-send-email-nanhai.zou@intel.com> Subject: [Intel-gfx] [PATCH 2/2] use BLT command to accelerate uxa on gen6. X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.11 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+patchwork-intel-gfx=patchwork.kernel.org@lists.freedesktop.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter1.kernel.org [140.211.167.41]); Tue, 26 Oct 2010 07:34:57 +0000 (UTC) diff --git a/src/i830_reg.h b/src/i830_reg.h index 4080896..93d03cf 100644 --- a/src/i830_reg.h +++ b/src/i830_reg.h @@ -32,6 +32,8 @@ /* Flush */ #define MI_FLUSH (0x04<<23) +#define MI_FLUSH_DW (0x26<<23) + #define MI_WRITE_DIRTY_STATE (1<<4) #define MI_END_SCENE (1<<3) #define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3) diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c index 3b72ba1..cde086b 100644 --- a/src/intel_batchbuffer.c +++ b/src/intel_batchbuffer.c @@ -171,6 +171,12 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn, int batch_idx) intel_batch_do_flush(scrn, batch_idx); } else { + BEGIN_BATCH_BLT(4); + OUT_BATCH_BLT(MI_FLUSH_DW | 2); + OUT_BATCH_BLT(0); + OUT_BATCH_BLT(0); + OUT_BATCH_BLT(0); + ADVANCE_BATCH_BLT(); } } @@ -193,13 +199,22 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx) if (batch->batch_used == 0) return; + + if (batch_idx == RENDER_BATCH) { + /* Mark the end of the batchbuffer. */ + OUT_BATCH(MI_BATCH_BUFFER_END); + /* Emit a padding dword if we aren't going to be quad-word aligned. */ + if (batch->batch_used & 1) + OUT_BATCH(MI_NOOP); + } else { + /* Mark the end of the batchbuffer. */ + OUT_BATCH_BLT(MI_BATCH_BUFFER_END); + /* Emit a padding dword if we aren't going to be quad-word aligned. */ + if (batch->batch_used & 1) + OUT_BATCH_BLT(MI_NOOP); + } - /* Mark the end of the batchbuffer. */ - OUT_BATCH(MI_BATCH_BUFFER_END); - /* Emit a padding dword if we aren't going to be quad-word aligned. */ - if (batch->batch_used & 1) - OUT_BATCH(MI_NOOP); - + if (DUMP_BATCHBUFFERS) { FILE *file = fopen(DUMP_BATCHBUFFERS, "a"); if (file) { @@ -211,9 +226,13 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush, int batch_idx) ret = dri_bo_subdata(batch->batch_bo, 0, batch->batch_used*4, batch->batch_ptr); if (ret == 0) { if (batch_idx == RENDER_BATCH) { - ret = dri_bo_exec(batch->batch_bo, batch->batch_used*4, - NULL, 0, 0xffffffff); + ret = drm_intel_bo_mrb_exec(batch->batch_bo, + batch->batch_used*4, + NULL, 0, 0xffffffff, I915_EXEC_RENDER); } else { + ret = drm_intel_bo_mrb_exec(batch->batch_bo, + batch->batch_used*4, + NULL, 0, 0xffffffff, I915_EXEC_BLIT); } } if (ret != 0) { diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h index 1ed3ad8..6d1ee15 100644 --- a/src/intel_batchbuffer.h +++ b/src/intel_batchbuffer.h @@ -156,20 +156,35 @@ intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap, } #define ALIGN_BATCH(align) intel_batch_align(intel, align, RENDER_BATCH); +#define ALIGN_BATCH_BLT(align) intel_batch_align(intel, align, BLT_BATCH); + #define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword, RENDER_BATCH) +#define OUT_BATCH_BLT(dword) intel_batch_emit_dword(intel, dword, BLT_BATCH) #define OUT_RELOC(bo, read_domains, write_domains, delta) \ intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,RENDER_BATCH) +#define OUT_RELOC_BLT(bo, read_domains, write_domains, delta) \ + intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0,BLT_BATCH) + #define OUT_RELOC_FENCED(bo, read_domains, write_domains, delta) \ intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,RENDER_BATCH) +#define OUT_RELOC_FENCED_BLT(bo, read_domains, write_domains, delta) \ + intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 1,BLT_BATCH) + #define OUT_RELOC_PIXMAP(pixmap, reads, write, delta) \ intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, RENDER_BATCH) +#define OUT_RELOC_PIXMAP_BLT(pixmap, reads, write, delta) \ + intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, BLT_BATCH) + #define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta) \ intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, RENDER_BATCH) +#define OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, reads, write, delta) \ + intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, BLT_BATCH) + union intfloat { float f; unsigned int ui; @@ -181,20 +196,23 @@ union intfloat { OUT_BATCH(tmp.ui); \ } while(0) -#define BEGIN_BATCH(n) \ +#define __BEGIN_BATCH(n,batch_idx) \ do { \ - struct batch *batch = &intel->batch[RENDER_BATCH]; \ + struct batch *batch = &intel->batch[batch_idx]; \ if (batch->batch_emitting != 0) \ FatalError("%s: BEGIN_BATCH called without closing " \ "ADVANCE_BATCH\n", __FUNCTION__); \ assert(!batch->in_batch_atomic); \ - intel_batch_require_space(scrn, intel, (n) * 4, RENDER_BATCH); \ + intel_batch_require_space(scrn, intel, (n) * 4, batch_idx); \ batch->batch_emitting = (n); \ batch->batch_emit_start = batch->batch_used; \ } while (0) -#define ADVANCE_BATCH() do { \ - struct batch *batch = &intel->batch[RENDER_BATCH]; \ +#define BEGIN_BATCH(n) __BEGIN_BATCH(n,RENDER_BATCH) +#define BEGIN_BATCH_BLT(n) __BEGIN_BATCH(n,BLT_BATCH) + +#define __ADVANCE_BATCH(batch_idx) do { \ + struct batch *batch = &intel->batch[batch_idx]; \ if (batch->batch_emitting == 0) \ FatalError("%s: ADVANCE_BATCH called with no matching " \ "BEGIN_BATCH\n", __FUNCTION__); \ @@ -213,6 +231,9 @@ do { \ batch->batch_emitting = 0; \ } while (0) +#define ADVANCE_BATCH(batch_idx) __ADVANCE_BATCH(RENDER_BATCH) +#define ADVANCE_BATCH_BLT(batch_idx) __ADVANCE_BATCH(BLT_BATCH) + void intel_next_vertex(intel_screen_private *intel); static inline void intel_vertex_emit(intel_screen_private *intel, float v) { diff --git a/src/intel_driver.c b/src/intel_driver.c index b9fb69d..051497d 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -581,8 +581,6 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags) } intel->use_shadow = FALSE; - if (IS_GEN6(intel)) - intel->use_shadow = TRUE; if (xf86IsOptionSet(intel->Options, OPTION_SHADOW)) { intel->use_shadow = @@ -809,6 +807,7 @@ intel_flush_callback(CallbackListPtr *list, intel_batch_submit(scrn, intel->batch[RENDER_BATCH].need_mi_flush ||!list_is_empty(&intel->batch[RENDER_BATCH].flush_pixmaps), RENDER_BATCH); + } } diff --git a/src/intel_uxa.c b/src/intel_uxa.c index 05ac3d2..cbd87ca 100644 --- a/src/intel_uxa.c +++ b/src/intel_uxa.c @@ -208,17 +208,9 @@ intel_uxa_pixmap_compute_size(PixmapPtr pixmap, } static Bool -i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask) +generic_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask) { ScrnInfoPtr scrn = xf86Screens[drawable->pScreen->myNum]; - intel_screen_private *intel = intel_get_screen_private(scrn); - - if (IS_GEN6(intel)) { - intel_debug_fallback(scrn, - "Sandybridge BLT engine not supported\n"); - return FALSE; - } - if (!UXA_PM_IS_SOLID(drawable, planemask)) { intel_debug_fallback(scrn, "planemask is not solid\n"); return FALSE; @@ -232,7 +224,6 @@ i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask) default: return FALSE; } - return TRUE; } @@ -240,7 +231,7 @@ i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask) * Sets up hardware state for a series of solid fills. */ static Bool -i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) +generic_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) { ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; intel_screen_private *intel = intel_get_screen_private(scrn); @@ -252,10 +243,17 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) if (!intel_check_pitch_2d(pixmap)) return FALSE; - if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), - RENDER_BATCH)) - return FALSE; - + if (IS_GEN6(intel)) { + if (!intel_get_aperture_space(scrn, bo_table, + ARRAY_SIZE(bo_table), + BLT_BATCH)) + return FALSE; + } else { + if (!intel_get_aperture_space(scrn, bo_table, + ARRAY_SIZE(bo_table), + RENDER_BATCH)) + return FALSE; + } intel->BR[13] = (I830PatternROP[alu] & 0xff) << 16; switch (pixmap->drawable.bitsPerPixel) { case 8: @@ -274,6 +272,52 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg) return TRUE; } +static void gen6_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) +{ + ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + unsigned long pitch; + uint32_t cmd; + if (x1 < 0) + x1 = 0; + if (y1 < 0) + y1 = 0; + if (x2 > pixmap->drawable.width) + x2 = pixmap->drawable.width; + if (y2 > pixmap->drawable.height) + y2 = pixmap->drawable.height; + + if (x2 <= x1 || y2 <= y1) + return; + + pitch = intel_pixmap_pitch(pixmap); + { + BEGIN_BATCH_BLT(6); + + cmd = XY_COLOR_BLT_CMD; + + if (pixmap->drawable.bitsPerPixel == 32) + cmd |= + XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB; + + if (intel_pixmap_tiled(pixmap)) { + assert((pitch % 512) == 0); + pitch >>= 2; + cmd |= XY_COLOR_BLT_TILED; + } + + OUT_BATCH_BLT(cmd); + + OUT_BATCH_BLT(intel->BR[13] | pitch); + OUT_BATCH_BLT((y1 << 16) | (x1 & 0xffff)); + OUT_BATCH_BLT((y2 << 16) | (x2 & 0xffff)); + OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, I915_GEM_DOMAIN_RENDER, + 0, 0); + OUT_BATCH_BLT(intel->BR[16]); + ADVANCE_BATCH_BLT(); + } +} + static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) { ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; @@ -324,10 +368,15 @@ static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2) ironlake_blt_workaround(scrn); } -static void i830_uxa_done_solid(PixmapPtr pixmap) +static void gen6_uxa_done_solid(PixmapPtr pixmap) { ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; + intel_batch_submit(scrn, FALSE, BLT_BATCH); +} +static void i830_uxa_done_solid(PixmapPtr pixmap) +{ + ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; intel_debug_flush(scrn, RENDER_BATCH); } @@ -336,17 +385,10 @@ static void i830_uxa_done_solid(PixmapPtr pixmap) * - support planemask using FULL_BLT_CMD? */ static Bool -i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest, +generic_uxa_check_copy(PixmapPtr source, PixmapPtr dest, int alu, Pixel planemask) { ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; - intel_screen_private *intel = intel_get_screen_private(scrn); - - if (IS_GEN6(intel)) { - intel_debug_fallback(scrn, - "Sandybridge BLT engine not supported\n"); - return FALSE; - } if (!UXA_PM_IS_SOLID(&source->drawable, planemask)) { intel_debug_fallback(scrn, "planemask is not solid"); @@ -375,7 +417,7 @@ i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest, } static Bool -i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, +generic_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, int ydir, int alu, Pixel planemask) { ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; @@ -386,9 +428,18 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, intel_get_pixmap_bo(dest), }; - if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), - RENDER_BATCH)) - return FALSE; + + if (IS_GEN6(intel)) { + if (!intel_get_aperture_space(scrn, bo_table, + ARRAY_SIZE(bo_table), + BLT_BATCH)) + return FALSE; + } else { + if (!intel_get_aperture_space(scrn, bo_table, + ARRAY_SIZE(bo_table), + RENDER_BATCH)) + return FALSE; + } intel->render_source = source; @@ -408,6 +459,90 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir, } static void +gen6_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, + int dst_y1, int w, int h) +{ + ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; + intel_screen_private *intel = intel_get_screen_private(scrn); + uint32_t cmd; + int dst_x2, dst_y2, src_x2, src_y2; + unsigned int dst_pitch, src_pitch; + + dst_x2 = dst_x1 + w; + dst_y2 = dst_y1 + h; + + /* XXX Fixup extents as a lamentable workaround for missing + * source clipping in the upper layers. + */ + if (dst_x1 < 0) + src_x1 -= dst_x1, dst_x1 = 0; + if (dst_y1 < 0) + src_y1 -= dst_y1, dst_y1 = 0; + if (dst_x2 > dest->drawable.width) + dst_x2 = dest->drawable.width; + if (dst_y2 > dest->drawable.height) + dst_y2 = dest->drawable.height; + + src_x2 = src_x1 + (dst_x2 - dst_x1); + src_y2 = src_y1 + (dst_y2 - dst_y1); + + if (src_x1 < 0) + dst_x1 -= src_x1, src_x1 = 0; + if (src_y1 < 0) + dst_y1 -= src_y1, src_y1 = 0; + if (src_x2 > intel->render_source->drawable.width) + dst_x2 -= src_x2 - intel->render_source->drawable.width; + if (src_y2 > intel->render_source->drawable.height) + dst_y2 -= src_y2 - intel->render_source->drawable.height; + + if (dst_x2 <= dst_x1 || dst_y2 <= dst_y1) + return; + + dst_pitch = intel_pixmap_pitch(dest); + src_pitch = intel_pixmap_pitch(intel->render_source); + { + BEGIN_BATCH_BLT(8); + + cmd = XY_SRC_COPY_BLT_CMD; + + if (dest->drawable.bitsPerPixel == 32) + cmd |= + XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB; + + if (INTEL_INFO(intel)->gen >= 40) { + if (intel_pixmap_tiled(dest)) { + assert((dst_pitch % 512) == 0); + dst_pitch >>= 2; + cmd |= XY_SRC_COPY_BLT_DST_TILED; + } + + if (intel_pixmap_tiled(intel->render_source)) { + assert((src_pitch % 512) == 0); + src_pitch >>= 2; + cmd |= XY_SRC_COPY_BLT_SRC_TILED; + } + } + + OUT_BATCH_BLT(cmd); + + OUT_BATCH_BLT(intel->BR[13] | dst_pitch); + OUT_BATCH_BLT((dst_y1 << 16) | (dst_x1 & 0xffff)); + OUT_BATCH_BLT((dst_y2 << 16) | (dst_x2 & 0xffff)); + OUT_RELOC_PIXMAP_FENCED_BLT(dest, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH_BLT((src_y1 << 16) | (src_x1 & 0xffff)); + OUT_BATCH_BLT(src_pitch); + OUT_RELOC_PIXMAP_FENCED_BLT(intel->render_source, + I915_GEM_DOMAIN_RENDER, 0, + 0); + ADVANCE_BATCH_BLT(); + } +} + +static void i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, int dst_y1, int w, int h) { @@ -497,10 +632,16 @@ i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1, static void i830_uxa_done_copy(PixmapPtr dest) { ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; + intel_debug_flush(scrn, BLT_BATCH); +} - intel_debug_flush(scrn, RENDER_BATCH); +static void gen6_uxa_done_copy(PixmapPtr dest) +{ + ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum]; + intel_batch_submit(scrn, FALSE, BLT_BATCH); } + /** * Do any cleanup from the Composite operation. * @@ -1191,17 +1332,30 @@ Bool intel_uxa_init(ScreenPtr screen) intel->vertex_bo = NULL; /* Solid fill */ - intel->uxa_driver->check_solid = i830_uxa_check_solid; - intel->uxa_driver->prepare_solid = i830_uxa_prepare_solid; - intel->uxa_driver->solid = i830_uxa_solid; - intel->uxa_driver->done_solid = i830_uxa_done_solid; + if (IS_GEN6(intel)) { + intel->uxa_driver->check_solid = generic_uxa_check_solid; + intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid; + intel->uxa_driver->solid = gen6_uxa_solid; + intel->uxa_driver->done_solid = gen6_uxa_done_solid; + } else { + intel->uxa_driver->check_solid = generic_uxa_check_solid; + intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid; + intel->uxa_driver->solid = i830_uxa_solid; + intel->uxa_driver->done_solid = i830_uxa_done_solid; + } /* Copy */ - intel->uxa_driver->check_copy = i830_uxa_check_copy; - intel->uxa_driver->prepare_copy = i830_uxa_prepare_copy; - intel->uxa_driver->copy = i830_uxa_copy; - intel->uxa_driver->done_copy = i830_uxa_done_copy; - + if (IS_GEN6(intel)) { + intel->uxa_driver->check_copy = generic_uxa_check_copy; + intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy; + intel->uxa_driver->copy = gen6_uxa_copy; + intel->uxa_driver->done_copy = gen6_uxa_done_copy; + } else { + intel->uxa_driver->check_copy = generic_uxa_check_copy; + intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy; + intel->uxa_driver->copy = i830_uxa_copy; + intel->uxa_driver->done_copy = i830_uxa_done_copy; + } /* Composite */ if (IS_GEN2(intel)) { intel->uxa_driver->check_composite = i830_check_composite;