From patchwork Thu Sep 3 17:22:18 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?Micha=C5=82_Winiarski?= X-Patchwork-Id: 7117911 Return-Path: X-Original-To: patchwork-intel-gfx@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork1.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.29.136]) by patchwork1.web.kernel.org (Postfix) with ESMTP id CE2849F36E for ; Thu, 3 Sep 2015 17:22:48 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id DB05020430 for ; Thu, 3 Sep 2015 17:22:47 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by mail.kernel.org (Postfix) with ESMTP id C9EB8203AA for ; Thu, 3 Sep 2015 17:22:46 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 54DDC6E74E; Thu, 3 Sep 2015 10:22:46 -0700 (PDT) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by gabe.freedesktop.org (Postfix) with ESMTP id A4AD16E74E for ; Thu, 3 Sep 2015 10:22:45 -0700 (PDT) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga103.fm.intel.com with ESMTP; 03 Sep 2015 10:22:45 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.17,463,1437462000"; d="scan'208";a="797626909" Received: from irsmsx101.ger.corp.intel.com ([163.33.3.153]) by fmsmga002.fm.intel.com with ESMTP; 03 Sep 2015 10:22:42 -0700 Received: from mwiniars-desk1.igk.intel.com (172.28.173.140) by IRSMSX101.ger.corp.intel.com (163.33.3.153) with Microsoft SMTP Server id 14.3.224.2; Thu, 3 Sep 2015 18:22:41 +0100 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= To: , Daniel Vetter Date: Thu, 3 Sep 2015 19:22:18 +0200 Message-ID: <1441300938-18937-1-git-send-email-michal.winiarski@intel.com> X-Mailer: git-send-email 2.4.3 In-Reply-To: <1441208798-12446-1-git-send-email-michal.winiarski@intel.com> References: <1441208798-12446-1-git-send-email-michal.winiarski@intel.com> MIME-Version: 1.0 X-Originating-IP: [172.28.173.140] Subject: [Intel-gfx] [PATCH v5] drm/i915/gtt: Avoid calling kcalloc in a loop when allocating temp bitmaps X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Spam-Status: No, score=-4.2 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_MED, T_RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP On each call to gen8_alloc_va_range_3lvl we're allocating temporary bitmaps needed for error handling. Unfortunately, when we increase address space size (48b ppgtt) we do additional (512 - 4) calls to kcalloc, increasing latency between exec and actual start of execution on the GPU. Let's just do a single kcalloc, we can also drop the size from free_gen8_temp_bitmaps since it's no longer used. v2: Use GFP_TEMPORARY to make the allocations reclaimable. v3: Drop the 2D array, just allocate a single block. v4: Rebase to handle gen8_preallocate_top_level_pdps. v5: Align misaligned bracket. Cc: Chris Wilson Cc: Mika Kuoppala Cc: Michel Thierry Signed-off-by: Micha? Winiarski Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 49 ++++++++++++++----------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index bdb7adc..b40a9cb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1164,13 +1164,8 @@ unwind_out: } static void -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts, - uint32_t pdpes) +free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) { - int i; - - for (i = 0; i < pdpes; i++) - kfree(new_pts[i]); kfree(new_pts); kfree(new_pds); } @@ -1180,29 +1175,20 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts, */ static int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - unsigned long ***new_pts, + unsigned long **new_pts, uint32_t pdpes) { - int i; unsigned long *pds; - unsigned long **pts; + unsigned long *pts; - pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL); + pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); if (!pds) return -ENOMEM; - pts = kcalloc(pdpes, sizeof(unsigned long *), GFP_KERNEL); - if (!pts) { - kfree(pds); - return -ENOMEM; - } - - for (i = 0; i < pdpes; i++) { - pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES), - sizeof(unsigned long), GFP_KERNEL); - if (!pts[i]) - goto err_out; - } + pts = kcalloc(pdpes * BITS_TO_LONGS(I915_PDES), + sizeof(unsigned long), GFP_TEMPORARY); + if (!pts) + goto err_out; *new_pds = pds; *new_pts = pts; @@ -1210,7 +1196,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, return 0; err_out: - free_gen8_temp_bitmaps(pds, pts, pdpes); + free_gen8_temp_bitmaps(pds, pts); return -ENOMEM; } @@ -1231,7 +1217,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, { struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - unsigned long *new_page_dirs, **new_page_tables; + unsigned long *new_page_dirs, *new_page_tables; struct drm_device *dev = vm->dev; struct i915_page_directory *pd; const uint64_t orig_start = start; @@ -1258,14 +1244,14 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, new_page_dirs); if (ret) { - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); return ret; } /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, - new_page_tables[pdpe]); + new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); if (ret) goto err_out; } @@ -1316,20 +1302,21 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, gen8_setup_page_directory(ppgtt, pdp, pd, pdpe); } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); mark_tlbs_dirty(ppgtt); return 0; err_out: while (pdpe--) { - for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES) + for_each_set_bit(temp, new_page_tables + pdpe * + BITS_TO_LONGS(I915_PDES), I915_PDES) free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]); } for_each_set_bit(pdpe, new_page_dirs, pdpes) free_pd(dev, pdp->page_directory[pdpe]); - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); mark_tlbs_dirty(ppgtt); return ret; } @@ -1481,7 +1468,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) { - unsigned long *new_page_dirs, **new_page_tables; + unsigned long *new_page_dirs, *new_page_tables; uint32_t pdpes = I915_PDPES_PER_PDP(dev); int ret; @@ -1501,7 +1488,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) if (!ret) *ppgtt->pdp.used_pdpes = *new_page_dirs; - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes); + free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); return ret; }