From patchwork Sat May 10 03:59:33 2014
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Ben Widawsky
X-Patchwork-Id: 4146361
From: Ben Widawsky
To: Intel GFX
Date: Fri, 9 May 2014 20:59:33 -0700
Message-Id: <1399694391-3935-39-git-send-email-benjamin.widawsky@intel.com>
X-Mailer: git-send-email 1.9.2
In-Reply-To: <1399694391-3935-1-git-send-email-benjamin.widawsky@intel.com>
References: <1399694391-3935-1-git-send-email-benjamin.widawsky@intel.com>
Cc: Ben Widawsky, Ben Widawsky
Subject: [Intel-gfx] [PATCH 38/56] drm/i915/bdw: Dynamic page table allocations
X-BeenThere: intel-gfx@lists.freedesktop.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: Intel graphics driver community testing & development
Errors-To: intel-gfx-bounces@lists.freedesktop.org
Sender: "Intel-gfx"
X-Virus-Scanned: ClamAV using ClamSMTP

This finishes off the dynamic page table allocations, in the legacy
3-level style that already exists. Almost everything has already been
set up to this point; this patch completes the enabling by setting the
appropriate function pointers.

Signed-off-by: Ben Widawsky
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 261 +++++++++++++++++++++++++++++-------
 1 file changed, 216 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 82b98ea..66ed943 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -653,58 +653,160 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	gen8_ppgtt_free(ppgtt);
 }
 
-static int gen8_ppgtt_alloc_pagetabs(struct i915_pagedir *pd,
+/**
+ * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
+ * @ppgtt: Master ppgtt structure.
+ * @pd: Page directory for this address range.
+ * @start: Starting virtual address to begin allocations.
+ * @length: Size of the allocations.
+ * @new_pts: Bitmap set by function with new allocations. Likely used by the
+ *	caller to free on error.
+ *
+ * Allocate the required number of page tables. Extremely similar to
+ * gen8_ppgtt_alloc_pagedirs(). The main difference is here we are limited by
+ * the page directory boundary (instead of the page directory pointer). That
+ * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_pagedirs(), it is
+ * possible, and likely, that the caller will need to use multiple calls of this
+ * function to achieve the appropriate allocation.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
+				     struct i915_pagedir *pd,
 				     uint64_t start, uint64_t length,
-				     struct drm_device *dev)
+				     unsigned long *new_pts)
 {
 	struct i915_pagetab *unused;
 	uint64_t temp;
 	uint32_t pde;
 
 	gen8_for_each_pde(unused, pd, start, length, temp, pde) {
-		BUG_ON(unused);
-		pd->page_tables[pde] = alloc_pt_single(dev);
+		if (unused)
+			continue;
+
+		pd->page_tables[pde] = alloc_pt_single(ppgtt->base.dev);
+
 		if (IS_ERR(pd->page_tables[pde]))
 			goto unwind_out;
+
+		set_bit(pde, new_pts);
 	}
 
 	return 0;
 
 unwind_out:
-	while (pde--)
-		free_pt_single(pd->page_tables[pde], dev);
+	for_each_set_bit(pde, new_pts, I915_PDES_PER_PD)
+		free_pt_single(pd->page_tables[pde], ppgtt->base.dev);
 
 	return -ENOMEM;
 }
 
-/* bitmap of new pagedirs */
-static int gen8_ppgtt_alloc_pagedirs(struct i915_pagedirpo *pdp,
+/**
+ * gen8_ppgtt_alloc_pagedirs() - Allocate page directories for VA range.
+ * @ppgtt: Master ppgtt structure.
+ * @pdp: Page directory pointer for this address range.
+ * @start: Starting virtual address to begin allocations.
+ * @length: Size of the allocations.
+ * @new_pds: Bitmap set by function with new allocations. Likely used by the
+ *	caller to free on error.
+ *
+ * Allocate the required number of page directories starting at the pde index of
+ * @start, and ending at the pde index @start + @length. This function will skip
+ * over already allocated page directories within the range, and only allocate
+ * new ones, setting the appropriate pointer within the pdp as well as the
+ * correct position in the bitmap @new_pds.
+ *
+ * The function will only allocate the pages within the range for a given page
+ * directory pointer. In other words, if @start + @length straddles a virtually
+ * addressed PDP boundary (512GB for 4k pages), there will be more allocations
+ * required by the caller. This is not currently possible, and the BUG in the
+ * code will prevent it.
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int gen8_ppgtt_alloc_pagedirs(struct i915_hw_ppgtt *ppgtt,
+				     struct i915_pagedirpo *pdp,
 				     uint64_t start, uint64_t length,
-				     struct drm_device *dev)
+				     unsigned long *new_pds)
 {
 	struct i915_pagedir *unused;
 	uint64_t temp;
 	uint32_t pdpe;
 
+	BUG_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+
 	/* FIXME: PPGTT container_of won't work for 64b */
 	BUG_ON((start + length) > 0x800000000ULL);
 
 	gen8_for_each_pdpe(unused, pdp, start, length, temp, pdpe) {
-		BUG_ON(unused);
-		pdp->pagedirs[pdpe] = alloc_pd_single(dev);
+		struct i915_pagedir *pd;
+		if (unused)
+			continue;
 
-		if (IS_ERR(pdp->pagedirs[pdpe]))
+		pd = alloc_pd_single(ppgtt->base.dev);
+		if (IS_ERR(pd))
 			goto unwind_out;
+
+		pdp->pagedirs[pdpe] = pd;
+		set_bit(pdpe, new_pds);
 	}
 
 	return 0;
 
 unwind_out:
-	while (pdpe--)
-		free_pd_single(pdp->pagedirs[pdpe], dev);
+	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+		free_pd_single(pdp->pagedirs[pdpe], ppgtt->base.dev);
+
+	return -ENOMEM;
+}
+
+void free_gen8_temp_bitmaps(unsigned long *new_pds,
+			    unsigned long **new_pts)
+{
+	int i;
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
+		kfree(new_pts[i]);
+	kfree(new_pts);
+	kfree(new_pds);
+}
+
+/* Fills in the page directory bitmap, and the array of page table bitmaps. Both
+ * of these are based on the number of PDPEs in the system.
+ */
+int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
+					 unsigned long ***new_pts)
+{
+	int i;
+	unsigned long *pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES),
+				     sizeof(unsigned long),
+				     GFP_KERNEL);
+
+	unsigned long **pts = kcalloc(I915_PDES_PER_PD,
+				      sizeof(unsigned long *),
+				      GFP_KERNEL);
+
+	if (!pts || !pds)
+		goto err_out;
+
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
+		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES_PER_PD),
+				 sizeof(unsigned long), GFP_KERNEL);
+		if (!pts[i])
+			goto err_out;
+	}
+
+	*new_pds = pds;
+	*new_pts = (unsigned long **)pts;
+
+	return 0;
+
+err_out:
+	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
+		kfree(pts[i]);
+	kfree(pds);
+	kfree(pts);
 
 	return -ENOMEM;
 }
 
@@ -714,6 +816,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
+	unsigned long *new_page_dirs, **new_page_tables;
 	struct i915_pagedir *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
@@ -721,22 +824,40 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	uint32_t pdpe;
 	int ret;
 
-	/* Do the allocations first so we can easily bail out */
-	ret = gen8_ppgtt_alloc_pagedirs(&ppgtt->pdp, start, length,
-					ppgtt->base.dev);
+#ifdef CONFIG_32BIT
+	/* Disallow 64b address on 32b platforms. Nothing is wrong with doing
+	 * this in hardware, but a lot of the drm code is not prepared to handle
+	 * 64b offset on 32b platforms. */
+	if (start + length > 0x100000000ULL)
+		return -E2BIG;
+#endif
+
+	/* Wrap is never okay since we can only represent 48b, and we don't
+	 * actually use the other side of the canonical address space.
+	 */
+	if (WARN_ON(start + length < start))
+		return -ERANGE;
+
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
 	if (ret)
 		return ret;
 
+	/* Do the allocations first so we can easily bail out */
+	ret = gen8_ppgtt_alloc_pagedirs(ppgtt, &ppgtt->pdp, start, length,
+					new_page_dirs);
+	if (ret) {
+		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		return ret;
+	}
+
 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
-		ret = gen8_ppgtt_alloc_pagetabs(pd, start, length,
-						ppgtt->base.dev);
+		bitmap_zero(new_page_tables[pdpe], I915_PDES_PER_PD);
+		ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
+						new_page_tables[pdpe]);
 		if (ret)
 			goto err_out;
 	}
 
-	/* Now mark everything we've touched as used. This doesn't allow for
-	 * robust error checking, but it makes the code a hell of a lot simpler.
-	 */
 	start = orig_start;
 	length = orig_length;
 
@@ -745,19 +866,37 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 		uint64_t pd_len = gen8_clamp_pd(start, length);
 		uint64_t pd_start = start;
 		uint32_t pde;
-		gen8_for_each_pde(pt, &ppgtt->pd, pd_start, pd_len, temp, pde) {
-			bitmap_set(pd->page_tables[pde]->used_ptes,
-				   gen8_pte_index(start),
-				   gen8_pte_count(start, length));
+
+		BUG_ON(!pd);
+
+		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
+			BUG_ON(!pt);
+
+			bitmap_set(pt->used_ptes,
+				   gen8_pte_index(pd_start),
+				   gen8_pte_count(pd_start, pd_len));
+			set_bit(pde, pd->used_pdes);
 		}
+		set_bit(pdpe, ppgtt->pdp.used_pdpes);
+
+		gen8_map_pagetable_range(pd, start, length, ppgtt->base.dev);
 	}
 
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 	return 0;
 
 err_out:
-	gen8_teardown_va_range(vm, orig_start, start);
+	while (pdpe--) {
+		for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES_PER_PD)
+			free_pt_single(pd->page_tables[temp], ppgtt->base.dev);
+	}
+
+	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
+		free_pd_single(ppgtt->pdp.pagedirs[pdpe], ppgtt->base.dev);
+
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
 	return ret;
 }
 
@@ -768,38 +907,65 @@ err_out:
  * space.
  *
  */
-static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 {
-	struct i915_pagedir *pd;
-	uint64_t temp, start = 0;
-	const uint64_t orig_length = size;
-	uint32_t pdpe;
-	int ret;
-
 	ppgtt->base.start = 0;
 	ppgtt->base.total = size;
-	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
-	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->enable = gen8_ppgtt_enable;
 	ppgtt->switch_mm = gen8_mm_switch;
+	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 
 	ppgtt->scratch_pd = alloc_pt_scratch(ppgtt->base.dev);
 	if (IS_ERR(ppgtt->scratch_pd))
 		return PTR_ERR(ppgtt->scratch_pd);
 
+	return 0;
+}
+
+static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_pagedir *pd;
+	uint64_t temp, start = 0, size = dev_priv->gtt.base.total;
+	uint32_t pdpe;
+	int ret;
+
+	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
+	if (ret)
+		return ret;
+
 	ret = gen8_alloc_va_range(&ppgtt->base, start, size);
 	if (ret) {
 		free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
 		return ret;
 	}
 
-	start = 0;
-	size = orig_length;
-
 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, size, temp, pdpe)
 		gen8_map_pagetable_range(pd, start, size, ppgtt->base.dev);
 
+	ppgtt->base.allocate_va_range = NULL;
+	ppgtt->base.teardown_va_range = NULL;
+	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+
+	return 0;
+}
+
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = gen8_ppgtt_init_common(ppgtt, dev_priv->gtt.base.total);
+	if (ret)
+		return ret;
+
+	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
+	ppgtt->base.teardown_va_range = gen8_teardown_va_range;
+	ppgtt->base.clear_range = NULL;
+
 	return 0;
 }
 
@@ -1454,8 +1620,10 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt, boo
 
 	if (INTEL_INFO(dev)->gen < 8)
 		ret = gen6_ppgtt_init(ppgtt, aliasing);
+	else if (IS_GEN8(dev) && aliasing)
+		ret = gen8_aliasing_ppgtt_init(ppgtt);
 	else if (IS_GEN8(dev))
-		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
+		ret = gen8_ppgtt_init(ppgtt);
 	else
 		BUG();
 
@@ -1464,7 +1632,8 @@ int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt, boo
 
 	kref_init(&ppgtt->ref);
 	drm_mm_init(&ppgtt->base.mm, ppgtt->base.start, ppgtt->base.total);
-	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
+	if (ppgtt->base.clear_range)
+		ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 	i915_init_vm(dev_priv, &ppgtt->base);
 
 	return 0;
@@ -1508,10 +1677,12 @@ ppgtt_bind_vma(struct i915_vma *vma,
 
 static void ppgtt_unbind_vma(struct i915_vma *vma)
 {
-	vma->vm->clear_range(vma->vm,
-			     vma->node.start,
-			     vma->obj->base.size,
-			     true);
+	if (vma->vm->clear_range)
+		vma->vm->clear_range(vma->vm,
+				     vma->node.start,
+				     vma->obj->base.size,
+				     true);
+
 	if (vma->vm->teardown_va_range) {
 		trace_i915_va_teardown(vma->vm,
 				       vma->node.start, vma->node.size);
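
[For readers unfamiliar with the bookkeeping pattern used above, here is a minimal,
self-contained sketch (not part of the patch; names, sizes, and the plain-C malloc
stand-ins are hypothetical) of what gen8_ppgtt_alloc_pagetabs()/_pagedirs() do with
the kernel bitmap API: allocate only the missing entries in a table, record which
entries this call created in a "new allocations" bitmap, and on failure free only
those entries so pre-existing page tables survive the unwind.]

#include <stdio.h>
#include <stdlib.h>

#define NUM_ENTRIES 8	/* stand-in for I915_PDES_PER_PD / GEN8_LEGACY_PDPES */

static int alloc_range(void **table, int start, int len, unsigned long *new_bits)
{
	int i;

	for (i = start; i < start + len; i++) {
		if (table[i])		/* already allocated: skip, like "if (unused) continue" */
			continue;

		table[i] = malloc(64);	/* stand-in for alloc_pt_single()/alloc_pd_single() */
		if (!table[i])
			goto unwind_out;

		*new_bits |= 1UL << i;	/* remember that *this call* allocated the entry */
	}

	return 0;

unwind_out:
	/* Free only the entries this call allocated; older ones are untouched. */
	for (i = 0; i < NUM_ENTRIES; i++)
		if (*new_bits & (1UL << i)) {
			free(table[i]);
			table[i] = NULL;
		}

	return -1;
}

int main(void)
{
	void *table[NUM_ENTRIES] = { NULL };
	unsigned long new_bits = 0;

	table[2] = malloc(64);		/* pretend entry 2 already existed */

	if (alloc_range(table, 0, 5, &new_bits) == 0)
		printf("allocated entries, new bitmap = 0x%lx\n", new_bits);

	return 0;
}

[The per-call bitmap is what lets gen8_alloc_va_range() unwind a partial allocation
without tearing down page tables that were already in use before the call, which the
old "while (pde--)" loop could not guarantee once entries may pre-exist.]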