Message ID | 1438250729-22955-1-git-send-email-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |

Reviewed the patch & it looks fine.

Reviewed-by: Akash Goel <akash.goel@intel.com>

On 7/30/2015 3:35 PM, Michel Thierry wrote:
> PML4 has no special attributes, and there will always be a PML4.
> So simply initialize it at creation, and destroy it at the end.
>
> The code for 4lvl is able to call into the existing 3lvl page table code
> to handle all of the lower levels.
>
> v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the
> compiler happy, and define ret only in one place.
> Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl.
> v3: Use i915_dma_unmap_single instead of the pci API. Fix a
> couple of incorrect checks when unmapping pdp and pd pages (Akash).
> v4: Call __pdp_fini also for 32b PPGTT. Clean up the alloc_pdp param list.
> v5: Prevent (harmless) out of range access in gen8_for_each_pml4e.
> v6: Simplify alloc_vma_range_4lvl and gen8_ppgtt_init_common error
> paths (Akash).
> v7: Rebase, s/gen8_ppgtt_free_*/gen8_ppgtt_cleanup_*/.
> v8: Change location of pml4_init/fini; it will make the next patches
> cleaner.
> v9: Rebase after Mika's ppgtt cleanup / scratch merge patch series, while
> trying to reuse as much as possible for pdp alloc. pml4_init/fini
> replaced by the setup/cleanup_px macros.
> v10: Rebase after Mika's merged ppgtt cleanup patch series.
> v11: Rebase after the final merged version of Mika's ppgtt/scratch
> patches.
> v12: Fix pdpe start value in trace (Akash).
> v13: Define all 4lvl functions in this patch directly, instead of in
> previous patches; add i915_page_directory_pointer_entry_alloc here;
> use test_bit to detect when a pdp is already allocated (Akash).
> v14: Move pdp allocation into a new gen8_ppgtt_alloc_page_dirpointers
> function, as we do for pds and pts; move the pd and pdp setup functions
> to this patch (Akash).
> v15: Move kfree(pdp) from the previous patch to this one (Akash).
>
> Cc: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 239 +++++++++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  15 ++-
>  drivers/gpu/drm/i915/i915_trace.h   |   8 ++
>  3 files changed, 246 insertions(+), 16 deletions(-)
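For readers following the patch below, a quick refresher on the address arithmetic it relies on: each page-table level decodes 9 bits of the 48-bit virtual address, so a PML4 entry covers 1ULL << 39 = 512GiB, a PDP entry 1GiB, a PD entry 2MiB, and a PTE one 4KiB page. The snippet below is a standalone illustration of that split, not driver code; the 39 and 30 shifts mirror GEN8_PML4E_SHIFT and GEN8_PDPE_SHIFT from the patch, while the 21 and 12 shifts are the conventional lower-level values and are an assumption here, since those defines fall outside the posted hunks.

```c
/* Standalone sketch (not driver code) of the 4-level VA split behind
 * gen8_{pml4e,pdpe,pde,pte}_index(). Build: cc -o vasplit vasplit.c
 */
#include <stdint.h>
#include <stdio.h>

#define PML4E_SHIFT	39	/* from the patch: 512GiB per PML4 entry */
#define PDPE_SHIFT	30	/* from the patch: 1GiB per PDP entry */
#define PDE_SHIFT	21	/* assumed: 2MiB per PD entry */
#define PAGE_SHIFT	12	/* assumed: 4KiB GTT pages */
#define IDX_MASK	0x1ffULL /* 9 index bits per level, 512 entries */

int main(void)
{
	uint64_t va = 0x8123456789abULL;	/* arbitrary 48-bit address */

	printf("pml4e=%llu pdpe=%llu pde=%llu pte=%llu offset=0x%llx\n",
	       (unsigned long long)((va >> PML4E_SHIFT) & IDX_MASK),
	       (unsigned long long)((va >> PDPE_SHIFT) & IDX_MASK),
	       (unsigned long long)((va >> PDE_SHIFT) & IDX_MASK),
	       (unsigned long long)((va >> PAGE_SHIFT) & IDX_MASK),
	       (unsigned long long)(va & ((1ULL << PAGE_SHIFT) - 1)));
	return 0;
}
```

The 512GiB span per PML4 entry is also why gen8_alloc_va_range_4lvl warns when an allocation newly populates more than two pml4es: such a range necessarily spans more than 512GB.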
```diff
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 3288154..c498eaa 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -210,6 +210,9 @@ static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
 	return pde;
 }
 
+#define gen8_pdpe_encode gen8_pde_encode
+#define gen8_pml4e_encode gen8_pde_encode
+
 static gen6_pte_t snb_pte_encode(dma_addr_t addr,
 				 enum i915_cache_level level,
 				 bool valid, u32 unused)
@@ -559,10 +562,73 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 	pdp->page_directory = NULL;
 }
 
+static struct
+i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
+{
+	struct i915_page_directory_pointer *pdp;
+	int ret = -ENOMEM;
+
+	WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
+
+	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
+	if (!pdp)
+		return ERR_PTR(-ENOMEM);
+
+	ret = __pdp_init(dev, pdp);
+	if (ret)
+		goto fail_bitmap;
+
+	ret = setup_px(dev, pdp);
+	if (ret)
+		goto fail_page_m;
+
+	return pdp;
+
+fail_page_m:
+	__pdp_fini(pdp);
+fail_bitmap:
+	kfree(pdp);
+
+	return ERR_PTR(ret);
+}
+
 static void free_pdp(struct drm_device *dev,
 		     struct i915_page_directory_pointer *pdp)
 {
 	__pdp_fini(pdp);
+	if (USES_FULL_48BIT_PPGTT(dev)) {
+		cleanup_px(dev, pdp);
+		kfree(pdp);
+	}
+}
+
+static void
+gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
+			  struct i915_page_directory_pointer *pdp,
+			  struct i915_page_directory *pd,
+			  int index)
+{
+	gen8_ppgtt_pdpe_t *page_directorypo;
+
+	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+		return;
+
+	page_directorypo = kmap_px(pdp);
+	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+	kunmap_px(ppgtt, page_directorypo);
+}
+
+static void
+gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
+				  struct i915_pml4 *pml4,
+				  struct i915_page_directory_pointer *pdp,
+				  int index)
+{
+	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
+
+	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
+	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
+	kunmap_px(ppgtt, pagemap);
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -785,12 +851,9 @@ static void gen8_free_scratch(struct i915_address_space *vm)
 	free_scratch_page(dev, vm->scratch_page);
 }
 
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
+				    struct i915_page_directory_pointer *pdp)
 {
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
-	struct drm_device *dev = ppgtt->base.dev;
 	int i;
 
 	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
@@ -802,6 +865,31 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	}
 
 	free_pdp(dev, pdp);
+}
+
+static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
+{
+	int i;
+
+	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
+		if (WARN_ON(!ppgtt->pml4.pdps[i]))
+			continue;
+
+		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
+	}
+
+	cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+
+	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
+	else
+		gen8_ppgtt_cleanup_4lvl(ppgtt);
 
 	gen8_free_scratch(vm);
 }
@@ -923,6 +1011,60 @@ unwind_out:
 	return -ENOMEM;
 }
 
+/**
+ * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
+ * @vm:	Master vm structure.
+ * @pml4:	Page map level 4 for this address range.
+ * @start:	Starting virtual address to begin allocations.
+ * @length:	Size of the allocations.
+ * @new_pdps:	Bitmap set by function with new allocations. Likely used by the
+ *		caller to free on error.
+ *
+ * Allocate the required number of page directory pointers. Extremely similar to
+ * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
+ * The main difference is here we are limited by the pml4 boundary (instead of
+ * the page directory pointer).
+ *
+ * Return: 0 if success; negative error code otherwise.
+ */
+static int
+gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
+				  struct i915_pml4 *pml4,
+				  uint64_t start,
+				  uint64_t length,
+				  unsigned long *new_pdps)
+{
+	struct drm_device *dev = vm->dev;
+	struct i915_page_directory_pointer *pdp;
+	uint64_t temp;
+	uint32_t pml4e;
+
+	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
+
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		if (!test_bit(pml4e, pml4->used_pml4es)) {
+			pdp = alloc_pdp(dev);
+			if (IS_ERR(pdp))
+				goto unwind_out;
+
+			pml4->pdps[pml4e] = pdp;
+			__set_bit(pml4e, new_pdps);
+			trace_i915_page_directory_pointer_entry_alloc(vm,
+								      pml4e,
+								      start,
+								      GEN8_PML4E_SHIFT);
+		}
+	}
+
+	return 0;
+
+unwind_out:
+	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+		free_pdp(dev, pml4->pdps[pml4e]);
+
+	return -ENOMEM;
+}
+
 static void
 free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
 		       uint32_t pdpes)
@@ -984,14 +1126,15 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
 }
 
-static int gen8_alloc_va_range(struct i915_address_space *vm,
-			       uint64_t start, uint64_t length)
+static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
+				    struct i915_page_directory_pointer *pdp,
+				    uint64_t start,
+				    uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	unsigned long *new_page_dirs, **new_page_tables;
 	struct drm_device *dev = vm->dev;
-	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
 	struct i915_page_directory *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
@@ -1072,6 +1215,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 
 		kunmap_px(ppgtt, page_directory);
 		__set_bit(pdpe, pdp->used_pdpes);
+		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
 	}
 
 	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
@@ -1092,6 +1236,68 @@ err_out:
 	return ret;
 }
 
+static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
+				    struct i915_pml4 *pml4,
+				    uint64_t start,
+				    uint64_t length)
+{
+	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+	struct i915_hw_ppgtt *ppgtt =
+			container_of(vm, struct i915_hw_ppgtt, base);
+	struct i915_page_directory_pointer *pdp;
+	uint64_t temp, pml4e;
+	int ret = 0;
+
+	/* Do the pml4 allocations first, so we don't need to track the newly
+	 * allocated tables below the pdp */
+	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+
+	/* The pagedirectory and pagetable allocations are done in the shared 3
+	 * and 4 level code. Just allocate the pdps.
+	 */
+	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
+						new_pdps);
+	if (ret)
+		return ret;
+
+	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
+	     "The allocation has spanned more than 512GB. "
+	     "It is highly likely this is incorrect.");
+
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		WARN_ON(!pdp);
+
+		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+		if (ret)
+			goto err_out;
+
+		gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
+	}
+
+	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
+		  GEN8_PML4ES_PER_PML4);
+
+	return 0;
+
+err_out:
+	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+		gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
+
+	return ret;
+}
+
+static int gen8_alloc_va_range(struct i915_address_space *vm,
+			       uint64_t start, uint64_t length)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+
+	if (USES_FULL_48BIT_PPGTT(vm->dev))
+		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+	else
+		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1117,9 +1323,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 	ppgtt->switch_mm = gen8_mm_switch;
 
-	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
-		ret = __pdp_init(false, &ppgtt->pdp);
+	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
+		if (ret)
+			goto free_scratch;
 
+		ppgtt->base.total = 1ULL << 48;
+	} else {
+		ret = __pdp_init(false, &ppgtt->pdp);
 		if (ret)
 			goto free_scratch;
 
@@ -1131,10 +1342,10 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		 * 2GiB).
 		 */
 		ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
-	} else {
-		ppgtt->base.total = 1ULL << 48;
-		ret = -EPERM; /* Not yet implemented */
-		goto free_scratch;
+
+		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
+							      0, 0,
+							      GEN8_PML4E_SHIFT);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 04bc66f..11d44b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -39,6 +39,8 @@ struct drm_i915_file_private;
 typedef uint32_t gen6_pte_t;
 typedef uint64_t gen8_pte_t;
 typedef uint64_t gen8_pde_t;
+typedef uint64_t gen8_ppgtt_pdpe_t;
+typedef uint64_t gen8_ppgtt_pml4e_t;
 
 #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
@@ -95,6 +97,7 @@ typedef uint64_t gen8_pde_t;
  */
 #define GEN8_PML4ES_PER_PML4		512
 #define GEN8_PML4E_SHIFT		39
+#define GEN8_PML4E_MASK			(GEN8_PML4ES_PER_PML4 - 1)
 #define GEN8_PDPE_SHIFT			30
 /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
  * tables */
@@ -465,6 +468,15 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
 	     temp = min(temp, length),					\
 	     start += temp, length -= temp)
 
+#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)	\
+	for (iter = gen8_pml4e_index(start);				\
+	     pdp = (pml4)->pdps[iter],					\
+	     length > 0 && iter < GEN8_PML4ES_PER_PML4;			\
+	     iter++,							\
+	     temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,	\
+	     temp = min(temp, length),					\
+	     start += temp, length -= temp)
+
 static inline uint32_t gen8_pte_index(uint64_t address)
 {
 	return i915_pte_index(address, GEN8_PDE_SHIFT);
@@ -482,8 +494,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address)
 
 static inline uint32_t gen8_pml4e_index(uint64_t address)
 {
-	WARN_ON(1); /* For 64B */
-	return 0;
+	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
 }
 
 static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index f230d76..e6b5c74 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -221,6 +221,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc,
 		   __entry->vm, __entry->px, __entry->start, __entry->end)
 );
 
+DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc,
+		   TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift),
+		   TP_ARGS(vm, pml4e, start, pml4e_shift),
+
+		   TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)",
+			     __entry->vm, __entry->px, __entry->start, __entry->end)
+);
+
 /* Avoid extra math because we only support two sizes. The format is defined by
  * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */
 #define TRACE_PT_SIZE(bits) \
```
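A closing note on the new gen8_for_each_pml4e walker: like the existing gen8_for_each_pdpe/pde helpers, it hides all of its bookkeeping in the for-header, which makes the macro dense to read. The standalone model below (again illustrative, not driver code; ALIGN_POW2 and MIN are local stand-ins for the kernel's ALIGN and min) shows the traversal it performs: the range [start, start + length) is handed out in chunks that never cross a 512GiB pml4e boundary, which is what lets the 4lvl allocator simply call the 3lvl code once per pdp.

```c
/* Standalone model (not driver code) of gen8_for_each_pml4e()'s walk:
 * split [start, start + length) into chunks that never cross a
 * 512GiB (pml4e) boundary.
 */
#include <stdint.h>
#include <stdio.h>

#define PML4E_SHIFT		39
#define PML4ES_PER_PML4		512
/* Round x up to the next multiple of the power-of-two a. */
#define ALIGN_POW2(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))
#define MIN(a, b)		((a) < (b) ? (a) : (b))

int main(void)
{
	uint64_t start = 0x7fffff0000ULL;	/* straddles one pml4e boundary */
	uint64_t length = 0x20000ULL;
	uint32_t iter = (uint32_t)((start >> PML4E_SHIFT) & (PML4ES_PER_PML4 - 1));

	while (length > 0 && iter < PML4ES_PER_PML4) {
		/* distance from start to the next boundary, clamped to the end */
		uint64_t temp = ALIGN_POW2(start + 1, 1ULL << PML4E_SHIFT) - start;

		temp = MIN(temp, length);
		printf("pml4e=%u: start=0x%llx len=0x%llx\n", iter,
		       (unsigned long long)start, (unsigned long long)temp);

		iter++;
		start += temp;
		length -= temp;
	}
	return 0;
}
```

One subtlety in the real macro: pdp = (pml4)->pdps[iter] is evaluated as part of the loop condition rather than in the body, so the body always sees the pdp for the current slot (the v5 changelog note about out-of-range access refers to this bookkeeping).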