Message ID | 1435764453-11954-6-git-send-email-michel.thierry@intel.com (mailing list archive)
State      | New, archived
On 7/1/2015 8:57 PM, Michel Thierry wrote:
> PML4 has no special attributes, and there will always be a PML4.
> So simply initialize it at creation, and destroy it at the end.
>
> The code for 4lvl is able to call into the existing 3lvl page table code
> to handle all of the lower levels.
>
> v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the
> compiler happy. And define ret only in one place.
> Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl.
> v3: Use i915_dma_unmap_single instead of pci API. Fix a
> couple of incorrect checks when unmapping pdp and pd pages (Akash).
> v4: Call __pdp_fini also for 32b PPGTT. Clean up alloc_pdp param list.
> v5: Prevent (harmless) out of range access in gen8_for_each_pml4e.
> v6: Simplify alloc_vma_range_4lvl and gen8_ppgtt_init_common error
> paths. (Akash)
> v7: Rebase, s/gen8_ppgtt_free_*/gen8_ppgtt_cleanup_*/.
> v8: Change location of pml4_init/fini. It will make next patches
> cleaner.
> v9: Rebase after Mika's ppgtt cleanup / scratch merge patch series, while
> trying to reuse as much as possible for pdp alloc. pml4_init/fini
> replaced by setup/cleanup_px macros.
> v10: Rebase after Mika's merged ppgtt cleanup patch series.
> v11: Rebase after final merged version of Mika's ppgtt/scratch patches.
>
> Cc: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2+)
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 162 ++++++++++++++++++++++++++++++------
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  12 ++-
>  2 files changed, 146 insertions(+), 28 deletions(-)

[snip]

> @@ -1087,8 +1137,62 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> +	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
> +		if (!pdp) {
> +			WARN_ON(test_bit(pml4e, pml4->used_pml4es));
> +			pdp = alloc_pdp(vm->dev);
> +			if (IS_ERR(pdp))
> +				goto err_out;
> +
> +			pml4->pdps[pml4e] = pdp;
> +			__set_bit(pml4e, new_pdps);
> +			trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e,
> +						pml4e << GEN8_PML4E_SHIFT,

The 'start' variable should be used here in place of 'pml4e << GEN8_PML4E_SHIFT'?

> +						GEN8_PML4E_SHIFT);
> +		}
> +	}

[snip]
On 7/7/2015 1:48 PM, Goel, Akash wrote:
> On 7/1/2015 8:57 PM, Michel Thierry wrote:
>> @@ -1087,8 +1137,62 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
>> 				    uint64_t start,
>> 				    uint64_t length)
>>  {
>> -	WARN_ON(1); /* to be implemented later */
>> +	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
>> +	struct i915_hw_ppgtt *ppgtt =
>> +			container_of(vm, struct i915_hw_ppgtt, base);
>> +	struct i915_page_directory_pointer *pdp;
>> +	const uint64_t orig_start = start;
>> +	const uint64_t orig_length = length;
>> +	uint64_t temp, pml4e;
>> +	int ret = 0;
>> +
>> +	/* Do the pml4 allocations first, so we don't need to track the newly
>> +	 * allocated tables below the pdp */
>> +	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
>> +
>> +	/* The pagedirectory and pagetable allocations are done in the shared 3
>> +	 * and 4 level code. Just allocate the pdps.
>> +	 */
>> +	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
>> +		if (!pdp) {
>> +			WARN_ON(test_bit(pml4e, pml4->used_pml4es));
>> +			pdp = alloc_pdp(vm->dev);
>> +			if (IS_ERR(pdp))
>> +				goto err_out;
>> +
>> +			pml4->pdps[pml4e] = pdp;
>> +			__set_bit(pml4e, new_pdps);
>> +			trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e,
>> +						pml4e << GEN8_PML4E_SHIFT,
>
> The 'start' variable should be used here in place of
> 'pml4e << GEN8_PML4E_SHIFT'?

Correct, should be 'start'. Thanks

>> +						GEN8_PML4E_SHIFT);
>> +		}
>> +	}
>> +
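To see concretely why the review point above matters: the two expressions only agree when the allocation begins exactly on a 512GiB PML4-entry boundary. Below is a minimal standalone sketch (ordinary userspace C, not part of the patch; the address value is made up for illustration) that prints the divergence.

#include <stdint.h>
#include <stdio.h>

#define GEN8_PML4E_SHIFT	39
#define GEN8_PML4E_MASK		(512 - 1)

int main(void)
{
	/* Hypothetical VA range starting 4GiB past the first 512GiB
	 * boundary, i.e. not PML4E-aligned. */
	uint64_t start = (1ULL << GEN8_PML4E_SHIFT) + (4ULL << 30);
	uint64_t pml4e = (start >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;

	/* The trace call in the patch passed the rounded-down entry base;
	 * 'start' keeps the 4GiB offset that the entry base loses. */
	printf("start          = 0x%llx\n", (unsigned long long)start);
	printf("pml4e << SHIFT = 0x%llx\n",
	       (unsigned long long)(pml4e << GEN8_PML4E_SHIFT));
	return 0;
}

This prints 0x8100000000 versus 0x8000000000. Note that only the first iteration of gen8_for_each_pml4e can begin mid-entry; the macro then advances start to the next 512GiB boundary, so later iterations would trace the same value either way.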
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1327e41..d23b0a8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -584,12 +584,44 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 	pdp->page_directory = NULL;
 }
 
+static struct
+i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
+{
+	struct i915_page_directory_pointer *pdp;
+	int ret = -ENOMEM;
+
+	WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
+
+	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
+	if (!pdp)
+		return ERR_PTR(-ENOMEM);
+
+	ret = __pdp_init(dev, pdp);
+	if (ret)
+		goto fail_bitmap;
+
+	ret = setup_px(dev, pdp);
+	if (ret)
+		goto fail_page_m;
+
+	return pdp;
+
+fail_page_m:
+	__pdp_fini(pdp);
+fail_bitmap:
+	kfree(pdp);
+
+	return ERR_PTR(ret);
+}
+
 static void free_pdp(struct drm_device *dev,
 		     struct i915_page_directory_pointer *pdp)
 {
 	__pdp_fini(pdp);
-	if (USES_FULL_48BIT_PPGTT(dev))
+	if (USES_FULL_48BIT_PPGTT(dev)) {
+		cleanup_px(dev, pdp);
 		kfree(pdp);
+	}
 }
 
 /* Broadwell Page Directory Pointer Descriptors */
@@ -783,28 +815,46 @@ static void gen8_free_scratch(struct i915_address_space *vm)
 	free_scratch_page(dev, vm->scratch_page);
 }
 
-static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
+				    struct i915_page_directory_pointer *pdp)
 {
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(vm, struct i915_hw_ppgtt, base);
 	int i;
 
-	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
-		for_each_set_bit(i, ppgtt->pdp.used_pdpes,
-				 I915_PDPES_PER_PDP(ppgtt->base.dev)) {
-			if (WARN_ON(!ppgtt->pdp.page_directory[i]))
-				continue;
+	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
+		if (WARN_ON(!pdp->page_directory[i]))
+			continue;
 
-			gen8_free_page_tables(ppgtt->base.dev,
-					      ppgtt->pdp.page_directory[i]);
-			free_pd(ppgtt->base.dev,
-				ppgtt->pdp.page_directory[i]);
-		}
-		free_pdp(ppgtt->base.dev, &ppgtt->pdp);
-	} else {
-		WARN_ON(1); /* to be implemented later */
+		gen8_free_page_tables(dev, pdp->page_directory[i]);
+		free_pd(dev, pdp->page_directory[i]);
 	}
 
+	free_pdp(dev, pdp);
+}
+
+static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
+{
+	int i;
+
+	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
+		if (WARN_ON(!ppgtt->pml4.pdps[i]))
+			continue;
+
+		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
+	}
+
+	cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+
+	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
+	else
+		gen8_ppgtt_cleanup_4lvl(ppgtt);
+
 	gen8_free_scratch(vm);
 }
 
@@ -1087,8 +1137,62 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
 				    uint64_t start,
 				    uint64_t length)
 {
-	WARN_ON(1); /* to be implemented later */
+	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+	struct i915_hw_ppgtt *ppgtt =
+			container_of(vm, struct i915_hw_ppgtt, base);
+	struct i915_page_directory_pointer *pdp;
+	const uint64_t orig_start = start;
+	const uint64_t orig_length = length;
+	uint64_t temp, pml4e;
+	int ret = 0;
+
+	/* Do the pml4 allocations first, so we don't need to track the newly
+	 * allocated tables below the pdp */
+	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+
+	/* The pagedirectory and pagetable allocations are done in the shared 3
+	 * and 4 level code. Just allocate the pdps.
+	 */
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		if (!pdp) {
+			WARN_ON(test_bit(pml4e, pml4->used_pml4es));
+			pdp = alloc_pdp(vm->dev);
+			if (IS_ERR(pdp))
+				goto err_out;
+
+			pml4->pdps[pml4e] = pdp;
+			__set_bit(pml4e, new_pdps);
+			trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e,
+						pml4e << GEN8_PML4E_SHIFT,
+						GEN8_PML4E_SHIFT);
+		}
+	}
+
+	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
+	     "The allocation has spanned more than 512GB. "
+	     "It is highly likely this is incorrect.");
+
+	start = orig_start;
+	length = orig_length;
+
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		WARN_ON(!pdp);
+
+		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+		if (ret)
+			goto err_out;
+	}
+
+	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
+		  GEN8_PML4ES_PER_PML4);
+
 	return 0;
+
+err_out:
+	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+		gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
+
+	return ret;
 }
 
 static int gen8_alloc_va_range(struct i915_address_space *vm,
@@ -1097,10 +1201,10 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 
-	if (!USES_FULL_48BIT_PPGTT(vm->dev))
-		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
-	else
+	if (USES_FULL_48BIT_PPGTT(vm->dev))
 		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+	else
+		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
 }
 
 /*
@@ -1128,9 +1232,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 	ppgtt->switch_mm = gen8_mm_switch;
 
-	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
-		ret = __pdp_init(false, &ppgtt->pdp);
+	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
+		if (ret)
+			goto free_scratch;
 
+		ppgtt->base.total = 1ULL << 48;
+	} else {
+		ret = __pdp_init(false, &ppgtt->pdp);
 		if (ret)
 			goto free_scratch;
 
@@ -1142,9 +1251,10 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		 * 2GiB).
 		 */
 		ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
-	} else {
-		ppgtt->base.total = 1ULL << 48;
-		return -EPERM; /* Not yet implemented */
+
+		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
+							      0, 0,
+							      GEN8_PML4E_SHIFT);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index e2b684e..c8ac0b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -95,6 +95,7 @@ typedef uint64_t gen8_pde_t;
  */
 #define GEN8_PML4ES_PER_PML4		512
 #define GEN8_PML4E_SHIFT		39
+#define GEN8_PML4E_MASK			(GEN8_PML4ES_PER_PML4 - 1)
 #define GEN8_PDPE_SHIFT			30
 /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
  * tables */
@@ -464,6 +465,14 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
 	     temp = min(temp, length),					\
 	     start += temp, length -= temp)
 
+#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)	\
+	for (iter = gen8_pml4e_index(start);				\
+	     pdp = (pml4)->pdps[iter], length > 0 && iter < GEN8_PML4ES_PER_PML4;	\
+	     iter++,							\
+	     temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,	\
+	     temp = min(temp, length),					\
+	     start += temp, length -= temp)
+
 #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)		\
 	gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev))
 
@@ -484,8 +493,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address)
 
 static inline uint32_t gen8_pml4e_index(uint64_t address)
 {
-	WARN_ON(1); /* For 64B */
-	return 0;
+	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
}
 
 static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
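As a reading aid for the new index helpers, the sketch below decomposes a 48-bit virtual address the way gen8_pml4e_index() and its siblings do. It is standalone illustrative C, not driver code: the PML4E/PDPE shifts and the 512-entry mask match the header above, while the PDE/PTE shifts are assumptions consistent with 4KiB pages and 512-entry tables, not taken from this diff.

#include <stdint.h>
#include <stdio.h>

/* From i915_gem_gtt.h above. */
#define GEN8_PML4E_SHIFT	39
#define GEN8_PDPE_SHIFT		30
/* Assumed here: 2MiB per PDE, 4KiB per PTE (9 index bits per level). */
#define GEN8_PDE_SHIFT		21
#define GEN8_PTE_SHIFT		12
#define GEN8_INDEX_MASK		(512 - 1)

int main(void)
{
	uint64_t va = 0x0000123456789abcULL;	/* arbitrary 48-bit address */

	/* 48 bits = 9 (pml4e) + 9 (pdpe) + 9 (pde) + 9 (pte) + 12 (offset),
	 * which is why gen8_ppgtt_init() sets ppgtt->base.total = 1ULL << 48. */
	printf("pml4e  = %u\n", (unsigned)((va >> GEN8_PML4E_SHIFT) & GEN8_INDEX_MASK));
	printf("pdpe   = %u\n", (unsigned)((va >> GEN8_PDPE_SHIFT) & GEN8_INDEX_MASK));
	printf("pde    = %u\n", (unsigned)((va >> GEN8_PDE_SHIFT) & GEN8_INDEX_MASK));
	printf("pte    = %u\n", (unsigned)((va >> GEN8_PTE_SHIFT) & GEN8_INDEX_MASK));
	printf("offset = 0x%x\n", (unsigned)(va & 0xfff));
	return 0;
}

The masking step also shows why gen8_pml4e_index() gains GEN8_PML4E_MASK in this patch: even though bits above 47 should be zero for a 48-bit PPGTT address, the mask keeps the returned index within [0, 511] regardless of input.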