Message ID | 1393386735-998-1-git-send-email-benjamin.widawsky@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, 2014-02-25 at 19:52 -0800, Ben Widawsky wrote: > These page table helpers make the code much cleaner. There is some > room to use the arch/x86 header files. The reason I've opted not to is > in several cases, the definitions are dictated by the CONFIG_ options > which do not always indicate the restrictions in the GPU. > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > --- > > I have this patch queued up for the next round of /stuff/ I am working on. If > you want to pull it into this series, it's fine by me. As I deal with the code > more, it does become more obvious what looks good, and what does not. > > --- > > drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++----------- > 1 file changed, 81 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index aa3ef7f..43d9129 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -30,8 +30,6 @@ > #include "i915_trace.h" > #include "intel_drv.h" > > -#define GEN6_PPGTT_PD_ENTRIES 512 > -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) > typedef uint64_t gen8_gtt_pte_t; > typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > > @@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) > #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) > > +/* GEN6 PPGTT resembles a 2 level page table: > + * 31:22 | 21:12 | 11:0 > + * PDE | PTE | offset > + */ > +#define GEN6_PDE_SHIFT 22 > +#define GEN6_PPGTT_PD_ENTRIES 512 > +#define GEN6_PDE_MASK (GEN6_PPGTT_PD_ENTRIES-1) > +#define GEN6_PTE_SHIFT 12 > +#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) > +#define GEN6_PTE_MASK (GEN6_PPGTT_PT_ENTRIES-1) > + > +static inline uint32_t gen6_pte_index(uint32_t address) > +{ > + return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK; > +} > + > +static inline uint32_t gen6_pde_index(uint32_t address) > +{ > + return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK; > +} > + > /* Cacheability Control is a 4-bit value. The low three bits are stored in * > * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. > */ > @@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) > #define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7) > > +#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) > +#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ > +#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ > +#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ > + > #define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t)) > #define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t)) > > @@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > * PDPE | PDE | PTE | offset > * The difference as compared to normal x86 3 level page table is the PDPEs are > * programmed via register. > + * > + * The x86 pagetable code is flexible in its ability to handle varying page > + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and > + * instead replicate the interesting functionality. > */ > #define GEN8_PDPE_SHIFT 30 > #define GEN8_PDPE_MASK 0x3 > @@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > #define GEN8_PTE_SHIFT 12 > #define GEN8_PTE_MASK 0x1ff > > -#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) > -#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ > -#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ > -#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ > +static inline uint32_t gen8_pte_index(uint64_t address) > +{ > + return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK; > +} > + > +static inline uint32_t gen8_pde_index(uint64_t address) > +{ > + return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK; > +} > + > +static inline uint32_t gen8_pdpe_index(uint64_t address) > +{ > + return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; > +} > + > +static inline uint32_t gen8_pml4e_index(uint64_t address) > +{ > + BUG(); > +} Would be better to add this if/when it's first used. > + > +/* Useful for 3 level page table functions */ > +static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length) > +{ > + return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT; > +} This isn't used anywhere either and has the <start,length> vs. <start,end> issue Daniel pointed out elsewhere. Otherwise the patch looks good and makes things indeed clearer, so with the above two functions removed: Reviewed-by: Imre Deak <imre.deak@intel.com> > > static void ppgtt_bind_vma(struct i915_vma *vma, > enum i915_cache_level cache_level, > @@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > gen8_gtt_pte_t *pt_vaddr, scratch_pte; > - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; > - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; > - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; > + unsigned pdpe = gen8_pdpe_index(start); > + unsigned pde = gen8_pde_index(start); > + unsigned pte = gen8_pte_index(start); > unsigned num_entries = (length - start) >> PAGE_SHIFT; > unsigned last_pte, i; > > @@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > gen8_gtt_pte_t *pt_vaddr; > - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; > - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; > - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; > + unsigned pdpe = gen8_pdpe_index(start); > + unsigned pde = gen8_pde_index(start); > + unsigned pte = gen8_pte_index(start); > struct sg_page_iter sg_iter; > > pt_vaddr = NULL; > @@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) > seq_printf(m, "\tPDE: %x\n", pd_entry); > > pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); > - for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { > + for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) { > unsigned long va = > - (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + > + (pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) + > (pte * PAGE_SIZE); > int i; > bool found = false; > @@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > gen6_gtt_pte_t *pt_vaddr, scratch_pte; > - unsigned first_entry = start >> PAGE_SHIFT; > + unsigned pde = gen6_pde_index(start); > unsigned num_entries = (length - start) >> PAGE_SHIFT; > - unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; > - unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; > + unsigned pte = gen6_pte_index(start); > unsigned last_pte, i; > > scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); > > while (num_entries) { > - last_pte = first_pte + num_entries; > - if (last_pte > I915_PPGTT_PT_ENTRIES) > - last_pte = I915_PPGTT_PT_ENTRIES; > + last_pte = pte + num_entries; > + if (last_pte > GEN6_PPGTT_PT_ENTRIES) > + last_pte = GEN6_PPGTT_PT_ENTRIES; > > - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); > + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); > > - for (i = first_pte; i < last_pte; i++) > + for (i = pte; i < last_pte; i++) > pt_vaddr[i] = scratch_pte; > > kunmap_atomic(pt_vaddr); > > - num_entries -= last_pte - first_pte; > - first_pte = 0; > - act_pt++; > + num_entries -= last_pte - pte; > + pte = 0; > + pde++; > } > } > > @@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > gen6_gtt_pte_t *pt_vaddr; > - unsigned first_entry = start >> PAGE_SHIFT; > - unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; > - unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; > + unsigned pde = gen6_pde_index(start); > + unsigned pte = gen6_pte_index(start); > struct sg_page_iter sg_iter; > > pt_vaddr = NULL; > for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { > if (pt_vaddr == NULL) > - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); > + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); > > - pt_vaddr[act_pte] = > + pt_vaddr[pte] = > vm->pte_encode(sg_page_iter_dma_address(&sg_iter), > cache_level, true); > - if (++act_pte == I915_PPGTT_PT_ENTRIES) { > + if (++pte == GEN6_PPGTT_PT_ENTRIES) { > kunmap_atomic(pt_vaddr); > pt_vaddr = NULL; > - act_pt++; > - act_pte = 0; > + pde++; > + pte = 0; > } > } > if (pt_vaddr) > @@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > ppgtt->base.cleanup = gen6_ppgtt_cleanup; > ppgtt->base.scratch = dev_priv->gtt.base.scratch; > ppgtt->base.start = 0; > - ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; > + ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE; > ppgtt->debug_dump = gen6_dump_ppgtt; > > ppgtt->pd_offset =
On Mon, Mar 10, 2014 at 11:05:42PM +0200, Imre Deak wrote: > On Tue, 2014-02-25 at 19:52 -0800, Ben Widawsky wrote: > > These page table helpers make the code much cleaner. There is some > > room to use the arch/x86 header files. The reason I've opted not to is > > in several cases, the definitions are dictated by the CONFIG_ options > > which do not always indicate the restrictions in the GPU. > > > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > > --- > > > > I have this patch queued up for the next round of /stuff/ I am working on. If > > you want to pull it into this series, it's fine by me. As I deal with the code > > more, it does become more obvious what looks good, and what does not. > > > > --- > > > > drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++----------- > > 1 file changed, 81 insertions(+), 34 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > > index aa3ef7f..43d9129 100644 > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > > @@ -30,8 +30,6 @@ > > #include "i915_trace.h" > > #include "intel_drv.h" > > > > -#define GEN6_PPGTT_PD_ENTRIES 512 > > -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) > > typedef uint64_t gen8_gtt_pte_t; > > typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > > > > @@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > > #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) > > #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) > > > > +/* GEN6 PPGTT resembles a 2 level page table: > > + * 31:22 | 21:12 | 11:0 > > + * PDE | PTE | offset > > + */ > > +#define GEN6_PDE_SHIFT 22 > > +#define GEN6_PPGTT_PD_ENTRIES 512 > > +#define GEN6_PDE_MASK (GEN6_PPGTT_PD_ENTRIES-1) > > +#define GEN6_PTE_SHIFT 12 > > +#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) > > +#define GEN6_PTE_MASK (GEN6_PPGTT_PT_ENTRIES-1) > > + > > +static inline uint32_t gen6_pte_index(uint32_t address) > > +{ > > + return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK; > > +} > > + > > +static inline uint32_t gen6_pde_index(uint32_t address) > > +{ > > + return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK; > > +} > > + > > /* Cacheability Control is a 4-bit value. The low three bits are stored in * > > * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. > > */ > > @@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > > #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) > > #define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7) > > > > +#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) > > +#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ > > +#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ > > +#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ > > + > > #define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t)) > > #define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t)) > > > > @@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > > * PDPE | PDE | PTE | offset > > * The difference as compared to normal x86 3 level page table is the PDPEs are > > * programmed via register. > > + * > > + * The x86 pagetable code is flexible in its ability to handle varying page > > + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and > > + * instead replicate the interesting functionality. > > */ > > #define GEN8_PDPE_SHIFT 30 > > #define GEN8_PDPE_MASK 0x3 > > @@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > > #define GEN8_PTE_SHIFT 12 > > #define GEN8_PTE_MASK 0x1ff > > > > -#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) > > -#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ > > -#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ > > -#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ > > +static inline uint32_t gen8_pte_index(uint64_t address) > > +{ > > + return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK; > > +} > > + > > +static inline uint32_t gen8_pde_index(uint64_t address) > > +{ > > + return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK; > > +} > > + > > +static inline uint32_t gen8_pdpe_index(uint64_t address) > > +{ > > + return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; > > +} > > + > > +static inline uint32_t gen8_pml4e_index(uint64_t address) > > +{ > > + BUG(); > > +} > > Would be better to add this if/when it's first used. > > > + > > +/* Useful for 3 level page table functions */ > > +static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length) > > +{ > > + return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT; > > +} > > This isn't used anywhere either and has the <start,length> vs. > <start,end> issue Daniel pointed out elsewhere. > > Otherwise the patch looks good and makes things indeed clearer, so with > the above two functions removed: > > Reviewed-by: Imre Deak <imre.deak@intel.com> > I'll be posting some code soon of which this patch is a part of. I think it makes everything a lot simpler, but there is one weird bug I still need to resolve. Thanks for looking at this. If you want to see dynamic-pt-pages in my fdo repo, you can see where things are going. http://cgit.freedesktop.org/~bwidawsk/drm-intel/log/?h=dynamic_pt_alloc > > > > static void ppgtt_bind_vma(struct i915_vma *vma, > > enum i915_cache_level cache_level, > > @@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, > > struct i915_hw_ppgtt *ppgtt = > > container_of(vm, struct i915_hw_ppgtt, base); > > gen8_gtt_pte_t *pt_vaddr, scratch_pte; > > - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; > > - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; > > - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; > > + unsigned pdpe = gen8_pdpe_index(start); > > + unsigned pde = gen8_pde_index(start); > > + unsigned pte = gen8_pte_index(start); > > unsigned num_entries = (length - start) >> PAGE_SHIFT; > > unsigned last_pte, i; > > > > @@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, > > struct i915_hw_ppgtt *ppgtt = > > container_of(vm, struct i915_hw_ppgtt, base); > > gen8_gtt_pte_t *pt_vaddr; > > - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; > > - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; > > - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; > > + unsigned pdpe = gen8_pdpe_index(start); > > + unsigned pde = gen8_pde_index(start); > > + unsigned pte = gen8_pte_index(start); > > struct sg_page_iter sg_iter; > > > > pt_vaddr = NULL; > > @@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) > > seq_printf(m, "\tPDE: %x\n", pd_entry); > > > > pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); > > - for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { > > + for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) { > > unsigned long va = > > - (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + > > + (pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) + > > (pte * PAGE_SIZE); > > int i; > > bool found = false; > > @@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > > struct i915_hw_ppgtt *ppgtt = > > container_of(vm, struct i915_hw_ppgtt, base); > > gen6_gtt_pte_t *pt_vaddr, scratch_pte; > > - unsigned first_entry = start >> PAGE_SHIFT; > > + unsigned pde = gen6_pde_index(start); > > unsigned num_entries = (length - start) >> PAGE_SHIFT; > > - unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; > > - unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; > > + unsigned pte = gen6_pte_index(start); > > unsigned last_pte, i; > > > > scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); > > > > while (num_entries) { > > - last_pte = first_pte + num_entries; > > - if (last_pte > I915_PPGTT_PT_ENTRIES) > > - last_pte = I915_PPGTT_PT_ENTRIES; > > + last_pte = pte + num_entries; > > + if (last_pte > GEN6_PPGTT_PT_ENTRIES) > > + last_pte = GEN6_PPGTT_PT_ENTRIES; > > > > - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); > > + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); > > > > - for (i = first_pte; i < last_pte; i++) > > + for (i = pte; i < last_pte; i++) > > pt_vaddr[i] = scratch_pte; > > > > kunmap_atomic(pt_vaddr); > > > > - num_entries -= last_pte - first_pte; > > - first_pte = 0; > > - act_pt++; > > + num_entries -= last_pte - pte; > > + pte = 0; > > + pde++; > > } > > } > > > > @@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > > struct i915_hw_ppgtt *ppgtt = > > container_of(vm, struct i915_hw_ppgtt, base); > > gen6_gtt_pte_t *pt_vaddr; > > - unsigned first_entry = start >> PAGE_SHIFT; > > - unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; > > - unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; > > + unsigned pde = gen6_pde_index(start); > > + unsigned pte = gen6_pte_index(start); > > struct sg_page_iter sg_iter; > > > > pt_vaddr = NULL; > > for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { > > if (pt_vaddr == NULL) > > - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); > > + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); > > > > - pt_vaddr[act_pte] = > > + pt_vaddr[pte] = > > vm->pte_encode(sg_page_iter_dma_address(&sg_iter), > > cache_level, true); > > - if (++act_pte == I915_PPGTT_PT_ENTRIES) { > > + if (++pte == GEN6_PPGTT_PT_ENTRIES) { > > kunmap_atomic(pt_vaddr); > > pt_vaddr = NULL; > > - act_pt++; > > - act_pte = 0; > > + pde++; > > + pte = 0; > > } > > } > > if (pt_vaddr) > > @@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > > ppgtt->base.cleanup = gen6_ppgtt_cleanup; > > ppgtt->base.scratch = dev_priv->gtt.base.scratch; > > ppgtt->base.start = 0; > > - ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; > > + ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE; > > ppgtt->debug_dump = gen6_dump_ppgtt; > > > > ppgtt->pd_offset = > >
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index aa3ef7f..43d9129 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -30,8 +30,6 @@ #include "i915_trace.h" #include "intel_drv.h" -#define GEN6_PPGTT_PD_ENTRIES 512 -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) typedef uint64_t gen8_gtt_pte_t; typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; @@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) +/* GEN6 PPGTT resembles a 2 level page table: + * 31:22 | 21:12 | 11:0 + * PDE | PTE | offset + */ +#define GEN6_PDE_SHIFT 22 +#define GEN6_PPGTT_PD_ENTRIES 512 +#define GEN6_PDE_MASK (GEN6_PPGTT_PD_ENTRIES-1) +#define GEN6_PTE_SHIFT 12 +#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) +#define GEN6_PTE_MASK (GEN6_PPGTT_PT_ENTRIES-1) + +static inline uint32_t gen6_pte_index(uint32_t address) +{ + return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK; +} + +static inline uint32_t gen6_pde_index(uint32_t address) +{ + return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK; +} + /* Cacheability Control is a 4-bit value. The low three bits are stored in * * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE. */ @@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) #define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7) +#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) +#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ +#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ +#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ + #define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t)) #define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t)) @@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; * PDPE | PDE | PTE | offset * The difference as compared to normal x86 3 level page table is the PDPEs are * programmed via register. + * + * The x86 pagetable code is flexible in its ability to handle varying page + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and + * instead replicate the interesting functionality. */ #define GEN8_PDPE_SHIFT 30 #define GEN8_PDPE_MASK 0x3 @@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; #define GEN8_PTE_SHIFT 12 #define GEN8_PTE_MASK 0x1ff -#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) -#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ -#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ -#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ +static inline uint32_t gen8_pte_index(uint64_t address) +{ + return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK; +} + +static inline uint32_t gen8_pde_index(uint64_t address) +{ + return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK; +} + +static inline uint32_t gen8_pdpe_index(uint64_t address) +{ + return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; +} + +static inline uint32_t gen8_pml4e_index(uint64_t address) +{ + BUG(); +} + +/* Useful for 3 level page table functions */ +static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length) +{ + return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT; +} static void ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, @@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_gtt_pte_t *pt_vaddr, scratch_pte; - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; + unsigned pdpe = gen8_pdpe_index(start); + unsigned pde = gen8_pde_index(start); + unsigned pte = gen8_pte_index(start); unsigned num_entries = (length - start) >> PAGE_SHIFT; unsigned last_pte, i; @@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen8_gtt_pte_t *pt_vaddr; - unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK; - unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK; - unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK; + unsigned pdpe = gen8_pdpe_index(start); + unsigned pde = gen8_pde_index(start); + unsigned pte = gen8_pte_index(start); struct sg_page_iter sg_iter; pt_vaddr = NULL; @@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) seq_printf(m, "\tPDE: %x\n", pd_entry); pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); - for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { + for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) { unsigned long va = - (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + + (pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) + (pte * PAGE_SIZE); int i; bool found = false; @@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen6_gtt_pte_t *pt_vaddr, scratch_pte; - unsigned first_entry = start >> PAGE_SHIFT; + unsigned pde = gen6_pde_index(start); unsigned num_entries = (length - start) >> PAGE_SHIFT; - unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; - unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES; + unsigned pte = gen6_pte_index(start); unsigned last_pte, i; scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true); while (num_entries) { - last_pte = first_pte + num_entries; - if (last_pte > I915_PPGTT_PT_ENTRIES) - last_pte = I915_PPGTT_PT_ENTRIES; + last_pte = pte + num_entries; + if (last_pte > GEN6_PPGTT_PT_ENTRIES) + last_pte = GEN6_PPGTT_PT_ENTRIES; - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); - for (i = first_pte; i < last_pte; i++) + for (i = pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; kunmap_atomic(pt_vaddr); - num_entries -= last_pte - first_pte; - first_pte = 0; - act_pt++; + num_entries -= last_pte - pte; + pte = 0; + pde++; } } @@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); gen6_gtt_pte_t *pt_vaddr; - unsigned first_entry = start >> PAGE_SHIFT; - unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES; - unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES; + unsigned pde = gen6_pde_index(start); + unsigned pte = gen6_pte_index(start); struct sg_page_iter sg_iter; pt_vaddr = NULL; for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { if (pt_vaddr == NULL) - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]); + pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]); - pt_vaddr[act_pte] = + pt_vaddr[pte] = vm->pte_encode(sg_page_iter_dma_address(&sg_iter), cache_level, true); - if (++act_pte == I915_PPGTT_PT_ENTRIES) { + if (++pte == GEN6_PPGTT_PT_ENTRIES) { kunmap_atomic(pt_vaddr); pt_vaddr = NULL; - act_pt++; - act_pte = 0; + pde++; + pte = 0; } } if (pt_vaddr) @@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->base.scratch = dev_priv->gtt.base.scratch; ppgtt->base.start = 0; - ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE; + ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; ppgtt->pd_offset =
These page table helpers make the code much cleaner. There is some room to use the arch/x86 header files. The reason I've opted not to is in several cases, the definitions are dictated by the CONFIG_ options which do not always indicate the restrictions in the GPU. Signed-off-by: Ben Widawsky <ben@bwidawsk.net> --- I have this patch queued up for the next round of /stuff/ I am working on. If you want to pull it into this series, it's fine by me. As I deal with the code more, it does become more obvious what looks good, and what does not. --- drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++----------- 1 file changed, 81 insertions(+), 34 deletions(-)