diff mbox

drm/i915: Page table helpers

Message ID 1393386735-998-1-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky Feb. 26, 2014, 3:52 a.m. UTC
These page table helpers make the code much cleaner. There is some
room to use the arch/x86 header files. The reason I've opted not to is
in several cases, the definitions are dictated by the CONFIG_ options
which do not always indicate the restrictions in the GPU.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---

I have this patch queued up for the next round of /stuff/ I am working on. If
you want to pull it into this series, it's fine by me. As I deal with the code
more, it does become more obvious what looks good, and what does not.

---

 drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++-----------
 1 file changed, 81 insertions(+), 34 deletions(-)

Comments

Imre Deak March 10, 2014, 9:05 p.m. UTC | #1
On Tue, 2014-02-25 at 19:52 -0800, Ben Widawsky wrote:
> These page table helpers make the code much cleaner. There is some
> room to use the arch/x86 header files. The reason I've opted not to is
> in several cases, the definitions are dictated by the CONFIG_ options
> which do not always indicate the restrictions in the GPU.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
> 
> I have this patch queued up for the next round of /stuff/ I am working on. If
> you want to pull it into this series, it's fine by me. As I deal with the code
> more, it does become more obvious what looks good, and what does not.
> 
> ---
> 
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++-----------
>  1 file changed, 81 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index aa3ef7f..43d9129 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -30,8 +30,6 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  
> -#define GEN6_PPGTT_PD_ENTRIES 512
> -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
>  typedef uint64_t gen8_gtt_pte_t;
>  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  
> @@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
>  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
>  
> +/* GEN6 PPGTT resembles a 2 level page table:
> + * 31:22 | 21:12 |  11:0
> + *  PDE  |  PTE  | offset
> + */
> +#define GEN6_PDE_SHIFT			22
> +#define GEN6_PPGTT_PD_ENTRIES		512
> +#define GEN6_PDE_MASK			(GEN6_PPGTT_PD_ENTRIES-1)
> +#define GEN6_PTE_SHIFT			12
> +#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> +#define GEN6_PTE_MASK			(GEN6_PPGTT_PT_ENTRIES-1)
> +
> +static inline uint32_t gen6_pte_index(uint32_t address)
> +{
> +	return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK;
> +}
> +
> +static inline uint32_t gen6_pde_index(uint32_t address)
> +{
> +	return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK;
> +}
> +
>  /* Cacheability Control is a 4-bit value. The low three bits are stored in *
>   * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
>   */
> @@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
>  #define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
>  
> +#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> +#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> +#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> +#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> +
>  #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
>  #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
>  
> @@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>   * PDPE  |  PDE  |  PTE  | offset
>   * The difference as compared to normal x86 3 level page table is the PDPEs are
>   * programmed via register.
> + *
> + * The x86 pagetable code is flexible in its ability to handle varying page
> + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
> + * instead replicate the interesting functionality.
>   */
>  #define GEN8_PDPE_SHIFT			30
>  #define GEN8_PDPE_MASK			0x3
> @@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  #define GEN8_PTE_SHIFT			12
>  #define GEN8_PTE_MASK			0x1ff
>  
> -#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> -#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> -#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> -#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> +static inline uint32_t gen8_pte_index(uint64_t address)
> +{
> +	return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
> +}
> +
> +static inline uint32_t gen8_pde_index(uint64_t address)
> +{
> +	return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
> +}
> +
> +static inline uint32_t gen8_pdpe_index(uint64_t address)
> +{
> +	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
> +}
> +
> +static inline uint32_t gen8_pml4e_index(uint64_t address)
> +{
> +	BUG();
> +}

Would be better to add this if/when it's first used.

> +
> +/* Useful for 3 level page table functions */
> +static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length)
> +{
> +	return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT;
> +}

This isn't used anywhere either and has the <start,length> vs.
<start,end> issue Daniel pointed out elsewhere.

Otherwise the patch looks good and makes things indeed clearer, so with
the above two functions removed:

Reviewed-by: Imre Deak <imre.deak@intel.com>

>  
>  static void ppgtt_bind_vma(struct i915_vma *vma,
>  			   enum i915_cache_level cache_level,
> @@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
> -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> +	unsigned pdpe = gen8_pdpe_index(start);
> +	unsigned pde = gen8_pde_index(start);
> +	unsigned pte = gen8_pte_index(start);
>  	unsigned num_entries = (length - start) >> PAGE_SHIFT;
>  	unsigned last_pte, i;
>  
> @@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen8_gtt_pte_t *pt_vaddr;
> -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> +	unsigned pdpe = gen8_pdpe_index(start);
> +	unsigned pde = gen8_pde_index(start);
> +	unsigned pte = gen8_pte_index(start);
>  	struct sg_page_iter sg_iter;
>  
>  	pt_vaddr = NULL;
> @@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>  		seq_printf(m, "\tPDE: %x\n", pd_entry);
>  
>  		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
> -		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
> +		for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) {
>  			unsigned long va =
> -				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
> +				(pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) +
>  				(pte * PAGE_SIZE);
>  			int i;
>  			bool found = false;
> @@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
> -	unsigned first_entry = start >> PAGE_SHIFT;
> +	unsigned pde = gen6_pde_index(start);
>  	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> -	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> -	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> +	unsigned pte = gen6_pte_index(start);
>  	unsigned last_pte, i;
>  
>  	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
>  
>  	while (num_entries) {
> -		last_pte = first_pte + num_entries;
> -		if (last_pte > I915_PPGTT_PT_ENTRIES)
> -			last_pte = I915_PPGTT_PT_ENTRIES;
> +		last_pte = pte + num_entries;
> +		if (last_pte > GEN6_PPGTT_PT_ENTRIES)
> +			last_pte = GEN6_PPGTT_PT_ENTRIES;
>  
> -		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
> +		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
>  
> -		for (i = first_pte; i < last_pte; i++)
> +		for (i = pte; i < last_pte; i++)
>  			pt_vaddr[i] = scratch_pte;
>  
>  		kunmap_atomic(pt_vaddr);
>  
> -		num_entries -= last_pte - first_pte;
> -		first_pte = 0;
> -		act_pt++;
> +		num_entries -= last_pte - pte;
> +		pte = 0;
> +		pde++;
>  	}
>  }
>  
> @@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen6_gtt_pte_t *pt_vaddr;
> -	unsigned first_entry = start >> PAGE_SHIFT;
> -	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> -	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> +	unsigned pde = gen6_pde_index(start);
> +	unsigned pte = gen6_pte_index(start);
>  	struct sg_page_iter sg_iter;
>  
>  	pt_vaddr = NULL;
>  	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
>  		if (pt_vaddr == NULL)
> -			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
> +			pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
>  
> -		pt_vaddr[act_pte] =
> +		pt_vaddr[pte] =
>  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
>  				       cache_level, true);
> -		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> +		if (++pte == GEN6_PPGTT_PT_ENTRIES) {
>  			kunmap_atomic(pt_vaddr);
>  			pt_vaddr = NULL;
> -			act_pt++;
> -			act_pte = 0;
> +			pde++;
> +			pte = 0;
>  		}
>  	}
>  	if (pt_vaddr)
> @@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
>  	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
>  	ppgtt->base.start = 0;
> -	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
> +	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE;
>  	ppgtt->debug_dump = gen6_dump_ppgtt;
>  
>  	ppgtt->pd_offset =
Ben Widawsky March 10, 2014, 9:37 p.m. UTC | #2
On Mon, Mar 10, 2014 at 11:05:42PM +0200, Imre Deak wrote:
> On Tue, 2014-02-25 at 19:52 -0800, Ben Widawsky wrote:
> > These page table helpers make the code much cleaner. There is some
> > room to use the arch/x86 header files. The reason I've opted not to is
> > in several cases, the definitions are dictated by the CONFIG_ options
> > which do not always indicate the restrictions in the GPU.
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> > 
> > I have this patch queued up for the next round of /stuff/ I am working on. If
> > you want to pull it into this series, it's fine by me. As I deal with the code
> > more, it does become more obvious what looks good, and what does not.
> > 
> > ---
> > 
> >  drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++++++++++++++++-----------
> >  1 file changed, 81 insertions(+), 34 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index aa3ef7f..43d9129 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -30,8 +30,6 @@
> >  #include "i915_trace.h"
> >  #include "intel_drv.h"
> >  
> > -#define GEN6_PPGTT_PD_ENTRIES 512
> > -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> >  typedef uint64_t gen8_gtt_pte_t;
> >  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  
> > @@ -51,6 +49,27 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
> >  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
> >  
> > +/* GEN6 PPGTT resembles a 2 level page table:
> > + * 31:22 | 21:12 |  11:0
> > + *  PDE  |  PTE  | offset
> > + */
> > +#define GEN6_PDE_SHIFT			22
> > +#define GEN6_PPGTT_PD_ENTRIES		512
> > +#define GEN6_PDE_MASK			(GEN6_PPGTT_PD_ENTRIES-1)
> > +#define GEN6_PTE_SHIFT			12
> > +#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > +#define GEN6_PTE_MASK			(GEN6_PPGTT_PT_ENTRIES-1)
> > +
> > +static inline uint32_t gen6_pte_index(uint32_t address)
> > +{
> > +	return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK;
> > +}
> > +
> > +static inline uint32_t gen6_pde_index(uint32_t address)
> > +{
> > +	return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK;
> > +}
> > +
> >  /* Cacheability Control is a 4-bit value. The low three bits are stored in *
> >   * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
> >   */
> > @@ -63,6 +82,11 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
> >  #define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
> >  
> > +#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> > +#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> > +#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> > +#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> > +
> >  #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
> >  #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
> >  
> > @@ -71,6 +95,10 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >   * PDPE  |  PDE  |  PTE  | offset
> >   * The difference as compared to normal x86 3 level page table is the PDPEs are
> >   * programmed via register.
> > + *
> > + * The x86 pagetable code is flexible in its ability to handle varying page
> > + * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
> > + * instead replicate the interesting functionality.
> >   */
> >  #define GEN8_PDPE_SHIFT			30
> >  #define GEN8_PDPE_MASK			0x3
> > @@ -79,10 +107,31 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define GEN8_PTE_SHIFT			12
> >  #define GEN8_PTE_MASK			0x1ff
> >  
> > -#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
> > -#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
> > -#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
> > -#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
> > +static inline uint32_t gen8_pte_index(uint64_t address)
> > +{
> > +	return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
> > +}
> > +
> > +static inline uint32_t gen8_pde_index(uint64_t address)
> > +{
> > +	return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
> > +}
> > +
> > +static inline uint32_t gen8_pdpe_index(uint64_t address)
> > +{
> > +	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
> > +}
> > +
> > +static inline uint32_t gen8_pml4e_index(uint64_t address)
> > +{
> > +	BUG();
> > +}
> 
> Would be better to add this if/when it's first used.
> 
> > +
> > +/* Useful for 3 level page table functions */
> > +static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length)
> > +{
> > +	return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT;
> > +}
> 
> This isn't used anywhere either and has the <start,length> vs.
> <start,end> issue Daniel pointed out elsewhere.
> 
> Otherwise the patch looks good and makes things indeed clearer, so with
> the above two functions removed:
> 
> Reviewed-by: Imre Deak <imre.deak@intel.com>
> 

I'll be posting some code soon of which this patch is a part of. I think
it makes everything a lot simpler, but there is one weird bug I still
need to resolve.

Thanks for looking at this. If you want to see dynamic-pt-pages in my
fdo repo, you can see where things are going.
http://cgit.freedesktop.org/~bwidawsk/drm-intel/log/?h=dynamic_pt_alloc

> >  
> >  static void ppgtt_bind_vma(struct i915_vma *vma,
> >  			   enum i915_cache_level cache_level,
> > @@ -274,9 +323,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
> > -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> > -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> > -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> > +	unsigned pdpe = gen8_pdpe_index(start);
> > +	unsigned pde = gen8_pde_index(start);
> > +	unsigned pte = gen8_pte_index(start);
> >  	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> >  	unsigned last_pte, i;
> >  
> > @@ -315,9 +364,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen8_gtt_pte_t *pt_vaddr;
> > -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> > -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> > -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> > +	unsigned pdpe = gen8_pdpe_index(start);
> > +	unsigned pde = gen8_pde_index(start);
> > +	unsigned pte = gen8_pte_index(start);
> >  	struct sg_page_iter sg_iter;
> >  
> >  	pt_vaddr = NULL;
> > @@ -661,9 +710,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
> >  		seq_printf(m, "\tPDE: %x\n", pd_entry);
> >  
> >  		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
> > -		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
> > +		for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) {
> >  			unsigned long va =
> > -				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
> > +				(pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) +
> >  				(pte * PAGE_SIZE);
> >  			int i;
> >  			bool found = false;
> > @@ -938,29 +987,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
> > -	unsigned first_entry = start >> PAGE_SHIFT;
> > +	unsigned pde = gen6_pde_index(start);
> >  	unsigned num_entries = (length - start) >> PAGE_SHIFT;
> > -	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> > -	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> > +	unsigned pte = gen6_pte_index(start);
> >  	unsigned last_pte, i;
> >  
> >  	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
> >  
> >  	while (num_entries) {
> > -		last_pte = first_pte + num_entries;
> > -		if (last_pte > I915_PPGTT_PT_ENTRIES)
> > -			last_pte = I915_PPGTT_PT_ENTRIES;
> > +		last_pte = pte + num_entries;
> > +		if (last_pte > GEN6_PPGTT_PT_ENTRIES)
> > +			last_pte = GEN6_PPGTT_PT_ENTRIES;
> >  
> > -		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
> > +		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
> >  
> > -		for (i = first_pte; i < last_pte; i++)
> > +		for (i = pte; i < last_pte; i++)
> >  			pt_vaddr[i] = scratch_pte;
> >  
> >  		kunmap_atomic(pt_vaddr);
> >  
> > -		num_entries -= last_pte - first_pte;
> > -		first_pte = 0;
> > -		act_pt++;
> > +		num_entries -= last_pte - pte;
> > +		pte = 0;
> > +		pde++;
> >  	}
> >  }
> >  
> > @@ -972,24 +1020,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
> >  	struct i915_hw_ppgtt *ppgtt =
> >  		container_of(vm, struct i915_hw_ppgtt, base);
> >  	gen6_gtt_pte_t *pt_vaddr;
> > -	unsigned first_entry = start >> PAGE_SHIFT;
> > -	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
> > -	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> > +	unsigned pde = gen6_pde_index(start);
> > +	unsigned pte = gen6_pte_index(start);
> >  	struct sg_page_iter sg_iter;
> >  
> >  	pt_vaddr = NULL;
> >  	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
> >  		if (pt_vaddr == NULL)
> > -			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
> > +			pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
> >  
> > -		pt_vaddr[act_pte] =
> > +		pt_vaddr[pte] =
> >  			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
> >  				       cache_level, true);
> > -		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
> > +		if (++pte == GEN6_PPGTT_PT_ENTRIES) {
> >  			kunmap_atomic(pt_vaddr);
> >  			pt_vaddr = NULL;
> > -			act_pt++;
> > -			act_pte = 0;
> > +			pde++;
> > +			pte = 0;
> >  		}
> >  	}
> >  	if (pt_vaddr)
> > @@ -1173,7 +1220,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> >  	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
> >  	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
> >  	ppgtt->base.start = 0;
> > -	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
> > +	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE;
> >  	ppgtt->debug_dump = gen6_dump_ppgtt;
> >  
> >  	ppgtt->pd_offset =
> 
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index aa3ef7f..43d9129 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -30,8 +30,6 @@ 
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-#define GEN6_PPGTT_PD_ENTRIES 512
-#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
 typedef uint64_t gen8_gtt_pte_t;
 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 
@@ -51,6 +49,27 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
 
+/* GEN6 PPGTT resembles a 2 level page table:
+ * 31:22 | 21:12 |  11:0
+ *  PDE  |  PTE  | offset
+ */
+#define GEN6_PDE_SHIFT			22
+#define GEN6_PPGTT_PD_ENTRIES		512
+#define GEN6_PDE_MASK			(GEN6_PPGTT_PD_ENTRIES-1)
+#define GEN6_PTE_SHIFT			12
+#define GEN6_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+#define GEN6_PTE_MASK			(GEN6_PPGTT_PT_ENTRIES-1)
+
+static inline uint32_t gen6_pte_index(uint32_t address)
+{
+	return (address >> GEN6_PTE_SHIFT) & GEN6_PTE_MASK;
+}
+
+static inline uint32_t gen6_pde_index(uint32_t address)
+{
+	return (address >> GEN6_PDE_SHIFT) & GEN6_PDE_MASK;
+}
+
 /* Cacheability Control is a 4-bit value. The low three bits are stored in *
  * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
  */
@@ -63,6 +82,11 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
 #define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
 
+#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
+#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
+
 #define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
 #define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
 
@@ -71,6 +95,10 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
  * PDPE  |  PDE  |  PTE  | offset
  * The difference as compared to normal x86 3 level page table is the PDPEs are
  * programmed via register.
+ *
+ * The x86 pagetable code is flexible in its ability to handle varying page
+ * table depths via abstracted PGDIR/PUD/PMD/PTE. I've opted to not do this and
+ * instead replicate the interesting functionality.
  */
 #define GEN8_PDPE_SHIFT			30
 #define GEN8_PDPE_MASK			0x3
@@ -79,10 +107,31 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN8_PTE_SHIFT			12
 #define GEN8_PTE_MASK			0x1ff
 
-#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
-#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
-#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
-#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
+static inline uint32_t gen8_pte_index(uint64_t address)
+{
+	return (address >> GEN8_PTE_SHIFT) & GEN8_PTE_MASK;
+}
+
+static inline uint32_t gen8_pde_index(uint64_t address)
+{
+	return (address >> GEN8_PDE_SHIFT) & GEN8_PDE_MASK;
+}
+
+static inline uint32_t gen8_pdpe_index(uint64_t address)
+{
+	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+}
+
+static inline uint32_t gen8_pml4e_index(uint64_t address)
+{
+	BUG();
+}
+
+/* Useful for 3 level page table functions */
+static inline uint32_t gen8_pde_count(uint64_t start, uint64_t length)
+{
+	return ((length - start) / PAGE_SIZE) << GEN8_PDE_SHIFT;
+}
 
 static void ppgtt_bind_vma(struct i915_vma *vma,
 			   enum i915_cache_level cache_level,
@@ -274,9 +323,9 @@  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
-	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+	unsigned pdpe = gen8_pdpe_index(start);
+	unsigned pde = gen8_pde_index(start);
+	unsigned pte = gen8_pte_index(start);
 	unsigned num_entries = (length - start) >> PAGE_SHIFT;
 	unsigned last_pte, i;
 
@@ -315,9 +364,9 @@  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr;
-	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+	unsigned pdpe = gen8_pdpe_index(start);
+	unsigned pde = gen8_pde_index(start);
+	unsigned pte = gen8_pte_index(start);
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
@@ -661,9 +710,9 @@  static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 		seq_printf(m, "\tPDE: %x\n", pd_entry);
 
 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
-		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
+		for (pte = 0; pte < GEN6_PPGTT_PT_ENTRIES; pte+=4) {
 			unsigned long va =
-				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
+				(pde * PAGE_SIZE * GEN6_PPGTT_PT_ENTRIES) +
 				(pte * PAGE_SIZE);
 			int i;
 			bool found = false;
@@ -938,29 +987,28 @@  static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
-	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned pde = gen6_pde_index(start);
 	unsigned num_entries = (length - start) >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned pte = gen6_pte_index(start);
 	unsigned last_pte, i;
 
 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
 
 	while (num_entries) {
-		last_pte = first_pte + num_entries;
-		if (last_pte > I915_PPGTT_PT_ENTRIES)
-			last_pte = I915_PPGTT_PT_ENTRIES;
+		last_pte = pte + num_entries;
+		if (last_pte > GEN6_PPGTT_PT_ENTRIES)
+			last_pte = GEN6_PPGTT_PT_ENTRIES;
 
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 
-		for (i = first_pte; i < last_pte; i++)
+		for (i = pte; i < last_pte; i++)
 			pt_vaddr[i] = scratch_pte;
 
 		kunmap_atomic(pt_vaddr);
 
-		num_entries -= last_pte - first_pte;
-		first_pte = 0;
-		act_pt++;
+		num_entries -= last_pte - pte;
+		pte = 0;
+		pde++;
 	}
 }
 
@@ -972,24 +1020,23 @@  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
-	unsigned first_entry = start >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned pde = gen6_pde_index(start);
+	unsigned pte = gen6_pte_index(start);
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
 		if (pt_vaddr == NULL)
-			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+			pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 
-		pt_vaddr[act_pte] =
+		pt_vaddr[pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
 				       cache_level, true);
-		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
+		if (++pte == GEN6_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
-			act_pt++;
-			act_pte = 0;
+			pde++;
+			pte = 0;
 		}
 	}
 	if (pt_vaddr)
@@ -1173,7 +1220,7 @@  static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 	ppgtt->base.start = 0;
-	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
+	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * GEN6_PPGTT_PT_ENTRIES * PAGE_SIZE;
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
 	ppgtt->pd_offset =