@@ -220,7 +220,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
int i, ret;
/* bit of a hack to find the actual last used pd */
- int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
+ int used_pd = ppgtt->num_pd_entries / I915_PDES_PER_PD;
for (i = used_pd - 1; i >= 0; i--) {
dma_addr_t addr = ppgtt->pd_dma_addr[i];
@@ -240,9 +240,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen8_gtt_pte_t *pt_vaddr, scratch_pte;
- unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
- unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
- unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+ unsigned pdpe = gen8_pdpe_index(start);
+ unsigned pde = gen8_pde_index(start);
+ unsigned pte = gen8_pte_index(start);
unsigned num_entries = length >> PAGE_SHIFT;
unsigned last_pte, i;
@@ -253,8 +253,8 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
last_pte = pte + num_entries;
- if (last_pte > GEN8_PTES_PER_PAGE)
- last_pte = GEN8_PTES_PER_PAGE;
+ if (last_pte > GEN8_PTES_PER_PT)
+ last_pte = GEN8_PTES_PER_PT;
pt_vaddr = kmap_atomic(page_table);
@@ -266,7 +266,7 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
kunmap_atomic(pt_vaddr);
pte = 0;
- if (++pde == GEN8_PDES_PER_PAGE) {
+ if (++pde == I915_PDES_PER_PD) {
pdpe++;
pde = 0;
}
@@ -281,9 +281,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen8_gtt_pte_t *pt_vaddr;
- unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
- unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
- unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+ unsigned pdpe = gen8_pdpe_index(start);
+ unsigned pde = gen8_pde_index(start);
+ unsigned pte = gen8_pte_index(start);
struct sg_page_iter sg_iter;
pt_vaddr = NULL;
@@ -298,10 +298,10 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
pt_vaddr[pte] =
gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
cache_level, true);
- if (++pte == GEN8_PTES_PER_PAGE) {
+ if (++pte == GEN8_PTES_PER_PT) {
kunmap_atomic(pt_vaddr);
pt_vaddr = NULL;
- if (++pde == GEN8_PDES_PER_PAGE) {
+ if (++pde == I915_PDES_PER_PD) {
pdpe++;
pde = 0;
}
@@ -319,7 +319,7 @@ static void gen8_free_page_tables(struct page **pt_pages)
if (pt_pages == NULL)
return;
- for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
+ for (i = 0; i < I915_PDES_PER_PD; i++)
if (pt_pages[i])
__free_pages(pt_pages[i], 0);
}
@@ -351,7 +351,7 @@ static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
- for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+ for (j = 0; j < I915_PDES_PER_PD; j++) {
dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
if (addr)
pci_unmap_page(hwdev, addr, PAGE_SIZE,
@@ -377,11 +377,11 @@ static struct page **__gen8_alloc_page_tables(void)
struct page **pt_pages;
int i;
- pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
+ pt_pages = kcalloc(I915_PDES_PER_PD, sizeof(struct page *), GFP_KERNEL);
if (!pt_pages)
return ERR_PTR(-ENOMEM);
- for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
+ for (i = 0; i < I915_PDES_PER_PD; i++) {
pt_pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!pt_pages[i])
goto bail;
@@ -431,7 +431,7 @@ static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
int i;
for (i = 0; i < ppgtt->num_pd_pages; i++) {
- ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
+ ppgtt->gen8_pt_dma_addr[i] = kcalloc(I915_PDES_PER_PD,
sizeof(dma_addr_t),
GFP_KERNEL);
if (!ppgtt->gen8_pt_dma_addr[i])
@@ -470,7 +470,7 @@ static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
return ret;
}
- ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
+ ppgtt->num_pd_entries = max_pdp * I915_PDES_PER_PD;
ret = gen8_ppgtt_allocate_dma(ppgtt);
if (ret)
@@ -531,7 +531,7 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
{
const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
- const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
+ const int min_pt_pages = I915_PDES_PER_PD * max_pdp;
int i, j, ret;
if (size % (1<<30))
@@ -550,7 +550,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
if (ret)
goto bail;
- for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+ for (j = 0; j < I915_PDES_PER_PD; j++) {
ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
if (ret)
goto bail;
@@ -568,7 +568,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
for (i = 0; i < max_pdp; i++) {
gen8_ppgtt_pde_t *pd_vaddr;
pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
- for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+ for (j = 0; j < I915_PDES_PER_PD; j++) {
dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
I915_CACHE_LLC);
@@ -582,7 +582,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
ppgtt->base.cleanup = gen8_ppgtt_cleanup;
ppgtt->base.start = 0;
- ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
+ ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PT * PAGE_SIZE;
DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
@@ -628,9 +628,9 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
seq_printf(m, "\tPDE: %x\n", pd_entry);
pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
- for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
+ for (pte = 0; pte < GEN6_PTES_PER_PT; pte+=4) {
unsigned long va =
- (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
+ (pde * PAGE_SIZE * GEN6_PTES_PER_PT) +
(pte * PAGE_SIZE);
int i;
bool found = false;
@@ -909,29 +909,28 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen6_gtt_pte_t *pt_vaddr, scratch_pte;
- unsigned first_entry = start >> PAGE_SHIFT;
+ unsigned pde = gen6_pde_index(start);
unsigned num_entries = length >> PAGE_SHIFT;
- unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
- unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+ unsigned pte = gen6_pte_index(start);
unsigned last_pte, i;
scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
while (num_entries) {
- last_pte = first_pte + num_entries;
- if (last_pte > I915_PPGTT_PT_ENTRIES)
- last_pte = I915_PPGTT_PT_ENTRIES;
+ last_pte = pte + num_entries;
+ if (last_pte > GEN6_PTES_PER_PT)
+ last_pte = GEN6_PTES_PER_PT;
- pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+ pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
- for (i = first_pte; i < last_pte; i++)
+ for (i = pte; i < last_pte; i++)
pt_vaddr[i] = scratch_pte;
kunmap_atomic(pt_vaddr);
- num_entries -= last_pte - first_pte;
- first_pte = 0;
- act_pt++;
+ num_entries -= last_pte - pte;
+ pte = 0;
+ pde++;
}
}
@@ -943,24 +942,23 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt =
container_of(vm, struct i915_hw_ppgtt, base);
gen6_gtt_pte_t *pt_vaddr;
- unsigned first_entry = start >> PAGE_SHIFT;
- unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
- unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+ unsigned pde = gen6_pde_index(start);
+ unsigned pte = gen6_pte_index(start);
struct sg_page_iter sg_iter;
pt_vaddr = NULL;
for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
if (pt_vaddr == NULL)
- pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+ pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
- pt_vaddr[act_pte] =
+ pt_vaddr[pte] =
vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
cache_level, true);
- if (++act_pte == I915_PPGTT_PT_ENTRIES) {
+ if (++pte == GEN6_PTES_PER_PT) {
kunmap_atomic(pt_vaddr);
pt_vaddr = NULL;
- act_pt++;
- act_pte = 0;
+ pde++;
+ pte = 0;
}
}
if (pt_vaddr)
@@ -1005,7 +1003,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
#define GEN6_PD_ALIGN (PAGE_SIZE * 16)
-#define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
+#define GEN6_PD_SIZE (I915_PDES_PER_PD * PAGE_SIZE)
struct drm_device *dev = ppgtt->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
bool retried = false;
@@ -1039,7 +1037,7 @@ alloc:
if (ppgtt->node.start < dev_priv->gtt.mappable_end)
DRM_DEBUG("Forced to use aperture for PDEs\n");
- ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
+ ppgtt->num_pd_entries = I915_PDES_PER_PD;
return 0;
}
@@ -1144,7 +1142,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
ppgtt->base.cleanup = gen6_ppgtt_cleanup;
ppgtt->base.start = 0;
- ppgtt->base.total = ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
+ ppgtt->base.total = ppgtt->num_pd_entries * GEN6_PTES_PER_PT * PAGE_SIZE;
ppgtt->debug_dump = gen6_dump_ppgtt;
ppgtt->pd_offset =
@@ -1,8 +1,11 @@
#ifndef _I915_GEM_GTT_H
#define _I915_GEM_GTT_H
-#define GEN6_PPGTT_PD_ENTRIES 512
-#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+/* GEN Agnostic defines */
+#define I915_PDES_PER_PD 512
+#define I915_PTE_MASK (PAGE_SHIFT-1)
+#define I915_PDE_MASK (I915_PDES_PER_PD-1)
+
typedef uint32_t gen6_gtt_pte_t;
typedef uint64_t gen8_gtt_pte_t;
typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
@@ -23,6 +26,98 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
+
+/* GEN6 PPGTT resembles a 2 level page table:
+ * 31:22 | 21:12 | 11:0
+ * PDE | PTE | offset
+ */
+#define GEN6_PDE_SHIFT 22
+#define GEN6_PTES_PER_PT (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+
+static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
+{
+ const uint32_t mask = (1 << (pde_shift - PAGE_SHIFT)) - 1;
+ return (address >> PAGE_SHIFT) & mask;
+}
+
+/* Helper to counts the number of PTEs within the given length. This count does
+ * not cross a page table boundary, so the max value would be
+ * GEN6_PTES_PER_PT for GEN6, and GEN8_PTES_PER_PT for GEN8.
+ */
+static inline size_t i915_pte_count(uint64_t addr, size_t length,
+ uint32_t pde_shift)
+{
+ const uint64_t pd_mask = ~((1 << pde_shift) - 1);
+ uint64_t end;
+
+ if (WARN_ON(!length))
+ return 0;
+
+ if (WARN_ON(addr % PAGE_SIZE))
+ addr = round_down(addr, PAGE_SIZE);
+
+ if (WARN_ON(length % PAGE_SIZE))
+ length = round_up(length, PAGE_SIZE);
+
+ end = addr + length;
+
+ if ((addr & pd_mask) != (end & pd_mask))
+ return (1 << (pde_shift - PAGE_SHIFT)) -
+ i915_pte_index(addr, pde_shift);
+
+ return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
+}
+
+static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift)
+{
+ return (addr >> shift) & I915_PDE_MASK;
+}
+
+static inline size_t i915_pde_count(uint64_t addr, uint64_t length,
+ uint32_t pde_shift)
+{
+ const uint32_t pdp_shift = pde_shift + 9;
+ uint32_t start, end;
+
+ if (WARN_ON(!length))
+ return 0;
+
+ if (WARN_ON(addr % PAGE_SIZE))
+ addr = round_down(addr, PAGE_SIZE);
+
+ if (WARN_ON(length % PAGE_SIZE))
+ length = round_up(length, PAGE_SIZE);
+
+ start = i915_pde_index(addr, pde_shift);
+ end = round_up(addr + length, (1 << pde_shift)) >> pde_shift;
+
+ if (addr >> pdp_shift != (addr + length) >> pdp_shift)
+ end = round_down(end, I915_PDES_PER_PD);
+
+ BUG_ON(start > end);
+ return end - start;
+}
+
+static inline uint32_t gen6_pte_index(uint32_t addr)
+{
+ return i915_pte_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline size_t gen6_pte_count(uint32_t addr, uint32_t length)
+{
+ return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
+}
+
+static inline uint32_t gen6_pde_index(uint32_t addr)
+{
+ return i915_pde_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline size_t gen6_pde_count(uint32_t addr, uint32_t length)
+{
+ return i915_pde_count(addr, length, GEN6_PDE_SHIFT);
+}
+
#define BYT_PTE_WRITEABLE (1 << 1)
#define BYT_PTE_SNOOPED_BY_CPU_CACHES (1 << 2)
@@ -44,8 +139,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */
#define GEN8_LEGACY_PDPS 4
-#define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
-#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
+#define GEN8_PTES_PER_PT (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
/* GEN8 legacy style addressis defined as a 3 level page table:
* 31:30 | 29:21 | 20:12 | 11:0
@@ -60,9 +154,26 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
#define GEN8_PDPE_SHIFT 30
#define GEN8_PDPE_MASK 0x3
#define GEN8_PDE_SHIFT 21
-#define GEN8_PDE_MASK 0x1ff
-#define GEN8_PTE_SHIFT 12
-#define GEN8_PTE_MASK 0x1ff
+
+static inline uint32_t gen8_pte_index(uint64_t address)
+{
+ return i915_pte_index(address, GEN8_PDE_SHIFT);
+}
+
+static inline uint32_t gen8_pde_index(uint64_t address)
+{
+ return i915_pde_index(address, GEN8_PDE_SHIFT);
+}
+
+static inline uint32_t gen8_pdpe_index(uint64_t address)
+{
+ return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+}
+
+static inline uint32_t gen8_pml4e_index(uint64_t address)
+{
+ BUG();
+}
enum i915_cache_level;
/**
These page table helpers make the code much cleaner. There is some room
to use the arch/x86 header files. The reason I've opted not to is in
several cases, the definitions are dictated by the CONFIG_ options which
do not always indicate the restrictions in the GPU. While here, clean up
the defines to have more concise names, and consolidate between gen6 and
gen8 where appropriate.

I've made a lot of tiny errors in these helpers. Often I'd correct an
error only to introduce another one. While IGT was capable of catching
them, the tests often took a while to catch, and were hard/slow to debug
in the kernel. As a result, to test this, I compiled i915_gem_gtt.h in
userspace, and ran tests from userspace. What follows isn't by any means
complete, but it was able to catch a lot of bugs. Gen8 is also untested,
but since the current code is almost identical, I feel pretty
comfortable with that.

void test_pte(uint32_t base)
{
	uint32_t ret;

	assert_pte_index((base + 0), 0);
	assert_pte_index((base + 1), 0);
	assert_pte_index((base + 0x1000), 1);
	assert_pte_index((base + (1<<22)), 0);
	assert_pte_index((base + ((1<<22) - 1)), 1023);
	assert_pte_index((base + (1<<21)), 512);

	assert_pte_count(base + 0, 0, 0);
	assert_pte_count(base + 0, 1, 1);
	assert_pte_count(base + 0, 0x1000, 1);
	assert_pte_count(base + 0, 0x1001, 2);
	assert_pte_count(base + 0, 1<<21, 512);
	assert_pte_count(base + 0, 1<<22, 1024);
	assert_pte_count(base + 0, (1<<22) - 1, 1024);
	assert_pte_count(base + (1<<21), 1<<22, 512);
	assert_pte_count(base + (1<<21), (1<<22)+1, 512);
	assert_pte_count(base + (1<<21), 10<<22, 512);
}

void test_pde(uint32_t base)
{
	assert(gen6_pde_index(base + 0) == 0);
	assert(gen6_pde_index(base + 1) == 0);
	assert(gen6_pde_index(base + (1<<21)) == 0);
	assert(gen6_pde_index(base + (1<<22)) == 1);
	assert(gen6_pde_index(base + ((256<<22))) == 256);
	assert(gen6_pde_index(base + ((512<<22))) == 0);
	assert(gen6_pde_index(base + ((513<<22))) == 1); /* This is actually not possible on gen6 */

	assert(gen6_pde_count(base + 0, 0) == 0);
	assert(gen6_pde_count(base + 0, 1) == 1);
	assert(gen6_pde_count(base + 0, 1<<21) == 1);
	assert(gen6_pde_count(base + 0, 1<<22) == 1);
	assert(gen6_pde_count(base + 0, (1<<22) + 0x1000) == 2);
	assert(gen6_pde_count(base + 0x1000, 1<<22) == 2);
	assert(gen6_pde_count(base + 0, 511<<22) == 511);
	assert(gen6_pde_count(base + 0, 512<<22) == 512);
	assert(gen6_pde_count(base + 0x1000, 512<<22) == 512);
	assert(gen6_pde_count(base + (1<<22), 512<<22) == 511);
}

int main()
{
	test_pde(0);
	while (1)
		test_pte(rand() & ~((1<<22) - 1));

	return 0;
}

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |  90 +++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h | 125 ++++++++++++++++++++++++++++++++++--
 2 files changed, 162 insertions(+), 53 deletions(-)