| Message ID | 1405340324-2315-12-git-send-email-rodrigo.vivi@intel.com (mailing list archive) |
|---|---|
| State | New, archived |
On Mon, 14 Jul 2014 05:18:44 -0700, Rodrigo Vivi <rodrigo.vivi@intel.com> wrote:

> From: Bob Beckett <robert.beckett@intel.com>
>
> Create a scratch page for the two unused PDPs and set all of their
> PTEs to point to it.
>
> This patch addresses a page fault, and subsequent hang, in pipe
> control flush. In those cases, the Main Graphic Arbiter Error
> register (0x40A0) showed a TLB page fault error, and a memory address
> larger than the size of our PPGTT was reported in the Fault TLB RD
> Data0 register (0x4B10).
>
> PDP2 and PDP3 were not set because, in theory, they aren't required
> for our PPGTT size, but they should be mapped to a scratch page
> anyway.
>
> v2: Rebase on latest nightly.
>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v1)
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com> (v2)
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

Just as an FYI: this patch is definitely needed (I confirmed with the
hardware design team), and the same "fix" was in my 64b PPGTT series.
One thing that appears different in my version, though I didn't check
closely, is that I allocate an extra page per PDP. I can't remember
exactly why I did that, but I may have explained it in the commit
message.

I'd like to see this fix get merged (and I'd prefer we just merge the
64b series, but since that's unlikely...). CC'ing James, who should
pick this up.
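The mechanism the commit message describes is small enough to sketch in isolation: every PTE of a page table behind an unused PDP is pointed at a single scratch page, so a speculative TLB prefetch through PDP2/PDP3 reads harmless memory instead of faulting. Below is a minimal, self-contained C sketch of that fill loop; `encode_pte()`, the constants, and the fake DMA address are illustrative stand-ins, not the i915 code (the real patch uses `gen8_pte_encode()` and `kmap_atomic()`, as the diff below shows).

```c
/*
 * Userspace sketch of the scratch-page idea, not the i915 code itself.
 * Every PTE of an unused page table is pointed at one scratch page.
 */
#include <stdint.h>
#include <stdio.h>

#define PTES_PER_TABLE 512	/* gen8: 512 64-bit PTEs per 4 KiB table */

static uint64_t encode_pte(uint64_t addr)
{
	/* Hypothetical encoding (address | valid bit), a stand-in for
	 * gen8_pte_encode(). */
	return addr | 1;
}

int main(void)
{
	uint64_t scratch_addr = 0x1000;	/* pretend DMA address */
	uint64_t pt[PTES_PER_TABLE];
	int i;

	/* Fill the whole table with the scratch PTE, as the patch does
	 * for the page tables of PDP2 and PDP3. */
	for (i = 0; i < PTES_PER_TABLE; i++)
		pt[i] = encode_pte(scratch_addr);

	printf("pt[0]=%#llx pt[last]=%#llx\n",
	       (unsigned long long)pt[0],
	       (unsigned long long)pt[PTES_PER_TABLE - 1]);
	return 0;
}
```

Compiled and run, it prints the same PTE for the first and last slot, which is exactly the state the patch leaves the unused page tables in.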
```diff
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 79 +++++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 +
 2 files changed, 65 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 743512e..7743e4f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -364,6 +364,11 @@ static void gen8_ppgtt_free(const struct i915_hw_ppgtt *ppgtt)
 		kfree(ppgtt->gen8_pt_dma_addr[i]);
 	}
 
+	/* Unused PDPs are always assigned to scratch page */
+	for (i = ppgtt->num_pd_pages; i < GEN8_LEGACY_PDPS; i++)
+		kfree(ppgtt->gen8_pt_dma_addr[i]);
+	__free_page(ppgtt->scratch_page);
+
 	__free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
 }
 
@@ -388,6 +393,13 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 				       PCI_DMA_BIDIRECTIONAL);
 		}
 	}
+
+	/* Unused PDPs are always assigned to scratch page */
+	for (i = ppgtt->num_pd_pages; i < GEN8_LEGACY_PDPS; i++) {
+		if (ppgtt->pd_dma_addr[i])
+			pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i],
+				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+	}
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -474,10 +486,21 @@ static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
 static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt,
 						const int max_pdp)
 {
-	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
-	if (!ppgtt->pd_pages)
+	/* Scratch page for unmapped PDP's */
+	ppgtt->scratch_page = alloc_page(GFP_KERNEL);
+	if (!ppgtt->scratch_page)
 		return -ENOMEM;
 
+	/* Must allocate space for all 4 PDPs. HW has implemented cache which
+	 * pre-fetches entries; that pre-fetch can attempt access for entries
+	 * even if no resources are located in that range.
+	 */
+	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, GEN8_LEGACY_PDPS);
+	if (!ppgtt->pd_pages) {
+		__free_page(ppgtt->scratch_page);
+		return -ENOMEM;
+	}
+
 	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
 	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
 
@@ -495,6 +518,7 @@ static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
 
 	ret = gen8_ppgtt_allocate_page_tables(ppgtt, max_pdp);
 	if (ret) {
+		__free_page(ppgtt->scratch_page);
 		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
 		return ret;
 	}
@@ -529,18 +553,25 @@ static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt,
 
 static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
 					const int pd,
-					const int pt)
+					const int pt,
+					const int max_pdp)
 {
 	dma_addr_t pt_addr;
 	struct page *p;
 	int ret;
 
-	p = ppgtt->gen8_pt_pages[pd][pt];
-	pt_addr = pci_map_page(ppgtt->base.dev->pdev,
-			       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
-	if (ret)
-		return ret;
+	/* Unused PDPs need to have their ptes pointing to the
+	 * existing scratch page.
+	 */
+	if (pd < max_pdp) {
+		p = ppgtt->gen8_pt_pages[pd][pt];
+		pt_addr = pci_map_page(ppgtt->base.dev->pdev,
+				       p, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pt_addr);
+		if (ret)
+			return ret;
+	} else
+		pt_addr = ppgtt->scratch_dma_addr;
 
 	ppgtt->gen8_pt_dma_addr[pd][pt] = pt_addr;
 
@@ -562,6 +593,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
 	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
 	int i, j, ret;
+	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
 
 	if (size % (1<<30))
 		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
@@ -571,30 +603,38 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	if (ret)
 		return ret;
 
-	/*
-	 * 2. Create DMA mappings for the page directories and page tables.
-	 */
-	for (i = 0; i < max_pdp; i++) {
+	/* 2. Map the scratch page */
+	ppgtt->scratch_dma_addr =
+		pci_map_page(ppgtt->base.dev->pdev,
+			     ppgtt->scratch_page, 0, PAGE_SIZE,
+			     PCI_DMA_BIDIRECTIONAL);
+
+	ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, ppgtt->scratch_dma_addr);
+	if (ret)
+		goto bail;
+
+	/* 3. Create DMA mappings for the page directories and page tables. */
+	for (i = 0; i < GEN8_LEGACY_PDPS; i++) {
 		ret = gen8_ppgtt_setup_page_directories(ppgtt, i);
 		if (ret)
 			goto bail;
 
 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
-			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
+			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j, max_pdp);
 			if (ret)
 				goto bail;
 		}
 	}
 
 	/*
-	 * 3. Map all the page directory entires to point to the page tables
+	 * 4. Map all the page directory entries to point to the page tables
 	 * we've allocated.
 	 *
 	 * For now, the PPGTT helper functions all require that the PDEs are
 	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
 	 * will never need to touch the PDEs again.
 	 */
-	for (i = 0; i < max_pdp; i++) {
+	for (i = 0; i < GEN8_LEGACY_PDPS; i++) {
 		gen8_ppgtt_pde_t *pd_vaddr;
 		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
 		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
@@ -617,6 +657,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 
 	ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
 
+	scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
+				      I915_CACHE_LLC, true);
+	pt_vaddr = kmap_atomic(ppgtt->scratch_page);
+	for (i = 0; i < GEN8_PTES_PER_PAGE; i++)
+		pt_vaddr[i] = scratch_pte;
+	kunmap_atomic(pt_vaddr);
+
 	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
 			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
 	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 666c938..02032b3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -249,6 +249,7 @@ struct i915_hw_ppgtt {
 		struct page **gen8_pt_pages[GEN8_LEGACY_PDPS];
 	};
 	struct page *pd_pages;
+	struct page *scratch_page;
 	union {
 		uint32_t pd_offset;
 		dma_addr_t pd_dma_addr[GEN8_LEGACY_PDPS];
@@ -258,6 +259,7 @@ struct i915_hw_ppgtt {
 		dma_addr_t *gen8_pt_dma_addr[4];
 	};
 
+	dma_addr_t scratch_dma_addr;
 	struct intel_context *ctx;
 
 	int (*enable)(struct i915_hw_ppgtt *ppgtt);
```
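One sizing detail worth spelling out: `alloc_pages()` takes a power-of-two *order*, not a page count, and the patch derives `num_pd_pages` as `1 << get_order(max_pdp << PAGE_SHIFT)`. A standalone sketch of that arithmetic follows, with `get_order()` re-implemented so it builds outside the kernel (assumes 4 KiB pages; the loop is an equivalent illustration of the kernel helper, not its actual source):

```c
/* Standalone sketch of the order arithmetic used by the patch. */
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumes 4 KiB pages */

/* Smallest order such that 2^order pages cover `size` bytes;
 * re-implemented here only so this builds outside the kernel. */
static int get_order(unsigned long size)
{
	int order = 0;

	size = (size - 1) >> PAGE_SHIFT;
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	int max_pdp;

	/* num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT), as in
	 * gen8_ppgtt_allocate_page_directories(). */
	for (max_pdp = 1; max_pdp <= 4; max_pdp++)
		printf("max_pdp=%d -> order=%d -> %d pages\n", max_pdp,
		       get_order((unsigned long)max_pdp << PAGE_SHIFT),
		       1 << get_order((unsigned long)max_pdp << PAGE_SHIFT));
	return 0;
}
```

For `max_pdp` of 1 through 4 this yields 1, 2, 4, and 4 pages, which is why `num_pd_pages` can exceed `max_pdp` and why the `DRM_DEBUG_DRIVER` message in the patch reports "wasted" directory pages.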