Message ID | 20170404221128.3943-8-matthew.auld@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote: > To enable 64K pages we need to set the intermediate-page-size(IPS) bit > of the pde, therefore a page table is said to be either operating in 64K > or 4K mode. To accommodate this vm placement restriction we introduce a > color for pages and corresponding color_adjust callback. > > Signed-off-by: Matthew Auld <matthew.auld@intel.com> > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++++ > drivers/gpu/drm/i915/i915_vma.c | 2 ++ > 3 files changed, 33 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 0989af4a17e4..ddc3db345b76 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) > return -ENOMEM; > } > > +static void i915_page_color_adjust(const struct drm_mm_node *node, > + unsigned long color, > + u64 *start, > + u64 *end) > +{ > + GEM_BUG_ON(!is_valid_gtt_page_size(color)); > + > + if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K))) > + return; > + > + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); > + > + if (i915_node_color_differs(node, color)) > + *start = roundup(*start, 1 << GEN8_PDE_SHIFT); > + > + node = list_next_entry(node, node_list); > + if (i915_node_color_differs(node, color)) > + *end = rounddown(*end, 1 << GEN8_PDE_SHIFT); > + > + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); > +} > + > /* > * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers > * with a net effect resembling a 2-level page table in normal x86 terms. Each > @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; > ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; > ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; > + > + if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K)) > + ppgtt->base.mm.color_adjust = i915_page_color_adjust; > } else { > ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); > if (ret) > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 9c592e2de516..8d893ddd98f2 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm) > } > > static inline bool > +i915_vm_has_page_coloring(const struct i915_address_space *vm) > +{ > + return vm->mm.color_adjust && !i915_is_ggtt(vm); > +} > + > +static inline bool > i915_vm_is_48bit(const struct i915_address_space *vm) > { > return (vm->total - 1) >> 32; > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index 8f0041ba328f..4043145b4310 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > > if (i915_vm_has_cache_coloring(vma->vm)) > color = obj->cache_level; > + else if (i915_vm_has_page_coloring(vma->vm)) > + color = obj->gtt_page_size; This does not need color_adjust since you are just specifying an alignment and size. Why the extra complications? I remember asking the same last time. -Chris
On 5 April 2017 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote: >> To enable 64K pages we need to set the intermediate-page-size(IPS) bit >> of the pde, therefore a page table is said to be either operating in 64K >> or 4K mode. To accommodate this vm placement restriction we introduce a >> color for pages and corresponding color_adjust callback. >> >> Signed-off-by: Matthew Auld <matthew.auld@intel.com> >> --- >> drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++ >> drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++++ >> drivers/gpu/drm/i915/i915_vma.c | 2 ++ >> 3 files changed, 33 insertions(+) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c >> index 0989af4a17e4..ddc3db345b76 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) >> return -ENOMEM; >> } >> >> +static void i915_page_color_adjust(const struct drm_mm_node *node, >> + unsigned long color, >> + u64 *start, >> + u64 *end) >> +{ >> + GEM_BUG_ON(!is_valid_gtt_page_size(color)); >> + >> + if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K))) >> + return; >> + >> + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); >> + >> + if (i915_node_color_differs(node, color)) >> + *start = roundup(*start, 1 << GEN8_PDE_SHIFT); >> + >> + node = list_next_entry(node, node_list); >> + if (i915_node_color_differs(node, color)) >> + *end = rounddown(*end, 1 << GEN8_PDE_SHIFT); >> + >> + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); >> +} >> + >> /* >> * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers >> * with a net effect resembling a 2-level page table in normal x86 terms. Each >> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) >> ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; >> ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; >> ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; >> + >> + if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K)) >> + ppgtt->base.mm.color_adjust = i915_page_color_adjust; >> } else { >> ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); >> if (ret) >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h >> index 9c592e2de516..8d893ddd98f2 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h >> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm) >> } >> >> static inline bool >> +i915_vm_has_page_coloring(const struct i915_address_space *vm) >> +{ >> + return vm->mm.color_adjust && !i915_is_ggtt(vm); >> +} >> + >> +static inline bool >> i915_vm_is_48bit(const struct i915_address_space *vm) >> { >> return (vm->total - 1) >> 32; >> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c >> index 8f0041ba328f..4043145b4310 100644 >> --- a/drivers/gpu/drm/i915/i915_vma.c >> +++ b/drivers/gpu/drm/i915/i915_vma.c >> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) >> >> if (i915_vm_has_cache_coloring(vma->vm)) >> color = obj->cache_level; >> + else if (i915_vm_has_page_coloring(vma->vm)) >> + color = obj->gtt_page_size; > > This does not need color_adjust since you are just specifying an > alignment and size. Why the extra complications? I remember asking the > same last time. Hmm, are you saying the whole idea of needing a color_adjust for 4K/64K vm placement is completely unnecessary?
On Wed, Apr 05, 2017 at 02:50:41PM +0100, Matthew Auld wrote: > On 5 April 2017 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote: > > On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote: > >> To enable 64K pages we need to set the intermediate-page-size(IPS) bit > >> of the pde, therefore a page table is said to be either operating in 64K > >> or 4K mode. To accommodate this vm placement restriction we introduce a > >> color for pages and corresponding color_adjust callback. > >> > >> Signed-off-by: Matthew Auld <matthew.auld@intel.com> > >> --- > >> drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++ > >> drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++++ > >> drivers/gpu/drm/i915/i915_vma.c | 2 ++ > >> 3 files changed, 33 insertions(+) > >> > >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > >> index 0989af4a17e4..ddc3db345b76 100644 > >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > >> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) > >> return -ENOMEM; > >> } > >> > >> +static void i915_page_color_adjust(const struct drm_mm_node *node, > >> + unsigned long color, > >> + u64 *start, > >> + u64 *end) > >> +{ > >> + GEM_BUG_ON(!is_valid_gtt_page_size(color)); > >> + > >> + if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K))) > >> + return; > >> + > >> + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); > >> + > >> + if (i915_node_color_differs(node, color)) > >> + *start = roundup(*start, 1 << GEN8_PDE_SHIFT); > >> + > >> + node = list_next_entry(node, node_list); > >> + if (i915_node_color_differs(node, color)) > >> + *end = rounddown(*end, 1 << GEN8_PDE_SHIFT); > >> + > >> + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); > >> +} > >> + > >> /* > >> * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers > >> * with a net effect resembling a 2-level page table in normal x86 terms. Each > >> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) > >> ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; > >> ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; > >> ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; > >> + > >> + if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K)) > >> + ppgtt->base.mm.color_adjust = i915_page_color_adjust; > >> } else { > >> ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); > >> if (ret) > >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > >> index 9c592e2de516..8d893ddd98f2 100644 > >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > >> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm) > >> } > >> > >> static inline bool > >> +i915_vm_has_page_coloring(const struct i915_address_space *vm) > >> +{ > >> + return vm->mm.color_adjust && !i915_is_ggtt(vm); > >> +} > >> + > >> +static inline bool > >> i915_vm_is_48bit(const struct i915_address_space *vm) > >> { > >> return (vm->total - 1) >> 32; > >> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > >> index 8f0041ba328f..4043145b4310 100644 > >> --- a/drivers/gpu/drm/i915/i915_vma.c > >> +++ b/drivers/gpu/drm/i915/i915_vma.c > >> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > >> > >> if (i915_vm_has_cache_coloring(vma->vm)) > >> color = obj->cache_level; > >> + else if (i915_vm_has_page_coloring(vma->vm)) > >> + color = obj->gtt_page_size; > > > > This does not need color_adjust since you are just specifying an > > alignment and size. Why the extra complications? I remember asking the > > same last time. > Hmm, are you saying the whole idea of needing a color_adjust for > 4K/64K vm placement is completely unnecessary? As constructed here, yes. Since you just want to request a obj->gtt_page_size aligned block: .size = round_up(size, obj->gtt_page_size), .align = max(align, obj->gtt_page_size). (Hmm, now I think about it you shouldn't round size up unless the insert_pages() is careful not to assume that the last page is a full superpage. More I think about this, you only want to align the base and let insert_pages group up the superpages.) Unless I have completely misunderstood, you do not need to insert gaps between blocks. Both the color_adjust approach and this approach still need lower level support to amalgamate pages. -Chris
On 5 April 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Wed, Apr 05, 2017 at 02:50:41PM +0100, Matthew Auld wrote: >> On 5 April 2017 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote: >> > On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote: >> >> To enable 64K pages we need to set the intermediate-page-size(IPS) bit >> >> of the pde, therefore a page table is said to be either operating in 64K >> >> or 4K mode. To accommodate this vm placement restriction we introduce a >> >> color for pages and corresponding color_adjust callback. >> >> >> >> Signed-off-by: Matthew Auld <matthew.auld@intel.com> >> >> --- >> >> drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++ >> >> drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++++ >> >> drivers/gpu/drm/i915/i915_vma.c | 2 ++ >> >> 3 files changed, 33 insertions(+) >> >> >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c >> >> index 0989af4a17e4..ddc3db345b76 100644 >> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> >> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) >> >> return -ENOMEM; >> >> } >> >> >> >> +static void i915_page_color_adjust(const struct drm_mm_node *node, >> >> + unsigned long color, >> >> + u64 *start, >> >> + u64 *end) >> >> +{ >> >> + GEM_BUG_ON(!is_valid_gtt_page_size(color)); >> >> + >> >> + if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K))) >> >> + return; >> >> + >> >> + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); >> >> + >> >> + if (i915_node_color_differs(node, color)) >> >> + *start = roundup(*start, 1 << GEN8_PDE_SHIFT); >> >> + >> >> + node = list_next_entry(node, node_list); >> >> + if (i915_node_color_differs(node, color)) >> >> + *end = rounddown(*end, 1 << GEN8_PDE_SHIFT); >> >> + >> >> + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); >> >> +} >> >> + >> >> /* >> >> * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers >> >> * with a net effect resembling a 2-level page table in normal x86 terms. Each >> >> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) >> >> ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; >> >> ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; >> >> ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; >> >> + >> >> + if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K)) >> >> + ppgtt->base.mm.color_adjust = i915_page_color_adjust; >> >> } else { >> >> ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); >> >> if (ret) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h >> >> index 9c592e2de516..8d893ddd98f2 100644 >> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h >> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h >> >> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm) >> >> } >> >> >> >> static inline bool >> >> +i915_vm_has_page_coloring(const struct i915_address_space *vm) >> >> +{ >> >> + return vm->mm.color_adjust && !i915_is_ggtt(vm); >> >> +} >> >> + >> >> +static inline bool >> >> i915_vm_is_48bit(const struct i915_address_space *vm) >> >> { >> >> return (vm->total - 1) >> 32; >> >> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c >> >> index 8f0041ba328f..4043145b4310 100644 >> >> --- a/drivers/gpu/drm/i915/i915_vma.c >> >> +++ b/drivers/gpu/drm/i915/i915_vma.c >> >> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) >> >> >> >> if (i915_vm_has_cache_coloring(vma->vm)) >> >> color = obj->cache_level; >> >> + else if (i915_vm_has_page_coloring(vma->vm)) >> >> + color = obj->gtt_page_size; >> > >> > This does not need color_adjust since you are just specifying an >> > alignment and size. Why the extra complications? I remember asking the >> > same last time. >> Hmm, are you saying the whole idea of needing a color_adjust for >> 4K/64K vm placement is completely unnecessary? > > As constructed here, yes. Since you just want to request a > obj->gtt_page_size aligned block: > > .size = round_up(size, obj->gtt_page_size), > .align = max(align, obj->gtt_page_size). Unless I've gone completely mad, I really don't think it's that simple, we never ever want a 4K object or 64K object to overlap the same page-table. We derive the pde and hence the page-table from node.start, so if we just request an obj->gtt_page_size aligned block, we could have a page-table with a mixture of 64K and 4K pte's, at which point we're screwed. > > (Hmm, now I think about it you shouldn't round size up unless the > insert_pages() is careful not to assume that the last page is a full > superpage. More I think about this, you only want to align the base and > let insert_pages group up the superpages.) > > Unless I have completely misunderstood, you do not need to insert > gaps between blocks. Both the color_adjust approach and this approach > still need lower level support to amalgamate pages. > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre
On 5 April 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote: > On Wed, Apr 05, 2017 at 02:50:41PM +0100, Matthew Auld wrote: >> On 5 April 2017 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote: >> > On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote: >> >> To enable 64K pages we need to set the intermediate-page-size(IPS) bit >> >> of the pde, therefore a page table is said to be either operating in 64K >> >> or 4K mode. To accommodate this vm placement restriction we introduce a >> >> color for pages and corresponding color_adjust callback. >> >> >> >> Signed-off-by: Matthew Auld <matthew.auld@intel.com> >> >> --- >> >> drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++ >> >> drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++++ >> >> drivers/gpu/drm/i915/i915_vma.c | 2 ++ >> >> 3 files changed, 33 insertions(+) >> >> >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c >> >> index 0989af4a17e4..ddc3db345b76 100644 >> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> >> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) >> >> return -ENOMEM; >> >> } >> >> >> >> +static void i915_page_color_adjust(const struct drm_mm_node *node, >> >> + unsigned long color, >> >> + u64 *start, >> >> + u64 *end) >> >> +{ >> >> + GEM_BUG_ON(!is_valid_gtt_page_size(color)); >> >> + >> >> + if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K))) >> >> + return; >> >> + >> >> + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); >> >> + >> >> + if (i915_node_color_differs(node, color)) >> >> + *start = roundup(*start, 1 << GEN8_PDE_SHIFT); >> >> + >> >> + node = list_next_entry(node, node_list); >> >> + if (i915_node_color_differs(node, color)) >> >> + *end = rounddown(*end, 1 << GEN8_PDE_SHIFT); >> >> + >> >> + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); >> >> +} >> >> + >> >> /* >> >> * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers >> >> * with a net effect resembling a 2-level page table in normal x86 terms. Each >> >> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) >> >> ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; >> >> ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; >> >> ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; >> >> + >> >> + if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K)) >> >> + ppgtt->base.mm.color_adjust = i915_page_color_adjust; >> >> } else { >> >> ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); >> >> if (ret) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h >> >> index 9c592e2de516..8d893ddd98f2 100644 >> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h >> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h >> >> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm) >> >> } >> >> >> >> static inline bool >> >> +i915_vm_has_page_coloring(const struct i915_address_space *vm) >> >> +{ >> >> + return vm->mm.color_adjust && !i915_is_ggtt(vm); >> >> +} >> >> + >> >> +static inline bool >> >> i915_vm_is_48bit(const struct i915_address_space *vm) >> >> { >> >> return (vm->total - 1) >> 32; >> >> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c >> >> index 8f0041ba328f..4043145b4310 100644 >> >> --- a/drivers/gpu/drm/i915/i915_vma.c >> >> +++ b/drivers/gpu/drm/i915/i915_vma.c >> >> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) >> >> >> >> if (i915_vm_has_cache_coloring(vma->vm)) >> >> color = obj->cache_level; >> >> + else if (i915_vm_has_page_coloring(vma->vm)) >> >> + color = obj->gtt_page_size; >> > >> > This does not need color_adjust since you are just specifying an >> > alignment and size. Why the extra complications? I remember asking the >> > same last time. >> Hmm, are you saying the whole idea of needing a color_adjust for >> 4K/64K vm placement is completely unnecessary? > > As constructed here, yes. Since you just want to request a > obj->gtt_page_size aligned block: > > .size = round_up(size, obj->gtt_page_size), > .align = max(align, obj->gtt_page_size). > > (Hmm, now I think about it you shouldn't round size up unless the > insert_pages() is careful not to assume that the last page is a full > superpage. More I think about this, you only want to align the base and > let insert_pages group up the superpages.) I feel like I must be missing your point, could you expand on what you mean my grouping superpages? > > Unless I have completely misunderstood, you do not need to insert > gaps between blocks. Both the color_adjust approach and this approach > still need lower level support to amalgamate pages. > -Chris > > -- > Chris Wilson, Intel Open Source Technology Centre
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0989af4a17e4..ddc3db345b76 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) return -ENOMEM; } +static void i915_page_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, + u64 *end) +{ + GEM_BUG_ON(!is_valid_gtt_page_size(color)); + + if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K))) + return; + + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); + + if (i915_node_color_differs(node, color)) + *start = roundup(*start, 1 << GEN8_PDE_SHIFT); + + node = list_next_entry(node, node_list); + if (i915_node_color_differs(node, color)) + *end = rounddown(*end, 1 << GEN8_PDE_SHIFT); + + GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color)); +} + /* * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; + + if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K)) + ppgtt->base.mm.color_adjust = i915_page_color_adjust; } else { ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9c592e2de516..8d893ddd98f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm) } static inline bool +i915_vm_has_page_coloring(const struct i915_address_space *vm) +{ + return vm->mm.color_adjust && !i915_is_ggtt(vm); +} + +static inline bool i915_vm_is_48bit(const struct i915_address_space *vm) { return (vm->total - 1) >> 32; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 8f0041ba328f..4043145b4310 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (i915_vm_has_cache_coloring(vma->vm)) color = obj->cache_level; + else if (i915_vm_has_page_coloring(vma->vm)) + color = obj->gtt_page_size; if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK;
To enable 64K pages we need to set the intermediate-page-size(IPS) bit of the pde, therefore a page table is said to be either operating in 64K or 4K mode. To accommodate this vm placement restriction we introduce a color for pages and corresponding color_adjust callback. Signed-off-by: Matthew Auld <matthew.auld@intel.com> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++++ drivers/gpu/drm/i915/i915_vma.c | 2 ++ 3 files changed, 33 insertions(+)