diff mbox

[07/18] drm/i915: introduce ppgtt page coloring

Message ID 20170404221128.3943-8-matthew.auld@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Matthew Auld April 4, 2017, 10:11 p.m. UTC
To enable 64K pages we need to set the intermediate-page-size(IPS) bit
of the pde, therefore a page table is said to be either operating in 64K
or 4K mode. To accommodate this vm placement restriction we introduce a
color for pages and corresponding color_adjust callback.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h |  6 ++++++
 drivers/gpu/drm/i915/i915_vma.c     |  2 ++
 3 files changed, 33 insertions(+)

Comments

Chris Wilson April 5, 2017, 1:41 p.m. UTC | #1
On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote:
> To enable 64K pages we need to set the intermediate-page-size(IPS) bit
> of the pde, therefore a page table is said to be either operating in 64K
> or 4K mode. To accommodate this vm placement restriction we introduce a
> color for pages and corresponding color_adjust callback.
> 
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  6 ++++++
>  drivers/gpu/drm/i915/i915_vma.c     |  2 ++
>  3 files changed, 33 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 0989af4a17e4..ddc3db345b76 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
>  	return -ENOMEM;
>  }
>  
> +static void i915_page_color_adjust(const struct drm_mm_node *node,
> +				   unsigned long color,
> +				   u64 *start,
> +				   u64 *end)
> +{
> +	GEM_BUG_ON(!is_valid_gtt_page_size(color));
> +
> +	if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K)))
> +		return;
> +
> +	GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
> +
> +	if (i915_node_color_differs(node, color))
> +		*start = roundup(*start, 1 << GEN8_PDE_SHIFT);
> +
> +	node = list_next_entry(node, node_list);
> +	if (i915_node_color_differs(node, color))
> +		*end = rounddown(*end, 1 << GEN8_PDE_SHIFT);
> +
> +	GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
> +}
> +
>  /*
>   * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
>   * with a net effect resembling a 2-level page table in normal x86 terms. Each
> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>  		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
>  		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
>  		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
> +
> +		if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K))
> +			ppgtt->base.mm.color_adjust = i915_page_color_adjust;
>  	} else {
>  		ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
>  		if (ret)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 9c592e2de516..8d893ddd98f2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm)
>  }
>  
>  static inline bool
> +i915_vm_has_page_coloring(const struct i915_address_space *vm)
> +{
> +	return vm->mm.color_adjust && !i915_is_ggtt(vm);
> +}
> +
> +static inline bool
>  i915_vm_is_48bit(const struct i915_address_space *vm)
>  {
>  	return (vm->total - 1) >> 32;
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index 8f0041ba328f..4043145b4310 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  
>  	if (i915_vm_has_cache_coloring(vma->vm))
>  		color = obj->cache_level;
> +	else if (i915_vm_has_page_coloring(vma->vm))
> +		color = obj->gtt_page_size;

This does not need color_adjust since you are just specifying an
alignment and size. Why the extra complications? I remember asking the
same last time.
-Chris
Matthew Auld April 5, 2017, 1:50 p.m. UTC | #2
On 5 April 2017 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote:
>> To enable 64K pages we need to set the intermediate-page-size(IPS) bit
>> of the pde, therefore a page table is said to be either operating in 64K
>> or 4K mode. To accommodate this vm placement restriction we introduce a
>> color for pages and corresponding color_adjust callback.
>>
>> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++
>>  drivers/gpu/drm/i915/i915_gem_gtt.h |  6 ++++++
>>  drivers/gpu/drm/i915/i915_vma.c     |  2 ++
>>  3 files changed, 33 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 0989af4a17e4..ddc3db345b76 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
>>       return -ENOMEM;
>>  }
>>
>> +static void i915_page_color_adjust(const struct drm_mm_node *node,
>> +                                unsigned long color,
>> +                                u64 *start,
>> +                                u64 *end)
>> +{
>> +     GEM_BUG_ON(!is_valid_gtt_page_size(color));
>> +
>> +     if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K)))
>> +             return;
>> +
>> +     GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
>> +
>> +     if (i915_node_color_differs(node, color))
>> +             *start = roundup(*start, 1 << GEN8_PDE_SHIFT);
>> +
>> +     node = list_next_entry(node, node_list);
>> +     if (i915_node_color_differs(node, color))
>> +             *end = rounddown(*end, 1 << GEN8_PDE_SHIFT);
>> +
>> +     GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
>> +}
>> +
>>  /*
>>   * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
>>   * with a net effect resembling a 2-level page table in normal x86 terms. Each
>> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>>               ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
>>               ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
>>               ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
>> +
>> +             if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K))
>> +                     ppgtt->base.mm.color_adjust = i915_page_color_adjust;
>>       } else {
>>               ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
>>               if (ret)
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> index 9c592e2de516..8d893ddd98f2 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm)
>>  }
>>
>>  static inline bool
>> +i915_vm_has_page_coloring(const struct i915_address_space *vm)
>> +{
>> +     return vm->mm.color_adjust && !i915_is_ggtt(vm);
>> +}
>> +
>> +static inline bool
>>  i915_vm_is_48bit(const struct i915_address_space *vm)
>>  {
>>       return (vm->total - 1) >> 32;
>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>> index 8f0041ba328f..4043145b4310 100644
>> --- a/drivers/gpu/drm/i915/i915_vma.c
>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>>
>>       if (i915_vm_has_cache_coloring(vma->vm))
>>               color = obj->cache_level;
>> +     else if (i915_vm_has_page_coloring(vma->vm))
>> +             color = obj->gtt_page_size;
>
> This does not need color_adjust since you are just specifying an
> alignment and size. Why the extra complications? I remember asking the
> same last time.
Hmm, are you saying the whole idea of needing a color_adjust for
4K/64K vm placement is completely unnecessary?
Chris Wilson April 5, 2017, 2:02 p.m. UTC | #3
On Wed, Apr 05, 2017 at 02:50:41PM +0100, Matthew Auld wrote:
> On 5 April 2017 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote:
> >> To enable 64K pages we need to set the intermediate-page-size(IPS) bit
> >> of the pde, therefore a page table is said to be either operating in 64K
> >> or 4K mode. To accommodate this vm placement restriction we introduce a
> >> color for pages and corresponding color_adjust callback.
> >>
> >> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++
> >>  drivers/gpu/drm/i915/i915_gem_gtt.h |  6 ++++++
> >>  drivers/gpu/drm/i915/i915_vma.c     |  2 ++
> >>  3 files changed, 33 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> index 0989af4a17e4..ddc3db345b76 100644
> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> >> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
> >>       return -ENOMEM;
> >>  }
> >>
> >> +static void i915_page_color_adjust(const struct drm_mm_node *node,
> >> +                                unsigned long color,
> >> +                                u64 *start,
> >> +                                u64 *end)
> >> +{
> >> +     GEM_BUG_ON(!is_valid_gtt_page_size(color));
> >> +
> >> +     if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K)))
> >> +             return;
> >> +
> >> +     GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
> >> +
> >> +     if (i915_node_color_differs(node, color))
> >> +             *start = roundup(*start, 1 << GEN8_PDE_SHIFT);
> >> +
> >> +     node = list_next_entry(node, node_list);
> >> +     if (i915_node_color_differs(node, color))
> >> +             *end = rounddown(*end, 1 << GEN8_PDE_SHIFT);
> >> +
> >> +     GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
> >> +}
> >> +
> >>  /*
> >>   * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
> >>   * with a net effect resembling a 2-level page table in normal x86 terms. Each
> >> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> >>               ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
> >>               ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
> >>               ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
> >> +
> >> +             if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K))
> >> +                     ppgtt->base.mm.color_adjust = i915_page_color_adjust;
> >>       } else {
> >>               ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
> >>               if (ret)
> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> >> index 9c592e2de516..8d893ddd98f2 100644
> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> >> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm)
> >>  }
> >>
> >>  static inline bool
> >> +i915_vm_has_page_coloring(const struct i915_address_space *vm)
> >> +{
> >> +     return vm->mm.color_adjust && !i915_is_ggtt(vm);
> >> +}
> >> +
> >> +static inline bool
> >>  i915_vm_is_48bit(const struct i915_address_space *vm)
> >>  {
> >>       return (vm->total - 1) >> 32;
> >> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> >> index 8f0041ba328f..4043145b4310 100644
> >> --- a/drivers/gpu/drm/i915/i915_vma.c
> >> +++ b/drivers/gpu/drm/i915/i915_vma.c
> >> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
> >>
> >>       if (i915_vm_has_cache_coloring(vma->vm))
> >>               color = obj->cache_level;
> >> +     else if (i915_vm_has_page_coloring(vma->vm))
> >> +             color = obj->gtt_page_size;
> >
> > This does not need color_adjust since you are just specifying an
> > alignment and size. Why the extra complications? I remember asking the
> > same last time.
> Hmm, are you saying the whole idea of needing a color_adjust for
> 4K/64K vm placement is completely unnecessary?

As constructed here, yes. Since you just want to request a
obj->gtt_page_size aligned block:

	.size = round_up(size, obj->gtt_page_size),
	.align = max(align, obj->gtt_page_size).

(Hmm, now I think about it you shouldn't round size up unless the
insert_pages() is careful not to assume that the last page is a full
superpage. More I think about this, you only want to align the base and
let insert_pages group up the superpages.)

Unless I have completely misunderstood, you do not need to insert
gaps between blocks. Both the color_adjust approach and this approach
still need lower level support to amalgamate pages.
-Chris
Matthew Auld April 5, 2017, 3:05 p.m. UTC | #4
On 5 April 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Wed, Apr 05, 2017 at 02:50:41PM +0100, Matthew Auld wrote:
>> On 5 April 2017 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote:
>> >> To enable 64K pages we need to set the intermediate-page-size(IPS) bit
>> >> of the pde, therefore a page table is said to be either operating in 64K
>> >> or 4K mode. To accommodate this vm placement restriction we introduce a
>> >> color for pages and corresponding color_adjust callback.
>> >>
>> >> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> >> ---
>> >>  drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++
>> >>  drivers/gpu/drm/i915/i915_gem_gtt.h |  6 ++++++
>> >>  drivers/gpu/drm/i915/i915_vma.c     |  2 ++
>> >>  3 files changed, 33 insertions(+)
>> >>
>> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> index 0989af4a17e4..ddc3db345b76 100644
>> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
>> >>       return -ENOMEM;
>> >>  }
>> >>
>> >> +static void i915_page_color_adjust(const struct drm_mm_node *node,
>> >> +                                unsigned long color,
>> >> +                                u64 *start,
>> >> +                                u64 *end)
>> >> +{
>> >> +     GEM_BUG_ON(!is_valid_gtt_page_size(color));
>> >> +
>> >> +     if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K)))
>> >> +             return;
>> >> +
>> >> +     GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
>> >> +
>> >> +     if (i915_node_color_differs(node, color))
>> >> +             *start = roundup(*start, 1 << GEN8_PDE_SHIFT);
>> >> +
>> >> +     node = list_next_entry(node, node_list);
>> >> +     if (i915_node_color_differs(node, color))
>> >> +             *end = rounddown(*end, 1 << GEN8_PDE_SHIFT);
>> >> +
>> >> +     GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
>> >> +}
>> >> +
>> >>  /*
>> >>   * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
>> >>   * with a net effect resembling a 2-level page table in normal x86 terms. Each
>> >> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>> >>               ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
>> >>               ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
>> >>               ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
>> >> +
>> >> +             if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K))
>> >> +                     ppgtt->base.mm.color_adjust = i915_page_color_adjust;
>> >>       } else {
>> >>               ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
>> >>               if (ret)
>> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> >> index 9c592e2de516..8d893ddd98f2 100644
>> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> >> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm)
>> >>  }
>> >>
>> >>  static inline bool
>> >> +i915_vm_has_page_coloring(const struct i915_address_space *vm)
>> >> +{
>> >> +     return vm->mm.color_adjust && !i915_is_ggtt(vm);
>> >> +}
>> >> +
>> >> +static inline bool
>> >>  i915_vm_is_48bit(const struct i915_address_space *vm)
>> >>  {
>> >>       return (vm->total - 1) >> 32;
>> >> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>> >> index 8f0041ba328f..4043145b4310 100644
>> >> --- a/drivers/gpu/drm/i915/i915_vma.c
>> >> +++ b/drivers/gpu/drm/i915/i915_vma.c
>> >> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>> >>
>> >>       if (i915_vm_has_cache_coloring(vma->vm))
>> >>               color = obj->cache_level;
>> >> +     else if (i915_vm_has_page_coloring(vma->vm))
>> >> +             color = obj->gtt_page_size;
>> >
>> > This does not need color_adjust since you are just specifying an
>> > alignment and size. Why the extra complications? I remember asking the
>> > same last time.
>> Hmm, are you saying the whole idea of needing a color_adjust for
>> 4K/64K vm placement is completely unnecessary?
>
> As constructed here, yes. Since you just want to request a
> obj->gtt_page_size aligned block:
>
>         .size = round_up(size, obj->gtt_page_size),
>         .align = max(align, obj->gtt_page_size).
Unless I've gone completely mad, I really don't think it's that
simple, we never ever want a 4K object or 64K object to overlap the
same page-table. We derive the pde and hence the page-table from
node.start, so if we just request an obj->gtt_page_size aligned block,
we could have a page-table with a mixture of 64K and 4K pte's, at
which point we're screwed.

>
> (Hmm, now I think about it you shouldn't round size up unless the
> insert_pages() is careful not to assume that the last page is a full
> superpage. More I think about this, you only want to align the base and
> let insert_pages group up the superpages.)
>
> Unless I have completely misunderstood, you do not need to insert
> gaps between blocks. Both the color_adjust approach and this approach
> still need lower level support to amalgamate pages.
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
Matthew Auld April 10, 2017, 12:08 p.m. UTC | #5
On 5 April 2017 at 15:02, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Wed, Apr 05, 2017 at 02:50:41PM +0100, Matthew Auld wrote:
>> On 5 April 2017 at 14:41, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > On Tue, Apr 04, 2017 at 11:11:17PM +0100, Matthew Auld wrote:
>> >> To enable 64K pages we need to set the intermediate-page-size(IPS) bit
>> >> of the pde, therefore a page table is said to be either operating in 64K
>> >> or 4K mode. To accommodate this vm placement restriction we introduce a
>> >> color for pages and corresponding color_adjust callback.
>> >>
>> >> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
>> >> ---
>> >>  drivers/gpu/drm/i915/i915_gem_gtt.c | 25 +++++++++++++++++++++++++
>> >>  drivers/gpu/drm/i915/i915_gem_gtt.h |  6 ++++++
>> >>  drivers/gpu/drm/i915/i915_vma.c     |  2 ++
>> >>  3 files changed, 33 insertions(+)
>> >>
>> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> index 0989af4a17e4..ddc3db345b76 100644
>> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> >> @@ -1332,6 +1332,28 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
>> >>       return -ENOMEM;
>> >>  }
>> >>
>> >> +static void i915_page_color_adjust(const struct drm_mm_node *node,
>> >> +                                unsigned long color,
>> >> +                                u64 *start,
>> >> +                                u64 *end)
>> >> +{
>> >> +     GEM_BUG_ON(!is_valid_gtt_page_size(color));
>> >> +
>> >> +     if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K)))
>> >> +             return;
>> >> +
>> >> +     GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
>> >> +
>> >> +     if (i915_node_color_differs(node, color))
>> >> +             *start = roundup(*start, 1 << GEN8_PDE_SHIFT);
>> >> +
>> >> +     node = list_next_entry(node, node_list);
>> >> +     if (i915_node_color_differs(node, color))
>> >> +             *end = rounddown(*end, 1 << GEN8_PDE_SHIFT);
>> >> +
>> >> +     GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
>> >> +}
>> >> +
>> >>  /*
>> >>   * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
>> >>   * with a net effect resembling a 2-level page table in normal x86 terms. Each
>> >> @@ -1372,6 +1394,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>> >>               ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
>> >>               ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
>> >>               ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
>> >> +
>> >> +             if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K))
>> >> +                     ppgtt->base.mm.color_adjust = i915_page_color_adjust;
>> >>       } else {
>> >>               ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
>> >>               if (ret)
>> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> >> index 9c592e2de516..8d893ddd98f2 100644
>> >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
>> >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
>> >> @@ -353,6 +353,12 @@ i915_vm_has_cache_coloring(const struct i915_address_space *vm)
>> >>  }
>> >>
>> >>  static inline bool
>> >> +i915_vm_has_page_coloring(const struct i915_address_space *vm)
>> >> +{
>> >> +     return vm->mm.color_adjust && !i915_is_ggtt(vm);
>> >> +}
>> >> +
>> >> +static inline bool
>> >>  i915_vm_is_48bit(const struct i915_address_space *vm)
>> >>  {
>> >>       return (vm->total - 1) >> 32;
>> >> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>> >> index 8f0041ba328f..4043145b4310 100644
>> >> --- a/drivers/gpu/drm/i915/i915_vma.c
>> >> +++ b/drivers/gpu/drm/i915/i915_vma.c
>> >> @@ -471,6 +471,8 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>> >>
>> >>       if (i915_vm_has_cache_coloring(vma->vm))
>> >>               color = obj->cache_level;
>> >> +     else if (i915_vm_has_page_coloring(vma->vm))
>> >> +             color = obj->gtt_page_size;
>> >
>> > This does not need color_adjust since you are just specifying an
>> > alignment and size. Why the extra complications? I remember asking the
>> > same last time.
>> Hmm, are you saying the whole idea of needing a color_adjust for
>> 4K/64K vm placement is completely unnecessary?
>
> As constructed here, yes. Since you just want to request a
> obj->gtt_page_size aligned block:
>
>         .size = round_up(size, obj->gtt_page_size),
>         .align = max(align, obj->gtt_page_size).
>
> (Hmm, now I think about it you shouldn't round size up unless the
> insert_pages() is careful not to assume that the last page is a full
> superpage. More I think about this, you only want to align the base and
> let insert_pages group up the superpages.)
I feel like I must be missing your point, could you expand on what you
mean my grouping superpages?

>
> Unless I have completely misunderstood, you do not need to insert
> gaps between blocks. Both the color_adjust approach and this approach
> still need lower level support to amalgamate pages.
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 0989af4a17e4..ddc3db345b76 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1332,6 +1332,28 @@  static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
 	return -ENOMEM;
 }
 
+static void i915_page_color_adjust(const struct drm_mm_node *node,
+				   unsigned long color,
+				   u64 *start,
+				   u64 *end)
+{
+	GEM_BUG_ON(!is_valid_gtt_page_size(color));
+
+	if (!(color & (I915_GTT_PAGE_SIZE_4K | I915_GTT_PAGE_SIZE_64K)))
+		return;
+
+	GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
+
+	if (i915_node_color_differs(node, color))
+		*start = roundup(*start, 1 << GEN8_PDE_SHIFT);
+
+	node = list_next_entry(node, node_list);
+	if (i915_node_color_differs(node, color))
+		*end = rounddown(*end, 1 << GEN8_PDE_SHIFT);
+
+	GEM_BUG_ON(node->allocated && !is_valid_gtt_page_size(node->color));
+}
+
 /*
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1372,6 +1394,9 @@  static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 		ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl;
 		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
 		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
+
+		if (SUPPORTS_PAGE_SIZE(dev_priv, I915_GTT_PAGE_SIZE_64K))
+			ppgtt->base.mm.color_adjust = i915_page_color_adjust;
 	} else {
 		ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9c592e2de516..8d893ddd98f2 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -353,6 +353,12 @@  i915_vm_has_cache_coloring(const struct i915_address_space *vm)
 }
 
 static inline bool
+i915_vm_has_page_coloring(const struct i915_address_space *vm)
+{
+	return vm->mm.color_adjust && !i915_is_ggtt(vm);
+}
+
+static inline bool
 i915_vm_is_48bit(const struct i915_address_space *vm)
 {
 	return (vm->total - 1) >> 32;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 8f0041ba328f..4043145b4310 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -471,6 +471,8 @@  i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 
 	if (i915_vm_has_cache_coloring(vma->vm))
 		color = obj->cache_level;
+	else if (i915_vm_has_page_coloring(vma->vm))
+		color = obj->gtt_page_size;
 
 	if (flags & PIN_OFFSET_FIXED) {
 		u64 offset = flags & PIN_OFFSET_MASK;