diff mbox series

[RESEND,v3,2/3] drm/ttm: Reduce the number of used allocation orders for TTM pages

Message ID 20230404200650.11043-3-thomas.hellstrom@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series drm/ttm: Small fixes / cleanups in prep for shrinking | expand

Commit Message

Thomas Hellstrom April 4, 2023, 8:06 p.m. UTC
When swapping out, we will split multi-order pages both in order to
move them to the swap-cache and to be able to return memory to the
swap cache as soon as possible on a page-by-page basis.
Reduce the page max order to the system PMD size, as we can then be nicer
to the system and avoid splitting gigantic pages.

Looking forward to when we might be able to swap out PMD size folios
without splitting, this will also be a benefit.

v2:
- Include all orders up to the PMD size (Christian König)
v3:
- Avoid compilation errors for architectures with special PFN_SHIFTs

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

Comments

Daniel Vetter April 11, 2023, 9:51 a.m. UTC | #1
On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
> When swapping out, we will split multi-order pages both in order to
> move them to the swap-cache and to be able to return memory to the
> swap cache as soon as possible on a page-by-page basis.
> Reduce the page max order to the system PMD size, as we can then be nicer
> to the system and avoid splitting gigantic pages.
> 
> Looking forward to when we might be able to swap out PMD size folios
> without splitting, this will also be a benefit.
> 
> v2:
> - Include all orders up to the PMD size (Christian König)
> v3:
> - Avoid compilation errors for architectures with special PFN_SHIFTs
> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> Reviewed-by: Christian König <christian.koenig@amd.com>

Apparently this fails on ppc build testing. Please supply build fix asap
(or I guess we need to revert). I'm kinda not clear why this only showed
up when I merged the drm-misc-next pr into drm-next ...
-Daniel

> ---
>  drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
>  1 file changed, 19 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index dfce896c4bae..18c342a919a2 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -47,6 +47,11 @@
>  
>  #include "ttm_module.h"
>  
> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
> +/* Some architectures have a weird PMD_SHIFT */
> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
> +
>  /**
>   * struct ttm_pool_dma - Helper object for coherent DMA mappings
>   *
> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
>  
>  static atomic_long_t allocated_pages;
>  
> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
> -static struct ttm_pool_type global_uncached[MAX_ORDER];
> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
>  
> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
>  
>  static spinlock_t shrinker_lock;
>  static struct list_head shrinker_list;
> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>  	else
>  		gfp_flags |= GFP_HIGHUSER;
>  
> -	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
> +	for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
>  	     num_pages;
>  	     order = min_t(unsigned int, order, __fls(num_pages))) {
>  		struct ttm_pool_type *pt;
> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>  
>  	if (use_dma_alloc) {
>  		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> -			for (j = 0; j < MAX_ORDER; ++j)
> +			for (j = 0; j < TTM_DIM_ORDER; ++j)
>  				ttm_pool_type_init(&pool->caching[i].orders[j],
>  						   pool, i, j);
>  	}
> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>  
>  	if (pool->use_dma_alloc) {
>  		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> -			for (j = 0; j < MAX_ORDER; ++j)
> +			for (j = 0; j < TTM_DIM_ORDER; ++j)
>  				ttm_pool_type_fini(&pool->caching[i].orders[j]);
>  	}
>  
> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
>  	unsigned int i;
>  
>  	seq_puts(m, "\t ");
> -	for (i = 0; i < MAX_ORDER; ++i)
> +	for (i = 0; i < TTM_DIM_ORDER; ++i)
>  		seq_printf(m, " ---%2u---", i);
>  	seq_puts(m, "\n");
>  }
> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
>  {
>  	unsigned int i;
>  
> -	for (i = 0; i < MAX_ORDER; ++i)
> +	for (i = 0; i < TTM_DIM_ORDER; ++i)
>  		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
>  	seq_puts(m, "\n");
>  }
> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>  {
>  	unsigned int i;
>  
> +	BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
> +	BUILD_BUG_ON(TTM_DIM_ORDER < 1);
> +
>  	if (!page_pool_size)
>  		page_pool_size = num_pages;
>  
>  	spin_lock_init(&shrinker_lock);
>  	INIT_LIST_HEAD(&shrinker_list);
>  
> -	for (i = 0; i < MAX_ORDER; ++i) {
> +	for (i = 0; i < TTM_DIM_ORDER; ++i) {
>  		ttm_pool_type_init(&global_write_combined[i], NULL,
>  				   ttm_write_combined, i);
>  		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
>  {
>  	unsigned int i;
>  
> -	for (i = 0; i < MAX_ORDER; ++i) {
> +	for (i = 0; i < TTM_DIM_ORDER; ++i) {
>  		ttm_pool_type_fini(&global_write_combined[i]);
>  		ttm_pool_type_fini(&global_uncached[i]);
>  
> -- 
> 2.39.2
>
Christian König April 11, 2023, 12:11 p.m. UTC | #2
Am 11.04.23 um 11:51 schrieb Daniel Vetter:
> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
>> When swapping out, we will split multi-order pages both in order to
>> move them to the swap-cache and to be able to return memory to the
>> swap cache as soon as possible on a page-by-page basis.
>> Reduce the page max order to the system PMD size, as we can then be nicer
>> to the system and avoid splitting gigantic pages.
>>
>> Looking forward to when we might be able to swap out PMD size folios
>> without splitting, this will also be a benefit.
>>
>> v2:
>> - Include all orders up to the PMD size (Christian König)
>> v3:
>> - Avoid compilation errors for architectures with special PFN_SHIFTs
>>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>> Reviewed-by: Christian König <christian.koenig@amd.com>
> Apparently this fails on ppc build testing. Please supply build fix asap
> (or I guess we need to revert). I'm kinda not clear why this only showed
> up when I merged the drm-misc-next pr into drm-next ...

I'm really wondering this as well. It looks like PMD_SHIFT isn't a 
constant on this particular platform.

But from what I can find in the upstream 6.2 kernel PMD_SHIFT always 
seems to be a constant.

So how exactly can that here break?

Christian.

> -Daniel
>
>> ---
>>   drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
>>   1 file changed, 19 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
>> index dfce896c4bae..18c342a919a2 100644
>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>> @@ -47,6 +47,11 @@
>>   
>>   #include "ttm_module.h"
>>   
>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
>> +/* Some architectures have a weird PMD_SHIFT */
>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
>> +
>>   /**
>>    * struct ttm_pool_dma - Helper object for coherent DMA mappings
>>    *
>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
>>   
>>   static atomic_long_t allocated_pages;
>>   
>> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
>> -static struct ttm_pool_type global_uncached[MAX_ORDER];
>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
>>   
>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
>>   
>>   static spinlock_t shrinker_lock;
>>   static struct list_head shrinker_list;
>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>>   	else
>>   		gfp_flags |= GFP_HIGHUSER;
>>   
>> -	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
>> +	for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
>>   	     num_pages;
>>   	     order = min_t(unsigned int, order, __fls(num_pages))) {
>>   		struct ttm_pool_type *pt;
>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>>   
>>   	if (use_dma_alloc) {
>>   		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>> -			for (j = 0; j < MAX_ORDER; ++j)
>> +			for (j = 0; j < TTM_DIM_ORDER; ++j)
>>   				ttm_pool_type_init(&pool->caching[i].orders[j],
>>   						   pool, i, j);
>>   	}
>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>   
>>   	if (pool->use_dma_alloc) {
>>   		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>> -			for (j = 0; j < MAX_ORDER; ++j)
>> +			for (j = 0; j < TTM_DIM_ORDER; ++j)
>>   				ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>   	}
>>   
>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
>>   	unsigned int i;
>>   
>>   	seq_puts(m, "\t ");
>> -	for (i = 0; i < MAX_ORDER; ++i)
>> +	for (i = 0; i < TTM_DIM_ORDER; ++i)
>>   		seq_printf(m, " ---%2u---", i);
>>   	seq_puts(m, "\n");
>>   }
>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
>>   {
>>   	unsigned int i;
>>   
>> -	for (i = 0; i < MAX_ORDER; ++i)
>> +	for (i = 0; i < TTM_DIM_ORDER; ++i)
>>   		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
>>   	seq_puts(m, "\n");
>>   }
>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>>   {
>>   	unsigned int i;
>>   
>> +	BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
>> +	BUILD_BUG_ON(TTM_DIM_ORDER < 1);
>> +
>>   	if (!page_pool_size)
>>   		page_pool_size = num_pages;
>>   
>>   	spin_lock_init(&shrinker_lock);
>>   	INIT_LIST_HEAD(&shrinker_list);
>>   
>> -	for (i = 0; i < MAX_ORDER; ++i) {
>> +	for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>   		ttm_pool_type_init(&global_write_combined[i], NULL,
>>   				   ttm_write_combined, i);
>>   		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
>>   {
>>   	unsigned int i;
>>   
>> -	for (i = 0; i < MAX_ORDER; ++i) {
>> +	for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>   		ttm_pool_type_fini(&global_write_combined[i]);
>>   		ttm_pool_type_fini(&global_uncached[i]);
>>   
>> -- 
>> 2.39.2
>>
Daniel Vetter April 11, 2023, 1:45 p.m. UTC | #3
On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
> Am 11.04.23 um 11:51 schrieb Daniel Vetter:
> > On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
> > > When swapping out, we will split multi-order pages both in order to
> > > move them to the swap-cache and to be able to return memory to the
> > > swap cache as soon as possible on a page-by-page basis.
> > > Reduce the page max order to the system PMD size, as we can then be nicer
> > > to the system and avoid splitting gigantic pages.
> > > 
> > > Looking forward to when we might be able to swap out PMD size folios
> > > without splitting, this will also be a benefit.
> > > 
> > > v2:
> > > - Include all orders up to the PMD size (Christian König)
> > > v3:
> > > - Avoid compilation errors for architectures with special PFN_SHIFTs
> > > 
> > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > Reviewed-by: Christian König <christian.koenig@amd.com>
> > Apparently this fails on ppc build testing. Please supply build fix asap
> > (or I guess we need to revert). I'm kinda not clear why this only showed
> > up when I merged the drm-misc-next pr into drm-next ...
> 
> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant
> on this particular platform.
> 
> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems
> to be a constant.
> 
> So how exactly can that here break?

There's some in-flight patches to rework MAX_ORDER and other things in
linux-next, maybe it's recent? If you check out linux-next then you need
to reapply the patch (since sfr reverted it).
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
> > >   1 file changed, 19 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > > index dfce896c4bae..18c342a919a2 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > @@ -47,6 +47,11 @@
> > >   #include "ttm_module.h"
> > > +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
> > > +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
> > > +/* Some architectures have a weird PMD_SHIFT */
> > > +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
> > > +
> > >   /**
> > >    * struct ttm_pool_dma - Helper object for coherent DMA mappings
> > >    *
> > > @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
> > >   static atomic_long_t allocated_pages;
> > > -static struct ttm_pool_type global_write_combined[MAX_ORDER];
> > > -static struct ttm_pool_type global_uncached[MAX_ORDER];
> > > +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
> > > +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
> > > -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> > > -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> > > +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
> > > +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
> > >   static spinlock_t shrinker_lock;
> > >   static struct list_head shrinker_list;
> > > @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> > >   	else
> > >   		gfp_flags |= GFP_HIGHUSER;
> > > -	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
> > > +	for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
> > >   	     num_pages;
> > >   	     order = min_t(unsigned int, order, __fls(num_pages))) {
> > >   		struct ttm_pool_type *pt;
> > > @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> > >   	if (use_dma_alloc) {
> > >   		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > -			for (j = 0; j < MAX_ORDER; ++j)
> > > +			for (j = 0; j < TTM_DIM_ORDER; ++j)
> > >   				ttm_pool_type_init(&pool->caching[i].orders[j],
> > >   						   pool, i, j);
> > >   	}
> > > @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
> > >   	if (pool->use_dma_alloc) {
> > >   		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > -			for (j = 0; j < MAX_ORDER; ++j)
> > > +			for (j = 0; j < TTM_DIM_ORDER; ++j)
> > >   				ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > >   	}
> > > @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
> > >   	unsigned int i;
> > >   	seq_puts(m, "\t ");
> > > -	for (i = 0; i < MAX_ORDER; ++i)
> > > +	for (i = 0; i < TTM_DIM_ORDER; ++i)
> > >   		seq_printf(m, " ---%2u---", i);
> > >   	seq_puts(m, "\n");
> > >   }
> > > @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
> > >   {
> > >   	unsigned int i;
> > > -	for (i = 0; i < MAX_ORDER; ++i)
> > > +	for (i = 0; i < TTM_DIM_ORDER; ++i)
> > >   		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
> > >   	seq_puts(m, "\n");
> > >   }
> > > @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> > >   {
> > >   	unsigned int i;
> > > +	BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
> > > +	BUILD_BUG_ON(TTM_DIM_ORDER < 1);
> > > +
> > >   	if (!page_pool_size)
> > >   		page_pool_size = num_pages;
> > >   	spin_lock_init(&shrinker_lock);
> > >   	INIT_LIST_HEAD(&shrinker_list);
> > > -	for (i = 0; i < MAX_ORDER; ++i) {
> > > +	for (i = 0; i < TTM_DIM_ORDER; ++i) {
> > >   		ttm_pool_type_init(&global_write_combined[i], NULL,
> > >   				   ttm_write_combined, i);
> > >   		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> > > @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
> > >   {
> > >   	unsigned int i;
> > > -	for (i = 0; i < MAX_ORDER; ++i) {
> > > +	for (i = 0; i < TTM_DIM_ORDER; ++i) {
> > >   		ttm_pool_type_fini(&global_write_combined[i]);
> > >   		ttm_pool_type_fini(&global_uncached[i]);
> > > -- 
> > > 2.39.2
> > > 
>
Daniel Vetter April 12, 2023, 9:08 a.m. UTC | #4
On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
> > Am 11.04.23 um 11:51 schrieb Daniel Vetter:
> > > On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
> > > > When swapping out, we will split multi-order pages both in order to
> > > > move them to the swap-cache and to be able to return memory to the
> > > > swap cache as soon as possible on a page-by-page basis.
> > > > Reduce the page max order to the system PMD size, as we can then be nicer
> > > > to the system and avoid splitting gigantic pages.
> > > >
> > > > Looking forward to when we might be able to swap out PMD size folios
> > > > without splitting, this will also be a benefit.
> > > >
> > > > v2:
> > > > - Include all orders up to the PMD size (Christian König)
> > > > v3:
> > > > - Avoid compilation errors for architectures with special PFN_SHIFTs
> > > >
> > > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> > > > Reviewed-by: Christian König <christian.koenig@amd.com>
> > > Apparently this fails on ppc build testing. Please supply build fix asap
> > > (or I guess we need to revert). I'm kinda not clear why this only showed
> > > up when I merged the drm-misc-next pr into drm-next ...
> >
> > I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant
> > on this particular platform.
> >
> > But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems
> > to be a constant.
> >
> > So how exactly can that here break?
>
> There's some in-flight patches to rework MAX_ORDER and other things in
> linux-next, maybe it's recent? If you check out linux-next then you need
> to reapply the patch (since sfr reverted it).

So I looked and on ppc64 PMD_SHIFT is defined in terms of
PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable
__pte_index_size. This is in 6.3 already and seems pretty old.

So revert? Or fixup patch to make this work on ppc?


> > > > ---
> > > >   drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
> > > >   1 file changed, 19 insertions(+), 11 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> > > > index dfce896c4bae..18c342a919a2 100644
> > > > --- a/drivers/gpu/drm/ttm/ttm_pool.c
> > > > +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> > > > @@ -47,6 +47,11 @@
> > > >   #include "ttm_module.h"
> > > > +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
> > > > +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
> > > > +/* Some architectures have a weird PMD_SHIFT */
> > > > +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
> > > > +
> > > >   /**
> > > >    * struct ttm_pool_dma - Helper object for coherent DMA mappings
> > > >    *
> > > > @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
> > > >   static atomic_long_t allocated_pages;
> > > > -static struct ttm_pool_type global_write_combined[MAX_ORDER];
> > > > -static struct ttm_pool_type global_uncached[MAX_ORDER];
> > > > +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
> > > > +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
> > > > -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> > > > -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> > > > +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
> > > > +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
> > > >   static spinlock_t shrinker_lock;
> > > >   static struct list_head shrinker_list;
> > > > @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> > > >           else
> > > >                   gfp_flags |= GFP_HIGHUSER;
> > > > - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
> > > > + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
> > > >                num_pages;
> > > >                order = min_t(unsigned int, order, __fls(num_pages))) {
> > > >                   struct ttm_pool_type *pt;
> > > > @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> > > >           if (use_dma_alloc) {
> > > >                   for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > > -                 for (j = 0; j < MAX_ORDER; ++j)
> > > > +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
> > > >                                   ttm_pool_type_init(&pool->caching[i].orders[j],
> > > >                                                      pool, i, j);
> > > >           }
> > > > @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
> > > >           if (pool->use_dma_alloc) {
> > > >                   for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> > > > -                 for (j = 0; j < MAX_ORDER; ++j)
> > > > +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
> > > >                                   ttm_pool_type_fini(&pool->caching[i].orders[j]);
> > > >           }
> > > > @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
> > > >           unsigned int i;
> > > >           seq_puts(m, "\t ");
> > > > - for (i = 0; i < MAX_ORDER; ++i)
> > > > + for (i = 0; i < TTM_DIM_ORDER; ++i)
> > > >                   seq_printf(m, " ---%2u---", i);
> > > >           seq_puts(m, "\n");
> > > >   }
> > > > @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
> > > >   {
> > > >           unsigned int i;
> > > > - for (i = 0; i < MAX_ORDER; ++i)
> > > > + for (i = 0; i < TTM_DIM_ORDER; ++i)
> > > >                   seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
> > > >           seq_puts(m, "\n");
> > > >   }
> > > > @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> > > >   {
> > > >           unsigned int i;
> > > > + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
> > > > + BUILD_BUG_ON(TTM_DIM_ORDER < 1);
> > > > +
> > > >           if (!page_pool_size)
> > > >                   page_pool_size = num_pages;
> > > >           spin_lock_init(&shrinker_lock);
> > > >           INIT_LIST_HEAD(&shrinker_list);
> > > > - for (i = 0; i < MAX_ORDER; ++i) {
> > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) {
> > > >                   ttm_pool_type_init(&global_write_combined[i], NULL,
> > > >                                      ttm_write_combined, i);
> > > >                   ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> > > > @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
> > > >   {
> > > >           unsigned int i;
> > > > - for (i = 0; i < MAX_ORDER; ++i) {
> > > > + for (i = 0; i < TTM_DIM_ORDER; ++i) {
> > > >                   ttm_pool_type_fini(&global_write_combined[i]);
> > > >                   ttm_pool_type_fini(&global_uncached[i]);
> > > > --
> > > > 2.39.2
> > > >
> >
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Christian König April 12, 2023, 2:17 p.m. UTC | #5
Am 12.04.23 um 11:08 schrieb Daniel Vetter:
> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote:
>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter:
>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
>>>>> When swapping out, we will split multi-order pages both in order to
>>>>> move them to the swap-cache and to be able to return memory to the
>>>>> swap cache as soon as possible on a page-by-page basis.
>>>>> Reduce the page max order to the system PMD size, as we can then be nicer
>>>>> to the system and avoid splitting gigantic pages.
>>>>>
>>>>> Looking forward to when we might be able to swap out PMD size folios
>>>>> without splitting, this will also be a benefit.
>>>>>
>>>>> v2:
>>>>> - Include all orders up to the PMD size (Christian König)
>>>>> v3:
>>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs
>>>>>
>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>> Apparently this fails on ppc build testing. Please supply build fix asap
>>>> (or I guess we need to revert). I'm kinda not clear why this only showed
>>>> up when I merged the drm-misc-next pr into drm-next ...
>>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant
>>> on this particular platform.
>>>
>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems
>>> to be a constant.
>>>
>>> So how exactly can that here break?
>> There's some in-flight patches to rework MAX_ORDER and other things in
>> linux-next, maybe it's recent? If you check out linux-next then you need
>> to reapply the patch (since sfr reverted it).
> So I looked and on ppc64 PMD_SHIFT is defined in terms of
> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable
> __pte_index_size. This is in 6.3 already and seems pretty old.

Ah! I missed that one, thanks.

> So revert? Or fixup patch to make this work on ppc?

I think for now just revert or change it so that we check if PMD_SHIFT 
is a constant.

Thomas do you have any quick solution?

Christian.

>
>
>>>>> ---
>>>>>    drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
>>>>>    1 file changed, 19 insertions(+), 11 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>> index dfce896c4bae..18c342a919a2 100644
>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>> @@ -47,6 +47,11 @@
>>>>>    #include "ttm_module.h"
>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
>>>>> +/* Some architectures have a weird PMD_SHIFT */
>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
>>>>> +
>>>>>    /**
>>>>>     * struct ttm_pool_dma - Helper object for coherent DMA mappings
>>>>>     *
>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
>>>>>    static atomic_long_t allocated_pages;
>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER];
>>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
>>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
>>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
>>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
>>>>>    static spinlock_t shrinker_lock;
>>>>>    static struct list_head shrinker_list;
>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>>>>>            else
>>>>>                    gfp_flags |= GFP_HIGHUSER;
>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
>>>>>                 num_pages;
>>>>>                 order = min_t(unsigned int, order, __fls(num_pages))) {
>>>>>                    struct ttm_pool_type *pt;
>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>>>>>            if (use_dma_alloc) {
>>>>>                    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
>>>>>                                    ttm_pool_type_init(&pool->caching[i].orders[j],
>>>>>                                                       pool, i, j);
>>>>>            }
>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>>>>            if (pool->use_dma_alloc) {
>>>>>                    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
>>>>>                                    ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>>>>            }
>>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
>>>>>            unsigned int i;
>>>>>            seq_puts(m, "\t ");
>>>>> - for (i = 0; i < MAX_ORDER; ++i)
>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
>>>>>                    seq_printf(m, " ---%2u---", i);
>>>>>            seq_puts(m, "\n");
>>>>>    }
>>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
>>>>>    {
>>>>>            unsigned int i;
>>>>> - for (i = 0; i < MAX_ORDER; ++i)
>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
>>>>>                    seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
>>>>>            seq_puts(m, "\n");
>>>>>    }
>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>>>>>    {
>>>>>            unsigned int i;
>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1);
>>>>> +
>>>>>            if (!page_pool_size)
>>>>>                    page_pool_size = num_pages;
>>>>>            spin_lock_init(&shrinker_lock);
>>>>>            INIT_LIST_HEAD(&shrinker_list);
>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>>>>                    ttm_pool_type_init(&global_write_combined[i], NULL,
>>>>>                                       ttm_write_combined, i);
>>>>>                    ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
>>>>>    {
>>>>>            unsigned int i;
>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>>>>                    ttm_pool_type_fini(&global_write_combined[i]);
>>>>>                    ttm_pool_type_fini(&global_uncached[i]);
>>>>> --
>>>>> 2.39.2
>>>>>
>> --
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch
>
>
Daniel Vetter April 13, 2023, 8:48 a.m. UTC | #6
On Wed, 12 Apr 2023 at 16:18, Christian König <christian.koenig@amd.com> wrote:
>
> Am 12.04.23 um 11:08 schrieb Daniel Vetter:
> > On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote:
> >> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
> >>> Am 11.04.23 um 11:51 schrieb Daniel Vetter:
> >>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
> >>>>> When swapping out, we will split multi-order pages both in order to
> >>>>> move them to the swap-cache and to be able to return memory to the
> >>>>> swap cache as soon as possible on a page-by-page basis.
> >>>>> Reduce the page max order to the system PMD size, as we can then be nicer
> >>>>> to the system and avoid splitting gigantic pages.
> >>>>>
> >>>>> Looking forward to when we might be able to swap out PMD size folios
> >>>>> without splitting, this will also be a benefit.
> >>>>>
> >>>>> v2:
> >>>>> - Include all orders up to the PMD size (Christian König)
> >>>>> v3:
> >>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs
> >>>>>
> >>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> >>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
> >>>> Apparently this fails on ppc build testing. Please supply build fix asap
> >>>> (or I guess we need to revert). I'm kinda not clear why this only showed
> >>>> up when I merged the drm-misc-next pr into drm-next ...
> >>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant
> >>> on this particular platform.
> >>>
> >>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems
> >>> to be a constant.
> >>>
> >>> So how exactly can that here break?
> >> There's some in-flight patches to rework MAX_ORDER and other things in
> >> linux-next, maybe it's recent? If you check out linux-next then you need
> >> to reapply the patch (since sfr reverted it).
> > So I looked and on ppc64 PMD_SHIFT is defined in terms of
> > PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable
> > __pte_index_size. This is in 6.3 already and seems pretty old.
>
> Ah! I missed that one, thanks.
>
> > So revert? Or fixup patch to make this work on ppc?
>
> I think for now just revert or change it so that we check if PMD_SHIFT
> is a constant.
>
> Thomas do you have any quick solution?

I guess Thomas is on vacations. Can you pls do the revert and push it
to drm-misc-next-fixes so this won't get lost?

Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>

preemptively for that. Normally I think we could wait a bit more but
it's really close to merge window PR and I don't like handing too many
open things to Dave when he's back :-)
-Daniel

>
> Christian.
>
> >
> >
> >>>>> ---
> >>>>>    drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
> >>>>>    1 file changed, 19 insertions(+), 11 deletions(-)
> >>>>>
> >>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> >>>>> index dfce896c4bae..18c342a919a2 100644
> >>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> >>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> >>>>> @@ -47,6 +47,11 @@
> >>>>>    #include "ttm_module.h"
> >>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
> >>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
> >>>>> +/* Some architectures have a weird PMD_SHIFT */
> >>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
> >>>>> +
> >>>>>    /**
> >>>>>     * struct ttm_pool_dma - Helper object for coherent DMA mappings
> >>>>>     *
> >>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
> >>>>>    static atomic_long_t allocated_pages;
> >>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
> >>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER];
> >>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
> >>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
> >>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> >>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> >>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
> >>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
> >>>>>    static spinlock_t shrinker_lock;
> >>>>>    static struct list_head shrinker_list;
> >>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> >>>>>            else
> >>>>>                    gfp_flags |= GFP_HIGHUSER;
> >>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
> >>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
> >>>>>                 num_pages;
> >>>>>                 order = min_t(unsigned int, order, __fls(num_pages))) {
> >>>>>                    struct ttm_pool_type *pt;
> >>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> >>>>>            if (use_dma_alloc) {
> >>>>>                    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> >>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
> >>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
> >>>>>                                    ttm_pool_type_init(&pool->caching[i].orders[j],
> >>>>>                                                       pool, i, j);
> >>>>>            }
> >>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
> >>>>>            if (pool->use_dma_alloc) {
> >>>>>                    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> >>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
> >>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
> >>>>>                                    ttm_pool_type_fini(&pool->caching[i].orders[j]);
> >>>>>            }
> >>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
> >>>>>            unsigned int i;
> >>>>>            seq_puts(m, "\t ");
> >>>>> - for (i = 0; i < MAX_ORDER; ++i)
> >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
> >>>>>                    seq_printf(m, " ---%2u---", i);
> >>>>>            seq_puts(m, "\n");
> >>>>>    }
> >>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
> >>>>>    {
> >>>>>            unsigned int i;
> >>>>> - for (i = 0; i < MAX_ORDER; ++i)
> >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
> >>>>>                    seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
> >>>>>            seq_puts(m, "\n");
> >>>>>    }
> >>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> >>>>>    {
> >>>>>            unsigned int i;
> >>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
> >>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1);
> >>>>> +
> >>>>>            if (!page_pool_size)
> >>>>>                    page_pool_size = num_pages;
> >>>>>            spin_lock_init(&shrinker_lock);
> >>>>>            INIT_LIST_HEAD(&shrinker_list);
> >>>>> - for (i = 0; i < MAX_ORDER; ++i) {
> >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
> >>>>>                    ttm_pool_type_init(&global_write_combined[i], NULL,
> >>>>>                                       ttm_write_combined, i);
> >>>>>                    ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> >>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
> >>>>>    {
> >>>>>            unsigned int i;
> >>>>> - for (i = 0; i < MAX_ORDER; ++i) {
> >>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
> >>>>>                    ttm_pool_type_fini(&global_write_combined[i]);
> >>>>>                    ttm_pool_type_fini(&global_uncached[i]);
> >>>>> --
> >>>>> 2.39.2
> >>>>>
> >> --
> >> Daniel Vetter
> >> Software Engineer, Intel Corporation
> >> http://blog.ffwll.ch
> >
> >
>
Christian König April 13, 2023, 9:45 a.m. UTC | #7
Am 13.04.23 um 10:48 schrieb Daniel Vetter:
> On Wed, 12 Apr 2023 at 16:18, Christian König <christian.koenig@amd.com> wrote:
>> Am 12.04.23 um 11:08 schrieb Daniel Vetter:
>>> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote:
>>>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
>>>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter:
>>>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
>>>>>>> When swapping out, we will split multi-order pages both in order to
>>>>>>> move them to the swap-cache and to be able to return memory to the
>>>>>>> swap cache as soon as possible on a page-by-page basis.
>>>>>>> Reduce the page max order to the system PMD size, as we can then be nicer
>>>>>>> to the system and avoid splitting gigantic pages.
>>>>>>>
>>>>>>> Looking forward to when we might be able to swap out PMD size folios
>>>>>>> without splitting, this will also be a benefit.
>>>>>>>
>>>>>>> v2:
>>>>>>> - Include all orders up to the PMD size (Christian König)
>>>>>>> v3:
>>>>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs
>>>>>>>
>>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>> Apparently this fails on ppc build testing. Please supply build fix asap
>>>>>> (or I guess we need to revert). I'm kinda not clear why this only showed
>>>>>> up when I merged the drm-misc-next pr into drm-next ...
>>>>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant
>>>>> on this particular platform.
>>>>>
>>>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems
>>>>> to be a constant.
>>>>>
>>>>> So how exactly can that here break?
>>>> There's some in-flight patches to rework MAX_ORDER and other things in
>>>> linux-next, maybe it's recent? If you check out linux-next then you need
>>>> to reapply the patch (since sfr reverted it).
>>> So I looked and on ppc64 PMD_SHIFT is defined in terms of
>>> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable
>>> __pte_index_size. This is in 6.3 already and seems pretty old.
>> Ah! I missed that one, thanks.
>>
>>> So revert? Or fixup patch to make this work on ppc?
>> I think for now just revert or change it so that we check if PMD_SHIFT
>> is a constant.
>>
>> Thomas do you have any quick solution?
> I guess Thomas is on vacations. Can you pls do the revert and push it
> to drm-misc-next-fixes so this won't get lost?

The offending patch hasn't showed up in drm-misc-next-fixes nor 
drm-misc-fixes yet. Looks like the branches are lacking behind.

I can revert it on drm-misc-next, but I', not 100% sure that will then 
get picked up in time.

Christian.

>
> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> preemptively for that. Normally I think we could wait a bit more but
> it's really close to merge window PR and I don't like handing too many
> open things to Dave when he's back :-)
> -Daniel
>
>> Christian.
>>
>>>
>>>>>>> ---
>>>>>>>     drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
>>>>>>>     1 file changed, 19 insertions(+), 11 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>> index dfce896c4bae..18c342a919a2 100644
>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>> @@ -47,6 +47,11 @@
>>>>>>>     #include "ttm_module.h"
>>>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
>>>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
>>>>>>> +/* Some architectures have a weird PMD_SHIFT */
>>>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
>>>>>>> +
>>>>>>>     /**
>>>>>>>      * struct ttm_pool_dma - Helper object for coherent DMA mappings
>>>>>>>      *
>>>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
>>>>>>>     static atomic_long_t allocated_pages;
>>>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
>>>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER];
>>>>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
>>>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
>>>>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
>>>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
>>>>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
>>>>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
>>>>>>>     static spinlock_t shrinker_lock;
>>>>>>>     static struct list_head shrinker_list;
>>>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>>>>>>>             else
>>>>>>>                     gfp_flags |= GFP_HIGHUSER;
>>>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
>>>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
>>>>>>>                  num_pages;
>>>>>>>                  order = min_t(unsigned int, order, __fls(num_pages))) {
>>>>>>>                     struct ttm_pool_type *pt;
>>>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>>>>>>>             if (use_dma_alloc) {
>>>>>>>                     for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
>>>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
>>>>>>>                                     ttm_pool_type_init(&pool->caching[i].orders[j],
>>>>>>>                                                        pool, i, j);
>>>>>>>             }
>>>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>>>>>>             if (pool->use_dma_alloc) {
>>>>>>>                     for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
>>>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
>>>>>>>                                     ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>>>>>>             }
>>>>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
>>>>>>>             unsigned int i;
>>>>>>>             seq_puts(m, "\t ");
>>>>>>> - for (i = 0; i < MAX_ORDER; ++i)
>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
>>>>>>>                     seq_printf(m, " ---%2u---", i);
>>>>>>>             seq_puts(m, "\n");
>>>>>>>     }
>>>>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
>>>>>>>     {
>>>>>>>             unsigned int i;
>>>>>>> - for (i = 0; i < MAX_ORDER; ++i)
>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
>>>>>>>                     seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
>>>>>>>             seq_puts(m, "\n");
>>>>>>>     }
>>>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>>>>>>>     {
>>>>>>>             unsigned int i;
>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1);
>>>>>>> +
>>>>>>>             if (!page_pool_size)
>>>>>>>                     page_pool_size = num_pages;
>>>>>>>             spin_lock_init(&shrinker_lock);
>>>>>>>             INIT_LIST_HEAD(&shrinker_list);
>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>>>>>>                     ttm_pool_type_init(&global_write_combined[i], NULL,
>>>>>>>                                        ttm_write_combined, i);
>>>>>>>                     ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
>>>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
>>>>>>>     {
>>>>>>>             unsigned int i;
>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>>>>>>                     ttm_pool_type_fini(&global_write_combined[i]);
>>>>>>>                     ttm_pool_type_fini(&global_uncached[i]);
>>>>>>> --
>>>>>>> 2.39.2
>>>>>>>
>>>> --
>>>> Daniel Vetter
>>>> Software Engineer, Intel Corporation
>>>> http://blog.ffwll.ch
>>>
>
Daniel Vetter April 13, 2023, 1:13 p.m. UTC | #8
On Thu, 13 Apr 2023 at 11:46, Christian König <christian.koenig@amd.com> wrote:
>
> Am 13.04.23 um 10:48 schrieb Daniel Vetter:
> > On Wed, 12 Apr 2023 at 16:18, Christian König <christian.koenig@amd.com> wrote:
> >> Am 12.04.23 um 11:08 schrieb Daniel Vetter:
> >>> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote:
> >>>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
> >>>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter:
> >>>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
> >>>>>>> When swapping out, we will split multi-order pages both in order to
> >>>>>>> move them to the swap-cache and to be able to return memory to the
> >>>>>>> swap cache as soon as possible on a page-by-page basis.
> >>>>>>> Reduce the page max order to the system PMD size, as we can then be nicer
> >>>>>>> to the system and avoid splitting gigantic pages.
> >>>>>>>
> >>>>>>> Looking forward to when we might be able to swap out PMD size folios
> >>>>>>> without splitting, this will also be a benefit.
> >>>>>>>
> >>>>>>> v2:
> >>>>>>> - Include all orders up to the PMD size (Christian König)
> >>>>>>> v3:
> >>>>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs
> >>>>>>>
> >>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
> >>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
> >>>>>> Apparently this fails on ppc build testing. Please supply build fix asap
> >>>>>> (or I guess we need to revert). I'm kinda not clear why this only showed
> >>>>>> up when I merged the drm-misc-next pr into drm-next ...
> >>>>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant
> >>>>> on this particular platform.
> >>>>>
> >>>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems
> >>>>> to be a constant.
> >>>>>
> >>>>> So how exactly can that here break?
> >>>> There's some in-flight patches to rework MAX_ORDER and other things in
> >>>> linux-next, maybe it's recent? If you check out linux-next then you need
> >>>> to reapply the patch (since sfr reverted it).
> >>> So I looked and on ppc64 PMD_SHIFT is defined in terms of
> >>> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable
> >>> __pte_index_size. This is in 6.3 already and seems pretty old.
> >> Ah! I missed that one, thanks.
> >>
> >>> So revert? Or fixup patch to make this work on ppc?
> >> I think for now just revert or change it so that we check if PMD_SHIFT
> >> is a constant.
> >>
> >> Thomas do you have any quick solution?
> > I guess Thomas is on vacations. Can you pls do the revert and push it
> > to drm-misc-next-fixes so this won't get lost?
>
> The offending patch hasn't showed up in drm-misc-next-fixes nor
> drm-misc-fixes yet. Looks like the branches are lacking behind.
>
> I can revert it on drm-misc-next, but I', not 100% sure that will then
> get picked up in time.

It's there now, Maarten forwarded drm-misc-next-fixes this morning.
That's why I pinged here again, trees are ready to land the revert :-)
-Daniel

>
> Christian.
>
> >
> > Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> >
> > preemptively for that. Normally I think we could wait a bit more but
> > it's really close to merge window PR and I don't like handing too many
> > open things to Dave when he's back :-)
> > -Daniel
> >
> >> Christian.
> >>
> >>>
> >>>>>>> ---
> >>>>>>>     drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
> >>>>>>>     1 file changed, 19 insertions(+), 11 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> >>>>>>> index dfce896c4bae..18c342a919a2 100644
> >>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> >>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> >>>>>>> @@ -47,6 +47,11 @@
> >>>>>>>     #include "ttm_module.h"
> >>>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
> >>>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
> >>>>>>> +/* Some architectures have a weird PMD_SHIFT */
> >>>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
> >>>>>>> +
> >>>>>>>     /**
> >>>>>>>      * struct ttm_pool_dma - Helper object for coherent DMA mappings
> >>>>>>>      *
> >>>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
> >>>>>>>     static atomic_long_t allocated_pages;
> >>>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
> >>>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER];
> >>>>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
> >>>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
> >>>>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
> >>>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
> >>>>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
> >>>>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
> >>>>>>>     static spinlock_t shrinker_lock;
> >>>>>>>     static struct list_head shrinker_list;
> >>>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
> >>>>>>>             else
> >>>>>>>                     gfp_flags |= GFP_HIGHUSER;
> >>>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
> >>>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
> >>>>>>>                  num_pages;
> >>>>>>>                  order = min_t(unsigned int, order, __fls(num_pages))) {
> >>>>>>>                     struct ttm_pool_type *pt;
> >>>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
> >>>>>>>             if (use_dma_alloc) {
> >>>>>>>                     for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> >>>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
> >>>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
> >>>>>>>                                     ttm_pool_type_init(&pool->caching[i].orders[j],
> >>>>>>>                                                        pool, i, j);
> >>>>>>>             }
> >>>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
> >>>>>>>             if (pool->use_dma_alloc) {
> >>>>>>>                     for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> >>>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
> >>>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
> >>>>>>>                                     ttm_pool_type_fini(&pool->caching[i].orders[j]);
> >>>>>>>             }
> >>>>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
> >>>>>>>             unsigned int i;
> >>>>>>>             seq_puts(m, "\t ");
> >>>>>>> - for (i = 0; i < MAX_ORDER; ++i)
> >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
> >>>>>>>                     seq_printf(m, " ---%2u---", i);
> >>>>>>>             seq_puts(m, "\n");
> >>>>>>>     }
> >>>>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
> >>>>>>>     {
> >>>>>>>             unsigned int i;
> >>>>>>> - for (i = 0; i < MAX_ORDER; ++i)
> >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
> >>>>>>>                     seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
> >>>>>>>             seq_puts(m, "\n");
> >>>>>>>     }
> >>>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
> >>>>>>>     {
> >>>>>>>             unsigned int i;
> >>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
> >>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1);
> >>>>>>> +
> >>>>>>>             if (!page_pool_size)
> >>>>>>>                     page_pool_size = num_pages;
> >>>>>>>             spin_lock_init(&shrinker_lock);
> >>>>>>>             INIT_LIST_HEAD(&shrinker_list);
> >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
> >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
> >>>>>>>                     ttm_pool_type_init(&global_write_combined[i], NULL,
> >>>>>>>                                        ttm_write_combined, i);
> >>>>>>>                     ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
> >>>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
> >>>>>>>     {
> >>>>>>>             unsigned int i;
> >>>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
> >>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
> >>>>>>>                     ttm_pool_type_fini(&global_write_combined[i]);
> >>>>>>>                     ttm_pool_type_fini(&global_uncached[i]);
> >>>>>>> --
> >>>>>>> 2.39.2
> >>>>>>>
> >>>> --
> >>>> Daniel Vetter
> >>>> Software Engineer, Intel Corporation
> >>>> http://blog.ffwll.ch
> >>>
> >
>
Christian König April 14, 2023, 10:11 a.m. UTC | #9
Am 13.04.23 um 15:13 schrieb Daniel Vetter:
> On Thu, 13 Apr 2023 at 11:46, Christian König <christian.koenig@amd.com> wrote:
>> Am 13.04.23 um 10:48 schrieb Daniel Vetter:
>>> On Wed, 12 Apr 2023 at 16:18, Christian König <christian.koenig@amd.com> wrote:
>>>> Am 12.04.23 um 11:08 schrieb Daniel Vetter:
>>>>> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote:
>>>>>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
>>>>>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter:
>>>>>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
>>>>>>>>> When swapping out, we will split multi-order pages both in order to
>>>>>>>>> move them to the swap-cache and to be able to return memory to the
>>>>>>>>> swap cache as soon as possible on a page-by-page basis.
>>>>>>>>> Reduce the page max order to the system PMD size, as we can then be nicer
>>>>>>>>> to the system and avoid splitting gigantic pages.
>>>>>>>>>
>>>>>>>>> Looking forward to when we might be able to swap out PMD size folios
>>>>>>>>> without splitting, this will also be a benefit.
>>>>>>>>>
>>>>>>>>> v2:
>>>>>>>>> - Include all orders up to the PMD size (Christian König)
>>>>>>>>> v3:
>>>>>>>>> - Avoid compilation errors for architectures with special PFN_SHIFTs
>>>>>>>>>
>>>>>>>>> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
>>>>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>>> Apparently this fails on ppc build testing. Please supply build fix asap
>>>>>>>> (or I guess we need to revert). I'm kinda not clear why this only showed
>>>>>>>> up when I merged the drm-misc-next pr into drm-next ...
>>>>>>> I'm really wondering this as well. It looks like PMD_SHIFT isn't a constant
>>>>>>> on this particular platform.
>>>>>>>
>>>>>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT always seems
>>>>>>> to be a constant.
>>>>>>>
>>>>>>> So how exactly can that here break?
>>>>>> There's some in-flight patches to rework MAX_ORDER and other things in
>>>>>> linux-next, maybe it's recent? If you check out linux-next then you need
>>>>>> to reapply the patch (since sfr reverted it).
>>>>> So I looked and on ppc64 PMD_SHIFT is defined in terms of
>>>>> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the variable
>>>>> __pte_index_size. This is in 6.3 already and seems pretty old.
>>>> Ah! I missed that one, thanks.
>>>>
>>>>> So revert? Or fixup patch to make this work on ppc?
>>>> I think for now just revert or change it so that we check if PMD_SHIFT
>>>> is a constant.
>>>>
>>>> Thomas do you have any quick solution?
>>> I guess Thomas is on vacations. Can you pls do the revert and push it
>>> to drm-misc-next-fixes so this won't get lost?
>> The offending patch hasn't showed up in drm-misc-next-fixes nor
>> drm-misc-fixes yet. Looks like the branches are lacking behind.
>>
>> I can revert it on drm-misc-next, but I', not 100% sure that will then
>> get picked up in time.
> It's there now, Maarten forwarded drm-misc-next-fixes this morning.
> That's why I pinged here again, trees are ready to land the revert :-)

Just pushed it.

Christian.

> -Daniel
>
>> Christian.
>>
>>> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>
>>> preemptively for that. Normally I think we could wait a bit more but
>>> it's really close to merge window PR and I don't like handing too many
>>> open things to Dave when he's back :-)
>>> -Daniel
>>>
>>>> Christian.
>>>>
>>>>>>>>> ---
>>>>>>>>>      drivers/gpu/drm/ttm/ttm_pool.c | 30 +++++++++++++++++++-----------
>>>>>>>>>      1 file changed, 19 insertions(+), 11 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>>> index dfce896c4bae..18c342a919a2 100644
>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>>> @@ -47,6 +47,11 @@
>>>>>>>>>      #include "ttm_module.h"
>>>>>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
>>>>>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
>>>>>>>>> +/* Some architectures have a weird PMD_SHIFT */
>>>>>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
>>>>>>>>> +
>>>>>>>>>      /**
>>>>>>>>>       * struct ttm_pool_dma - Helper object for coherent DMA mappings
>>>>>>>>>       *
>>>>>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
>>>>>>>>>      static atomic_long_t allocated_pages;
>>>>>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
>>>>>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER];
>>>>>>>>> +static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
>>>>>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
>>>>>>>>> -static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
>>>>>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
>>>>>>>>> +static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
>>>>>>>>> +static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
>>>>>>>>>      static spinlock_t shrinker_lock;
>>>>>>>>>      static struct list_head shrinker_list;
>>>>>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
>>>>>>>>>              else
>>>>>>>>>                      gfp_flags |= GFP_HIGHUSER;
>>>>>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
>>>>>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
>>>>>>>>>                   num_pages;
>>>>>>>>>                   order = min_t(unsigned int, order, __fls(num_pages))) {
>>>>>>>>>                      struct ttm_pool_type *pt;
>>>>>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>>>>>>>>>              if (use_dma_alloc) {
>>>>>>>>>                      for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>>>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
>>>>>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
>>>>>>>>>                                      ttm_pool_type_init(&pool->caching[i].orders[j],
>>>>>>>>>                                                         pool, i, j);
>>>>>>>>>              }
>>>>>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>>>>>>>>              if (pool->use_dma_alloc) {
>>>>>>>>>                      for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>>>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
>>>>>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
>>>>>>>>>                                      ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>>>>>>>>              }
>>>>>>>>> @@ -637,7 +642,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
>>>>>>>>>              unsigned int i;
>>>>>>>>>              seq_puts(m, "\t ");
>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i)
>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
>>>>>>>>>                      seq_printf(m, " ---%2u---", i);
>>>>>>>>>              seq_puts(m, "\n");
>>>>>>>>>      }
>>>>>>>>> @@ -648,7 +653,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
>>>>>>>>>      {
>>>>>>>>>              unsigned int i;
>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i)
>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
>>>>>>>>>                      seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
>>>>>>>>>              seq_puts(m, "\n");
>>>>>>>>>      }
>>>>>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long num_pages)
>>>>>>>>>      {
>>>>>>>>>              unsigned int i;
>>>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
>>>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1);
>>>>>>>>> +
>>>>>>>>>              if (!page_pool_size)
>>>>>>>>>                      page_pool_size = num_pages;
>>>>>>>>>              spin_lock_init(&shrinker_lock);
>>>>>>>>>              INIT_LIST_HEAD(&shrinker_list);
>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>>>>>>>>                      ttm_pool_type_init(&global_write_combined[i], NULL,
>>>>>>>>>                                         ttm_write_combined, i);
>>>>>>>>>                      ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
>>>>>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
>>>>>>>>>      {
>>>>>>>>>              unsigned int i;
>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>>>>>>>>                      ttm_pool_type_fini(&global_write_combined[i]);
>>>>>>>>>                      ttm_pool_type_fini(&global_uncached[i]);
>>>>>>>>> --
>>>>>>>>> 2.39.2
>>>>>>>>>
>>>>>> --
>>>>>> Daniel Vetter
>>>>>> Software Engineer, Intel Corporation
>>>>>> http://blog.ffwll.ch
>
Thomas Hellstrom April 17, 2023, 8:02 a.m. UTC | #10
Hi

On 4/14/23 12:11, Christian König wrote:
> Am 13.04.23 um 15:13 schrieb Daniel Vetter:
>> On Thu, 13 Apr 2023 at 11:46, Christian König 
>> <christian.koenig@amd.com> wrote:
>>> Am 13.04.23 um 10:48 schrieb Daniel Vetter:
>>>> On Wed, 12 Apr 2023 at 16:18, Christian König 
>>>> <christian.koenig@amd.com> wrote:
>>>>> Am 12.04.23 um 11:08 schrieb Daniel Vetter:
>>>>>> On Tue, 11 Apr 2023 at 15:45, Daniel Vetter <daniel@ffwll.ch> wrote:
>>>>>>> On Tue, Apr 11, 2023 at 02:11:18PM +0200, Christian König wrote:
>>>>>>>> Am 11.04.23 um 11:51 schrieb Daniel Vetter:
>>>>>>>>> On Tue, Apr 04, 2023 at 10:06:49PM +0200, Thomas Hellström wrote:
>>>>>>>>>> When swapping out, we will split multi-order pages both in 
>>>>>>>>>> order to
>>>>>>>>>> move them to the swap-cache and to be able to return memory 
>>>>>>>>>> to the
>>>>>>>>>> swap cache as soon as possible on a page-by-page basis.
>>>>>>>>>> Reduce the page max order to the system PMD size, as we can 
>>>>>>>>>> then be nicer
>>>>>>>>>> to the system and avoid splitting gigantic pages.
>>>>>>>>>>
>>>>>>>>>> Looking forward to when we might be able to swap out PMD size 
>>>>>>>>>> folios
>>>>>>>>>> without splitting, this will also be a benefit.
>>>>>>>>>>
>>>>>>>>>> v2:
>>>>>>>>>> - Include all orders up to the PMD size (Christian König)
>>>>>>>>>> v3:
>>>>>>>>>> - Avoid compilation errors for architectures with special 
>>>>>>>>>> PFN_SHIFTs
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Thomas Hellström 
>>>>>>>>>> <thomas.hellstrom@linux.intel.com>
>>>>>>>>>> Reviewed-by: Christian König <christian.koenig@amd.com>
>>>>>>>>> Apparently this fails on ppc build testing. Please supply 
>>>>>>>>> build fix asap
>>>>>>>>> (or I guess we need to revert). I'm kinda not clear why this 
>>>>>>>>> only showed
>>>>>>>>> up when I merged the drm-misc-next pr into drm-next ...
>>>>>>>> I'm really wondering this as well. It looks like PMD_SHIFT 
>>>>>>>> isn't a constant
>>>>>>>> on this particular platform.
>>>>>>>>
>>>>>>>> But from what I can find in the upstream 6.2 kernel PMD_SHIFT 
>>>>>>>> always seems
>>>>>>>> to be a constant.
>>>>>>>>
>>>>>>>> So how exactly can that here break?
>>>>>>> There's some in-flight patches to rework MAX_ORDER and other 
>>>>>>> things in
>>>>>>> linux-next, maybe it's recent? If you check out linux-next then 
>>>>>>> you need
>>>>>>> to reapply the patch (since sfr reverted it).
>>>>>> So I looked and on ppc64 PMD_SHIFT is defined in terms of
>>>>>> PTE_INDEX_SIZE, which is defined (for book3s) in terms of the 
>>>>>> variable
>>>>>> __pte_index_size. This is in 6.3 already and seems pretty old.
>>>>> Ah! I missed that one, thanks.
>>>>>
>>>>>> So revert? Or fixup patch to make this work on ppc?
>>>>> I think for now just revert or change it so that we check if 
>>>>> PMD_SHIFT
>>>>> is a constant.
>>>>>
>>>>> Thomas do you have any quick solution?
>>>> I guess Thomas is on vacations. Can you pls do the revert and push it
>>>> to drm-misc-next-fixes so this won't get lost?
>>> The offending patch hasn't showed up in drm-misc-next-fixes nor
>>> drm-misc-fixes yet. Looks like the branches are lacking behind.
>>>
>>> I can revert it on drm-misc-next, but I', not 100% sure that will then
>>> get picked up in time.
>> It's there now, Maarten forwarded drm-misc-next-fixes this morning.
>> That's why I pinged here again, trees are ready to land the revert :-)
>
> Just pushed it.
>
> Christian.

Thanks for fixing this. (I was on vacation). I got a "BUILD SUCCESS" for 
this series based on drm-misc-next so I didn't think anything weird 
would show up.

Thanks,

Thomas

>
>> -Daniel
>>
>>> Christian.
>>>
>>>> Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>>
>>>> preemptively for that. Normally I think we could wait a bit more but
>>>> it's really close to merge window PR and I don't like handing too many
>>>> open things to Dave when he's back :-)
>>>> -Daniel
>>>>
>>>>> Christian.
>>>>>
>>>>>>>>>> ---
>>>>>>>>>>      drivers/gpu/drm/ttm/ttm_pool.c | 30 
>>>>>>>>>> +++++++++++++++++++-----------
>>>>>>>>>>      1 file changed, 19 insertions(+), 11 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c 
>>>>>>>>>> b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>>>> index dfce896c4bae..18c342a919a2 100644
>>>>>>>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>>>>>>>> @@ -47,6 +47,11 @@
>>>>>>>>>>      #include "ttm_module.h"
>>>>>>>>>> +#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
>>>>>>>>>> +#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
>>>>>>>>>> +/* Some architectures have a weird PMD_SHIFT */
>>>>>>>>>> +#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? 
>>>>>>>>>> __TTM_DIM_ORDER : MAX_ORDER)
>>>>>>>>>> +
>>>>>>>>>>      /**
>>>>>>>>>>       * struct ttm_pool_dma - Helper object for coherent DMA 
>>>>>>>>>> mappings
>>>>>>>>>>       *
>>>>>>>>>> @@ -65,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
>>>>>>>>>>      static atomic_long_t allocated_pages;
>>>>>>>>>> -static struct ttm_pool_type global_write_combined[MAX_ORDER];
>>>>>>>>>> -static struct ttm_pool_type global_uncached[MAX_ORDER];
>>>>>>>>>> +static struct ttm_pool_type 
>>>>>>>>>> global_write_combined[TTM_DIM_ORDER];
>>>>>>>>>> +static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
>>>>>>>>>> -static struct ttm_pool_type 
>>>>>>>>>> global_dma32_write_combined[MAX_ORDER];
>>>>>>>>>> -static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
>>>>>>>>>> +static struct ttm_pool_type 
>>>>>>>>>> global_dma32_write_combined[TTM_DIM_ORDER];
>>>>>>>>>> +static struct ttm_pool_type 
>>>>>>>>>> global_dma32_uncached[TTM_DIM_ORDER];
>>>>>>>>>>      static spinlock_t shrinker_lock;
>>>>>>>>>>      static struct list_head shrinker_list;
>>>>>>>>>> @@ -444,7 +449,7 @@ int ttm_pool_alloc(struct ttm_pool *pool, 
>>>>>>>>>> struct ttm_tt *tt,
>>>>>>>>>>              else
>>>>>>>>>>                      gfp_flags |= GFP_HIGHUSER;
>>>>>>>>>> - for (order = min_t(unsigned int, MAX_ORDER - 1, 
>>>>>>>>>> __fls(num_pages));
>>>>>>>>>> + for (order = min_t(unsigned int, TTM_MAX_ORDER, 
>>>>>>>>>> __fls(num_pages));
>>>>>>>>>>                   num_pages;
>>>>>>>>>>                   order = min_t(unsigned int, order, 
>>>>>>>>>> __fls(num_pages))) {
>>>>>>>>>>                      struct ttm_pool_type *pt;
>>>>>>>>>> @@ -563,7 +568,7 @@ void ttm_pool_init(struct ttm_pool *pool, 
>>>>>>>>>> struct device *dev,
>>>>>>>>>>              if (use_dma_alloc) {
>>>>>>>>>>                      for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>>>>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
>>>>>>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
>>>>>>>>>> ttm_pool_type_init(&pool->caching[i].orders[j],
>>>>>>>>>>                                                         pool, 
>>>>>>>>>> i, j);
>>>>>>>>>>              }
>>>>>>>>>> @@ -583,7 +588,7 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>>>>>>>>>              if (pool->use_dma_alloc) {
>>>>>>>>>>                      for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>>>>>>>> -                 for (j = 0; j < MAX_ORDER; ++j)
>>>>>>>>>> +                 for (j = 0; j < TTM_DIM_ORDER; ++j)
>>>>>>>>>> ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>>>>>>>>>              }
>>>>>>>>>> @@ -637,7 +642,7 @@ static void 
>>>>>>>>>> ttm_pool_debugfs_header(struct seq_file *m)
>>>>>>>>>>              unsigned int i;
>>>>>>>>>>              seq_puts(m, "\t ");
>>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i)
>>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
>>>>>>>>>>                      seq_printf(m, " ---%2u---", i);
>>>>>>>>>>              seq_puts(m, "\n");
>>>>>>>>>>      }
>>>>>>>>>> @@ -648,7 +653,7 @@ static void 
>>>>>>>>>> ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
>>>>>>>>>>      {
>>>>>>>>>>              unsigned int i;
>>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i)
>>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i)
>>>>>>>>>>                      seq_printf(m, " %8u", 
>>>>>>>>>> ttm_pool_type_count(&pt[i]));
>>>>>>>>>>              seq_puts(m, "\n");
>>>>>>>>>>      }
>>>>>>>>>> @@ -751,13 +756,16 @@ int ttm_pool_mgr_init(unsigned long 
>>>>>>>>>> num_pages)
>>>>>>>>>>      {
>>>>>>>>>>              unsigned int i;
>>>>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
>>>>>>>>>> + BUILD_BUG_ON(TTM_DIM_ORDER < 1);
>>>>>>>>>> +
>>>>>>>>>>              if (!page_pool_size)
>>>>>>>>>>                      page_pool_size = num_pages;
>>>>>>>>>>              spin_lock_init(&shrinker_lock);
>>>>>>>>>>              INIT_LIST_HEAD(&shrinker_list);
>>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
>>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>>>>>>>>> ttm_pool_type_init(&global_write_combined[i], NULL,
>>>>>>>>>> ttm_write_combined, i);
>>>>>>>>>> ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
>>>>>>>>>> @@ -790,7 +798,7 @@ void ttm_pool_mgr_fini(void)
>>>>>>>>>>      {
>>>>>>>>>>              unsigned int i;
>>>>>>>>>> - for (i = 0; i < MAX_ORDER; ++i) {
>>>>>>>>>> + for (i = 0; i < TTM_DIM_ORDER; ++i) {
>>>>>>>>>> ttm_pool_type_fini(&global_write_combined[i]);
>>>>>>>>>> ttm_pool_type_fini(&global_uncached[i]);
>>>>>>>>>> -- 
>>>>>>>>>> 2.39.2
>>>>>>>>>>
>>>>>>> -- 
>>>>>>> Daniel Vetter
>>>>>>> Software Engineer, Intel Corporation
>>>>>>> http://blog.ffwll.ch
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index dfce896c4bae..18c342a919a2 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -47,6 +47,11 @@ 
 
 #include "ttm_module.h"
 
+#define TTM_MAX_ORDER (PMD_SHIFT - PAGE_SHIFT)
+#define __TTM_DIM_ORDER (TTM_MAX_ORDER + 1)
+/* Some architectures have a weird PMD_SHIFT */
+#define TTM_DIM_ORDER (__TTM_DIM_ORDER <= MAX_ORDER ? __TTM_DIM_ORDER : MAX_ORDER)
+
 /**
  * struct ttm_pool_dma - Helper object for coherent DMA mappings
  *
@@ -65,11 +70,11 @@  module_param(page_pool_size, ulong, 0644);
 
 static atomic_long_t allocated_pages;
 
-static struct ttm_pool_type global_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_uncached[MAX_ORDER];
+static struct ttm_pool_type global_write_combined[TTM_DIM_ORDER];
+static struct ttm_pool_type global_uncached[TTM_DIM_ORDER];
 
-static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER];
-static struct ttm_pool_type global_dma32_uncached[MAX_ORDER];
+static struct ttm_pool_type global_dma32_write_combined[TTM_DIM_ORDER];
+static struct ttm_pool_type global_dma32_uncached[TTM_DIM_ORDER];
 
 static spinlock_t shrinker_lock;
 static struct list_head shrinker_list;
@@ -444,7 +449,7 @@  int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	else
 		gfp_flags |= GFP_HIGHUSER;
 
-	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
+	for (order = min_t(unsigned int, TTM_MAX_ORDER, __fls(num_pages));
 	     num_pages;
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
 		struct ttm_pool_type *pt;
@@ -563,7 +568,7 @@  void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 
 	if (use_dma_alloc) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < MAX_ORDER; ++j)
+			for (j = 0; j < TTM_DIM_ORDER; ++j)
 				ttm_pool_type_init(&pool->caching[i].orders[j],
 						   pool, i, j);
 	}
@@ -583,7 +588,7 @@  void ttm_pool_fini(struct ttm_pool *pool)
 
 	if (pool->use_dma_alloc) {
 		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < MAX_ORDER; ++j)
+			for (j = 0; j < TTM_DIM_ORDER; ++j)
 				ttm_pool_type_fini(&pool->caching[i].orders[j]);
 	}
 
@@ -637,7 +642,7 @@  static void ttm_pool_debugfs_header(struct seq_file *m)
 	unsigned int i;
 
 	seq_puts(m, "\t ");
-	for (i = 0; i < MAX_ORDER; ++i)
+	for (i = 0; i < TTM_DIM_ORDER; ++i)
 		seq_printf(m, " ---%2u---", i);
 	seq_puts(m, "\n");
 }
@@ -648,7 +653,7 @@  static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
 {
 	unsigned int i;
 
-	for (i = 0; i < MAX_ORDER; ++i)
+	for (i = 0; i < TTM_DIM_ORDER; ++i)
 		seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
 	seq_puts(m, "\n");
 }
@@ -751,13 +756,16 @@  int ttm_pool_mgr_init(unsigned long num_pages)
 {
 	unsigned int i;
 
+	BUILD_BUG_ON(TTM_DIM_ORDER > MAX_ORDER);
+	BUILD_BUG_ON(TTM_DIM_ORDER < 1);
+
 	if (!page_pool_size)
 		page_pool_size = num_pages;
 
 	spin_lock_init(&shrinker_lock);
 	INIT_LIST_HEAD(&shrinker_list);
 
-	for (i = 0; i < MAX_ORDER; ++i) {
+	for (i = 0; i < TTM_DIM_ORDER; ++i) {
 		ttm_pool_type_init(&global_write_combined[i], NULL,
 				   ttm_write_combined, i);
 		ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
@@ -790,7 +798,7 @@  void ttm_pool_mgr_fini(void)
 {
 	unsigned int i;
 
-	for (i = 0; i < MAX_ORDER; ++i) {
+	for (i = 0; i < TTM_DIM_ORDER; ++i) {
 		ttm_pool_type_fini(&global_write_combined[i]);
 		ttm_pool_type_fini(&global_uncached[i]);