drm/ttm: stop pooling cached NUMA pages v2

Message ID	20240415134821.1919-1-christian.koenig@amd.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <dri-devel-bounces@lists.freedesktop.org> From: " =?utf-8?q?Christian_K=C3=B6nig?= " <ckoenig.leichtzumerken@gmail.com> To: Felix.Kuehling@amd.com, Alexander.Deucher@amd.com, Rajneesh.Bhardwaj@amd.com, Steven.Roberts@amd.com, dri-devel@lists.freedesktop.org Subject: [PATCH] drm/ttm: stop pooling cached NUMA pages v2 Date: Mon, 15 Apr 2024 15:48:21 +0200 Message-Id: <20240415134821.1919-1-christian.koenig@amd.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Precedence: list Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" <dri-devel-bounces@lists.freedesktop.org>
Series	drm/ttm: stop pooling cached NUMA pages v2 \| expand drm/ttm: stop pooling cached NUMA pages v2

Message ID

20240415134821.1919-1-christian.koenig@amd.com (mailing list archive)

State

New, archived

Headers

From: " =?utf-8?q?Christian_K=C3=B6nig?= " <ckoenig.leichtzumerken@gmail.com>
To: Felix.Kuehling@amd.com, Alexander.Deucher@amd.com,
 Rajneesh.Bhardwaj@amd.com, Steven.Roberts@amd.com,
 dri-devel@lists.freedesktop.org
Subject: [PATCH] drm/ttm: stop pooling cached NUMA pages v2
Date: Mon, 15 Apr 2024 15:48:21 +0200
Message-Id: <20240415134821.1919-1-christian.koenig@amd.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Precedence: list
Errors-To: dri-devel-bounces@lists.freedesktop.org
Sender: "dri-devel" <dri-devel-bounces@lists.freedesktop.org>

Series

drm/ttm: stop pooling cached NUMA pages v2 | expand

Commit Message

Christian König April 15, 2024, 1:48 p.m. UTC

From: Christian König <ckoenig.leichtzumerken@gmail.com>

We only pool write combined and uncached allocations because they
require extra overhead on allocation and release.

If we also pool cached NUMA it not only means some extra unnecessary
overhead, but also that under memory pressure it can happen that
pages from the wrong NUMA node enters the pool and are re-used
over and over again.

This can lead to performance reduction after running into memory
pressure.

v2: restructure and cleanup the code a bit from the internal hack to
    test this.

Signed-off-by: Christian König <christian.koenig@amd.com>
Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool")
CC: stable@vger.kernel.org
---
 drivers/gpu/drm/ttm/ttm_pool.c | 38 +++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 10 deletions(-)

Comments

Felix Kuehling April 15, 2024, 1:53 p.m. UTC | #1

On 2024-04-15 9:48, Christian König wrote:
> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>
> We only pool write combined and uncached allocations because they
> require extra overhead on allocation and release.
>
> If we also pool cached NUMA it not only means some extra unnecessary
> overhead, but also that under memory pressure it can happen that
> pages from the wrong NUMA node enters the pool and are re-used
> over and over again.
>
> This can lead to performance reduction after running into memory
> pressure.
>
> v2: restructure and cleanup the code a bit from the internal hack to
>      test this.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool")
> CC: stable@vger.kernel.org
> ---
>   drivers/gpu/drm/ttm/ttm_pool.c | 38 +++++++++++++++++++++++++---------
>   1 file changed, 28 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
> index 112438d965ff..6e1fd6985ffc 100644
> --- a/drivers/gpu/drm/ttm/ttm_pool.c
> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
> @@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
>   						  enum ttm_caching caching,
>   						  unsigned int order)
>   {
> -	if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
> +	if (pool->use_dma_alloc)
>   		return &pool->caching[caching].orders[order];
>   
>   #ifdef CONFIG_X86
>   	switch (caching) {
>   	case ttm_write_combined:
> +		if (pool->nid != NUMA_NO_NODE)
> +			return &pool->caching[caching].orders[order];

Doesn't this break USWC allocations on NUMA systems, where we set a NUMA 
node for the default pool (at least we were planning to at some point)?

Regards,
   Felix


> +
>   		if (pool->use_dma32)
>   			return &global_dma32_write_combined[order];
>   
>   		return &global_write_combined[order];
>   	case ttm_uncached:
> +		if (pool->nid != NUMA_NO_NODE)
> +			return &pool->caching[caching].orders[order];
> +
>   		if (pool->use_dma32)
>   			return &global_dma32_uncached[order];
>   
> @@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
>   	pool->use_dma_alloc = use_dma_alloc;
>   	pool->use_dma32 = use_dma32;
>   
> -	if (use_dma_alloc || nid != NUMA_NO_NODE) {
> -		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> -			for (j = 0; j < NR_PAGE_ORDERS; ++j)
> -				ttm_pool_type_init(&pool->caching[i].orders[j],
> -						   pool, i, j);
> +	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
> +		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
> +			struct ttm_pool_type *pt;
> +
> +			/* Initialize only pool types which are actually used */
> +			pt = ttm_pool_select_type(pool, i, j);
> +			if (pt != &pool->caching[i].orders[j])
> +				continue;
> +
> +			ttm_pool_type_init(pt, pool, i, j);
> +		}
>   	}
>   }
>   EXPORT_SYMBOL(ttm_pool_init);
> @@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
>   {
>   	unsigned int i, j;
>   
> -	if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
> -		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
> -			for (j = 0; j < NR_PAGE_ORDERS; ++j)
> -				ttm_pool_type_fini(&pool->caching[i].orders[j]);
> +	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
> +		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
> +			struct ttm_pool_type *pt;
> +
> +			pt = ttm_pool_select_type(pool, i, j);
> +			if (pt != &pool->caching[i].orders[j])
> +				continue;
> +
> +			ttm_pool_type_fini(pt);
> +		}
>   	}
>   
>   	/* We removed the pool types from the LRU, but we need to also make sure

Christian König April 15, 2024, 2:08 p.m. UTC | #2

Am 15.04.24 um 15:53 schrieb Felix Kuehling:
> On 2024-04-15 9:48, Christian König wrote:
>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>
>> We only pool write combined and uncached allocations because they
>> require extra overhead on allocation and release.
>>
>> If we also pool cached NUMA it not only means some extra unnecessary
>> overhead, but also that under memory pressure it can happen that
>> pages from the wrong NUMA node enters the pool and are re-used
>> over and over again.
>>
>> This can lead to performance reduction after running into memory
>> pressure.
>>
>> v2: restructure and cleanup the code a bit from the internal hack to
>>      test this.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool")
>> CC: stable@vger.kernel.org
>> ---
>>   drivers/gpu/drm/ttm/ttm_pool.c | 38 +++++++++++++++++++++++++---------
>>   1 file changed, 28 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c 
>> b/drivers/gpu/drm/ttm/ttm_pool.c
>> index 112438d965ff..6e1fd6985ffc 100644
>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>> @@ -288,17 +288,23 @@ static struct ttm_pool_type 
>> *ttm_pool_select_type(struct ttm_pool *pool,
>>                             enum ttm_caching caching,
>>                             unsigned int order)
>>   {
>> -    if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
>> +    if (pool->use_dma_alloc)
>>           return &pool->caching[caching].orders[order];
>>     #ifdef CONFIG_X86
>>       switch (caching) {
>>       case ttm_write_combined:
>> +        if (pool->nid != NUMA_NO_NODE)
>> +            return &pool->caching[caching].orders[order];
>
> Doesn't this break USWC allocations on NUMA systems, where we set a 
> NUMA node for the default pool (at least we were planning to at some 
> point)?

I don't think so, but I might have missed something. Why do you think 
that would break?

I mean the idea is basically if the pool is associated with a NUMA id we 
should rather use this pool instead of the global one.

And that is true for both cases, the default pool and the specialized ones.

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>> +
>>           if (pool->use_dma32)
>>               return &global_dma32_write_combined[order];
>>             return &global_write_combined[order];
>>       case ttm_uncached:
>> +        if (pool->nid != NUMA_NO_NODE)
>> +            return &pool->caching[caching].orders[order];
>> +
>>           if (pool->use_dma32)
>>               return &global_dma32_uncached[order];
>>   @@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, 
>> struct device *dev,
>>       pool->use_dma_alloc = use_dma_alloc;
>>       pool->use_dma32 = use_dma32;
>>   -    if (use_dma_alloc || nid != NUMA_NO_NODE) {
>> -        for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>> -            for (j = 0; j < NR_PAGE_ORDERS; ++j)
>> - ttm_pool_type_init(&pool->caching[i].orders[j],
>> -                           pool, i, j);
>> +    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>> +        for (j = 0; j < NR_PAGE_ORDERS; ++j) {
>> +            struct ttm_pool_type *pt;
>> +
>> +            /* Initialize only pool types which are actually used */
>> +            pt = ttm_pool_select_type(pool, i, j);
>> +            if (pt != &pool->caching[i].orders[j])
>> +                continue;
>> +
>> +            ttm_pool_type_init(pt, pool, i, j);
>> +        }
>>       }
>>   }
>>   EXPORT_SYMBOL(ttm_pool_init);
>> @@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>   {
>>       unsigned int i, j;
>>   -    if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
>> -        for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>> -            for (j = 0; j < NR_PAGE_ORDERS; ++j)
>> - ttm_pool_type_fini(&pool->caching[i].orders[j]);
>> +    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>> +        for (j = 0; j < NR_PAGE_ORDERS; ++j) {
>> +            struct ttm_pool_type *pt;
>> +
>> +            pt = ttm_pool_select_type(pool, i, j);
>> +            if (pt != &pool->caching[i].orders[j])
>> +                continue;
>> +
>> +            ttm_pool_type_fini(pt);
>> +        }
>>       }
>>         /* We removed the pool types from the LRU, but we need to 
>> also make sure

Felix Kuehling April 15, 2024, 2:32 p.m. UTC | #3

On 2024-04-15 10:08, Christian König wrote:
> Am 15.04.24 um 15:53 schrieb Felix Kuehling:
>> On 2024-04-15 9:48, Christian König wrote:
>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>
>>> We only pool write combined and uncached allocations because they
>>> require extra overhead on allocation and release.
>>>
>>> If we also pool cached NUMA it not only means some extra unnecessary
>>> overhead, but also that under memory pressure it can happen that
>>> pages from the wrong NUMA node enters the pool and are re-used
>>> over and over again.
>>>
>>> This can lead to performance reduction after running into memory
>>> pressure.
>>>
>>> v2: restructure and cleanup the code a bit from the internal hack to
>>>      test this.
>>>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool")
>>> CC: stable@vger.kernel.org
>>> ---
>>>   drivers/gpu/drm/ttm/ttm_pool.c | 38 
>>> +++++++++++++++++++++++++---------
>>>   1 file changed, 28 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c 
>>> b/drivers/gpu/drm/ttm/ttm_pool.c
>>> index 112438d965ff..6e1fd6985ffc 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>> @@ -288,17 +288,23 @@ static struct ttm_pool_type 
>>> *ttm_pool_select_type(struct ttm_pool *pool,
>>>                             enum ttm_caching caching,
>>>                             unsigned int order)
>>>   {
>>> -    if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
>>> +    if (pool->use_dma_alloc)
>>>           return &pool->caching[caching].orders[order];
>>>     #ifdef CONFIG_X86
>>>       switch (caching) {
>>>       case ttm_write_combined:
>>> +        if (pool->nid != NUMA_NO_NODE)
>>> +            return &pool->caching[caching].orders[order];
>>
>> Doesn't this break USWC allocations on NUMA systems, where we set a 
>> NUMA node for the default pool (at least we were planning to at some 
>> point)?
>
> I don't think so, but I might have missed something. Why do you think 
> that would break?
>
> I mean the idea is basically if the pool is associated with a NUMA id 
> we should rather use this pool instead of the global one.
>
> And that is true for both cases, the default pool and the specialized 
> ones.

OK, I think I misunderstood what I was reading. It looked to me like it 
would always use a "caching" pool if nid was set. But caching here is a 
variable; each node still has specialized pools for write combining etc.

Then the concern you stated in the commit message "under memory pressure 
it can happen that pages from the wrong NUMA node enters the pool and 
are re-used over and over again" is still possible for uncached and wc 
pages. Anyway, it's better than not having NUMA, I guess.

The patch is

Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>


>
> Regards,
> Christian.
>
>>
>> Regards,
>>   Felix
>>
>>
>>> +
>>>           if (pool->use_dma32)
>>>               return &global_dma32_write_combined[order];
>>>             return &global_write_combined[order];
>>>       case ttm_uncached:
>>> +        if (pool->nid != NUMA_NO_NODE)
>>> +            return &pool->caching[caching].orders[order];
>>> +
>>>           if (pool->use_dma32)
>>>               return &global_dma32_uncached[order];
>>>   @@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, 
>>> struct device *dev,
>>>       pool->use_dma_alloc = use_dma_alloc;
>>>       pool->use_dma32 = use_dma32;
>>>   -    if (use_dma_alloc || nid != NUMA_NO_NODE) {
>>> -        for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>> -            for (j = 0; j < NR_PAGE_ORDERS; ++j)
>>> - ttm_pool_type_init(&pool->caching[i].orders[j],
>>> -                           pool, i, j);
>>> +    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>>> +        for (j = 0; j < NR_PAGE_ORDERS; ++j) {
>>> +            struct ttm_pool_type *pt;
>>> +
>>> +            /* Initialize only pool types which are actually used */
>>> +            pt = ttm_pool_select_type(pool, i, j);
>>> +            if (pt != &pool->caching[i].orders[j])
>>> +                continue;
>>> +
>>> +            ttm_pool_type_init(pt, pool, i, j);
>>> +        }
>>>       }
>>>   }
>>>   EXPORT_SYMBOL(ttm_pool_init);
>>> @@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>>   {
>>>       unsigned int i, j;
>>>   -    if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
>>> -        for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>> -            for (j = 0; j < NR_PAGE_ORDERS; ++j)
>>> - ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>> +    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>>> +        for (j = 0; j < NR_PAGE_ORDERS; ++j) {
>>> +            struct ttm_pool_type *pt;
>>> +
>>> +            pt = ttm_pool_select_type(pool, i, j);
>>> +            if (pt != &pool->caching[i].orders[j])
>>> +                continue;
>>> +
>>> +            ttm_pool_type_fini(pt);
>>> +        }
>>>       }
>>>         /* We removed the pool types from the LRU, but we need to 
>>> also make sure
>

Christian König April 15, 2024, 2:38 p.m. UTC | #4

Am 15.04.24 um 16:32 schrieb Felix Kuehling:
> On 2024-04-15 10:08, Christian König wrote:
>> Am 15.04.24 um 15:53 schrieb Felix Kuehling:
>>> On 2024-04-15 9:48, Christian König wrote:
>>>> From: Christian König <ckoenig.leichtzumerken@gmail.com>
>>>>
>>>> We only pool write combined and uncached allocations because they
>>>> require extra overhead on allocation and release.
>>>>
>>>> If we also pool cached NUMA it not only means some extra unnecessary
>>>> overhead, but also that under memory pressure it can happen that
>>>> pages from the wrong NUMA node enters the pool and are re-used
>>>> over and over again.
>>>>
>>>> This can lead to performance reduction after running into memory
>>>> pressure.
>>>>
>>>> v2: restructure and cleanup the code a bit from the internal hack to
>>>>      test this.
>>>>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Fixes: 4482d3c94d7f ("drm/ttm: add NUMA node id to the pool")
>>>> CC: stable@vger.kernel.org
>>>> ---
>>>>   drivers/gpu/drm/ttm/ttm_pool.c | 38 
>>>> +++++++++++++++++++++++++---------
>>>>   1 file changed, 28 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/ttm/ttm_pool.c 
>>>> b/drivers/gpu/drm/ttm/ttm_pool.c
>>>> index 112438d965ff..6e1fd6985ffc 100644
>>>> --- a/drivers/gpu/drm/ttm/ttm_pool.c
>>>> +++ b/drivers/gpu/drm/ttm/ttm_pool.c
>>>> @@ -288,17 +288,23 @@ static struct ttm_pool_type 
>>>> *ttm_pool_select_type(struct ttm_pool *pool,
>>>>                             enum ttm_caching caching,
>>>>                             unsigned int order)
>>>>   {
>>>> -    if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
>>>> +    if (pool->use_dma_alloc)
>>>>           return &pool->caching[caching].orders[order];
>>>>     #ifdef CONFIG_X86
>>>>       switch (caching) {
>>>>       case ttm_write_combined:
>>>> +        if (pool->nid != NUMA_NO_NODE)
>>>> +            return &pool->caching[caching].orders[order];
>>>
>>> Doesn't this break USWC allocations on NUMA systems, where we set a 
>>> NUMA node for the default pool (at least we were planning to at some 
>>> point)?
>>
>> I don't think so, but I might have missed something. Why do you think 
>> that would break?
>>
>> I mean the idea is basically if the pool is associated with a NUMA id 
>> we should rather use this pool instead of the global one.
>>
>> And that is true for both cases, the default pool and the specialized 
>> ones.
>
> OK, I think I misunderstood what I was reading. It looked to me like 
> it would always use a "caching" pool if nid was set. But caching here 
> is a variable; each node still has specialized pools for write 
> combining etc.
>
> Then the concern you stated in the commit message "under memory 
> pressure it can happen that pages from the wrong NUMA node enters the 
> pool and are re-used over and over again" is still possible for 
> uncached and wc pages. Anyway, it's better than not having NUMA, I guess.

Yes, correct. But since KFD doesn't use USWC that much I don't think 
this will cause an issue.

If we really start to see issues with that we can always re-consider 
using __GFP_THIS_NODE.

>
> The patch is
>
> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>

Thanks, going to push to drm-misc-fixes now.

Regards,
Christian.

>
>
>>
>> Regards,
>> Christian.
>>
>>>
>>> Regards,
>>>   Felix
>>>
>>>
>>>> +
>>>>           if (pool->use_dma32)
>>>>               return &global_dma32_write_combined[order];
>>>>             return &global_write_combined[order];
>>>>       case ttm_uncached:
>>>> +        if (pool->nid != NUMA_NO_NODE)
>>>> +            return &pool->caching[caching].orders[order];
>>>> +
>>>>           if (pool->use_dma32)
>>>>               return &global_dma32_uncached[order];
>>>>   @@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, 
>>>> struct device *dev,
>>>>       pool->use_dma_alloc = use_dma_alloc;
>>>>       pool->use_dma32 = use_dma32;
>>>>   -    if (use_dma_alloc || nid != NUMA_NO_NODE) {
>>>> -        for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>> -            for (j = 0; j < NR_PAGE_ORDERS; ++j)
>>>> - ttm_pool_type_init(&pool->caching[i].orders[j],
>>>> -                           pool, i, j);
>>>> +    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>>>> +        for (j = 0; j < NR_PAGE_ORDERS; ++j) {
>>>> +            struct ttm_pool_type *pt;
>>>> +
>>>> +            /* Initialize only pool types which are actually used */
>>>> +            pt = ttm_pool_select_type(pool, i, j);
>>>> +            if (pt != &pool->caching[i].orders[j])
>>>> +                continue;
>>>> +
>>>> +            ttm_pool_type_init(pt, pool, i, j);
>>>> +        }
>>>>       }
>>>>   }
>>>>   EXPORT_SYMBOL(ttm_pool_init);
>>>> @@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
>>>>   {
>>>>       unsigned int i, j;
>>>>   -    if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
>>>> -        for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
>>>> -            for (j = 0; j < NR_PAGE_ORDERS; ++j)
>>>> - ttm_pool_type_fini(&pool->caching[i].orders[j]);
>>>> +    for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
>>>> +        for (j = 0; j < NR_PAGE_ORDERS; ++j) {
>>>> +            struct ttm_pool_type *pt;
>>>> +
>>>> +            pt = ttm_pool_select_type(pool, i, j);
>>>> +            if (pt != &pool->caching[i].orders[j])
>>>> +                continue;
>>>> +
>>>> +            ttm_pool_type_fini(pt);
>>>> +        }
>>>>       }
>>>>         /* We removed the pool types from the LRU, but we need to 
>>>> also make sure
>>

diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 112438d965ff..6e1fd6985ffc 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -288,17 +288,23 @@  static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
 						  enum ttm_caching caching,
 						  unsigned int order)
 {
-	if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
+	if (pool->use_dma_alloc)
 		return &pool->caching[caching].orders[order];
 
 #ifdef CONFIG_X86
 	switch (caching) {
 	case ttm_write_combined:
+		if (pool->nid != NUMA_NO_NODE)
+			return &pool->caching[caching].orders[order];
+
 		if (pool->use_dma32)
 			return &global_dma32_write_combined[order];
 
 		return &global_write_combined[order];
 	case ttm_uncached:
+		if (pool->nid != NUMA_NO_NODE)
+			return &pool->caching[caching].orders[order];
+
 		if (pool->use_dma32)
 			return &global_dma32_uncached[order];
 
@@ -566,11 +572,17 @@  void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
 	pool->use_dma_alloc = use_dma_alloc;
 	pool->use_dma32 = use_dma32;
 
-	if (use_dma_alloc || nid != NUMA_NO_NODE) {
-		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < NR_PAGE_ORDERS; ++j)
-				ttm_pool_type_init(&pool->caching[i].orders[j],
-						   pool, i, j);
+	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+			struct ttm_pool_type *pt;
+
+			/* Initialize only pool types which are actually used */
+			pt = ttm_pool_select_type(pool, i, j);
+			if (pt != &pool->caching[i].orders[j])
+				continue;
+
+			ttm_pool_type_init(pt, pool, i, j);
+		}
 	}
 }
 EXPORT_SYMBOL(ttm_pool_init);
@@ -599,10 +611,16 @@  void ttm_pool_fini(struct ttm_pool *pool)
 {
 	unsigned int i, j;
 
-	if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
-		for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-			for (j = 0; j < NR_PAGE_ORDERS; ++j)
-				ttm_pool_type_fini(&pool->caching[i].orders[j]);
+	for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+		for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+			struct ttm_pool_type *pt;
+
+			pt = ttm_pool_select_type(pool, i, j);
+			if (pt != &pool->caching[i].orders[j])
+				continue;
+
+			ttm_pool_type_fini(pt);
+		}
 	}
 
 	/* We removed the pool types from the LRU, but we need to also make sure

drm/ttm: stop pooling cached NUMA pages v2

Commit Message

Comments

Patch