diff mbox series

[6/6] drm/amdgpu: Do bo_va ref counting for KFD BOs

Message ID 20230112013157.750568-7-Felix.Kuehling@amd.com (mailing list archive)
State New, archived
Headers show
Series Enable KFD to use render node BO mappings | expand

Commit Message

Felix Kuehling Jan. 12, 2023, 1:31 a.m. UTC
This is needed to correctly handle BOs imported into the GEM API, which
would otherwise get added twice to the same VM.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

Comments

Chen, Xiaogang Jan. 13, 2023, 10:58 p.m. UTC | #1
Reviewed-by: Xiaogang Chen <Xiaoganng.Chen@amd.com>

Regards

Xiaogang

On 1/11/2023 7:31 PM, Felix Kuehling wrote:
> This is needed to correctly handle BOs imported into the GEM API, which
> would otherwise get added twice to the same VM.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++----
>   1 file changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index df08e84f01d7..8b5ba2e04a79 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -370,9 +370,17 @@ static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
>   		return ret;
>   
>   	ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
> -	if (!ret)
> -		dma_resv_add_fence(bo->tbo.base.resv, fence,
> -				   DMA_RESV_USAGE_BOOKKEEP);
> +	if (ret)
> +		goto unreserve_out;
> +
> +	ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
> +	if (ret)
> +		goto unreserve_out;
> +
> +	dma_resv_add_fence(bo->tbo.base.resv, fence,
> +			   DMA_RESV_USAGE_BOOKKEEP);
> +
> +unreserve_out:
>   	amdgpu_bo_unreserve(bo);
>   
>   	return ret;
> @@ -785,6 +793,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>   	uint64_t va = mem->va;
>   	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
>   	struct amdgpu_bo *bo[2] = {NULL, NULL};
> +	struct amdgpu_bo_va *bo_va;
>   	bool same_hive = false;
>   	int i, ret;
>   
> @@ -871,7 +880,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>   			pr_debug("Unable to reserve BO during memory attach");
>   			goto unwind;
>   		}
> -		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> +		bo_va = amdgpu_vm_bo_find(vm, bo[i]);
> +		if (!bo_va)
> +			bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> +		else
> +			++bo_va->ref_count;
> +		attachment[i]->bo_va = bo_va;
>   		amdgpu_bo_unreserve(bo[i]);
>   		if (unlikely(!attachment[i]->bo_va)) {
>   			ret = -ENOMEM;
> @@ -895,7 +909,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>   			continue;
>   		if (attachment[i]->bo_va) {
>   			amdgpu_bo_reserve(bo[i], true);
> -			amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
> +			if (--attachment[i]->bo_va->ref_count == 0)
> +				amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
>   			amdgpu_bo_unreserve(bo[i]);
>   			list_del(&attachment[i]->list);
>   		}
> @@ -912,7 +927,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
>   
>   	pr_debug("\t remove VA 0x%llx in entry %p\n",
>   			attachment->va, attachment);
> -	amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
> +	if (--attachment->bo_va->ref_count == 0)
> +		amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
>   	drm_gem_object_put(&bo->tbo.base);
>   	list_del(&attachment->list);
>   	kfree(attachment);
Ramesh Errabolu Jan. 16, 2023, 10:12 p.m. UTC | #2
[AMD Official Use Only - General]

Comment inline.

Regards,
Ramesh

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Chen, Xiaogang
Sent: Saturday, January 14, 2023 4:28 AM
To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Cc: Koenig, Christian <Christian.Koenig@amd.com>
Subject: Re: [PATCH 6/6] drm/amdgpu: Do bo_va ref counting for KFD BOs

Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.


Reviewed-by: Xiaogang Chen <Xiaoganng.Chen@amd.com>

Regards

Xiaogang

On 1/11/2023 7:31 PM, Felix Kuehling wrote:
> This is needed to correctly handle BOs imported into the GEM API, 
> which would otherwise get added twice to the same VM.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++----
>   1 file changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index df08e84f01d7..8b5ba2e04a79 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -370,9 +370,17 @@ static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
>               return ret;
>
>       ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
> -     if (!ret)
> -             dma_resv_add_fence(bo->tbo.base.resv, fence,
> -                                DMA_RESV_USAGE_BOOKKEEP);
> +     if (ret)
> +             goto unreserve_out;
> +
> +     ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
Ramesh: Could this stmt to reserve space for fence be applied in patch 4.

> +     if (ret)
> +             goto unreserve_out;
> +
> +     dma_resv_add_fence(bo->tbo.base.resv, fence,
> +                        DMA_RESV_USAGE_BOOKKEEP);
> +
> +unreserve_out:
>       amdgpu_bo_unreserve(bo);
>
>       return ret;
> @@ -785,6 +793,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>       uint64_t va = mem->va;
>       struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
>       struct amdgpu_bo *bo[2] = {NULL, NULL};
> +     struct amdgpu_bo_va *bo_va;
>       bool same_hive = false;
>       int i, ret;
>
> @@ -871,7 +880,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>                       pr_debug("Unable to reserve BO during memory attach");
>                       goto unwind;
>               }
> -             attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> +             bo_va = amdgpu_vm_bo_find(vm, bo[i]);
> +             if (!bo_va)
> +                     bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> +             else
> +                     ++bo_va->ref_count;
> +             attachment[i]->bo_va = bo_va;
>               amdgpu_bo_unreserve(bo[i]);
>               if (unlikely(!attachment[i]->bo_va)) {
>                       ret = -ENOMEM;
> @@ -895,7 +909,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>                       continue;
>               if (attachment[i]->bo_va) {
>                       amdgpu_bo_reserve(bo[i], true);
> -                     amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
> +                     if (--attachment[i]->bo_va->ref_count == 0)
> +                             amdgpu_vm_bo_del(adev, 
> + attachment[i]->bo_va);
>                       amdgpu_bo_unreserve(bo[i]);
>                       list_del(&attachment[i]->list);
>               }
> @@ -912,7 +927,8 @@ static void kfd_mem_detach(struct 
> kfd_mem_attachment *attachment)
>
>       pr_debug("\t remove VA 0x%llx in entry %p\n",
>                       attachment->va, attachment);
> -     amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
> +     if (--attachment->bo_va->ref_count == 0)
> +             amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
>       drm_gem_object_put(&bo->tbo.base);
>       list_del(&attachment->list);
>       kfree(attachment);
Felix Kuehling Jan. 16, 2023, 10:58 p.m. UTC | #3
On 2023-01-16 17:12, Errabolu, Ramesh wrote:
> [AMD Official Use Only - General]
>
> Comment inline.
>
> Regards,
> Ramesh
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Chen, Xiaogang
> Sent: Saturday, January 14, 2023 4:28 AM
> To: Kuehling, Felix <Felix.Kuehling@amd.com>; amd-gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org
> Cc: Koenig, Christian <Christian.Koenig@amd.com>
> Subject: Re: [PATCH 6/6] drm/amdgpu: Do bo_va ref counting for KFD BOs
>
> Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
>
>
> Reviewed-by: Xiaogang Chen <Xiaoganng.Chen@amd.com>
>
> Regards
>
> Xiaogang
>
> On 1/11/2023 7:31 PM, Felix Kuehling wrote:
>> This is needed to correctly handle BOs imported into the GEM API,
>> which would otherwise get added twice to the same VM.
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>    .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++----
>>    1 file changed, 22 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> index df08e84f01d7..8b5ba2e04a79 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>> @@ -370,9 +370,17 @@ static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
>>                return ret;
>>
>>        ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
>> -     if (!ret)
>> -             dma_resv_add_fence(bo->tbo.base.resv, fence,
>> -                                DMA_RESV_USAGE_BOOKKEEP);
>> +     if (ret)
>> +             goto unreserve_out;
>> +
>> +     ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
> Ramesh: Could this stmt to reserve space for fence be applied in patch 4.

Done. Thanks for pointing this out.

Regards,
   Felix


>
>> +     if (ret)
>> +             goto unreserve_out;
>> +
>> +     dma_resv_add_fence(bo->tbo.base.resv, fence,
>> +                        DMA_RESV_USAGE_BOOKKEEP);
>> +
>> +unreserve_out:
>>        amdgpu_bo_unreserve(bo);
>>
>>        return ret;
>> @@ -785,6 +793,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>>        uint64_t va = mem->va;
>>        struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
>>        struct amdgpu_bo *bo[2] = {NULL, NULL};
>> +     struct amdgpu_bo_va *bo_va;
>>        bool same_hive = false;
>>        int i, ret;
>>
>> @@ -871,7 +880,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>>                        pr_debug("Unable to reserve BO during memory attach");
>>                        goto unwind;
>>                }
>> -             attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
>> +             bo_va = amdgpu_vm_bo_find(vm, bo[i]);
>> +             if (!bo_va)
>> +                     bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
>> +             else
>> +                     ++bo_va->ref_count;
>> +             attachment[i]->bo_va = bo_va;
>>                amdgpu_bo_unreserve(bo[i]);
>>                if (unlikely(!attachment[i]->bo_va)) {
>>                        ret = -ENOMEM;
>> @@ -895,7 +909,8 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
>>                        continue;
>>                if (attachment[i]->bo_va) {
>>                        amdgpu_bo_reserve(bo[i], true);
>> -                     amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
>> +                     if (--attachment[i]->bo_va->ref_count == 0)
>> +                             amdgpu_vm_bo_del(adev,
>> + attachment[i]->bo_va);
>>                        amdgpu_bo_unreserve(bo[i]);
>>                        list_del(&attachment[i]->list);
>>                }
>> @@ -912,7 +927,8 @@ static void kfd_mem_detach(struct
>> kfd_mem_attachment *attachment)
>>
>>        pr_debug("\t remove VA 0x%llx in entry %p\n",
>>                        attachment->va, attachment);
>> -     amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
>> +     if (--attachment->bo_va->ref_count == 0)
>> +             amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
>>        drm_gem_object_put(&bo->tbo.base);
>>        list_del(&attachment->list);
>>        kfree(attachment);
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index df08e84f01d7..8b5ba2e04a79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -370,9 +370,17 @@  static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
 		return ret;
 
 	ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
-	if (!ret)
-		dma_resv_add_fence(bo->tbo.base.resv, fence,
-				   DMA_RESV_USAGE_BOOKKEEP);
+	if (ret)
+		goto unreserve_out;
+
+	ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
+	if (ret)
+		goto unreserve_out;
+
+	dma_resv_add_fence(bo->tbo.base.resv, fence,
+			   DMA_RESV_USAGE_BOOKKEEP);
+
+unreserve_out:
 	amdgpu_bo_unreserve(bo);
 
 	return ret;
@@ -785,6 +793,7 @@  static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 	uint64_t va = mem->va;
 	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
 	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	struct amdgpu_bo_va *bo_va;
 	bool same_hive = false;
 	int i, ret;
 
@@ -871,7 +880,12 @@  static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 			pr_debug("Unable to reserve BO during memory attach");
 			goto unwind;
 		}
-		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+		bo_va = amdgpu_vm_bo_find(vm, bo[i]);
+		if (!bo_va)
+			bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+		else
+			++bo_va->ref_count;
+		attachment[i]->bo_va = bo_va;
 		amdgpu_bo_unreserve(bo[i]);
 		if (unlikely(!attachment[i]->bo_va)) {
 			ret = -ENOMEM;
@@ -895,7 +909,8 @@  static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
 			continue;
 		if (attachment[i]->bo_va) {
 			amdgpu_bo_reserve(bo[i], true);
-			amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
+			if (--attachment[i]->bo_va->ref_count == 0)
+				amdgpu_vm_bo_del(adev, attachment[i]->bo_va);
 			amdgpu_bo_unreserve(bo[i]);
 			list_del(&attachment[i]->list);
 		}
@@ -912,7 +927,8 @@  static void kfd_mem_detach(struct kfd_mem_attachment *attachment)
 
 	pr_debug("\t remove VA 0x%llx in entry %p\n",
 			attachment->va, attachment);
-	amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
+	if (--attachment->bo_va->ref_count == 0)
+		amdgpu_vm_bo_del(attachment->adev, attachment->bo_va);
 	drm_gem_object_put(&bo->tbo.base);
 	list_del(&attachment->list);
 	kfree(attachment);