[v2,04/10] drm/amdgpu: Simplify AQL queue mapping

Message ID: 20210422013058.6305-5-Felix.Kuehling@amd.com (mailing list archive)
State: New, archived
Series: Implement multi-GPU DMA mappings for KFD

Commit Message

Felix Kuehling April 22, 2021, 1:30 a.m. UTC
Do AQL queue double-mapping with a single attach call. That will make it
easier to create per-GPU BOs later, to be shared between the two BO VA
mappings on the same GPU.

Freeing the attachments is not necessary if map_to_gpu fails. These will be
cleaned up when the kgd_mem object is destroyed in
amdgpu_amdkfd_gpuvm_free_memory_of_gpu.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 103 ++++++++----------
 1 file changed, 48 insertions(+), 55 deletions(-)
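
To make the consolidated control flow concrete, here is a minimal, self-contained sketch of the idiom the patch adopts: a boolean used as the loop bound, so the body runs once for a regular queue and twice for an AQL queue, mapping the same BO at va and again at va + bo_size. All names and types below are illustrative stand-ins, not the actual kernel API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for struct kfd_mem_attachment. */
struct sketch_attachment {
	uint64_t va;
};

/* Runs once when is_aql is false, twice when it is true,
 * advancing va by bo_size between the two mappings.
 */
static void sketch_attach(uint64_t va, uint64_t bo_size, bool is_aql,
			  struct sketch_attachment out[2])
{
	int i;

	for (i = 0; i <= is_aql; i++) {
		out[i].va = va;
		printf("add VA 0x%llx - 0x%llx\n",
		       (unsigned long long)va,
		       (unsigned long long)(va + bo_size));
		va += bo_size;
	}
}

int main(void)
{
	struct sketch_attachment a[2];

	sketch_attach(0x1000, 0x2000, true, a);	/* AQL: two mappings */
	return 0;
}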

Comments

Zeng, Oak April 23, 2021, 1:33 a.m. UTC | #1
Regards,
Oak

On 2021-04-21, 9:31 PM, "amd-gfx on behalf of Felix Kuehling" <amd-gfx-bounces@lists.freedesktop.org on behalf of Felix.Kuehling@amd.com> wrote:

    [...]
    +		/* Add BO to VM internal data structures */
    +		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
Just for discussion: are we allowed to add one BO to a VM twice? When I looked at amdgpu_vm_bo_base_init (called by amdgpu_vm_bo_add), at the line:
bo->vm_bo = base;
it looks like adding the same BO to the VM a second time overwrites bo->vm_bo. I am not sure whether this will cause an issue later.
This is not introduced by your code; the original code (calling kfd_mem_attach twice for AQL) has the same problem.
    [...]
Felix Kuehling April 23, 2021, 7:23 a.m. UTC | #2
On 2021-04-22, 9:33 p.m., Zeng, Oak wrote:
> [...]
>     +		/* Add BO to VM internal data structures */
>     +		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
> Just for discussion: are we allowed to add one BO to a VM twice? When I looked at amdgpu_vm_bo_base_init (called by amdgpu_vm_bo_add), at the line:
> bo->vm_bo = base;
> it looks like adding the same BO to the VM a second time overwrites bo->vm_bo. I am not sure whether this will cause an issue later.
> This is not introduced by your code; the original code (calling kfd_mem_attach twice for AQL) has the same problem.

If you just add one more line of context, you'll see that bo->vm_bo is
the head of a singly linked list of struct amdgpu_vm_bo_base. So adding
a BO to a VM multiple times just extends that singly linked list:

        base->next = bo->vm_bo;
        bo->vm_bo = base;
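
A minimal, self-contained sketch of this intrusive-list pattern (with simplified stand-in types rather than the real amdgpu structs) shows that adding the same BO a second time simply prepends another element:

#include <stddef.h>
#include <stdio.h>

struct vm_bo_base {
	struct vm_bo_base *next;
	int id;				/* stand-in for per-VM state */
};

struct bo {
	struct vm_bo_base *vm_bo;	/* head of a singly linked list */
};

/* Mirrors the prepend in amdgpu_vm_bo_base_init: the old head is
 * chained behind the new element, so nothing is overwritten or lost.
 */
static void base_init(struct bo *bo, struct vm_bo_base *base, int id)
{
	base->id = id;
	base->next = bo->vm_bo;
	bo->vm_bo = base;
}

int main(void)
{
	struct bo bo = { NULL };
	struct vm_bo_base a, b;
	struct vm_bo_base *it;

	base_init(&bo, &a, 1);	/* first attachment */
	base_init(&bo, &b, 2);	/* second (AQL) attachment */

	for (it = bo.vm_bo; it; it = it->next)
		printf("base %d\n", it->id);	/* prints 2, then 1 */
	return 0;
}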

Regards,
  Felix


Ramesh Errabolu May 10, 2021, 10:03 p.m. UTC | #3

Acked-by: Ramesh Errabolu <ramesh.errabolu@amd.com>

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Kuehling, Felix
Sent: Friday, April 23, 2021 2:23 AM
To: Zeng, Oak <Oak.Zeng@amd.com>; amd-gfx@lists.freedesktop.org; dri-devel@lists.freedesktop.org
Subject: Re: [PATCH v2 04/10] drm/amdgpu: Simplify AQL queue mapping

[...]

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 34c9a2d0028e..fbd7e786b54e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -486,70 +486,76 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
  * 4a.  Validate new page tables and directories
  */
 static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
-		struct amdgpu_vm *vm, bool is_aql,
-		struct kfd_mem_attachment **p_attachment)
+		struct amdgpu_vm *vm, bool is_aql)
 {
 	unsigned long bo_size = mem->bo->tbo.base.size;
 	uint64_t va = mem->va;
-	struct kfd_mem_attachment *attachment;
-	struct amdgpu_bo *bo;
-	int ret;
+	struct kfd_mem_attachment *attachment[2] = {NULL, NULL};
+	struct amdgpu_bo *bo[2] = {NULL, NULL};
+	int i, ret;
 
 	if (!va) {
 		pr_err("Invalid VA when adding BO to VM\n");
 		return -EINVAL;
 	}
 
-	if (is_aql)
-		va += bo_size;
-
-	attachment = kzalloc(sizeof(*attachment), GFP_KERNEL);
-	if (!attachment)
-		return -ENOMEM;
+	for (i = 0; i <= is_aql; i++) {
+		attachment[i] = kzalloc(sizeof(*attachment[i]), GFP_KERNEL);
+		if (unlikely(!attachment[i])) {
+			ret = -ENOMEM;
+			goto unwind;
+		}
 
-	pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
-			va + bo_size, vm);
+		pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+			 va + bo_size, vm);
 
-	/* FIXME: For now all attachments use the same BO. This is incorrect
-	 * because one BO can only have one DMA mapping for one GPU. We need
-	 * one BO per GPU, e.g. a DMABuf import with dynamic attachment. This
-	 * will be addressed one BO-type at a time in subsequent patches.
-	 */
-	bo = mem->bo;
-	drm_gem_object_get(&bo->tbo.base);
+		/* FIXME: For now all attachments use the same BO. This is
+		 * incorrect because one BO can only have one DMA mapping
+		 * for one GPU. We need one BO per GPU, e.g. a DMABuf
+		 * import with dynamic attachment. This will be addressed
+		 * one BO-type at a time in subsequent patches.
+		 */
+		bo[i] = mem->bo;
+		drm_gem_object_get(&bo[i]->tbo.base);
 
-	/* Add BO to VM internal data structures*/
-	attachment->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
-	if (!attachment->bo_va) {
-		ret = -EINVAL;
-		pr_err("Failed to add BO object to VM. ret == %d\n",
-				ret);
-		goto err_vmadd;
-	}
+		/* Add BO to VM internal data structures */
+		attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]);
+		if (unlikely(!attachment[i]->bo_va)) {
+			ret = -ENOMEM;
+			pr_err("Failed to add BO object to VM. ret == %d\n",
+			       ret);
+			goto unwind;
+		}
 
-	attachment->va = va;
-	attachment->pte_flags = get_pte_flags(adev, mem);
-	attachment->adev = adev;
-	list_add(&attachment->list, &mem->attachments);
+		attachment[i]->va = va;
+		attachment[i]->pte_flags = get_pte_flags(adev, mem);
+		attachment[i]->adev = adev;
+		list_add(&attachment[i]->list, &mem->attachments);
 
-	if (p_attachment)
-		*p_attachment = attachment;
+		va += bo_size;
+	}
 
 	/* Allocate validate page tables if needed */
 	ret = vm_validate_pt_pd_bos(vm);
 	if (unlikely(ret)) {
 		pr_err("validate_pt_pd_bos() failed\n");
-		goto err_alloc_pts;
+		goto unwind;
 	}
 
 	return 0;
 
-err_alloc_pts:
-	amdgpu_vm_bo_rmv(adev, attachment->bo_va);
-	list_del(&attachment->list);
-err_vmadd:
-	drm_gem_object_put(&bo->tbo.base);
-	kfree(attachment);
+unwind:
+	for (; i >= 0; i--) {
+		if (!attachment[i])
+			continue;
+		if (attachment[i]->bo_va) {
+			amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va);
+			list_del(&attachment[i]->list);
+		}
+		if (bo[i])
+			drm_gem_object_put(&bo[i]->tbo.base);
+		kfree(attachment[i]);
+	}
 	return ret;
 }
 
@@ -1382,8 +1388,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	uint32_t domain;
 	struct kfd_mem_attachment *entry;
 	struct bo_vm_reservation_context ctx;
-	struct kfd_mem_attachment *attachment = NULL;
-	struct kfd_mem_attachment *attachment_aql = NULL;
 	unsigned long bo_size;
 	bool is_invalid_userptr = false;
 
@@ -1433,15 +1437,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 		is_invalid_userptr = true;
 
 	if (!kfd_mem_is_attached(avm, mem)) {
-		ret = kfd_mem_attach(adev, mem, avm, false, &attachment);
+		ret = kfd_mem_attach(adev, mem, avm, mem->aql_queue);
 		if (ret)
 			goto attach_failed;
-		if (mem->aql_queue) {
-			ret = kfd_mem_attach(adev, mem, avm, true,
-					     &attachment_aql);
-			if (ret)
-				goto attach_failed_aql;
-		}
 	} else {
 		ret = vm_validate_pt_pd_bos(avm);
 		if (unlikely(ret))
@@ -1496,11 +1494,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	goto out;
 
 map_bo_to_gpuvm_failed:
-	if (attachment_aql)
-		kfd_mem_detach(attachment_aql);
-attach_failed_aql:
-	if (attachment)
-		kfd_mem_detach(attachment);
 attach_failed:
 	unreserve_bo_and_vms(&ctx, false, false);
 out:
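
The unwind label above uses a common kernel error-handling idiom: on failure at index i, the loop walks back from i, and every step must tolerate a partially initialized slot, hence the NULL checks. Below is a minimal, self-contained sketch of the same pattern, with illustrative names standing in for the kernel helpers.

#include <stdio.h>
#include <stdlib.h>

/* Allocate up to two entries; on failure, unwind from the current
 * index. Slot i is the one that failed, so each step must tolerate
 * a NULL or partially set-up entry, as in kfd_mem_attach.
 */
static int attach_sketch(int n)
{
	char *entry[2] = { NULL, NULL };
	int i, ret;

	for (i = 0; i < n && i < 2; i++) {
		entry[i] = malloc(16);
		if (!entry[i]) {
			ret = -1;
			goto unwind;
		}
	}

	/* Success: in the real code the attachments stay allocated. */
	for (i = 0; i < n && i < 2; i++)
		free(entry[i]);
	return 0;

unwind:
	for (; i >= 0; i--) {
		if (!entry[i])
			continue;	/* the slot that failed to allocate */
		free(entry[i]);
	}
	return ret;
}

int main(void)
{
	return attach_sketch(2) ? EXIT_FAILURE : EXIT_SUCCESS;
}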