diff mbox series

[5/6] drm/amdgpu: update mappings not managed by KFD

Message ID 20230112013157.750568-6-Felix.Kuehling@amd.com (mailing list archive)
State New, archived
Headers show
Series Enable KFD to use render node BO mappings | expand

Commit Message

Felix Kuehling Jan. 12, 2023, 1:31 a.m. UTC
When restoring after an eviction, use amdgpu_vm_handle_moved to update
BO VA mappings in KFD VMs that are not managed through the KFD API. This
should allow using the render node API to create more flexible memory
mappings in KFD VMs.

v2: Sync with pd fence after all page table updates
v3: Update comments, remove TODOs that are no longer applicable

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

Comments

Chen, Xiaogang Jan. 13, 2023, 8:02 p.m. UTC | #1
Reviewed-by: Xiaogang Chen <Xiaoganng.Chen@amd.com>

Regards

Xiaogang

On 1/11/2023 7:31 PM, Felix Kuehling wrote:
> When restoring after an eviction, use amdgpu_vm_handle_moved to update
> BO VA mappings in KFD VMs that are not managed through the KFD API. This
> should allow using the render node API to create more flexible memory
> mappings in KFD VMs.
>
> v2: Sync with pd fence after all page table updates
> v3: Update comments, remove TODOs that are no longer applicable
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 28 +++++++++++++++----
>   1 file changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 79213f476493..df08e84f01d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -2728,12 +2728,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
>   	if (ret)
>   		goto validate_map_fail;
>   
> -	ret = process_sync_pds_resv(process_info, &sync_obj);
> -	if (ret) {
> -		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
> -		goto validate_map_fail;
> -	}
> -
>   	/* Validate BOs and map them to GPUVM (update VM page tables). */
>   	list_for_each_entry(mem, &process_info->kfd_bo_list,
>   			    validate_list.head) {
> @@ -2781,6 +2775,19 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
>   	if (failed_size)
>   		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
>   
> +	/* Update mappings not managed by KFD */
> +	list_for_each_entry(peer_vm, &process_info->vm_list_head,
> +			vm_list_node) {
> +		struct amdgpu_device *adev = amdgpu_ttm_adev(
> +			peer_vm->root.bo->tbo.bdev);
> +
> +		ret = amdgpu_vm_handle_moved(adev, peer_vm, &ctx.ticket);
> +		if (ret) {
> +			pr_debug("Memory eviction: handle moved failed. Try again\n");
> +			goto validate_map_fail;
> +		}
> +	}
> +
>   	/* Update page directories */
>   	ret = process_update_pds(process_info, &sync_obj);
>   	if (ret) {
> @@ -2788,6 +2795,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
>   		goto validate_map_fail;
>   	}
>   
> +	/* Sync with fences on all the page tables. They implicitly depend on any
> +	 * move fences from amdgpu_vm_handle_moved above.
> +	 */
> +	ret = process_sync_pds_resv(process_info, &sync_obj);
> +	if (ret) {
> +		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
> +		goto validate_map_fail;
> +	}
> +
>   	/* Wait for validate and PT updates to finish */
>   	amdgpu_sync_wait(&sync_obj, false);
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 79213f476493..df08e84f01d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -2728,12 +2728,6 @@  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (ret)
 		goto validate_map_fail;
 
-	ret = process_sync_pds_resv(process_info, &sync_obj);
-	if (ret) {
-		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
-		goto validate_map_fail;
-	}
-
 	/* Validate BOs and map them to GPUVM (update VM page tables). */
 	list_for_each_entry(mem, &process_info->kfd_bo_list,
 			    validate_list.head) {
@@ -2781,6 +2775,19 @@  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	if (failed_size)
 		pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
 
+	/* Update mappings not managed by KFD */
+	list_for_each_entry(peer_vm, &process_info->vm_list_head,
+			vm_list_node) {
+		struct amdgpu_device *adev = amdgpu_ttm_adev(
+			peer_vm->root.bo->tbo.bdev);
+
+		ret = amdgpu_vm_handle_moved(adev, peer_vm, &ctx.ticket);
+		if (ret) {
+			pr_debug("Memory eviction: handle moved failed. Try again\n");
+			goto validate_map_fail;
+		}
+	}
+
 	/* Update page directories */
 	ret = process_update_pds(process_info, &sync_obj);
 	if (ret) {
@@ -2788,6 +2795,15 @@  int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 		goto validate_map_fail;
 	}
 
+	/* Sync with fences on all the page tables. They implicitly depend on any
+	 * move fences from amdgpu_vm_handle_moved above.
+	 */
+	ret = process_sync_pds_resv(process_info, &sync_obj);
+	if (ret) {
+		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
+		goto validate_map_fail;
+	}
+
 	/* Wait for validate and PT updates to finish */
 	amdgpu_sync_wait(&sync_obj, false);