diff mbox series

[08/10] accel/habanalabs: return tlb inv error code upon failure

Message ID 20230316113640.499267-8-ogabbay@kernel.org (mailing list archive)
State New, archived
Headers show
Series [01/10] accel/habanalabs: align to latest firmware specs | expand

Commit Message

Oded Gabbay March 16, 2023, 11:36 a.m. UTC
From: Koby Elbaz <kelbaz@habana.ai>

Now that CQ-completion based jobs do not trigger a reset upon failure,
failure of such jobs (e.g., MMU cache invalidation) should be handled
by the caller itself depending on the error code returned to it.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/accel/habanalabs/gaudi/gaudi.c   | 24 +++++++++------
 drivers/accel/habanalabs/gaudi2/gaudi2.c | 37 ++++++++++++++++++------
 2 files changed, 43 insertions(+), 18 deletions(-)
diff mbox series

Patch

diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c
index a9d84675b407..08a4b1cf2b42 100644
--- a/drivers/accel/habanalabs/gaudi/gaudi.c
+++ b/drivers/accel/habanalabs/gaudi/gaudi.c
@@ -3725,7 +3725,7 @@  static int gaudi_mmu_init(struct hl_device *hdev)
 		if (rc) {
 			dev_err(hdev->dev,
 				"failed to set hop0 addr for asid %d\n", i);
-			goto err;
+			return rc;
 		}
 	}
 
@@ -3736,7 +3736,9 @@  static int gaudi_mmu_init(struct hl_device *hdev)
 	/* mem cache invalidation */
 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
 
-	hl_mmu_invalidate_cache(hdev, true, 0);
+	rc = hl_mmu_invalidate_cache(hdev, true, 0);
+	if (rc)
+		return rc;
 
 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
@@ -3752,9 +3754,6 @@  static int gaudi_mmu_init(struct hl_device *hdev)
 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
 
 	return 0;
-
-err:
-	return rc;
 }
 
 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
@@ -8420,19 +8419,26 @@  static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
 	}
 
 	mutex_lock(&hdev->mmu_lock);
+
 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
 			hdev->internal_cb_pool_dma_addr,
 			HOST_SPACE_INTERNAL_CB_SZ);
-
-	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
-	mutex_unlock(&hdev->mmu_lock);
-
 	if (rc)
 		goto unreserve_internal_cb_pool;
 
+	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
+	if (rc)
+		goto unmap_internal_cb_pool;
+
+	mutex_unlock(&hdev->mmu_lock);
+
 	return 0;
 
+unmap_internal_cb_pool:
+	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
+			HOST_SPACE_INTERNAL_CB_SZ);
 unreserve_internal_cb_pool:
+	mutex_unlock(&hdev->mmu_lock);
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
 			HOST_SPACE_INTERNAL_CB_SZ);
 destroy_internal_cb_pool:
diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c
index 94d53cd1b0ff..247a6e2c2e91 100644
--- a/drivers/accel/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c
@@ -10239,16 +10239,23 @@  static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
 
 	/* Create mapping on asic side */
 	mutex_lock(&hdev->mmu_lock);
+
 	rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
-	hl_mmu_invalidate_cache_range(hdev, false,
+	if (rc) {
+		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
+		goto unreserve_va;
+	}
+
+	rc = hl_mmu_invalidate_cache_range(hdev, false,
 				      MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
 				      ctx->asid, reserved_va_base, SZ_2M);
-	mutex_unlock(&hdev->mmu_lock);
 	if (rc) {
-		dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
+		hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
 		goto unreserve_va;
 	}
 
+	mutex_unlock(&hdev->mmu_lock);
+
 	/* Enable MMU on KDMA */
 	gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
 
@@ -10277,11 +10284,16 @@  static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, v
 	gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
 
 	mutex_lock(&hdev->mmu_lock);
-	hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
-	hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
+
+	rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
+	if (rc)
+		goto unreserve_va;
+
+	rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
 				      ctx->asid, reserved_va_base, SZ_2M);
-	mutex_unlock(&hdev->mmu_lock);
+
 unreserve_va:
+	mutex_unlock(&hdev->mmu_lock);
 	hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
 free_data_buffer:
 	hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
@@ -10334,17 +10346,24 @@  static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *c
 	}
 
 	mutex_lock(&hdev->mmu_lock);
+
 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
 					HOST_SPACE_INTERNAL_CB_SZ);
-	hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
-	mutex_unlock(&hdev->mmu_lock);
-
 	if (rc)
 		goto unreserve_internal_cb_pool;
 
+	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
+	if (rc)
+		goto unmap_internal_cb_pool;
+
+	mutex_unlock(&hdev->mmu_lock);
+
 	return 0;
 
+unmap_internal_cb_pool:
+	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
 unreserve_internal_cb_pool:
+	mutex_unlock(&hdev->mmu_lock);
 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
 destroy_internal_cb_pool:
 	gen_pool_destroy(hdev->internal_cb_pool);