@@ -710,35 +710,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
- uint16_t vmid)
-{
- if (adev->family == AMDGPU_FAMILY_AI) {
- int i;
-
- for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
- } else {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
- }
-
- return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid,
- enum TLB_FLUSH_TYPE flush_type,
- uint32_t inst)
-{
- bool all_hub = false;
-
- if (adev->family == AMDGPU_FAMILY_AI ||
- adev->family == AMDGPU_FAMILY_RV)
- all_hub = true;
-
- return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
-}
-
bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
{
return adev->have_atomics_support;
@@ -163,11 +163,6 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
- uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
- uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
- uint32_t inst);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@@ -1349,6 +1349,50 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
return 0;
}
+/**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ * @xcc_mask: mask of the XCC instances on which the flush is performed
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success, negative error code otherwise.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask)
+{
+ uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+ bool all_hub = false;
+ int xcc = 0, r = 0;
+
+ WARN_ON_ONCE(!vm->is_compute_context);
+
+	/*
+	 * It can be that we race and lose here, but that is extremely unlikely,
+	 * and the worst that can happen is that we flush the changes into the
+	 * TLB once more, which is harmless.
+	 */
+ if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+ return 0;
+
+ if (adev->family == AMDGPU_FAMILY_AI ||
+ adev->family == AMDGPU_FAMILY_RV)
+ all_hub = true;
+
+ for_each_inst(xcc, xcc_mask) {
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+ all_hub, xcc);
+ if (r)
+ break;
+ }
+ return r;
+}
+
/**
* amdgpu_vm_bo_add - add a bo to a specific vm
*
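The new helper deduplicates flushes with the VM's TLB sequence number: page-table updates advance tlb_seq, and a flush is only issued when kfd_last_flushed_seq lags behind it; the atomic exchange means a lost race costs at most one redundant flush. The stand-alone sketch below illustrates that pattern with C11 atomics; all names (fake_vm, flush_if_needed, fake_hw_flush) are illustrative stand-ins, not the kernel API.

/*
 * Minimal userspace sketch of the "flush only when the sequence advanced"
 * pattern used by amdgpu_vm_flush_compute_tlb(). All names here are
 * illustrative stand-ins, not the kernel API.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct fake_vm {
	atomic_uint_fast64_t tlb_seq;           /* bumped on page-table updates */
	atomic_uint_fast64_t last_flushed_seq;  /* last sequence pushed to HW */
};

static void fake_hw_flush(void)
{
	puts("TLB flush issued");
}

/* Returns true if a flush was actually issued. */
static bool flush_if_needed(struct fake_vm *vm)
{
	uint_fast64_t seq = atomic_load(&vm->tlb_seq);

	/* Two racing callers may both read the same seq; the worst case is
	 * one redundant flush, which is harmless. */
	if (atomic_exchange(&vm->last_flushed_seq, seq) == seq)
		return false;

	fake_hw_flush();
	return true;
}

int main(void)
{
	struct fake_vm vm;

	atomic_init(&vm.tlb_seq, 0);
	atomic_init(&vm.last_flushed_seq, 0);

	atomic_fetch_add(&vm.tlb_seq, 1);   /* a mapping changed */
	flush_if_needed(&vm);               /* issues a flush */
	flush_if_needed(&vm);               /* no-op, sequence unchanged */
	return 0;
}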
@@ -316,6 +316,7 @@ struct amdgpu_vm {
/* Last finished delayed update */
atomic64_t tlb_seq;
struct dma_fence *last_tlb_flush;
+	/* Last tlb_seq flushed on behalf of the KFD compute context */
+	atomic64_t		kfd_last_flushed_seq;
/* Last unlocked submission to the scheduler entities */
struct dma_fence *last_unlocked;
@@ -434,6 +435,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ uint32_t flush_type,
+ uint32_t xcc_mask);
void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -748,7 +748,6 @@ struct kfd_process_device {
/* VM context for GPUVM allocations */
struct file *drm_file;
void *drm_priv;
- atomic64_t tlb_seq;
/* GPUVM allocations storage */
struct idr alloc_idr;
@@ -1462,7 +1461,14 @@ void kfd_signal_reset_event(struct kfd_node *dev);
void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+ enum TLB_FLUSH_TYPE type)
+{
+ struct amdgpu_device *adev = pdd->dev->adev;
+ struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+ amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+}
static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
{
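Note that the wrapper simply forwards the KFD node's xcc_mask; amdgpu_vm_flush_compute_tlb() then walks each set bit of that mask and flushes the corresponding XCC instance, bailing out on the first error. A stand-alone sketch of that walk follows, with flush_one_inst() as a hypothetical stand-in for amdgpu_gmc_flush_gpu_tlb_pasid().

/*
 * Stand-alone sketch of the per-instance loop in amdgpu_vm_flush_compute_tlb():
 * visit every set bit of xcc_mask and flush that instance, stopping at the
 * first failure. flush_one_inst() is a hypothetical stand-in for
 * amdgpu_gmc_flush_gpu_tlb_pasid().
 */
#include <stdint.h>
#include <stdio.h>

static int flush_one_inst(int inst)
{
	printf("flush TLB on XCC instance %d\n", inst);
	return 0;  /* 0 on success, negative error code on failure */
}

static int flush_all_insts(uint32_t xcc_mask)
{
	int r = 0;

	for (int inst = 0; xcc_mask; inst++, xcc_mask >>= 1) {
		if (!(xcc_mask & 1))
			continue;
		r = flush_one_inst(inst);
		if (r)
			break;  /* mirrors the early exit in the kernel loop */
	}
	return r;
}

int main(void)
{
	return flush_all_insts(0x5) ? 1 : 0;  /* flushes instances 0 and 2 */
}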
@@ -1662,7 +1662,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
return ret;
}
pdd->drm_priv = drm_file->private_data;
- atomic64_set(&pdd->tlb_seq, 0);
ret = kfd_process_device_reserve_ib_mem(pdd);
if (ret)
@@ -2054,36 +2053,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
- struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
- uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
- struct kfd_node *dev = pdd->dev;
- uint32_t xcc_mask = dev->xcc_mask;
- int xcc = 0;
-
- /*
- * It can be that we race and lose here, but that is extremely unlikely
- * and the worst thing which could happen is that we flush the changes
- * into the TLB once more which is harmless.
- */
- if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
- return;
-
- if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
- /* Nothing to flush until a VMID is assigned, which
- * only happens when the first queue is created.
- */
- if (pdd->qpd.vmid)
- amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
- pdd->qpd.vmid);
- } else {
- for_each_inst(xcc, xcc_mask)
- amdgpu_amdkfd_flush_gpu_tlb_pasid(
- dev->adev, pdd->process->pasid, type, xcc);
- }
-}
-
/* assumes caller holds process lock. */
int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
{