[23/35] drm/amdkfd: invalidate tables on page retry fault

Message ID: 20210107030127.20393-24-Felix.Kuehling@amd.com
State: New, archived
Series: Add HMM-based SVM memory manager to KFD

Commit Message

Felix Kuehling Jan. 7, 2021, 3:01 a.m. UTC
From: Alex Sierra <alex.sierra@amd.com>

GPU page tables are invalidated by unmapping the prange directly in the
MMU notifier when page fault retry is enabled through the amdgpu_noretry
global parameter. The page table restore is then performed in the page
fault handler.
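
To make the control flow concrete, here is a small user-space sketch of
the decision this introduces (a toy model only; svm_range_evict(),
svm_range_unmap_from_gpus() and the restore worker are the real kernel
entry points, while the toy_* names below are made up for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for one invalidated range; struct svm_range is far richer. */
struct toy_range {
	unsigned long start, last;
};

/*
 * Sketch of the branch added to svm_range_evict(): with XNACK (retry
 * faults) enabled the range is unmapped from the GPU page tables right
 * away and the page fault handler restores it later; without XNACK the
 * delayed restore worker is scheduled instead.
 */
static void toy_evict(struct toy_range *r, bool xnack_enabled)
{
	if (!xnack_enabled)
		printf("schedule restore work for [0x%lx 0x%lx]\n",
		       r->start, r->last);
	else
		printf("unmap [0x%lx 0x%lx] from GPUs, retry fault restores it\n",
		       r->start, r->last);
}

int main(void)
{
	struct toy_range r = { 0x1000, 0x1fff };

	toy_evict(&r, false);	/* XNACK off: evict, delayed restore */
	toy_evict(&r, true);	/* XNACK on: immediate unmap */
	return 0;
}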

If XNACK is on, we need to update the GPU mapping after prefetch
migration to avoid a GPU VM fault: range migration unmaps the range from
the GPUs, and no restore work is scheduled to update the GPU mapping.
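
The prefetch-migration side can be sketched the same way (again a toy
model; the "migrated && !p->xnack_enabled" condition comes from the
svm_range_set_attr() hunk below, everything else is illustrative):

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy model of the prefetch-migration path in svm_range_set_attr():
 * when the range was migrated and XNACK is disabled, the delayed restore
 * worker will remap it; when XNACK is enabled there is no restore work,
 * so the mapping must be updated immediately to avoid a GPU VM fault.
 */
static void toy_set_attr(bool migrated, bool xnack_enabled)
{
	if (migrated && !xnack_enabled) {
		printf("restore_work will update mappings of GPUs\n");
		return;
	}
	printf("validate and map the range to GPUs now\n");
}

int main(void)
{
	toy_set_attr(true, false);	/* migrated, XNACK off: rely on restore work */
	toy_set_attr(true, true);	/* migrated, XNACK on: map immediately */
	return 0;
}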

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

Patch

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 37f35f986930..ea27c5ed4ef3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1279,7 +1279,9 @@  svm_range_evict(struct svm_range_list *svms, struct mm_struct *mm,
 	int r = 0;
 	struct interval_tree_node *node;
 	struct svm_range *prange;
+	struct kfd_process *p;
 
+	p = container_of(svms, struct kfd_process, svms);
 	svms_lock(svms);
 
 	pr_debug("invalidate svms 0x%p [0x%lx 0x%lx]\n", svms, start, last);
@@ -1292,8 +1294,13 @@  svm_range_evict(struct svm_range_list *svms, struct mm_struct *mm,
 		next = interval_tree_iter_next(node, start, last);
 
 		invalid = atomic_inc_return(&prange->invalid);
-		evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
-		if (evicted_ranges == 1) {
+
+		if (!p->xnack_enabled) {
+			evicted_ranges =
+				atomic_inc_return(&svms->evicted_ranges);
+			if (evicted_ranges != 1)
+				goto next_node;
+
 			pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
 				 prange->svms, prange->it_node.start,
 				 prange->it_node.last);
@@ -1306,7 +1313,14 @@  svm_range_evict(struct svm_range_list *svms, struct mm_struct *mm,
 			pr_debug("schedule to restore svm %p ranges\n", svms);
 			schedule_delayed_work(&svms->restore_work,
 			   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+		} else {
+			pr_debug("invalidate svms 0x%p [0x%lx 0x%lx] %d\n",
+				 prange->svms, prange->it_node.start,
+				 prange->it_node.last, invalid);
+			if (invalid == 1)
+				svm_range_unmap_from_gpus(prange);
 		}
+next_node:
 		node = next;
 	}
 
@@ -1944,7 +1958,7 @@  svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
 		if (r)
 			goto out_unlock;
 
-		if (migrated) {
+		if (migrated && !p->xnack_enabled) {
 			pr_debug("restore_work will update mappings of GPUs\n");
 			mutex_unlock(&prange->mutex);
 			continue;