@@ -455,18 +455,19 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
* svm_migrate_ram_to_vram - migrate svm range from system to device
* @prange: range structure
* @best_loc: the device to migrate to
+ * @mm: the process mm structure
*
 * Context: Process context, caller must hold mmap read lock, svms lock, prange lock
*
* Return:
* 0 - OK, otherwise error code
*/
-int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc)
+int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+ struct mm_struct *mm)
{
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
- struct mm_struct *mm;
int r = 0;
if (prange->actual_loc == best_loc) {
@@ -487,8 +488,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc)
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
- mm = current->mm;
-
for (addr = start; addr < end;) {
unsigned long next;
@@ -37,7 +37,8 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM
};
-int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc);
+int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+ struct mm_struct *mm);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
@@ -867,6 +867,9 @@ int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
int kfd_process_device_from_gpuidx(struct kfd_process *p,
uint32_t gpu_idx, struct kfd_dev **gpu);
+int kfd_process_gpuid_from_kgd(struct kfd_process *p,
+ struct amdgpu_device *adev, uint32_t *gpuid,
+ uint32_t *gpuidx);
void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_restore_queues(struct kfd_process *p);
@@ -1684,6 +1684,22 @@ int kfd_process_device_from_gpuidx(struct kfd_process *p,
return -EINVAL;
}
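+/* kfd_process_gpuid_from_kgd - find gpuid and gpuidx of @adev in process @p
+ *
+ * Walks the per-process device data and returns 0 with @gpuid and @gpuidx
+ * set, or -EINVAL if the process has no pdd for the device.
+ */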
+int
+kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
+ uint32_t *gpuid, uint32_t *gpuidx)
+{
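+	/* amdgpu passes its adev as the opaque kgd_dev handle */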
+ struct kgd_dev *kgd = (struct kgd_dev *)adev;
+ int i;
+
+ for (i = 0; i < p->n_pdds; i++)
+ if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
+ *gpuid = p->pdds[i]->dev->id;
+ *gpuidx = i;
+ return 0;
+ }
+ return -EINVAL;
+}
+
static void evict_process_worker(struct work_struct *work)
{
int ret;
@@ -179,8 +179,11 @@ static void svm_range_free_dma_mappings(struct svm_range *prange)
int r;
p = container_of(prange->svms, struct kfd_process, svms);
- bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
- MAX_GPU_INSTANCE);
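+	/* xnack on: only GPUs in the in-place access bitmap are considered */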
+ if (p->xnack_enabled)
+ bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+ else
+ bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+ MAX_GPU_INSTANCE);
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
dma_addr = prange->dma_addr[gpuidx];
@@ -1262,7 +1265,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}
-static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm)
+int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm)
{
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_process_device *pdd;
@@ -1279,9 +1282,29 @@ static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm)
else
bo_adev = NULL;
- bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
- MAX_GPU_INSTANCE);
p = container_of(prange->svms, struct kfd_process, svms);
+ if (p->xnack_enabled) {
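+		/* GPUs with in-place access always need the mapping created */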
+ bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+
+		/* If the range was prefetched to this GPU, or migrated to it
+		 * on a GPU retry fault, and this GPU has the ACCESS attribute
+		 * for the range, create the mapping on this GPU.
+		 */
+ if (prange->actual_loc) {
+ gpuidx = kfd_process_gpuidx_from_gpuid(p,
+ prange->actual_loc);
+ if (gpuidx < 0) {
+				WARN_ONCE(1, "failed to get device by id 0x%x\n",
+					  prange->actual_loc);
+ return -EINVAL;
+ }
+ if (test_bit(gpuidx, prange->bitmap_access))
+ bitmap_set(bitmap, gpuidx, 1);
+ }
+ } else {
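+		/* xnack off: map on all GPUs with access, in place or not */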
+ bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+ MAX_GPU_INSTANCE);
+ }
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
r = kfd_process_device_from_gpuidx(p, gpuidx, &dev);
@@ -1995,15 +2018,76 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr,
return NULL;
}
+/* svm_range_best_restore_location - decide the best fault restore location
+ * @prange: svm range structure
+ * @adev: the GPU on which the vm fault happened
+ *
+ * This is only called when xnack is on, to decide the best location to restore
+ * the range mapping after a GPU vm fault. The caller migrates the range to the
+ * best location if the actual loc is not the best location, then updates the
+ * GPU page table mapping to the best location.
+ *
+ * If the faulting GPU is the range preferred loc, best_loc is the preferred loc.
+ * If the faulting GPU is in the range ACCESSIBLE bitmap, best_loc is that GPU.
+ * If the faulting GPU is in the range ACCESSIBLE_IN_PLACE bitmap, then
+ *   if the range actual loc is the CPU, best_loc is the CPU;
+ *   if the faulting GPU is in the same XGMI hive as the range actual loc GPU,
+ *   best_loc is the range actual loc; otherwise best_loc is the CPU.
+ * Otherwise the faulting GPU has no access and best_loc is -1.
+ *
+ * Return:
+ * -1 if the faulting GPU has no access to the range
+ * 0 for the CPU, or the GPU id to restore to
+ */
+static int32_t
+svm_range_best_restore_location(struct svm_range *prange,
+ struct amdgpu_device *adev)
+{
+ struct amdgpu_device *bo_adev;
+ struct kfd_process *p;
+ int32_t gpuidx;
+ uint32_t gpuid;
+ int r;
+
+ p = container_of(prange->svms, struct kfd_process, svms);
+
+ r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx);
+ if (r < 0) {
+ pr_debug("failed to get gpuid from kgd\n");
+ return -1;
+ }
+
+ if (prange->preferred_loc == gpuid)
+ return prange->preferred_loc;
+
+ if (test_bit(gpuidx, prange->bitmap_access))
+ return gpuid;
+
+ if (test_bit(gpuidx, prange->bitmap_aip)) {
+ if (!prange->actual_loc)
+ return 0;
+
+ bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
+ if (amdgpu_xgmi_same_hive(adev, bo_adev))
+ return prange->actual_loc;
+ else
+ return 0;
+ }
+
+ return -1;
+}
+
int
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint64_t addr)
{
- int r = 0;
+ struct amdgpu_device *bo_adev;
struct mm_struct *mm = NULL;
- struct svm_range *prange;
struct svm_range_list *svms;
+ struct svm_range *prange;
struct kfd_process *p;
+ int32_t best_loc;
+ int r = 0;
p = kfd_lookup_process_by_pasid(pasid);
if (!p) {
@@ -2036,21 +2120,50 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
mutex_lock(&prange->migrate_mutex);
+ best_loc = svm_range_best_restore_location(prange, adev);
+ if (best_loc == -1) {
+		pr_debug("svms %p failed to get best restore location [0x%lx 0x%lx]\n",
+			 svms, prange->start, prange->last);
+ r = -EACCES;
+ goto out_unlock_range;
+ }
+
+ pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n",
+ svms, prange->start, prange->last, best_loc,
+ prange->actual_loc);
+
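+	/* Migrate the range to best_loc before validating and mapping it */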
+ if (prange->actual_loc != best_loc) {
+ if (best_loc)
+ r = svm_migrate_ram_to_vram(prange, best_loc, mm);
+ else
+ r = svm_migrate_vram_to_ram(prange, mm);
+ if (r) {
+ pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
+ r, svms, prange->start, prange->last);
+ goto out_unlock_range;
+ }
+ }
+
r = svm_range_validate(mm, prange);
if (r) {
- pr_debug("failed %d to validate svms 0x%p [0x%lx 0x%lx]\n", r,
+ pr_debug("failed %d to validate svms %p [0x%lx 0x%lx]\n", r,
svms, prange->start, prange->last);
-
goto out_unlock_range;
}
- pr_debug("restoring svms 0x%p [0x%lx %lx] mapping\n",
- svms, prange->start, prange->last);
+ if (prange->svm_bo && prange->ttm_res)
+ bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+ else
+ bo_adev = NULL;
+
+	pr_debug("restoring svms 0x%p [0x%lx 0x%lx] mapping, bo_adev is %s\n",
+		 svms, prange->start, prange->last,
+		 bo_adev ? "not NULL" : "NULL");
r = svm_range_map_to_gpus(prange, true);
if (r)
- pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r,
- svms, prange->start, prange->last);
+ pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n",
+ r, svms, prange->start, prange->last);
out_unlock_range:
mutex_unlock(&prange->migrate_mutex);
@@ -2184,7 +2297,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
return 0;
}
-/* svm_range_best_location - decide the best actual location
+/* svm_range_best_prefetch_location - decide the best prefetch location
* @prange: svm range structure
*
* For xnack off:
@@ -2206,7 +2319,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
* Return:
* 0 for CPU or GPU id
*/
-static uint32_t svm_range_best_location(struct svm_range *prange)
+static uint32_t
+svm_range_best_prefetch_location(struct svm_range *prange)
{
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
uint32_t best_loc = prange->prefetch_loc;
@@ -2283,7 +2397,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
int r = 0;
*migrated = false;
- best_loc = svm_range_best_location(prange);
+ best_loc = svm_range_best_prefetch_location(prange);
if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
best_loc == prange->actual_loc)
@@ -2304,10 +2418,10 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
}
pr_debug("migrate from ram to vram\n");
- r = svm_migrate_ram_to_vram(prange, best_loc);
+ r = svm_migrate_ram_to_vram(prange, best_loc, mm);
} else {
pr_debug("migrate from vram to ram\n");
- r = svm_migrate_vram_to_ram(prange, current->mm);
+ r = svm_migrate_vram_to_ram(prange, mm);
}
if (!r)
@@ -166,6 +166,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
int svm_range_restore_pages(struct amdgpu_device *adev,
unsigned int pasid, uint64_t addr);
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
+int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm);
void svm_range_add_list_work(struct svm_range_list *svms,
struct svm_range *prange, struct mm_struct *mm,
enum svm_work_list_ops op);