@@ -259,6 +259,35 @@ svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
put_page(page);
}
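+/* Return the offset of a device page within the device memory resource */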
+static unsigned long
+svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
+{
+ unsigned long addr;
+
+ addr = page_to_pfn(page) << PAGE_SHIFT;
+ return (addr - adev->kfd.dev->pgmap.res.start);
+}
+
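+/* Allocate and lock a system memory page to use as a migration destination */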
+static struct page *
+svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ struct page *page;
+
+ page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+ if (page)
+ lock_page(page);
+
+ return page;
+}
+
+void svm_migrate_put_sys_page(unsigned long addr)
+{
+ struct page *page;
+
+ page = pfn_to_page(addr >> PAGE_SHIFT);
+ unlock_page(page);
+ put_page(page);
+}
static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
@@ -471,13 +500,208 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc)
static void svm_migrate_page_free(struct page *page)
{
+ /* Keep this function to avoid a warning */
+}
+
+static int
+svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+ struct migrate_vma *migrate,
+ struct dma_fence **mfence)
+{
+ uint64_t npages = migrate->cpages;
+ uint64_t *src, *dst;
+ struct page *dpage;
+ uint64_t i = 0, j;
+ uint64_t addr;
+ int r = 0;
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+ prange->it_node.start, prange->it_node.last);
+
+ addr = prange->it_node.start << PAGE_SHIFT;
+
+ src = kvmalloc_array(npages << 1, sizeof(*src), GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ dst = src + npages;
+
+ prange->pages_addr = kvmalloc_array(npages, sizeof(*prange->pages_addr),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!prange->pages_addr) {
+ r = -ENOMEM;
+ goto out_oom;
+ }
+
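+ /* Walk the source pages, batching runs that are contiguous in VRAM so
+ * each run is copied with a single svm_migrate_copy_memory_gart() call.
+ */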
+ for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
+ struct page *spage;
+
+ spage = migrate_pfn_to_page(migrate->src[i]);
+ if (!spage) {
+ pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
+ prange->svms, prange->it_node.start,
+ prange->it_node.last);
+ r = -ENOMEM;
+ goto out_oom;
+ }
+ src[i] = svm_migrate_addr(adev, spage);
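+ /* If the VRAM source is no longer contiguous, flush the pending
+ * batch of j pages before starting a new one.
+ */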
+ if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+ r = svm_migrate_copy_memory_gart(adev, src + i - j,
+ dst + i - j, j,
+ FROM_VRAM_TO_RAM,
+ mfence);
+ if (r)
+ goto out_oom;
+ j = 0;
+ }
+
+ dpage = svm_migrate_get_sys_page(migrate->vma, addr);
+ if (!dpage) {
+ pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
+ prange->svms, prange->it_node.start,
+ prange->it_node.last);
+ r = -ENOMEM;
+ goto out_oom;
+ }
+
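+ /* Record the system page address for the range and hand the
+ * destination pfn to the migrate_vma framework.
+ */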
+ dst[i] = page_to_pfn(dpage) << PAGE_SHIFT;
+ *(prange->pages_addr + i) = dst[i];
+
+ migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
+ migrate->dst[i] |= MIGRATE_PFN_LOCKED;
+
+ }
+
+ r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j,
+ FROM_VRAM_TO_RAM, mfence);
+
+out_oom:
+ kvfree(src);
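+ /* On error, release the destination system pages already allocated
+ * and clear their migrate entries.
+ */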
+ if (r) {
+ pr_debug("failed %d copy to ram\n", r);
+ while (i--) {
+ svm_migrate_put_sys_page(dst[i]);
+ migrate->dst[i] = 0;
+ }
+ }
+
+ return r;
+}
+
+static int
+svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+ struct vm_area_struct *vma, uint64_t start, uint64_t end)
+{
+ uint64_t npages = (end - start) >> PAGE_SHIFT;
+ struct dma_fence *mfence = NULL;
+ struct migrate_vma migrate;
+ int r = -ENOMEM;
+
+ memset(&migrate, 0, sizeof(migrate));
+ migrate.vma = vma;
+ migrate.start = start;
+ migrate.end = end;
+ migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+ migrate.pgmap_owner = adev;
+
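+ /* Allocate the src and dst pfn arrays as one zeroed buffer of
+ * 2 * npages entries; dst starts at the second half.
+ */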
+ migrate.src = kvmalloc_array(npages << 1, sizeof(*migrate.src),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!migrate.src)
+ goto out;
+ migrate.dst = migrate.src + npages;
+
+ r = migrate_vma_setup(&migrate);
+ if (r) {
+ pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
+ r, prange->svms, prange->it_node.start,
+ prange->it_node.last);
+ goto out_free;
+ }
+
+ pr_debug("cpages %ld\n", migrate.cpages);
+
+ if (migrate.cpages) {
+ svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence);
+ migrate_vma_pages(&migrate);
+ svm_migrate_copy_done(adev, mfence);
+ migrate_vma_finalize(&migrate);
+ } else {
+ pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
+ prange->it_node.start, prange->it_node.last);
+ }
+
+out_free:
+ kvfree(migrate.src);
+out:
+ return r;
+}
+
+/**
+ * svm_migrate_vram_to_ram - migrate svm range from device to system
+ * @prange: range structure
+ * @mm: the process mm structure
+ *
+ * Context: Process context, the caller holds the mmap lock and prange->lock,
+ * and takes the svms srcu read lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev;
+ struct vm_area_struct *vma;
+ unsigned long addr;
+ unsigned long start;
+ unsigned long end;
+ int r = 0;
+
+ if (!prange->actual_loc) {
+ pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
+ prange->it_node.start, prange->it_node.last);
+ return 0;
+ }
+
+ adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
+ if (!adev) {
+ pr_debug("failed to get device by id 0x%x\n",
+ prange->actual_loc);
+ return -ENODEV;
+ }
+
+ pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+ prange->it_node.start, prange->it_node.last);
+
+ start = prange->it_node.start << PAGE_SHIFT;
+ end = (prange->it_node.last + 1) << PAGE_SHIFT;
+
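+ /* The range may span several VMAs; migrate it one VMA at a time */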
+ for (addr = start; addr < end;) {
+ unsigned long next;
+
+ vma = find_vma(mm, addr);
+ if (!vma || addr < vma->vm_start)
+ break;
+
+ next = min(vma->vm_end, end);
+ r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
+ if (r) {
+ pr_debug("failed %d to migrate\n", r);
+ break;
+ }
+ addr = next;
+ }
+
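+ /* Free the VRAM backing store and mark the range as migrated to ram */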
+ svm_range_vram_node_free(prange);
+ prange->actual_loc = 0;
+
+ return r;
}
/**
* svm_migrate_to_ram - CPU page fault handler
* @vmf: CPU vm fault vma, address
*
- * Context: vm fault handler, mm->mmap_sem is taken
+ * Context: vm fault handler, caller holds the mmap lock
*
* Return:
* 0 - OK
@@ -485,7 +709,53 @@ static void svm_migrate_page_free(struct page *page)
*/
static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
{
- return VM_FAULT_SIGBUS;
+ unsigned long addr = vmf->address;
+ struct vm_area_struct *vma;
+ struct svm_range *prange;
+ struct list_head list;
+ struct kfd_process *p;
+ int r = VM_FAULT_SIGBUS;
+ int srcu_idx;
+
+ vma = vmf->vma;
+
+ p = kfd_lookup_process_by_mm(vma->vm_mm);
+ if (!p) {
+ pr_debug("failed find process at fault address 0x%lx\n", addr);
+ return VM_FAULT_SIGBUS;
+ }
+
+ /* Take the svms srcu read lock to prevent prange from being removed */
+ srcu_idx = srcu_read_lock(&p->svms.srcu);
+
+ addr >>= PAGE_SHIFT;
+ pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr);
+
+ r = svm_range_split_by_granularity(p, addr, &list);
+ if (r) {
+ pr_debug("failed %d to split range by granularity\n", r);
+ goto out_srcu;
+ }
+
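+ /* Migrate each range of the granule back to system memory, holding
+ * the range mutex across each migration.
+ */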
+ list_for_each_entry(prange, &list, update_list) {
+ mutex_lock(&prange->mutex);
+ r = svm_migrate_vram_to_ram(prange, vma->vm_mm);
+ mutex_unlock(&prange->mutex);
+ if (r) {
+ pr_debug("failed %d migrate [0x%lx 0x%lx] to ram\n", r,
+ prange->it_node.start, prange->it_node.last);
+ goto out_srcu;
+ }
+ }
+
+out_srcu:
+ srcu_read_unlock(&p->svms.srcu, srcu_idx);
+ kfd_unref_process(p);
+
+ if (r)
+ return VM_FAULT_SIGBUS;
+
+ return 0;
}
static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
@@ -38,6 +38,9 @@ enum MIGRATION_COPY_DIR {
};
int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc);
+int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
#if defined(CONFIG_DEVICE_PRIVATE)
int svm_migrate_init(struct amdgpu_device *adev);
@@ -819,6 +819,92 @@ svm_range_split_add_front(struct svm_range *prange, struct svm_range *new,
return 0;
}
+/**
+ * svm_range_split_by_granularity - collect ranges within granularity boundary
+ *
+ * @p: the process with svms list
+ * @addr: the vm fault address in pages, to search ranges
+ * @list: output, the range list
+ *
+ * Collects the small ranges that make up one migration granule and splits the
+ * first and last ranges at the granularity boundaries.
+ *
+ * Context: Takes and releases the svms lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+int svm_range_split_by_granularity(struct kfd_process *p, unsigned long addr,
+ struct list_head *list)
+{
+ struct svm_range *prange;
+ struct svm_range *tail;
+ struct svm_range *new;
+ unsigned long start;
+ unsigned long last;
+ unsigned long size;
+ int r = 0;
+
+ svms_lock(&p->svms);
+
+ prange = svm_range_from_addr(&p->svms, addr);
+ if (!prange) {
+ pr_debug("cannot find svm range at 0x%lx\n", addr);
+ svms_unlock(&p->svms);
+ return -EFAULT;
+ }
+
+ /* Align the split range start and size to the granularity size so that a
+ * single PTE covers the whole range. This reduces the number of PTEs
+ * updated and the L1 TLB space used for translation.
+ */
+ size = 1ULL << prange->granularity;
+ start = ALIGN_DOWN(addr, size);
+ last = ALIGN(addr + 1, size) - 1;
+ INIT_LIST_HEAD(list);
+
+ pr_debug("svms 0x%p split [0x%lx 0x%lx] at 0x%lx granularity 0x%lx\n",
+ prange->svms, start, last, addr, size);
+
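+ /* Split the first range at the aligned start so the granule begins on
+ * a granularity boundary.
+ */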
+ if (start > prange->it_node.start) {
+ r = svm_range_split(prange, prange->it_node.start, start - 1,
+ &new);
+ if (r)
+ goto out_unlock;
+
+ svm_range_add_to_svms(new);
+ } else {
+ new = prange;
+ }
+
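+ /* Collect the following ranges until the granule size is covered or
+ * there is no next range within [start, last].
+ */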
+ while (size > new->npages) {
+ struct interval_tree_node *next;
+
+ list_add(&new->update_list, list);
+
+ next = interval_tree_iter_next(&new->it_node, start, last);
+ if (!next)
+ goto out_unlock;
+
+ size -= new->npages;
+ new = container_of(next, struct svm_range, it_node);
+ }
+
+ if (last < new->it_node.last) {
+ r = svm_range_split(new, new->it_node.start, last, &tail);
+ if (r)
+ goto out_unlock;
+ svm_range_add_to_svms(tail);
+ }
+
+ list_add(&new->update_list, list);
+
+out_unlock:
+ svms_unlock(&p->svms);
+
+ return r;
+}
+
static uint64_t
svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange)
{
@@ -1508,6 +1594,27 @@ static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
.invalidate = svm_range_cpu_invalidate_pagetables,
};
+/**
+ * svm_range_from_addr - find svm range from fault address
+ * @svms: svm range list header
+ * @addr: address to search range interval tree, in pages
+ *
+ * Context: The caller must hold svms_lock
+ *
+ * Return: the svm_range found or NULL
+ */
+struct svm_range *
+svm_range_from_addr(struct svm_range_list *svms, unsigned long addr)
+{
+ struct interval_tree_node *node;
+
+ node = interval_tree_iter_first(&svms->objects, addr, addr);
+ if (!node)
+ return NULL;
+
+ return container_of(node, struct svm_range, it_node);
+}
+
void svm_range_list_fini(struct kfd_process *p)
{
pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, &p->svms);
@@ -1754,11 +1861,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
pr_debug("migrate from ram to vram\n");
r = svm_migrate_ram_to_vram(prange, best_loc);
-
- if (!r)
- *migrated = true;
+ } else {
+ pr_debug("migrate from vram to ram\n");
+ r = svm_migrate_vram_to_ram(prange, current->mm);
}
+ if (!r)
+ *migrated = true;
+
return r;
}
@@ -112,10 +112,14 @@ void svm_range_list_fini(struct kfd_process *p);
int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
uint64_t size, uint32_t nattrs,
struct kfd_ioctl_svm_attribute *attrs);
+struct svm_range *svm_range_from_addr(struct svm_range_list *svms,
+ unsigned long addr);
struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange,
uint32_t id);
int svm_range_vram_node_new(struct amdgpu_device *adev,
struct svm_range *prange, bool clear);
void svm_range_vram_node_free(struct svm_range *prange);
+int svm_range_split_by_granularity(struct kfd_process *p, unsigned long addr,
+ struct list_head *list);
#endif /* KFD_SVM_H_ */