@@ -117,6 +117,7 @@ struct amdkfd_process_info {
atomic_t evicted_bos;
struct delayed_work restore_userptr_work;
struct pid *pid;
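+ /* Blocks userptr eviction handling during CRIU restore */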
+ bool block_mmu_notifications;
};
int amdgpu_amdkfd_init(void);
@@ -249,7 +250,9 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags);
+ uint64_t *offset, uint32_t flags, bool criu_resume);
+void amdgpu_amdkfd_block_mmu_notifications(void *p);
+int amdgpu_amdkfd_criu_resume(void *p);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
uint64_t *size);
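The new entry points are driven in a fixed order by the KFD CRIU ioctls further below. A minimal caller-side sketch of that sequencing (assuming a valid kgd_process_info pointer, error handling elided):

	/* Restore stage: suppress userptr eviction handling for this process. */
	amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);

	/* Recreate each checkpointed BO; criu_resume = true defers userptr
	 * validation instead of pinning pages immediately. */
	ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, va, size, drm_priv,
						      &mem, &offset, flags, true);

	/* Resume stage: unblock notifications and schedule the deferred
	 * userptr validation. */
	ret = amdgpu_amdkfd_criu_resume(p->kgd_process_info);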
@@ -597,7 +597,8 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
+ bool criu_resume)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -619,6 +620,17 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
goto out;
}
+ if (criu_resume) {
+ /*
+ * During a CRIU restore operation, the userptr buffer objects
+ * are validated later, in the restore_userptr_work worker,
+ * which is scheduled when the CRIU master process issues the
+ * resume ioctl for the target pid.
+ */
+ atomic_inc(&mem->invalid);
+ mutex_unlock(&process_info->lock);
+ return 0;
+ }
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
@@ -982,6 +994,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
INIT_DELAYED_WORK(&info->restore_userptr_work,
amdgpu_amdkfd_restore_userptr_worker);
+ info->block_mmu_notifications = false;
*process_info = info;
*ef = dma_fence_get(&info->eviction_fence->base);
}
@@ -1139,10 +1152,37 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
return avm->pd_phys_addr;
}
+void amdgpu_amdkfd_block_mmu_notifications(void *p)
+{
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ WRITE_ONCE(pinfo->block_mmu_notifications, true);
+}
+
+int amdgpu_amdkfd_criu_resume(void *p)
+{
+ int ret = 0;
+ struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+ mutex_lock(&pinfo->lock);
+ if (!READ_ONCE(pinfo->block_mmu_notifications)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Bump the eviction count so the worker runs a full restore pass */
+ atomic_inc(&pinfo->evicted_bos);
+ WRITE_ONCE(pinfo->block_mmu_notifications, false);
+ pr_debug("scheduling work\n");
+ schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+
+out_unlock:
+ mutex_unlock(&pinfo->lock);
+ return ret;
+}
+
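The -EINVAL path enforces the contract that resume is only valid after a restore that blocked notifications, and it fails without touching any state. A hypothetical caller fragment:

	ret = amdgpu_amdkfd_criu_resume(p->kgd_process_info);
	if (ret == -EINVAL)
		pr_err("CRIU resume issued without a prior restore\n");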
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *drm_priv, struct kgd_mem **mem,
- uint64_t *offset, uint32_t flags)
+ uint64_t *offset, uint32_t flags, bool criu_resume)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1247,7 +1287,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, user_addr);
+ pr_debug("creating userptr BO for user_addr = %llu\n", user_addr);
+ ret = init_user_pages(*mem, user_addr, criu_resume);
if (ret)
goto allocate_init_user_pages_failed;
}
@@ -1742,6 +1783,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
int evicted_bos;
int r = 0;
+ /* Do not process MMU notifications until stage-4 IOCTL is received */
+ if (READ_ONCE(process_info->block_mmu_notifications))
+ return 0;
+
atomic_inc(&mem->invalid);
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
@@ -1181,6 +1181,26 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
return ttm_pool_free(&adev->mman.bdev.pool, ttm);
}
+/**
+ * amdgpu_ttm_tt_get_userptr - Return the userptr address backing a
+ * GTT ttm_tt
+ *
+ * @tbo: The ttm_buffer_object that contains the userptr
+ * @user_addr: Where the userptr address is returned
+ *
+ * Returns 0 on success, -EINVAL if @tbo has no ttm_tt.
+ */
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr)
+{
+ struct amdgpu_ttm_tt *gtt;
+
+ if (!tbo->ttm)
+ return -EINVAL;
+
+ gtt = (void *)tbo->ttm;
+ *user_addr = gtt->userptr;
+ return 0;
+}
+
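A hypothetical caller of the new helper, mirroring what the CRIU dumper below does with it: recovering the user VA that backs a userptr BO so it can be checkpointed.

	uint64_t user_addr;

	if (!amdgpu_ttm_tt_get_userptr(&bo->tbo, &user_addr))
		pr_debug("userptr BO backed by user VA 0x%llx\n", user_addr);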
/**
* amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
* task
@@ -168,6 +168,8 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
#endif
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
+int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
+ uint64_t *user_addr);
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
uint64_t addr, uint32_t flags);
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
@@ -1301,7 +1301,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->kgd, args->va_addr, args->size,
pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
- flags);
+ flags, false);
if (err)
goto err_unlock;
@@ -1959,6 +1959,14 @@ static int kfd_ioctl_criu_dumper(struct file *filep,
bo_bucket[i].gpu_id = pdd->dev->id;
bo_bucket[i].bo_alloc_flags = (uint32_t)kgd_mem->alloc_flags;
bo_bucket[i].idr_handle = id;
+ if (bo_bucket[i].bo_alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
+ &bo_bucket[i].user_addr);
+ if (ret) {
+ pr_err("Failed to obtain user address for user-pointer bo\n");
+ goto err_unlock;
+ }
+ }
if (bo_bucket[i].bo_alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
bo_bucket[i].bo_offset = KFD_MMAP_TYPE_DOORBELL |
@@ -2004,6 +2012,7 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
struct kfd_criu_devinfo_bucket *devinfos = NULL;
uint64_t *restored_bo_offsets_arr = NULL;
struct kfd_criu_bo_buckets *bo_bucket = NULL;
+ const bool criu_resume = true;
long err = 0;
int ret, i, j = 0;
@@ -2052,6 +2061,9 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
goto err_unlock;
}
+ /* Prevent MMU notifications until stage-4 IOCTL is received */
+ amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
+
/* Create and map new BOs */
for (i = 0; i < args->num_of_bos; i++) {
struct kfd_dev *dev;
@@ -2108,8 +2120,12 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
err = -ENOMEM;
goto err_unlock;
}
+ } else if (bo_bucket[i].bo_alloc_flags &
+ KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ offset = bo_bucket[i].user_addr;
}
+
/* Create the BO */
ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->kgd,
bo_bucket[i].bo_addr,
@@ -2117,7 +2133,8 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
pdd->drm_priv,
(struct kgd_mem **) &mem,
&offset,
- bo_bucket[i].bo_alloc_flags);
+ bo_bucket[i].bo_alloc_flags,
+ criu_resume);
if (ret) {
pr_err("Could not create the BO\n");
err = -ENOMEM;
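For userptr BOs the restorer reuses the offset argument to carry the checkpointed user VA into the allocator. A sketch of the consuming side, assuming the existing userptr path in amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu takes the address from *offset for USERPTR allocations:

	/* Sketch: USERPTR allocations read the user VA from *offset rather
	 * than treating it as a doorbell/mmap offset. */
	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
		user_addr = *offset;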
@@ -2230,6 +2247,40 @@ static int kfd_ioctl_criu_restorer(struct file *filep,
return err;
}
+static int kfd_ioctl_criu_resume(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_criu_resume_args *args = data;
+ struct kfd_process *target = NULL;
+ struct pid *pid = NULL;
+ int ret = 0;
+
+ pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
+ args->pid);
+
+ pid = find_get_pid(args->pid);
+ if (!pid) {
+ pr_err("Cannot find pid info for %i\n", args->pid);
+ return -ESRCH;
+ }
+
+ pr_debug("calling kfd_lookup_process_by_pid\n");
+ target = kfd_lookup_process_by_pid(pid);
+ if (!target) {
+ pr_debug("Cannot find process info for %i\n", args->pid);
+ put_pid(pid);
+ return -ESRCH;
+ }
+
+ mutex_lock(&target->mutex);
+ ret = amdgpu_amdkfd_criu_resume(target->kgd_process_info);
+ mutex_unlock(&target->mutex);
+
+ put_pid(pid);
+ kfd_unref_process(target);
+ return ret;
+}
+
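From userspace, the resume stage reduces to one ioctl on /dev/kfd. A hypothetical example, with the ioctl and args names taken from this series:

	struct kfd_ioctl_criu_resume_args args = { .pid = target_pid };

	if (ioctl(kfd_fd, AMDKFD_IOC_CRIU_RESUME, &args) == -1)
		perror("AMDKFD_IOC_CRIU_RESUME");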
static int kfd_ioctl_criu_helper(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -2276,14 +2327,6 @@ static int kfd_ioctl_criu_helper(struct file *filep,
return ret;
}
-static int kfd_ioctl_criu_resume(struct file *filep,
- struct kfd_process *p, void *data)
-{
- pr_info("Inside %s\n",__func__);
-
- return 0;
-}
-
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -937,6 +937,7 @@ void *kfd_process_device_translate_handle(struct kfd_process_device *p,
int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
int handle);
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
bool kfd_has_process_device_data(struct kfd_process *p);
@@ -65,7 +65,8 @@ static struct workqueue_struct *kfd_process_wq;
*/
static struct workqueue_struct *kfd_restore_wq;
-static struct kfd_process *find_process(const struct task_struct *thread);
+static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref);
static void kfd_process_ref_release(struct kref *ref);
static struct kfd_process *create_process(const struct task_struct *thread);
static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
@@ -670,7 +671,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
int err;
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
- pdd->drm_priv, &mem, NULL, flags);
+ pdd->drm_priv, &mem, NULL, flags,
+ false);
if (err)
goto err_alloc_mem;
@@ -773,7 +775,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
mutex_lock(&kfd_processes_mutex);
/* A prior open of /dev/kfd could have already created the process. */
- process = find_process(thread);
+ process = find_process(thread, false);
if (process) {
pr_debug("Process already found\n");
} else {
@@ -852,7 +854,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
if (thread->group_leader->mm != thread->mm)
return ERR_PTR(-EINVAL);
- process = find_process(thread);
+ process = find_process(thread, false);
if (!process)
return ERR_PTR(-EINVAL);
@@ -871,13 +873,16 @@ static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
return NULL;
}
-static struct kfd_process *find_process(const struct task_struct *thread)
+static struct kfd_process *find_process(const struct task_struct *thread,
+ bool ref)
{
struct kfd_process *p;
int idx;
idx = srcu_read_lock(&kfd_processes_srcu);
p = find_process_by_mm(thread->mm);
+ if (p && ref)
+ kref_get(&p->ref);
srcu_read_unlock(&kfd_processes_srcu, idx);
return p;
@@ -1578,6 +1583,27 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
idr_remove(&pdd->alloc_idr, handle);
}
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
+{
+ struct task_struct *task = NULL;
+ struct kfd_process *p = NULL;
+
+ if (!pid) {
+ task = current;
+ get_task_struct(task);
+ } else {
+ task = get_pid_task(pid, PIDTYPE_PID);
+ }
+
+ if (task) {
+ p = find_process(task, true);
+ put_task_struct(task);
+ }
+
+ return p;
+}
+
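Every successful lookup must be paired with kfd_unref_process(); the struct pid reference is only needed for the lookup itself. A usage sketch matching the resume ioctl above:

	struct kfd_process *target = kfd_lookup_process_by_pid(pid);

	if (target) {
		/* ... operate on target, typically under target->mutex ... */
		kfd_unref_process(target);
	}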
/* This increments the process->ref counter. */
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid)
{