@@ -2078,11 +2078,307 @@ static int criu_checkpoint(struct file *filep,
return ret;
}
+static int criu_restore_process(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ int ret = 0;
+ struct kfd_criu_process_priv_data process_priv;
+
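+ /* Make sure the process private entry fits inside the blob */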
+ if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
+ return -EINVAL;
+
+ ret = copy_from_user(&process_priv,
+ (void __user *)(args->priv_data + *priv_offset),
+ sizeof(process_priv));
+ if (ret) {
+ pr_err("Failed to copy process private information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_offset += sizeof(process_priv);
+
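+ /*
+ * Reject images whose private data layout does not match this
+ * kernel's KFD_CRIU_PRIV_VERSION; the blob cannot be parsed safely.
+ */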
+ if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
+ pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
+ process_priv.version, KFD_CRIU_PRIV_VERSION);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+exit:
+ return ret;
+}
+
+static int criu_restore_bos(struct kfd_process *p,
+ struct kfd_ioctl_criu_args *args,
+ uint64_t *priv_offset,
+ uint64_t max_priv_data_size)
+{
+ struct kfd_criu_bo_bucket *bo_buckets;
+ struct kfd_criu_bo_priv_data *bo_privs;
+ bool flush_tlbs = false;
+ int ret = 0, j = 0;
+ uint32_t i;
+
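+ /* Bounds-check before reading num_bos private entries from the blob */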
+ if (*priv_offset + ((uint64_t)args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
+ return -EINVAL;
+
+ bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
+ if (!bo_buckets)
+ return -ENOMEM;
+
+ ret = copy_from_user(bo_buckets, (void __user *)args->bos,
+ args->num_bos * sizeof(*bo_buckets));
+ if (ret) {
+ pr_err("Failed to copy BOs information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
+ if (!bo_privs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
+ args->num_bos * sizeof(*bo_privs));
+ if (ret) {
+ pr_err("Failed to copy BOs information from user\n");
+ ret = -EFAULT;
+ goto exit;
+ }
+ *priv_offset += args->num_bos * sizeof(*bo_privs);
+
+ /* Create and map new BOs */
+ for (i = 0; i < args->num_bos; i++) {
+ struct kfd_criu_bo_bucket *bo_bucket;
+ struct kfd_criu_bo_priv_data *bo_priv;
+ struct kfd_dev *dev;
+ struct kfd_process_device *pdd;
+ void *mem;
+ u64 offset = 0;
+ int idr_handle;
+
+ bo_bucket = &bo_buckets[i];
+ bo_priv = &bo_privs[i];
+
+ dev = kfd_device_by_id(bo_bucket->gpu_id);
+ if (!dev) {
+ ret = -EINVAL;
+ pr_err("Failed to get device by gpu_id 0x%x\n", bo_bucket->gpu_id);
+ goto exit;
+ }
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+ ret = -EINVAL;
+ pr_err("Failed to get pdd\n");
+ goto exit;
+ }
+
+ pr_debug("kfd restore ioctl - bo_bucket[%d]:\n", i);
+ pr_debug("size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
+ "gpu_id = 0x%x alloc_flags = 0x%x\n"
+ "idr_handle = 0x%x\n",
+ bo_bucket->size,
+ bo_bucket->addr,
+ bo_bucket->offset,
+ bo_bucket->gpu_id,
+ bo_bucket->alloc_flags,
+ bo_priv->idr_handle);
+
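+ /*
+ * For doorbell, MMIO-remap and userptr BOs, "offset" is an input to
+ * amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu (bus address or user
+ * address); for VRAM and GTT BOs it is an output (the mmap offset).
+ */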
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+ pr_debug("restore ioctl: KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL\n");
+ if (bo_bucket->size != kfd_doorbell_process_slice(dev)) {
+ ret = -EINVAL;
+ goto exit;
+ }
+ offset = kfd_get_process_doorbells(pdd);
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ /* MMIO BOs need remapped bus address */
+ pr_debug("restore ioctl :KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP\n");
+ if (bo_bucket->size != PAGE_SIZE) {
+ pr_err("Invalid page size\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+ offset = dev->adev->rmmio_remap.bus_addr;
+ if (!offset) {
+ pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
+ ret = -ENOMEM;
+ goto exit;
+ }
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
+ offset = bo_priv->user_addr;
+ }
+
+ /* Create the BO */
+ ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->adev,
+ bo_bucket->addr,
+ bo_bucket->size,
+ pdd->drm_priv,
+ (struct kgd_mem **) &mem,
+ &offset,
+ bo_bucket->alloc_flags);
+ if (ret) {
+ pr_err("Could not create the BO\n");
+ goto exit;
+ }
+ pr_debug("New BO created: size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n",
+ bo_bucket->size, bo_bucket->addr, offset);
+
+ /* Restore previous IDR handle */
+ pr_debug("Restoring old IDR handle for the BO\n");
+ idr_handle = idr_alloc(&pdd->alloc_idr, mem,
+ bo_priv->idr_handle,
+ bo_priv->idr_handle + 1, GFP_KERNEL);
+
+ if (idr_handle < 0) {
+ pr_err("Could not allocate idr\n");
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev,
+ (struct kgd_mem *)mem,
+ pdd->drm_priv, NULL);
+
+ ret = -ENOMEM;
+ goto exit;
+ }
+
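+ /*
+ * restored_offset is returned to user space, which uses it as the
+ * mmap() offset when re-establishing CPU access to the BO.
+ */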
+ if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+ bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+ bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO |
+ KFD_MMAP_GPU_ID(pdd->dev->id);
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
+ bo_bucket->restored_offset = offset;
+ pr_debug("updating offset for GTT\n");
+ } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ bo_bucket->restored_offset = offset;
+ /* Update the VRAM usage count */
+ WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
+ pr_debug("updating offset for VRAM\n");
+ }
+
+ /* now map these BOs to GPU/s */
+ for (j = 0; j < p->n_pdds; j++) {
+ struct kfd_dev *peer;
+ struct kfd_process_device *peer_pdd;
+ bool table_freed = false;
+
+ if (!bo_priv->mapped_gpuids[j])
+ break;
+
+ peer = kfd_device_by_id(bo_priv->mapped_gpuids[j]);
+ if (!peer) {
+ pr_debug("Getting device by id failed for 0x%x\n", pdd->dev->id);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ peer_pdd = kfd_bind_process_to_device(peer, p);
+ if (IS_ERR(peer_pdd)) {
+ ret = PTR_ERR(peer_pdd);
+ goto exit;
+ }
+ pr_debug("map mem in restore ioctl -> 0x%llx\n",
+ ((struct kgd_mem *)mem)->va);
+ ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev,
+ (struct kgd_mem *)mem, peer_pdd->drm_priv, &table_freed);
+ if (ret) {
+ pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
+ goto exit;
+ }
+ if (table_freed)
+ flush_tlbs = true;
+ }
+
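+ /* Wait for the page-table updates from the mappings above to land */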
+ ret = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev,
+ (struct kgd_mem *) mem, true);
+ if (ret) {
+ pr_debug("Sync memory failed, wait interrupted by user signal\n");
+ goto exit;
+ }
+
+ pr_debug("map memory was successful for the BO\n");
+ } /* done */
+
+ if (flush_tlbs) {
+ /* Flush TLBs after waiting for the page table updates to complete */
+ for (j = 0; j < p->n_pdds; j++) {
+ struct kfd_dev *peer;
+ struct kfd_process_device *pdd = p->pdds[j];
+ struct kfd_process_device *peer_pdd;
+
+ peer = kfd_device_by_id(pdd->dev->id);
+ if (WARN_ON_ONCE(!peer))
+ continue;
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+ }
+ }
+
+ /* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
+ ret = copy_to_user((void __user *)args->bos,
+ bo_buckets,
+ (args->num_bos * sizeof(*bo_buckets)));
+ if (ret)
+ ret = -EFAULT;
+
+exit:
+ kvfree(bo_buckets);
+ kvfree(bo_privs);
+ return ret;
+}
+
static int criu_restore(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
{
- return 0;
+ uint64_t priv_offset = 0;
+ int ret = 0;
+
+ pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
+ args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
+
+ if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
+ !args->num_devices || !args->num_bos)
+ return -EINVAL;
+
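+ /* Serialize against other ioctls touching this process's state */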
+ mutex_lock(&p->mutex);
+
+ /*
+ * Set the process to evicted state to avoid running any new queues before all the memory
+ * mappings are ready.
+ */
+ ret = kfd_process_evict_queues(p);
+ if (ret)
+ goto exit_unlock;
+
+ /* Each function will adjust priv_offset based on how many bytes they consumed */
+ ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
+ ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
+ if (ret)
+ goto exit_unlock;
+
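+ /* Every byte of the private data blob must have been consumed */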
+ if (priv_offset != args->priv_data_size) {
+ pr_err("Invalid private data size\n");
+ ret = -EINVAL;
+ }
+
+exit_unlock:
+ mutex_unlock(&p->mutex);
+ if (ret)
+ pr_err("Failed to restore CRIU ret:%d\n", ret);
+ else
+ pr_debug("CRIU restore successful\n");
+
+ return ret;
}
static int criu_unpause(struct file *filep,