@@ -2040,6 +2040,14 @@ static int criu_checkpoint(struct file *filep,
goto exit_unlock;
}
+ /* Confirm all process queues are evicted */
+ if (!p->queues_paused) {
+ pr_err("Cannot dump process when queues are not in evicted state\n");
+ /* CRIU plugin did not call op PROCESS_INFO before checkpointing */
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
criu_get_process_object_info(p, &num_bos, &priv_size);
if (num_bos != args->num_bos ||
@@ -2382,7 +2390,24 @@ static int criu_unpause(struct file *filep,
struct kfd_process *p,
struct kfd_ioctl_criu_args *args)
{
- return 0;
+ int ret;
+
+ mutex_lock(&p->mutex);
+
+ if (!p->queues_paused) {
+ mutex_unlock(&p->mutex);
+ return -EINVAL;
+ }
+
+ ret = kfd_process_restore_queues(p);
+ if (ret)
+ pr_err("Failed to unpause queues ret:%d\n", ret);
+ else
+ p->queues_paused = false;
+
+ mutex_unlock(&p->mutex);
+
+ return ret;
}
static int criu_resume(struct file *filep,
@@ -2434,6 +2459,12 @@ static int criu_process_info(struct file *filep,
goto err_unlock;
}
+ ret = kfd_process_evict_queues(p);
+ if (ret)
+ goto err_unlock;
+
+ p->queues_paused = true;
+
args->pid = task_pid_nr_ns(p->lead_thread,
task_active_pid_ns(p->lead_thread));
@@ -2441,6 +2472,10 @@ static int criu_process_info(struct file *filep,
dev_dbg(kfd_device, "Num of bos:%u\n", args->num_bos);
err_unlock:
+ if (ret) {
+ kfd_process_restore_queues(p);
+ p->queues_paused = false;
+ }
mutex_unlock(&p->mutex);
return ret;
}
@@ -875,6 +875,9 @@ struct kfd_process {
struct svm_range_list svms;
bool xnack_enabled;
+
+ /* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
+ bool queues_paused;
};
#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -1364,6 +1364,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
+ process->queues_paused = false;
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();