Message ID | 20211223003711.13064-24-rajneesh.bhardwaj@amd.com
---|---
State | New, archived
Series | CHECKPOINT RESTORE WITH ROCm
On 2022-01-05 9:43 a.m., philip yang wrote:
>
>
> On 2021-12-22 7:37 p.m., Rajneesh Bhardwaj wrote:
>> During CRIU restore phase, the VMAs for the virtual address ranges are
>> not at their final location yet so in this stage, only cache the data
>> required to successfully resume the svm ranges during an imminent CRIU
>> resume phase.
>>
>> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  4 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  5 ++
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c     | 99 ++++++++++++++++++++++++
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.h     | 12 +++
>>  4 files changed, 118 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 916b8d000317..f7aa15b18f95 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -2638,8 +2638,8 @@ static int criu_restore_objects(struct file *filep,
>>  			goto exit;
>>  		break;
>>  	case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
>> -		/* TODO: Implement SVM range */
>> -		*priv_offset += sizeof(struct kfd_criu_svm_range_priv_data);
>> +		ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
>> +					   priv_offset, max_priv_data_size);
>>  		if (ret)
>>  			goto exit;
>>  		break;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 87eb6739a78e..92191c541c29 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -790,6 +790,7 @@ struct svm_range_list {
>>  	struct list_head		list;
>>  	struct work_struct		deferred_list_work;
>>  	struct list_head		deferred_range_list;
>> +	struct list_head		criu_svm_metadata_list;
>>  	spinlock_t			deferred_list_lock;
>>  	atomic_t			evicted_ranges;
>>  	bool				drain_pagefaults;
>> @@ -1148,6 +1149,10 @@ int kfd_criu_restore_event(struct file *devkfd,
>>  			   uint8_t __user *user_priv_data,
>>  			   uint64_t *priv_data_offset,
>>  			   uint64_t max_priv_data_size);
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> +			 uint8_t __user *user_priv_data,
>> +			 uint64_t *priv_data_offset,
>> +			 uint64_t max_priv_data_size);
>>  /* CRIU - End */
>>
>>  /* Queue Context Management */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 6d59f1bedcf2..e9f6c63c2a26 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -45,6 +45,14 @@
>>   */
>>  #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING	2000
>>
>> +struct criu_svm_metadata {
>> +	struct list_head list;
>> +	__u64 start_addr;
>> +	__u64 size;
>> +	/* Variable length array of attributes */
>> +	struct kfd_ioctl_svm_attribute attrs[0];
>> +};
>
> This data structure is struct kfd_criu_svm_range_priv_data plus
> list_head, maybe you can add list_head to struct
> kfd_criu_svm_range_priv_data and remove this new data structure, then
> you can remove extra kzalloc, kfree for each svm object resume and
> function svm_criu_prepare_for_resume could be removed.

Adding list_head to the private structure is a bad idea, because that
structure is copied to/from user mode. Kernel mode pointers should not
be exposed to user mode, even in an opaque structure. That's just
begging for an exploit.

But you could define criu_svm_metadata as

struct criu_svm_metadata {
	struct list_head list;
	struct kfd_criu_svm_range_priv_data data;
};

Then copy_from_user directly into criu_svm_md->data in
kfd_criu_restore_svm to avoid the double allocation.
Regards,
  Felix

>> +
>>  static void svm_range_evict_svm_bo_worker(struct work_struct *work);
>>  static bool
>>  svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
>> @@ -2753,6 +2761,7 @@ int svm_range_list_init(struct kfd_process *p)
>>  	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);
>>  	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);
>>  	INIT_LIST_HEAD(&svms->deferred_range_list);
>> +	INIT_LIST_HEAD(&svms->criu_svm_metadata_list);
>>  	spin_lock_init(&svms->deferred_list_lock);
>>
>>  	for (i = 0; i < p->n_pdds; i++)
>> @@ -3418,6 +3427,96 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
>>  	return 0;
>>  }
>>
>> +int svm_criu_prepare_for_resume(struct kfd_process *p,
>> +				struct kfd_criu_svm_range_priv_data *svm_priv)
>> +{
>> +	int nattr_common = 4, nattr_accessibility = 1;
>> +	struct criu_svm_metadata *criu_svm_md = NULL;
>> +	uint64_t svm_attrs_size, svm_object_md_size;
>> +	struct svm_range_list *svms = &p->svms;
>> +	int num_devices = p->n_pdds;
>> +	int i, ret = 0;
>> +
>> +	svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> +		(nattr_common + nattr_accessibility * num_devices);
>> +	svm_object_md_size = sizeof(struct criu_svm_metadata) + svm_attrs_size;
>> +
>> +	criu_svm_md = kzalloc(svm_object_md_size, GFP_KERNEL);
>> +	if (!criu_svm_md) {
>> +		pr_err("failed to allocate memory to store svm metadata\n");
>> +		ret = -ENOMEM;
>> +		goto exit;
>> +	}
>> +
>> +	criu_svm_md->start_addr = svm_priv->start_addr;
>> +	criu_svm_md->size = svm_priv->size;
>> +	for (i = 0; i < svm_attrs_size; i++)
>
> for (i = 0; i < nattr_common + nattr_accessibility * num_devices ; i++)
>
> This function and for loop is not needed if you remove struct
> criu_svm_metadata.
>
> Regards,
>
> Philip
>
>> +	{
>> +		criu_svm_md->attrs[i].type = svm_priv->attrs[i].type;
>> +		criu_svm_md->attrs[i].value = svm_priv->attrs[i].value;
>> +	}
>> +
>> +	list_add_tail(&criu_svm_md->list, &svms->criu_svm_metadata_list);
>> +
>> +
>> +exit:
>> +	return ret;
>> +}
>> +
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> +			 uint8_t __user *user_priv_ptr,
>> +			 uint64_t *priv_data_offset,
>> +			 uint64_t max_priv_data_size)
>> +{
>> +	uint64_t total_size, accessibility_size, common_attr_size;
>> +	struct kfd_criu_svm_range_priv_data *svm_priv = NULL;
>> +	int nattr_common = 4, naatr_accessibility = 1;
>> +	uint32_t num_devices;
>> +	int ret = 0;
>> +
>> +	num_devices = p->n_pdds;
>> +	/* Handle one SVM range object at a time, also the number of gpus are
>> +	 * assumed to be same on the restore node, checking must be done while
>> +	 * evaluating the topology earlier */
>> +	common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> +		nattr_common;
>> +	accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
>> +		naatr_accessibility * num_devices;
>> +	total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
>> +		common_attr_size + accessibility_size;
>> +
>> +	svm_priv = kvzalloc(total_size, GFP_KERNEL);
>> +	if (!svm_priv)
>> +		return -ENOMEM;
>> +
>> +	if (*priv_data_offset + total_size > max_priv_data_size) {
>> +		ret = -EINVAL;
>> +		goto exit;
>> +	}
>> +
>> +	ret = copy_from_user(svm_priv, user_priv_ptr + *priv_data_offset,
>> +			     total_size);
>> +	if (ret) {
>> +		ret = -EFAULT;
>> +		goto exit;
>> +	}
>> +	*priv_data_offset += total_size;
>> +
>> +	ret = svm_criu_prepare_for_resume(p, svm_priv);
>> +	if (ret) {
>> +		ret = -EFAULT;
>> +		pr_err("svm_criu_prepare_for_resume failed\n");
>> +		goto exit;
>> +	}
>> +
>> +
>> +exit:
>> +
>> +	kvfree(svm_priv);
>> +
>> +	return ret;
>> +}
>> +
>>  int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
>>  			uint64_t *svm_priv_data_size)
>>  {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> index b00576db5baa..e0c0853f085c 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> @@ -191,6 +191,10 @@ int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
>>  int kfd_criu_checkpoint_svm(struct kfd_process *p,
>>  			    uint8_t __user *user_priv_data,
>>  			    uint64_t *priv_offset);
>> +int kfd_criu_restore_svm(struct kfd_process *p,
>> +			 uint8_t __user *user_priv_ptr,
>> +			 uint64_t *priv_data_offset,
>> +			 uint64_t max_priv_data_size);
>>  struct kfd_process_device *
>>  svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
>>  void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
>> @@ -244,6 +248,14 @@ static inline int kfd_criu_checkpoint_svm(struct kfd_process *p,
>>  	return 0;
>>  }
>>
>> +static inline int kfd_criu_restore_svm(struct kfd_process *p,
>> +				       uint8_t __user *user_priv_ptr,
>> +				       uint64_t *priv_data_offset,
>> +				       uint64_t max_priv_data_size)
>> +{
>> +	return -EINVAL;
>> +}
>> +
>>  #define KFD_IS_SVM_API_SUPPORTED(dev) false
>>
>>  #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
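To make the suggested refactor concrete, here is a minimal sketch in the
context of kfd_svm.c. It illustrates the direction proposed above, not the
code as merged, and it assumes struct kfd_criu_svm_range_priv_data ends in a
flexible attrs[] array, as the size arithmetic in the patch implies:

/* Kernel-only wrapper: the list linkage never crosses the user/kernel
 * boundary. The user-visible private data is embedded as the last member
 * so its trailing attrs[] array can still grow with the device count.
 */
struct criu_svm_metadata {
	struct list_head list;
	struct kfd_criu_svm_range_priv_data data;
};

int kfd_criu_restore_svm(struct kfd_process *p,
			 uint8_t __user *user_priv_ptr,
			 uint64_t *priv_data_offset,
			 uint64_t max_priv_data_size)
{
	int nattr_common = 4, nattr_accessibility = 1;
	struct criu_svm_metadata *criu_svm_md;
	uint64_t svm_attrs_size, svm_priv_data_size;

	/* One SVM range object at a time; attrs[] scales with the number
	 * of GPUs, which is assumed equal on the restore node.
	 */
	svm_attrs_size = sizeof(struct kfd_ioctl_svm_attribute) *
		(nattr_common + nattr_accessibility * p->n_pdds);
	svm_priv_data_size = sizeof(struct kfd_criu_svm_range_priv_data) +
		svm_attrs_size;

	if (*priv_data_offset + svm_priv_data_size > max_priv_data_size)
		return -EINVAL;

	criu_svm_md = kzalloc(sizeof(*criu_svm_md) + svm_attrs_size,
			      GFP_KERNEL);
	if (!criu_svm_md)
		return -ENOMEM;

	/* Single allocation, single copy: read straight into the embedded
	 * private data instead of bouncing through a temporary buffer.
	 */
	if (copy_from_user(&criu_svm_md->data,
			   user_priv_ptr + *priv_data_offset,
			   svm_priv_data_size)) {
		kfree(criu_svm_md);
		return -EFAULT;
	}
	*priv_data_offset += svm_priv_data_size;

	list_add_tail(&criu_svm_md->list, &p->svms.criu_svm_metadata_list);
	return 0;
}

With this shape, svm_criu_prepare_for_resume() and its attribute-copy loop
disappear entirely, which also sidesteps the loop-bound bug Philip points
out (the original iterates over the attribute byte size rather than the
attribute count).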