Message ID | 20230308213724.3396058-1-olvaffe@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/amdkfd: fix potential kgd_mem UAFs | expand |
Am 2023-03-08 um 16:37 schrieb Chia-I Wu:

> kgd_mem should be accessed with p->mutex locked, or it could have been
> freed by kfd_ioctl_free_memory_of_gpu.

Thank you for the patch. It's not just about accessing kgd_mem with p->mutex held. It's also about holding the mutex continuously. I'd update the description to be more explicit about the invariant being broken here:

kgd_mem pointers returned by kfd_process_device_translate_handle are only guaranteed to be valid while p->mutex is held. As soon as the mutex is unlocked, another thread can free the BO.

I can update the description and submit the patch.

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>

Regards,
  Felix

```
>
> Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 6d291aa6386bd..3c630114210d6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1293,14 +1293,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  		args->n_success = i+1;
>  	}
>
> -	mutex_unlock(&p->mutex);
> -
>  	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
>  	if (err) {
>  		pr_debug("Sync memory failed, wait interrupted by user signal\n");
>  		goto sync_memory_failed;
>  	}
>
> +	mutex_unlock(&p->mutex);
> +
>  	/* Flush TLBs after waiting for the page table updates to complete */
>  	for (i = 0; i < args->n_devices; i++) {
>  		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
> @@ -1316,9 +1316,9 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
>  bind_process_to_device_failed:
>  get_mem_obj_from_handle_failed:
>  map_memory_to_gpu_failed:
> +sync_memory_failed:
>  	mutex_unlock(&p->mutex);
>  copy_from_user_failed:
> -sync_memory_failed:
>  	kfree(devices_arr);
>
>  	return err;
> @@ -1332,6 +1332,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>  	void *mem;
>  	long err = 0;
>  	uint32_t *devices_arr = NULL, i;
> +	bool flush_tlb;
>
>  	if (!args->n_devices) {
>  		pr_debug("Device IDs array empty\n");
> @@ -1384,16 +1385,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>  		}
>  		args->n_success = i+1;
>  	}
> -	mutex_unlock(&p->mutex);
>
> -	if (kfd_flush_tlb_after_unmap(pdd->dev)) {
> +	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
> +	if (flush_tlb) {
>  		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
>  			(struct kgd_mem *) mem, true);
>  		if (err) {
>  			pr_debug("Sync memory failed, wait interrupted by user signal\n");
>  			goto sync_memory_failed;
>  		}
> +	}
> +	mutex_unlock(&p->mutex);
>
> +	if (flush_tlb) {
>  		/* Flush TLBs after waiting for the page table updates to complete */
>  		for (i = 0; i < args->n_devices; i++) {
>  			peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
> @@ -1409,9 +1413,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>  bind_process_to_device_failed:
>  get_mem_obj_from_handle_failed:
>  unmap_memory_from_gpu_failed:
> +sync_memory_failed:
>  	mutex_unlock(&p->mutex);
>  copy_from_user_failed:
> -sync_memory_failed:
>  	kfree(devices_arr);
>  	return err;
>  }
```
```diff
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6d291aa6386bd..3c630114210d6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1293,14 +1293,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 		args->n_success = i+1;
 	}
 
-	mutex_unlock(&p->mutex);
-
 	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
 	if (err) {
 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
 		goto sync_memory_failed;
 	}
 
+	mutex_unlock(&p->mutex);
+
 	/* Flush TLBs after waiting for the page table updates to complete */
 	for (i = 0; i < args->n_devices; i++) {
 		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
@@ -1316,9 +1316,9 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 bind_process_to_device_failed:
 get_mem_obj_from_handle_failed:
 map_memory_to_gpu_failed:
+sync_memory_failed:
 	mutex_unlock(&p->mutex);
 copy_from_user_failed:
-sync_memory_failed:
 	kfree(devices_arr);
 
 	return err;
@@ -1332,6 +1332,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 	void *mem;
 	long err = 0;
 	uint32_t *devices_arr = NULL, i;
+	bool flush_tlb;
 
 	if (!args->n_devices) {
 		pr_debug("Device IDs array empty\n");
@@ -1384,16 +1385,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 		}
 		args->n_success = i+1;
 	}
-	mutex_unlock(&p->mutex);
 
-	if (kfd_flush_tlb_after_unmap(pdd->dev)) {
+	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
+	if (flush_tlb) {
 		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
 			(struct kgd_mem *) mem, true);
 		if (err) {
 			pr_debug("Sync memory failed, wait interrupted by user signal\n");
 			goto sync_memory_failed;
 		}
+	}
+	mutex_unlock(&p->mutex);
 
+	if (flush_tlb) {
 		/* Flush TLBs after waiting for the page table updates to complete */
 		for (i = 0; i < args->n_devices; i++) {
 			peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
@@ -1409,9 +1413,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 bind_process_to_device_failed:
 get_mem_obj_from_handle_failed:
 unmap_memory_from_gpu_failed:
+sync_memory_failed:
 	mutex_unlock(&p->mutex);
 copy_from_user_failed:
-sync_memory_failed:
 	kfree(devices_arr);
 	return err;
 }
```
```
kgd_mem should be accessed with p->mutex locked, or it could have been
freed by kfd_ioctl_free_memory_of_gpu.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)
```