
[v6,01/16] drm/ttm: Remap all page faults to per process dummy page.

Message ID 20210510163625.407105-2-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Series RFC Support hot device unplug in amdgpu

Commit Message

Andrey Grodzovsky May 10, 2021, 4:36 p.m. UTC
On device removal, reroute all CPU mappings to a dummy page.

v3:
Remove loop to find DRM file and instead access it
by vma->vm_file->private_data. Move dummy page installation
into a separate function.

v4:
Map the entire BO's VA space to an on-demand allocated dummy page
on the first fault for that BO.

v5: Remove duplicate return.

v6: Polish ttm_bo_vm_dummy_page, remove superfluous code.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 57 ++++++++++++++++++++++++++++++++-
 include/drm/ttm/ttm_bo_api.h    |  2 ++
 2 files changed, 58 insertions(+), 1 deletion(-)

Comments

Christian König May 11, 2021, 6:38 a.m. UTC | #1
On 10.05.21 at 18:36, Andrey Grodzovsky wrote:
> On device removal, reroute all CPU mappings to a dummy page.
>
> v3:
> Remove loop to find DRM file and instead access it
> by vma->vm_file->private_data. Move dummy page installation
> into a separate function.
>
> v4:
> Map the entire BO's VA space to an on-demand allocated dummy page
> on the first fault for that BO.
>
> v5: Remove duplicate return.
>
> v6: Polish ttm_bo_vm_dummy_page, remove superfluous code.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 57 ++++++++++++++++++++++++++++++++-
>   include/drm/ttm/ttm_bo_api.h    |  2 ++
>   2 files changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index b31b18058965..e5a9615519d1 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -34,6 +34,8 @@
>   #include <drm/ttm/ttm_bo_driver.h>
>   #include <drm/ttm/ttm_placement.h>
>   #include <drm/drm_vma_manager.h>
> +#include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
>   #include <linux/mm.h>
>   #include <linux/pfn_t.h>
>   #include <linux/rbtree.h>
> @@ -380,19 +382,72 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
>   }
>   EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
>   
> +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)
> +{
> +	struct page *dummy_page = (struct page *)res;
> +
> +	__free_page(dummy_page);
> +}
> +
> +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
> +{
> +	struct vm_area_struct *vma = vmf->vma;
> +	struct ttm_buffer_object *bo = vma->vm_private_data;
> +	struct drm_device *ddev = bo->base.dev;
> +	vm_fault_t ret = VM_FAULT_NOPAGE;
> +	unsigned long address;
> +	unsigned long pfn;
> +	struct page *page;
> +
> +	/* Allocate a new dummy page and map the whole VA range of this VMA to it */
> +	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!page)
> +		return VM_FAULT_OOM;
> +
> +	pfn = page_to_pfn(page);
> +
> +	/* Prefault the entire VMA range right away to avoid further faults */
> +	for (address = vma->vm_start; address < vma->vm_end; address += PAGE_SIZE) {
> +

> +		if (unlikely(address >= vma->vm_end))
> +			break;

That extra check can be removed as far as I can see.


> +
> +		if (vma->vm_flags & VM_MIXEDMAP)
> +			ret = vmf_insert_mixed_prot(vma, address,
> +						    __pfn_to_pfn_t(pfn, PFN_DEV),
> +						    prot);
> +		else
> +			ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
> +	}
> +

> +	/* Set the page to be freed using drmm release action */
> +	if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
> +		return VM_FAULT_OOM;

You should probably move that before inserting the page into the VMA and 
also free the allocated page if it goes wrong.

Apart from that patch looks good to me,
Christian.

> +
> +	return ret;
> +}
> +EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
> +
>   vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>   {
>   	struct vm_area_struct *vma = vmf->vma;
>   	pgprot_t prot;
>   	struct ttm_buffer_object *bo = vma->vm_private_data;
> +	struct drm_device *ddev = bo->base.dev;
>   	vm_fault_t ret;
> +	int idx;
>   
>   	ret = ttm_bo_vm_reserve(bo, vmf);
>   	if (ret)
>   		return ret;
>   
>   	prot = vma->vm_page_prot;
> -	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
> +	if (drm_dev_enter(ddev, &idx)) {
> +		ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
> +		drm_dev_exit(idx);
> +	} else {
> +		ret = ttm_bo_vm_dummy_page(vmf, prot);
> +	}
>   	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
>   		return ret;
>   
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 639521880c29..254ede97f8e3 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -620,4 +620,6 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
>   		     void *buf, int len, int write);
>   bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
>   
> +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
> +
>   #endif
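
[Editorial note: for reference, here is how the prefault loop reads with the
redundant bounds check Christian points out dropped. This is only a sketch of
the suggested cleanup, not the committed code: the for-condition already
guarantees address < vma->vm_end on every iteration, so the body can start
directly with the mapping calls.]

	/* Prefault the entire VMA range right away to avoid further faults */
	for (address = vma->vm_start; address < vma->vm_end; address += PAGE_SIZE) {
		/* No bounds re-check needed: the loop condition covers it. */
		if (vma->vm_flags & VM_MIXEDMAP)
			ret = vmf_insert_mixed_prot(vma, address,
						    __pfn_to_pfn_t(pfn, PFN_DEV),
						    prot);
		else
			ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
	}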
Andrey Grodzovsky May 11, 2021, 2:44 p.m. UTC | #2
On 2021-05-11 2:38 a.m., Christian König wrote:
> On 10.05.21 at 18:36, Andrey Grodzovsky wrote:
>> On device removal, reroute all CPU mappings to a dummy page.
>>
>> v3:
>> Remove loop to find DRM file and instead access it
>> by vma->vm_file->private_data. Move dummy page installation
>> into a separate function.
>>
>> v4:
>> Map the entire BO's VA space to an on-demand allocated dummy page
>> on the first fault for that BO.
>>
>> v5: Remove duplicate return.
>>
>> v6: Polish ttm_bo_vm_dummy_page, remove superfluous code.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 57 ++++++++++++++++++++++++++++++++-
>>   include/drm/ttm/ttm_bo_api.h    |  2 ++
>>   2 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> index b31b18058965..e5a9615519d1 100644
>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>> @@ -34,6 +34,8 @@
>>   #include <drm/ttm/ttm_bo_driver.h>
>>   #include <drm/ttm/ttm_placement.h>
>>   #include <drm/drm_vma_manager.h>
>> +#include <drm/drm_drv.h>
>> +#include <drm/drm_managed.h>
>>   #include <linux/mm.h>
>>   #include <linux/pfn_t.h>
>>   #include <linux/rbtree.h>
>> @@ -380,19 +382,72 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct 
>> vm_fault *vmf,
>>   }
>>   EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
>>   +static void ttm_bo_release_dummy_page(struct drm_device *dev, void 
>> *res)
>> +{
>> +    struct page *dummy_page = (struct page *)res;
>> +
>> +    __free_page(dummy_page);
>> +}
>> +
>> +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
>> +{
>> +    struct vm_area_struct *vma = vmf->vma;
>> +    struct ttm_buffer_object *bo = vma->vm_private_data;
>> +    struct drm_device *ddev = bo->base.dev;
>> +    vm_fault_t ret = VM_FAULT_NOPAGE;
>> +    unsigned long address;
>> +    unsigned long pfn;
>> +    struct page *page;
>> +
>> +    /* Allocate a new dummy page and map the whole VA range of
>> this VMA to it */
>> +    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> +    if (!page)
>> +        return VM_FAULT_OOM;
>> +
>> +    pfn = page_to_pfn(page);
>> +
>> +    /* Prefault the entire VMA range right away to avoid further 
>> faults */
>> +    for (address = vma->vm_start; address < vma->vm_end; address += 
>> PAGE_SIZE) {
>> +
>
>> +        if (unlikely(address >= vma->vm_end))
>> +            break;
>
> That extra check can be removed as far as I can see.
>
>
>> +
>> +        if (vma->vm_flags & VM_MIXEDMAP)
>> +            ret = vmf_insert_mixed_prot(vma, address,
>> +                            __pfn_to_pfn_t(pfn, PFN_DEV),
>> +                            prot);
>> +        else
>> +            ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
>> +    }
>> +
>
>> +    /* Set the page to be freed using drmm release action */
>> +    if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, 
>> page))
>> +        return VM_FAULT_OOM;
>
> You should probably move that before inserting the page into the VMA 
> and also free the allocated page if it goes wrong.


drmm_add_action_or_reset will automatically release the page if adding the
action fails; that's the 'reset' part of the function.

Andrey


>
> Apart from that patch looks good to me,
> Christian.
>
>> +
>> +    return ret;
>> +}
>> +EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
>> +
>>   vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>>   {
>>       struct vm_area_struct *vma = vmf->vma;
>>       pgprot_t prot;
>>       struct ttm_buffer_object *bo = vma->vm_private_data;
>> +    struct drm_device *ddev = bo->base.dev;
>>       vm_fault_t ret;
>> +    int idx;
>>         ret = ttm_bo_vm_reserve(bo, vmf);
>>       if (ret)
>>           return ret;
>>         prot = vma->vm_page_prot;
>> -    ret = ttm_bo_vm_fault_reserved(vmf, prot, 
>> TTM_BO_VM_NUM_PREFAULT, 1);
>> +    if (drm_dev_enter(ddev, &idx)) {
>> +        ret = ttm_bo_vm_fault_reserved(vmf, prot, 
>> TTM_BO_VM_NUM_PREFAULT, 1);
>> +        drm_dev_exit(idx);
>> +    } else {
>> +        ret = ttm_bo_vm_dummy_page(vmf, prot);
>> +    }
>>       if (ret == VM_FAULT_RETRY && !(vmf->flags & 
>> FAULT_FLAG_RETRY_NOWAIT))
>>           return ret;
>>   diff --git a/include/drm/ttm/ttm_bo_api.h 
>> b/include/drm/ttm/ttm_bo_api.h
>> index 639521880c29..254ede97f8e3 100644
>> --- a/include/drm/ttm/ttm_bo_api.h
>> +++ b/include/drm/ttm/ttm_bo_api.h
>> @@ -620,4 +620,6 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, 
>> unsigned long addr,
>>                void *buf, int len, int write);
>>   bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
>>   +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
>> +
>>   #endif
>
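
[Editorial note: Andrey's reading matches the helper's definition in
include/drm/drm_managed.h. Roughly, as a simplified sketch of its semantics
rather than the exact kernel source:]

	static inline int drmm_add_action_or_reset(struct drm_device *dev,
						   drmres_release_t action,
						   void *data)
	{
		int ret;

		ret = drmm_add_action(dev, action, data);
		if (ret)
			action(dev, data); /* the 'reset': run the cleanup now */

		return ret;
	}

[So a failed registration frees the dummy page on the spot, which, as
Christian notes next, is exactly why the call must happen before the page is
inserted into the VMA.]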
Christian König May 11, 2021, 3:12 p.m. UTC | #3
On 11.05.21 at 16:44, Andrey Grodzovsky wrote:
>
> On 2021-05-11 2:38 a.m., Christian König wrote:
>> On 10.05.21 at 18:36, Andrey Grodzovsky wrote:
>>> On device removal, reroute all CPU mappings to a dummy page.
>>>
>>> v3:
>>> Remove loop to find DRM file and instead access it
>>> by vma->vm_file->private_data. Move dummy page installation
>>> into a separate function.
>>>
>>> v4:
>>> Map the entire BO's VA space to an on-demand allocated dummy page
>>> on the first fault for that BO.
>>>
>>> v5: Remove duplicate return.
>>>
>>> v6: Polish ttm_bo_vm_dummy_page, remove superfluous code.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>   drivers/gpu/drm/ttm/ttm_bo_vm.c | 57 
>>> ++++++++++++++++++++++++++++++++-
>>>   include/drm/ttm/ttm_bo_api.h    |  2 ++
>>>   2 files changed, 58 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
>>> b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> index b31b18058965..e5a9615519d1 100644
>>> --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
>>> @@ -34,6 +34,8 @@
>>>   #include <drm/ttm/ttm_bo_driver.h>
>>>   #include <drm/ttm/ttm_placement.h>
>>>   #include <drm/drm_vma_manager.h>
>>> +#include <drm/drm_drv.h>
>>> +#include <drm/drm_managed.h>
>>>   #include <linux/mm.h>
>>>   #include <linux/pfn_t.h>
>>>   #include <linux/rbtree.h>
>>> @@ -380,19 +382,72 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct 
>>> vm_fault *vmf,
>>>   }
>>>   EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
>>>   +static void ttm_bo_release_dummy_page(struct drm_device *dev, 
>>> void *res)
>>> +{
>>> +    struct page *dummy_page = (struct page *)res;
>>> +
>>> +    __free_page(dummy_page);
>>> +}
>>> +
>>> +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
>>> +{
>>> +    struct vm_area_struct *vma = vmf->vma;
>>> +    struct ttm_buffer_object *bo = vma->vm_private_data;
>>> +    struct drm_device *ddev = bo->base.dev;
>>> +    vm_fault_t ret = VM_FAULT_NOPAGE;
>>> +    unsigned long address;
>>> +    unsigned long pfn;
>>> +    struct page *page;
>>> +
>>> +    /* Allocate a new dummy page and map the whole VA range of
>>> this VMA to it */
>>> +    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>>> +    if (!page)
>>> +        return VM_FAULT_OOM;
>>> +
>>> +    pfn = page_to_pfn(page);
>>> +
>>> +    /* Prefault the entire VMA range right away to avoid further 
>>> faults */
>>> +    for (address = vma->vm_start; address < vma->vm_end; address += 
>>> PAGE_SIZE) {
>>> +
>>
>>> +        if (unlikely(address >= vma->vm_end))
>>> +            break;
>>
>> That extra check can be removed as far as I can see.
>>
>>
>>> +
>>> +        if (vma->vm_flags & VM_MIXEDMAP)
>>> +            ret = vmf_insert_mixed_prot(vma, address,
>>> +                            __pfn_to_pfn_t(pfn, PFN_DEV),
>>> +                            prot);
>>> +        else
>>> +            ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
>>> +    }
>>> +
>>
>>> +    /* Set the page to be freed using drmm release action */
>>> +    if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, 
>>> page))
>>> +        return VM_FAULT_OOM;
>>
>> You should probably move that before inserting the page into the VMA 
>> and also free the allocated page if it goes wrong.
>
>
> drmm_add_action_or_reset will automatically release the page if adding
> the action fails; that's the 'reset' part of the function.

Ah! OK, that makes it even more important that you do this before you
insert the page into any VMA.

Otherwise userspace has access to a freed page, with rather ugly
consequences.

Christian.

>
> Andrey
>
>
>>
>> Apart from that patch looks good to me,
>> Christian.
>>
>>> +
>>> +    return ret;
>>> +}
>>> +EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
>>> +
>>>   vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
>>>   {
>>>       struct vm_area_struct *vma = vmf->vma;
>>>       pgprot_t prot;
>>>       struct ttm_buffer_object *bo = vma->vm_private_data;
>>> +    struct drm_device *ddev = bo->base.dev;
>>>       vm_fault_t ret;
>>> +    int idx;
>>>         ret = ttm_bo_vm_reserve(bo, vmf);
>>>       if (ret)
>>>           return ret;
>>>         prot = vma->vm_page_prot;
>>> -    ret = ttm_bo_vm_fault_reserved(vmf, prot, 
>>> TTM_BO_VM_NUM_PREFAULT, 1);
>>> +    if (drm_dev_enter(ddev, &idx)) {
>>> +        ret = ttm_bo_vm_fault_reserved(vmf, prot, 
>>> TTM_BO_VM_NUM_PREFAULT, 1);
>>> +        drm_dev_exit(idx);
>>> +    } else {
>>> +        ret = ttm_bo_vm_dummy_page(vmf, prot);
>>> +    }
>>>       if (ret == VM_FAULT_RETRY && !(vmf->flags & 
>>> FAULT_FLAG_RETRY_NOWAIT))
>>>           return ret;
>>>   diff --git a/include/drm/ttm/ttm_bo_api.h 
>>> b/include/drm/ttm/ttm_bo_api.h
>>> index 639521880c29..254ede97f8e3 100644
>>> --- a/include/drm/ttm/ttm_bo_api.h
>>> +++ b/include/drm/ttm/ttm_bo_api.h
>>> @@ -620,4 +620,6 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, 
>>> unsigned long addr,
>>>                void *buf, int len, int write);
>>>   bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
>>>   +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t 
>>> prot);
>>> +
>>>   #endif
>>
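
[Editorial note: taken together, the two review points suggest a version of
ttm_bo_vm_dummy_page() along these lines. This is a sketch of the feedback
applied to the v6 code, not necessarily the version that was eventually
merged: the drmm release action is registered before any PTE can point at the
page, and the redundant bounds check inside the loop is gone.]

	vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
	{
		struct vm_area_struct *vma = vmf->vma;
		struct ttm_buffer_object *bo = vma->vm_private_data;
		struct drm_device *ddev = bo->base.dev;
		vm_fault_t ret = VM_FAULT_NOPAGE;
		unsigned long address;
		unsigned long pfn;
		struct page *page;

		/* Allocate a new dummy page and map the whole VA range of this VMA to it */
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			return VM_FAULT_OOM;

		/*
		 * Register the release action first. If this fails the page is
		 * freed immediately by the 'reset' path while it is still
		 * unmapped, so userspace can never touch a freed page.
		 */
		if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
			return VM_FAULT_OOM;

		pfn = page_to_pfn(page);

		/* Prefault the entire VMA range right away to avoid further faults */
		for (address = vma->vm_start; address < vma->vm_end; address += PAGE_SIZE) {
			if (vma->vm_flags & VM_MIXEDMAP)
				ret = vmf_insert_mixed_prot(vma, address,
							    __pfn_to_pfn_t(pfn, PFN_DEV),
							    prot);
			else
				ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
		}

		return ret;
	}
	EXPORT_SYMBOL(ttm_bo_vm_dummy_page);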

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index b31b18058965..e5a9615519d1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -34,6 +34,8 @@ 
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
 #include <drm/drm_vma_manager.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
 #include <linux/mm.h>
 #include <linux/pfn_t.h>
 #include <linux/rbtree.h>
@@ -380,19 +382,72 @@  vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
 }
 EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
 
+static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res)
+{
+	struct page *dummy_page = (struct page *)res;
+
+	__free_page(dummy_page);
+}
+
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct ttm_buffer_object *bo = vma->vm_private_data;
+	struct drm_device *ddev = bo->base.dev;
+	vm_fault_t ret = VM_FAULT_NOPAGE;
+	unsigned long address;
+	unsigned long pfn;
+	struct page *page;
+
+	/* Allocate a new dummy page and map the whole VA range of this VMA to it */
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		return VM_FAULT_OOM;
+
+	pfn = page_to_pfn(page);
+
+	/* Prefault the entire VMA range right away to avoid further faults */
+	for (address = vma->vm_start; address < vma->vm_end; address += PAGE_SIZE) {
+
+		if (unlikely(address >= vma->vm_end))
+			break;
+
+		if (vma->vm_flags & VM_MIXEDMAP)
+			ret = vmf_insert_mixed_prot(vma, address,
+						    __pfn_to_pfn_t(pfn, PFN_DEV),
+						    prot);
+		else
+			ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+	}
+
+	/* Set the page to be freed using drmm release action */
+	if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page))
+		return VM_FAULT_OOM;
+
+	return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_dummy_page);
+
 vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	pgprot_t prot;
 	struct ttm_buffer_object *bo = vma->vm_private_data;
+	struct drm_device *ddev = bo->base.dev;
 	vm_fault_t ret;
+	int idx;
 
 	ret = ttm_bo_vm_reserve(bo, vmf);
 	if (ret)
 		return ret;
 
 	prot = vma->vm_page_prot;
-	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+	if (drm_dev_enter(ddev, &idx)) {
+		ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
+		drm_dev_exit(idx);
+	} else {
+		ret = ttm_bo_vm_dummy_page(vmf, prot);
+	}
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		return ret;
 
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 639521880c29..254ede97f8e3 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -620,4 +620,6 @@  int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
 		     void *buf, int len, int write);
 bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all);
 
+vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot);
+
 #endif
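
[Editorial note: for context on what the new fault path means for
applications: assuming the driver unmaps existing PTEs at unplug time, as
other patches in this series arrange for amdgpu, the next CPU access faults
and lands in ttm_bo_vm_dummy_page(), which backs the whole VMA with a zeroed
dummy page instead of delivering SIGBUS. A minimal userspace sketch of that
behavior for a TTM-based driver, using the generic dumb-buffer ioctls; this
builds against the DRM uapi headers, the device path is an assumption, and
all error handling is omitted for brevity.]

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <drm/drm.h>
	#include <drm/drm_mode.h>

	int main(void)
	{
		struct drm_mode_create_dumb create = { .height = 64, .width = 64, .bpp = 32 };
		struct drm_mode_map_dumb map = { 0 };
		int fd = open("/dev/dri/card0", O_RDWR); /* assumed device node */
		uint32_t *ptr;

		ioctl(fd, DRM_IOCTL_MODE_CREATE_DUMB, &create);
		map.handle = create.handle;
		ioctl(fd, DRM_IOCTL_MODE_MAP_DUMB, &map);

		ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
			   fd, map.offset);

		/* ... device is physically unplugged here ... */

		/*
		 * With this patch the fault handler maps the whole VMA to a
		 * zeroed dummy page instead of delivering SIGBUS, so this read
		 * completes (and prints zero).
		 */
		printf("read after unplug: 0x%08x\n", ptr[0]);
		return 0;
	}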