diff mbox

[5/5] drm/amdgpu: replace iova debugfs file with iomem

Message ID 20180202190948.2654-5-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Christian König Feb. 2, 2018, 7:09 p.m. UTC
This allows access to pages allocated through the driver with optional
IOMMU mapping.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 57 ++++++++++++++++++++-------------
 1 file changed, 35 insertions(+), 22 deletions(-)

Comments

StDenis, Tom Feb. 9, 2018, 1:32 p.m. UTC | #1
On 02/02/18 02:09 PM, Christian König wrote:
> This allows access to pages allocated through the driver with optional
> IOMMU mapping.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 57 ++++++++++++++++++++-------------
>   1 file changed, 35 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 648c449aaa79..795ceaeb82d5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1929,38 +1929,51 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
>   
>   #endif
>   
> -static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
> -				   size_t size, loff_t *pos)
> +static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
> +				 size_t size, loff_t *pos)
>   {
>   	struct amdgpu_device *adev = file_inode(f)->i_private;
> -	int r;
> -	uint64_t phys;
>   	struct iommu_domain *dom;
> +	ssize_t result = 0;
> +	int r;
>   
> -	// always return 8 bytes
> -	if (size != 8)
> -		return -EINVAL;
> +	dom = iommu_get_domain_for_dev(adev->dev);
>   
> -	// only accept page addresses
> -	if (*pos & 0xFFF)
> -		return -EINVAL;
> +	while (size) {
> +		phys_addr_t addr = *pos & PAGE_MASK;
> +		loff_t off = *pos & ~PAGE_MASK;
> +		size_t bytes = PAGE_SIZE - off;
> +		unsigned long pfn;
> +		struct page *p;
> +		void *ptr;
>   
> -	dom = iommu_get_domain_for_dev(adev->dev);
> -	if (dom)
> -		phys = iommu_iova_to_phys(dom, *pos);
> -	else
> -		phys = *pos;
> +		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
>   
> -	r = copy_to_user(buf, &phys, 8);
> -	if (r)
> -		return -EFAULT;
> +		pfn = addr >> PAGE_SHIFT;
> +		if (!pfn_valid(pfn))
> +			return -EPERM;
> +
> +		p = pfn_to_page(pfn);
> +		if (p->mapping != adev->mman.bdev.dev_mapping)
> +			return -EPERM;

This comparison fails for both IOMMU and non-IOMMU devices in my 
carrizo+polaris10 box.

The address being read from is what the VM decodes to (checked with strace).

Tom
Christian König Feb. 9, 2018, 1:56 p.m. UTC | #2
Am 09.02.2018 um 14:32 schrieb Tom St Denis:
> On 02/02/18 02:09 PM, Christian König wrote:
>> [SNIP]
>> +        if (p->mapping != adev->mman.bdev.dev_mapping)
>> +            return -EPERM;
>
> This comparison fails for both IOMMU and non-IOMMU devices in my 
> carrizo+polaris10 box.
>
> The address being read from is what the VM decodes to (checked with 
> strace).

Have you applied the whole series? That patches before this one are 
necessary to initialize p->mapping when there isn't any userspace 
mapping for the page.

Christian.

>
> Tom
StDenis, Tom Feb. 9, 2018, 2:02 p.m. UTC | #3
On 09/02/18 08:56 AM, Christian König wrote:
> Am 09.02.2018 um 14:32 schrieb Tom St Denis:
>> On 02/02/18 02:09 PM, Christian König wrote:
>>> [SNIP]
>>> +        if (p->mapping != adev->mman.bdev.dev_mapping)
>>> +            return -EPERM;
>>
>> This comparison fails for both IOMMU and non-IOMMU devices in my 
>> carrizo+polaris10 box.
>>
>> The address being read from is what the VM decodes to (checked with 
>> strace).
> 
> Have you applied the whole series? That patches before this one are 
> necessary to initialize p->mapping when there isn't any userspace 
> mapping for the page.


Yes, I have the entire 5 pages applied to a temp branch based on the tip 
of drm-next

$ git log --oneline HEAD~10..
405bc1dc85db (HEAD -> iomem) wip
a06d7a6f29e4 drm/amdgpu: replace iova debugfs file with iomem
d324c21f2c5e drm/ttm: set page mapping during allocation
9f440ee91c58 drm/radeon: remove extra TT unpopulated check
f55d505b0387 drm/amdgpu: remove extra TT unpopulated check
37d705119ea8 drm/ttm: add ttm_tt_populate wrapper
53af6035d04b (origin/amd-staging-drm-next, amd-staging-drm-next) 
drm/radeon: only enable swiotlb path when need v2

(the wip is me adding printks to see which error path is taken).

I don't see an init call for adev->mman.bdev.man[TTM_PL_SYSTEM] 
anywhere.  Maybe that's related?

Tom
Christian König Feb. 9, 2018, 2:12 p.m. UTC | #4
Am 09.02.2018 um 15:02 schrieb Tom St Denis:
> On 09/02/18 08:56 AM, Christian König wrote:
>> Am 09.02.2018 um 14:32 schrieb Tom St Denis:
>>> On 02/02/18 02:09 PM, Christian König wrote:
>>>> [SNIP]
>>>> +        if (p->mapping != adev->mman.bdev.dev_mapping)
>>>> +            return -EPERM;
>>>
>>> This comparison fails for both IOMMU and non-IOMMU devices in my 
>>> carrizo+polaris10 box.
>>>
>>> The address being read from is what the VM decodes to (checked with 
>>> strace).
>>
>> Have you applied the whole series? That patches before this one are 
>> necessary to initialize p->mapping when there isn't any userspace 
>> mapping for the page.
>
>
> Yes, I have the entire 5 pages applied to a temp branch based on the 
> tip of drm-next
>
> $ git log --oneline HEAD~10..
> 405bc1dc85db (HEAD -> iomem) wip
> a06d7a6f29e4 drm/amdgpu: replace iova debugfs file with iomem
> d324c21f2c5e drm/ttm: set page mapping during allocation
> 9f440ee91c58 drm/radeon: remove extra TT unpopulated check
> f55d505b0387 drm/amdgpu: remove extra TT unpopulated check
> 37d705119ea8 drm/ttm: add ttm_tt_populate wrapper
> 53af6035d04b (origin/amd-staging-drm-next, amd-staging-drm-next) 
> drm/radeon: only enable swiotlb path when need v2
>
> (the wip is me adding printks to see which error path is taken).
>
> I don't see an init call for adev->mman.bdev.man[TTM_PL_SYSTEM] 
> anywhere.  Maybe that's related?

No, there is simply no need to initialize the system domain. What are 
the values of p->mapping and adev->mman.bdev.dev_mapping when they don't 
match? Maybe we are allocating memory before initializing 
adev->mman.bdev.dev_mapping.

Or do you have more than one GPU in the system? E.g. APU+dGPU? Could it 
be that you read through the wrong device?

Christian.

>
> Tom
StDenis, Tom Feb. 9, 2018, 2:51 p.m. UTC | #5
On 09/02/18 09:12 AM, Christian König wrote:
> No, there is simply no need to initialize the system domain. What are 
> the values of p->mapping and adev->mman.bdev.dev_mapping when they don't 
> match? Maybe we are allocating memory before initializing 
> adev->mman.bdev.dev_mapping.

In my test setup I'm running test 3 from libdrm (suite 1) with a pause 
before the unmap/free call.  So the IB should still be mapped.  Indeed 
the VM PTE decoding has the V bit set.

> Or do you have more than one GPU in the system? E.g. APU+dGPU? Could it 
> be that you read through the wrong device?

I found the issue:

	while (size) {
		phys_addr_t addr = *pos & PAGE_MASK;
		loff_t off = *pos & ~PAGE_MASK;
		size_t bytes = PAGE_SIZE - off;

"bytes" should be limited by the 'size' parameter passed in.  What is 
happening instead is it's reading the entire PTB until it hits a V=0 
page and then returns an error and in the process is doing "fun things" 
to the user mode application (by copying more data than I asked for).


Tom
Christian König Feb. 9, 2018, 2:56 p.m. UTC | #6
Am 09.02.2018 um 15:51 schrieb Tom St Denis:
> On 09/02/18 09:12 AM, Christian König wrote:
>> No, there is simply no need to initialize the system domain. What are 
>> the values of p->mapping and adev->mman.bdev.dev_mapping when they 
>> don't match? Maybe we are allocating memory before initializing 
>> adev->mman.bdev.dev_mapping.
>
> In my test setup I'm running test 3 from libdrm (suite 1) with a pause 
> before the unmap/free call.  So the IB should still be mapped.  Indeed 
> the VM PTE decoding has the V bit set.
>
>> Or do you have more than one GPU in the system? E.g. APU+dGPU? Could 
>> it be that you read through the wrong device?
>
> I found the issue:
>
>     while (size) {
>         phys_addr_t addr = *pos & PAGE_MASK;
>         loff_t off = *pos & ~PAGE_MASK;
>         size_t bytes = PAGE_SIZE - off;
>
> "bytes" should be limited by the 'size' parameter passed in.  What is 
> happening instead is it's reading the entire PTB until it hits a V=0 
> page and then returns an error and in the process is doing "fun 
> things" to the user mode application (by copying more data than I 
> asked for).

Ah, obvious problem.

Do you want to fix it or should I take a look? You wanted to add write 
support as well anyway IIRC.

I've just pushed the first three patches from that series to 
amd-staging-drm-next.

Thanks for testing,
Christian.

>
>
> Tom
StDenis, Tom Feb. 9, 2018, 4:23 p.m. UTC | #7
On 09/02/18 09:56 AM, Christian König wrote:
> Am 09.02.2018 um 15:51 schrieb Tom St Denis:
>> On 09/02/18 09:12 AM, Christian König wrote:
>>> No, there is simply no need to initialize the system domain. What are 
>>> the values of p->mapping and adev->mman.bdev.dev_mapping when they 
>>> don't match? Maybe we are allocating memory before initializing 
>>> adev->mman.bdev.dev_mapping.
>>
>> In my test setup I'm running test 3 from libdrm (suite 1) with a pause 
>> before the unmap/free call.  So the IB should still be mapped.  Indeed 
>> the VM PTE decoding has the V bit set.
>>
>>> Or do you have more than one GPU in the system? E.g. APU+dGPU? Could 
>>> it be that you read through the wrong device?
>>
>> I found the issue:
>>
>>     while (size) {
>>         phys_addr_t addr = *pos & PAGE_MASK;
>>         loff_t off = *pos & ~PAGE_MASK;
>>         size_t bytes = PAGE_SIZE - off;
>>
>> "bytes" should be limited by the 'size' parameter passed in.  What is 
>> happening instead is it's reading the entire PTB until it hits a V=0 
>> page and then returns an error and in the process is doing "fun 
>> things" to the user mode application (by copying more data than I 
>> asked for).
> 
> Ah, obvious problem.
> 
> Do you want to fix it or should I take a look? You wanted to add write 
> support as well anyway IIRC.

Yup, I can tackle this this afternoon.

I'll take your read only patch and make it do both read/write (and fix 
the minor error).

Tom
diff mbox

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 648c449aaa79..795ceaeb82d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1929,38 +1929,51 @@  static const struct file_operations amdgpu_ttm_gtt_fops = {
 
 #endif
 
-static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
-				   size_t size, loff_t *pos)
+static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
+				 size_t size, loff_t *pos)
 {
 	struct amdgpu_device *adev = file_inode(f)->i_private;
-	int r;
-	uint64_t phys;
 	struct iommu_domain *dom;
+	ssize_t result = 0;
+	int r;
 
-	// always return 8 bytes
-	if (size != 8)
-		return -EINVAL;
+	dom = iommu_get_domain_for_dev(adev->dev);
 
-	// only accept page addresses
-	if (*pos & 0xFFF)
-		return -EINVAL;
+	while (size) {
+		phys_addr_t addr = *pos & PAGE_MASK;
+		loff_t off = *pos & ~PAGE_MASK;
+		size_t bytes = PAGE_SIZE - off;
+		unsigned long pfn;
+		struct page *p;
+		void *ptr;
 
-	dom = iommu_get_domain_for_dev(adev->dev);
-	if (dom)
-		phys = iommu_iova_to_phys(dom, *pos);
-	else
-		phys = *pos;
+		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
 
-	r = copy_to_user(buf, &phys, 8);
-	if (r)
-		return -EFAULT;
+		pfn = addr >> PAGE_SHIFT;
+		if (!pfn_valid(pfn))
+			return -EPERM;
+
+		p = pfn_to_page(pfn);
+		if (p->mapping != adev->mman.bdev.dev_mapping)
+			return -EPERM;
+
+		ptr = kmap(p);
+		r = copy_to_user(buf, ptr, bytes);
+		kunmap(p);
+		if (r)
+			return -EFAULT;
 
-	return 8;
+		size -= bytes;
+		*pos += bytes;
+		result += bytes;
+	}
+
+	return result;
 }
 
-static const struct file_operations amdgpu_ttm_iova_fops = {
+static const struct file_operations amdgpu_ttm_iomem_fops = {
 	.owner = THIS_MODULE,
-	.read = amdgpu_iova_to_phys_read,
+	.read = amdgpu_iomem_read,
 	.llseek = default_llseek
 };
 
@@ -1973,7 +1986,7 @@  static const struct {
 #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
 	{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
 #endif
-	{ "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
+	{ "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM },
 };
 
 #endif