diff mbox

arm64/dma-mapping: fix DMA_ATTR_FORCE_CONTIGUOUS mmaping code

Message ID 1490781926-6209-1-git-send-email-a.hajda@samsung.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andrzej Hajda March 29, 2017, 10:05 a.m. UTC
In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
it. In the first case a temporary pages array is passed to iommu_dma_mmap;
in the second case a single-entry sg table is created directly instead
of calling the helper.

Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
---
Hi,

I am not familiar with this framework so please don't be too cruel ;)
Alternative solution I see is to always create vm_area->pages,
I do not know which approach is preferred.

Regards
Andrzej
---
 arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

Comments

Geert Uytterhoeven March 29, 2017, 12:54 p.m. UTC | #1
Hi Andrzej,

On Wed, Mar 29, 2017 at 12:05 PM, Andrzej Hajda <a.hajda@samsung.com> wrote:
> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
> it. In first case temporary pages array is passed to iommu_dma_mmap,
> in 2nd case single entry sg table is created directly instead
> of calling helper.
>
> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
> ---
> Hi,
>
> I am not familiar with this framework so please don't be too cruel ;)
> Alternative solution I see is to always create vm_area->pages,
> I do not know which approach is preferred.
>
> Regards
> Andrzej
> ---
>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 38 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index f7b5401..bba2bc8 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>                 return ret;
>
>         area = find_vm_area(cpu_addr);
> -       if (WARN_ON(!area || !area->pages))
> +       if (WARN_ON(!area))
> +               return -ENXIO;
> +
> +       if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> +               struct page *page = vmalloc_to_page(cpu_addr);
> +               unsigned int count = size >> PAGE_SHIFT;
> +               struct page **pages;
> +               unsigned long pfn;
> +               int i;
> +
> +               pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
> +               if (!pages)
> +                       return -ENOMEM;
> +
> +               for (i = 0, pfn = page_to_pfn(page); i < count; i++)
> +                       pages[i] = pfn_to_page(pfn + i);

If we're gonna allocate and set up such an array over and over again
(dma_common_contiguous_remap() has already done the same), we may as well
just keep the initial array.

Hence replace the call to dma_common_contiguous_remap() in
__iommu_alloc_attrs() by an open coded version that doesn't free the pages,
and handle the other operations like the !DMA_ATTR_FORCE_CONTIGUOUS
case.

> +
> +               ret = iommu_dma_mmap(pages, size, vma);
> +               kfree(pages);
> +
> +               return ret;
> +       }
> +
> +       if (WARN_ON(!area->pages))
>                 return -ENXIO;
>
>         return iommu_dma_mmap(area->pages, size, vma);
> @@ -717,7 +740,20 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
>         unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
>         struct vm_struct *area = find_vm_area(cpu_addr);
>
> -       if (WARN_ON(!area || !area->pages))
> +       if (WARN_ON(!area))
> +               return -ENXIO;
> +
> +       if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> +               int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
> +
> +               if (!ret)
> +                       sg_set_page(sgt->sgl, vmalloc_to_page(cpu_addr),
> +                               PAGE_ALIGN(size), 0);
> +
> +               return ret;
> +       }
> +
> +       if (WARN_ON(!area->pages))
>                 return -ENXIO;
>
>         return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Robin Murphy March 29, 2017, 12:55 p.m. UTC | #2
On 29/03/17 11:05, Andrzej Hajda wrote:
> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
> it. In first case temporary pages array is passed to iommu_dma_mmap,
> in 2nd case single entry sg table is created directly instead
> of calling helper.
> 
> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
> ---
> Hi,
> 
> I am not familiar with this framework so please don't be too cruel ;)
> Alternative solution I see is to always create vm_area->pages,
> I do not know which approach is preferred.
> 
> Regards
> Andrzej
> ---
>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 38 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index f7b5401..bba2bc8 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>  		return ret;
>  
>  	area = find_vm_area(cpu_addr);
> -	if (WARN_ON(!area || !area->pages))
> +	if (WARN_ON(!area))

From the look of things, it doesn't seem strictly necessary to change
this, but whether that's a good thing is another matter. I'm not sure
that dma_common_contiguous_remap() should really be leaving a dangling
pointer in area->pages as it apparently does... :/

> +		return -ENXIO;
> +
> +	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> +		struct page *page = vmalloc_to_page(cpu_addr);
> +		unsigned int count = size >> PAGE_SHIFT;
> +		struct page **pages;
> +		unsigned long pfn;
> +		int i;
> +
> +		pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
> +		if (!pages)
> +			return -ENOMEM;
> +
> +		for (i = 0, pfn = page_to_pfn(page); i < count; i++)
> +			pages[i] = pfn_to_page(pfn + i);
> +
> +		ret = iommu_dma_mmap(pages, size, vma);

/**
 * iommu_dma_mmap - Map a buffer into provided user VMA
 * @pages: Array representing buffer from iommu_dma_alloc()
   ...

In this case, the buffer has not come from iommu_dma_alloc(), so passing
into iommu_dma_mmap() is wrong by contract, even if having to fake up a
page array wasn't enough of a red flag. Given that a FORCE_CONTIGUOUS
allocation is essentially the same as for the non-IOMMU case, I think it
should be sufficient to defer to that path, i.e.:

	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
		return __swiotlb_mmap(dev, vma, cpu_addr,
				phys_to_dma(virt_to_phys(cpu_addr)),
				size, attrs);

> +		kfree(pages);
> +
> +		return ret;
> +	}
> +
> +	if (WARN_ON(!area->pages))
>  		return -ENXIO;
>  
>  	return iommu_dma_mmap(area->pages, size, vma);
> @@ -717,7 +740,20 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
>  	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
>  	struct vm_struct *area = find_vm_area(cpu_addr);
>  
> -	if (WARN_ON(!area || !area->pages))
> +	if (WARN_ON(!area))
> +		return -ENXIO;
> +
> +	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
> +		int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
> +
> +		if (!ret)
> +			sg_set_page(sgt->sgl, vmalloc_to_page(cpu_addr),
> +				PAGE_ALIGN(size), 0);
> +
> +		return ret;
> +	}

As above, this may as well just go straight to the non-IOMMU version,
although I agree with Russell's concerns[1] that in general it is not
safe to assume a coherent allocation is backed by struct pages at all.

Robin.

[1]:http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/496068.html

> +
> +	if (WARN_ON(!area->pages))
>  		return -ENXIO;
>  
>  	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
>
Andrzej Hajda March 29, 2017, 3:12 p.m. UTC | #3
On 29.03.2017 14:55, Robin Murphy wrote:
> On 29/03/17 11:05, Andrzej Hajda wrote:
>> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
>> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
>> it. In first case temporary pages array is passed to iommu_dma_mmap,
>> in 2nd case single entry sg table is created directly instead
>> of calling helper.
>>
>> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
>> ---
>> Hi,
>>
>> I am not familiar with this framework so please don't be too cruel ;)
>> Alternative solution I see is to always create vm_area->pages,
>> I do not know which approach is preferred.
>>
>> Regards
>> Andrzej
>> ---
>>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
>> index f7b5401..bba2bc8 100644
>> --- a/arch/arm64/mm/dma-mapping.c
>> +++ b/arch/arm64/mm/dma-mapping.c
>> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>>  		return ret;
>>  
>>  	area = find_vm_area(cpu_addr);
>> -	if (WARN_ON(!area || !area->pages))
>> +	if (WARN_ON(!area))
> From the look of things, it doesn't seem strictly necessary to change
> this, but whether that's a good thing is another matter. I'm not sure
> that dma_common_contiguous_remap() should really be leaving a dangling
> pointer in area->pages as it apparently does... :/
>
>> +		return -ENXIO;
>> +
>> +	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>> +		struct page *page = vmalloc_to_page(cpu_addr);
>> +		unsigned int count = size >> PAGE_SHIFT;
>> +		struct page **pages;
>> +		unsigned long pfn;
>> +		int i;
>> +
>> +		pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
>> +		if (!pages)
>> +			return -ENOMEM;
>> +
>> +		for (i = 0, pfn = page_to_pfn(page); i < count; i++)
>> +			pages[i] = pfn_to_page(pfn + i);
>> +
>> +		ret = iommu_dma_mmap(pages, size, vma);
> /**
>  * iommu_dma_mmap - Map a buffer into provided user VMA
>  * @pages: Array representing buffer from iommu_dma_alloc()
>    ...
>
> In this case, the buffer has not come from iommu_dma_alloc(), so passing
> into iommu_dma_mmap() is wrong by contract, even if having to fake up a
> page array wasn't enough of a red flag. Given that a FORCE_CONTIGUOUS
> allocation is essentially the same as for the non-IOMMU case, I think it
> should be sufficient to defer to that path, i.e.:
>
> 	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
> 		return __swiotlb_mmap(dev, vma, cpu_addr,
> 				phys_to_dma(virt_to_phys(cpu_addr)),
> 				size, attrs);

Maybe I have made a mistake somewhere but it does not work, here and below
(hangs or crashes). I suppose it can be due to different address
translations, my patch uses
    page = vmalloc_to_page(cpu_addr).
And here we have:
    handle = phys_to_dma(dev, virt_to_phys(cpu_addr)); // in
__iommu_mmap_attrs
    page = phys_to_page(dma_to_phys(dev, handle)); // in
__swiotlb_get_sgtable
I guess it is similarly in __swiotlb_mmap.

Are these translations equivalent?


Regards
Andrzej

>> +		kfree(pages);
>> +
>> +		return ret;
>> +	}
>> +
>> +	if (WARN_ON(!area->pages))
>>  		return -ENXIO;
>>  
>>  	return iommu_dma_mmap(area->pages, size, vma);
>> @@ -717,7 +740,20 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
>>  	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
>>  	struct vm_struct *area = find_vm_area(cpu_addr);
>>  
>> -	if (WARN_ON(!area || !area->pages))
>> +	if (WARN_ON(!area))
>> +		return -ENXIO;
>> +
>> +	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>> +		int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
>> +
>> +		if (!ret)
>> +			sg_set_page(sgt->sgl, vmalloc_to_page(cpu_addr),
>> +				PAGE_ALIGN(size), 0);
>> +
>> +		return ret;
>> +	}
> As above, this may as well just go straight to the non-IOMMU version,
> although I agree with Russell's concerns[1] that in general is is not
> safe to assume a coherent allocation is backed by struct pages at all.
>
> Robin.
>
> [1]:http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/496068.html
>
>> +
>> +	if (WARN_ON(!area->pages))
>>  		return -ENXIO;
>>  
>>  	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
>>
>
>
>
Robin Murphy March 29, 2017, 3:33 p.m. UTC | #4
On 29/03/17 16:12, Andrzej Hajda wrote:
> On 29.03.2017 14:55, Robin Murphy wrote:
>> On 29/03/17 11:05, Andrzej Hajda wrote:
>>> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
>>> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
>>> it. In first case temporary pages array is passed to iommu_dma_mmap,
>>> in 2nd case single entry sg table is created directly instead
>>> of calling helper.
>>>
>>> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
>>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
>>> ---
>>> Hi,
>>>
>>> I am not familiar with this framework so please don't be too cruel ;)
>>> Alternative solution I see is to always create vm_area->pages,
>>> I do not know which approach is preferred.
>>>
>>> Regards
>>> Andrzej
>>> ---
>>>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
>>> index f7b5401..bba2bc8 100644
>>> --- a/arch/arm64/mm/dma-mapping.c
>>> +++ b/arch/arm64/mm/dma-mapping.c
>>> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>>>  		return ret;
>>>  
>>>  	area = find_vm_area(cpu_addr);
>>> -	if (WARN_ON(!area || !area->pages))
>>> +	if (WARN_ON(!area))
>> From the look of things, it doesn't seem strictly necessary to change
>> this, but whether that's a good thing is another matter. I'm not sure
>> that dma_common_contiguous_remap() should really be leaving a dangling
>> pointer in area->pages as it apparently does... :/
>>
>>> +		return -ENXIO;
>>> +
>>> +	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>>> +		struct page *page = vmalloc_to_page(cpu_addr);
>>> +		unsigned int count = size >> PAGE_SHIFT;
>>> +		struct page **pages;
>>> +		unsigned long pfn;
>>> +		int i;
>>> +
>>> +		pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
>>> +		if (!pages)
>>> +			return -ENOMEM;
>>> +
>>> +		for (i = 0, pfn = page_to_pfn(page); i < count; i++)
>>> +			pages[i] = pfn_to_page(pfn + i);
>>> +
>>> +		ret = iommu_dma_mmap(pages, size, vma);
>> /**
>>  * iommu_dma_mmap - Map a buffer into provided user VMA
>>  * @pages: Array representing buffer from iommu_dma_alloc()
>>    ...
>>
>> In this case, the buffer has not come from iommu_dma_alloc(), so passing
>> into iommu_dma_mmap() is wrong by contract, even if having to fake up a
>> page array wasn't enough of a red flag. Given that a FORCE_CONTIGUOUS
>> allocation is essentially the same as for the non-IOMMU case, I think it
>> should be sufficient to defer to that path, i.e.:
>>
>> 	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
>> 		return __swiotlb_mmap(dev, vma, cpu_addr,
>> 				phys_to_dma(virt_to_phys(cpu_addr)),
>> 				size, attrs);
> 
> Maybe I have make mistake somewhere but it does not work, here and below
> (hangs or crashes). I suppose it can be due to different address
> translations, my patch uses
>     page = vmalloc_to_page(cpu_addr).
> And here we have:
>     handle = phys_to_dma(dev, virt_to_phys(cpu_addr)); // in
> __iommu_mmap_attrs
>     page = phys_to_page(dma_to_phys(dev, handle)); // in
> __swiotlb_get_sgtable
> I guess it is similarly in __swiotlb_mmap.
> 
> Are these translations equivalent?

Ah, my mistake, sorry - I managed to forget that cpu_addr is always
remapped for FORCE_CONTIGUOUS (*and* somehow overlook the use of
vmalloc_to_page() in the patch itself), so the virt_to_phys() part of my
example is indeed bogus. The general point still stands, though.

Robin.

> 
> 
> Regards
> Andrzej
> 
>>> +		kfree(pages);
>>> +
>>> +		return ret;
>>> +	}
>>> +
>>> +	if (WARN_ON(!area->pages))
>>>  		return -ENXIO;
>>>  
>>>  	return iommu_dma_mmap(area->pages, size, vma);
>>> @@ -717,7 +740,20 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
>>>  	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
>>>  	struct vm_struct *area = find_vm_area(cpu_addr);
>>>  
>>> -	if (WARN_ON(!area || !area->pages))
>>> +	if (WARN_ON(!area))
>>> +		return -ENXIO;
>>> +
>>> +	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>>> +		int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
>>> +
>>> +		if (!ret)
>>> +			sg_set_page(sgt->sgl, vmalloc_to_page(cpu_addr),
>>> +				PAGE_ALIGN(size), 0);
>>> +
>>> +		return ret;
>>> +	}
>> As above, this may as well just go straight to the non-IOMMU version,
>> although I agree with Russell's concerns[1] that in general is is not
>> safe to assume a coherent allocation is backed by struct pages at all.
>>
>> Robin.
>>
>> [1]:http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/496068.html
>>
>>> +
>>> +	if (WARN_ON(!area->pages))
>>>  		return -ENXIO;
>>>  
>>>  	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
>>>
>>
>>
>>
>
Andrzej Hajda March 30, 2017, 6:51 a.m. UTC | #5
On 29.03.2017 17:33, Robin Murphy wrote:
> On 29/03/17 16:12, Andrzej Hajda wrote:
>> On 29.03.2017 14:55, Robin Murphy wrote:
>>> On 29/03/17 11:05, Andrzej Hajda wrote:
>>>> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
>>>> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
>>>> it. In first case temporary pages array is passed to iommu_dma_mmap,
>>>> in 2nd case single entry sg table is created directly instead
>>>> of calling helper.
>>>>
>>>> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
>>>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
>>>> ---
>>>> Hi,
>>>>
>>>> I am not familiar with this framework so please don't be too cruel ;)
>>>> Alternative solution I see is to always create vm_area->pages,
>>>> I do not know which approach is preferred.
>>>>
>>>> Regards
>>>> Andrzej
>>>> ---
>>>>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>>>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
>>>> index f7b5401..bba2bc8 100644
>>>> --- a/arch/arm64/mm/dma-mapping.c
>>>> +++ b/arch/arm64/mm/dma-mapping.c
>>>> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>>>>  		return ret;
>>>>  
>>>>  	area = find_vm_area(cpu_addr);
>>>> -	if (WARN_ON(!area || !area->pages))
>>>> +	if (WARN_ON(!area))
>>> From the look of things, it doesn't seem strictly necessary to change
>>> this, but whether that's a good thing is another matter. I'm not sure
>>> that dma_common_contiguous_remap() should really be leaving a dangling
>>> pointer in area->pages as it apparently does... :/
>>>
>>>> +		return -ENXIO;
>>>> +
>>>> +	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>>>> +		struct page *page = vmalloc_to_page(cpu_addr);
>>>> +		unsigned int count = size >> PAGE_SHIFT;
>>>> +		struct page **pages;
>>>> +		unsigned long pfn;
>>>> +		int i;
>>>> +
>>>> +		pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
>>>> +		if (!pages)
>>>> +			return -ENOMEM;
>>>> +
>>>> +		for (i = 0, pfn = page_to_pfn(page); i < count; i++)
>>>> +			pages[i] = pfn_to_page(pfn + i);
>>>> +
>>>> +		ret = iommu_dma_mmap(pages, size, vma);
>>> /**
>>>  * iommu_dma_mmap - Map a buffer into provided user VMA
>>>  * @pages: Array representing buffer from iommu_dma_alloc()
>>>    ...
>>>
>>> In this case, the buffer has not come from iommu_dma_alloc(), so passing
>>> into iommu_dma_mmap() is wrong by contract, even if having to fake up a
>>> page array wasn't enough of a red flag. Given that a FORCE_CONTIGUOUS
>>> allocation is essentially the same as for the non-IOMMU case, I think it
>>> should be sufficient to defer to that path, i.e.:
>>>
>>> 	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
>>> 		return __swiotlb_mmap(dev, vma, cpu_addr,
>>> 				phys_to_dma(virt_to_phys(cpu_addr)),
>>> 				size, attrs);
>> Maybe I have make mistake somewhere but it does not work, here and below
>> (hangs or crashes). I suppose it can be due to different address
>> translations, my patch uses
>>     page = vmalloc_to_page(cpu_addr).
>> And here we have:
>>     handle = phys_to_dma(dev, virt_to_phys(cpu_addr)); // in
>> __iommu_mmap_attrs
>>     page = phys_to_page(dma_to_phys(dev, handle)); // in
>> __swiotlb_get_sgtable
>> I guess it is similarly in __swiotlb_mmap.
>>
>> Are these translations equivalent?
> Ah, my mistake, sorry - I managed to forget that cpu_addr is always
> remapped for FORCE_CONTIGUOUS (*and* somehow overlook the use of
> vmalloc_to_page() in the patch itself), so the virt_to_phys() part of my
> example is indeed bogus. The general point still stands, though.

I guess Geert's proposition to create the pages permanently is also not
acceptable[2]. So how do we fix the crashes which appeared after the patch
adding support for DMA_ATTR_FORCE_CONTIGUOUS in IOMMU on arm64 [1]?
Maybe a temporary solution is to drop the patch until proper handling of
mmapping is proposed; without it the patch looks incomplete and causes a
regression.
Moreover, __iommu_alloc_attrs also uses dma_common_contiguous_remap, which
likewise assumes the existence of struct pages.

[1]: https://patchwork.kernel.org/patch/9609551/
[2]: https://www.spinics.net/lists/arm-kernel/msg572688.html

Regards
Andrzej


>
> Robin.
>
>>
>> Regards
>> Andrzej
>>
>>>> +		kfree(pages);
>>>> +
>>>> +		return ret;
>>>> +	}
>>>> +
>>>> +	if (WARN_ON(!area->pages))
>>>>  		return -ENXIO;
>>>>  
>>>>  	return iommu_dma_mmap(area->pages, size, vma);
>>>> @@ -717,7 +740,20 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
>>>>  	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
>>>>  	struct vm_struct *area = find_vm_area(cpu_addr);
>>>>  
>>>> -	if (WARN_ON(!area || !area->pages))
>>>> +	if (WARN_ON(!area))
>>>> +		return -ENXIO;
>>>> +
>>>> +	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>>>> +		int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
>>>> +
>>>> +		if (!ret)
>>>> +			sg_set_page(sgt->sgl, vmalloc_to_page(cpu_addr),
>>>> +				PAGE_ALIGN(size), 0);
>>>> +
>>>> +		return ret;
>>>> +	}
>>> As above, this may as well just go straight to the non-IOMMU version,
>>> although I agree with Russell's concerns[1] that in general is is not
>>> safe to assume a coherent allocation is backed by struct pages at all.
>>>
>>> Robin.
>>>
>>> [1]:http://lists.infradead.org/pipermail/linux-arm-kernel/2017-March/496068.html
>>>
>>>> +
>>>> +	if (WARN_ON(!area->pages))
>>>>  		return -ENXIO;
>>>>  
>>>>  	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
>>>>
>>>
>>>
>
>
>
Geert Uytterhoeven March 30, 2017, 7:40 a.m. UTC | #6
Hi Andrzej,

On Thu, Mar 30, 2017 at 8:51 AM, Andrzej Hajda <a.hajda@samsung.com> wrote:
> On 29.03.2017 17:33, Robin Murphy wrote:
>> On 29/03/17 16:12, Andrzej Hajda wrote:
>>> On 29.03.2017 14:55, Robin Murphy wrote:
>>>> On 29/03/17 11:05, Andrzej Hajda wrote:
>>>>> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
>>>>> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
>>>>> it. In first case temporary pages array is passed to iommu_dma_mmap,
>>>>> in 2nd case single entry sg table is created directly instead
>>>>> of calling helper.
>>>>>
>>>>> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
>>>>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
>>>>> ---
>>>>> Hi,
>>>>>
>>>>> I am not familiar with this framework so please don't be too cruel ;)
>>>>> Alternative solution I see is to always create vm_area->pages,
>>>>> I do not know which approach is preferred.
>>>>>
>>>>> Regards
>>>>> Andrzej
>>>>> ---
>>>>>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>>>>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
>>>>> index f7b5401..bba2bc8 100644
>>>>> --- a/arch/arm64/mm/dma-mapping.c
>>>>> +++ b/arch/arm64/mm/dma-mapping.c
>>>>> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>>>>>            return ret;
>>>>>
>>>>>    area = find_vm_area(cpu_addr);
>>>>> -  if (WARN_ON(!area || !area->pages))
>>>>> +  if (WARN_ON(!area))
>>>> From the look of things, it doesn't seem strictly necessary to change
>>>> this, but whether that's a good thing is another matter. I'm not sure
>>>> that dma_common_contiguous_remap() should really be leaving a dangling
>>>> pointer in area->pages as it apparently does... :/
>>>>
>>>>> +          return -ENXIO;
>>>>> +
>>>>> +  if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>>>>> +          struct page *page = vmalloc_to_page(cpu_addr);
>>>>> +          unsigned int count = size >> PAGE_SHIFT;
>>>>> +          struct page **pages;
>>>>> +          unsigned long pfn;
>>>>> +          int i;
>>>>> +
>>>>> +          pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
>>>>> +          if (!pages)
>>>>> +                  return -ENOMEM;
>>>>> +
>>>>> +          for (i = 0, pfn = page_to_pfn(page); i < count; i++)
>>>>> +                  pages[i] = pfn_to_page(pfn + i);
>>>>> +
>>>>> +          ret = iommu_dma_mmap(pages, size, vma);
>>>> /**
>>>>  * iommu_dma_mmap - Map a buffer into provided user VMA
>>>>  * @pages: Array representing buffer from iommu_dma_alloc()
>>>>    ...
>>>>
>>>> In this case, the buffer has not come from iommu_dma_alloc(), so passing
>>>> into iommu_dma_mmap() is wrong by contract, even if having to fake up a
>>>> page array wasn't enough of a red flag. Given that a FORCE_CONTIGUOUS
>>>> allocation is essentially the same as for the non-IOMMU case, I think it
>>>> should be sufficient to defer to that path, i.e.:
>>>>
>>>>     if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
>>>>             return __swiotlb_mmap(dev, vma, cpu_addr,
>>>>                             phys_to_dma(virt_to_phys(cpu_addr)),
>>>>                             size, attrs);
>>> Maybe I have make mistake somewhere but it does not work, here and below
>>> (hangs or crashes). I suppose it can be due to different address
>>> translations, my patch uses
>>>     page = vmalloc_to_page(cpu_addr).
>>> And here we have:
>>>     handle = phys_to_dma(dev, virt_to_phys(cpu_addr)); // in
>>> __iommu_mmap_attrs
>>>     page = phys_to_page(dma_to_phys(dev, handle)); // in
>>> __swiotlb_get_sgtable
>>> I guess it is similarly in __swiotlb_mmap.
>>>
>>> Are these translations equivalent?
>> Ah, my mistake, sorry - I managed to forget that cpu_addr is always
>> remapped for FORCE_CONTIGUOUS (*and* somehow overlook the use of
>> vmalloc_to_page() in the patch itself), so the virt_to_phys() part of my
>> example is indeed bogus. The general point still stands, though.
>
> I guess Geert's proposition to create pages permanently is also not
> acceptable[2]. So how to fix crashes which appeared after patch adding

If I'm not mistaken, creating the pages permanently is what the
!DMA_ATTR_FORCE_CONTIGUOUS case does? The only difference is where
the underlying memory is allocated from.

Am I missing something?

Thanks!

> support to DMA_ATTR_FORCE_CONTIGUOUS in IOMMU on arm64 [1]?
> Maybe temporary solution is to drop the patch until proper handling of
> mmapping is proposed, without it the patch looks incomplete and causes
> regression.
> Moreover __iommu_alloc_attrs uses also dma_common_contiguous_remap which
> also assumes existence of struct pages.
>
> [1]: https://patchwork.kernel.org/patch/9609551/
> [2]: https://www.spinics.net/lists/arm-kernel/msg572688.html

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Andrzej Hajda March 30, 2017, 8:30 a.m. UTC | #7
On 30.03.2017 09:40, Geert Uytterhoeven wrote:
> Hi Andrzej,
>
> On Thu, Mar 30, 2017 at 8:51 AM, Andrzej Hajda <a.hajda@samsung.com> wrote:
>> On 29.03.2017 17:33, Robin Murphy wrote:
>>> On 29/03/17 16:12, Andrzej Hajda wrote:
>>>> On 29.03.2017 14:55, Robin Murphy wrote:
>>>>> On 29/03/17 11:05, Andrzej Hajda wrote:
>>>>>> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
>>>>>> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
>>>>>> it. In first case temporary pages array is passed to iommu_dma_mmap,
>>>>>> in 2nd case single entry sg table is created directly instead
>>>>>> of calling helper.
>>>>>>
>>>>>> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
>>>>>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
>>>>>> ---
>>>>>> Hi,
>>>>>>
>>>>>> I am not familiar with this framework so please don't be too cruel ;)
>>>>>> Alternative solution I see is to always create vm_area->pages,
>>>>>> I do not know which approach is preferred.
>>>>>>
>>>>>> Regards
>>>>>> Andrzej
>>>>>> ---
>>>>>>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>>>>>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
>>>>>> index f7b5401..bba2bc8 100644
>>>>>> --- a/arch/arm64/mm/dma-mapping.c
>>>>>> +++ b/arch/arm64/mm/dma-mapping.c
>>>>>> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>>>>>>            return ret;
>>>>>>
>>>>>>    area = find_vm_area(cpu_addr);
>>>>>> -  if (WARN_ON(!area || !area->pages))
>>>>>> +  if (WARN_ON(!area))
>>>>> From the look of things, it doesn't seem strictly necessary to change
>>>>> this, but whether that's a good thing is another matter. I'm not sure
>>>>> that dma_common_contiguous_remap() should really be leaving a dangling
>>>>> pointer in area->pages as it apparently does... :/
>>>>>
>>>>>> +          return -ENXIO;
>>>>>> +
>>>>>> +  if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>>>>>> +          struct page *page = vmalloc_to_page(cpu_addr);
>>>>>> +          unsigned int count = size >> PAGE_SHIFT;
>>>>>> +          struct page **pages;
>>>>>> +          unsigned long pfn;
>>>>>> +          int i;
>>>>>> +
>>>>>> +          pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
>>>>>> +          if (!pages)
>>>>>> +                  return -ENOMEM;
>>>>>> +
>>>>>> +          for (i = 0, pfn = page_to_pfn(page); i < count; i++)
>>>>>> +                  pages[i] = pfn_to_page(pfn + i);
>>>>>> +
>>>>>> +          ret = iommu_dma_mmap(pages, size, vma);
>>>>> /**
>>>>>  * iommu_dma_mmap - Map a buffer into provided user VMA
>>>>>  * @pages: Array representing buffer from iommu_dma_alloc()
>>>>>    ...
>>>>>
>>>>> In this case, the buffer has not come from iommu_dma_alloc(), so passing
>>>>> into iommu_dma_mmap() is wrong by contract, even if having to fake up a
>>>>> page array wasn't enough of a red flag. Given that a FORCE_CONTIGUOUS
>>>>> allocation is essentially the same as for the non-IOMMU case, I think it
>>>>> should be sufficient to defer to that path, i.e.:
>>>>>
>>>>>     if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
>>>>>             return __swiotlb_mmap(dev, vma, cpu_addr,
>>>>>                             phys_to_dma(virt_to_phys(cpu_addr)),
>>>>>                             size, attrs);
>>>> Maybe I have made a mistake somewhere but it does not work, here and below
>>>> (hangs or crashes). I suppose it can be due to different address
>>>> translations, my patch uses
>>>>     page = vmalloc_to_page(cpu_addr).
>>>> And here we have:
>>>>     handle = phys_to_dma(dev, virt_to_phys(cpu_addr)); // in
>>>> __iommu_mmap_attrs
>>>>     page = phys_to_page(dma_to_phys(dev, handle)); // in
>>>> __swiotlb_get_sgtable
>>>> I guess it is similarly in __swiotlb_mmap.
>>>>
>>>> Are these translations equivalent?
>>> Ah, my mistake, sorry - I managed to forget that cpu_addr is always
>>> remapped for FORCE_CONTIGUOUS (*and* somehow overlook the use of
>>> vmalloc_to_page() in the patch itself), so the virt_to_phys() part of my
>>> example is indeed bogus. The general point still stands, though.
>> I guess Geert's proposition to create pages permanently is also not
>> acceptable[2]. So how to fix crashes which appeared after patch adding
> If I'm not mistaken, creating the pages permanently is what the
> !DMA_ATTR_FORCE_CONTIGUOUS does? The only difference is where
> the underlying memory is allocated from.
>
> Am I missing something?

Quoting Robin from his response:
> in general it is not
> safe to assume a coherent allocation is backed by struct pages at all

As I said before I am not familiar with the subject, so it is possible I
misunderstood something.

Regards
Andrzej


>
> Thanks!
>
>> support to DMA_ATTR_FORCE_CONTIGUOUS in IOMMU on arm64 [1]?
>> Maybe temporary solution is to drop the patch until proper handling of
>> mmapping is proposed, without it the patch looks incomplete and causes
>> regression.
>> Moreover __iommu_alloc_attrs uses also dma_common_contiguous_remap which
>> also assumes existence of struct pages.
>>
>> [1]: https://patchwork.kernel.org/patch/9609551/
>> [2]: https://www.spinics.net/lists/arm-kernel/msg572688.html
> Gr{oetje,eeting}s,
>
>                         Geert
>
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
>
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
>                                 -- Linus Torvalds
>
>
>
Robin Murphy March 30, 2017, 10:44 a.m. UTC | #8
On 30/03/17 09:30, Andrzej Hajda wrote:
> On 30.03.2017 09:40, Geert Uytterhoeven wrote:
>> Hi Andrzej,
>>
>> On Thu, Mar 30, 2017 at 8:51 AM, Andrzej Hajda <a.hajda@samsung.com> wrote:
>>> On 29.03.2017 17:33, Robin Murphy wrote:
>>>> On 29/03/17 16:12, Andrzej Hajda wrote:
>>>>> On 29.03.2017 14:55, Robin Murphy wrote:
>>>>>> On 29/03/17 11:05, Andrzej Hajda wrote:
>>>>>>> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
>>>>>>> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
>>>>>>> it. In first case temporary pages array is passed to iommu_dma_mmap,
>>>>>>> in 2nd case single entry sg table is created directly instead
>>>>>>> of calling helper.
>>>>>>>
>>>>>>> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
>>>>>>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
>>>>>>> ---
>>>>>>> Hi,
>>>>>>>
>>>>>>> I am not familiar with this framework so please don't be too cruel ;)
>>>>>>> Alternative solution I see is to always create vm_area->pages,
>>>>>>> I do not know which approach is preferred.
>>>>>>>
>>>>>>> Regards
>>>>>>> Andrzej
>>>>>>> ---
>>>>>>>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>>>>>>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
>>>>>>> index f7b5401..bba2bc8 100644
>>>>>>> --- a/arch/arm64/mm/dma-mapping.c
>>>>>>> +++ b/arch/arm64/mm/dma-mapping.c
>>>>>>> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>>>>>>>            return ret;
>>>>>>>
>>>>>>>    area = find_vm_area(cpu_addr);
>>>>>>> -  if (WARN_ON(!area || !area->pages))
>>>>>>> +  if (WARN_ON(!area))
>>>>>> >From the look of things, it doesn't seem strictly necessary to change
>>>>>> this, but whether that's a good thing is another matter. I'm not sure
>>>>>> that dma_common_contiguous_remap() should really be leaving a dangling
>>>>>> pointer in area->pages as it apparently does... :/
>>>>>>
>>>>>>> +          return -ENXIO;
>>>>>>> +
>>>>>>> +  if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>>>>>>> +          struct page *page = vmalloc_to_page(cpu_addr);
>>>>>>> +          unsigned int count = size >> PAGE_SHIFT;
>>>>>>> +          struct page **pages;
>>>>>>> +          unsigned long pfn;
>>>>>>> +          int i;
>>>>>>> +
>>>>>>> +          pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
>>>>>>> +          if (!pages)
>>>>>>> +                  return -ENOMEM;
>>>>>>> +
>>>>>>> +          for (i = 0, pfn = page_to_pfn(page); i < count; i++)
>>>>>>> +                  pages[i] = pfn_to_page(pfn + i);
>>>>>>> +
>>>>>>> +          ret = iommu_dma_mmap(pages, size, vma);
>>>>>> /**
>>>>>>  * iommu_dma_mmap - Map a buffer into provided user VMA
>>>>>>  * @pages: Array representing buffer from iommu_dma_alloc()
>>>>>>    ...
>>>>>>
>>>>>> In this case, the buffer has not come from iommu_dma_alloc(), so passing
>>>>>> into iommu_dma_mmap() is wrong by contract, even if having to fake up a
>>>>>> page array wasn't enough of a red flag. Given that a FORCE_CONTIGUOUS
>>>>>> allocation is essentially the same as for the non-IOMMU case, I think it
>>>>>> should be sufficient to defer to that path, i.e.:
>>>>>>
>>>>>>     if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
>>>>>>             return __swiotlb_mmap(dev, vma, cpu_addr,
>>>>>>                             phys_to_dma(virt_to_phys(cpu_addr)),
>>>>>>                             size, attrs);
>>>>> Maybe I have make mistake somewhere but it does not work, here and below
>>>>> (hangs or crashes). I suppose it can be due to different address
>>>>> translations, my patch uses
>>>>>     page = vmalloc_to_page(cpu_addr).
>>>>> And here we have:
>>>>>     handle = phys_to_dma(dev, virt_to_phys(cpu_addr)); // in
>>>>> __iommu_mmap_attrs
>>>>>     page = phys_to_page(dma_to_phys(dev, handle)); // in
>>>>> __swiotlb_get_sgtable
>>>>> I guess it is similarly in __swiotlb_mmap.
>>>>>
>>>>> Are these translations equivalent?
>>>> Ah, my mistake, sorry - I managed to forget that cpu_addr is always
>>>> remapped for FORCE_CONTIGUOUS (*and* somehow overlook the use of
>>>> vmalloc_to_page() in the patch itself), so the virt_to_phys() part of my
>>>> example is indeed bogus. The general point still stands, though.
>>> I guess Geert's proposition to create pages permanently is also not
>>> acceptable[2]. So how to fix crashes which appeared after patch adding
>> If I'm not mistaken, creating the pages permanently is what the
>> !DMA_ATTR_FORCE_CONTIGUOUS does? The only difference is where
>> the underlying memory is allocated from.
>>
>> Am I missing something?
> 
> Quoting Robin from his response:
>> in general is is not
>> safe to assume a coherent allocation is backed by struct pages at all
> 
> As I said before I am not familiar with the subject, so it is possible I
> misunderstood something.

That's from the perspective of external callers of
dma_alloc_coherent()/dma_alloc_attrs(), wherein
dma_alloc_from_coherent() may have returned device-specific memory
without calling into the arch dma_map_ops implementation. Internal to
the arm64 implementation, however, everything *we* allocate comes from
either CMA or the normal page allocator, so should always be backed by
real kernel pages.

Robin.

> Regards
> Andrzej
> 
> 
>>
>> Thanks!
>>
>>> support to DMA_ATTR_FORCE_CONTIGUOUS in IOMMU on arm64 [1]?
>>> Maybe temporary solution is to drop the patch until proper handling of
>>> mmapping is proposed, without it the patch looks incomplete and causes
>>> regression.
>>> Moreover __iommu_alloc_attrs uses also dma_common_contiguous_remap which
>>> also assumes existence of struct pages.
>>>
>>> [1]: https://patchwork.kernel.org/patch/9609551/
>>> [2]: https://www.spinics.net/lists/arm-kernel/msg572688.html
>> Gr{oetje,eeting}s,
>>
>>                         Geert
>>
>> --
>> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
>>
>> In personal conversations with technical people, I call myself a hacker. But
>> when I'm talking to journalists I just say "programmer" or something like that.
>>                                 -- Linus Torvalds
>>
>>
>>
>
Andrzej Hajda March 30, 2017, 11:16 a.m. UTC | #9
Hi Robin,

On 30.03.2017 12:44, Robin Murphy wrote:
> On 30/03/17 09:30, Andrzej Hajda wrote:
>> On 30.03.2017 09:40, Geert Uytterhoeven wrote:
>>> Hi Andrzej,
>>>
>>> On Thu, Mar 30, 2017 at 8:51 AM, Andrzej Hajda <a.hajda@samsung.com> wrote:
>>>> On 29.03.2017 17:33, Robin Murphy wrote:
>>>>> On 29/03/17 16:12, Andrzej Hajda wrote:
>>>>>> On 29.03.2017 14:55, Robin Murphy wrote:
>>>>>>> On 29/03/17 11:05, Andrzej Hajda wrote:
>>>>>>>> In case of DMA_ATTR_FORCE_CONTIGUOUS allocations vm_area->pages
>>>>>>>> is invalid. __iommu_mmap_attrs and __iommu_get_sgtable cannot use
>>>>>>>> it. In first case temporary pages array is passed to iommu_dma_mmap,
>>>>>>>> in 2nd case single entry sg table is created directly instead
>>>>>>>> of calling helper.
>>>>>>>>
>>>>>>>> Fixes: 44176bb ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU")
>>>>>>>> Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
>>>>>>>> ---
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> I am not familiar with this framework so please don't be too cruel ;)
>>>>>>>> Alternative solution I see is to always create vm_area->pages,
>>>>>>>> I do not know which approach is preferred.
>>>>>>>>
>>>>>>>> Regards
>>>>>>>> Andrzej
>>>>>>>> ---
>>>>>>>>  arch/arm64/mm/dma-mapping.c | 40 ++++++++++++++++++++++++++++++++++++++--
>>>>>>>>  1 file changed, 38 insertions(+), 2 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
>>>>>>>> index f7b5401..bba2bc8 100644
>>>>>>>> --- a/arch/arm64/mm/dma-mapping.c
>>>>>>>> +++ b/arch/arm64/mm/dma-mapping.c
>>>>>>>> @@ -704,7 +704,30 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
>>>>>>>>            return ret;
>>>>>>>>
>>>>>>>>    area = find_vm_area(cpu_addr);
>>>>>>>> -  if (WARN_ON(!area || !area->pages))
>>>>>>>> +  if (WARN_ON(!area))
>>>>>>> >From the look of things, it doesn't seem strictly necessary to change
>>>>>>> this, but whether that's a good thing is another matter. I'm not sure
>>>>>>> that dma_common_contiguous_remap() should really be leaving a dangling
>>>>>>> pointer in area->pages as it apparently does... :/
>>>>>>>
>>>>>>>> +          return -ENXIO;
>>>>>>>> +
>>>>>>>> +  if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
>>>>>>>> +          struct page *page = vmalloc_to_page(cpu_addr);
>>>>>>>> +          unsigned int count = size >> PAGE_SHIFT;
>>>>>>>> +          struct page **pages;
>>>>>>>> +          unsigned long pfn;
>>>>>>>> +          int i;
>>>>>>>> +
>>>>>>>> +          pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
>>>>>>>> +          if (!pages)
>>>>>>>> +                  return -ENOMEM;
>>>>>>>> +
>>>>>>>> +          for (i = 0, pfn = page_to_pfn(page); i < count; i++)
>>>>>>>> +                  pages[i] = pfn_to_page(pfn + i);
>>>>>>>> +
>>>>>>>> +          ret = iommu_dma_mmap(pages, size, vma);
>>>>>>> /**
>>>>>>>  * iommu_dma_mmap - Map a buffer into provided user VMA
>>>>>>>  * @pages: Array representing buffer from iommu_dma_alloc()
>>>>>>>    ...
>>>>>>>
>>>>>>> In this case, the buffer has not come from iommu_dma_alloc(), so passing
>>>>>>> into iommu_dma_mmap() is wrong by contract, even if having to fake up a
>>>>>>> page array wasn't enough of a red flag. Given that a FORCE_CONTIGUOUS
>>>>>>> allocation is essentially the same as for the non-IOMMU case, I think it
>>>>>>> should be sufficient to defer to that path, i.e.:
>>>>>>>
>>>>>>>     if (attrs & DMA_ATTR_FORCE_CONTIGUOUS)
>>>>>>>             return __swiotlb_mmap(dev, vma, cpu_addr,
>>>>>>>                             phys_to_dma(virt_to_phys(cpu_addr)),
>>>>>>>                             size, attrs);
>>>>>> Maybe I have make mistake somewhere but it does not work, here and below
>>>>>> (hangs or crashes). I suppose it can be due to different address
>>>>>> translations, my patch uses
>>>>>>     page = vmalloc_to_page(cpu_addr).
>>>>>> And here we have:
>>>>>>     handle = phys_to_dma(dev, virt_to_phys(cpu_addr)); // in
>>>>>> __iommu_mmap_attrs
>>>>>>     page = phys_to_page(dma_to_phys(dev, handle)); // in
>>>>>> __swiotlb_get_sgtable
>>>>>> I guess it is similarly in __swiotlb_mmap.
>>>>>>
>>>>>> Are these translations equivalent?
>>>>> Ah, my mistake, sorry - I managed to forget that cpu_addr is always
>>>>> remapped for FORCE_CONTIGUOUS (*and* somehow overlook the use of
>>>>> vmalloc_to_page() in the patch itself), so the virt_to_phys() part of my
>>>>> example is indeed bogus. The general point still stands, though.
>>>> I guess Geert's proposition to create pages permanently is also not
>>>> acceptable[2]. So how to fix crashes which appeared after patch adding
>>> If I'm not mistaken, creating the pages permanently is what the
>>> !DMA_ATTR_FORCE_CONTIGUOUS does? The only difference is where
>>> the underlying memory is allocated from.
>>>
>>> Am I missing something?
>> Quoting Robin from his response:
>>> in general is is not
>>> safe to assume a coherent allocation is backed by struct pages at all
>> As I said before I am not familiar with the subject, so it is possible I
>> misunderstood something.
> That's from the perspective of external callers of
> dma_alloc_coherent()/dma_alloc_attrs(), wherein
> dma_alloc_from_coherent() may have returned device-specific memory
> without calling into the arch dma_map_ops implementation. Internal to
> the arm64 implementation, however, everything *we* allocate comes from
> either CMA or the normal page allocator, so should always be backed by
> real kernel pages.
>
> Robin.


OK, so what do you exactly mean by "The general point still stands", my
patch fixes handling of allocations made internally?
Anyway as I understand approach "creating the pages permanently" in
__iommu_alloc_attrs is OK. Is it worth to create patch adding it? It
should solve the issue as well and also looks saner (for me).

Regards
Andrzej

>> Regards
>> Andrzej
>>
>>
>>> Thanks!
>>>
>>>> support to DMA_ATTR_FORCE_CONTIGUOUS in IOMMU on arm64 [1]?
>>>> Maybe temporary solution is to drop the patch until proper handling of
>>>> mmapping is proposed, without it the patch looks incomplete and causes
>>>> regression.
>>>> Moreover __iommu_alloc_attrs uses also dma_common_contiguous_remap which
>>>> also assumes existence of struct pages.
>>>>
>>>> [1]: https://patchwork.kernel.org/patch/9609551/
>>>> [2]: https://www.spinics.net/lists/arm-kernel/msg572688.html
>>> Gr{oetje,eeting}s,
>>>
>>>                         Geert
>>>
>>> --
>>> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
>>>
>>> In personal conversations with technical people, I call myself a hacker. But
>>> when I'm talking to journalists I just say "programmer" or something like that.
>>>                                 -- Linus Torvalds
>>>
>>>
>>>
>
>
>
Robin Murphy March 30, 2017, 11:46 a.m. UTC | #10
On 30/03/17 12:16, Andrzej Hajda wrote:
[...]
>>>>> I guess Geert's proposition to create pages permanently is also not
>>>>> acceptable[2]. So how to fix crashes which appeared after patch adding
>>>> If I'm not mistaken, creating the pages permanently is what the
>>>> !DMA_ATTR_FORCE_CONTIGUOUS does? The only difference is where
>>>> the underlying memory is allocated from.
>>>>
>>>> Am I missing something?
>>> Quoting Robin from his response:
>>>> in general is is not
>>>> safe to assume a coherent allocation is backed by struct pages at all
>>> As I said before I am not familiar with the subject, so it is possible I
>>> misunderstood something.
>> That's from the perspective of external callers of
>> dma_alloc_coherent()/dma_alloc_attrs(), wherein
>> dma_alloc_from_coherent() may have returned device-specific memory
>> without calling into the arch dma_map_ops implementation. Internal to
>> the arm64 implementation, however, everything *we* allocate comes from
>> either CMA or the normal page allocator, so should always be backed by
>> real kernel pages.
>>
>> Robin.
> 
> 
> OK, so what do you exactly mean by "The general point still stands", my
> patch fixes handling of allocations made internally?

That since FORCE_CONTIGUOUS allocations always come from CMA, you can
let the existing CMA-based implementations handle them just by fixing up
dma_addr appropriately.

> Anyway as I understand approach "creating the pages permanently" in
> __iommu_alloc_attrs is OK. Is it worth to create patch adding it? It
> should solve the issue as well and also looks saner (for me).

Sure, you *could* - there's nothing technically wrong with that other
than violating a strict interpretation of the iommu-dma API
documentation if you pass it into iommu_dma_mmap() - it's just that the
only point of using the pages array here in the first place is to cover
the general case where an allocation might not be physically contiguous.
If you have a guarantee that a given allocation definitely *is* both
physically and virtually contiguous, then that's a bunch of extra work
you simply have no need to do.

If you do want to go down that route, then I would much rather we fix
dma_common_contiguous_remap() to leave a valid array in area->pages in
the first place, than be temporarily faking them up around individual calls.

Robin.
Geert Uytterhoeven March 30, 2017, 11:53 a.m. UTC | #11
Hi Robin,

On Thu, Mar 30, 2017 at 1:46 PM, Robin Murphy <robin.murphy@arm.com> wrote:
> On 30/03/17 12:16, Andrzej Hajda wrote:
> [...]
>>>>>> I guess Geert's proposition to create pages permanently is also not
>>>>>> acceptable[2]. So how to fix crashes which appeared after patch adding
>>>>> If I'm not mistaken, creating the pages permanently is what the
>>>>> !DMA_ATTR_FORCE_CONTIGUOUS does? The only difference is where
>>>>> the underlying memory is allocated from.
>>>>>
>>>>> Am I missing something?
>>>> Quoting Robin from his response:
>>>>> in general is is not
>>>>> safe to assume a coherent allocation is backed by struct pages at all
>>>> As I said before I am not familiar with the subject, so it is possible I
>>>> misunderstood something.
>>> That's from the perspective of external callers of
>>> dma_alloc_coherent()/dma_alloc_attrs(), wherein
>>> dma_alloc_from_coherent() may have returned device-specific memory
>>> without calling into the arch dma_map_ops implementation. Internal to
>>> the arm64 implementation, however, everything *we* allocate comes from
>>> either CMA or the normal page allocator, so should always be backed by
>>> real kernel pages.
>>>
>>> Robin.
>>
>> OK, so what do you exactly mean by "The general point still stands", my
>> patch fixes handling of allocations made internally?
>
> That since FORCE_CONTIGUOUS allocations always come from CMA, you can
> let the existing CMA-based implementations handle them just by fixing up
> dma_addr appropriately.
>
>> Anyway as I understand approach "creating the pages permanently" in
>> __iommu_alloc_attrs is OK. Is it worth to create patch adding it? It
>> should solve the issue as well and also looks saner (for me).
>
> Sure, you *could* - there's nothing technically wrong with that other
> than violating a strict interpretation of the iommu-dma API
> documentation if you pass it into iommu_dma_mmap() - it's just that the
> only point of using the pages array here in the first place is to cover
> the general case where an allocation might not be physically contiguous.
> If you have a guarantee that a given allocation definitely *is* both
> physically and virtually contiguous, then that's a bunch of extra work
> you simply have no need to do.

The same can be said for dma_common_contiguous_remap() below...

> If you do want to go down that route, then I would much rather we fix
> dma_common_contiguous_remap() to leave a valid array in area->pages in
> the first place, than be temporarily faking them up around individual calls.

The only point of using the pages array here in the first place is to cover
the general case in dma_common_pages_remap().

Providing a contiguous variant of map_vm_area() could resolve that.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Robin Murphy March 30, 2017, 2:10 p.m. UTC | #12
On 30/03/17 12:53, Geert Uytterhoeven wrote:
> Hi Robin,
> 
> On Thu, Mar 30, 2017 at 1:46 PM, Robin Murphy <robin.murphy@arm.com> wrote:
>> On 30/03/17 12:16, Andrzej Hajda wrote:
>> [...]
>>>>>>> I guess Geert's proposition to create pages permanently is also not
>>>>>>> acceptable[2]. So how to fix crashes which appeared after patch adding
>>>>>> If I'm not mistaken, creating the pages permanently is what the
>>>>>> !DMA_ATTR_FORCE_CONTIGUOUS does? The only difference is where
>>>>>> the underlying memory is allocated from.
>>>>>>
>>>>>> Am I missing something?
>>>>> Quoting Robin from his response:
>>>>>> in general is is not
>>>>>> safe to assume a coherent allocation is backed by struct pages at all
>>>>> As I said before I am not familiar with the subject, so it is possible I
>>>>> misunderstood something.
>>>> That's from the perspective of external callers of
>>>> dma_alloc_coherent()/dma_alloc_attrs(), wherein
>>>> dma_alloc_from_coherent() may have returned device-specific memory
>>>> without calling into the arch dma_map_ops implementation. Internal to
>>>> the arm64 implementation, however, everything *we* allocate comes from
>>>> either CMA or the normal page allocator, so should always be backed by
>>>> real kernel pages.
>>>>
>>>> Robin.
>>>
>>> OK, so what do you exactly mean by "The general point still stands", my
>>> patch fixes handling of allocations made internally?
>>
>> That since FORCE_CONTIGUOUS allocations always come from CMA, you can
>> let the existing CMA-based implementations handle them just by fixing up
>> dma_addr appropriately.
>>
>>> Anyway as I understand approach "creating the pages permanently" in
>>> __iommu_alloc_attrs is OK. Is it worth to create patch adding it? It
>>> should solve the issue as well and also looks saner (for me).
>>
>> Sure, you *could* - there's nothing technically wrong with that other
>> than violating a strict interpretation of the iommu-dma API
>> documentation if you pass it into iommu_dma_mmap() - it's just that the
>> only point of using the pages array here in the first place is to cover
>> the general case where an allocation might not be physically contiguous.
>> If you have a guarantee that a given allocation definitely *is* both
>> physically and virtually contiguous, then that's a bunch of extra work
>> you simply have no need to do.
> 
> The same can be said for dma_common_contiguous_remap() below...

Indeed it can. See if you can spot anything I've said in defence of that
particular function ;)

>> If you do want to go down that route, then I would much rather we fix
>> dma_common_contiguous_remap() to leave a valid array in area->pages in
>> the first place, than be temporarily faking them up around individual calls.
> 
> The only point of using the pages array here in the first place is to cover
> the general case in dma_common_pages_remap().
>
> Providing a contiguous variant of map_vm_area() could resolve that.

Isn't that what remap_pfn_range() already is? iommu_dma_mmap() had to
exist specifically because the likes of dma_common_mmap() *are* using
that on the assumption of physically contiguous ranges. I don't really
have the time or inclination to go digging into this myself, but there's
almost certainly room for some improvement and/or cleanup in the common
code too (and as I said, if something can be done there than I would
rather it were tackled head-on than worked around with point fixes in
the arch code).

Robin.

> 
> Gr{oetje,eeting}s,
> 
>                         Geert
> 
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like that.
>                                 -- Linus Torvalds
>
diff mbox

Patch

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index f7b5401..bba2bc8 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -704,7 +704,30 @@  static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 		return ret;
 
 	area = find_vm_area(cpu_addr);
-	if (WARN_ON(!area || !area->pages))
+	if (WARN_ON(!area))
+		return -ENXIO;
+
+	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+		struct page *page = vmalloc_to_page(cpu_addr);
+		unsigned int count = size >> PAGE_SHIFT;
+		struct page **pages;
+		unsigned long pfn;
+		int i;
+
+		pages = kmalloc_array(count, sizeof(*pages), GFP_KERNEL);
+		if (!pages)
+			return -ENOMEM;
+
+		for (i = 0, pfn = page_to_pfn(page); i < count; i++)
+			pages[i] = pfn_to_page(pfn + i);
+
+		ret = iommu_dma_mmap(pages, size, vma);
+		kfree(pages);
+
+		return ret;
+	}
+
+	if (WARN_ON(!area->pages))
 		return -ENXIO;
 
 	return iommu_dma_mmap(area->pages, size, vma);
@@ -717,7 +740,20 @@  static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct vm_struct *area = find_vm_area(cpu_addr);
 
-	if (WARN_ON(!area || !area->pages))
+	if (WARN_ON(!area))
+		return -ENXIO;
+
+	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+		int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+
+		if (!ret)
+			sg_set_page(sgt->sgl, vmalloc_to_page(cpu_addr),
+				PAGE_ALIGN(size), 0);
+
+		return ret;
+	}
+
+	if (WARN_ON(!area->pages))
 		return -ENXIO;
 
 	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,