
[3/3] swiotlb: Add support for CMA allocations

Message ID 1386634334-31139-4-git-send-email-lauraa@codeaurora.org (mailing list archive)
State New, archived

Commit Message

Laura Abbott Dec. 10, 2013, 12:12 a.m. UTC
Some architectures may implement the CMA APIs to allow allocation
of larger contiguous blocks of memory. Add support in the swiotlb
alloc/free functions to allocate from the CMA APIs instead of the
basic page allocator.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
 lib/swiotlb.c |   92 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 86 insertions(+), 6 deletions(-)
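
For a driver, nothing visible changes: on platforms whose dma_map_ops are
backed by swiotlb, dma_alloc_coherent() still ends up in
swiotlb_alloc_coherent(); only the backing allocation moves to CMA when
CONFIG_DMA_CMA=y. A minimal caller-side sketch (the function name and the
1 MiB size are arbitrary here):

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/sizes.h>

static int example_alloc(struct device *dev)
{
	dma_addr_t dma;
	/* With this patch and CONFIG_DMA_CMA=y, the buffer below is carved
	 * out of the CMA region instead of coming from __get_free_pages(). */
	void *buf = dma_alloc_coherent(dev, SZ_1M, &dma, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;

	dma_free_coherent(dev, SZ_1M, buf, dma);
	return 0;
}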

Comments

Konrad Rzeszutek Wilk Dec. 10, 2013, 12:29 a.m. UTC | #1
Laura Abbott <lauraa@codeaurora.org> wrote:
>Some architectures may implement the CMA APIs to allow allocation
>of larger contiguous blocks of memory. Add support in the swiotlb
>alloc/free functions to allocate from the CMA APIs instead of the
>basic page allocator.
>
>Cc: Will Deacon <will.deacon@arm.com>
>Cc: Catalin Marinas <catalin.marinas@arm.com>
>Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>Cc: Marek Szyprowski <m.szyprowski@samsung.com>
>Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>---
>lib/swiotlb.c |   92
>+++++++++++++++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 86 insertions(+), 6 deletions(-)
>
>diff --git a/lib/swiotlb.c b/lib/swiotlb.c
>index e4399fa..77b4b17 100644
>--- a/lib/swiotlb.c
>+++ b/lib/swiotlb.c
>@@ -29,6 +29,9 @@
> #include <linux/ctype.h>
> #include <linux/highmem.h>
> #include <linux/gfp.h>
>+#include <linux/dma-contiguous.h>
>+#include <linux/io.h>
>+#include <linux/vmalloc.h>
> 
> #include <asm/io.h>
> #include <asm/dma.h>
>@@ -610,6 +613,66 @@ void swiotlb_tbl_sync_single(struct device *hwdev,
>phys_addr_t tlb_addr,
> }
> EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
> 
>+static void * __alloc_from_contiguous(struct device *hwdev, size_t
>size,
>+					struct page **ret_page)
>+{
>+	unsigned long order = get_order(size);
>+	size_t count = size >> PAGE_SHIFT;
>+	struct page *page;
>+	void *ptr = NULL;
>+
>+	page = dma_alloc_from_contiguous(hwdev, count, order);
>+	if (!page)
>+		return NULL;
>+
>+	if (PageHighMem(page)) {
>+		struct vm_struct *area;
>+		unsigned long addr;
>+
>+		/*
>+		 * DMA allocation can be mapped to user space, so lets
>+		 * set VM_USERMAP flags too.
>+		 */
>+		area = get_vm_area(size, VM_USERMAP);
>+		if (!area)
>+			goto err;
>+		addr = (unsigned long)area->addr;
>+		area->phys_addr = __pfn_to_phys(page_to_pfn(page));
>+
>+		if (ioremap_page_range(addr, addr + size, area->phys_addr,
>+		    PAGE_KERNEL)) {
>+			vunmap((void *)addr);
>+			goto err;
>+		}
>+		ptr = area->addr;
>+	} else {
>+		ptr = page_address(page);
>+	}
>+
>+	*ret_page = page;
>+	return ptr;
>+
>+err:
>+	dma_release_from_contiguous(hwdev, page, count);
>+	return NULL;
>+}
>+
>+static void __free_from_contiguous(struct device *hwdev, struct page
>*page,
>+				void *cpu_addr, size_t size)
>+{
>+	if (PageHighMem(page)) {
>+		struct vm_struct *area = find_vm_area(cpu_addr);
>+		if (!area) {
>+			WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
>+			return;
>+		}
>+		unmap_kernel_range((unsigned long)cpu_addr, size);
>+		vunmap(cpu_addr);
>+	}
>+	dma_release_from_contiguous(hwdev, page, size >> PAGE_SHIFT);
>+}
>+
>+
> void *
> swiotlb_alloc_coherent(struct device *hwdev, size_t size,
> 		       dma_addr_t *dma_handle, gfp_t flags)
>@@ -618,18 +681,27 @@ swiotlb_alloc_coherent(struct device *hwdev,
>size_t size,
> 	void *ret;
> 	int order = get_order(size);
> 	u64 dma_mask = DMA_BIT_MASK(32);
>+	struct page *page;
> 
> 	if (hwdev && hwdev->coherent_dma_mask)
> 		dma_mask = hwdev->coherent_dma_mask;
> 
>-	ret = (void *)__get_free_pages(flags, order);
>-	if (ret) {
>+	if (IS_ENABLED(CONFIG_DMA_CMA)) {
>+		ret = __alloc_from_contiguous(hwdev, size, &page);
>+		dev_addr = phys_to_dma(hwdev, page_to_phys(page));
>+	} else {
>+		ret = (void *)__get_free_pages(flags, order);
> 		dev_addr = swiotlb_virt_to_bus(hwdev, ret);
>+	}
>+	if (ret) {
> 		if (dev_addr + size - 1 > dma_mask) {
> 			/*
> 			 * The allocated memory isn't reachable by the device.
> 			 */
>-			free_pages((unsigned long) ret, order);
>+			if(IS_ENABLED(CONFIG_DMA_CMA))
>+				__free_from_contiguous(hwdev, page, ret, size);
>+			else
>+				free_pages((unsigned long) ret, order);
> 			ret = NULL;
> 		}
> 	}
>@@ -673,11 +745,19 @@ swiotlb_free_coherent(struct device *hwdev,
>size_t size, void *vaddr,
> 	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
> 
> 	WARN_ON(irqs_disabled());
>-	if (!is_swiotlb_buffer(paddr))
>-		free_pages((unsigned long)vaddr, get_order(size));
>-	else
>+	if (!is_swiotlb_buffer(paddr)) {
>+		if (IS_ENABLED(CONFIG_DMA_CMA)) {
>+			__free_from_contiguous(hwdev,
>+				pfn_to_page(paddr >> PAGE_SHIFT),
>+				vaddr,
>+				size);
>+		} else {
>+			free_pages((unsigned long)vaddr, get_order(size));
>+		}
>+	} else {
> 		/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
> 		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
>+	}
> }
> EXPORT_SYMBOL(swiotlb_free_coherent);
> 

Can this be done in the platform dma_ops functions instead?
Laura Abbott Dec. 10, 2013, 12:36 a.m. UTC | #2
On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote:
> Laura Abbott <lauraa@codeaurora.org> wrote:
>> Some architectures may implement the CMA APIs to allow allocation
>> of larger contiguous blocks of memory. Add support in the swiotlb
>> alloc/free functions to allocate from the CMA APIs instead of the
>> basic page allocator.
>>
>> Cc: Will Deacon <will.deacon@arm.com>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
...
>>
>
> Can this be done in the platform dma_ops functions instead?
>

I suppose it could but that seems like it would result in lots of 
duplicated code if every architecture that uses swiotlb wants to use CMA.

Thanks,
Laura
Konrad Rzeszutek Wilk Dec. 10, 2013, 12:40 a.m. UTC | #3
Laura Abbott <lauraa@codeaurora.org> wrote:
>On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote:
>> Laura Abbott <lauraa@codeaurora.org> wrote:
>>> Some architectures may implement the CMA APIs to allow allocation
>>> of larger contiguous blocks of memory. Add support in the swiotlb
>>> alloc/free functions to allocate from the CMA APIs instead of the
>>> basic page allocator.
>>>
>>> Cc: Will Deacon <will.deacon@arm.com>
>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
>>> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
>>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>...
>>>
>>
>> Can this be done in the platform dma_ops functions instead?
>>
>
>I suppose it could but that seems like it would result in lots of 
>duplicated code if every architecture that uses swiotlb wants to use
>CMA.
>
>Thanks,
>Laura

Then let's do it that way. Thank you.
Will Deacon Dec. 10, 2013, 10:25 a.m. UTC | #4
On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote:
> Laura Abbott <lauraa@codeaurora.org> wrote:
> >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote:
> >> Laura Abbott <lauraa@codeaurora.org> wrote:
> >>> Some architectures may implement the CMA APIs to allow allocation
> >>> of larger contiguous blocks of memory. Add support in the swiotlb
> >>> alloc/free functions to allocate from the CMA APIs instead of the
> >>> basic page allocator.
> >>>
> >>> Cc: Will Deacon <will.deacon@arm.com>
> >>> Cc: Catalin Marinas <catalin.marinas@arm.com>
> >>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> >>> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
> >>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> >...
> >>>
> >>
> >> Can this be done in the platform dma_ops functions instead?
> >>
> >
> >I suppose it could but that seems like it would result in lots of 
> >duplicated code if every architecture that uses swiotlb wants to use
> >CMA.
> >
> >Thanks,
> >Laura
> 
> Then let's do it that way. Thank you.

Note that once arch/arm64 starts growing things like support for non-coherent
DMA and IOMMU mappings, we'll probably want to factor out a bunch of the
boilerplate from our dma-mapping.c file into places like lib/iommu-helper.c.

However, until then, I can see this making sense to live in the arch code.
Ultimately, the swiotlb code could just call a helper, but for now we can
rip out the highmem parts (which doesn't leave much) and put it under
arch/arm64.

Will
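
(For concreteness: ripping out the highmem remap leaves the patch's
allocation helper at roughly the following - a sketch of an arch/arm64
variant, not submitted code:)

#include <linux/dma-contiguous.h>
#include <linux/mm.h>

static void *__alloc_from_contiguous(struct device *hwdev, size_t size,
				     struct page **ret_page)
{
	struct page *page;

	page = dma_alloc_from_contiguous(hwdev, size >> PAGE_SHIFT,
					 get_order(size));
	if (!page)
		return NULL;

	*ret_page = page;
	/* No highmem on arm64, so the linear mapping already covers it. */
	return page_address(page);
}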
Catalin Marinas Dec. 10, 2013, 10:42 a.m. UTC | #5
On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote:
> On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote:
> > Laura Abbott <lauraa@codeaurora.org> wrote:
> > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote:
> > >> Laura Abbott <lauraa@codeaurora.org> wrote:
> > >>> Some architectures may implement the CMA APIs to allow allocation
> > >>> of larger contiguous blocks of memory. Add support in the swiotlb
> > >>> alloc/free functions to allocate from the CMA APIs instead of the
> > >>> basic page allocator.
> > >>>
> > >>> Cc: Will Deacon <will.deacon@arm.com>
> > >>> Cc: Catalin Marinas <catalin.marinas@arm.com>
> > >>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
> > >>> Cc: Marek Szyprowski <m.szyprowski@samsung.com>
> > >>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> > >...
> > >>>
> > >>
> > >> Can this be done in the platform dma_ops functions instead?
> > >>
> > >
> > >I suppose it could but that seems like it would result in lots of 
> > >duplicated code if every architecture that uses swiotlb wants to use
> > >CMA.
> > >
> > >Thanks,
> > >Laura
> > 
> > Then let's do it that way. Thank you.
> 
> Note that once arch/arm64 starts growing things like support for non-coherent
> DMA and IOMMU mappings, we'll probably want to factor out a bunch of the
> boilerplat from our dma-mapping.c file into places like lib/iommu-helper.c.

For coherency, we could build it on top of whatever dma (allocation) ops
are registered, whether swiotlb or iommu (see part of
https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429)

Regarding iommu, I don't think we need CMA on top, so it makes sense to
keep the CMA in the swiotlb code.
Will Deacon Dec. 10, 2013, 1:50 p.m. UTC | #6
On Tue, Dec 10, 2013 at 10:42:31AM +0000, Catalin Marinas wrote:
> On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote:
> > On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote:
> > > Laura Abbott <lauraa@codeaurora.org> wrote:
> > > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote:
> > > >> Can this be done in the platform dma_ops functions instead?
> > > >
> > > >I suppose it could but that seems like it would result in lots of 
> > > >duplicated code if every architecture that uses swiotlb wants to use
> > > >CMA.
> > > >
> > > 
> > > Then let's do it that way. Thank you.
> > 
> > Note that once arch/arm64 starts growing things like support for non-coherent
> > DMA and IOMMU mappings, we'll probably want to factor out a bunch of the
> > boilerplate from our dma-mapping.c file into places like lib/iommu-helper.c.
> 
> For coherency, we could build it on top of whatever dma (allocation) ops
> are registered, whether swiotlb or iommu (see part of
> https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429)
> 
> Regarding iommu, I don't think we need CMA on top, so it makes sense to
> keep the CMA in the swiotlb code.

I don't think it does; swiotlb doesn't care about things like remapping
highmem pages returned from CMA, so inlining the code in there just implies
that we should inline it in all of the dma_ops implementations that might
want it (although agreed about IOMMU not needing it. I'm thinking about
things like the non-coherent ops under arch/arm/).

Instead, it should either be in a library that they can all use as they see
fit, or in the code that deals with all of the dma_ops in the architecture
backend.

My reading of Konrad's reply was that he doesn't want this in the swiotlb
code either...

Will
Konrad Rzeszutek Wilk Dec. 10, 2013, 1:56 p.m. UTC | #7
Will Deacon <will.deacon@arm.com> wrote:
>On Tue, Dec 10, 2013 at 10:42:31AM +0000, Catalin Marinas wrote:
>> On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote:
>> > On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk
>wrote:
>> > > Laura Abbott <lauraa@codeaurora.org> wrote:
>> > > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote:
>> > > >> Can this be done in the platform dma_ops functions instead?
>> > > >
>> > > >I suppose it could but that seems like it would result in lots
>of 
>> > > >duplicated code if every architecture that uses swiotlb wants to
>use
>> > > >CMA.
>> > > >
>> > > 
>> > > Then let's do it that way. Thank you.
>> > 
>> > Note that once arch/arm64 starts growing things like support for
>non-coherent
>> > DMA and IOMMU mappings, we'll probably want to factor out a bunch
>of the
>> > boilerplate from our dma-mapping.c file into places like
>lib/iommu-helper.c.
>> 
>> For coherency, we could build it on top of whatever dma (allocation)
>ops
>> are registered, whether swiotlb or iommu (see part of
>>
>https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429)
>> 
>> Regarding iommu, I don't think we need CMA on top, so it makes sense
>to
>> keep the CMA in the swiotlb code.
>
>I don't think it does; swiotlb doesn't care about things like remapping
>highmem pages returned from CMA, so inlining the code in there just
>implies
>that we should inline it in all of the dma_ops implementations that
>might
>want it (although agreed about IOMMU not needing it. I'm thinking about
>things like the non-coherent ops under arch/arm/).
>
>Instead, it should either be in a library that they can all use as they
>see
>fit, or in the code that deals with all of the dma_ops in the
>architecture
>backend.
>
>My reading of Konrad's reply was that he doesn't want this in the
>swiotlb
>code either...
>
>Will

Having it in a library - such as iommu-helper - would be better. We could rename the library to dma-helper to make its intended usage more obvious.
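
(A sketch of the free-side entry point such a dma-helper library could
export, lifted from the patch's __free_from_contiguous with the
find_vm_area() sanity check dropped; the dma_helper_* name is made up:)

#include <linux/dma-contiguous.h>
#include <linux/export.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>

void dma_helper_free_contiguous(struct device *dev, struct page *page,
				void *cpu_addr, size_t size)
{
	if (PageHighMem(page)) {
		/* Tear down the vmalloc-space mapping created at alloc time. */
		unmap_kernel_range((unsigned long)cpu_addr, size);
		vunmap(cpu_addr);
	}
	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
}
EXPORT_SYMBOL_GPL(dma_helper_free_contiguous);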
Catalin Marinas Dec. 10, 2013, 2:50 p.m. UTC | #8
On Tue, Dec 10, 2013 at 01:50:32PM +0000, Will Deacon wrote:
> On Tue, Dec 10, 2013 at 10:42:31AM +0000, Catalin Marinas wrote:
> > On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote:
> > > On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote:
> > > > Laura Abbott <lauraa@codeaurora.org> wrote:
> > > > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote:
> > > > >> Can this be done in the platform dma_ops functions instead?
> > > > >
> > > > >I suppose it could but that seems like it would result in lots of 
> > > > >duplicated code if every architecture that uses swiotlb wants to use
> > > > >CMA.
> > > > >
> > > > 
> > > > Then let's do it that way. Thank you.
> > > 
> > > Note that once arch/arm64 starts growing things like support for non-coherent
> > > DMA and IOMMU mappings, we'll probably want to factor out a bunch of the
> > > boilerplate from our dma-mapping.c file into places like lib/iommu-helper.c.
> > 
> > For coherency, we could build it on top of whatever dma (allocation) ops
> > are registered, whether swiotlb or iommu (see part of
> > https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429)
> > 
> > Regarding iommu, I don't think we need CMA on top, so it makes sense to
> > keep the CMA in the swiotlb code.
> 
> I don't think it does; swiotlb doesn't care about things like remapping
> highmem pages returned from CMA, so inlining the code in there just implies
> that we should inline it in all of the dma_ops implementations that might
> want it (although agreed about IOMMU not needing it. I'm thinking about
> things like the non-coherent ops under arch/arm/).

My suggestion was to build coherency on top of the low-level dma
allocation/mapping ops in the arch code by function pointer redirection
or with arch hooks in the dma alloc code (e.g. swiotlb.c) as an
optimisation. Anyway, that's for another thread.

Looking through the arm code, it seems that contiguous allocation can be
triggered when dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS) independent of
iommu use. On second thought, this could be useful to reduce the SMMU
TLB pressure for certain devices (not sure about alignment guarantees of
CMA).
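
(The arm hook being referred to is along these lines - a paraphrased
sketch, with alloc_buffer a made-up name:)

#include <linux/dma-attrs.h>
#include <linux/dma-contiguous.h>
#include <linux/gfp.h>

static struct page *alloc_buffer(struct device *dev, size_t size, gfp_t gfp,
				 struct dma_attrs *attrs)
{
	/* The caller asked for one physically contiguous block even though
	 * the buffer will be mapped through the IOMMU anyway. */
	if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs))
		return dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));

	return alloc_pages(gfp, get_order(size));
}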

If we look at the buffer allocation independent of the actual dma
address generation, I agree that we shouldn't merge CMA into swiotlb.
With swiotlb we get bouncing if needed (I assume this is not required
with CMA). With iommu, the same buffer gets mapped in the device memory
space and we don't actually need to bother with ioremap_page_range(),
just temporary kmap for cache flushing (if highmem).

> Instead, it should either be in a library that they can all use as they see
> fit, or in the code that deals with all of the dma_ops in the architecture
> backend.

For arm64, since we don't need highmem, I'm tempted to just call
dma_alloc_from_contiguous directly in arch/arm64/mm/dma-mapping.c; the
patch should be only a few lines. We'll leave the code sharing via lib/ to
the other 32-bit architectures ;).
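
(Those few lines could look roughly like this in the arm64 coherent-alloc
path - a sketch with a made-up function name, falling back to swiotlb when
CMA is disabled or the CMA allocation fails:)

#include <linux/dma-contiguous.h>
#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>

static void *arm64_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags)
{
	if (IS_ENABLED(CONFIG_DMA_CMA)) {
		struct page *page;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (page) {
			*dma_handle = phys_to_dma(dev, page_to_phys(page));
			return page_address(page);
		}
	}

	return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
}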
Laura Abbott Dec. 10, 2013, 7:03 p.m. UTC | #9
On 12/10/2013 6:50 AM, Catalin Marinas wrote:
> On Tue, Dec 10, 2013 at 01:50:32PM +0000, Will Deacon wrote:
>> On Tue, Dec 10, 2013 at 10:42:31AM +0000, Catalin Marinas wrote:
>>> On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote:
>>>> On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote:
>>>>> Laura Abbott <lauraa@codeaurora.org> wrote:
>>>>>> On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote:
>>>>>>> Can this be done in the platform dma_ops functions instead?
>>>>>>
>>>>>> I suppose it could but that seems like it would result in lots of
>>>>>> duplicated code if every architecture that uses swiotlb wants to use
>>>>>> CMA.
>>>>>>
>>>>>
>>>>> Then let's do it that way. Thank you.
>>>>
>>>> Note that once arch/arm64 starts growing things like support for non-coherent
>>>> DMA and IOMMU mappings, we'll probably want to factor out a bunch of the
>>>> boilerplate from our dma-mapping.c file into places like lib/iommu-helper.c.
>>>
>>> For coherency, we could build it on top of whatever dma (allocation) ops
>>> are registered, whether swiotlb or iommu (see part of
>>> https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429)
>>>
>>> Regarding iommu, I don't think we need CMA on top, so it makes sense to
>>> keep the CMA in the swiotlb code.
>>
>> I don't think it does; swiotlb doesn't care about things like remapping
>> highmem pages returned from CMA, so inlining the code in there just implies
>> that we should inline it in all of the dma_ops implementations that might
>> want it (although agreed about IOMMU not needing it. I'm thinking about
>> things like the non-coherent ops under arch/arm/).
>
> My suggestion was to build coherency on top of the low-level dma
> allocation/mapping ops in the arch code by function pointer redirection
> or with arch hooks in the dma alloc code (e.g. swiotlb.c) as an
> optimisation. Anyway, that's for another thread.
>
> Looking through the arm code, it seems that contiguous allocation can be
> triggered when dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS) independent of
> iommu use. At a second thought, this could be useful to reduce the SMMU
> TLB pressure for certain devices (not sure about alignment guarantees of
> CMA).
>
> If we look at the buffer allocation independent of the actual dma
> address generation, I agree that we shouldn't merge CMA into swiotlb.
> With swiotlb we get bouncing if needed (I assume this is not required
> with CMA). With iommu, the same buffer gets mapped in the device memory
> space and we don't actually need to bother with ioremap_page_range(),
> just temporary kmap for cache flushing (if highmem).
>
>> Instead, it should either be in a library that they can all use as they see
>> fit, or in the code that deals with all of the dma_ops in the architecture
>> backend.
>
> For arm64, since we don't need highmem, I'm tempted to just call the
> dma_alloc_from_contiguous directly in arch/arm64/mm/dma-mapping.c, the
> patch should be a few lines only. We let the code sharing via lib/ to
> other 32-bit architectures ;).
>

Yeah, I fell into the 'premature optimization' trap here by trying to 
fold things into swiotlb. I'll re-submit with the code directly in arm64 
dma-mapping.c for now and we can figure out how to optimize the 'force 
contiguous' for IOMMU allocations later.

Thanks,
Laura
Laura Abbott Dec. 13, 2013, 12:48 a.m. UTC | #10
On 12/10/2013 2:42 AM, Catalin Marinas wrote:
>
> For coherency, we could build it on top of whatever dma (allocation) ops
> are registered, whether swiotlb or iommu (see part of
> https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429)
>
> Regarding iommu, I don't think we need CMA on top, so it makes sense to
> keep the CMA in the swiotlb code.
>

Catalin, is that just sample/design code or is that patch going to be 
merged sometime?

Thanks,
Laura
Catalin Marinas Dec. 13, 2013, 1:37 p.m. UTC | #11
On Fri, Dec 13, 2013 at 12:48:27AM +0000, Laura Abbott wrote:
> On 12/10/2013 2:42 AM, Catalin Marinas wrote:
> > For coherency, we could build it on top of whatever dma (allocation) ops
> > are registered, whether swiotlb or iommu (see part of
> > https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429)
> >
> > Regarding iommu, I don't think we need CMA on top, so it makes sense to
> > keep the CMA in the swiotlb code.
> 
> Catalin, is that just sample/design code or is that patch going to be 
> merged sometime?

I was originally hoping that on ARMv8 systems DMA would be coherent. But
I got requests (though people claim it is only needed for development) to
support non-coherent DMA, hence the above patch. I would like to merge it
at some point, in this form or another. The pending issue is describing
whether a device or bus is coherent or not, so there is some work to do
on system topology and DT first.
Will Deacon Dec. 13, 2013, 1:45 p.m. UTC | #12
On Fri, Dec 13, 2013 at 01:37:17PM +0000, Catalin Marinas wrote:
> I was originally hoping that on ARMv8 systems DMA would be coherent.

Hehe, wishful thinking, I reckon :)

Will

Patch

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index e4399fa..77b4b17 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -29,6 +29,9 @@ 
 #include <linux/ctype.h>
 #include <linux/highmem.h>
 #include <linux/gfp.h>
+#include <linux/dma-contiguous.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -610,6 +613,66 @@  void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 }
 EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
 
+static void * __alloc_from_contiguous(struct device *hwdev, size_t size,
+					struct page **ret_page)
+{
+	unsigned long order = get_order(size);
+	size_t count = size >> PAGE_SHIFT;
+	struct page *page;
+	void *ptr = NULL;
+
+	page = dma_alloc_from_contiguous(hwdev, count, order);
+	if (!page)
+		return NULL;
+
+	if (PageHighMem(page)) {
+		struct vm_struct *area;
+		unsigned long addr;
+
+		/*
+		 * DMA allocation can be mapped to user space, so lets
+		 * set VM_USERMAP flags too.
+		 */
+		area = get_vm_area(size, VM_USERMAP);
+		if (!area)
+			goto err;
+		addr = (unsigned long)area->addr;
+		area->phys_addr = __pfn_to_phys(page_to_pfn(page));
+
+		if (ioremap_page_range(addr, addr + size, area->phys_addr,
+		    PAGE_KERNEL)) {
+			vunmap((void *)addr);
+			goto err;
+		}
+		ptr = area->addr;
+	} else {
+		ptr = page_address(page);
+	}
+
+	*ret_page = page;
+	return ptr;
+
+err:
+	dma_release_from_contiguous(hwdev, page, count);
+	return NULL;
+}
+
+static void __free_from_contiguous(struct device *hwdev, struct page *page,
+				void *cpu_addr, size_t size)
+{
+	if (PageHighMem(page)) {
+		struct vm_struct *area = find_vm_area(cpu_addr);
+		if (!area) {
+			WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+			return;
+		}
+		unmap_kernel_range((unsigned long)cpu_addr, size);
+		vunmap(cpu_addr);
+	}
+	dma_release_from_contiguous(hwdev, page, size >> PAGE_SHIFT);
+}
+
+
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		       dma_addr_t *dma_handle, gfp_t flags)
@@ -618,18 +681,27 @@  swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	void *ret;
 	int order = get_order(size);
 	u64 dma_mask = DMA_BIT_MASK(32);
+	struct page *page;
 
 	if (hwdev && hwdev->coherent_dma_mask)
 		dma_mask = hwdev->coherent_dma_mask;
 
-	ret = (void *)__get_free_pages(flags, order);
-	if (ret) {
+	if (IS_ENABLED(CONFIG_DMA_CMA)) {
+		ret = __alloc_from_contiguous(hwdev, size, &page);
+		dev_addr = phys_to_dma(hwdev, page_to_phys(page));
+	} else {
+		ret = (void *)__get_free_pages(flags, order);
 		dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+	}
+	if (ret) {
 		if (dev_addr + size - 1 > dma_mask) {
 			/*
 			 * The allocated memory isn't reachable by the device.
 			 */
-			free_pages((unsigned long) ret, order);
+			if(IS_ENABLED(CONFIG_DMA_CMA))
+				__free_from_contiguous(hwdev, page, ret, size);
+			else
+				free_pages((unsigned long) ret, order);
 			ret = NULL;
 		}
 	}
@@ -673,11 +745,19 @@  swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	WARN_ON(irqs_disabled());
-	if (!is_swiotlb_buffer(paddr))
-		free_pages((unsigned long)vaddr, get_order(size));
-	else
+	if (!is_swiotlb_buffer(paddr)) {
+		if (IS_ENABLED(CONFIG_DMA_CMA)) {
+			__free_from_contiguous(hwdev,
+				pfn_to_page(paddr >> PAGE_SHIFT),
+				vaddr,
+				size);
+		} else {
+			free_pages((unsigned long)vaddr, get_order(size));
+		}
+	} else {
 		/* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
 		swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE);
+	}
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);