Message ID | 1386634334-31139-4-git-send-email-lauraa@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Laura Abbott <lauraa@codeaurora.org> wrote: >Some architectures may implement the CMA APIs to allow allocation >of larger contiguous blocks of memory. Add support in the swiotlb >alloc/free functions to allocate from the CMA APIs instead of the >basic page allocator. > >Cc: Will Deacon <will.deacon@arm.com> >Cc: Catalin Marinas <catalin.marinas@arm.com> >Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> >Cc: Marek Szyprowski <m.szyprowski@samsung.com> >Signed-off-by: Laura Abbott <lauraa@codeaurora.org> >--- >lib/swiotlb.c | 92 >+++++++++++++++++++++++++++++++++++++++++++++++++++++---- > 1 files changed, 86 insertions(+), 6 deletions(-) > >diff --git a/lib/swiotlb.c b/lib/swiotlb.c >index e4399fa..77b4b17 100644 >--- a/lib/swiotlb.c >+++ b/lib/swiotlb.c >@@ -29,6 +29,9 @@ > #include <linux/ctype.h> > #include <linux/highmem.h> > #include <linux/gfp.h> >+#include <linux/dma-contiguous.h> >+#include <linux/io.h> >+#include <linux/vmalloc.h> > > #include <asm/io.h> > #include <asm/dma.h> >@@ -610,6 +613,66 @@ void swiotlb_tbl_sync_single(struct device *hwdev, >phys_addr_t tlb_addr, > } > EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); > >+static void * __alloc_from_contiguous(struct device *hwdev, size_t >size, >+ struct page **ret_page) >+{ >+ unsigned long order = get_order(size); >+ size_t count = size >> PAGE_SHIFT; >+ struct page *page; >+ void *ptr = NULL; >+ >+ page = dma_alloc_from_contiguous(hwdev, count, order); >+ if (!page) >+ return NULL; >+ >+ if (PageHighMem(page)) { >+ struct vm_struct *area; >+ unsigned long addr; >+ >+ /* >+ * DMA allocation can be mapped to user space, so lets >+ * set VM_USERMAP flags too. 
>+ */ >+ area = get_vm_area(size, VM_USERMAP); >+ if (!area) >+ goto err; >+ addr = (unsigned long)area->addr; >+ area->phys_addr = __pfn_to_phys(page_to_pfn(page)); >+ >+ if (ioremap_page_range(addr, addr + size, area->phys_addr, >+ PAGE_KERNEL)) { >+ vunmap((void *)addr); >+ goto err; >+ } >+ ptr = area->addr; >+ } else { >+ ptr = page_address(page); >+ } >+ >+ *ret_page = page; >+ return ptr; >+ >+err: >+ dma_release_from_contiguous(hwdev, page, count); >+ return NULL; >+} >+ >+static void __free_from_contiguous(struct device *hwdev, struct page >*page, >+ void *cpu_addr, size_t size) >+{ >+ if (PageHighMem(page)) { >+ struct vm_struct *area = find_vm_area(cpu_addr); >+ if (!area) { >+ WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); >+ return; >+ } >+ unmap_kernel_range((unsigned long)cpu_addr, size); >+ vunmap(cpu_addr); >+ } >+ dma_release_from_contiguous(hwdev, page, size >> PAGE_SHIFT); >+} >+ >+ > void * > swiotlb_alloc_coherent(struct device *hwdev, size_t size, > dma_addr_t *dma_handle, gfp_t flags) >@@ -618,18 +681,27 @@ swiotlb_alloc_coherent(struct device *hwdev, >size_t size, > void *ret; > int order = get_order(size); > u64 dma_mask = DMA_BIT_MASK(32); >+ struct page *page; > > if (hwdev && hwdev->coherent_dma_mask) > dma_mask = hwdev->coherent_dma_mask; > >- ret = (void *)__get_free_pages(flags, order); >- if (ret) { >+ if (IS_ENABLED(CONFIG_DMA_CMA)) { >+ ret = __alloc_from_contiguous(hwdev, size, &page); >+ dev_addr = phys_to_dma(hwdev, page_to_phys(page)); >+ } else { >+ ret = (void *)__get_free_pages(flags, order); > dev_addr = swiotlb_virt_to_bus(hwdev, ret); >+ } >+ if (ret) { > if (dev_addr + size - 1 > dma_mask) { > /* > * The allocated memory isn't reachable by the device. 
> */ >- free_pages((unsigned long) ret, order); >+ if(IS_ENABLED(CONFIG_DMA_CMA)) >+ __free_from_contiguous(hwdev, page, ret, size); >+ else >+ free_pages((unsigned long) ret, order); > ret = NULL; > } > } >@@ -673,11 +745,19 @@ swiotlb_free_coherent(struct device *hwdev, >size_t size, void *vaddr, > phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); > > WARN_ON(irqs_disabled()); >- if (!is_swiotlb_buffer(paddr)) >- free_pages((unsigned long)vaddr, get_order(size)); >- else >+ if (!is_swiotlb_buffer(paddr)) { >+ if (IS_ENABLED(CONFIG_DMA_CMA)) { >+ __free_from_contiguous(hwdev, >+ pfn_to_page(paddr >> PAGE_SHIFT), >+ vaddr, >+ size); >+ } else { >+ free_pages((unsigned long)vaddr, get_order(size)); >+ } >+ } else { > /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ > swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); >+ } > } > EXPORT_SYMBOL(swiotlb_free_coherent); > Can this be done in the platform dma_ops functions instead?
On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote: > Laura Abbott <lauraa@codeaurora.org> wrote: >> Some architectures may implement the CMA APIs to allow allocation >> of larger contiguous blocks of memory. Add support in the swiotlb >> alloc/free functions to allocate from the CMA APIs instead of the >> basic page allocator. >> >> Cc: Will Deacon <will.deacon@arm.com> >> Cc: Catalin Marinas <catalin.marinas@arm.com> >> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> >> Cc: Marek Szyprowski <m.szyprowski@samsung.com> >> Signed-off-by: Laura Abbott <lauraa@codeaurora.org> ... >> > > Can this be done in the platform dma_ops functions instead? > I suppose it could but that seems like it would result in lots of duplicated code if every architecture that uses swiotlb wants to use CMA. Thanks, Laura
Laura Abbott <lauraa@codeaurora.org> wrote: >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote: >> Laura Abbott <lauraa@codeaurora.org> wrote: >>> Some architectures may implement the CMA APIs to allow allocation >>> of larger contiguous blocks of memory. Add support in the swiotlb >>> alloc/free functions to allocate from the CMA APIs instead of the >>> basic page allocator. >>> >>> Cc: Will Deacon <will.deacon@arm.com> >>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> >>> Cc: Marek Szyprowski <m.szyprowski@samsung.com> >>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org> >... >>> >> >> Can this be done in the platform dma_ops functions instead? >> > >I suppose it could but that seems like it would result in lots of >duplicated code if every architecture that uses swiotlb wants to use >CMA. > >Thanks, >Laura Then let's do it that way. Thank you.
On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote: > Laura Abbott <lauraa@codeaurora.org> wrote: > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote: > >> Laura Abbott <lauraa@codeaurora.org> wrote: > >>> Some architectures may implement the CMA APIs to allow allocation > >>> of larger contiguous blocks of memory. Add support in the swiotlb > >>> alloc/free functions to allocate from the CMA APIs instead of the > >>> basic page allocator. > >>> > >>> Cc: Will Deacon <will.deacon@arm.com> > >>> Cc: Catalin Marinas <catalin.marinas@arm.com> > >>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > >>> Cc: Marek Szyprowski <m.szyprowski@samsung.com> > >>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org> > >... > >>> > >> > >> Can this be done in the platform dma_ops functions instead? > >> > > > >I suppose it could but that seems like it would result in lots of > >duplicated code if every architecture that uses swiotlb wants to use > >CMA. > > > >Thanks, > >Laura > > Then let's do that it that way. Thank you. Note that once arch/arm64 starts growing things like support for non-coherent DMA and IOMMU mappings, we'll probably want to factor out a bunch of the boilerplat from our dma-mapping.c file into places like lib/iommu-helper.c. However, until then, I can see this making sense to live in the arch-code. Ultimately, the swiotlb code could just call a helper, but for now we can rip-out the highmem parts (which doesn't leave much) and put it under arch/arm64. Will
On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote: > On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote: > > Laura Abbott <lauraa@codeaurora.org> wrote: > > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote: > > >> Laura Abbott <lauraa@codeaurora.org> wrote: > > >>> Some architectures may implement the CMA APIs to allow allocation > > >>> of larger contiguous blocks of memory. Add support in the swiotlb > > >>> alloc/free functions to allocate from the CMA APIs instead of the > > >>> basic page allocator. > > >>> > > >>> Cc: Will Deacon <will.deacon@arm.com> > > >>> Cc: Catalin Marinas <catalin.marinas@arm.com> > > >>> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> > > >>> Cc: Marek Szyprowski <m.szyprowski@samsung.com> > > >>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org> > > >... > > >>> > > >> > > >> Can this be done in the platform dma_ops functions instead? > > >> > > > > > >I suppose it could but that seems like it would result in lots of > > >duplicated code if every architecture that uses swiotlb wants to use > > >CMA. > > > > > >Thanks, > > >Laura > > > > Then let's do that it that way. Thank you. > > Note that once arch/arm64 starts growing things like support for non-coherent > DMA and IOMMU mappings, we'll probably want to factor out a bunch of the > boilerplat from our dma-mapping.c file into places like lib/iommu-helper.c. For coherency, we could build it on top of whatever dma (allocation) ops are registered, whether swiotlb or iommu (see part of https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429) Regarding iommu, I don't think we need CMA on top, so it makes sense to keep the CMA in the swiotlb code.
On Tue, Dec 10, 2013 at 10:42:31AM +0000, Catalin Marinas wrote: > On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote: > > On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote: > > > Laura Abbott <lauraa@codeaurora.org> wrote: > > > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote: > > > >> Can this be done in the platform dma_ops functions instead? > > > > > > > >I suppose it could but that seems like it would result in lots of > > > >duplicated code if every architecture that uses swiotlb wants to use > > > >CMA. > > > > > > > > > > Then let's do that it that way. Thank you. > > > > Note that once arch/arm64 starts growing things like support for non-coherent > > DMA and IOMMU mappings, we'll probably want to factor out a bunch of the > > boilerplat from our dma-mapping.c file into places like lib/iommu-helper.c. > > For coherency, we could build it on top of whatever dma (allocation) ops > are registered, whether swiotlb or iommu (see part of > https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429) > > Regarding iommu, I don't think we need CMA on top, so it makes sense to > keep the CMA in the swiotlb code. I don't think it does; swiotlb doesn't care about things like remapping highmem pages returned from CMA, so inlining the code in there just implies that we should inline it in all of the dma_ops implementations that might want it (although agreed about IOMMU not needing it. I'm thinking about things like the non-coherent ops under arch/arm/). Instead, it should either be in a library that they can all use as they see fit, or in the code that deals with all of the dma_ops in the architecture backend. My reading of Konrad's reply was that he doesn't want this in the swiotlb code either... Will
Will Deacon <will.deacon@arm.com> wrote: >On Tue, Dec 10, 2013 at 10:42:31AM +0000, Catalin Marinas wrote: >> On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote: >> > On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk >wrote: >> > > Laura Abbott <lauraa@codeaurora.org> wrote: >> > > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote: >> > > >> Can this be done in the platform dma_ops functions instead? >> > > > >> > > >I suppose it could but that seems like it would result in lots >of >> > > >duplicated code if every architecture that uses swiotlb wants to >use >> > > >CMA. >> > > > >> > > >> > > Then let's do that it that way. Thank you. >> > >> > Note that once arch/arm64 starts growing things like support for >non-coherent >> > DMA and IOMMU mappings, we'll probably want to factor out a bunch >of the >> > boilerplat from our dma-mapping.c file into places like >lib/iommu-helper.c. >> >> For coherency, we could build it on top of whatever dma (allocation) >ops >> are registered, whether swiotlb or iommu (see part of >> >https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429) >> >> Regarding iommu, I don't think we need CMA on top, so it makes sense >to >> keep the CMA in the swiotlb code. > >I don't think it does; swiotlb doesn't care about things like remapping >highmem pages returned from CMA, so inlining the code in there just >implies >that we should inline it in all of the dma_ops implementations that >might >want it (although agreed about IOMMU not needing it. I'm thinking about >things like the non-coherent ops under arch/arm/). > >Instead, it should either be in a library that they can all use as they >see >fit, or in the code that deals with all of the dma_ops in the >architecture >backend. > >My reading of Konrad's reply was that he doesn't want this in the >swiotlb >code either... > >Will Having it in a library - such as iommu-helper would be better. 
We could rename the library to dma-helper to make its intended usage more obvious.
On Tue, Dec 10, 2013 at 01:50:32PM +0000, Will Deacon wrote: > On Tue, Dec 10, 2013 at 10:42:31AM +0000, Catalin Marinas wrote: > > On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote: > > > On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote: > > > > Laura Abbott <lauraa@codeaurora.org> wrote: > > > > >On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote: > > > > >> Can this be done in the platform dma_ops functions instead? > > > > > > > > > >I suppose it could but that seems like it would result in lots of > > > > >duplicated code if every architecture that uses swiotlb wants to use > > > > >CMA. > > > > > > > > > > > > > Then let's do that it that way. Thank you. > > > > > > Note that once arch/arm64 starts growing things like support for non-coherent > > > DMA and IOMMU mappings, we'll probably want to factor out a bunch of the > > > boilerplat from our dma-mapping.c file into places like lib/iommu-helper.c. > > > > For coherency, we could build it on top of whatever dma (allocation) ops > > are registered, whether swiotlb or iommu (see part of > > https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429) > > > > Regarding iommu, I don't think we need CMA on top, so it makes sense to > > keep the CMA in the swiotlb code. > > I don't think it does; swiotlb doesn't care about things like remapping > highmem pages returned from CMA, so inlining the code in there just implies > that we should inline it in all of the dma_ops implementations that might > want it (although agreed about IOMMU not needing it. I'm thinking about > things like the non-coherent ops under arch/arm/). My suggestion was to build coherency on top of the low-level dma allocation/mapping ops in the arch code by function pointer redirection or with arch hooks in the dma alloc code (e.g. swiotlb.c) as an optimisation. Anyway, that's for another thread. 
Looking through the arm code, it seems that contiguous allocation can be triggered when dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS) independent of iommu use. At a second thought, this could be useful to reduce the SMMU TLB pressure for certain devices (not sure about alignment guarantees of CMA). If we look at the buffer allocation independent of the actual dma address generation, I agree that we shouldn't merge CMA into swiotlb. With swiotlb we get bouncing if needed (I assume this is not required with CMA). With iommu, the same buffer gets mapped in the device memory space and we don't actually need to bother with ioremap_page_range(), just temporary kmap for cache flushing (if highmem). > Instead, it should either be in a library that they can all use as they see > fit, or in the code that deals with all of the dma_ops in the architecture > backend. For arm64, since we don't need highmem, I'm tempted to just call the dma_alloc_from_contiguous directly in arch/arm64/mm/dma-mapping.c, the patch should be a few lines only. We let the code sharing via lib/ to other 32-bit architectures ;).
On 12/10/2013 6:50 AM, Catalin Marinas wrote: > On Tue, Dec 10, 2013 at 01:50:32PM +0000, Will Deacon wrote: >> On Tue, Dec 10, 2013 at 10:42:31AM +0000, Catalin Marinas wrote: >>> On Tue, Dec 10, 2013 at 10:25:56AM +0000, Will Deacon wrote: >>>> On Tue, Dec 10, 2013 at 12:40:20AM +0000, Konrad Rzeszutek Wilk wrote: >>>>> Laura Abbott <lauraa@codeaurora.org> wrote: >>>>>> On 12/9/2013 4:29 PM, Konrad Rzeszutek Wilk wrote: >>>>>>> Can this be done in the platform dma_ops functions instead? >>>>>> >>>>>> I suppose it could but that seems like it would result in lots of >>>>>> duplicated code if every architecture that uses swiotlb wants to use >>>>>> CMA. >>>>>> >>>>> >>>>> Then let's do that it that way. Thank you. >>>> >>>> Note that once arch/arm64 starts growing things like support for non-coherent >>>> DMA and IOMMU mappings, we'll probably want to factor out a bunch of the >>>> boilerplat from our dma-mapping.c file into places like lib/iommu-helper.c. >>> >>> For coherency, we could build it on top of whatever dma (allocation) ops >>> are registered, whether swiotlb or iommu (see part of >>> https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429) >>> >>> Regarding iommu, I don't think we need CMA on top, so it makes sense to >>> keep the CMA in the swiotlb code. >> >> I don't think it does; swiotlb doesn't care about things like remapping >> highmem pages returned from CMA, so inlining the code in there just implies >> that we should inline it in all of the dma_ops implementations that might >> want it (although agreed about IOMMU not needing it. I'm thinking about >> things like the non-coherent ops under arch/arm/). > > My suggestion was to build coherency on top of the low-level dma > allocation/mapping ops in the arch code by function pointer redirection > or with arch hooks in the dma alloc code (e.g. swiotlb.c) as an > optimisation. Anyway, that's for another thread. 
> > Looking through the arm code, it seems that contiguous allocation can be > triggered when dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS) independent of > iommu use. At a second thought, this could be useful to reduce the SMMU > TLB pressure for certain devices (not sure about alignment guarantees of > CMA). > > If we look at the buffer allocation independent of the actual dma > address generation, I agree that we shouldn't merge CMA into swiotlb. > With swiotlb we get bouncing if needed (I assume this is not required > with CMA). With iommu, the same buffer gets mapped in the device memory > space and we don't actually need to bother with ioremap_page_range(), > just temporary kmap for cache flushing (if highmem). > >> Instead, it should either be in a library that they can all use as they see >> fit, or in the code that deals with all of the dma_ops in the architecture >> backend. > > For arm64, since we don't need highmem, I'm tempted to just call the > dma_alloc_from_contiguous directly in arch/arm64/mm/dma-mapping.c, the > patch should be a few lines only. We let the code sharing via lib/ to > other 32-bit architectures ;). > Yeah, I fell into the 'premature optimization' trap here by trying to fold things into swiotlb. I'll re-submit with the code directly in arm64 dma-mapping.c for now and we can figure out how to optimize the 'force contiguous' for IOMMU allocations later. Thanks, Laura
On 12/10/2013 2:42 AM, Catalin Marinas wrote: > > For coherency, we could build it on top of whatever dma (allocation) ops > are registered, whether swiotlb or iommu (see part of > https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429) > > Regarding iommu, I don't think we need CMA on top, so it makes sense to > keep the CMA in the swiotlb code. > Catalin, is that just sample/design code or is that patch going to be merged sometime? Thanks, Laura
On Fri, Dec 13, 2013 at 12:48:27AM +0000, Laura Abbott wrote: > On 12/10/2013 2:42 AM, Catalin Marinas wrote: > > For coherency, we could build it on top of whatever dma (allocation) ops > > are registered, whether swiotlb or iommu (see part of > > https://git.kernel.org/cgit/linux/kernel/git/cmarinas/linux-aarch64.git/commit/?h=devel&id=c67fe405be6b55399c9e53dfeba5e2c6b930e429) > > > > Regarding iommu, I don't think we need CMA on top, so it makes sense to > > keep the CMA in the swiotlb code. > > Catalin, is that just sample/design code or is that patch going to be > merged sometime? I was originally hoping that on ARMv8 systems the DMA will be coherent. But I got requests (though people claiming that only for development) for cache coherency, hence the above patch. I would like to merge it at some point (this form or another). The pending issue is describing whether a device or bus is coherent or not. So there is some work on system topology and DT before.
On Fri, Dec 13, 2013 at 01:37:17PM +0000, Catalin Marinas wrote:
> I was originally hoping that on ARMv8 systems the DMA will be coherent.
Hehe, wishful thinking, I reckon :)
Will
diff --git a/lib/swiotlb.c b/lib/swiotlb.c index e4399fa..77b4b17 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -29,6 +29,9 @@ #include <linux/ctype.h> #include <linux/highmem.h> #include <linux/gfp.h> +#include <linux/dma-contiguous.h> +#include <linux/io.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/dma.h> @@ -610,6 +613,66 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); +static void * __alloc_from_contiguous(struct device *hwdev, size_t size, + struct page **ret_page) +{ + unsigned long order = get_order(size); + size_t count = size >> PAGE_SHIFT; + struct page *page; + void *ptr = NULL; + + page = dma_alloc_from_contiguous(hwdev, count, order); + if (!page) + return NULL; + + if (PageHighMem(page)) { + struct vm_struct *area; + unsigned long addr; + + /* + * DMA allocation can be mapped to user space, so lets + * set VM_USERMAP flags too. + */ + area = get_vm_area(size, VM_USERMAP); + if (!area) + goto err; + addr = (unsigned long)area->addr; + area->phys_addr = __pfn_to_phys(page_to_pfn(page)); + + if (ioremap_page_range(addr, addr + size, area->phys_addr, + PAGE_KERNEL)) { + vunmap((void *)addr); + goto err; + } + ptr = area->addr; + } else { + ptr = page_address(page); + } + + *ret_page = page; + return ptr; + +err: + dma_release_from_contiguous(hwdev, page, count); + return NULL; +} + +static void __free_from_contiguous(struct device *hwdev, struct page *page, + void *cpu_addr, size_t size) +{ + if (PageHighMem(page)) { + struct vm_struct *area = find_vm_area(cpu_addr); + if (!area) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + unmap_kernel_range((unsigned long)cpu_addr, size); + vunmap(cpu_addr); + } + dma_release_from_contiguous(hwdev, page, size >> PAGE_SHIFT); +} + + void * swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) @@ -618,18 +681,27 @@ swiotlb_alloc_coherent(struct 
device *hwdev, size_t size, void *ret; int order = get_order(size); u64 dma_mask = DMA_BIT_MASK(32); + struct page *page; if (hwdev && hwdev->coherent_dma_mask) dma_mask = hwdev->coherent_dma_mask; - ret = (void *)__get_free_pages(flags, order); - if (ret) { + if (IS_ENABLED(CONFIG_DMA_CMA)) { + ret = __alloc_from_contiguous(hwdev, size, &page); + dev_addr = phys_to_dma(hwdev, page_to_phys(page)); + } else { + ret = (void *)__get_free_pages(flags, order); dev_addr = swiotlb_virt_to_bus(hwdev, ret); + } + if (ret) { if (dev_addr + size - 1 > dma_mask) { /* * The allocated memory isn't reachable by the device. */ - free_pages((unsigned long) ret, order); + if(IS_ENABLED(CONFIG_DMA_CMA)) + __free_from_contiguous(hwdev, page, ret, size); + else + free_pages((unsigned long) ret, order); ret = NULL; } } @@ -673,11 +745,19 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); WARN_ON(irqs_disabled()); - if (!is_swiotlb_buffer(paddr)) - free_pages((unsigned long)vaddr, get_order(size)); - else + if (!is_swiotlb_buffer(paddr)) { + if (IS_ENABLED(CONFIG_DMA_CMA)) { + __free_from_contiguous(hwdev, + pfn_to_page(paddr >> PAGE_SHIFT), + vaddr, + size); + } else { + free_pages((unsigned long)vaddr, get_order(size)); + } + } else { /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ swiotlb_tbl_unmap_single(hwdev, paddr, size, DMA_TO_DEVICE); + } } EXPORT_SYMBOL(swiotlb_free_coherent);
Some architectures may implement the CMA APIs to allow allocation of larger contiguous blocks of memory. Add support in the swiotlb alloc/free functions to allocate from the CMA APIs instead of the basic page allocator. Cc: Will Deacon <will.deacon@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Signed-off-by: Laura Abbott <lauraa@codeaurora.org> --- lib/swiotlb.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 86 insertions(+), 6 deletions(-)