@@ -47,7 +47,7 @@ static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain)
u32 *p;
int i;
- etnaviv_domain->base.bad_page_cpu = dma_alloc_coherent(
+ etnaviv_domain->base.bad_page_cpu = dma_alloc_writecombine(
etnaviv_domain->base.dev,
SZ_4K,
&etnaviv_domain->base.bad_page_dma,
@@ -60,13 +60,14 @@ static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain)
*p++ = 0xdead55aa;
etnaviv_domain->pgtable_cpu =
- dma_alloc_coherent(etnaviv_domain->base.dev, PT_SIZE,
- &etnaviv_domain->pgtable_dma,
- GFP_KERNEL);
+ dma_alloc_writecombine(etnaviv_domain->base.dev,
+ PT_SIZE,
+ &etnaviv_domain->pgtable_dma,
+ GFP_KERNEL);
if (!etnaviv_domain->pgtable_cpu) {
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->base.bad_page_cpu,
- etnaviv_domain->base.bad_page_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->base.bad_page_cpu,
+ etnaviv_domain->base.bad_page_dma);
return -ENOMEM;
}
@@ -81,13 +82,13 @@ static void etnaviv_iommuv1_domain_free(struct etnaviv_iommu_domain *domain)
struct etnaviv_iommuv1_domain *etnaviv_domain =
to_etnaviv_domain(domain);
- dma_free_coherent(etnaviv_domain->base.dev, PT_SIZE,
- etnaviv_domain->pgtable_cpu,
- etnaviv_domain->pgtable_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, PT_SIZE,
+ etnaviv_domain->pgtable_cpu,
+ etnaviv_domain->pgtable_dma);
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->base.bad_page_cpu,
- etnaviv_domain->base.bad_page_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->base.bad_page_cpu,
+ etnaviv_domain->base.bad_page_dma);
kfree(etnaviv_domain);
}
@@ -104,7 +104,7 @@ static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain)
int ret, i, j;
/* allocate scratch page */
- etnaviv_domain->base.bad_page_cpu = dma_alloc_coherent(
+ etnaviv_domain->base.bad_page_cpu = dma_alloc_writecombine(
etnaviv_domain->base.dev,
SZ_4K,
&etnaviv_domain->base.bad_page_dma,
@@ -117,19 +117,19 @@ static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain)
for (i = 0; i < SZ_4K / 4; i++)
*p++ = 0xdead55aa;
- etnaviv_domain->pta_cpu = dma_alloc_coherent(etnaviv_domain->base.dev,
- SZ_4K,
- &etnaviv_domain->pta_dma,
- GFP_KERNEL);
+ etnaviv_domain->pta_cpu =
+ dma_alloc_writecombine(etnaviv_domain->base.dev,
+ SZ_4K, &etnaviv_domain->pta_dma,
+ GFP_KERNEL);
if (!etnaviv_domain->pta_cpu) {
ret = -ENOMEM;
goto fail_mem;
}
- etnaviv_domain->mtlb_cpu = dma_alloc_coherent(etnaviv_domain->base.dev,
- SZ_4K,
- &etnaviv_domain->mtlb_dma,
- GFP_KERNEL);
+ etnaviv_domain->mtlb_cpu =
+ dma_alloc_writecombine(etnaviv_domain->base.dev,
+ SZ_4K, &etnaviv_domain->mtlb_dma,
+ GFP_KERNEL);
if (!etnaviv_domain->mtlb_cpu) {
ret = -ENOMEM;
goto fail_mem;
@@ -138,10 +138,10 @@ static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain)
/* pre-populate STLB pages (may want to switch to on-demand later) */
for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
etnaviv_domain->stlb_cpu[i] =
- dma_alloc_coherent(etnaviv_domain->base.dev,
- SZ_4K,
- &etnaviv_domain->stlb_dma[i],
- GFP_KERNEL);
+ dma_alloc_writecombine(etnaviv_domain->base.dev,
+ SZ_4K,
+ &etnaviv_domain->stlb_dma[i],
+ GFP_KERNEL);
if (!etnaviv_domain->stlb_cpu[i]) {
ret = -ENOMEM;
goto fail_mem;
@@ -158,25 +158,25 @@ static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain)
fail_mem:
if (etnaviv_domain->base.bad_page_cpu)
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->base.bad_page_cpu,
- etnaviv_domain->base.bad_page_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->base.bad_page_cpu,
+ etnaviv_domain->base.bad_page_dma);
if (etnaviv_domain->pta_cpu)
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->pta_cpu,
- etnaviv_domain->pta_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->pta_cpu,
+ etnaviv_domain->pta_dma);
if (etnaviv_domain->mtlb_cpu)
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->mtlb_cpu,
- etnaviv_domain->mtlb_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->mtlb_cpu,
+ etnaviv_domain->mtlb_dma);
for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
if (etnaviv_domain->stlb_cpu[i])
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->stlb_cpu[i],
- etnaviv_domain->stlb_dma[i]);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->stlb_cpu[i],
+ etnaviv_domain->stlb_dma[i]);
}
return ret;
@@ -188,23 +188,23 @@ static void etnaviv_iommuv2_domain_free(struct etnaviv_iommu_domain *domain)
to_etnaviv_domain(domain);
int i;
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->base.bad_page_cpu,
- etnaviv_domain->base.bad_page_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->base.bad_page_cpu,
+ etnaviv_domain->base.bad_page_dma);
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->pta_cpu,
- etnaviv_domain->pta_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->pta_cpu,
+ etnaviv_domain->pta_dma);
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->mtlb_cpu,
- etnaviv_domain->mtlb_dma);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->mtlb_cpu,
+ etnaviv_domain->mtlb_dma);
for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
if (etnaviv_domain->stlb_cpu[i])
- dma_free_coherent(etnaviv_domain->base.dev, SZ_4K,
- etnaviv_domain->stlb_cpu[i],
- etnaviv_domain->stlb_dma[i]);
+ dma_free_writecombine(etnaviv_domain->base.dev, SZ_4K,
+ etnaviv_domain->stlb_cpu[i],
+ etnaviv_domain->stlb_dma[i]);
}
vfree(etnaviv_domain);
We are likely to write multiple page entries at once and already ensure proper write buffer flushing before GPU submit, so this improves CPU time usage in the submit path without any downsides. Signed-off-by: Lucas Stach <l.stach@pengutronix.de> --- drivers/gpu/drm/etnaviv/etnaviv_iommu.c | 27 +++++------ drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c | 74 +++++++++++++++--------------- 2 files changed, 51 insertions(+), 50 deletions(-)