Message ID | 20200520234520.22563-9-sstabellini@kernel.org (mailing list archive)
---|---
State | Superseded
Series | fix swiotlb-xen for RPi4
Hi,

On 21/05/2020 00:45, Stefano Stabellini wrote:
> From: Stefano Stabellini <stefano.stabellini@xilinx.com>
>
> Add phys_to_dma/dma_to_phys calls to
> xen_dma_sync_for_cpu, xen_dma_sync_for_device, and
> xen_arch_need_swiotlb.
>
> In xen_arch_need_swiotlb, take the opportunity to switch to the simpler
> pfn_valid check we use everywhere else.
>
> dma_cache_maint is fixed by the next patch.

Like patch #8, this explains what the code is doing, not why it is
necessary.

>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@xilinx.com>
> ---
>  arch/arm/xen/mm.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
> index f2414ea40a79..7639251bcc79 100644
> --- a/arch/arm/xen/mm.c
> +++ b/arch/arm/xen/mm.c
> @@ -1,5 +1,6 @@
>   // SPDX-License-Identifier: GPL-2.0-only
>   #include <linux/cpu.h>
> +#include <linux/dma-direct.h>
>   #include <linux/dma-noncoherent.h>
>   #include <linux/gfp.h>
>   #include <linux/highmem.h>
> @@ -75,7 +76,7 @@ void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
>                             phys_addr_t paddr, size_t size,
>                             enum dma_data_direction dir)
>   {
> -        if (pfn_valid(PFN_DOWN(handle)))
> +        if (pfn_valid(PFN_DOWN(dma_to_phys(dev, handle))))
>                   arch_sync_dma_for_cpu(paddr, size, dir);
>           else if (dir != DMA_TO_DEVICE)
>                   dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
> @@ -85,7 +86,7 @@ void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
>                                phys_addr_t paddr, size_t size,
>                                enum dma_data_direction dir)
>   {
> -        if (pfn_valid(PFN_DOWN(handle)))
> +        if (pfn_valid(PFN_DOWN(dma_to_phys(dev, handle))))
>                   arch_sync_dma_for_device(paddr, size, dir);
>           else if (dir == DMA_FROM_DEVICE)
>                   dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
> @@ -97,8 +98,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
>                              phys_addr_t phys,
>                              dma_addr_t dev_addr)
>   {
> -        unsigned int xen_pfn = XEN_PFN_DOWN(phys);
> -        unsigned int bfn = XEN_PFN_DOWN(dev_addr);
> +        unsigned int bfn = XEN_PFN_DOWN(dma_to_phys(dev, dev_addr));
>
>           /*
>            * The swiotlb buffer should be used if
> @@ -115,7 +115,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
>            * require a bounce buffer because the device doesn't support coherent
>            * memory and we are not able to flush the cache.
>            */
> -        return (!hypercall_cflush && (xen_pfn != bfn) &&
> +        return (!hypercall_cflush && !pfn_valid(bfn) &&

I believe this change is incorrect. The bfn is a frame based on Xen page
granularity (always 4K), while pfn_valid() is expecting a frame based on
the kernel page granularity.

>                   !dev_is_dma_coherent(dev));
>   }
>

Cheers,
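To make the granularity mismatch concrete: a small standalone sketch, not
kernel code. The shift values assume a 64K-page arm64 configuration (this
file is shared with arm64, where such configs exist; on 32-bit arm both
macros agree), and the address is arbitrary:

/*
 * Standalone sketch: with 64K kernel pages, a Xen-granularity frame
 * number is 16x the kernel PFN for the same address, so passing it to
 * a function that expects kernel PFNs (like pfn_valid()) tests the
 * wrong frame.
 */
#include <stdio.h>

#define XEN_PAGE_SHIFT 12          /* Xen always uses 4K pages */
#define PAGE_SHIFT     16          /* e.g. a 64K-page arm64 kernel */

#define XEN_PFN_DOWN(addr) ((addr) >> XEN_PAGE_SHIFT)
#define PFN_DOWN(addr)     ((addr) >> PAGE_SHIFT)

int main(void)
{
        unsigned long long addr = 0x40000000ULL;  /* arbitrary address */

        printf("XEN_PFN_DOWN = %#llx\n", XEN_PFN_DOWN(addr)); /* 0x40000 */
        printf("PFN_DOWN     = %#llx\n", PFN_DOWN(addr));     /* 0x4000  */
        return 0;
}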
On Thu, 21 May 2020, Julien Grall wrote:
> > @@ -97,8 +98,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
> >                              phys_addr_t phys,
> >                              dma_addr_t dev_addr)
> >   {
> > -        unsigned int xen_pfn = XEN_PFN_DOWN(phys);
> > -        unsigned int bfn = XEN_PFN_DOWN(dev_addr);
> > +        unsigned int bfn = XEN_PFN_DOWN(dma_to_phys(dev, dev_addr));
> >
> >           /*
> >            * The swiotlb buffer should be used if
> > @@ -115,7 +115,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
> >            * require a bounce buffer because the device doesn't support coherent
> >            * memory and we are not able to flush the cache.
> >            */
> > -        return (!hypercall_cflush && (xen_pfn != bfn) &&
> > +        return (!hypercall_cflush && !pfn_valid(bfn) &&
>
> I believe this change is incorrect. The bfn is a frame based on Xen page
> granularity (always 4K) while pfn_valid() is expecting a frame based on the
> Kernel page granularity.

Given that kernel granularity >= xen granularity, it looks like it would
be safe to use PFN_DOWN instead of XEN_PFN_DOWN:

    unsigned int bfn = PFN_DOWN(dma_to_phys(dev, dev_addr));

    return (!hypercall_cflush && !pfn_valid(bfn) &&
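Assembled from the quoted lines above, the helper with this suggestion
applied would read roughly as follows. This is a sketch, not a tested
patch; everything outside the two suggested lines is reproduced from the
diff under discussion:

/* Sketch of xen_arch_need_swiotlb() with PFN_DOWN() substituted:
 * PFN_DOWN() yields a kernel-granularity frame number, which is what
 * pfn_valid() expects. The unsigned int type mirrors the quoted
 * suggestion.
 */
bool xen_arch_need_swiotlb(struct device *dev,
                           phys_addr_t phys,
                           dma_addr_t dev_addr)
{
        unsigned int bfn = PFN_DOWN(dma_to_phys(dev, dev_addr));

        /* Bounce if the cache cannot be flushed by hypercall, the bus
         * frame is not local RAM, and the device is not coherent.
         */
        return (!hypercall_cflush && !pfn_valid(bfn) &&
                !dev_is_dma_coherent(dev));
}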
On Thu, 21 May 2020 at 21:08, Stefano Stabellini <sstabellini@kernel.org> wrote:
> On Thu, 21 May 2020, Julien Grall wrote:
> > > @@ -97,8 +98,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
> > >                              phys_addr_t phys,
> > >                              dma_addr_t dev_addr)
> > >   {
> > > -        unsigned int xen_pfn = XEN_PFN_DOWN(phys);
> > > -        unsigned int bfn = XEN_PFN_DOWN(dev_addr);
> > > +        unsigned int bfn = XEN_PFN_DOWN(dma_to_phys(dev, dev_addr));
> > >
> > >           /*
> > >            * The swiotlb buffer should be used if
> > > @@ -115,7 +115,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
> > >            * require a bounce buffer because the device doesn't support coherent
> > >            * memory and we are not able to flush the cache.
> > >            */
> > > -        return (!hypercall_cflush && (xen_pfn != bfn) &&
> > > +        return (!hypercall_cflush && !pfn_valid(bfn) &&
> >
> > I believe this change is incorrect. The bfn is a frame based on Xen page
> > granularity (always 4K) while pfn_valid() is expecting a frame based on the
> > Kernel page granularity.
>
> Given that kernel granularity >= xen granularity, it looks like it would
> be safe to use PFN_DOWN instead of XEN_PFN_DOWN:
>
>     unsigned int bfn = PFN_DOWN(dma_to_phys(dev, dev_addr));

Yes. But is the change worth it? pfn_valid() is definitely going to be
more expensive than the current check.

Cheers,
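For context on the cost concern, here is a simplified standalone sketch
of the two checks side by side. The pfn_valid_sketch() body is a toy
stand-in for a sparsemem-style lookup, not the real implementation, and
the section parameters are invented:

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a sparsemem section table. */
#define SECTION_SHIFT   15
#define NR_MEM_SECTIONS 1024
static unsigned char section_has_mem_map[NR_MEM_SECTIONS];

/* Old check: a single integer comparison of two frame numbers. */
static bool needs_bounce_old(unsigned long xen_pfn, unsigned long bfn)
{
        return xen_pfn != bfn;
}

/* New check: a bounds test plus a memory lookup on every call. */
static bool pfn_valid_sketch(unsigned long pfn)
{
        unsigned long nr = pfn >> SECTION_SHIFT;

        return nr < NR_MEM_SECTIONS && section_has_mem_map[nr];
}

int main(void)
{
        section_has_mem_map[0] = 1; /* pretend the first section is RAM */

        printf("old: %d\n", needs_bounce_old(0x1000, 0x2000)); /* 1 */
        printf("new: %d\n", !pfn_valid_sketch(0x2000));        /* 0 */
        return 0;
}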
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index f2414ea40a79..7639251bcc79 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/cpu.h>
+#include <linux/dma-direct.h>
 #include <linux/dma-noncoherent.h>
 #include <linux/gfp.h>
 #include <linux/highmem.h>
@@ -75,7 +76,7 @@ void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
                           phys_addr_t paddr, size_t size,
                           enum dma_data_direction dir)
 {
-        if (pfn_valid(PFN_DOWN(handle)))
+        if (pfn_valid(PFN_DOWN(dma_to_phys(dev, handle))))
                 arch_sync_dma_for_cpu(paddr, size, dir);
         else if (dir != DMA_TO_DEVICE)
                 dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
@@ -85,7 +86,7 @@ void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
                              phys_addr_t paddr, size_t size,
                              enum dma_data_direction dir)
 {
-        if (pfn_valid(PFN_DOWN(handle)))
+        if (pfn_valid(PFN_DOWN(dma_to_phys(dev, handle))))
                 arch_sync_dma_for_device(paddr, size, dir);
         else if (dir == DMA_FROM_DEVICE)
                 dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
@@ -97,8 +98,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
                            phys_addr_t phys,
                            dma_addr_t dev_addr)
 {
-        unsigned int xen_pfn = XEN_PFN_DOWN(phys);
-        unsigned int bfn = XEN_PFN_DOWN(dev_addr);
+        unsigned int bfn = XEN_PFN_DOWN(dma_to_phys(dev, dev_addr));

         /*
          * The swiotlb buffer should be used if
@@ -115,7 +115,7 @@ bool xen_arch_need_swiotlb(struct device *dev,
          * require a bounce buffer because the device doesn't support coherent
          * memory and we are not able to flush the cache.
          */
-        return (!hypercall_cflush && (xen_pfn != bfn) &&
+        return (!hypercall_cflush && !pfn_valid(bfn) &&
                 !dev_is_dma_coherent(dev));
 }
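The reason the dma_to_phys() calls matter on the RPi4: its DMA masters
see system RAM through a bus offset described by the SoC's dma-ranges,
so a dma_addr_t handle is not a CPU physical address. A toy sketch with
an assumed offset and an arbitrary handle, illustrative only:

#include <stdio.h>

/* Hypothetical bus offset; the real mapping comes from the device
 * tree's dma-ranges for the SoC, not from a hard-coded constant. */
#define BUS_OFFSET 0xc0000000ULL

/* Toy version of dma_to_phys(): strip the device's bus offset to
 * recover the CPU physical address. */
static unsigned long long dma_to_phys_sketch(unsigned long long dma)
{
        return dma - BUS_OFFSET;
}

int main(void)
{
        unsigned long long handle = 0xc0100000ULL; /* arbitrary bus address */

        /* Checks like pfn_valid() must run on the physical address,
         * not on the raw handle. */
        printf("bus %#llx -> phys %#llx\n",
               handle, dma_to_phys_sketch(handle));
        return 0;
}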