
swiotlb-xen: fix DMA_ATTR_NO_KERNEL_MAPPING on arm

Message ID 20220423171422.1831676-1-hch@lst.de (mailing list archive)
State New, archived
Series swiotlb-xen: fix DMA_ATTR_NO_KERNEL_MAPPING on arm

Commit Message

Christoph Hellwig April 23, 2022, 5:14 p.m. UTC
swiotlb-xen uses very different ways to allocate coherent memory on x86
vs arm.  On the former it allocates memory from the page allocator, while
on the latter it reuses the dma-direct allocator that handles the
complexities of non-coherent DMA on arm platforms.

Unfortunately the complexities of trying to deal with the two cases in
the swiotlb-xen.c code lead to a bug in the handling of
DMA_ATTR_NO_KERNEL_MAPPING on arm.  With the DMA_ATTR_NO_KERNEL_MAPPING
flag the coherent memory allocator does not actually allocate coherent
memory, but just a DMA handle for some memory that is DMA addressable
by the device but does not have to have a kernel mapping.  Thus
dereferencing the return value will lead to kernel crashes and memory
corruption.
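
For illustration, here is a minimal, hypothetical driver-side sketch of
the contract behind the flag (the function name, device and buffer size
are invented for this example and are not part of the patch): only the
returned DMA address may be programmed into the hardware, while the
CPU-side return value is an opaque cookie that must never be
dereferenced.

  #include <linux/dma-mapping.h>
  #include <linux/errno.h>
  #include <linux/gfp.h>
  #include <linux/sizes.h>

  /* Hypothetical example, not part of this patch. */
  static int example_alloc_device_only_buffer(struct device *dev,
                                              dma_addr_t *dma)
  {
          void *cookie;

          cookie = dma_alloc_attrs(dev, SZ_64K, dma, GFP_KERNEL,
                                   DMA_ATTR_NO_KERNEL_MAPPING);
          if (!cookie)
                  return -ENOMEM;

          /*
           * Only *dma may be handed to the device.  There may be no
           * kernel mapping behind "cookie", so dereferencing it - as
           * the old swiotlb-xen allocation path did when it memset()
           * the returned buffer - is exactly the bug described above.
           */
          return 0;
  }

The buffer would later be released with dma_free_attrs() using the same
attrs; the dma-direct allocator handles this attribute correctly, which
is what the fix below relies on for arm.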

Fix this by using the dma-direct allocator directly for arm, which works
perfectly fine because on arm swiotlb-xen is only used when the domain is
1:1 mapped, and then simplifying the remaining code to only cater for the
x86 case with DMA-coherent devices.

Reported-by: Rahul Singh <Rahul.Singh@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm/include/asm/xen/page-coherent.h   |   2 -
 arch/arm/xen/mm.c                          |  17 ----
 arch/arm64/include/asm/xen/page-coherent.h |   2 -
 arch/x86/include/asm/xen/page-coherent.h   |  24 -----
 arch/x86/include/asm/xen/swiotlb-xen.h     |   5 +
 drivers/xen/swiotlb-xen.c                  | 106 ++++++++-------------
 include/xen/arm/page-coherent.h            |  20 ----
 include/xen/xen-ops.h                      |   7 --
 8 files changed, 45 insertions(+), 138 deletions(-)
 delete mode 100644 arch/arm/include/asm/xen/page-coherent.h
 delete mode 100644 arch/arm64/include/asm/xen/page-coherent.h
 delete mode 100644 arch/x86/include/asm/xen/page-coherent.h
 delete mode 100644 include/xen/arm/page-coherent.h

Comments

Rahul Singh April 26, 2022, 12:42 p.m. UTC | #1
Hi Christoph,

> On 23 Apr 2022, at 6:14 pm, Christoph Hellwig <hch@lst.de> wrote:
> 
> swiotlb-xen uses very different ways to allocate coherent memory on x86
> vs arm.  On the former it allocates memory from the page allocator, while
> on the latter it reuses the dma-direct allocator that handles the
> complexities of non-coherent DMA on arm platforms.
> 
> Unfortunately the complexities of trying to deal with the two cases in
> the swiotlb-xen.c code lead to a bug in the handling of
> DMA_ATTR_NO_KERNEL_MAPPING on arm.  With the DMA_ATTR_NO_KERNEL_MAPPING
> flag the coherent memory allocator does not actually allocate coherent
> memory, but just a DMA handle for some memory that is DMA addressable
> by the device but does not have to have a kernel mapping.  Thus
> dereferencing the return value will lead to kernel crashes and memory
> corruption.
> 
> Fix this by using the dma-direct allocator directly for arm, which works
> perfectly fine because on arm swiotlb-xen is only used when the domain is
> 1:1 mapped, and then simplifying the remaining code to only cater for the
> x86 case with DMA-coherent devices.
> 
> Reported-by: Rahul Singh <Rahul.Singh@arm.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Rahul Singh <rahul.singh@arm.com>
Tested-by: Rahul Singh <rahul.singh@arm.com>

Regards,
Rahul
Stefano Stabellini April 26, 2022, 11:07 p.m. UTC | #2
On Sat, 23 Apr 2022, Christoph Hellwig wrote:
> swiotlb-xen uses very different ways to allocate coherent memory on x86
> vs arm.  On the former it allocates memory from the page allocator, while
> on the latter it reuses the dma-direct allocator that handles the
> complexities of non-coherent DMA on arm platforms.
> 
> Unfortunately the complexities of trying to deal with the two cases in
> the swiotlb-xen.c code lead to a bug in the handling of
> DMA_ATTR_NO_KERNEL_MAPPING on arm.  With the DMA_ATTR_NO_KERNEL_MAPPING
> flag the coherent memory allocator does not actually allocate coherent
> memory, but just a DMA handle for some memory that is DMA addressable
> by the device but does not have to have a kernel mapping.  Thus
> dereferencing the return value will lead to kernel crashes and memory
> corruption.
> 
> Fix this by using the dma-direct allocator directly for arm, which works
> perfectly fine because on arm swiotlb-xen is only used when the domain is
> 1:1 mapped, and then simplifying the remaining code to only cater for the
> x86 case with DMA-coherent devices.
> 
> Reported-by: Rahul Singh <Rahul.Singh@arm.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>


Christoph Hellwig April 28, 2022, 1:27 p.m. UTC | #3
On Tue, Apr 26, 2022 at 04:07:45PM -0700, Stefano Stabellini wrote:
> > Reported-by: Rahul Singh <Rahul.Singh@arm.com>
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>

Do you want to take this through the Xen tree or should I pick it up?
Either way I'd love to see some testing on x86 as well.
Stefano Stabellini April 28, 2022, 9:49 p.m. UTC | #4
On Thu, 28 Apr 2022, Christoph Hellwig wrote:
> On Tue, Apr 26, 2022 at 04:07:45PM -0700, Stefano Stabellini wrote:
> > > Reported-by: Rahul Singh <Rahul.Singh@arm.com>
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > 
> > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
> 
> Do you want to take this through the Xen tree or should I pick it up?
> Either way I'd love to see some testing on x86 as well.

I agree on the x86 testing. Juergen, Boris?

I'd say to take this patch via the Xen tree but Juergen has just sent a
Xen pull request to Linux last Saturday. Juergen do you plan to send
another one? Do you have something else lined up? If not, it might be
better to let Christoph pick it up.
Boris Ostrovsky April 28, 2022, 10:47 p.m. UTC | #5
On 4/28/22 5:49 PM, Stefano Stabellini wrote:
> On Thu, 28 Apr 2022, Christoph Hellwig wrote:
>> On Tue, Apr 26, 2022 at 04:07:45PM -0700, Stefano Stabellini wrote:
>>>> Reported-by: Rahul Singh <Rahul.Singh@arm.com>
>>>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>>> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
>> Do you want to take this through the Xen tree or should I pick it up?
>> Either way I'd love to see some testing on x86 as well.
> I agree on the x86 testing. Juergen, Boris?
>
> I'd say to take this patch via the Xen tree but Juergen has just sent a
> Xen pull request to Linux last Saturday. Juergen do you plan to send
> another one? Do you have something else lined up? If not, it might be
> better to let Christoph pick it up.


We don't have anything pending.


I can test it but at best tomorrow so not sure we can get this into rc5. Do you consider this an urgent fix or can we wait until 5.19? Because it's a bit too much IMO for rc6.


-boris
Stefano Stabellini April 28, 2022, 10:49 p.m. UTC | #6
On Thu, 28 Apr 2022, Boris Ostrovsky wrote:
> On 4/28/22 5:49 PM, Stefano Stabellini wrote:
> > On Thu, 28 Apr 2022, Christoph Hellwig wrote:
> > > On Tue, Apr 26, 2022 at 04:07:45PM -0700, Stefano Stabellini wrote:
> > > > > Reported-by: Rahul Singh <Rahul.Singh@arm.com>
> > > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
> > > Do you want to take this through the Xen tree or should I pick it up?
> > > Either way I'd love to see some testing on x86 as well.
> > I agree on the x86 testing. Juergen, Boris?
> > 
> > I'd say to take this patch via the Xen tree but Juergen has just sent a
> > Xen pull request to Linux last Saturday. Juergen do you plan to send
> > another one? Do you have something else lined up? If not, it might be
> > better to let Christoph pick it up.
> 
> 
> We don't have anything pending.
> 
> 
> I can test it but at best tomorrow so not sure we can get this into rc5. Do
> you consider this an urgent fix or can we wait until 5.19? Because it's a bit
> too much IMO for rc6.

On one hand, Linux doesn't boot on a platform without this fix. On the
other hand, I totally see that this patch could introduce regressions on
x86 so I think it is fair that we are careful with it.

From my point of view, it might be better to wait for 5.19 and mark it
as backport.
Boris Ostrovsky April 29, 2022, 11:12 p.m. UTC | #7
On 4/28/22 6:49 PM, Stefano Stabellini wrote:
> On Thu, 28 Apr 2022, Boris Ostrovsky wrote:
>> On 4/28/22 5:49 PM, Stefano Stabellini wrote:
>>> On Thu, 28 Apr 2022, Christoph Hellwig wrote:
>>>> On Tue, Apr 26, 2022 at 04:07:45PM -0700, Stefano Stabellini wrote:
>>>>>> Reported-by: Rahul Singh <Rahul.Singh@arm.com>
>>>>>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>>>>> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
>>>> Do you want to take this through the Xen tree or should I pick it up?
>>>> Either way I'd love to see some testing on x86 as well.
>>> I agree on the x86 testing. Juergen, Boris?
>>>
>>> I'd say to take this patch via the Xen tree but Juergen has just sent a
>>> Xen pull request to Linux last Saturday. Juergen do you plan to send
>>> another one? Do you have something else lined up? If not, it might be
>>> better to let Christoph pick it up.
>>
>> We don't have anything pending.
>>
>>
>> I can test it but at best tomorrow so not sure we can get this into rc5. Do
>> you consider this an urgent fix or can we wait until 5.19? Because it's a bit
>> too much IMO for rc6.
> On one hand, Linux doesn't boot on a platform without this fix. On the
> other hand, I totally see that this patch could introduce regressions on
> x86 so I think it is fair that we are careful with it.
>
>  From my point of view, it might be better to wait for 5.19 and mark it
> as backport.


No problems uncovered during testing.


-boris
Stefano Stabellini April 29, 2022, 11:15 p.m. UTC | #8
On Fri, 29 Apr 2022, Boris Ostrovsky wrote:
> On 4/28/22 6:49 PM, Stefano Stabellini wrote:
> > On Thu, 28 Apr 2022, Boris Ostrovsky wrote:
> > > On 4/28/22 5:49 PM, Stefano Stabellini wrote:
> > > > On Thu, 28 Apr 2022, Christoph Hellwig wrote:
> > > > > On Tue, Apr 26, 2022 at 04:07:45PM -0700, Stefano Stabellini wrote:
> > > > > > > Reported-by: Rahul Singh <Rahul.Singh@arm.com>
> > > > > > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > > > > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
> > > > > Do you want to take this through the Xen tree or should I pick it up?
> > > > > Either way I'd love to see some testing on x86 as well.
> > > > I agree on the x86 testing. Juergen, Boris?
> > > > 
> > > > I'd say to take this patch via the Xen tree but Juergen has just sent a
> > > > Xen pull request to Linux last Saturday. Juergen do you plan to send
> > > > another one? Do you have something else lined up? If not, it might be
> > > > better to let Christoph pick it up.
> > > 
> > > We don't have anything pending.
> > > 
> > > 
> > > I can test it but at best tomorrow so not sure we can get this into rc5.
> > > Do
> > > you consider this an urgent fix or can we wait until 5.19? Because it's a
> > > bit
> > > too much IMO for rc6.
> > On one hand, Linux doesn't boot on a platform without this fix. On the
> > other hand, I totally see that this patch could introduce regressions on
> > x86 so I think it is fair that we are careful with it.
> > 
> >  From my point of view, it might be better to wait for 5.19 and mark it
> > as backport.
> 
> 
> No problems uncovered during testing.

Great! Christoph you can go ahead and pick it up in your tree if you are
up for it.
Christoph Hellwig May 9, 2022, 6:18 a.m. UTC | #9
On Thu, Apr 28, 2022 at 03:49:53PM -0700, Stefano Stabellini wrote:
> On one hand, Linux doesn't boot on a platform without this fix. On the
> other hand, I totally see that this patch could introduce regressions on
> x86 so I think it is fair that we are careful with it.
> 
> From my point of view, it might be better to wait for 5.19 and mark it
> as backport.

Sounds good to me.  Based on the other mails I assume you want me to
take it through the dma-mapping tree, so I will do that tomorrow unless
I hear otherwise.
Christoph Hellwig May 11, 2022, 2:13 p.m. UTC | #10
On Fri, Apr 29, 2022 at 04:15:38PM -0700, Stefano Stabellini wrote:
> Great! Christoph you can go ahead and pick it up in your tree if you are
> up for it.

The patch is in the dma-mapping for-next branch now:

http://git.infradead.org/users/hch/dma-mapping.git/commitdiff/62cb1ca1654b57589c582efae2748159c74ee356

There were a few smaller merge conflicts with the swiotlb refactoring.
I think everything is fine, but please take another look if possible.
Stefano Stabellini May 11, 2022, 8:28 p.m. UTC | #11
On Wed, 11 May 2022, Christoph Hellwig wrote:
> On Fri, Apr 29, 2022 at 04:15:38PM -0700, Stefano Stabellini wrote:
> > Great! Christoph you can go ahead and pick it up in your tree if you are
> > up for it.
> 
> The patch is in the dma-mapping for-next branch now:
> 
> http://git.infradead.org/users/hch/dma-mapping.git/commitdiff/62cb1ca1654b57589c582efae2748159c74ee356
> 
> There were a few smaller merge conflicts with the swiotlb refactoring.
> I think everything is fine, but please take another look if possible.

Looks good to me

Patch

diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
deleted file mode 100644
index 27e984977402b..0000000000000
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,2 +0,0 @@ 
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <xen/arm/page-coherent.h>
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index a7e54a087b802..6e603e5fdebb1 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -118,23 +118,6 @@  bool xen_arch_need_swiotlb(struct device *dev,
 		!dev_is_dma_coherent(dev));
 }
 
-int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
-				 unsigned int address_bits,
-				 dma_addr_t *dma_handle)
-{
-	if (!xen_initial_domain())
-		return -EINVAL;
-
-	/* we assume that dom0 is mapped 1:1 for now */
-	*dma_handle = pstart;
-	return 0;
-}
-
-void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
-{
-	return;
-}
-
 static int __init xen_mm_init(void)
 {
 	struct gnttab_cache_flush cflush;
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
deleted file mode 100644
index 27e984977402b..0000000000000
--- a/arch/arm64/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,2 +0,0 @@ 
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <xen/arm/page-coherent.h>
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
deleted file mode 100644
index 63cd41b2e17ac..0000000000000
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ /dev/null
@@ -1,24 +0,0 @@ 
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_XEN_PAGE_COHERENT_H
-#define _ASM_X86_XEN_PAGE_COHERENT_H
-
-#include <asm/page.h>
-#include <linux/dma-mapping.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flags,
-		unsigned long attrs)
-{
-	void *vstart = (void*)__get_free_pages(flags, get_order(size));
-	*dma_handle = virt_to_phys(vstart);
-	return vstart;
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-		void *cpu_addr, dma_addr_t dma_handle,
-		unsigned long attrs)
-{
-	free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-#endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
index 66b4ddde77430..558821387808e 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -10,4 +10,9 @@  extern int pci_xen_swiotlb_init_late(void);
 static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
 #endif
 
+int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
+				unsigned int address_bits,
+				dma_addr_t *dma_handle);
+void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
+
 #endif /* _ASM_X86_SWIOTLB_XEN_H */
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 47aebd98f52f5..557edb9c54879 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -36,7 +36,6 @@ 
 #include <xen/hvc-console.h>
 
 #include <asm/dma-mapping.h>
-#include <asm/xen/page-coherent.h>
 
 #include <trace/events/swiotlb.h>
 #define MAX_DMA_BITS 32
@@ -104,6 +103,7 @@  static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 	return 0;
 }
 
+#ifdef CONFIG_X86
 static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
 {
 	int rc;
@@ -129,6 +129,12 @@  static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
 	} while (i < nslabs);
 	return 0;
 }
+#else
+static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
+{
+	return 0;
+}
+#endif
 
 enum xen_swiotlb_err {
 	XEN_SWIOTLB_UNKNOWN = 0,
@@ -256,97 +262,60 @@  void __init xen_swiotlb_init_early(void)
 		panic("Cannot allocate SWIOTLB buffer");
 	swiotlb_set_max_segment(PAGE_SIZE);
 }
-#endif /* CONFIG_X86 */
 
 static void *
-xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-			   dma_addr_t *dma_handle, gfp_t flags,
-			   unsigned long attrs)
+xen_swiotlb_alloc_coherent(struct device *dev, size_t size,
+		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
 {
-	void *ret;
+	u64 dma_mask = dev->coherent_dma_mask;
 	int order = get_order(size);
-	u64 dma_mask = DMA_BIT_MASK(32);
 	phys_addr_t phys;
-	dma_addr_t dev_addr;
-
-	/*
-	* Ignore region specifiers - the kernel's ideas of
-	* pseudo-phys memory layout has nothing to do with the
-	* machine physical layout.  We can't allocate highmem
-	* because we can't return a pointer to it.
-	*/
-	flags &= ~(__GFP_DMA | __GFP_HIGHMEM);
+	void *ret;
 
-	/* Convert the size to actually allocated. */
+	/* Align the allocation to the Xen page size */
 	size = 1UL << (order + XEN_PAGE_SHIFT);
 
-	/* On ARM this function returns an ioremap'ped virtual address for
-	 * which virt_to_phys doesn't return the corresponding physical
-	 * address. In fact on ARM virt_to_phys only works for kernel direct
-	 * mapped RAM memory. Also see comment below.
-	 */
-	ret = xen_alloc_coherent_pages(hwdev, size, dma_handle, flags, attrs);
-
+	ret = (void *)__get_free_pages(flags, get_order(size));
 	if (!ret)
 		return ret;
-
-	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = hwdev->coherent_dma_mask;
-
-	/* At this point dma_handle is the dma address, next we are
-	 * going to set it to the machine address.
-	 * Do not use virt_to_phys(ret) because on ARM it doesn't correspond
-	 * to *dma_handle. */
-	phys = dma_to_phys(hwdev, *dma_handle);
-	dev_addr = xen_phys_to_dma(hwdev, phys);
-	if (((dev_addr + size - 1 <= dma_mask)) &&
-	    !range_straddles_page_boundary(phys, size))
-		*dma_handle = dev_addr;
-	else {
-		if (xen_create_contiguous_region(phys, order,
-						 fls64(dma_mask), dma_handle) != 0) {
-			xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
-			return NULL;
-		}
-		*dma_handle = phys_to_dma(hwdev, *dma_handle);
+	phys = virt_to_phys(ret);
+
+	*dma_handle = xen_phys_to_dma(dev, phys);
+	if (*dma_handle + size - 1 > dma_mask ||
+	    range_straddles_page_boundary(phys, size)) {
+		if (xen_create_contiguous_region(phys, order, fls64(dma_mask),
+				dma_handle) != 0)
+			goto out_free_pages;
 		SetPageXenRemapped(virt_to_page(ret));
 	}
+
 	memset(ret, 0, size);
 	return ret;
+
+out_free_pages:
+	free_pages((unsigned long)ret, get_order(size));
+	return NULL;
 }
 
 static void
-xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-			  dma_addr_t dev_addr, unsigned long attrs)
+xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr,
+		dma_addr_t dma_handle, unsigned long attrs)
 {
+	phys_addr_t phys = virt_to_phys(vaddr);
 	int order = get_order(size);
-	phys_addr_t phys;
-	u64 dma_mask = DMA_BIT_MASK(32);
-	struct page *page;
-
-	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = hwdev->coherent_dma_mask;
-
-	/* do not use virt_to_phys because on ARM it doesn't return you the
-	 * physical address */
-	phys = xen_dma_to_phys(hwdev, dev_addr);
 
 	/* Convert the size to actually allocated. */
 	size = 1UL << (order + XEN_PAGE_SHIFT);
 
-	if (is_vmalloc_addr(vaddr))
-		page = vmalloc_to_page(vaddr);
-	else
-		page = virt_to_page(vaddr);
+	if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) ||
+	    WARN_ON_ONCE(range_straddles_page_boundary(phys, size)))
+	    	return;
 
-	if (!WARN_ON((dev_addr + size - 1 > dma_mask) ||
-		     range_straddles_page_boundary(phys, size)) &&
-	    TestClearPageXenRemapped(page))
+	if (TestClearPageXenRemapped(virt_to_page(vaddr)))
 		xen_destroy_contiguous_region(phys, order);
-
-	xen_free_coherent_pages(hwdev, size, vaddr, phys_to_dma(hwdev, phys),
-				attrs);
+	free_pages((unsigned long)vaddr, get_order(size));
 }
+#endif /* CONFIG_X86 */
 
 /*
  * Map a single buffer of the indicated size for DMA in streaming mode.  The
@@ -549,8 +518,13 @@  xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
 }
 
 const struct dma_map_ops xen_swiotlb_dma_ops = {
+#ifdef CONFIG_X86
 	.alloc = xen_swiotlb_alloc_coherent,
 	.free = xen_swiotlb_free_coherent,
+#else
+	.alloc = dma_direct_alloc,
+	.free = dma_direct_free,
+#endif
 	.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
 	.sync_single_for_device = xen_swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu,
diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
deleted file mode 100644
index b9cc11e887ed5..0000000000000
--- a/include/xen/arm/page-coherent.h
+++ /dev/null
@@ -1,20 +0,0 @@ 
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _XEN_ARM_PAGE_COHERENT_H
-#define _XEN_ARM_PAGE_COHERENT_H
-
-#include <linux/dma-mapping.h>
-#include <asm/page.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
-{
-	return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
-{
-	dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
-#endif /* _XEN_ARM_PAGE_COHERENT_H */
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index a3584a357f353..c7c1b46ff4cd4 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -42,13 +42,6 @@  int xen_setup_shutdown_event(void);
 
 extern unsigned long *xen_contiguous_bitmap;
 
-#if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
-int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
-				unsigned int address_bits,
-				dma_addr_t *dma_handle);
-void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
-#endif
-
 #if defined(CONFIG_XEN_PV)
 int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
 		  xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,