diff mbox series

[2/4] x86: always initialize xen-swiotlb when xen-pcifront is enabling

Message ID 20230518134253.909623-3-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/4] x86: move a check out of pci_xen_swiotlb_init | expand

Commit Message

Christoph Hellwig May 18, 2023, 1:42 p.m. UTC
Remove the dangerous late initialization of xen-swiotlb in
pci_xen_swiotlb_init_late and instead just always initialize
xen-swiotlb in the boot code if CONFIG_XEN_PCIDEV_FRONTEND is enabled.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/include/asm/xen/swiotlb-xen.h |  6 ------
 arch/x86/kernel/pci-dma.c              | 25 +++----------------------
 drivers/pci/xen-pcifront.c             |  6 ------
 3 files changed, 3 insertions(+), 34 deletions(-)

Comments

Marek Marczykowski-Górecki May 18, 2023, 6:18 p.m. UTC | #1
On Thu, May 18, 2023 at 03:42:51PM +0200, Christoph Hellwig wrote:
> Remove the dangerous late initialization of xen-swiotlb in
> pci_xen_swiotlb_init_late and instead just always initialize
> xen-swiotlb in the boot code if CONFIG_XEN_PCIDEV_FRONTEND is enabled.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Doesn't it mean all the PV guests will basically waste 64MB of RAM
by default each if they don't really have PCI devices?

> ---
>  arch/x86/include/asm/xen/swiotlb-xen.h |  6 ------
>  arch/x86/kernel/pci-dma.c              | 25 +++----------------------
>  drivers/pci/xen-pcifront.c             |  6 ------
>  3 files changed, 3 insertions(+), 34 deletions(-)
> 
> diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
> index 77a2d19cc9909e..abde0f44df57dc 100644
> --- a/arch/x86/include/asm/xen/swiotlb-xen.h
> +++ b/arch/x86/include/asm/xen/swiotlb-xen.h
> @@ -2,12 +2,6 @@
>  #ifndef _ASM_X86_SWIOTLB_XEN_H
>  #define _ASM_X86_SWIOTLB_XEN_H
>  
> -#ifdef CONFIG_SWIOTLB_XEN
> -extern int pci_xen_swiotlb_init_late(void);
> -#else
> -static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
> -#endif
> -
>  int xen_swiotlb_fixup(void *buf, unsigned long nslabs);
>  int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
>  				unsigned int address_bits,
> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
> index f887b08ac5ffe4..c4a7ead9eb674e 100644
> --- a/arch/x86/kernel/pci-dma.c
> +++ b/arch/x86/kernel/pci-dma.c
> @@ -81,27 +81,6 @@ static void __init pci_xen_swiotlb_init(void)
>  	if (IS_ENABLED(CONFIG_PCI))
>  		pci_request_acs();
>  }
> -
> -int pci_xen_swiotlb_init_late(void)
> -{
> -	if (dma_ops == &xen_swiotlb_dma_ops)
> -		return 0;
> -
> -	/* we can work with the default swiotlb */
> -	if (!io_tlb_default_mem.nslabs) {
> -		int rc = swiotlb_init_late(swiotlb_size_or_default(),
> -					   GFP_KERNEL, xen_swiotlb_fixup);
> -		if (rc < 0)
> -			return rc;
> -	}
> -
> -	/* XXX: this switches the dma ops under live devices! */
> -	dma_ops = &xen_swiotlb_dma_ops;
> -	if (IS_ENABLED(CONFIG_PCI))
> -		pci_request_acs();
> -	return 0;
> -}
> -EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
>  #else
>  static inline void __init pci_xen_swiotlb_init(void)
>  {
> @@ -111,7 +90,9 @@ static inline void __init pci_xen_swiotlb_init(void)
>  void __init pci_iommu_alloc(void)
>  {
>  	if (xen_pv_domain()) {
> -		if (xen_initial_domain() || x86_swiotlb_enable)
> +		if (xen_initial_domain() ||
> +		    IS_ENABLED(CONFIG_XEN_PCIDEV_FRONTEND) ||
> +		    x86_swiotlb_enable)
>  			pci_xen_swiotlb_init();
>  		return;
>  	}
> diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
> index 83c0ab50676dff..11636634ae512f 100644
> --- a/drivers/pci/xen-pcifront.c
> +++ b/drivers/pci/xen-pcifront.c
> @@ -22,7 +22,6 @@
>  #include <linux/bitops.h>
>  #include <linux/time.h>
>  #include <linux/ktime.h>
> -#include <linux/swiotlb.h>
>  #include <xen/platform_pci.h>
>  
>  #include <asm/xen/swiotlb-xen.h>
> @@ -669,11 +668,6 @@ static int pcifront_connect_and_init_dma(struct pcifront_device *pdev)
>  
>  	spin_unlock(&pcifront_dev_lock);
>  
> -	if (!err && !is_swiotlb_active(&pdev->xdev->dev)) {
> -		err = pci_xen_swiotlb_init_late();
> -		if (err)
> -			dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
> -	}
>  	return err;
>  }
>  
> -- 
> 2.39.2
> 
>
Christoph Hellwig May 19, 2023, 4:04 a.m. UTC | #2
On Thu, May 18, 2023 at 08:18:39PM +0200, Marek Marczykowski-Górecki wrote:
> On Thu, May 18, 2023 at 03:42:51PM +0200, Christoph Hellwig wrote:
> > Remove the dangerous late initialization of xen-swiotlb in
> > pci_xen_swiotlb_init_late and instead just always initialize
> > xen-swiotlb in the boot code if CONFIG_XEN_PCIDEV_FRONTEND is enabled.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Doesn't it mean all the PV guests will basically waste 64MB of RAM
> by default each if they don't really have PCI devices?

If CONFIG_XEN_PCIDEV_FRONTEND is enabled, and the kernel's isn't booted
with swiotlb=noforce, yes.
Marek Marczykowski-Górecki May 19, 2023, 10:10 a.m. UTC | #3
On Fri, May 19, 2023 at 06:04:05AM +0200, Christoph Hellwig wrote:
> On Thu, May 18, 2023 at 08:18:39PM +0200, Marek Marczykowski-Górecki wrote:
> > On Thu, May 18, 2023 at 03:42:51PM +0200, Christoph Hellwig wrote:
> > > Remove the dangerous late initialization of xen-swiotlb in
> > > pci_xen_swiotlb_init_late and instead just always initialize
> > > xen-swiotlb in the boot code if CONFIG_XEN_PCIDEV_FRONTEND is enabled.
> > > 
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > 
> > Doesn't it mean all the PV guests will basically waste 64MB of RAM
> > by default each if they don't really have PCI devices?
> 
> If CONFIG_XEN_PCIDEV_FRONTEND is enabled, and the kernel's isn't booted
> with swiotlb=noforce, yes.

That's "a bit" unfortunate, since that might be significant part of the
VM memory, or if you have a lot of VMs, a significant part of the host
memory - it quickly adds up.
While I would say PCI passthrough is not very common for PV guests, can
the decision about xen-swiotlb be delayed until you can enumerate
xenstore to check if there are any PCI devices connected (and not
allocate xen-swiotlb by default if there are none)? This would
still not cover the hotplug case (in which case, you'd need to force it
with a cmdline), but at least you wouldn't loose much memory just
because one of your VMs may use PCI passthrough (so, you have it enabled
in your kernel).
Please remember that guest kernel is not always under full control of
the host admin, so making guests loose 64MB of RAM always, in default
setup isn't good for customers of such VMs...
Christoph Hellwig May 19, 2023, 12:41 p.m. UTC | #4
On Fri, May 19, 2023 at 12:10:26PM +0200, Marek Marczykowski-Górecki wrote:
> While I would say PCI passthrough is not very common for PV guests, can
> the decision about xen-swiotlb be delayed until you can enumerate
> xenstore to check if there are any PCI devices connected (and not
> allocate xen-swiotlb by default if there are none)? This would
> still not cover the hotplug case (in which case, you'd need to force it
> with a cmdline), but at least you wouldn't loose much memory just
> because one of your VMs may use PCI passthrough (so, you have it enabled
> in your kernel).

How early can we query xenstore?  We'd need to do this before setting
up DMA for any device.

The alternative would be to finally merge swiotlb-xen into swiotlb, in
which case we might be able to do this later.  Let me see what I can
do there.
Andrew Cooper May 19, 2023, 12:49 p.m. UTC | #5
On 19/05/2023 1:41 pm, Christoph Hellwig wrote:
> On Fri, May 19, 2023 at 12:10:26PM +0200, Marek Marczykowski-Górecki wrote:
>> While I would say PCI passthrough is not very common for PV guests, can
>> the decision about xen-swiotlb be delayed until you can enumerate
>> xenstore to check if there are any PCI devices connected (and not
>> allocate xen-swiotlb by default if there are none)? This would
>> still not cover the hotplug case (in which case, you'd need to force it
>> with a cmdline), but at least you wouldn't loose much memory just
>> because one of your VMs may use PCI passthrough (so, you have it enabled
>> in your kernel).
> How early can we query xenstore?  We'd need to do this before setting
> up DMA for any device.

Not that early.  One supported configuration has xenstore not starting
for an indefinite period of time after boot.

> The alternative would be to finally merge swiotlb-xen into swiotlb, in
> which case we might be able to do this later.  Let me see what I can
> do there.

If that is an option, it would be great to reduce the special-cashing.

~Andrew
Christoph Hellwig May 19, 2023, 12:58 p.m. UTC | #6
On Fri, May 19, 2023 at 01:49:46PM +0100, Andrew Cooper wrote:
> > The alternative would be to finally merge swiotlb-xen into swiotlb, in
> > which case we might be able to do this later.  Let me see what I can
> > do there.
> 
> If that is an option, it would be great to reduce the special-cashing.

I think it's doable, and I've been wanting it for a while.  I just
need motivated testers, but it seems like I just found at least two :)
Christoph Hellwig May 20, 2023, 6:21 a.m. UTC | #7
On Fri, May 19, 2023 at 02:58:57PM +0200, Christoph Hellwig wrote:
> On Fri, May 19, 2023 at 01:49:46PM +0100, Andrew Cooper wrote:
> > > The alternative would be to finally merge swiotlb-xen into swiotlb, in
> > > which case we might be able to do this later.  Let me see what I can
> > > do there.
> > 
> > If that is an option, it would be great to reduce the special-cashing.
> 
> I think it's doable, and I've been wanting it for a while.  I just
> need motivated testers, but it seems like I just found at least two :)

So looking at swiotlb-xen it does these off things where it takes a value
generated originally be xen_phys_to_dma, then only does a dma_to_phys
to go back and call pfn_valid on the result.  Does this make sense, or
is it wrong and just works by accident?  I.e. is the patch below correct?


diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 67aa74d201627d..3396c5766f0dd8 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -90,9 +90,7 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
 
 static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
 {
-	unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
-	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
-	phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;
+	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
 
 	/* If the address is outside our domain, it CAN
 	 * have the same virtual address as another address
@@ -234,7 +232,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 
 done:
 	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dev_addr))))
+		if (pfn_valid(PFN_DOWN(phys)))
 			arch_sync_dma_for_device(phys, size, dir);
 		else
 			xen_dma_sync_for_device(dev, dev_addr, size, dir);
@@ -258,7 +256,7 @@ static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 	BUG_ON(dir == DMA_NONE);
 
 	if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(hwdev, dev_addr))))
+		if (pfn_valid(PFN_DOWN(paddr)))
 			arch_sync_dma_for_cpu(paddr, size, dir);
 		else
 			xen_dma_sync_for_cpu(hwdev, dev_addr, size, dir);
@@ -276,7 +274,7 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
 	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
 
 	if (!dev_is_dma_coherent(dev)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+		if (pfn_valid(PFN_DOWN(paddr)))
 			arch_sync_dma_for_cpu(paddr, size, dir);
 		else
 			xen_dma_sync_for_cpu(dev, dma_addr, size, dir);
@@ -296,7 +294,7 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
 		swiotlb_sync_single_for_device(dev, paddr, size, dir);
 
 	if (!dev_is_dma_coherent(dev)) {
-		if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
+		if (pfn_valid(PFN_DOWN(paddr)))
 			arch_sync_dma_for_device(paddr, size, dir);
 		else
 			xen_dma_sync_for_device(dev, dma_addr, size, dir);
Petr Tesařík May 22, 2023, 7:52 a.m. UTC | #8
Hi Christoph,

On Sat, 20 May 2023 08:21:03 +0200
Christoph Hellwig <hch@lst.de> wrote:

> On Fri, May 19, 2023 at 02:58:57PM +0200, Christoph Hellwig wrote:
> > On Fri, May 19, 2023 at 01:49:46PM +0100, Andrew Cooper wrote:  
> > > > The alternative would be to finally merge swiotlb-xen into swiotlb, in
> > > > which case we might be able to do this later.  Let me see what I can
> > > > do there.  
> > > 
> > > If that is an option, it would be great to reduce the special-cashing.  
> > 
> > I think it's doable, and I've been wanting it for a while.  I just
> > need motivated testers, but it seems like I just found at least two :)  
> 
> So looking at swiotlb-xen it does these off things where it takes a value
> generated originally be xen_phys_to_dma, then only does a dma_to_phys
> to go back and call pfn_valid on the result.  Does this make sense, or
> is it wrong and just works by accident?  I.e. is the patch below correct?
> 
> 
> diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
> index 67aa74d201627d..3396c5766f0dd8 100644
> --- a/drivers/xen/swiotlb-xen.c
> +++ b/drivers/xen/swiotlb-xen.c
> @@ -90,9 +90,7 @@ static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
>  
>  static int is_xen_swiotlb_buffer(struct device *dev, dma_addr_t dma_addr)
>  {
> -	unsigned long bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));
> -	unsigned long xen_pfn = bfn_to_local_pfn(bfn);
> -	phys_addr_t paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;
> +	phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);


I'm no big Xen expert, but I think this is wrong. Let's go through it
line by line:

- bfn = XEN_PFN_DOWN(dma_to_phys(dev, dma_addr));

  Take the DMA address (as seen by devices on the bus), convert it to a
  physical address (as seen by the CPU on the bus) and shift it right
  by XEN_PAGE_SHIFT. The result is a Xen machine PFN.

- xen_pfn = bfn_to_local_pfn(bfn);

  Take the machine PFN and converts it to a physical PFN.

- paddr = (phys_addr_t)xen_pfn << XEN_PAGE_SHIFT;

  Convert the physical PFN to a physical address.

The important thing here is that Xen PV does not have auto-translated
physical addresses, so physical address != machine address. Physical
addresses in Xen PV domains are "artificial", used by the kernel to
index the mem_map array, so a PFN can be easily converted to a struct
page pointer and back. However, these addresses are never used by
hardware, not even by CPU. The addresses used by the CPU are called
machine addresses. There is no address translation between VCPUs and
CPUs, because a PV domain runs directly on the CPU. After all, that's
why it is called _para_virtualized.

HTH
Petr T
Petr Tesařík May 22, 2023, 7:54 a.m. UTC | #9
On Fri, 19 May 2023 12:10:26 +0200
Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> wrote:

> On Fri, May 19, 2023 at 06:04:05AM +0200, Christoph Hellwig wrote:
> > On Thu, May 18, 2023 at 08:18:39PM +0200, Marek Marczykowski-Górecki wrote:  
> > > On Thu, May 18, 2023 at 03:42:51PM +0200, Christoph Hellwig wrote:  
> > > > Remove the dangerous late initialization of xen-swiotlb in
> > > > pci_xen_swiotlb_init_late and instead just always initialize
> > > > xen-swiotlb in the boot code if CONFIG_XEN_PCIDEV_FRONTEND is enabled.
> > > > 
> > > > Signed-off-by: Christoph Hellwig <hch@lst.de>  
> > > 
> > > Doesn't it mean all the PV guests will basically waste 64MB of RAM
> > > by default each if they don't really have PCI devices?  
> > 
> > If CONFIG_XEN_PCIDEV_FRONTEND is enabled, and the kernel's isn't booted
> > with swiotlb=noforce, yes.  
> 
> That's "a bit" unfortunate, since that might be significant part of the
> VM memory, or if you have a lot of VMs, a significant part of the host
> memory - it quickly adds up.

I wonder if dynamic swiotlb allocation might also help with this...

Petr T
Jürgen Groß May 22, 2023, 8:37 a.m. UTC | #10
On 19.05.23 12:10, Marek Marczykowski-Górecki wrote:
> On Fri, May 19, 2023 at 06:04:05AM +0200, Christoph Hellwig wrote:
>> On Thu, May 18, 2023 at 08:18:39PM +0200, Marek Marczykowski-Górecki wrote:
>>> On Thu, May 18, 2023 at 03:42:51PM +0200, Christoph Hellwig wrote:
>>>> Remove the dangerous late initialization of xen-swiotlb in
>>>> pci_xen_swiotlb_init_late and instead just always initialize
>>>> xen-swiotlb in the boot code if CONFIG_XEN_PCIDEV_FRONTEND is enabled.
>>>>
>>>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>>>
>>> Doesn't it mean all the PV guests will basically waste 64MB of RAM
>>> by default each if they don't really have PCI devices?
>>
>> If CONFIG_XEN_PCIDEV_FRONTEND is enabled, and the kernel's isn't booted
>> with swiotlb=noforce, yes.
> 
> That's "a bit" unfortunate, since that might be significant part of the
> VM memory, or if you have a lot of VMs, a significant part of the host
> memory - it quickly adds up.
> While I would say PCI passthrough is not very common for PV guests, can
> the decision about xen-swiotlb be delayed until you can enumerate
> xenstore to check if there are any PCI devices connected (and not
> allocate xen-swiotlb by default if there are none)? This would
> still not cover the hotplug case (in which case, you'd need to force it
> with a cmdline), but at least you wouldn't loose much memory just
> because one of your VMs may use PCI passthrough (so, you have it enabled
> in your kernel).
> Please remember that guest kernel is not always under full control of
> the host admin, so making guests loose 64MB of RAM always, in default
> setup isn't good for customers of such VMs...
> 

In normal cases PCI passthrough in PV guests requires to start the guest
with e820_host=1. So it should be rather easy to limit allocating the
64MB in PV guests to the cases where the memory map has non-RAM regions
especially in the first 1MB of the memory.

This will cover even hotplug cases. The only case not covered would be a
guest started with e820_host=1 even if no PCI passthrough was planned.
But this should be rather rare (at least I hope so).


Juergen
Christoph Hellwig June 7, 2023, 1:12 p.m. UTC | #11
On Mon, May 22, 2023 at 10:37:09AM +0200, Juergen Gross wrote:
> In normal cases PCI passthrough in PV guests requires to start the guest
> with e820_host=1. So it should be rather easy to limit allocating the
> 64MB in PV guests to the cases where the memory map has non-RAM regions
> especially in the first 1MB of the memory.
>
> This will cover even hotplug cases. The only case not covered would be a
> guest started with e820_host=1 even if no PCI passthrough was planned.
> But this should be rather rare (at least I hope so).

So is this an ACK for the patch and can we go ahead with it?

(I'd still like to merge swiotlb-xen into swiotlb eventually, but it's
probably not going to happen this merge window)
Jürgen Groß June 9, 2023, 3:38 p.m. UTC | #12
On 07.06.23 15:12, Christoph Hellwig wrote:
> On Mon, May 22, 2023 at 10:37:09AM +0200, Juergen Gross wrote:
>> In normal cases PCI passthrough in PV guests requires to start the guest
>> with e820_host=1. So it should be rather easy to limit allocating the
>> 64MB in PV guests to the cases where the memory map has non-RAM regions
>> especially in the first 1MB of the memory.
>>
>> This will cover even hotplug cases. The only case not covered would be a
>> guest started with e820_host=1 even if no PCI passthrough was planned.
>> But this should be rather rare (at least I hope so).
> 
> So is this an ACK for the patch and can we go ahead with it?

As long as above mentioned check of the E820 map is done, yes.

If you want I can send a diff to be folded into your patch on Monday.

> 
> (I'd still like to merge swiotlb-xen into swiotlb eventually, but it's
> probably not going to happen this merge window)

Would be nice.


Juergen
Christoph Hellwig June 12, 2023, 6:47 a.m. UTC | #13
On Fri, Jun 09, 2023 at 05:38:28PM +0200, Juergen Gross wrote:
>>> guest started with e820_host=1 even if no PCI passthrough was planned.
>>> But this should be rather rare (at least I hope so).
>>
>> So is this an ACK for the patch and can we go ahead with it?
>
> As long as above mentioned check of the E820 map is done, yes.
>
> If you want I can send a diff to be folded into your patch on Monday.

Yes, that would be great!
Jürgen Groß June 12, 2023, 8:08 a.m. UTC | #14
On 09.06.23 17:38, Juergen Gross wrote:
> On 07.06.23 15:12, Christoph Hellwig wrote:
>> On Mon, May 22, 2023 at 10:37:09AM +0200, Juergen Gross wrote:
>>> In normal cases PCI passthrough in PV guests requires to start the guest
>>> with e820_host=1. So it should be rather easy to limit allocating the
>>> 64MB in PV guests to the cases where the memory map has non-RAM regions
>>> especially in the first 1MB of the memory.
>>>
>>> This will cover even hotplug cases. The only case not covered would be a
>>> guest started with e820_host=1 even if no PCI passthrough was planned.
>>> But this should be rather rare (at least I hope so).
>>
>> So is this an ACK for the patch and can we go ahead with it?
> 
> As long as above mentioned check of the E820 map is done, yes.
> 
> If you want I can send a diff to be folded into your patch on Monday.

Attached is a patch for adding a new flag xen_pv_pci_possible. You can
either add the patch to your series or merge it into your patch 2.

You need to modify your patch like this:

@@ -111,7 +90,10 @@ static inline void __init pci_xen_swiotlb_init(void)
  void __init pci_iommu_alloc(void)
  {
  	if (xen_pv_domain()) {
-		if (xen_initial_domain() || x86_swiotlb_enable)
+		if (xen_initial_domain() ||
+		    (IS_ENABLED(CONFIG_XEN_PCIDEV_FRONTEND) &&
+		     xen_pv_pci_possible) ||
+		    x86_swiotlb_enable)
  			pci_xen_swiotlb_init();
  		return;
  	}


Juergen
Christoph Hellwig June 12, 2023, 8:23 a.m. UTC | #15
Thank you.  I'll queue it up as a separate patch.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
index 77a2d19cc9909e..abde0f44df57dc 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -2,12 +2,6 @@ 
 #ifndef _ASM_X86_SWIOTLB_XEN_H
 #define _ASM_X86_SWIOTLB_XEN_H
 
-#ifdef CONFIG_SWIOTLB_XEN
-extern int pci_xen_swiotlb_init_late(void);
-#else
-static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
-#endif
-
 int xen_swiotlb_fixup(void *buf, unsigned long nslabs);
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 				unsigned int address_bits,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index f887b08ac5ffe4..c4a7ead9eb674e 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -81,27 +81,6 @@  static void __init pci_xen_swiotlb_init(void)
 	if (IS_ENABLED(CONFIG_PCI))
 		pci_request_acs();
 }
-
-int pci_xen_swiotlb_init_late(void)
-{
-	if (dma_ops == &xen_swiotlb_dma_ops)
-		return 0;
-
-	/* we can work with the default swiotlb */
-	if (!io_tlb_default_mem.nslabs) {
-		int rc = swiotlb_init_late(swiotlb_size_or_default(),
-					   GFP_KERNEL, xen_swiotlb_fixup);
-		if (rc < 0)
-			return rc;
-	}
-
-	/* XXX: this switches the dma ops under live devices! */
-	dma_ops = &xen_swiotlb_dma_ops;
-	if (IS_ENABLED(CONFIG_PCI))
-		pci_request_acs();
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
 #else
 static inline void __init pci_xen_swiotlb_init(void)
 {
@@ -111,7 +90,9 @@  static inline void __init pci_xen_swiotlb_init(void)
 void __init pci_iommu_alloc(void)
 {
 	if (xen_pv_domain()) {
-		if (xen_initial_domain() || x86_swiotlb_enable)
+		if (xen_initial_domain() ||
+		    IS_ENABLED(CONFIG_XEN_PCIDEV_FRONTEND) ||
+		    x86_swiotlb_enable)
 			pci_xen_swiotlb_init();
 		return;
 	}
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 83c0ab50676dff..11636634ae512f 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -22,7 +22,6 @@ 
 #include <linux/bitops.h>
 #include <linux/time.h>
 #include <linux/ktime.h>
-#include <linux/swiotlb.h>
 #include <xen/platform_pci.h>
 
 #include <asm/xen/swiotlb-xen.h>
@@ -669,11 +668,6 @@  static int pcifront_connect_and_init_dma(struct pcifront_device *pdev)
 
 	spin_unlock(&pcifront_dev_lock);
 
-	if (!err && !is_swiotlb_active(&pdev->xdev->dev)) {
-		err = pci_xen_swiotlb_init_late();
-		if (err)
-			dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
-	}
 	return err;
 }