
arm64: Add architecture support for PCI

Message ID 1391453028-23191-2-git-send-email-Liviu.Dudau@arm.com
State New, archived

Commit Message

Liviu Dudau Feb. 3, 2014, 6:43 p.m. UTC
Use the generic host bridge functions to provide support for
PCI Express on arm64. There is no support for ISA memory.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
---
 arch/arm64/Kconfig            |  17 +++++++
 arch/arm64/include/asm/Kbuild |   1 +
 arch/arm64/include/asm/io.h   |   4 ++
 arch/arm64/include/asm/pci.h  |  35 +++++++++++++
 arch/arm64/kernel/Makefile    |   1 +
 arch/arm64/kernel/pci.c       | 112 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 170 insertions(+)
 create mode 100644 arch/arm64/include/asm/pci.h
 create mode 100644 arch/arm64/kernel/pci.c

Comments

Arnd Bergmann Feb. 3, 2014, 6:58 p.m. UTC | #1
On Monday 03 February 2014 18:43:48 Liviu Dudau wrote:
> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> index 4cc813e..ce5bad2 100644
> --- a/arch/arm64/include/asm/io.h
> +++ b/arch/arm64/include/asm/io.h
> @@ -120,9 +120,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
>  /*
>   *  I/O port access primitives.
>   */
> +#define arch_has_dev_port()	(0)

Why not?

>  #define IO_SPACE_LIMIT		0xffff

You probably want to increase this a bit, to allow multiple host bridges
to have their own I/O space.

>  #define PCI_IOBASE		((void __iomem *)(MODULES_VADDR - SZ_2M))

And modify this location: There is no particular reason to have the I/O space
mapped exactly 2MB below the loadable modules, as virtual address space is
essentially free.

> +#define ioport_map(port, nr)	(PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> +#define ioport_unmap(addr)

inline functions?

>  static inline u8 inb(unsigned long addr)
>  {
>  	return readb(addr + PCI_IOBASE);
> diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
> new file mode 100644
> index 0000000..dd084a3
> --- /dev/null
> +++ b/arch/arm64/include/asm/pci.h
> @@ -0,0 +1,35 @@
> +#ifndef __ASM_PCI_H
> +#define __ASM_PCI_H
> +#ifdef __KERNEL__
> +
> +#include <linux/types.h>
> +#include <linux/slab.h>
> +#include <linux/dma-mapping.h>
> +
> +#include <asm/io.h>
> +#include <asm-generic/pci-bridge.h>
> +#include <asm-generic/pci-dma-compat.h>
> +
> +#define PCIBIOS_MIN_IO		0
> +#define PCIBIOS_MIN_MEM		0

PCIBIOS_MIN_IO is normally set to 0x1000, to stay out of the ISA range.

> diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> new file mode 100644
> index 0000000..7b652cf
> --- /dev/null
> +++ b/arch/arm64/kernel/pci.c
> @@ -0,0 +1,112 @@

None of this looks really arm64 specific, nor should it be. I think
we should try a little harder to move this as a default implementation
into common code, even if we start out by having all architectures
override it.

> +int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
> +{
> +	BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
> +
> +	return ioremap_page_range((unsigned long)PCI_IOBASE + offset,
> +				(unsigned long)PCI_IOBASE + offset + SZ_64K,
> +				phys_addr,
> +				__pgprot(PROT_DEVICE_nGnRE));
> +}

Not sure if we want to treat this one as architecture specific though.
It certainly won't be portable to x86, but it could be shared with
a couple of others. We may also want to redesign the interface.
I've been thinking we could make this function allocate space in the
Linux virtual I/O space aperture, and pass two resources into it
(physical I/O aperture and bus I/O range), and get the actual
io_offset as the return value, or a negative error number.

That way, you could have an arbitrary number of host bridges in the
system and each one gets a share of the virtual aperture until
it's full.
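
In code terms, the redesign Arnd sketches here would replace the patch's fixed-chunk interface with a resource-based one, roughly (signature sketch only; a possible body is spelled out later in the thread):

/* current interface in the patch: map a fixed 64K chunk at 'offset' */
int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr);

/* proposed: allocate from the virtual I/O aperture, return io_offset */
unsigned long pci_ioremap_io(const struct resource *bus,
			     const struct resource *phys);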

	Arnd
Liviu Dudau Feb. 3, 2014, 7:18 p.m. UTC | #2
On Mon, Feb 03, 2014 at 06:58:56PM +0000, Arnd Bergmann wrote:
> On Monday 03 February 2014 18:43:48 Liviu Dudau wrote:
> > diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> > index 4cc813e..ce5bad2 100644
> > --- a/arch/arm64/include/asm/io.h
> > +++ b/arch/arm64/include/asm/io.h
> > @@ -120,9 +120,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
> >  /*
> >   *  I/O port access primitives.
> >   */
> > +#define arch_has_dev_port()        (0)
>
> Why not?

Maybe I got it the wrong way around, but the comment in include/linux/io.h says:

/*
 * Some systems do not have legacy ISA devices.
 * /dev/port is not a valid interface on these systems.
 * So for those archs, <asm/io.h> should define the following symbol.
 */

So ... defining it should mean no legacy ISA devices, right?

>
> >  #define IO_SPACE_LIMIT             0xffff
>
> You probably want to increase this a bit, to allow multiple host bridges
> to have their own I/O space.

OK, but to what size?

>
> >  #define PCI_IOBASE         ((void __iomem *)(MODULES_VADDR - SZ_2M))
>
> And modify this location: There is no particular reason to have the I/O space
> mapped exactly 2MB below the loadable modules, as virtual address space is
> essentially free.

Will talk with Catalin about where to place this.

>
> > +#define ioport_map(port, nr)       (PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> > +#define ioport_unmap(addr)
>
> inline functions?

Will do, thanks!
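
For illustration, inline versions that keep the semantics of the macros above might look like this (a sketch only, not the code that was eventually merged):

static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
	return PCI_IOBASE + (port & IO_SPACE_LIMIT);
}

static inline void ioport_unmap(void __iomem *addr)
{
	/* nothing to do: the I/O space window stays permanently mapped */
}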

>
> >  static inline u8 inb(unsigned long addr)
> >  {
> >     return readb(addr + PCI_IOBASE);
> > diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
> > new file mode 100644
> > index 0000000..dd084a3
> > --- /dev/null
> > +++ b/arch/arm64/include/asm/pci.h
> > @@ -0,0 +1,35 @@
> > +#ifndef __ASM_PCI_H
> > +#define __ASM_PCI_H
> > +#ifdef __KERNEL__
> > +
> > +#include <linux/types.h>
> > +#include <linux/slab.h>
> > +#include <linux/dma-mapping.h>
> > +
> > +#include <asm/io.h>
> > +#include <asm-generic/pci-bridge.h>
> > +#include <asm-generic/pci-dma-compat.h>
> > +
> > +#define PCIBIOS_MIN_IO             0
> > +#define PCIBIOS_MIN_MEM            0
>
> PCIBIOS_MIN_IO is normally set to 0x1000, to stay out of the ISA range.

:) No ISA support! (Die ISA, die!!)

>
> > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> > new file mode 100644
> > index 0000000..7b652cf
> > --- /dev/null
> > +++ b/arch/arm64/kernel/pci.c
> > @@ -0,0 +1,112 @@
>
> None of this looks really arm64 specific, nor should it be. I think
> we should try a little harder to move this as a default implementation
> into common code, even if we start out by having all architectures
> override it.

Agree. This is the RFC version. I didn't dare to post a patch with fixes
for all architectures. :)

>
> > +int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
> > +{
> > +   BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
> > +
> > +   return ioremap_page_range((unsigned long)PCI_IOBASE + offset,
> > +                           (unsigned long)PCI_IOBASE + offset + SZ_64K,
> > +                           phys_addr,
> > +                           __pgprot(PROT_DEVICE_nGnRE));
> > +}
>
> Not sure if we want to treat this one as architecture specific though.
> It certainly won't be portable to x86, but it could be shared with
> a couple of others. We may also want to redesign the interface.
> I've been thinking we could make this function allocate space in the
> Linux virtual I/O space aperture, and pass two resources into it
> (physical I/O aperture and bus I/O range), and get the actual
> io_offset as the return value, or a negative error number.

Not sure I completely follow your idea.

>
> That way, you could have an arbitrary number of host bridges in the
> system and each one gets a share of the virtual aperture until
> it's full.

One still needs to fix the pci_request_region use that checks against
ioport_resource. But it is an interesting idea.

>
>       Arnd
>
>

Thanks for reviewing this patch!

Liviu

--
====================
| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---------------
    ¯\_(ツ)_/¯

Arnd Bergmann Feb. 3, 2014, 8:05 p.m. UTC | #3
On Monday 03 February 2014 19:18:38 Liviu Dudau wrote:
> On Mon, Feb 03, 2014 at 06:58:56PM +0000, Arnd Bergmann wrote:
> > On Monday 03 February 2014 18:43:48 Liviu Dudau wrote:
> > > diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> > > index 4cc813e..ce5bad2 100644
> > > --- a/arch/arm64/include/asm/io.h
> > > +++ b/arch/arm64/include/asm/io.h
> > > @@ -120,9 +120,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
> > >  /*
> > >   *  I/O port access primitives.
> > >   */
> > > +#define arch_has_dev_port()	(0)
> > 
> > Why not?
> 
> Maybe I got it the wrong way around, but the comment in include/linux/io.h says:
> 
> /*
>  * Some systems do not have legacy ISA devices.
>  * /dev/port is not a valid interface on these systems.
>  * So for those archs, <asm/io.h> should define the following symbol.
>  */
> 
> So ... defining it should mean no legacy ISA devices, right?

I would read that comment as referring to systems that don't have
any I/O space. If you have PCI, you can by definition have ISA
compatible devices behind a bridge. A typical example would be
a VGA card that supports the 03c0-03df port range.

> > 
> > >  #define IO_SPACE_LIMIT		0xffff
> > 
> > You probably want to increase this a bit, to allow multiple host bridges
> > to have their own I/O space.
> 
> OK, but to what size?

2 MB was a compromise on arm32 to allow up to 32 PCI host bridges but not
take up too much virtual space. On arm64 it should be at least as big.
Could be more than that, although I don't see a reason why it should be,
unless we expect to see systems with tons of host bridges, or buses
that exceed 64KB of I/O space.
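
Concretely, growing the window along those lines is a one-define change (sketch; the exact size, and where PCI_IOBASE should live, were still being discussed at this point):

/* 2MB of I/O space: room for up to 32 host bridges x 64K each (sketch) */
#define IO_SPACE_LIMIT		(SZ_2M - 1)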

> > > +#define ioport_map(port, nr)	(PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> > > +#define ioport_unmap(addr)
> > 
> > inline functions?
> 
> Will do, thanks!

I suppose you can actually use the generic implementation from
asm-generic/io.h, and fix it by using the definition you have
above, since it's currently broken.

> > > diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
> > > new file mode 100644
> > > index 0000000..dd084a3
> > > --- /dev/null
> > > +++ b/arch/arm64/include/asm/pci.h
> > > @@ -0,0 +1,35 @@
> > > +#ifndef __ASM_PCI_H
> > > +#define __ASM_PCI_H
> > > +#ifdef __KERNEL__
> > > +
> > > +#include <linux/types.h>
> > > +#include <linux/slab.h>
> > > +#include <linux/dma-mapping.h>
> > > +
> > > +#include <asm/io.h>
> > > +#include <asm-generic/pci-bridge.h>
> > > +#include <asm-generic/pci-dma-compat.h>
> > > +
> > > +#define PCIBIOS_MIN_IO		0
> > > +#define PCIBIOS_MIN_MEM		0
> > 
> > PCIBIOS_MIN_IO is normally set to 0x1000, to stay out of the ISA range.
> 
> :) No ISA support! (Die ISA, die!!) 

If only it were that easy.

> > > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> > > new file mode 100644
> > > index 0000000..7b652cf
> > > --- /dev/null
> > > +++ b/arch/arm64/kernel/pci.c
> > > @@ -0,0 +1,112 @@
> > 
> > None of this looks really arm64 specific, nor should it be. I think
> > we should try a little harder to move this as a default implementation
> > into common code, even if we start out by having all architectures
> > override it.
> 
> Agree. This is the RFC version. I didn't dare to post a patch with fixes
> for all architectures. :)

No need to change the other architectures. You can make it opt-in for
now and just put the code into a common location.
 
An interesting question however is what the transition plan is to
have the code shared between arm32 and arm64: We will certainly need
to share at least the dw-pcie and the generic SBSA compliant pci
implementation.

> > > +int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
> > > +{
> > > +	BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
> > > +
> > > +	return ioremap_page_range((unsigned long)PCI_IOBASE + offset,
> > > +				(unsigned long)PCI_IOBASE + offset + SZ_64K,
> > > +				phys_addr,
> > > +				__pgprot(PROT_DEVICE_nGnRE));
> > > +}
> > 
> > Not sure if we want to treat this one as architecture specific though.
> > It certainly won't be portable to x86, but it could be shared with
> > a couple of others. We may also want to redesign the interface.
> > I've been thinking we could make this function allocate space in the
> > Linux virtual I/O space aperture, and pass two resources into it
> > (physical I/O aperture and bus I/O range), and get the actual
> > io_offset as the return value, or a negative error number.
> 
> Not sure I completely follow your idea.

Something like this (coded in mail client, don't try to compile):

#define IO_SPACE_PAGES ((IO_SPACE_LIMIT + 1) / PAGE_SIZE)
static DECLARE_BITMAP(pci_iospace, IO_SPACE_PAGES);
unsigned long pci_ioremap_io(const struct resource *bus, const struct resource *phys)
{
	unsigned long start, len, virt_start;
	int error;

	/* use logical address == bus address if possible */
	start = bus->start / PAGE_SIZE;
	if (start > IO_SPACE_LIMIT / PAGE_SIZE)
		start = 0;

	/*
	 * try finding free space for the whole size first,
	 * fall back to 64K if not available
	 */
	len = min(resource_size(bus), resource_size(phys));
	start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
				start, len / PAGE_SIZE, 0);
	if (start == IO_SPACE_PAGES && len > SZ_64K) {
		len = SZ_64K;
		start = 0;
		start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
				start, len / PAGE_SIZE, 0);
	}

	/* no 64K area found */
	if (start == IO_SPACE_PAGES)
		return -ENOMEM;

	/* ioremap physical aperture to virtual aperture */
	virt_start = start * PAGE_SIZE + (unsigned long)PCI_IOBASE;
	error = ioremap_page_range(virt_start, virt_start + len,
				    phys->start, __pgprot(PROT_DEVICE_nGnRE));
	if (error)
		return error;

	bitmap_set(pci_iospace, start, len / PAGE_SIZE);

	/* return io_offset */
	return start * PAGE_SIZE - bus->start;
}
EXPORT_SYMBOL_GPL(pci_ioremap_io);
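
A host bridge driver would then consume the result along these lines (hypothetical caller; IS_ERR_VALUE() is used because the error is encoded in an unsigned long return value):

/* bus_io: bus-side I/O range from DT; phys_io: CPU physical window */
unsigned long io_offset = pci_ioremap_io(&bus_io, &phys_io);

if (IS_ERR_VALUE(io_offset))
	return io_offset;
/* io_offset now translates bus port numbers into Linux port numbers */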

	Arnd
Liviu Dudau Feb. 3, 2014, 9:36 p.m. UTC | #4
On Mon, Feb 03, 2014 at 08:05:56PM +0000, Arnd Bergmann wrote:
> On Monday 03 February 2014 19:18:38 Liviu Dudau wrote:
> > On Mon, Feb 03, 2014 at 06:58:56PM +0000, Arnd Bergmann wrote:
> > > On Monday 03 February 2014 18:43:48 Liviu Dudau wrote:
> > > > diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> > > > index 4cc813e..ce5bad2 100644
> > > > --- a/arch/arm64/include/asm/io.h
> > > > +++ b/arch/arm64/include/asm/io.h
> > > > @@ -120,9 +120,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
> > > >  /*
> > > >   *  I/O port access primitives.
> > > >   */
> > > > +#define arch_has_dev_port()    (0)
> > >
> > > Why not?
> >
> > Maybe I got it the wrong way around, but the comment in include/linux/io.h says:
> >
> > /*
> >  * Some systems do not have legacy ISA devices.
> >  * /dev/port is not a valid interface on these systems.
> >  * So for those archs, <asm/io.h> should define the following symbol.
> >  */
> >
> > So ... defining it should mean no legacy ISA devices, right?
>
> I would read that comment as referring to systems that don't have
> any I/O space. If you have PCI, you can by definition have ISA
> compatible devices behind a bridge. A typical example would be
> a VGA card that supports the 03c0-03df port range.

But if you have PCI and don't want to support ISA, do you still have /dev/port? I guess yes.
In that case ISA support is irrelevant here. Will update the value when I send v2.

>
> > >
> > > >  #define IO_SPACE_LIMIT         0xffff
> > >
> > > You probably want to increase this a bit, to allow multiple host bridges
> > > to have their own I/O space.
> >
> > OK, but to what size?
>
> 2 MB was a compromise on arm32 to allow up to 32 PCI host bridges but not
> take up too much virtual space. On arm64 it should be at least as big.
> Could be more than that, although I don't see a reason why it should be,
> unless we expect to see systems with tons of host bridges, or buses
> that exceed 64KB of I/O space.

I will increase the size to 2MB for v2.

>
> > > > +#define ioport_map(port, nr)   (PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> > > > +#define ioport_unmap(addr)
> > >
> > > inline functions?
> >
> > Will do, thanks!
>
> I suppose you can actually use the generic implementation from
> asm-generic/io.h, and fix it by using the definition you have
> above, since it's currently broken.

Not exactly broken, but it makes the assumption that your IO space starts at
physical address zero and you have not remapped it. It does guard the
definition with #ifndef CONFIG_GENERIC_IOMAP after all, so it does not
expect to be generic :)

>
> > > > diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
> > > > new file mode 100644
> > > > index 0000000..dd084a3
> > > > --- /dev/null
> > > > +++ b/arch/arm64/include/asm/pci.h
> > > > @@ -0,0 +1,35 @@
> > > > +#ifndef __ASM_PCI_H
> > > > +#define __ASM_PCI_H
> > > > +#ifdef __KERNEL__
> > > > +
> > > > +#include <linux/types.h>
> > > > +#include <linux/slab.h>
> > > > +#include <linux/dma-mapping.h>
> > > > +
> > > > +#include <asm/io.h>
> > > > +#include <asm-generic/pci-bridge.h>
> > > > +#include <asm-generic/pci-dma-compat.h>
> > > > +
> > > > +#define PCIBIOS_MIN_IO         0
> > > > +#define PCIBIOS_MIN_MEM                0
> > >
> > > PCIBIOS_MIN_IO is normally set to 0x1000, to stay out of the ISA range.
> >
> > :) No ISA support! (Die ISA, die!!)
>
> If only it were that easy.

Let's try! :)

I wonder how many active devices with an ISA slot are still supported
by the mainline kernel.

I will update PCIBIOS_MIN_xxxx to match arch/arm for v2.

>
> > > > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> > > > new file mode 100644
> > > > index 0000000..7b652cf
> > > > --- /dev/null
> > > > +++ b/arch/arm64/kernel/pci.c
> > > > @@ -0,0 +1,112 @@
> > >
> > > None of this looks really arm64 specific, nor should it be. I think
> > > we should try a little harder to move this as a default implementation
> > > into common code, even if we start out by having all architectures
> > > override it.
> >
> > Agree. This is the RFC version. I didn't dare to post a patch with fixes
> > for all architectures. :)
>
> No need to change the other architectures. You can make it opt-in for
> now and just put the code into a common location.
>
> An interesting question however is what the transition plan is to
> have the code shared between arm32 and arm64: We will certainly need
> to share at least the dw-pcie and the generic SBSA compliant pci
> implementation.

My vote would be for updating the host controllers to the new API and
to offer the CONFIG option to choose between arch APIs. The alternative
is to use the existing API to wrap the generic implementation.

My main concern with the existing API is the requirement to have a subsys_initcall
in your host bridge or mach code, due to the way the initialisation is done (you
need the DT code to probe your driver, but you cannot start the scanning of the
PCI bus until the arch code is initialised, so it gets deferred via
subsys_initcall when it calls pci_common_init). I bet that if one tries to
instantiate a Tegra PCI host bridge controller on a Marvell platform things will
break pretty quickly (random example here).
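
The contrast Liviu describes, in code terms (abbreviated sketch; the myhost_* names are placeholders):

/* old style: arch glue defers the bus scan to a subsys_initcall */
static int __init myhost_pci_init(void)
{
	pci_common_init(&myhost_pci);	/* scans and assigns the bus */
	return 0;
}
subsys_initcall(myhost_pci_init);

/* new style: an ordinary platform driver, probed from the DT node */
module_platform_driver(myhost_pcie_driver);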

>
> > > > +int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
> > > > +{
> > > > +       BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
> > > > +
> > > > +       return ioremap_page_range((unsigned long)PCI_IOBASE + offset,
> > > > +                               (unsigned long)PCI_IOBASE + offset + SZ_64K,
> > > > +                               phys_addr,
> > > > +                               __pgprot(PROT_DEVICE_nGnRE));
> > > > +}
> > >
> > > Not sure if we want to treat this one as architecture specific though.
> > > It certainly won't be portable to x86, but it could be shared with
> > > a couple of others. We may also want to redesign the interface.
> > > I've been thinking we could make this function allocate space in the
> > > Linux virtual I/O space aperture, and pass two resources into it
> > > (physical I/O aperture and bus I/O range), and get the actual
> > > io_offset as the return value, or a negative error number.
> >
> > Not sure I completely follow your idea.
>
> Something like this (coded in mail client, don't try to compile):
>
> #define IO_SPACE_PAGES ((IO_SPACE_LIMIT + 1) / PAGE_SIZE)
> static DECLARE_BITMAP(pci_iospace, IO_SPACE_PAGES);
> unsigned long pci_ioremap_io(const struct resource *bus, const struct resource *phys)
> {
>       unsigned long start, len, virt_start;
>       int error;
>
>       /* use logical address == bus address if possible */
>       start = bus->start / PAGE_SIZE;
>       if (start > IO_SPACE_LIMIT / PAGE_SIZE)
>               start = 0;
>
>       /*
>        * try finding free space for the whole size first,
>        * fall back to 64K if not available
>        */
>       len = min(resource_size(bus), resource_size(phys));
>       start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
>                               start, len / PAGE_SIZE, 0);
>       if (start == IO_SPACE_PAGES && len > SZ_64K) {
>               len = SZ_64K;
>               start = 0;
>               start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
>                               start, len / PAGE_SIZE, 0);
>       }
>
>       /* no 64K area found */
>       if (start == IO_SPACE_PAGES)
>               return -ENOMEM;
>
>       /* ioremap physical aperture to virtual aperture */
>       virt_start = start * PAGE_SIZE + (unsigned long)PCI_IOBASE;
>       error = ioremap_page_range(virt_start, virt_start + len,
>                                   phys->start, __pgprot(PROT_DEVICE_nGnRE));
>       if (error)
>               return error;
>
>       bitmap_set(pci_iospace, start, len / PAGE_SIZE);
>
>       /* return io_offset */
>       return start * PAGE_SIZE - bus->start;
> }
> EXPORT_SYMBOL_GPL(pci_ioremap_io);
>
>       Arnd
>

I see. I need to think how this will change the existing code. Current users of pci_ioremap_io
ask for multiples of SZ_64K offsets regardless of the actual need.

Best regards,
Liviu


Andrew Murray Feb. 3, 2014, 10:34 p.m. UTC | #5
On 3 February 2014 18:43, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> index 4cc813e..ce5bad2 100644
> --- a/arch/arm64/include/asm/io.h
> +++ b/arch/arm64/include/asm/io.h
> @@ -120,9 +120,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
>  /*
>   *  I/O port access primitives.
>   */
> +#define arch_has_dev_port()    (0)
>  #define IO_SPACE_LIMIT         0xffff
>  #define PCI_IOBASE             ((void __iomem *)(MODULES_VADDR - SZ_2M))
>
> +#define ioport_map(port, nr)   (PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> +#define ioport_unmap(addr)

I'm not sure that this will work. The in[bwl], out[bwl] macros in
arch/arm64/include/asm/io.h already add the PCI_IOBASE offset.

Instead of these two #defines, why not just enforce that
GENERIC_PCI_IOMAP is enabled? Or at least wrap these defines with 'if
(!config_enabled(CONFIG_GENERIC_PCI_IOMAP))' or similar.
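
One way to read that suggestion in preprocessor terms (sketch; config_enabled() would be the expression-level alternative):

#ifndef CONFIG_GENERIC_PCI_IOMAP
#define ioport_map(port, nr)	(PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
#define ioport_unmap(addr)
#endif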

> +
>  static inline u8 inb(unsigned long addr)
>  {
>         return readb(addr + PCI_IOBASE);


Andrew Murray
Rob Herring Feb. 3, 2014, 11:07 p.m. UTC | #6
On Mon, Feb 3, 2014 at 2:05 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Monday 03 February 2014 19:18:38 Liviu Dudau wrote:
>> On Mon, Feb 03, 2014 at 06:58:56PM +0000, Arnd Bergmann wrote:
>> > On Monday 03 February 2014 18:43:48 Liviu Dudau wrote:

[snip]

>> Agree. This is the RFC version. I didn't dare to post a patch with fixes
>> for all architectures. :)
>
> No need to change the other architectures. You can make it opt-in for
> now and just put the code into a common location.
>
> An interesting question however is what the transition plan is to
> have the code shared between arm32 and arm64: We will certainly need
> to share at least the dw-pcie and the generic SBSA compliant pci
> implementation.

You might want to re-read the SBSA. Unless ARM provides an IP block or
there is some other standard such as EHCI or AHCI, there is no generic
implementation. You only have to go look at the Linux EHCI or AHCI
drivers and see how meaningless and inadequate "use EHCI" is. For PCI,
the text is so brief in the SBSA there will be no way PCI is going to
just work given all the variations of root complexes, bridges, address
windows, etc. we typically see on ARM platforms. I could be wrong and
some AML magic will solve all the problems. :)

Rob
Jason Gunthorpe Feb. 3, 2014, 11:31 p.m. UTC | #7
On Mon, Feb 03, 2014 at 05:07:48PM -0600, Rob Herring wrote:

> > An interesting question however is what the transition plan is to
> > have the code shared between arm32 and arm64: We will certainly need
> > to share at least the dw-pcie and the generic SBSA compliant pci
> > implementation.
> 
> You might want to re-read the SBSA. Unless ARM provides an IP block or
> there is some other standard such as EHCI or AHCI, there is no generic
> implementation. You only have to go look at the Linux EHCI or AHCI
> drivers and see how meaningless and inadequate "use EHCI" is. For PCI,
> the text is so brief in the SBSA there will be no way PCI is going to
> just work given all the variations of root complexes, bridges, address
> windows, etc. we typically see on ARM platforms. I could be wrong and
> some AML magic will solve all the problems. :)

The biggest hindrance I've seen while looking at ARM PCI drivers is
quite simply that they don't follow the PCI-E spec. There is a spec, and
it is not followed.

This fixup in the X-Gene is a solid example: the purpose of the BAR in
a PCI-PCI bridge is very clear, and using it to specify the system
DRAM aperture is completely wrong.

Not having working aperture windows in the root complex's bridges is
completely wrong.

Lacking any ability to generate 8 and 16 bit config write TLP's is
wrong.

Starting with an endpoint PCI-E core, re-tasking it to be a root
port bridge, and ignoring all the requirements on the bridge's config
space is utterly and completely WRONG.

Specifying 'use EHCI, AHCI, etc' - which are all PCI based standards
without clearly specifying exactly how PCI is supposed to work is
completely bonkers.

What is needed is a spec that says:
 1) Here is how you generate config TLPs. A MMIO region that
    conforms to the already specified x86 ECAM would
    be perfect
 2) Here is a dword by dword break down of the entire config space in
    a SOC. Here is where a on-board AHCI controller must show up in
    config space. Here is how an external PCI-E port must show
    up. Etc. Most of this is already specified, but it clearly needs
     to be laid out explicitly for ARM SOCs to actually follow it.
 3) Here is how you specify the aperture(s) associated with PCI BAR's
    and bridge windows in config space. And yes: The CONFIG SPACE
    BARS MUST WORK.
 4) Here is how MSI works, these are the values you put in the
    address/data and here is how you collect the interrupt.
 5) Here is how Legacy INTx must be mapped into the GIC.

This is what x86 does, and they have been doing it well for 10
years. If you want to play in the server game you have to properly
implement PCI.
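
To make point 1 concrete: ECAM reduces config space access to a single address computation, with a fixed 4K window per function as laid down in the PCIe spec (the function name here is illustrative):

/* ECAM: each bus/device/function owns a fixed 4K config window */
static void __iomem *ecam_cfg_addr(void __iomem *ecam_base,
				   unsigned int busnr, unsigned int devfn,
				   int where)
{
	return ecam_base + ((busnr << 20) | (devfn << 12) | where);
}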

Jason
Arnd Bergmann Feb. 4, 2014, 8:44 a.m. UTC | #8
On Monday 03 February 2014 21:36:58 Liviu Dudau wrote:
> On Mon, Feb 03, 2014 at 08:05:56PM +0000, Arnd Bergmann wrote:
> > On Monday 03 February 2014 19:18:38 Liviu Dudau wrote:
> > > So ... defining it should mean no legacy ISA devices, right?
> > 
> > I would read that comment as referring to systems that don't have
> > any I/O space. If you have PCI, you can by definition have ISA
> > compatible devices behind a bridge. A typical example would be
> > a VGA card that supports the 03c0-03df port range.
> 
> But if you have PCI and don't want to support ISA, do you still have /dev/port? I guess yes.

Right, that is my interpretation. You could still have a tool
that tries to poke /dev/port from user space for any I/O BAR
the same way you'd use /dev/mem for memory BARs of PCI devices.

It's discouraged, but it's often the easiest solution for
a quick hack, and I'd expect tools to use this.

> > > > >  #define IO_SPACE_LIMIT		0xffff
> > > > 
> > > > You probably want to increase this a bit, to allow multiple host bridges
> > > > to have their own I/O space.
> > > 
> > > OK, but to what size?
> > 
> > 2 MB was a compromise on arm32 to allow up to 32 PCI host bridges but not
> > take up too much virtual space. On arm64 it should be at least as big.
> > Could be more than that, although I don't see a reason why it should be,
> > unless we expect to see systems with tons of host bridges, or buses
> > that exceed 64KB of I/O space.
> 
> I will increase the size to 2MB for v2.

Thinking about this some more, I'd go a little higher. In case of
pci_mv, we already register a 1MB region for one logical host
(which has multiple I/O spaces behind an emulated bridge), so
going to 16MB or more would let us handle multiple 1MB windows
for some amount of future proofing.

Maybe Catalin can assign us some virtual address space to use,
with the constraints that it should be 16MB or more in an
area that doesn't require additional kernel page table pages.
 
> > > > > +#define ioport_map(port, nr)	(PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> > > > > +#define ioport_unmap(addr)
> > > > 
> > > > inline functions?
> > > 
> > > Will do, thanks!
> > 
> > I suppose you can actually use the generic implementation from
> > asm-generic/io.h, and fix it by using the definition you have
> > above, since it's currently broken.
> 
> Not exactly broken, but it makes the assumption that your IO space starts at
> physical address zero and you have not remapped it. It does guard the
> definition with #ifndef CONFIG_GENERIC_IOMAP after all, so it does not
> expect to be generic :)

Well, I/O space never starts at physical zero in reality, so it is
broken in practice. The CONFIG_GENERIC_IOMAP option tries to solve
the problem of I/O spaces that are not memory mapped, which is
actually quite rare (x86, ia64, some rare powerpc bridges, and possibly
Alpha). The norm is that if you have I/O space, it is memory mapped
and you don't need GENERIC_IOMAP. I think most of the architectures
selecting GENERIC_IOMAP have incorrectly copied that from x86.

> > > > > diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
> > > > > new file mode 100644
> > > > > index 0000000..dd084a3
> > > > > --- /dev/null
> > > > > +++ b/arch/arm64/include/asm/pci.h
> > > > > @@ -0,0 +1,35 @@
> > > > > +#ifndef __ASM_PCI_H
> > > > > +#define __ASM_PCI_H
> > > > > +#ifdef __KERNEL__
> > > > > +
> > > > > +#include <linux/types.h>
> > > > > +#include <linux/slab.h>
> > > > > +#include <linux/dma-mapping.h>
> > > > > +
> > > > > +#include <asm/io.h>
> > > > > +#include <asm-generic/pci-bridge.h>
> > > > > +#include <asm-generic/pci-dma-compat.h>
> > > > > +
> > > > > +#define PCIBIOS_MIN_IO		0
> > > > > +#define PCIBIOS_MIN_MEM		0
> > > > 
> > > > PCIBIOS_MIN_IO is normally set to 0x1000, to stay out of the ISA range.
> > > 
> > > :) No ISA support! (Die ISA, die!!) 
> > 
> > If only it were that easy.
> 
> Let's try! :)
> 
> I wonder how many active devices with an ISA slot are still supported
> by the mainline kernel.

This is missing the point, but any architecture that has a PCI
slot can have an ISA device behind a bridge like this:

http://www.altera.com/products/ip/iup/pci/m-eur-pci-to-isa.html

The real reason is that a lot of PCI cards for practical reasons
expose some non-relocatable memory and I/O BARs in ISA-compatible
locations. Looking at /proc/ioports on my PC, I can spot a couple
of things that may well show up on any ARM machine:

0000-03af : PCI Bus 0000:00
  02f8-02ff : serial
03c0-03df : PCI Bus 0000:40
  03c0-03df : PCI Bus 0000:00
    03c0-03df : vga+
03e0-0cf7 : PCI Bus 0000:00
  03e8-03ef : serial
  03f8-03ff : serial
  0b00-0b0f : pnp 00:08
    0b00-0b07 : piix4_smbus
  0b20-0b3f : pnp 00:08
    0b20-0b27 : piix4_smbus
  0ca2-0ca3 : pnp 00:08
    0ca2-0ca2 : ipmi_si
    0ca3-0ca3 : ipmi_si
  0cf8-0cff : PCI conf1

Nothing wrong with the above. Now, it's also possible that
someone decides to build an ARM server by using a PC south
bridge with integrated legacy PC peripherals, such as these:

0000-03af : PCI Bus 0000:00
  0000-001f : dma1
  0020-0021 : pic1
  0040-0043 : timer0
  0050-0053 : timer1
  0060-0060 : keyboard
  0064-0064 : keyboard
  0070-0071 : rtc0
  0080-008f : dma page reg
  00a0-00a1 : pic2
  00b0-00b0 : APEI ERST
  00c0-00df : dma2
  00f0-00ff : fpu

There is some hope that it won't happen, but these things
exist and come with a regular PCIe front-end bus in some
cases.

Finally, there is the LPC bus, which can give you additional
ISAPnP compatible devices.

> I will update PCIBIOS_MIN_xxxx to match arch/arm for v2.

Ok.

> > > > > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> > > > > new file mode 100644
> > > > > index 0000000..7b652cf
> > > > > --- /dev/null
> > > > > +++ b/arch/arm64/kernel/pci.c
> > > > > @@ -0,0 +1,112 @@
> > > > 
> > > > None of this looks really arm64 specific, nor should it be. I think
> > > > we should try a little harder to move this as a default implementation
> > > > into common code, even if we start out by having all architectures
> > > > override it.
> > > 
> > > Agree. This is the RFC version. I didn't dare to post a patch with fixes
> > > for all architectures. :)
> > 
> > No need to change the other architectures. You can make it opt-in for
> > now and just put the code into a common location.
> >  
> > An interesting question however is what the transition plan is to
> > have the code shared between arm32 and arm64: We will certainly need
> > to share at least the dw-pcie and the generic SBSA compliant pci
> > implementation.
> 
> My vote would be for updating the host controllers to the new API and
> to offer the CONFIG option to choose between arch APIs. The alternative
> is to use the existing API to wrap the generic implementation.

The problem with an either/or CONFIG option is that it breaks
multiplatform support. A lot of the arm32 PCI implementations
are only used on platforms that are not multiplatform enabled
though, so we could get away by requiring all multiplatform
configurations to use the new API.

> My main concern with the existing API is the requirement to have a subsys_initcall
> in your host bridge or mach code, due to the way the initialisation is done (you
> need the DT code to probe your driver, but you cannot start the scanning of the 
> PCI bus until the arch code is initialised, so it gets deferred via 
> subsys_initcall when it calls pci_common_init). I bet that if one tries to
> instantiate a Tegra PCI host bridge controller on a Marvell platform things will
> break pretty quickly (random example here).

I'm not following here. All the new host controller drivers should
be platform drivers that only bind to the host devices in DT
that are present. Both mvebu and tegra use a normal "module_platform_driver"
for initialization, not a "subsys_initcall".

> > Something like this (coded in mail client, don't try to compile):
> > 
> > #define IO_SPACE_PAGES ((IO_SPACE_LIMIT + 1) / PAGE_SIZE)
> > static DECLARE_BITMAP(pci_iospace, IO_SPACE_PAGES);
> > unsigned long pci_ioremap_io(const struct resource *bus, const struct resource *phys)
> > {
> > 	unsigned long start, len, virt_start;
> > 	int error;
> > 
> > 	/* use logical address == bus address if possible */
> > 	start = bus->start / PAGE_SIZE;
> > 	if (start > IO_SPACE_LIMIT / PAGE_SIZE)
> > 		start = 0;
> > 
> > 	/*
> > 	 * try finding free space for the whole size first,
> > 	 * fall back to 64K if not available
> > 	 */
> > 	len = min(resource_size(bus), resource_size(phys));
> > 	start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> > 				start, len / PAGE_SIZE, 0);
> > 	if (start == IO_SPACE_PAGES && len > SZ_64K) {
> > 		len = SZ_64K;
> > 		start = 0;
> > 		start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> > 				start, len / PAGE_SIZE, 0);
> > 	}
> > 
> > 	/* no 64K area found */
> > 	if (start == IO_SPACE_PAGES)
> > 		return -ENOMEM;
> > 
> > 	/* ioremap physical aperture to virtual aperture */
> > 	virt_start = start * PAGE_SIZE + (unsigned long)PCI_IOBASE;
> > 	error = ioremap_page_range(virt_start, virt_start + len,
> > 				    phys->start, __pgprot(PROT_DEVICE_nGnRE));
> > 	if (error)
> > 		return error;
> > 
> > 	bitmap_set(pci_iospace, start, len / PAGE_SIZE);
> > 
> > 	/* return io_offset */
> > 	return start * PAGE_SIZE - bus->start;
> > }
> > EXPORT_SYMBOL_GPL(pci_ioremap_io);
> > 
> > 	Arnd
> > 
> 
> I see. I need to think how this will change the existing code. Current users
> of pci_ioremap_io  ask for multiples of SZ_64K offsets regardless of the
> actual need. 

Right. I guess we can support both interfaces on ARM32 for the foreseeable
future (renaming the new one) and just change the existing implementation
to update the bitmap. Any cross-platform host controller driver would
have to use the new interface however.

	Arnd
Arnd Bergmann Feb. 4, 2014, 9:01 a.m. UTC | #9
On Monday 03 February 2014 17:07:48 Rob Herring wrote:
> On Mon, Feb 3, 2014 at 2:05 PM, Arnd Bergmann <arnd@arndb.de> wrote:
>
> You might want to re-read the SBSA. Unless ARM provides an IP block or
> there is some other standard such as EHCI or AHCI, there is no generic
> implementation. You only have to go look at the Linux EHCI or AHCI
> drivers and see how meaningless and inadequate "use EHCI" is. For PCI,
> the text is so brief in the SBSA there will be no way PCI is going to
> just work given all the variations of root complexes, bridges, address
> windows, etc. we typically see on ARM platforms. I could be wrong and
> some AML magic will solve all the problems. 

I don't think you need any AML, and SBSA seems to cover the PCI case
just fine, though I have to agree that the EHCI/AHCI/xHCI part is
rather ambiguous. What the existing PCI host controller drivers do is
essentially:

1. provide a config space access method
2. set up the I/O and memory address windows
3. set up clocks, PHYs etc
4. work around any deviations from the PCI standard
5. provide an MSI/MSI-X controller

As far as I can tell, any SBSA compliant system should handle
those five like this:

1. config space is ECAM compliant, we only need to pass the
   location in DT. (SBSA 8.1)
2. all address windows are set up by the boot loader, we only
   need to know the location (IMHO this should be the
   preferred way to do things regardless of SBSA).
3. any external hardware dependencies are set up statically
   by the boot loader and are operational as we enter the
   kernel.
4. deviations from PCI are not allowed (SBSA 8.8)
5. MSI has to be handled by GICv3 (SBSA 8.3.2)
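
Point 2 has a direct software consequence: the kernel can claim the firmware-assigned resources instead of reassigning everything. A minimal sketch, using the standard PCI core helpers (child buses are ignored for brevity):

#include <linux/pci.h>

static void claim_preassigned_resources(struct pci_bus *bus)
{
	struct pci_dev *dev;
	int i;

	list_for_each_entry(dev, &bus->devices, bus_list)
		for (i = 0; i < PCI_NUM_RESOURCES; i++)
			if (dev->resource[i].flags)
				pci_claim_resource(dev, i);
}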

So I definitely expect SBSA compliant systems to work fine with a
very simple generic PCI host bridge driver (which is likely what
Liviu has implemented and is waiting for approval to submit).
The more important question is what systems will actually be
compliant with the above. X-Gene manages to get all five wrong,
for instance, and so would any system that reuses the existing
PCI hardware (alphabetically: exynos, imx6, mvebu, rcar-gen2,
tegra, designware), although points 2 and 3 are a question of
the boot loader, and it's possible that the designware based ones
get point 4 right.

	Arnd
Arnd Bergmann Feb. 4, 2014, 9:44 a.m. UTC | #10
On Monday 03 February 2014 16:31:37 Jason Gunthorpe wrote:
> Specifying 'use EHCI, AHCI, etc' - which are all PCI based standards
> without clearly specifying exactly how PCI is suppose to work is
> completely bonkers.
> 
> What is needed is a spec that says:
>  1) Here is how you generate config TLPs. A MMIO region that
>     conforms to the already specified x86 ECAM would
>     be perfect
>  2) Here is a dword by dword break down of the entire config space in
>     a SOC. Here is where a on-board AHCI controller must show up in
>     config space. Here is how an external PCI-E port must show
>     up. Etc. Most of this is already specified, but it clearly needs
>     to be layed out explicitly for ARM SOCs to actually follow it.
>  3) Here is how you specify the aperture(s) associated with PCI BAR's
>     and bridge windows in config space. And yes: The CONFIG SPACE
>     BARS MUST WORK.
>  4) Here is how MSI works, these are the values you put in the
>     address/data and here is how you collect the interrupt.
>  5) Here is how Legacy INTx must be mapped into the GIC.
> 
> This is what x86 does, and they have been doing it well for 10
> years. If you want to play in the server game you have to properly
> implement PCI.

I'm pretty sure the authors of the SBSA actually thought that was
what they wrote, by referring to external specifications (pci-3.0,
ehci, ahci, ...).  However, it seems they were either foolish enough
to believe that hardware designers would follow these specs, or
they were intentionally misled and got talked into putting ambiguous
terminology in because there were already hardware designs that
are not exactly in line with the spirit of the SBSA but can be
argued to conform to the text under a lax interpretation.

I think EHCI is a much better example than PCI here, because the
spec has exactly one line to say about it, where it spends a whole
chapter on PCI.

Here is how a sane person would read SBSA to create a compliant
implementation:

  I have to use EHCI version 1.1 to provide USB-2.0 support. EHCI
  is a PCI device, so I'll put it behind a PCIe port that complies
  to the PCIe section of the SBSA. Since EHCI by itself only provides
  high-speed USB, and USB-2.0 mandates I provide low-speed and
  full-speed as well, I have to add a USB hub device. It would have
  been easier to just use OHCI for these, but SBSA says I can't.
  Now I want to integrate the EHCI into my SoC and not waste one
  of my precious PCIe root ports, so I have to create another PCI
  domain with its own ECAM compliant config space to put it into.
  Fortunately SBSA lets me add an arbitrary number of PCI domains,
  as long as they are all strictly compliant. To software it will
  look exactly as if it was on an external card, I just have to
  ensure the boot loader correctly sets up the clocks and the phy
  before an SBSA compliant OS gets loaded, all runtime power
  management will get handled through the EHCI-1.1 energy-efficiency
  extensions.

Here is how a crazy person would read the same sentence in the SBSA:

  I have an IP block that implements the EHCI register set, that
  should be good enough. It's not a fast device, so I can put it
  on a non-coherent bus. Since the SoC will be used for networking,
  I'll put the registers into big-endian configuration to make it
  easier for the OS to access. I'm not allowed to have USB-1.1
  according to SBSA, so I can get away without a hub or an extra
  OHCI. I can't support MSI because it's not a PCI device, and
  the GIC is pretty full, so I'll just connect the IRQ line to
  the GPIO controller. In order to do better power management,
  I'll design a fancy PHY that the device driver will manage
  for implementing autosuspend. I should also give the OS
  fine-grained control over the clocks, but it will have to share
  the clock domain with the other devices on the same edge of the
  chip. The EHCI device is not part of PCI, which measn I don't
  have to use the standard SMMU. However, my EHCI implementation
  can only do 32-bit DMA, and I'll have to design my own IOMMU
  to let it access the entire memory range. USB-OTG is a great
  feature and we already paid for having this in our EHCI
  implementation, better make sure it comes up in endpoint mode
  after waking up from power saving.

	Arnd
Liviu Dudau Feb. 4, 2014, 11:09 a.m. UTC | #11
On Tue, Feb 04, 2014 at 08:44:36AM +0000, Arnd Bergmann wrote:
> On Monday 03 February 2014 21:36:58 Liviu Dudau wrote:
> > On Mon, Feb 03, 2014 at 08:05:56PM +0000, Arnd Bergmann wrote:
> > > On Monday 03 February 2014 19:18:38 Liviu Dudau wrote:
> > > > So ... defining it should mean no legacy ISA devices, right?
> > >
> > > I would read that comment as referring to systems that don't have
> > > any I/O space. If you have PCI, you can by definition have ISA
> > > compatible devices behind a bridge. A typical example would be
> > > a VGA card that supports the 03c0-03df port range.
> >
> > But if you have PCI and don't want to support ISA, do you still have /dev/port? I guess yes.
>
> Right, that is my interpretation. You could still have a tool
> that tries to poke /dev/port from user space for any I/O BAR
> the same way you'd use /dev/mem for memory BARs of PCI devices.
>
> It's discouraged, but it's often the easiest solution for
> a quick hack, and I'd expect tools to use this.
>
> > > > > >  #define IO_SPACE_LIMIT             0xffff
> > > > >
> > > > > You probably want to increase this a bit, to allow multiple host bridges
> > > > > to have their own I/O space.
> > > >
> > > > OK, but to what size?
> > >
> > > 2 MB was a compromise on arm32 to allow up to 32 PCI host bridges but not
> > > take up too much virtual space. On arm64 it should be at least as big.
> > > Could be more than that, although I don't see a reason why it should be,
> > > unless we expect to see systems with tons of host bridges, or buses
> > > that exceed 64KB of I/O space.
> >
> > I will increase the size to 2MB for v2.
>
> Thinking about this some more, I'd go a little higher. In case of
> pci_mv, we already register a 1MB region for one logical host
> (which has multiple I/O spaces behind an emulated bridge), so
> going to 16MB or more would let us handle multiple 1MB windows
> for some amount of future proofing.
>
> Maybe Catalin can assign us some virtual address space to use,
> with the constraints that it should be 16MB or more in an
> area that doesn't require additional kernel page table pages.

I'll pick Catalin's brain for suggestions.

>
> > > > > > +#define ioport_map(port, nr)       (PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> > > > > > +#define ioport_unmap(addr)
> > > > >
> > > > > inline functions?
> > > >
> > > > Will do, thanks!
> > >
> > > I suppose you can actually use the generic implementation from
> > > asm-generic/io.h, and fix it by using the definition you have
> > > above, since it's currently broken.
> >
> > Not exactly broken, but it makes the assumption that your IO space starts at
> > physical address zero and you have not remapped it. It does guard the
> > definition with #ifndef CONFIG_GENERIC_IOMAP after all, so it does not
> > expect to be generic :)
>
> Well, I/O space never starts at physical zero in reality, so it is
> broken in practice. The CONFIG_GENERIC_IOMAP option tries to solve
> the problem of I/O spaces that are not memory mapped, which is
> actually quite rare (x86, ia64, some rare powerpc bridges, and possibly
> Alpha). The norm is that if you have I/O space, it is memory mapped
> and you don't need GENERIC_IOMAP. I think most of the architectures
> selecting GENERIC_IOMAP have incorrectly copied that from x86.

If you are talking about CPU addresses for I/O space, then you are (mostly?) right.
I've seen some code in powerpc that tries to handle the case where I/O starts at zero.

For MMIO, yes, it would be crazy to start at CPU address zero. But ioport_map
takes a port number, and those do start at zero, right?

>
> > > > > > diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
> > > > > > new file mode 100644
> > > > > > index 0000000..dd084a3
> > > > > > --- /dev/null
> > > > > > +++ b/arch/arm64/include/asm/pci.h
> > > > > > @@ -0,0 +1,35 @@
> > > > > > +#ifndef __ASM_PCI_H
> > > > > > +#define __ASM_PCI_H
> > > > > > +#ifdef __KERNEL__
> > > > > > +
> > > > > > +#include <linux/types.h>
> > > > > > +#include <linux/slab.h>
> > > > > > +#include <linux/dma-mapping.h>
> > > > > > +
> > > > > > +#include <asm/io.h>
> > > > > > +#include <asm-generic/pci-bridge.h>
> > > > > > +#include <asm-generic/pci-dma-compat.h>
> > > > > > +
> > > > > > +#define PCIBIOS_MIN_IO             0
> > > > > > +#define PCIBIOS_MIN_MEM            0
> > > > >
> > > > > PCIBIOS_MIN_IO is normally set to 0x1000, to stay out of the ISA range.
> > > >
> > > > :) No ISA support! (Die ISA, die!!)
> > >
> > > If only it were that easy.
> >
> > Let's try! :)
> >
> > I wonder how many active devices with an ISA slot are still supported
> > by the mainline kernel.
>
> This is missing the point, but any architecture that has a PCI
> slot can have an ISA device behind a bridge like this:
>
> http://www.altera.com/products/ip/iup/pci/m-eur-pci-to-isa.html
>
> The real reason is that a lot of PCI cards for practical reasons
> expose some non-relocatable memory and I/O BARs in ISA-compatible
> locations. Looking at /proc/ioports on my PC, I can spot a couple
> of things that may well show up on any ARM machine:
>
> 0000-03af : PCI Bus 0000:00
>   02f8-02ff : serial
> 03c0-03df : PCI Bus 0000:40
>   03c0-03df : PCI Bus 0000:00
>     03c0-03df : vga+
> 03e0-0cf7 : PCI Bus 0000:00
>   03e8-03ef : serial
>   03f8-03ff : serial
>   0b00-0b0f : pnp 00:08
>     0b00-0b07 : piix4_smbus
>   0b20-0b3f : pnp 00:08
>     0b20-0b27 : piix4_smbus
>   0ca2-0ca3 : pnp 00:08
>     0ca2-0ca2 : ipmi_si
>     0ca3-0ca3 : ipmi_si
>   0cf8-0cff : PCI conf1
>
> Nothing wrong with the above. Now, it's also possible that
> someone decides to build an ARM server by using a PC south
> bridge with integrated legacy PC peripherals, such as these:
>
> 0000-03af : PCI Bus 0000:00
>   0000-001f : dma1
>   0020-0021 : pic1
>   0040-0043 : timer0
>   0050-0053 : timer1
>   0060-0060 : keyboard
>   0064-0064 : keyboard
>   0070-0071 : rtc0
>   0080-008f : dma page reg
>   00a0-00a1 : pic2
>   00b0-00b0 : APEI ERST
>   00c0-00df : dma2
>   00f0-00ff : fpu
>
> There is some hope that it won't happen, but these things
> exist and come with a regular PCIe front-end bus in some
> cases.
>
> Finally, there is the LPC bus, which can give you additional
> ISAPnP compatible devices.
>
> > I will update PCIBIOS_MIN_xxxx to match arch/arm for v2.
>
> Ok.
>
> > > > > > diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
> > > > > > new file mode 100644
> > > > > > index 0000000..7b652cf
> > > > > > --- /dev/null
> > > > > > +++ b/arch/arm64/kernel/pci.c
> > > > > > @@ -0,0 +1,112 @@
> > > > >
> > > > > None of this looks really arm64 specific, nor should it be. I think
> > > > > we should try a little harder to move this as a default implementation
> > > > > into common code, even if we start out by having all architectures
> > > > > override it.
> > > >
> > > > Agree. This is the RFC version. I didn't dare to post a patch with fixes
> > > > for all architectures. :)
> > >
> > > No need to change the other architectures. You can make it opt-in for
> > > now and just put the code into a common location.
> > >
> > > An interesting question however is what the transition plan is to
> > > have the code shared between arm32 and arm64: We will certainly need
> > > to share at least the dw-pcie and the generic SBSA compliant pci
> > > implementation.
> >
> > My vote would be for updating the host controllers to the new API and
> > to offer the CONFIG option to choose between arch APIs. The alternative
> > is to use the existing API to wrap the generic implementation.
>
> The problem with an either/or CONFIG option is that it breaks
> multiplatform support. A lot of the arm32 PCI implementations
> are only used on platforms that are not multiplatform enabled
> though, so we could get away by requiring all multiplatform
> configurations to use the new API.

I was thinking more in terms of catching uses of the old API in the new host bridge
driver. The added functions should be able to live beside the old API for the
generic PCI framework, though I'm not sure how the arm arch code should handle it.

>
> > My main concern with the existing API is the requirement to have a subsys_initcall
> > in your host bridge or mach code, due to the way the initialisation is done (you
> > need the DT code to probe your driver, but you cannot start the scanning of the
> > PCI bus until the arch code is initialised, so it gets deferred via
> > subsys_initcall when it calls pci_common_init). I bet that if one tries to
> > instantiate a Tegra PCI host bridge controller on a Marvell platform things will
> > break pretty quickly (random example here).
>
> I'm not following here. All the new host controller drivers should
> be platform drivers that only bind to the host devices in DT
> that are present. Both mvebu and tegra use a normal "module_platform_driver"
> for initialization, not a "subsys_initcall".

I was actually looking at mach-dove, I thought that was Marvell as well.

But both mvebu and tegra call pci_common_init_dev. The busnr gets assigned based on
the registration order. I wonder if any of the host bridge code copes with being
assigned a bus number other than zero for its "root bus".

>
> > > Something like this (coded in mail client, don't try to compile):
> > >
> > > #define IO_SPACE_PAGES ((IO_SPACE_LIMIT + 1) / PAGE_SIZE)
> > > static DECLARE_BITMAP(pci_iospace, IO_SPACE_PAGES);
> > > unsigned long pci_ioremap_io(const struct resource *bus, const struct resource *phys)
> > > {
> > >   unsigned long start, len, virt_start;
> > >   int error;
> > >
> > >   /* use logical address == bus address if possible */
> > >   start = bus->start / PAGE_SIZE;
> > >   if (start > IO_SPACE_LIMIT / PAGE_SIZE)
> > >           start = 0;
> > >
> > >   /*
> > >    * try finding free space for the whole size first,
> > >    * fall back to 64K if not available
> > >    */
> > >   len = min(resource_size(bus), resource_size(phys));
> > >   start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> > >                           start, len / PAGE_SIZE, 0);
> > >   if (start == IO_SPACE_PAGES && len > SZ_64K) {
> > >           len = SZ_64K;
> > >           start = 0;
> > >           start = bitmap_find_next_zero_area(pci_iospace, IO_SPACE_PAGES,
> > >                           start, len / PAGE_SIZE, 0);
> > >   }
> > >
> > >   /* no 64K area found */
> > >   if (start == IO_SPACE_PAGES)
> > >           return -ENOMEM;
> > >
> > >   /* ioremap physical aperture to virtual aperture */
> > >   virt_start = start * PAGE_SIZE + (unsigned long)PCI_IOBASE;
> > >   error = ioremap_page_range(virt_start, virt_start + len,
> > >                               phys->start, __pgprot(PROT_DEVICE_nGnRE));
> > >   if (error)
> > >           return error;
> > >
> > >   bitmap_set(pci_iospace, start, len / PAGE_SIZE);
> > >
> > >   /* return io_offset */
> > >   return start * PAGE_SIZE - bus->start;
> > > }
> > > EXPORT_SYMBOL_GPL(pci_ioremap_io);
> > >
> > >   Arnd
> > >
> >
> > I see. I need to think how this will change the existing code. Current users
> > of pci_ioremap_io  ask for multiples of SZ_64K offsets regardless of the
> > actual need.
>
> Right. I guess we can support both interfaces on ARM32 for the foreseeable
> future (renaming the new one) and just change the existing implementation
> to update the bitmap. Any cross-platform host controller driver would
> have to use the new interface however.
>
>       Arnd

OK, I can try to add the function to my patch. Call it pci_ioremap_iores?

Best regards,
Liviu

--
====================
| I would like to |
| fix the world,  |
| but they're not |
| giving me the   |
 \ source code!  /
  ---------------
    ¯\_(ツ)_/¯

Arnd Bergmann Feb. 4, 2014, 11:54 a.m. UTC | #12
On Tuesday 04 February 2014 11:09:22 Liviu Dudau wrote:
> On Tue, Feb 04, 2014 at 08:44:36AM +0000, Arnd Bergmann wrote:

> > Well, I/O space never starts at physical zero in reality, so it is
> > broken in practice. The CONFIG_GENERIC_IOMAP option tries to solve
> > the problem of I/O spaces that are not memory mapped, which is
> > actually quite rare (x86, ia64, some rare powerpc bridges, and possibly
> > Alpha). The norm is that if you have I/O space, it is memory mapped
> > and you don't need GENERIC_IOMAP. I think most of the architectures
> > selecting GENERIC_IOMAP have incorrectly copied that from x86.
> 
> If you are talking about CPU addresses for I/O space, then you are (mostly?) right.
> I've seen some code in powerpc that tries to handle the case where I/O starts at zero.
>
> For MMIO, yes, it would be crazy to start at CPU address zero. But,
> the ioport_map takes a port number, and those do start at zero, right?

What I meant is that asm-generic/io.h assumes that the I/O ports are
mapped at /virtual/ address zero, which is even more crazy, since that
is normally in user space. Sorry for confusing it with physical address
zero.

Now the GENERIC_IOMAP uses a similar fiction by defining that virtual
address token 0x10000-0x1ffff are used to access I/O space when calling
inb/outb, but that is something you only need to do when you have
no memory mapped I/O port.
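
Roughly, from lib/iomap.c (trimmed), ioport_map() just adds that token
offset instead of mapping anything:

#define PIO_OFFSET	0x10000UL
#define PIO_MASK	0x0ffffUL

void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
	if (port > PIO_MASK)
		return NULL;
	return (void __iomem *) (unsigned long) (port + PIO_OFFSET);
}

and the ioread/iowrite helpers later check which range the token falls
into to decide between inb/outb and readb/writeb.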

Some older ARM platforms (PXA for instance) also defined the I/O space
to start at virtual address zero, and used a per-bus io_offset that was
equal to the ioremapped I/O window. This actually works, but it means
that the logical port numbers are all high, and you have to set
IO_SPACE_LIMIT to ULONG_MAX, and it breaks horribly for any driver that
tries to store a port number in a type that is shorter than 'unsigned
long'. We definitely don't want to do this for new code.
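
To make the breakage concrete (made-up numbers, not from any real
platform):

unsigned long io_offset = 0xfee00000;		/* virtual address of the mapped window */
unsigned long port = 0x1000 + io_offset;	/* logical port number, way above 0xffff */
u16 stored = port;				/* silently truncated back to 0x1000 */

and the next inb(stored) pokes the wrong address.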

> > > My main concern with the existing API is the requirement to have a subsys_initcall
> > > in your host bridge or mach code, due to the way the initialisation is done (you
> > > need the DT code to probe your driver, but you cannot start the scanning of the
> > > PCI bus until the arch code is initialised, so it gets deferred via
> > > subsys_initcall when it calls pci_common_init). I bet that if one tries to
> > > instantiate a Tegra PCI host bridge controller on a Marvell platform things will
> > > break pretty quickly (random example here).
> >
> > I'm not following here. All the new host controller drivers should
> > be platform drivers that only bind to the host devices in DT
> > that are present. Both mvebu and tegra use a normal "module_platform_driver"
> > for initialization, not a "subsys_initcall".
> 
> I was actually looking at mach-dove, I thought that was Marvell as well.

mach-dove is going away soon; it will get merged into mach-mvebu and
then use drivers/pci/host/pci-mvebu.c
 
> But both mvebu and tegra call pci_common_init_dev. The busnr gets assigned based on
> the registration order. I wonder if any of the host bridge code copes with having been
> assigned a bus number other than zero for its "root bus".

I think all "new" host bridges now use nr_controllers=1, which means
that you always start at bus number zero and use a PCI domain if
you have multiple independent root bridges.

> >
> > Right. I guess we can support both interfaces on ARM32 for the foreseeable
> > future (renaming the new one) and just change the existing implementation
> > to update the bitmap. Any cross-platform host controller driver would
> > have to use the new interface however.
> 
> OK, I can try to add the function to my patch. Call it pci_ioremap_iores?

Sounds ok, I can't think of anything better at least.

	Arnd
Liviu Dudau Feb. 4, 2014, 12:29 p.m. UTC | #13
On Mon, Feb 03, 2014 at 10:34:40PM +0000, Andrew Murray wrote:
> On 3 February 2014 18:43, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> > index 4cc813e..ce5bad2 100644
> > --- a/arch/arm64/include/asm/io.h
> > +++ b/arch/arm64/include/asm/io.h
> > @@ -120,9 +120,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
> >  /*
> >   *  I/O port access primitives.
> >   */
> > +#define arch_has_dev_port()    (0)
> >  #define IO_SPACE_LIMIT         0xffff
> >  #define PCI_IOBASE             ((void __iomem *)(MODULES_VADDR - SZ_2M))
> >
> > +#define ioport_map(port, nr)   (PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> > +#define ioport_unmap(addr)
>
> I'm not sure that this will work. The in[bwl], out[bwl] macros in
> arch/arm64/include/asm/io.h already add the PCI_IOBASE offset.
>
> Instead of these two #defines, why not just enforce that
> GENERIC_PCI_IOMAP is enabled? Or at least wrap these defines with 'if
> (!config_enabled(CONFIG_GENERIC_PCI_IOMAP))' or similar.

GENERIC_PCI_IOMAP *is* enabled for AArch64. We have select GENERIC_IOMAP in
arch/arm64/Kconfig which in turn selects GENERIC_PCI_IOMAP in lib/Kconfig.
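
For reference, the relevant chain in lib/Kconfig is simply:

config GENERIC_IOMAP
	bool
	select GENERIC_PCI_IOMAP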

Best regards,
Liviu

>
> > +
> >  static inline u8 inb(unsigned long addr)
> >  {
> >         return readb(addr + PCI_IOBASE);
>
>
> Andrew Murray
>

Andrew Murray Feb. 4, 2014, 1:23 p.m. UTC | #14
On 4 February 2014 12:29, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> On Mon, Feb 03, 2014 at 10:34:40PM +0000, Andrew Murray wrote:
>> On 3 February 2014 18:43, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
>> > diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
>> > index 4cc813e..ce5bad2 100644
>> > --- a/arch/arm64/include/asm/io.h
>> > +++ b/arch/arm64/include/asm/io.h
>> > @@ -120,9 +120,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
>> >  /*
>> >   *  I/O port access primitives.
>> >   */
>> > +#define arch_has_dev_port()    (0)
>> >  #define IO_SPACE_LIMIT         0xffff
>> >  #define PCI_IOBASE             ((void __iomem *)(MODULES_VADDR - SZ_2M))
>> >
>> > +#define ioport_map(port, nr)   (PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
>> > +#define ioport_unmap(addr)
>>
>> I'm not sure that this will work. The in[bwl], out[bwl] macros in
>> arch/arm64/include/asm/io.h already add the PCI_IOBASE offset.
>>
>> Instead of these two #defines, why not just enforce that
>> GENERIC_PCI_IOMAP is enabled? Or at least wrap these defines with 'if
>> (!config_enabled(CONFIG_GENERIC_PCI_IOMAP))' or similar.
>
> GENERIC_PCI_IOMAP *is* enabled for AArch64. We have select GENERIC_IOMAP in
> arch/arm64/Kconfig which in turn selects GENERIC_PCI_IOMAP in lib/Kconfig.

Sorry, my intent was to suggest using the ioport_[map|unmap] functions
in lib/iomap.c instead of defining something similar here. I guess you
will also need to define CONFIG_HAS_IOPORT for this to happen.

Andrew Murray

>
> Best regards,
> Liviu
>
>>
>> > +
>> >  static inline u8 inb(unsigned long addr)
>> >  {
>> >         return readb(addr + PCI_IOBASE);
>>
>>
>> Andrew Murray
>>
Rob Herring Feb. 4, 2014, 1:57 p.m. UTC | #15
On Tue, Feb 4, 2014 at 3:44 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Monday 03 February 2014 16:31:37 Jason Gunthorpe wrote:
>> Specifying 'use EHCI, AHCI, etc' - which are all PCI based standards
>> without clearly specifying exactly how PCI is suppose to work is
>> completely bonkers.
>>
>> What is needed is a spec that says:
>>  1) Here is how you generate config TLPs. A MMIO region that
>>     conforms to the already specified x86 ECAM would
>>     be perfect
>>  2) Here is a dword by dword break down of the entire config space in
>>     a SOC. Here is where a on-board AHCI controller must show up in
>>     config space. Here is how an external PCI-E port must show
>>     up. Etc. Most of this is already specified, but it clearly needs
>>     to be layed out explicitly for ARM SOCs to actually follow it.
>>  3) Here is how you specify the aperture(s) associated with PCI BAR's
>>     and bridge windows in config space. And yes: The CONFIG SPACE
>>     BARS MUST WORK.
>>  4) Here is how MSI works, these are the values you put in the
>>     address/data and here is how you collect the interrupt.
>>  5) Here is how Legacy INTx must be mapped into the GIC.
>>
>> This is what x86 does, and they have been doing it well for 10
>> years. If you want to play in the server game you have to properly
>> implement PCI.
>
> I'm pretty sure the authors of the SBSA actually thought that was
> what they wrote, by referring to external specifications (pci-3.0,
> ehci, ahci, ...).  However, it seems they were either foolish enough
> to believe that hardware designers would follow these specs, or
> they were intentionally misled and got talked into putting ambiguous
> terminology in because there were already hardware designs that
> are not exactly in line with the spirit of the SBSA but can be
> argued to be conforming to the text for a lax interpretation.
>
> I think EHCI is a much better example than PCI here, because the
> spec has exactly one line to say about it, where it spends a whole
> chapter on PCI.
>
> Here is how a sane person would read SBSA to create a compliant
> implementation:

s/sane/software/

>
>   I have to use EHCI version 1.1 to provide USB-2.0 support. EHCI
>   is a PCI device, so I'll put it behind a PCIe port that complies
>   to the PCIe section of the SBSA. Since EHCI by itself only provides
>   high-speed USB, and USB-2.0 mandates I provide low-speed and
>   full-speed as well, I have to add a USB hub device. It would have
>   been easier to just use OHCI for these, but SBSA says I can't.
>   Now I want to integrate the EHCI into my SoC and not waste one
>   of my precious PCIe root ports, so I have to create another PCI
>   domain with its own ECAM compliant config space to put it into.
>   Fortunately SBSA lets me add an arbitrary number of PCI domains,
>   as long as they are all strictly compliant. To software it will
>   look exactly as if it was on an external card, I just have to
>   ensure the boot loader correctly sets up the clocks and the phy
>   before an SBSA compliant OS gets loaded, all runtime power
>   management will get handled through the EHCI-1.1 energy-efficiency
>   extensions.
>
> Here is how a crazy person would read the same sentence in the SBSA:

s/crazy/hardware/

>
>   I have an IP block that implements the EHCI register set, that
>   should be good enough. It's not a fast device, so I can put it
>   on a non-coherent bus. Since the SoC will be used for networking,
>   I'll put the registers into big-endian configuration to make it
>   easier for the OS to access. I'm not allowed to have USB-1.1
>   according to SBSA, so I can get away without a hub or an extra
>   OHCI. I can't support MSI because it's not a PCI device, and
>   the GIC is pretty full, so I'll just connect the IRQ line to
>   the GPIO controller. In order to do better power management,
>   I'll design a fancy PHY that the device driver will manage
>   for implementing autosuspend. I should also give the OS
>   fine-grained control over the clocks, but it will have to share
>   the clock domain with the other devices on the same edge of the
>   chip. The EHCI device is not part of PCI, which means I don't
>   have to use the standard SMMU. However, my EHCI implementation
>   can only do 32-bit DMA, and I'll have to design my own IOMMU
>   to let it access the entire memory range. USB-OTG is a great
>   feature and we already paid for having this in our EHCI
>   implementation, better make sure it comes up in endpoint mode
>   after waking up from power saving.

My money is on the latter. I think non-PCI implementations of xHCI
interfaces will be common. This was certainly the case at Calxeda in
what was believed to be an SBSA-compliant SoC. However, I think PCI
device or not is the least of the issues, and all the other examples
you list are the difficult ones to deal with.

Rob
Arnd Bergmann Feb. 4, 2014, 4:18 p.m. UTC | #16
On Tuesday 04 February 2014, Andrew Murray wrote:
> On 4 February 2014 12:29, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > On Mon, Feb 03, 2014 at 10:34:40PM +0000, Andrew Murray wrote:
> >> On 3 February 2014 18:43, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> >> > diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
> >> > index 4cc813e..ce5bad2 100644
> >> > --- a/arch/arm64/include/asm/io.h
> >> > +++ b/arch/arm64/include/asm/io.h
> >> > @@ -120,9 +120,13 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
> >> >  /*
> >> >   *  I/O port access primitives.
> >> >   */
> >> > +#define arch_has_dev_port()    (0)
> >> >  #define IO_SPACE_LIMIT         0xffff
> >> >  #define PCI_IOBASE             ((void __iomem *)(MODULES_VADDR - SZ_2M))
> >> >
> >> > +#define ioport_map(port, nr)   (PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
> >> > +#define ioport_unmap(addr)
> >>
> >> I'm not sure that this will work. The in[bwl], out[bwl] macros in
> >> arch/arm64/include/asm/io.h already add the PCI_IOBASE offset.
> >>
> >> Instead of these two #defines, why not just enforce that
> >> GENERIC_PCI_IOMAP is enabled? Or at least wrap these defines with 'if
> >> (!config_enabled(CONFIG_GENERIC_PCI_IOMAP))' or similar.
> >
> > GENERIC_PCI_IOMAP is enabled for AArch64. We have select GENERIC_IOMAP in
> > arch/arm64/Kconfig which in turn selects GENERIC_PCI_IOMAP in lib/Kconfig.
> 
> Sorry, my intent was to suggest using the ioport_[map|unmap] functions
> in lib/iomap.c instead of defining something similar here. I guess you
> will also need to define CONFIG_HAS_IOPORT for this to happen.

We do want CONFIG_HAS_IOPORT, but I would say that we should not use
CONFIG_GENERIC_IOMAP. As I explained in another reply, enabling this
was probably a simple mistake, and we only need this if we want I/O
spaces that are /not/ memory mapped. IMHO any ARM64 system that doesn't
map its PCI I/O space into MMIO space can live without I/O port
access.

	Arnd
Jason Gunthorpe Feb. 4, 2014, 6:15 p.m. UTC | #17
On Tue, Feb 04, 2014 at 10:44:52AM +0100, Arnd Bergmann wrote:

>   Now I want to integrate the EHCI into my SoC and not waste one
>   of my precious PCIe root ports, so I have to create another PCI
>   domain with its own ECAM compliant config space to put it into.
>   Fortunately SBSA lets me add an arbitrary number of PCI domains,
>   as long as they are all strictly compliant. To software it will

Just to touch on this for others who might be reading..

IMHO any simple SOC that requires multiple domains is *broken*. A
single domain covers all reasonable needs until you get up to
mega-scale NUMA systems; encouraging people to design with multiple
domains only complicates the kernel :(

SOC internal peripherals should all show up in the bus 0 config space
of the only domain and SOC PCI-E physical ports should show up on bus
0 as PCI-PCI bridges. This is all covered in the PCI-E specs regarding
the root complex.

Generally I would expect the internal peripherals to still be
internally connected with AXI, but also connected through the ECAM
space for configuration, control, power management and address
assignment.

> 2. all address windows are set up by the boot loader, we only
>   need to know the location (IMHO this should be the
>   preferred way to do things regardless of SBSA).

Linux does a full address map re-assignment on boot, IIRC. You need
more magic to inhibit that if your BARs and bridge windows don't
work.

Hot plug is a whole other thing..

> it's possible that the designware based ones get point 4 right.

The designware ones also appear to be re-purposed endpoint cores, so
their config handling is somewhat bonkers. Tegra got theirs sort of
close because they re-used knowledge/IP from their x86 south bridges -
but even then they didn't really implement ECAM properly for an ARM
environment.

Since config space is where everyone to date has fallen down, I think
the SBSA would have been wise to list dword by dword what a typical
ECAM config space should look like.
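
For reference, ECAM pins the config address generation down completely;
a compliant accessor is just (sketch):

/* ECAM: addr = base + (bus << 20 | device << 15 | function << 12 | reg) */
static void __iomem *ecam_addr(void __iomem *base, unsigned int busnr,
			       unsigned int devfn, int where)
{
	return base + ((busnr << 20) | (devfn << 12) | where);
}

(devfn already packs device << 3 | function, hence the single shift.)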

Jason
Arnd Bergmann Feb. 4, 2014, 6:34 p.m. UTC | #18
On Tuesday 04 February 2014 11:15:14 Jason Gunthorpe wrote:
> On Tue, Feb 04, 2014 at 10:44:52AM +0100, Arnd Bergmann wrote:
> 
> >   Now I want to integrate the EHCI into my SoC and not waste one
> >   of my precious PCIe root ports, so I have to create another PCI
> >   domain with its own ECAM compliant config space to put it into.
> >   Fortunately SBSA lets me add an arbitrary number of PCI domains,
> >   as long as they are all strictly compliant. To software it will
> 
> Just to touch on this for others who might be reading..
> 
> IMHO any simple SOC that requires multiple domains is *broken*. A
> single domain covers all reasonable needs until you get up to
> mega-scale NUMA systems; encouraging people to design with multiple
> domains only complicates the kernel :(

Well, the way I see it, we already have support for arbitrary
PCI domains in the kernel, and that works fine, so we can just
as well use it. That way we don't have to partition the available
256 buses among the host bridges, and anything that needs a separate
PCI config space can live in its own world. Quite often when you
have multiple PCI hosts, they actually have different ways to
get at the config space and don't even share the same driver.

On x86, any kind of HT/PCI/PCIe/PCI-x bridge is stuffed into a
single domain so they can support OSs that only know the
traditional config space access methods, but I don't see
any real advantage to that for other architectures.

> SOC internal peripherals should all show up in the bus 0 config space
> of the only domain and SOC PCI-E physical ports should show up on bus
> 0 as PCI-PCI bridges. This is all covered in the PCI-E specs regarding
> the root complex.
> 
> Generally I would expect the internal peripherals to still be
> internally connected with AXI, but also connected through the ECAM
> space for configuration, control, power management and address
> assignment.

That would of course be very nice from a software perspective,
but I think that is much less likely for any practical
implementation.

> > 2. all address windows are set up by the boot loader, we only
> >   need to know the location (IMHO this should be the
> >   preferred way to do things regardless of SBSA).
> 
> Linux does a full address map re-assignment on boot, IIRC. You need
> > more magic to inhibit that if your BARs and bridge windows don't
> work.
> 
> Hot plug is a whole other thing..

I meant the I/O and memory space windows of the host bridge here,
which typically don't get reassigned (except on mvebu). For the
device resources, there is a per-host PCI_REASSIGN_ALL_RSRC
flag and pcibios_assign_all_busses() function that we typically
set on embedded systems where we don't trust the boot loader
to set this up correctly, or at all.
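
For example, with the existing asm-generic/pci-bridge.h helpers, such a
driver just does something like:

pci_add_flags(PCI_REASSIGN_ALL_RSRC | PCI_REASSIGN_ALL_BUS);

before scanning, and the core then renumbers the buses and reassigns
the BARs from scratch.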

On server systems, I would expect to have the firmware assign
all resources and the kernel to leave them alone. On sparc
and powerpc servers, there is even a third method, which
is to trust firmware to put the correct resources for each
device into DT, overriding what is written in the BAR.

> > it's possible that the designware based ones get point 4 right.
> 
> > The designware ones also appear to be re-purposed endpoint cores, so
> their config handling is somewhat bonkers. Tegra got theirs sort of
> close because they re-used knowledge/IP from their x86 south bridges -
> but even then they didn't really implement ECAM properly for an ARM
> environment.
> 
> Since config space is where everyone to date has fallen down, I think
> the SBSA would have been wise to list dword by dword what a typical
> ECAM config space should look like.

I absolutely agree.

	Arnd
Jason Gunthorpe Feb. 4, 2014, 7:10 p.m. UTC | #19
On Tue, Feb 04, 2014 at 07:34:50PM +0100, Arnd Bergmann wrote:

> Well, the way I see it, we already have support for arbitrary
> PCI domains in the kernel, and that works fine, so we can just
> as well use it. That way we don't have to partition the available
> 256 buses among the host bridges, and anything that needs a separate
> PCI config space can live in its own world. Quite often when you
> have multiple PCI hosts, they actually have different ways to
> get at the config space and don't even share the same driver.

> On x86, any kind of HT/PCI/PCIe/PCI-x bridge is stuffed into a
> single domain so they can support OSs that only know the
> traditional config space access methods, but I don't see
> any real advantage to that for other architectures.

Supporting a standard configuration interface is a solid reason, but
there is a lot more going on.

For instance, to support peer-to-peer IO you need to have a consistent,
non-overlapping set of bus/device/function/tag to uniquely route TLPs
within the chip. Cross domain TLP routing in HW is non-trivial.

IOMMUs (and SR-IOV) rely on the BDF to identify the originating device
for each TLP. Multiple domains means a much more complex IOMMU
environment.

Failure to integrate on-chip devices into the PCI world also means
things like SR-IOV won't work sanely with on-chip devices.

The only reason we should see multi-domain on a SOC is because the HW
design was lazy. Being lazy misses the Big Picture where PCI is the
cornerstone of many important Server/Enterprise technologies.

> > SOC internal peripherals should all show up in the bus 0 config space
> > of the only domain and SOC PCI-E physical ports should show up on bus
> > 0 as PCI-PCI bridges. This is all covered in the PCI-E specs regarding
> > the root complex.
> > 
> > Generally I would expect the internal peripherals to still be
> > internally connected with AXI, but also connected through the ECAM
> > space for configuration, control, power management and address
> > assignment.
> 
> That would of course be very nice from a software perspective,
> but I think that is much less likely for any practical
> implementation.

Well, all x86 implementations do this already. It actually isn't that
big a deal from a HW perspective, you just have to think about it
fully, understand PCI, and position your registers accordingly.

Jason
Arnd Bergmann Feb. 4, 2014, 7:21 p.m. UTC | #20
On Tuesday 04 February 2014 12:10:55 Jason Gunthorpe wrote:
> 
> For instance, to support peer-to-peer IO you need to have a consistent,
> non-overlapping set of bus/device/function/tag to uniquely route TLPs
> within the chip. Cross domain TLP routing in HW is non-trivial.

Yes, that is a good reason.

> IOMMUs (and SR-IOV) rely on the BDF to identify the originating device
> for each TLP. Multiple domains means a much more complex IOMMU
> environment.

I fear we already have to support complex IOMMU setups on ARM,
whether there are multiple PCI domains or not. But it would be
nice in theory not to require it.

> Failure to integrate on-chip devices into the PCI world also means
> things like SR-IOV won't work sanely with on-chip devices.

I'd consider this a feature ;)

But you are probably right: people will do SR-IOV whether we like
it or not, and they will try to do it on non-PCI devices too,
and great suffering will be involved.

	Arnd
Arnd Bergmann Feb. 4, 2014, 7:50 p.m. UTC | #21
On Tuesday 04 February 2014, Rob Herring wrote:
> > Here is how a sane person would read SBSA to create a compliant
> > implementation:
> 
> s/sane/software/

> > Here is how a crazy person would read the same sentence in the SBSA:
> 
> s/crazy/hardware/

Not much of a difference, apparently ...

> My money is on the latter. I think non-PCI implementations of xHCI
> interfaces will be common. This was certainly the case at Calxeda in
> what was believed to be a SBSA compliant SOC.

I just looked up the EHCI and xHCI specs and am shocked to see that
the PCI config space access for both is an optional part of the
spec, so that assertion may well have been correct.

On the other hand, it does seem impossible to create a compliant
AHCI implementation without making it show up as a PCI function,
so any SBSA compliant SoC that contains AHCI already has to have
all the bits for doing the same on USB.

> However, I think PCI
> device or not is the least of the issues and all the other examples
> you list are the difficult ones to deal with.

Agreed. But if they get the difficult problems right, it's
trivial to also do the PCI config space either in the way
that Jason described, or as a separate PCI domain. In the worst
case, it could still be faked up by a secure-mode firmware
catching all config space accesses.

	Arnd
Yijing Wang Feb. 18, 2014, 6:33 a.m. UTC | #22
> +#include <linux/init.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <linux/of_pci.h>
> +#include <linux/of_platform.h>
> +#include <linux/slab.h>
> +
> +#include <asm/pci-bridge.h>
> +
> +
> +/*
> + * Return the domain number for this bus
> + */
> +int pci_domain_nr(struct pci_bus *bus)
> +{
> +	struct pci_host_bridge *bridge = to_pci_host_bridge(bus->bridge);

Is bus here always the root bus? If not, what about using find_pci_host_bridge() to get the pci_host_bridge
instead?

> +
> +	if (bridge)
> +		return bridge->domain_nr;
> +
> +	return 0;
> +}
> +
> +int pci_proc_domain(struct pci_bus *bus)
> +{
> +	return pci_domain_nr(bus);
> +}
> +
> +/*
> + * Called after each bus is probed, but before its children are examined
> + */
> +void pcibios_fixup_bus(struct pci_bus *bus)
> +{
> +	struct pci_dev *dev;
> +	struct resource *res;
> +	int i;
> +
> +	if (bus->self != NULL) {

What about using !pci_is_root_bus()?
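
For reference, pci_is_root_bus() in include/linux/pci.h is just:

static inline bool pci_is_root_bus(struct pci_bus *pbus)
{
	return !(pbus->self);
}

so the check is equivalent, only more readable.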

> +		pci_read_bridge_bases(bus);
> +
> +		pci_bus_for_each_resource(bus, res, i) {
> +			if (!res || !res->flags || res->parent)
> +				continue;
> +
> +			/*
> +			 * If we are going to reassign everything, we can
> +			 * shrink the P2P resource to have zero size to
> +			 * save space
> +			 */
> +			if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
> +				res->flags |= IORESOURCE_UNSET;
> +				res->start = 0;
> +				res->end = -1;
> +				continue;
> +			}
> +		}
> +	}
> +
Liviu Dudau Feb. 20, 2014, 2:38 p.m. UTC | #23
On Tue, Feb 18, 2014 at 02:33:53PM +0800, Yijing Wang wrote:
> > +#include <linux/init.h>
> > +#include <linux/io.h>
> > +#include <linux/kernel.h>
> > +#include <linux/mm.h>
> > +#include <linux/of_pci.h>
> > +#include <linux/of_platform.h>
> > +#include <linux/slab.h>
> > +
> > +#include <asm/pci-bridge.h>
> > +
> > +
> > +/*
> > + * Return the domain number for this bus
> > + */
> > +int pci_domain_nr(struct pci_bus *bus)
> > +{
> > +	struct pci_host_bridge *bridge = to_pci_host_bridge(bus->bridge);

Hi Yijing,

> 
> Is bus here always the root bus? If not, what about using find_pci_host_bridge() to get the pci_host_bridge
> instead?

Yes, the call should be to find_pci_host_bridge(). I sort of implied that the
bus is always the root bus, which is obviously not correct. Thanks for pointing it out.
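
Something like this, then (untested sketch, assuming find_pci_host_bridge()
gets exported so arch code can use it):

int pci_domain_nr(struct pci_bus *bus)
{
	struct pci_host_bridge *bridge = find_pci_host_bridge(bus);

	if (bridge)
		return bridge->domain_nr;

	return 0;
}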

> 
> > +
> > +	if (bridge)
> > +		return bridge->domain_nr;
> > +
> > +	return 0;
> > +}
> > +
> > +int pci_proc_domain(struct pci_bus *bus)
> > +{
> > +	return pci_domain_nr(bus);
> > +}
> > +
> > +/*
> > + * Called after each bus is probed, but before its children are examined
> > + */
> > +void pcibios_fixup_bus(struct pci_bus *bus)
> > +{
> > +	struct pci_dev *dev;
> > +	struct resource *res;
> > +	int i;
> > +
> > +	if (bus->self != NULL) {
> 
> What about using !pci_is_root_bus()?

Again, good call, will change.

> 
> > +		pci_read_bridge_bases(bus);
> > +
> > +		pci_bus_for_each_resource(bus, res, i) {
> > +			if (!res || !res->flags || res->parent)
> > +				continue;
> > +
> > +			/*
> > +			 * If we are going to reassign everything, we can
> > +			 * shrink the P2P resource to have zero size to
> > +			 * save space
> > +			 */
> > +			if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
> > +				res->flags |= IORESOURCE_UNSET;
> > +				res->start = 0;
> > +				res->end = -1;
> > +				continue;
> > +			}
> > +		}
> > +	}
> > +
> 
diff mbox

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f8e5ee6..48fdd69 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -133,6 +133,23 @@  menu "Bus support"
 config ARM_AMBA
 	bool
 
+config PCI
+	bool "PCI support"
+	help
+	  This feature enables support for the PCIe bus system. If you say Y
+	  here, the kernel will include drivers and infrastructure code
+	  to support PCIe bus devices.
+
+config PCI_DOMAINS
+	def_bool PCI
+
+config PCI_SYSCALL
+	def_bool PCI
+
+source "drivers/pci/Kconfig"
+source "drivers/pci/pcie/Kconfig"
+source "drivers/pci/hotplug/Kconfig"
+
 endmenu
 
 menu "Kernel Features"
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 71c53ec..46924bc 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -26,6 +26,7 @@  generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += mutex.h
 generic-y += pci.h
+generic-y += pci-bridge.h
 generic-y += poll.h
 generic-y += posix_types.h
 generic-y += resource.h
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 4cc813e..ce5bad2 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -120,9 +120,13 @@  static inline u64 __raw_readq(const volatile void __iomem *addr)
 /*
  *  I/O port access primitives.
  */
+#define arch_has_dev_port()	(0)
 #define IO_SPACE_LIMIT		0xffff
 #define PCI_IOBASE		((void __iomem *)(MODULES_VADDR - SZ_2M))
 
+#define ioport_map(port, nr)	(PCI_IOBASE + ((port) & IO_SPACE_LIMIT))
+#define ioport_unmap(addr)
+
 static inline u8 inb(unsigned long addr)
 {
 	return readb(addr + PCI_IOBASE);
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h
new file mode 100644
index 0000000..dd084a3
--- /dev/null
+++ b/arch/arm64/include/asm/pci.h
@@ -0,0 +1,35 @@ 
+#ifndef __ASM_PCI_H
+#define __ASM_PCI_H
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm-generic/pci-bridge.h>
+#include <asm-generic/pci-dma-compat.h>
+
+#define PCIBIOS_MIN_IO		0
+#define PCIBIOS_MIN_MEM		0
+
+/*
+ * Set to 1 if the kernel should re-assign all PCI bus numbers
+ */
+#define pcibios_assign_all_busses() \
+	(pci_has_flag(PCI_REASSIGN_ALL_BUS))
+
+/*
+ * PCI address space differs from physical memory address space
+ */
+#define PCI_DMA_BUS_IS_PHYS	(0)
+
+extern int isa_dma_bridge_buggy;
+
+extern int pci_domain_nr(struct pci_bus *bus);
+extern int pci_proc_domain(struct pci_bus *bus);
+
+extern int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr);
+
+#endif  /* __KERNEL__ */
+#endif  /* __ASM_PCI_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 2d7fcc1..8cfec47 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -21,6 +21,7 @@  arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_SCHED_HMP)		+= sched_hmp.o
+arm64-obj-$(CONFIG_PCI)			+= pci.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c
new file mode 100644
index 0000000..7b652cf
--- /dev/null
+++ b/arch/arm64/kernel/pci.c
@@ -0,0 +1,112 @@ 
+/*
+ * Code borrowed from powerpc/kernel/pci-common.c
+ *
+ * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2014 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/of_pci.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+
+#include <asm/pci-bridge.h>
+
+
+/*
+ * Return the domain number for this bus
+ */
+int pci_domain_nr(struct pci_bus *bus)
+{
+	struct pci_host_bridge *bridge = to_pci_host_bridge(bus->bridge);
+
+	if (bridge)
+		return bridge->domain_nr;
+
+	return 0;
+}
+
+int pci_proc_domain(struct pci_bus *bus)
+{
+	return pci_domain_nr(bus);
+}
+
+/*
+ * Called after each bus is probed, but before its children are examined
+ */
+void pcibios_fixup_bus(struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+	struct resource *res;
+	int i;
+
+	if (bus->self != NULL) {
+		pci_read_bridge_bases(bus);
+
+		pci_bus_for_each_resource(bus, res, i) {
+			if (!res || !res->flags || res->parent)
+				continue;
+
+			/*
+			 * If we are going to reassign everything, we can
+			 * shrink the P2P resource to have zero size to
+			 * save space
+			 */
+			if (pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+				res->flags |= IORESOURCE_UNSET;
+				res->start = 0;
+				res->end = -1;
+				continue;
+			}
+		}
+	}
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		/* Ignore fully discovered devices */
+		if (dev->is_added)
+			continue;
+
+		set_dev_node(&dev->dev, pcibus_to_node(dev->bus));
+
+		/* Read default IRQs and fixup if necessary */
+		dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
+	}
+}
+EXPORT_SYMBOL(pcibios_fixup_bus);
+
+/*
+ * We don't have to worry about legacy ISA devices, so nothing to do here
+ */
+resource_size_t pcibios_align_resource(void *data, const struct resource *res,
+				resource_size_t size, resource_size_t align)
+{
+	return ALIGN(res->start, align);
+}
+EXPORT_SYMBOL(pcibios_align_resource);
+
+int pcibios_enable_device(struct pci_dev *dev, int mask)
+{
+	return pci_enable_resources(dev, mask);
+}
+
+void pcibios_fixup_bridge_ranges(struct list_head *resources)
+{
+}
+
+int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
+{
+	BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
+
+	return ioremap_page_range((unsigned long)PCI_IOBASE + offset,
+				(unsigned long)PCI_IOBASE + offset + SZ_64K,
+				phys_addr,
+				__pgprot(PROT_DEVICE_nGnRE));
+}