[v8,4/9] pci: OF: Fix the conversion of IO ranges into IO resources.

Message ID 1404240214-9804-5-git-send-email-Liviu.Dudau@arm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas

Commit Message

Liviu Dudau July 1, 2014, 6:43 p.m. UTC
The ranges property for a host bridge controller in DT describes
the mapping between the PCI bus address and the CPU physical address.
The resources framework however expects that the IO resources start
at a pseudo "port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.
The conversion from pci ranges to resources failed to take that into account.

In the process move the function into drivers/of/address.c as it now
depends on pci_address_to_pio() code and make it return an error message.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Tested-by: Tanmay Inamdar <tinamdar@apm.com>
---
 drivers/of/address.c       | 47 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_address.h | 13 ++-----------
 2 files changed, 49 insertions(+), 11 deletions(-)
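
For context, here is a minimal sketch of the failure mode being fixed (hypothetical
addresses, and "np" is an assumed struct device_node pointer; this is not part of
the patch). With the old inline conversion, an IO range kept its CPU physical
address in res->start while being flagged IORESOURCE_IO, so the result could not
be inserted into the port-number-based ioport_resource tree:

struct of_pci_range range = {
	.flags    = IORESOURCE_IO,
	.cpu_addr = 0x3eff0000,	/* example 64K IO window */
	.size     = 0x10000,
};
struct resource res;

of_pci_range_to_resource(&range, np, &res);	/* old inline version */
/*
 * res.start == 0x3eff0000, a CPU address usually far above
 * IO_SPACE_LIMIT, so request_resource(&ioport_resource, &res) fails.
 * With this patch, res.start becomes a port number instead (0 for the
 * first registered IO range).
 */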

Comments

Rob Herring July 5, 2014, 7:25 p.m. UTC | #1
On Tue, Jul 1, 2014 at 1:43 PM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> The ranges property for a host bridge controller in DT describes
> the mapping between the PCI bus address and the CPU physical address.
> The resources framework however expects that the IO resources start
> at a pseudo "port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.
> The conversion from pci ranges to resources failed to take that into account.

I don't think this change is right. There are 2 resources: the PCI bus
addresses and cpu addresses. This function deals with the cpu
addresses. Returning pci addresses for i/o and cpu addresses for
memory is going to be error prone. We probably need both cpu and pci
resources exposed to host controllers.

Making the new function only deal with i/o bus resources and naming it
of_pci_range_to_io_resource would be better.

Rob

Arnd Bergmann July 5, 2014, 8:46 p.m. UTC | #2
On Saturday 05 July 2014 14:25:52 Rob Herring wrote:
> On Tue, Jul 1, 2014 at 1:43 PM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > The ranges property for a host bridge controller in DT describes
> > the mapping between the PCI bus address and the CPU physical address.
> > The resources framework however expects that the IO resources start
> > at a pseudo "port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.
> > The conversion from pci ranges to resources failed to take that into account.
> 
> I don't think this change is right. There are 2 resources: the PCI bus
> addresses and cpu addresses. This function deals with the cpu
> addresses. Returning pci addresses for i/o and cpu addresses for
> memory is going to be error prone. We probably need both cpu and pci
> resources exposed to host controllers.
> 
> Making the new function only deal with i/o bus resources and naming it
> of_pci_range_to_io_resource would be better.

I think you are correct that this change by itself will break existing
drivers that rely on the current behavior of of_pci_range_to_resource,
but there is also something wrong with the existing implementation:

of_pci_range_to_resource() at the moment returns the address in
cpu address space (i.e. IORESOURCE_MEM) but sets the res->flags
value to IORESOURCE_IO, which means it doesn't fit into the resource
tree. Liviu's version gets that part right, and it would be nice
to fix that eventually, however we do it here.

	Arnd
Liviu Dudau July 7, 2014, 11:11 a.m. UTC | #3
On Sat, Jul 05, 2014 at 09:46:09PM +0100, Arnd Bergmann wrote:
> On Saturday 05 July 2014 14:25:52 Rob Herring wrote:
> > On Tue, Jul 1, 2014 at 1:43 PM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > > The ranges property for a host bridge controller in DT describes
> > > the mapping between the PCI bus address and the CPU physical address.
> > > The resources framework however expects that the IO resources start
> > > at a pseudo "port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.
> > > The conversion from pci ranges to resources failed to take that into account.
> > 
> > I don't think this change is right. There are 2 resources: the PCI bus
> > addresses and cpu addresses. This function deals with the cpu
> > addresses. Returning pci addresses for i/o and cpu addresses for
> > memory is going to be error prone. We probably need both cpu and pci
> > resources exposed to host controllers.
> > 
> > Making the new function only deal with i/o bus resources and naming it
> > of_pci_range_to_io_resource would be better.
> 
> I think you are correct that this change by itself will break existing
> drivers that rely on the current behavior of of_pci_range_to_resource,
> but there is also something wrong with the existing implementation:

Either I'm very confused or I've managed to confuse everyone else. The I/O
resources described using CPU addresses *are* using "pseudo" port based
addresses (or at least that is my understanding and my reading of the code).
Can you point me to a function that is expecting the IO resource to have
the start address at the physical address of the mapped space?

I was trying to fix exactly this issue, that you cannot use the resource
structure returned by this function in any call that is expecting an IO
resource.

Rob, you can try this function with two host bridges. Patch [3/9] changes
pci_address_to_pio() to calculate the offset of the range based on already
registered ranges, so the first host bridge will have its IO resources
starting from zero, but the second host bridge will have .start offset
by the size of the IO space of the first bridge. That is not a PCI bus
address AFAICT.
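
As a concrete sketch of that behaviour (hypothetical CPU addresses; assumes
both registrations succeed):

pci_register_io_range(0x3eff0000, 0x10000);	/* bridge 0: 64K window */
pci_register_io_range(0x7eff0000, 0x10000);	/* bridge 1: 64K window */

pci_address_to_pio(0x3eff0000);	/* == 0x0     : bridge 0 ports 0-0xffff     */
pci_address_to_pio(0x7eff0000);	/* == 0x10000 : bridge 1 offset by 64K of IO */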

Best regards,
Liviu

> 
> of_pci_range_to_resource() at the moment returns the address in
> cpu address space (i.e. IORESOURCE_MEM) but sets the res->flags
> value to IORESOURCE_IO, which means it doesn't fit into the resource
> tree. Liviu's version gets that part right, and it would be nice
> to fix that eventually, however we do it here.
> 
> 	Arnd
> 
>
Arnd Bergmann July 7, 2014, 9:22 p.m. UTC | #4
On Monday 07 July 2014, Liviu Dudau wrote:
> On Sat, Jul 05, 2014 at 09:46:09PM +0100, Arnd Bergmann wrote:
> > On Saturday 05 July 2014 14:25:52 Rob Herring wrote:
> > > On Tue, Jul 1, 2014 at 1:43 PM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > > > The ranges property for a host bridge controller in DT describes
> > > > the mapping between the PCI bus address and the CPU physical address.
> > > > The resources framework however expects that the IO resources start
> > > > at a pseudo "port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.
> > > > The conversion from pci ranges to resources failed to take that into account.
> > > 
> > > I don't think this change is right. There are 2 resources: the PCI bus
> > > addresses and cpu addresses. This function deals with the cpu
> > > addresses. Returning pci addresses for i/o and cpu addresses for
> > > memory is going to be error prone. We probably need both cpu and pci
> > > resources exposed to host controllers.
> > > 
> > > Making the new function only deal with i/o bus resources and naming it
> > > of_pci_range_to_io_resource would be better.
> > 
> > I think you are correct that this change by itself will break existing
> > drivers that rely on the current behavior of of_pci_range_to_resource,
> > but there is also something wrong with the existing implementation:
> 
> Either I'm very confused or I've managed to confuse everyone else. The I/O
> resources described using CPU addresses *are* using "pseudo" port based
> addresses (or at least that is my understanding and my reading of the code).
> Can you point me to a function that is expecting the IO resource to have
> the start address at the physical address of the mapped space?

pci_v3_preinit() in arch/arm/mach-integrator/pci_v3.c for instance takes
the resource returned by of_pci_range_to_resource and programs the
start and size into hardware registers that expect a physical address
as far as I can tell.

> I was trying to fix exactly this issue, that you cannot use the resource
> structure returned by this function in any call that is expecting an IO
> resource.

I looked at the other drivers briefly, and I think you indeed fix the Tegra
driver with this but break the integrator driver as mentioned above.
The other callers of of_pci_range_to_resource() are apparently not
impacted as they recalculate the values they get.

	Arnd
Liviu Dudau July 8, 2014, 10:03 a.m. UTC | #5
On Mon, Jul 07, 2014 at 10:22:00PM +0100, Arnd Bergmann wrote:
> On Monday 07 July 2014, Liviu Dudau wrote:
> > On Sat, Jul 05, 2014 at 09:46:09PM +0100, Arnd Bergmann wrote:
> > > On Saturday 05 July 2014 14:25:52 Rob Herring wrote:
> > > > On Tue, Jul 1, 2014 at 1:43 PM, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > > > > The ranges property for a host bridge controller in DT describes
> > > > > the mapping between the PCI bus address and the CPU physical address.
> > > > > The resources framework however expects that the IO resources start
> > > > > at a pseudo "port" address 0 (zero) and have a maximum size of IO_SPACE_LIMIT.
> > > > > The conversion from pci ranges to resources failed to take that into account.
> > > > 
> > > > I don't think this change is right. There are 2 resources: the PCI bus
> > > > addresses and cpu addresses. This function deals with the cpu
> > > > addresses. Returning pci addresses for i/o and cpu addresses for
> > > > memory is going to be error prone. We probably need both cpu and pci
> > > > resources exposed to host controllers.
> > > > 
> > > > Making the new function only deal with i/o bus resources and naming it
> > > > of_pci_range_to_io_resource would be better.
> > > 
> > > I think you are correct that this change by itself will break existing
> > > drivers that rely on the current behavior of of_pci_range_to_resource,
> > > but there is also something wrong with the existing implementation:
> > 
> > Either I'm very confused or I've managed to confuse everyone else. The I/O
> > resources described using CPU addresses *are* using "pseudo" port based
> > addresses (or at least that is my understanding and my reading of the code).
> > Can you point me to a function that is expecting the IO resource to have
> > the start address at the physical address of the mapped space?
> 
> pci_v3_preinit() in arch/arm/mach-integrator/pci_v3.c for instance takes
> the resource returned by of_pci_range_to_resource and programs the
> start and size into hardware registers that expect a physical address
> as far as I can tell.
> 
> > I was trying to fix exactly this issue, that you cannot use the resource
> > structure returned by this function in any call that is expecting an IO
> > resource.
> 
> I looked at the other drivers briefly, and I think you indeed fix the Tegra
> driver with this but break the integrator driver as mentioned above.
> The other callers of of_pci_range_to_resource() are apparently not
> impacted as they recalculate the values they get.

I would argue that the integrator version has broken assumptions. If it tried
to allocate that IO range or request the resource as currently returned by
of_pci_range_to_resource (without my patch), it would fail. I know because I did
the same thing in my host bridge driver and it failed miserably. That's why I
tried to patch it.

I will lay out my argument here and people can tell me if I am wrong:

PCI IO resources (even if they are memory mapped on certain architectures) need
to emulate the x86 world "port" concept. Why do I think this? Because of this
structure at the beginning of kernel/resource.c:

struct resource ioport_resource = {
	.name	= "PCI IO",
	.start	= 0,
	.end	= IO_SPACE_LIMIT,
	.flags	= IORESOURCE_IO,
};
EXPORT_SYMBOL(ioport_resource);

The other resource that people seem to confuse it with is the next one in that
file:

struct resource iomem_resource = {
	.name	= "PCI mem",
	.start	= 0,
	.end	= -1,
	.flags	= IORESOURCE_MEM,
};
EXPORT_SYMBOL(iomem_resource);

Now, there are architectures that override the .start and .end values, but arm
is not one of those, and mach-integrator doesn't change it either. So one can
play with the ioport_resource values to move the "port" window wherever he/she
wants, but it doesn't change the "port access" way of addressing it.

If the IO space is memory mapped, then we use the port number, the io_offset
and the PCI_IOBASE to get to the virtual address that, when accessed, will
generate the correct addresses on the bus, based on what the host bridge has
been configured.

This is the current level of my understanding of PCI IO.
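
A simplified sketch of that access path, modelled on the asm-generic/io.h
scheme (not the literal kernel implementation):

static inline u8 my_inb(unsigned long port)
{
	/*
	 * PCI_IOBASE is the fixed virtual base of the memory-mapped IO
	 * window; "port" is the Linux port number, which already has the
	 * bridge's io_offset folded in.
	 */
	return readb(PCI_IOBASE + port);
}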

Now, I believe Rob has switched entirely to using my series in some test that
he has run and he hasn't encountered any issues, as long as one remembers in
the host bridge driver to add the io_base offset to the .start resource. If
not then I need to patch pci_v3.c.

Best regards,
Liviu

> 
> 	Arnd
>
Arnd Bergmann July 9, 2014, 8:31 a.m. UTC | #6
On Tuesday 08 July 2014, Liviu Dudau wrote:
> On Mon, Jul 07, 2014 at 10:22:00PM +0100, Arnd Bergmann wrote:
> > 
> > I looked at the other drivers briefly, and I think you indeed fix the Tegra
> > driver with this but break the integrator driver as mentioned above.
> > The other callers of of_pci_range_to_resource() are apparently not
> > impacted as they recalculate the values they get.
> 
> I would argue that the integrator version has broken assumptions. If it tried
> to allocate that IO range or request the resource as currently returned by
> of_pci_range_to_resource (without my patch), it would fail. I know because I did
> the same thing in my host bridge driver and it failed miserably. That's why I
> tried to patch it.

The integrator code was just introduced and the reason for how it does things
is the way that of_pci_range_to_resource() works today. We tried to cope with
it and not change the existing behavior in order to not break any other drivers.

It's certainly not fair to call the integrator version broken, it just works
around the common code having a quirky interface. We should probably have
done of_pci_range_to_resource better than it is today (I would have argued
for it to return an IORESOURCE_MEM with the CPU address), but it took long
enough to get that merged and I was sick of arguing about it.

> If the IO space is memory mapped, then we use the port number, the io_offset
> and the PCI_IOBASE to get to the virtual address that, when accessed, will
> generate the correct addresses on the bus, based on what the host bridge has
> been configured.
> 
> This is the current level of my understanding of PCI IO.

Your understanding is absolutely correct, and that's great because very few
people get that right. What I think we're really arguing about is what the
of_pci_range_to_resource is supposed to return. As you and Bjorn both pointed
out earlier, there are in fact two resources associated with the I/O window
and the flaw in the current implementation is that of_pci_range_to_resource
returns the numeric values for the IORESOURCE_MEM resource, but sets the
type to IORESOURCE_IO, which is offset from that by PCI_IOBASE.

You try to fix that by making it return the correct IORESOURCE_IO resource,
which is a reasonable approach but you must not break drivers that rely
on the broken resource while doing that.

The approach that I would have picked is to return the IORESOURCE_MEM
resource associated with the I/O window and pick a (basically random)
IORESOURCE_IO resource struct based on what hasn't been used and then
compute the appropriate io_offset from that. This approach of course
would also have required fixing up all drivers relying on the current
behavior.

To be clear, I'm fine with you (and Bjorn if he cares) picking the
approach you like here, either one of these works fine as long as the
host drivers use the interface in the way it is defined.

> Now, I believe Rob has switched entirely to using my series in some test that
> he has run and he hasn't encountered any issues, as long as one remembers in
> the host bridge driver to add the io_base offset to the .start resource. If
> not then I need to patch pci_v3.c.

The crazy part of all these discussions is that basically nobody ever uses
I/O port access, so it's very hard to test and we don't even notice when
we get it wrong, but we end up spending most of the time for PCI host controller
reviews trying to get these right.

I'm very thankful that you are doing this work to get it moved into common
code so hopefully this is the last time we ever have to worry about it because
all future drivers will be able to use the common implementation.

	Arnd
Liviu Dudau July 9, 2014, 9:27 a.m. UTC | #7
On Wed, Jul 09, 2014 at 09:31:50AM +0100, Arnd Bergmann wrote:
> On Tuesday 08 July 2014, Liviu Dudau wrote:
> > On Mon, Jul 07, 2014 at 10:22:00PM +0100, Arnd Bergmann wrote:
> > > 
> > > I looked at the other drivers briefly, and I think you indeed fix the Tegra
> > > driver with this but break the integrator driver as mentioned above.
> > > The other callers of of_pci_range_to_resource() are apparently not
> > > impacted as they recalculate the values they get.
> > 
> > I would argue that the integrator version has broken assumptions. If it tried
> > to allocate that IO range or request the resource as currently returned by
> > of_pci_range_to_resource (without my patch), it would fail. I know because I did
> > the same thing in my host bridge driver and it failed miserably. That's why I
> > tried to patch it.
> 
> The integrator code was just introduced and the reason for how it does things
> is the way that of_pci_range_to_resource() works today. We tried to cope with
> it and not change the existing behavior in order to not break any other drivers.
> 
> It's certainly not fair to call the integrator version broken, it just works
> around the common code having a quirky interface. We should probably have
> done of_pci_range_to_resource better than it is today (I would have argued
> for it to return an IORESOURCE_MEM with the CPU address), but it took long
> enough to get that merged and I was sick of arguing about it.

Understood. That is why I have carefully worded my email so as not to diss anyone.
I didn't say the code is broken, I've said it has broken assumptions.

> 
> > If the IO space is memory mapped, then we use the port number, the io_offset
> > and the PCI_IOBASE to get to the virtual address that, when accessed, will
> > generate the correct addresses on the bus, based on what the host bridge has
> > been configured.
> > 
> > This is the current level of my understanding of PCI IO.
> 
> Your understanding is absolutely correct, and that's great because very few
> people get that right. What I think we're really arguing about is what the
> of_pci_range_to_resource is supposed to return. As you and Bjorn both pointed
> out earlier, there are in fact two resources associated with the I/O window
> and the flaw in the current implementation is that of_pci_range_to_resource
> returns the numeric values for the IORESOURCE_MEM resource, but sets the
> type to IORESOURCE_IO, which is offset from that by PCI_IOBASE.
> 
> You try to fix that by making it return the correct IORESOURCE_IO resource,
> which is a reasonable approach but you must not break drivers that rely
> on the broken resource while doing that.

Or I need to fix the existing drivers that rely on the old behaviour.

> 
> The approach that I would have picked is to return the IORESOURCE_MEM
> resource associated with the I/O window and pick a (basically random)
> IORESOURCE_IO resource struct based on what hasn't been used and then
> compute the appropriate io_offset from that. This approach of course
> would also have required fixing up all drivers relying on the current
> behavior.
> 
> To be clear, I'm fine with you (and Bjorn if he cares) picking the
> approach you like here, either one of these works fine as long as the
> host drivers use the interface in the way it is defined.

OK. Thanks for that. It does look like either way some existing code needs
fixing, so I will have a look at that. Unless Bjorn votes for making a new
version of pci_range_to_resource().

> 
> > Now, I believe Rob has switched entirely to using my series in some test that
> > he has run and he hasn't encountered any issues, as long as one remembers in
> > the host bridge driver to add the io_base offset to the .start resource. If
> > not then I need to patch pci_v3.c.
> 
> The crazy part of all these discussions is that basically nobody ever uses
> I/O port access, so it's very hard to test and we don't even notice when
> we get it wrong, but we end up spending most of the time for PCI host controller
> reviews trying to get these right.
> 
> I'm very thankful that you are doing this work to get it moved into common
> code so hopefully this is the last time we ever have to worry about it because
> all future drivers will be able to use the common implementation.

Ahh, we humans! We always hope for the best! :)

My only chance of succeeding is if I make it a no-brainer for people to use the
code. At the moment the interface for host bridge drivers is not too bad, but
it looks like the internals are still hard to comprehend.

Best regards,
Liviu

> 
> 	Arnd
>
Rob Herring July 16, 2014, 2:35 p.m. UTC | #8
On Wed, Jul 9, 2014 at 3:31 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> On Tuesday 08 July 2014, Liviu Dudau wrote:
>> On Mon, Jul 07, 2014 at 10:22:00PM +0100, Arnd Bergmann wrote:
>> >
>> > I looked at the other drivers briefly, and I think you indeed fix the Tegra
>> > driver with this but break the integrator driver as mentioned above.
>> > The other callers of of_pci_range_to_resource() are apparently not
>> > impacted as they recalculate the values they get.
>>
>> I would argue that the integrator version has broken assumptions. If it tried
>> to allocate that IO range or request the resource as currently returned by
>> of_pci_range_to_resource (without my patch), it would fail. I know because I did
>> the same thing in my host bridge driver and it failed miserably. That's why I
>> tried to patch it.
>
> The integrator code was just introduced and the reason for how it does things
> is the way that of_pci_range_to_resource() works today. We tried to cope with
> it and not change the existing behavior in order to not break any other drivers.
>
> It's certainly not fair to call the integrator version broken, it just works
> around the common code having a quirky interface. We should probably have
> done of_pci_range_to_resource better than it is today (I would have argued
> for it to return an IORESOURCE_MEM with the CPU address), but it took long
> enough to get that merged and I was sick of arguing about it.
>
>> If the IO space is memory mapped, then we use the port number, the io_offset
>> and the PCI_IOBASE to get to the virtual address that, when accessed, will
>> generate the correct addresses on the bus, based on what the host bridge has
>> been configured.
>>
>> This is the current level of my understanding of PCI IO.

What is io_offset supposed to be and be based on?

> Your understanding is absolutely correct, and that's great because very few
> people get that right. What I think we're really arguing about is what the
> of_pci_range_to_resource is supposed to return. As you and Bjorn both pointed
> out earlier, there are in fact two resources associated with the I/O window
> and the flaw in the current implementation is that of_pci_range_to_resource
> returns the numeric values for the IORESOURCE_MEM resource, but sets the
> type to IORESOURCE_IO, which is offset from that by PCI_IOBASE.
>
> You try to fix that by making it return the correct IORESOURCE_IO resource,
> which is a reasonable approach but you must not break drivers that rely
> on the broken resource while doing that.
>
> The approach that I would have picked is to return the IORESOURCE_MEM
> resource associated with the I/O window and pick a (basically random)
> IORESOURCE_IO resource struct based on what hasn't been used and then
> compute the appropriate io_offset from that. This approach of course
> would also have required fixing up all drivers relying on the current
> behavior.
>
> To be clear, I'm fine with you (and Bjorn if he cares) picking the
> approach you like here, either one of these works fine as long as the
> host drivers use the interface in the way it is defined.
>
>> Now, I believe Rob has switched entirely to using my series in some test that
>> he has run and he hasn't encountered any issues, as long as one remembers in
>> the host bridge driver to add the io_base offset to the .start resource. If
>> not then I need to patch pci_v3.c.
>
> The crazy part of all these discussions is that basically nobody ever uses
> I/O port access, so it's very hard to test and we don't even notice when
> we get it wrong, but we end up spending most of the time for PCI host controller
> reviews trying to get these right.

FWIW, I test i/o accesses with Versatile QEMU. The LSI53xxxx device in
the model has a kconfig option to use i/o accesses. However, I have
seen in the past this is an area where 2 wrongs can make a right.

Rob
Liviu Dudau July 16, 2014, 2:47 p.m. UTC | #9
On Wed, Jul 16, 2014 at 03:35:37PM +0100, Rob Herring wrote:
> On Wed, Jul 9, 2014 at 3:31 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Tuesday 08 July 2014, Liviu Dudau wrote:
> >> On Mon, Jul 07, 2014 at 10:22:00PM +0100, Arnd Bergmann wrote:
> >> >
> >> > I looked at the other drivers briefly, and I think you indeed fix the Tegra
> >> > driver with this but break the integrator driver as mentioned above.
> >> > The other callers of of_pci_range_to_resource() are apparently not
> >> > impacted as they recalculate the values they get.
> >>
> >> I would argue that the integrator version has broken assumptions. If it tried
> >> to allocate that IO range or request the resource as currently returned by
> >> of_pci_range_to_resource (without my patch), it would fail. I know because I did
> >> the same thing in my host bridge driver and it failed miserably. That's why I
> >> tried to patch it.
> >
> > The integrator code was just introduced and the reason for how it does things
> > is the way that of_pci_range_to_resource() works today. We tried to cope with
> > it and not change the existing behavior in order to not break any other drivers.
> >
> > It's certainly not fair to call the integrator version broken, it just works
> > around the common code having a quirky interface. We should probably have
> > done of_pci_range_to_resource better than it is today (I would have argued
> > for it to return an IORESOURCE_MEM with the CPU address), but it took long
> > enough to get that merged and I was sick of arguing about it.
> >
> >> If the IO space is memory mapped, then we use the port number, the io_offset
> >> and the PCI_IOBASE to get to the virtual address that, when accessed, will
> >> generate the correct addresses on the bus, based on what the host bridge has
> >> been configured.
> >>
> >> This is the current level of my understanding of PCI IO.
> 
> What is io_offset supposed to be and be based on?

io_offset is the per-host-bridge offset that gets applied to the port number
to get the offset from PCI_IOBASE. Basically, the second host bridge will have
port numbers starting from zero like the first one in the system, but its
io_offset will be >= the largest port number in the first host bridge.


> 
> > Your understanding is absolutely correct, and that's great because very few
> > people get that right. What I think we're really arguing about is what the
> > of_pci_range_to_resource is supposed to return. As you and Bjorn both pointed
> > out earlier, there are in fact two resources associated with the I/O window
> > and the flaw in the current implementation is that of_pci_range_to_resource
> > returns the numeric values for the IORESOURCE_MEM resource, but sets the
> > type to IORESOURCE_IO, which is offset from that by PCI_IOBASE.
> >
> > You try to fix that by making it return the correct IORESOURCE_IO resource,
> > which is a reasonable approach but you must not break drivers that rely
> > on the broken resource while doing that.
> >
> > The approach that I would have picked is to return the IORESOURCE_MEM
> > resource associated with the I/O window and pick a (basically random)
> > IORESOURCE_IO resource struct based on what hasn't been used and then
> > compute the appropriate io_offset from that. This approach of course
> > would also have required fixing up all drivers relying on the current
> > behavior.
> >
> > To be clear, I'm fine with you (and Bjorn if he cares) picking the
> > approach you like here, either one of these works fine as long as the
> > host drivers use the interface in the way it is defined.
> >
> >> Now, I believe Rob has switched entirely to using my series in some test that
> >> he has run and he hasn't encountered any issues, as long as one remembers in
> >> the host bridge driver to add the io_base offset to the .start resource. If
> >> not then I need to patch pci_v3.c.
> >
> > The crazy part of all these discussions is that basically nobody ever uses
> > I/O port access, so it's very hard to test and we don't even notice when
> > we get it wrong, but we end up spending most of the time for PCI host controller
> > reviews trying to get these right.
> 
> FWIW, I test i/o accesses with Versatile QEMU. The LSI53xxxx device in
> the model has a kconfig option to use i/o accesses. However, I have
> seen in the past this is an area where 2 wrongs can make a right.

:)

Best regards,
Liviu

> 
> Rob
>
Arnd Bergmann July 16, 2014, 2:47 p.m. UTC | #10
On Wednesday 16 July 2014 09:35:37 Rob Herring wrote:
> On Wed, Jul 9, 2014 at 3:31 AM, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Tuesday 08 July 2014, Liviu Dudau wrote:
> >> On Mon, Jul 07, 2014 at 10:22:00PM +0100, Arnd Bergmann wrote:
> >> >
> >> > I looked at the other drivers briefly, and I think you indeed fix the Tegra
> >> > driver with this but break the integrator driver as mentioned above.
> >> > The other callers of of_pci_range_to_resource() are apparently not
> >> > impacted as they recalculate the values they get.
> >>
> >> I would argue that the integrator version has broken assumptions. If it tried
> >> to allocate that IO range or request the resource as currently returned by
> >> of_pci_range_to_resource (without my patch), it would fail. I know because I did
> >> the same thing in my host bridge driver and it failed miserably. That's why I
> >> tried to patch it.
> >
> > The integrator code was just introduced and the reason for how it does things
> > is the way that of_pci_range_to_resource() works today. We tried to cope with
> > it and not change the existing behavior in order to not break any other drivers.
> >
> > It's certainly not fair to call the integrator version broken, it just works
> > around the common code having a quirky interface. We should probably have
> > done of_pci_range_to_resource better than it is today (I would have argued
> > for it to return an IORESOURCE_MEM with the CPU address), but it took long
> > enough to get that merged and I was sick of arguing about it.
> >
> >> If the IO space is memory mapped, then we use the port number, the io_offset
> >> and the PCI_IOBASE to get to the virtual address that, when accessed, will
> >> generate the correct addresses on the bus, based on what the host bridge has
> >> been configured.
> >>
> >> This is the current level of my understanding of PCI IO.
> 
> What is io_offset supposed to be and be based on?

(you probably know most of this, but I'll explain it the long way
to avoid ambiguity).

io_offset is a concept used internally to translate bus-specific I/O port
numbers into Linux-global ports.

A simple example would be having two PCI host bridges each with a
(hardware) port range from 0 to 0xffff. These numbers are programmed
into "BARs" in PCI device config space and they are used on the physical
address lines in PCI or in the packet header on PCIe.

In Linux, we have a single logical port range that is seen by device
drivers; in the example the first host bridge would use ports 0-0xffff
and the second one would use ports 0x10000-0x1ffff.

The PCI core uses the io_offset to translate between the two address
spaces when it does the resource allocation during bus probe, so a device
that gets Linux I/O port 0x10100 has its BAR programmed with 0x100 and
the struct resource filled as 0x10000.

When a PCI host bridge driver registers its root bus with the PCI core,
it passes the io_offset using the last argument to pci_add_resource_offset()
along with the Linux I/O port resource, so in the example the first
io_offset is zero, while the second one is 0x10000.

Note that there is no requirement for the I/O port range on the bus to
start at zero, and you can even have negative io_offset values to
deal with that, but this is the exception.
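
A minimal sketch of that registration for the two example bridges (hypothetical
driver code; "resources" is the list later handed to the PCI core, e.g. via
pci_scan_root_bus() or similar):

/* Bridge 0: Linux ports 0x0000-0xffff <-> bus ports 0x0000-0xffff */
struct resource bridge0_io = {
	.name	= "bridge0 I/O",
	.start	= 0x0000,
	.end	= 0xffff,
	.flags	= IORESOURCE_IO,
};

/* Bridge 1: Linux ports 0x10000-0x1ffff <-> bus ports 0x0000-0xffff */
struct resource bridge1_io = {
	.name	= "bridge1 I/O",
	.start	= 0x10000,
	.end	= 0x1ffff,
	.flags	= IORESOURCE_IO,
};

LIST_HEAD(resources);

/* io_offset == Linux port number - bus port number */
pci_add_resource_offset(&resources, &bridge0_io, 0x0);
pci_add_resource_offset(&resources, &bridge1_io, 0x10000);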

> >> Now, I believe Rob has switched entirely to using my series in some test that
> >> he has run and he hasn't encountered any issues, as long as one remembers in
> >> the host bridge driver to add the io_base offset to the .start resource. If
> >> not then I need to patch pci_v3.c.
> >
> > The crazy part of all these discussions is that basically nobody ever uses
> > I/O port access, so it's very hard to test and we don't even notice when
> > we get it wrong, but we end up spending most of the time for PCI host controller
> > reviews trying to get these right.
> 
> FWIW, I test i/o accesses with Versatile QEMU. The LSI53xxxx device in
> the model has a kconfig option to use i/o accesses. However, I have
> seen in the past this is an area where 2 wrongs can make a right.

Can you point me to a git tree with your kernel and dts?

	Arnd

Patch

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 1345733..cbbaed2 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -872,3 +872,50 @@  bool of_dma_is_coherent(struct device_node *np)
 	return false;
 }
 EXPORT_SYMBOL_GPL(of_dma_is_coherent);
+
+/**
+ * of_pci_range_to_resource - Create a resource from an of_pci_range
+ * @range:	the PCI range that describes the resource
+ * @np:		device node where the range belongs to
+ * @res:	pointer to a valid resource that will be updated to
+ *              reflect the values contained in the range.
+ *
+ * Returns -EINVAL if the range cannot be converted to a resource.
+ *
+ * Note that if the range is an IO range, the resource will be converted
+ * using pci_address_to_pio() which can fail if it is called too early or
+ * if the range cannot be matched to any host bridge IO space (our case here).
+ * To guard against that we try to register the IO range first.
+ * If that fails we know that pci_address_to_pio() will do too.
+ */
+int of_pci_range_to_resource(struct of_pci_range *range,
+	struct device_node *np, struct resource *res)
+{
+	int err;
+	res->flags = range->flags;
+	res->parent = res->child = res->sibling = NULL;
+	res->name = np->full_name;
+
+	if (res->flags & IORESOURCE_IO) {
+		unsigned long port = -1;
+		err = pci_register_io_range(range->cpu_addr, range->size);
+		if (err)
+			goto invalid_range;
+		port = pci_address_to_pio(range->cpu_addr);
+		if (port == (unsigned long)-1) {
+			err = -EINVAL;
+			goto invalid_range;
+		}
+		res->start = port;
+	} else {
+		res->start = range->cpu_addr;
+	}
+	res->end = res->start + range->size - 1;
+	return 0;
+
+invalid_range:
+	res->start = (resource_size_t)OF_BAD_ADDR;
+	res->end = (resource_size_t)OF_BAD_ADDR;
+	return err;
+}
+
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index ac4aac4..33c0420 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -23,17 +23,8 @@  struct of_pci_range {
 #define for_each_of_pci_range(parser, range) \
 	for (; of_pci_range_parser_one(parser, range);)
 
-static inline void of_pci_range_to_resource(struct of_pci_range *range,
-					    struct device_node *np,
-					    struct resource *res)
-{
-	res->flags = range->flags;
-	res->start = range->cpu_addr;
-	res->end = range->cpu_addr + range->size - 1;
-	res->parent = res->child = res->sibling = NULL;
-	res->name = np->full_name;
-}
-
+extern int of_pci_range_to_resource(struct of_pci_range *range,
+		struct device_node *np, struct resource *res);
 /* Translate a DMA address from device space to CPU space */
 extern u64 of_translate_dma_address(struct device_node *dev,
 				    const __be32 *in_addr);
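
For reference, a host bridge driver would consume the new interface roughly as
follows (a sketch only: error handling trimmed, and "np" is assumed to be the
bridge's device node):

struct of_pci_range_parser parser;
struct of_pci_range range;
struct resource res;
int err;

if (of_pci_range_parser_init(&parser, np))
	return -EINVAL;

for_each_of_pci_range(&parser, &range) {
	err = of_pci_range_to_resource(&range, np, &res);
	if (err)
		continue;	/* range could not be converted */
	/* use res: IO resources now hold port numbers, not CPU addresses */
}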