
[v3,2/5] PCI: Take bridge window alignment into account when distributing resources

Message ID 20180226132112.81447-3-mika.westerberg@linux.intel.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas

Commit Message

Mika Westerberg Feb. 26, 2018, 1:21 p.m. UTC
When connecting a Thunderbolt endpoint which itself has an internal PCIe
switch (not the integrated one), the way we currently distribute
resources does not always work well because devices below non-hotplug
bridges might need to have their MMIO resources aligned to something
other than a 1 MB boundary. As an example, connecting a server-grade
4-port GbE add-in card adds another PCIe switch leading to GbE devices
that want their MMIO resources aligned to a 2 MB boundary instead.

The current resource distribution code does not take this alignment into
account and might try to add too many resources to the extension
hotplug bridges. The resulting bridge window is then too big, which
makes Linux re-allocate a minimal amount of resources, making future
extension impossible.

Make this work better by subtracting the properly aligned non-hotplug
downstream bridge window size from the remaining resources.

Fixes: 1a5767725cec ("PCI: Distribute available resources to hotplug-capable bridges")
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/setup-bus.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

Comments

Bjorn Helgaas March 27, 2018, 7:23 p.m. UTC | #1
On Mon, Feb 26, 2018 at 04:21:09PM +0300, Mika Westerberg wrote:
> When connecting a Thunderbolt endpoint which itself has an internal PCIe
> switch (not the integrated one), the way we currently distribute

I don't understand this distinction between "internal PCIe switch" and
"integrated one".

From a PCI point of view, I assume you're adding a switch with 4 NIC
endpoints below it.

> resources does not always work well because devices below non-hotplug
> bridges might need to have their MMIO resources aligned to something
> other than a 1 MB boundary. As an example, connecting a server-grade
> 4-port GbE add-in card adds another PCIe switch leading to GbE devices
> that want their MMIO resources aligned to a 2 MB boundary instead.

Is there something unique about Thunderbolt here?  I assume from a PCI
point of view, these NIC ports have BARs that are 2 MB or larger and
hence need 2 MB or larger alignment?

> The current resource distribution code does not take this alignment into
> account and might try to add too many resources to the extension
> hotplug bridges. The resulting bridge window is then too big, which
> makes Linux re-allocate a minimal amount of resources, making future
> extension impossible.

Maybe a concrete example would make this clearer to me.

> Make this work better by subtracting the properly aligned non-hotplug
> downstream bridge window size from the remaining resources.
> 
> Fixes: 1a5767725cec ("PCI: Distribute available resources to hotplug-capable bridges")
> Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Cc: stable@vger.kernel.org
> ---
>  drivers/pci/setup-bus.c | 41 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 40 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> index 3cce29a069e6..f1e6172734f0 100644
> --- a/drivers/pci/setup-bus.c
> +++ b/drivers/pci/setup-bus.c
> @@ -1882,6 +1882,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
>  	resource_size_t available_mmio, resource_size_t available_mmio_pref)
>  {
>  	resource_size_t remaining_io, remaining_mmio, remaining_mmio_pref;
> +	resource_size_t io_start, mmio_start, mmio_pref_start;
>  	unsigned int normal_bridges = 0, hotplug_bridges = 0;
>  	struct resource *io_res, *mmio_res, *mmio_pref_res;
>  	struct pci_dev *dev, *bridge = bus->self;
> @@ -1946,11 +1947,16 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
>  			remaining_mmio_pref -= resource_size(res);
>  	}
>  
> +	io_start = io_res->start;
> +	mmio_start = mmio_res->start;
> +	mmio_pref_start = mmio_pref_res->start;
> +
>  	/*
>  	 * Go over devices on this bus and distribute the remaining
>  	 * resource space between hotplug bridges.
>  	 */
>  	for_each_pci_bridge(dev, bus) {
> +		resource_size_t align;
>  		struct pci_bus *b;
>  
>  		b = dev->subordinate;
> @@ -1968,7 +1974,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
>  				available_io, available_mmio,
>  				available_mmio_pref);
>  		} else if (dev->is_hotplug_bridge) {
> -			resource_size_t align, io, mmio, mmio_pref;
> +			resource_size_t io, mmio, mmio_pref;
>  
>  			/*
>  			 * Distribute available extra resources equally
> @@ -1981,11 +1987,13 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
>  			io = div64_ul(available_io, hotplug_bridges);
>  			io = min(ALIGN(io, align), remaining_io);
>  			remaining_io -= io;
> +			io_start += io;
>  
>  			align = pci_resource_alignment(bridge, mmio_res);
>  			mmio = div64_ul(available_mmio, hotplug_bridges);
>  			mmio = min(ALIGN(mmio, align), remaining_mmio);
>  			remaining_mmio -= mmio;
> +			mmio_start += mmio;
>  
>  			align = pci_resource_alignment(bridge, mmio_pref_res);
>  			mmio_pref = div64_ul(available_mmio_pref,
> @@ -1993,9 +2001,40 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
>  			mmio_pref = min(ALIGN(mmio_pref, align),
>  					remaining_mmio_pref);
>  			remaining_mmio_pref -= mmio_pref;
> +			mmio_pref_start += mmio_pref;
>  
>  			pci_bus_distribute_available_resources(b, add_list, io,
>  							       mmio, mmio_pref);
> +		} else {
> +			/*
> +			 * For normal bridges, track start of the parent
> +			 * bridge window to make sure we align the
> +			 * remaining space which is distributed to the
> +			 * hotplug bridges properly.
> +			 */
> +			resource_size_t aligned;
> +			struct resource *res;
> +
> +			res = &dev->resource[PCI_BRIDGE_RESOURCES + 0];
> +			io_start += resource_size(res);
> +			aligned = ALIGN(io_start,
> +					pci_resource_alignment(dev, res));
> +			if (aligned > io_start)
> +				remaining_io -= aligned - io_start;
> +
> +			res = &dev->resource[PCI_BRIDGE_RESOURCES + 1];
> +			mmio_start += resource_size(res);
> +			aligned = ALIGN(mmio_start,
> +					pci_resource_alignment(dev, res));
> +			if (aligned > mmio_start)
> +				remaining_mmio -= aligned - mmio_start;
> +
> +			res = &dev->resource[PCI_BRIDGE_RESOURCES + 2];
> +			mmio_pref_start += resource_size(res);
> +			aligned = ALIGN(mmio_pref_start,
> +					pci_resource_alignment(dev, res));
> +			if (aligned > mmio_pref_start)
> +				remaining_mmio_pref -= aligned - mmio_pref_start;

I don't understand how this can work.  You have this:

  for_each_pci_bridge(dev, bus) {
    if (!hotplug_bridges && normal_bridges == 1) {
      pci_bus_distribute_available_resources(...);   # block 1
    } else if (dev->is_hotplug_bridge) {
      ...                                            # block 2
      pci_bus_distribute_available_resources(...);
    } else {
      ...                                            # block 3
    }
  }

This patch adds block 3.  Block 3 doesn't do anything with side
effects, i.e., it only updates local variables like io_start,
remaining_io, mmio_start, remaining_mmio, etc.

Those may be used in subsequent iterations, but it's only useful if
there *are* subsequent iterations, so this is dependent on the order
we iterate through the bridges.  It seems like we want the same
results no matter what the order.

>  		}
>  	}
>  }
> -- 
> 2.16.1
> 
Mika Westerberg March 28, 2018, 12:01 p.m. UTC | #2
On Tue, Mar 27, 2018 at 02:23:18PM -0500, Bjorn Helgaas wrote:
> On Mon, Feb 26, 2018 at 04:21:09PM +0300, Mika Westerberg wrote:
> > When connecting a Thunderbolt endpoint which itself has an internal PCIe
> > switch (not the integrated one), the way we currently distribute
> 
> I don't understand this distinction between "internal PCIe switch" and
> "integrated one".

I tried to explain here that the Thunderbolt endpoint itself has an
internal (integrated) PCIe switch.

> From a PCI point of view, I assume you're adding a switch with 4 NIC
> endpoints below it.

Yes, exactly.

The PCIe device in question is an Intel Gigabit ET2 Quad Port Server
adapter.

> > resources does not always work well because devices below non-hotplug
> > bridges might need to have their MMIO resources aligned to something
> > other than a 1 MB boundary. As an example, connecting a server-grade
> > 4-port GbE add-in card adds another PCIe switch leading to GbE devices
> > that want their MMIO resources aligned to a 2 MB boundary instead.
> 
> Is there something unique about Thunderbolt here?  I assume from a PCI
> point of view, these NIC ports have BARs that are 2 MB or larger and
> hence need 2 MB or larger alignment?

No, this is not unique to Thunderbolt in any way.

> > The current resource distribution code does not take this alignment into
> > account and might try to add too many resources to the extension
> > hotplug bridges. The resulting bridge window is then too big, which
> > makes Linux re-allocate a minimal amount of resources, making future
> > extension impossible.
> 
> Maybe a concrete example would make this clearer to me.

OK.

> > Make this work better by subtracting the properly aligned non-hotplug
> > downstream bridge window size from the remaining resources.
> > 
> > Fixes: 1a5767725cec ("PCI: Distribute available resources to hotplug-capable bridges")
> > Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> > Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > Cc: stable@vger.kernel.org
> > ---
> >  drivers/pci/setup-bus.c | 41 ++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 40 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> > index 3cce29a069e6..f1e6172734f0 100644
> > --- a/drivers/pci/setup-bus.c
> > +++ b/drivers/pci/setup-bus.c
> > @@ -1882,6 +1882,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> >  	resource_size_t available_mmio, resource_size_t available_mmio_pref)
> >  {
> >  	resource_size_t remaining_io, remaining_mmio, remaining_mmio_pref;
> > +	resource_size_t io_start, mmio_start, mmio_pref_start;
> >  	unsigned int normal_bridges = 0, hotplug_bridges = 0;
> >  	struct resource *io_res, *mmio_res, *mmio_pref_res;
> >  	struct pci_dev *dev, *bridge = bus->self;
> > @@ -1946,11 +1947,16 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> >  			remaining_mmio_pref -= resource_size(res);
> >  	}
> >  
> > +	io_start = io_res->start;
> > +	mmio_start = mmio_res->start;
> > +	mmio_pref_start = mmio_pref_res->start;
> > +
> >  	/*
> >  	 * Go over devices on this bus and distribute the remaining
> >  	 * resource space between hotplug bridges.
> >  	 */
> >  	for_each_pci_bridge(dev, bus) {
> > +		resource_size_t align;
> >  		struct pci_bus *b;
> >  
> >  		b = dev->subordinate;
> > @@ -1968,7 +1974,7 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> >  				available_io, available_mmio,
> >  				available_mmio_pref);
> >  		} else if (dev->is_hotplug_bridge) {
> > -			resource_size_t align, io, mmio, mmio_pref;
> > +			resource_size_t io, mmio, mmio_pref;
> >  
> >  			/*
> >  			 * Distribute available extra resources equally
> > @@ -1981,11 +1987,13 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> >  			io = div64_ul(available_io, hotplug_bridges);
> >  			io = min(ALIGN(io, align), remaining_io);
> >  			remaining_io -= io;
> > +			io_start += io;
> >  
> >  			align = pci_resource_alignment(bridge, mmio_res);
> >  			mmio = div64_ul(available_mmio, hotplug_bridges);
> >  			mmio = min(ALIGN(mmio, align), remaining_mmio);
> >  			remaining_mmio -= mmio;
> > +			mmio_start += mmio;
> >  
> >  			align = pci_resource_alignment(bridge, mmio_pref_res);
> >  			mmio_pref = div64_ul(available_mmio_pref,
> > @@ -1993,9 +2001,40 @@ static void pci_bus_distribute_available_resources(struct pci_bus *bus,
> >  			mmio_pref = min(ALIGN(mmio_pref, align),
> >  					remaining_mmio_pref);
> >  			remaining_mmio_pref -= mmio_pref;
> > +			mmio_pref_start += mmio_pref;
> >  
> >  			pci_bus_distribute_available_resources(b, add_list, io,
> >  							       mmio, mmio_pref);
> > +		} else {
> > +			/*
> > +			 * For normal bridges, track start of the parent
> > +			 * bridge window to make sure we align the
> > +			 * remaining space which is distributed to the
> > +			 * hotplug bridges properly.
> > +			 */
> > +			resource_size_t aligned;
> > +			struct resource *res;
> > +
> > +			res = &dev->resource[PCI_BRIDGE_RESOURCES + 0];
> > +			io_start += resource_size(res);
> > +			aligned = ALIGN(io_start,
> > +					pci_resource_alignment(dev, res));
> > +			if (aligned > io_start)
> > +				remaining_io -= aligned - io_start;
> > +
> > +			res = &dev->resource[PCI_BRIDGE_RESOURCES + 1];
> > +			mmio_start += resource_size(res);
> > +			aligned = ALIGN(mmio_start,
> > +					pci_resource_alignment(dev, res));
> > +			if (aligned > mmio_start)
> > +				remaining_mmio -= aligned - mmio_start;
> > +
> > +			res = &dev->resource[PCI_BRIDGE_RESOURCES + 2];
> > +			mmio_pref_start += resource_size(res);
> > +			aligned = ALIGN(mmio_pref_start,
> > +					pci_resource_alignment(dev, res));
> > +			if (aligned > mmio_pref_start)
> > +				remaining_mmio_pref -= aligned - mmio_pref_start;
> 
> I don't understand how this can work.  You have this:
> 
>   for_each_pci_bridge(dev, bus) {
>     if (!hotplug_bridges && normal_bridges == 1) {
>       pci_bus_distribute_available_resources(...);   # block 1
>     } else if (dev->is_hotplug_bridge) {
>       ...                                            # block 2
>       pci_bus_distribute_available_resources(...);
>     } else {
>       ...                                            # block 3
>     }
>   }
> 
> This patch adds block 3.  Block 3 doesn't do anything with side
> effects, i.e., it only updates local variables like io_start,
> remaining_io, mmio_start, remaining_mmio, etc.

It updates remaining_* so that the next bridge resource is aligned
properly (if there is going to be one).

> Those may be used in subsequent iterations, but it's only useful if
> there *are* subsequent iterations, so this is dependent on the order
> we iterate through the bridges.  It seems like we want the same
> results no matter what the order.

Here again we walk over the bridges using recursive calls, so we need to
know the alignment upfront before we make the next call to
pci_bus_distribute_available_resources(), and that information is only
available once the previous bridge has already been handled. I suppose
this can be done in a better way but unfortunately I have no idea how :-(
Can you suggest something concrete I could do instead if this is not
sufficient?
Bjorn Helgaas March 29, 2018, 9:29 p.m. UTC | #3
On Wed, Mar 28, 2018 at 03:01:20PM +0300, Mika Westerberg wrote:
> On Tue, Mar 27, 2018 at 02:23:18PM -0500, Bjorn Helgaas wrote:
> > On Mon, Feb 26, 2018 at 04:21:09PM +0300, Mika Westerberg wrote:
> > > When connecting a Thunderbolt endpoint which itself has an internal PCIe
> > > switch (not the integrated one), the way we currently distribute
> > 
> > I don't understand this distinction between "internal PCIe switch" and
> > "integrated one".
> 
> I tried to explain here that Thunderbolt endpoint itself has an internal
> (integrated) PCIe switch.

I was confused by this because I thought you meant "Endpoint" in the
sense used in the PCIe spec, i.e., something with a type 0 config
header.  But you're using "endpoint" to mean "some collection of
devices with a Thunderbolt interface on one side".

> > From a PCI point of view, I assume you're adding a switch with 4 NIC
> > endpoints below it.
> 
> Yes, exactly.
> 
> The PCIe device in question is Intel Gigabit ET2 Quad Port Server
> adapter.
> 
> > > resources does not always work well because devices below
> > > non-hotplug bridges might need to have their MMIO resources
> > > aligned to something other than a 1 MB boundary. As an example,
> > > connecting a server-grade 4-port GbE add-in card adds another
> > > PCIe switch leading to GbE devices that want their MMIO
> > > resources aligned to a 2 MB boundary instead.
> > 
> > Is there something unique about Thunderbolt here?  I assume from a
> > PCI point of view, these NIC ports have BARs that are 2 MB or
> > larger and hence need 2 MB or larger alignment?
> 
> No, this is not unique to Thunderbolt in any way.

I think this is a case of confusing things by mentioning irrelevant
details, e.g., leading off with "When connecting a Thunderbolt
endpoint" when the problem can occur when hot-adding any PCIe device
(or maybe only when adding a bridge?  I don't know).

It's good to mention a specific example, but I think it's best if you
can describe the situation as generically as possible first, then cite
the case at hand in a way that it's obvious this is just one of many
possibilities.

> > I don't understand how this can work.  You have this:
> > 
> >   for_each_pci_bridge(dev, bus) {
> >     if (!hotplug_bridges && normal_bridges == 1) {
> >       pci_bus_distribute_available_resources(...);   # block 1
> >     } else if (dev->is_hotplug_bridge) {
> >       ...                                            # block 2
> >       pci_bus_distribute_available_resources(...);
> >     } else {
> >       ...                                            # block 3
> >     }
> >   }
> > 
> > This patch adds block 3.  Block 3 doesn't do anything with side
> > effects, i.e., it only updates local variables like io_start,
> > remaining_io, mmio_start, remaining_mmio, etc.
> 
> It updates remaining_* so that the next bridge resource is aligned
> properly (if there is going to be one).

What do you mean by "aligned properly"?  The spec requires that bridge
memory windows be 1MB aligned and I/O windows 4KB aligned.  Does the
current code do that wrong?

> > Those may be used in subsequent iterations, but it's only useful
> > if there *are* subsequent iterations, so this is dependent on the
> > order we iterate through the bridges.  It seems like we want the
> > same results no matter what the order.
> 
> Here again we walk over the bridges using recursive calls so we need
> to know upfront the alignment before we do next call to
> pci_bus_distribute_available_resources() and that information is
> only available when previous bridge is already handled. I suppose
> this can be done in a better way but unfortunately I have no idea
> how :-( Can you suggest something concrete I could do instead if
> this is not sufficient?

I don't understand this well enough to have any great suggestions.

I did notice that the "!hotplug_bridges && normal_bridges == 1" case
doesn't look like it needs to be in the for_each_pci_bridge() loop
because there's only one bridge (I think "hotplug_bridges == 0 &&
normal_bridges == 1" would be slightly easier to read).

And I suspect you might want to handle the "hotplug_bridges == 1 &&
normal_bridges == 0" case identically.  That would mean you could pull
these "list_is_singular()" cases out of the loop, which makes sense
because if there's only one bridge, it should get everything.
Mika Westerberg March 31, 2018, 9:18 a.m. UTC | #4
On Thu, Mar 29, 2018 at 04:29:21PM -0500, Bjorn Helgaas wrote:
> On Wed, Mar 28, 2018 at 03:01:20PM +0300, Mika Westerberg wrote:
> > On Tue, Mar 27, 2018 at 02:23:18PM -0500, Bjorn Helgaas wrote:
> > > On Mon, Feb 26, 2018 at 04:21:09PM +0300, Mika Westerberg wrote:
> > > > When connecting a Thunderbolt endpoint which itself has an internal PCIe
> > > > switch (not the integrated one), the way we currently distribute
> > > 
> > > I don't understand this distinction between "internal PCIe switch" and
> > > "integrated one".
> > 
> > I tried to explain here that Thunderbolt endpoint itself has an internal
> > (integrated) PCIe switch.
> 
> I was confused by this because I thought you meant "Endpoint" in the
> sense used in the PCIe spec, i.e., something with a type 0 config
> header.  But you're using "endpoint" to mean "some collection of
> devices with a Thunderbolt interface on one side".

Yeah, I think the official term is "Thunderbolt endpoint" but I admit
it is confusing here.

> > > From a PCI point of view, I assume you're adding a switch with 4 NIC
> > > endpoints below it.
> > 
> > Yes, exactly.
> > 
> > The PCIe device in question is Intel Gigabit ET2 Quad Port Server
> > adapter.
> > 
> > > > resources does not always work well because devices below
> > > > non-hotplug bridges might need to have their MMIO resources
> > > > aligned to something other than a 1 MB boundary. As an example,
> > > > connecting a server-grade 4-port GbE add-in card adds another
> > > > PCIe switch leading to GbE devices that want their MMIO
> > > > resources aligned to a 2 MB boundary instead.
> > > 
> > > Is there something unique about Thunderbolt here?  I assume from a
> > > PCI point of view, these NIC ports have BARs that are 2 MB or
> > > larger and hence need 2 MB or larger alignment?
> > 
> > No, this is not unique to Thunderbolt in any way.
> 
> I think this is a case of confusing things by mentioning irrelevant
> details, e.g., leading off with "When connecting a Thunderbolt
> endpoint" when the problem can occur when hot-adding any PCIe device
> (or maybe only when adding a bridge?  I don't know).
> 
> It's good to mention a specific example, but I think it's best if you
> can describe the situation as generically as possible first, then cite
> the case at hand in a way that it's obvious this is just one of many
> possibilities.

OK, I'll rework the changelog accordingly.

> > > I don't understand how this can work.  You have this:
> > > 
> > >   for_each_pci_bridge(dev, bus) {
> > >     if (!hotplug_bridges && normal_bridges == 1) {
> > >       pci_bus_distribute_available_resources(...);   # block 1
> > >     } else if (dev->is_hotplug_bridge) {
> > >       ...                                            # block 2
> > >       pci_bus_distribute_available_resources(...);
> > >     } else {
> > >       ...                                            # block 3
> > >     }
> > >   }
> > > 
> > > This patch adds block 3.  Block 3 doesn't do anything with side
> > > effects, i.e., it only updates local variables like io_start,
> > > remaining_io, mmio_start, remaining_mmio, etc.
> > 
> > It updates remaining_* so that the next bridge resource is aligned
> > properly (if there is going to be one).
> 
> What do you mean by "aligned properly"?  The spec requires that bridge
> memory windows be 1MB aligned and I/O windows 4KB aligned.  Does the
> current code do that wrong?

Yes, it does. I'll put this into the changelog, but in short, here are
the two bridges where you can see the alignment:

  pci 0000:39:00.0: BAR 14: assigned [mem 0x53300000-0x6a0fffff]
                                          ^^^^^^^^^^
  pci 0000:3a:01.0: BAR 14: assigned [mem 0x53400000-0x547fffff]
                                          ^^^^^^^^^^

The 3a:01.0 (downstream port) is behind 39:00.0 (upstream port) and
leads to the other PCIe switch with the 4 NICs. The starting address is
aligned to 2 MB instead of 1 MB. Next we try to assign the remaining
resources to the hotplug downstream port 3a:04.0:

   pci 0000:3a:04.0: BAR 14: no space for [mem size 0x15a00000]
   pci 0000:3a:04.0: BAR 14: failed to assign [mem size 0x15a00000]

but since we calculated the remaining MMIO space (0x15a00000) without
taking this alignment into account, it does not fit into the [mem
0x53300000-0x6a0fffff] bridge window:

  start = 0x54800000
  end = 0x54800000 + 0x15a00000 - 1 = 0x6a1fffff

And 0x6a1fffff > 0x6a0fffff, which leads to the failure.
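
To double-check that arithmetic, here is a tiny standalone sketch (plain
user space C rather than kernel code; the values are simply copied from
the log above):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
          /* End of the parent bridge window of 39:00.0 (BAR 14) */
          uint64_t win_end = 0x6a0fffff;

          /*
           * First free address after the 3a:01.0 window, which had to be
           * bumped up to a 2 MB boundary (0x53400000) instead of starting
           * right at the parent window start (0x53300000).
           */
          uint64_t next_free = 0x54800000;

          /*
           * Remaining size as the current code computes it, i.e. without
           * accounting for the 1 MB gap at 0x53300000..0x533fffff.
           */
          uint64_t remaining = 0x15a00000;
          uint64_t end = next_free + remaining - 1;

          printf("end %#llx vs window end %#llx -> %s by %#llx\n",
                 (unsigned long long)end, (unsigned long long)win_end,
                 end > win_end ? "overruns" : "fits",
                 (unsigned long long)(end > win_end ? end - win_end : 0));
          return 0;
  }

It prints "end 0x6a1fffff vs window end 0x6a0fffff -> overruns by
0x100000", which is exactly the 1 MB alignment gap that needs to be
subtracted from remaining_mmio.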

> > > Those may be used in subsequent iterations, but it's only useful
> > > if there *are* subsequent iterations, so this is dependent on the
> > > order we iterate through the bridges.  It seems like we want the
> > > same results no matter what the order.
> > 
> > Here again we walk over the bridges using recursive calls so we need
> > to know upfront the alignment before we do next call to
> > pci_bus_distribute_available_resources() and that information is
> > only available when previous bridge is already handled. I suppose
> > this can be done in a better way but unfortunately I have no idea
> > how :-( Can you suggest something concrete I could do instead if
> > this is not sufficient?
> 
> I don't understand this well enough to have any great suggestions.

OK, thanks anyway.

> I did notice that the "!hotplug_bridges && normal_bridges == 1" case
> doesn't look like it needs to be in the for_each_pci_bridge() loop
> because there's only one bridge (I think "hotplug_bridges == 0 &&
> normal_bridges == 1" would be slightly easier to read).
> 
> And I suspect you might want to handle the "hotplug_bridges == 1 &&
> normal_bridges == 0" case identically.  That would mean you could pull
> these "list_is_singular()" cases out of the loop, which makes sense
> because if there's only one bridge, it should get everything.

That's a good idea. I'll do that in the next revision.
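
Just to make sure I got the idea right, something roughly like this
before the for_each_pci_bridge() loop (an untested sketch, not the
actual next revision), so the loop then only deals with buses that have
more than one bridge:

	/*
	 * If there is only one bridge on the bus (whether hotplug
	 * capable or not), it gets all of the available resources, so
	 * there is nothing to distribute. This assumes the bridge is
	 * the first device on the bus's device list.
	 */
	if (hotplug_bridges + normal_bridges == 1) {
		dev = list_first_entry(&bus->devices, struct pci_dev,
				       bus_list);
		if (dev->subordinate)
			pci_bus_distribute_available_resources(dev->subordinate,
					add_list, available_io,
					available_mmio, available_mmio_pref);
		return;
	}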

Patch

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index 3cce29a069e6..f1e6172734f0 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1882,6 +1882,7 @@  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
 	resource_size_t available_mmio, resource_size_t available_mmio_pref)
 {
 	resource_size_t remaining_io, remaining_mmio, remaining_mmio_pref;
+	resource_size_t io_start, mmio_start, mmio_pref_start;
 	unsigned int normal_bridges = 0, hotplug_bridges = 0;
 	struct resource *io_res, *mmio_res, *mmio_pref_res;
 	struct pci_dev *dev, *bridge = bus->self;
@@ -1946,11 +1947,16 @@  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
 			remaining_mmio_pref -= resource_size(res);
 	}
 
+	io_start = io_res->start;
+	mmio_start = mmio_res->start;
+	mmio_pref_start = mmio_pref_res->start;
+
 	/*
 	 * Go over devices on this bus and distribute the remaining
 	 * resource space between hotplug bridges.
 	 */
 	for_each_pci_bridge(dev, bus) {
+		resource_size_t align;
 		struct pci_bus *b;
 
 		b = dev->subordinate;
@@ -1968,7 +1974,7 @@  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
 				available_io, available_mmio,
 				available_mmio_pref);
 		} else if (dev->is_hotplug_bridge) {
-			resource_size_t align, io, mmio, mmio_pref;
+			resource_size_t io, mmio, mmio_pref;
 
 			/*
 			 * Distribute available extra resources equally
@@ -1981,11 +1987,13 @@  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
 			io = div64_ul(available_io, hotplug_bridges);
 			io = min(ALIGN(io, align), remaining_io);
 			remaining_io -= io;
+			io_start += io;
 
 			align = pci_resource_alignment(bridge, mmio_res);
 			mmio = div64_ul(available_mmio, hotplug_bridges);
 			mmio = min(ALIGN(mmio, align), remaining_mmio);
 			remaining_mmio -= mmio;
+			mmio_start += mmio;
 
 			align = pci_resource_alignment(bridge, mmio_pref_res);
 			mmio_pref = div64_ul(available_mmio_pref,
@@ -1993,9 +2001,40 @@  static void pci_bus_distribute_available_resources(struct pci_bus *bus,
 			mmio_pref = min(ALIGN(mmio_pref, align),
 					remaining_mmio_pref);
 			remaining_mmio_pref -= mmio_pref;
+			mmio_pref_start += mmio_pref;
 
 			pci_bus_distribute_available_resources(b, add_list, io,
 							       mmio, mmio_pref);
+		} else {
+			/*
+			 * For normal bridges, track start of the parent
+			 * bridge window to make sure we align the
+			 * remaining space which is distributed to the
+			 * hotplug bridges properly.
+			 */
+			resource_size_t aligned;
+			struct resource *res;
+
+			res = &dev->resource[PCI_BRIDGE_RESOURCES + 0];
+			io_start += resource_size(res);
+			aligned = ALIGN(io_start,
+					pci_resource_alignment(dev, res));
+			if (aligned > io_start)
+				remaining_io -= aligned - io_start;
+
+			res = &dev->resource[PCI_BRIDGE_RESOURCES + 1];
+			mmio_start += resource_size(res);
+			aligned = ALIGN(mmio_start,
+					pci_resource_alignment(dev, res));
+			if (aligned > mmio_start)
+				remaining_mmio -= aligned - mmio_start;
+
+			res = &dev->resource[PCI_BRIDGE_RESOURCES + 2];
+			mmio_pref_start += resource_size(res);
+			aligned = ALIGN(mmio_pref_start,
+					pci_resource_alignment(dev, res));
+			if (aligned > mmio_pref_start)
+				remaining_mmio_pref -= aligned - mmio_pref_start;
 		}
 	}
 }