diff mbox series

[2/3] PCI: Introduce a minimizing assignment algorithm

Message ID 20200928010609.5375-3-jonathan.derrick@intel.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show
Series PCI: Minimizing resource assignment algorithm | expand

Commit Message

Jon Derrick Sept. 28, 2020, 1:06 a.m. UTC
Some PCI domains have limited resources that get exhausted by hotplug
resource domains. VMD subdevice domains, for example, tend to support
only 32MB MMIO, of which the decodable address space is split between
prefetchable and non-prefetchable windows using existing resource
assignment algorithms. In addition to these limitations, hotplug bridges
require additional resource reservations as specified by default or
module parameters "pci=hp{io,mmio,mmiopref}size", further exhausting the
domain resources prior to full domain assignment.

Introduce a minimizing assignment algorithm which starts with the
default or user-requested hotplug resource values, tries with minimal
hotplug resource values, and lastly tries no hotplug resource values.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
---
 drivers/pci/setup-bus.c | 98 ++++++++++++++++++++++++++++++++++++++---
 include/linux/pci.h     |  2 +
 2 files changed, 95 insertions(+), 5 deletions(-)

Comments

Christoph Hellwig Sept. 28, 2020, 7:17 a.m. UTC | #1
Please keep this code in VMD if we really have to do it (although I'd
be perfectly fine to let people dumb enough to enable VMD devices to
live with the problems).  You are adding lots of code that gets
compiled into every Linux kernel that supports PCI just to work around
a completely idiotic invention from Intel that makes life painful for
us for no good reason.
Jon Derrick Sept. 28, 2020, 1:34 p.m. UTC | #2
Hi Christoph,

Thanks for your valuable feedback as always

On Mon, 2020-09-28 at 08:17 +0100, Christoph Hellwig wrote:
> Please keep this code in VMD if we really have to do it (although I'd
> be perfectly fine to let people dumb enough to enable VMD devices to
> live with the problems).
Great! Sounds like you're more open to us working openly within vmd.c
then?

>   You are adding lots of code that gets
> compiled into every Linux kernel that supports PCI just to work around
> a completely idiotic invention from Intel that makes life painful for
> us for no good reason.
Well this fix in particular may not be needed once the dynamic hotplug
resource resizing set is in and build on that. But frankly the generic
resource assignment code itself is very difficult to work within and
has been discussed at several LPC over the years. I don't see a problem
with another algorithm which could be relied upon by other host bridge
controller drivers if they want it.

I also spent a good deal of time trying to get the minimizing algorithm
into pci_assign_unassigned_root_bus_resources, where the only instance
of pci=realloc detection takes place (who knew there were so many
originating different paths for resource assignment?). I couldn't make
headway there so started fresh. Maybe someone talented could refactor
mine into it and save a few instruction bytes.
Christoph Hellwig Sept. 29, 2020, 5:48 p.m. UTC | #3
On Mon, Sep 28, 2020 at 01:34:50PM +0000, Derrick, Jonathan wrote:
> Well this fix in particular may not be needed once the dynamic hotplug
> resource resizing set is in and build on that. But frankly the generic
> resource assignment code itself is very difficult to work within and
> has been discussed at several LPC over the years. I don't see a problem
> with another algorithm which could be relied upon by other host bridge
> controller drivers if they want it.
> 
> I also spent a good deal of time trying to get the minimizing algorithm
> into pci_assign_unassigned_root_bus_resources, where the only instance
> of pci=realloc detection takes place (who knew there were so many
> originating different paths for resource assignment?). I couldn't make
> headway there so started fresh. Maybe someone talented could refactor
> mine into it and save a few instruction bytes.

If the maintainers think there might be other use cases we could
also just make it conditional and let VMD select it.  I'm just a little
worried about all kinds of cruft slipping into core code to work around
the various problems vmd creates.
diff mbox series

Patch

diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index f22502e8e6e6..7beb4f37660b 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -1200,6 +1200,35 @@  static void pci_bus_size_cardbus(struct pci_bus *bus,
 	;
 }
 
+enum {
+	PCI_SIZING_VARIANT_DEFAULT,	/* default or pci=hp*size values */
+	PCI_SIZING_VARIANT_MINIMUM,	/* small fixed hotplug reservation */
+	PCI_SIZING_VARIANT_NOHOTPLUG,	/* no hotplug reservation at all */
+	PCI_NUM_SIZING_VARIANTS,
+};
+/* Store the extra io/mmio/pref sizes to reserve for @sizing_variant. */
+static void hotplug_sizes(int sizing_variant, resource_size_t *io,
+			  resource_size_t *mmio, resource_size_t *pref)
+{
+	switch (sizing_variant) {
+	case PCI_SIZING_VARIANT_NOHOTPLUG:
+		*io = 0;
+		*mmio = 0;
+		*pref = 0;
+		break;
+	case PCI_SIZING_VARIANT_MINIMUM:
+		*io = 256;
+		*mmio = 1 << 20;	/* 1MB */
+		*pref = 1 << 20;	/* 1MB */
+		break;
+	case PCI_SIZING_VARIANT_DEFAULT:
+	default:	/* unknown variants fall back to the default sizes */
+		*io = pci_hotplug_io_size;
+		*mmio = pci_hotplug_mmio_size;
+		*pref = pci_hotplug_mmio_pref_size;
+	}
+}
+
 void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head)
 {
 	struct pci_dev *dev;
@@ -1248,11 +1277,11 @@  void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head)
 
 	case PCI_HEADER_TYPE_BRIDGE:
 		pci_bridge_check_ranges(bus);
-		if (bus->self->is_hotplug_bridge) {
-			additional_io_size  = pci_hotplug_io_size;
-			additional_mmio_size = pci_hotplug_mmio_size;
-			additional_mmio_pref_size = pci_hotplug_mmio_pref_size;
-		}
+		if (bus->self->is_hotplug_bridge)
+			hotplug_sizes(bus->self->sizing_variant,
+				      &additional_io_size,
+				      &additional_mmio_size,
+				      &additional_mmio_pref_size);
 		/* Fall through */
 	default:
 		pbus_size_io(bus, realloc_head ? 0 : additional_io_size,
@@ -2247,3 +2276,62 @@  void pci_assign_unassigned_bus_resources(struct pci_bus *bus)
 	BUG_ON(!list_empty(&add_list));
 }
 EXPORT_SYMBOL_GPL(pci_assign_unassigned_bus_resources);
+
+static int __set_sizing_variant(struct pci_dev *dev, void *data)
+{
+	if (dev->is_hotplug_bridge)	/* other devices are left untouched */
+		dev->sizing_variant = *((int *) data);	/* data points to an int */
+	/* Always returns 0; passed to pci_walk_bus() as its callback. */
+	return 0;
+}
+
+static void release_bridge_resources(struct pci_bus *bus)
+{
+	struct resource *res;
+	struct pci_dev *dev;
+	int i;
+	/* Reset bridge resources on @bus and, recursively, on buses below it. */
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		if (dev->subordinate) {
+			for (i = PCI_BRIDGE_RESOURCES; i < PCI_BRIDGE_RESOURCE_END; i++)
+				reset_resource(&dev->resource[i]);
+			/* Descend into the bus behind this bridge. */
+			release_bridge_resources(dev->subordinate);
+		}
+		/* Resources of a root bus are never reset here. */
+		if (pci_is_root_bus(bus))
+			continue;
+		/* NOTE(review): loop-invariant, repeats per device; confirm res != NULL. */
+		pci_bus_for_each_resource(bus, res, i)
+			reset_resource(res);
+	}
+}
+
+void pci_bus_assign_resources_fallback_sizing(struct pci_bus *bus)
+{
+	LIST_HEAD(fail_head);
+	int i = 0;
+	/* First attempt: tag bridges with variant 0, assign without re-sizing. */
+	pci_walk_bus(bus, __set_sizing_variant, &i);
+	__pci_bus_assign_resources(bus, NULL, &fail_head);
+	/* An empty failure list means nothing more to do. */
+	if (list_empty(&fail_head))
+		return;
+	/* NOTE(review): fail_head still holds the first attempt's entries here. */
+	for (i = 0; i < PCI_NUM_SIZING_VARIANTS; i++) {
+		pci_walk_bus(bus, __set_sizing_variant, &i);
+		/* Re-size bridges using the variant chosen for this pass. */
+		down_read(&pci_bus_sem);
+		__pci_bus_size_bridges(bus, NULL);
+		up_read(&pci_bus_sem);
+		/* Retry assignment; stop at the first pass with no failures. */
+		__pci_bus_assign_resources(bus, NULL, &fail_head);
+		if (list_empty(&fail_head))
+			return;
+		/* Pass failed: release resources and empty the list before retrying. */
+		release_and_restore_resources(&fail_head);
+		release_bridge_resources(bus);
+		free_list(&fail_head);
+	}
+}
+EXPORT_SYMBOL(pci_bus_assign_resources_fallback_sizing);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 801e9ad0d57e..72ae11d3b5ea 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -424,6 +424,7 @@  struct pci_dev {
 	unsigned int	is_hotplug_bridge:1;
 	unsigned int	shpc_managed:1;		/* SHPC owned by shpchp */
 	unsigned int	is_thunderbolt:1;	/* Thunderbolt controller */
+	unsigned int	sizing_variant:2;	/* normal, minimum, no hotplug */
 	/*
 	 * Devices marked being untrusted are the ones that can potentially
 	 * execute DMA attacks and similar. They are typically connected
@@ -1299,6 +1300,7 @@  void pci_assign_unassigned_resources(void);
 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge);
 void pci_assign_unassigned_bus_resources(struct pci_bus *bus);
 void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus);
+void pci_bus_assign_resources_fallback_sizing(struct pci_bus *bus);
 int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type);
 void pdev_enable_device(struct pci_dev *);
 int pci_enable_resources(struct pci_dev *, int mask);