diff mbox

[v3] powerpc/pci: Assign fixed PHB number based on device-tree properties

Message ID 1458245026-27635-1-git-send-email-gpiccoli@linux.vnet.ibm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Guilherme G. Piccoli March 17, 2016, 8:03 p.m. UTC
The domain/PHB field of PCI addresses has its value obtained from a
global variable, incremented each time a new domain (represented by
struct pci_controller) is added on the system. The domain addition
process happens during boot or due to PCI device hotplug.

As recent kernels are using predictable naming for network interfaces,
the network stack is more tied to PCI naming. This can be a problem in
hotplug scenarios, because PCI addresses will change if devices are
removed and then re-added. This situation seems unusual, but it can
happen if a user wants to replace a NIC without rebooting the machine,
for example.

This patch changes the way PCI domain values are generated: now, we use
device-tree properties to assign fixed PHB numbers to PCI addresses
when available (meaning pSeries and PowerNV cases). We also use a bitmap
to allow dynamic PHB numbering when device-tree properties are not
used. This bitmap keeps track of used PHB numbers and if a PHB is
released (by hotplug operations for example), it allows the reuse of
this PHB number, avoiding PCI address to change in case of device remove
and re-add soon after. No functional changes were introduced.

Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/pci-common.c | 41 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

Comments

Gavin Shan March 17, 2016, 10:37 p.m. UTC | #1
On Thu, Mar 17, 2016 at 05:03:46PM -0300, Guilherme G. Piccoli wrote:
>The domain/PHB field of PCI addresses has its value obtained from a
>global variable, incremented each time a new domain (represented by
>struct pci_controller) is added on the system. The domain addition
>process happens during boot or due to PCI device hotplug.
>
>As recent kernels are using predictable naming for network interfaces,
>the network stack is more tied to PCI naming. This can be a problem in
>hotplug scenarios, because PCI addresses will change if devices are
>removed and then re-added. This situation seems unusual, but it can
>happen if a user wants to replace a NIC without rebooting the machine,
>for example.
>
>This patch changes the way PCI domain values are generated: now, we use
>device-tree properties to assign fixed PHB numbers to PCI addresses
>when available (meaning pSeries and PowerNV cases). We also use a bitmap
>to allow dynamic PHB numbering when device-tree properties are not
>used. This bitmap keeps track of used PHB numbers and if a PHB is
>released (by hotplug operations for example), it allows the reuse of
>this PHB number, avoiding PCI address to change in case of device remove
>and re-add soon after. No functional changes were introduced.
>
>Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>

Apart from below minor issues to be fixed, it looks good to me.

Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>

>---
> arch/powerpc/kernel/pci-common.c | 41 +++++++++++++++++++++++++++++++++++++---
> 1 file changed, 38 insertions(+), 3 deletions(-)
>
>diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
>index 0f7a60f..8777614 100644
>--- a/arch/powerpc/kernel/pci-common.c
>+++ b/arch/powerpc/kernel/pci-common.c
>@@ -44,8 +44,11 @@
> static DEFINE_SPINLOCK(hose_spinlock);
> LIST_HEAD(hose_list);
>
>-/* XXX kill that some day ... */
>-static int global_phb_number;		/* Global phb counter */
>+/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */
>+#define	MAX_PHBS	8192
>+
>+/* For dynamic PHB numbering: used/free PHBs tracking bitmap. */
>+static DECLARE_BITMAP(phb_bitmap, MAX_PHBS);
>
> /* ISA Memory physical address */
> resource_size_t isa_mem_base;
>@@ -64,6 +67,33 @@ struct dma_map_ops *get_pci_dma_ops(void)
> }
> EXPORT_SYMBOL(get_pci_dma_ops);
>
>+static int get_phb_number(struct device_node *dn)
>+{
>+	const __be64 *prop64;
>+	const __be32 *regs;
>+	int phb_id = 0;
>+
>+	/* try fixed PHB numbering first, by checking archs and reading
>+	 * the respective device-tree property. */
>+	if (machine_is(pseries)) {
>+		regs = of_get_property(dn, "reg", NULL);
>+		if (regs)
>+			return (int)(be32_to_cpu(regs[1]) & 0xFFFF);
>+	} else {
>+		if (machine_is(powernv)) {
>+			prop64 = of_get_property(dn, "ibm,opal-phbid", NULL);
>+			if (prop64)
>+				return (int)(be64_to_cpup(prop64) & 0xFFFF);
>+		}
>+	}
>+

The nested statements can be merged to one with "else if (machine_is(powernv))".

>+	/* if not pSeries nor PowerNV, fallback to dynamic PHB numbering */
>+	phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS);
>+	BUG_ON(phb_id >= MAX_PHBS); /* reached maximum number of PHBs */
>+	set_bit(phb_id, phb_bitmap);
>+	return phb_id;

It's possible another CPU sets this bit before current CPU updates it, which
will cause same domain number used by two PHBs though it's very rare. I guess
it might be worthwhile to check if the bit is reserved by somebody else to
avoid the issue.

>+}
>+
> struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
> {
> 	struct pci_controller *phb;
>@@ -72,7 +102,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
> 	if (phb == NULL)
> 		return NULL;
> 	spin_lock(&hose_spinlock);
>-	phb->global_number = global_phb_number++;
>+	phb->global_number = get_phb_number(dev);
> 	list_add_tail(&phb->list_node, &hose_list);
> 	spin_unlock(&hose_spinlock);
> 	phb->dn = dev;
>@@ -94,6 +124,11 @@ EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
> void pcibios_free_controller(struct pci_controller *phb)
> {
> 	spin_lock(&hose_spinlock);
>+
>+	/* clear bit of phb_bitmap to allow reuse of this phb number */
>+	if (phb->global_number < MAX_PHBS)
>+		clear_bit(phb->global_number, phb_bitmap);
>+
> 	list_del(&phb->list_node);
> 	spin_unlock(&hose_spinlock);
>

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan March 18, 2016, 4 a.m. UTC | #2
On Fri, Mar 18, 2016 at 09:37:57AM +1100, Gavin Shan wrote:
>On Thu, Mar 17, 2016 at 05:03:46PM -0300, Guilherme G. Piccoli wrote:
>>The domain/PHB field of PCI addresses has its value obtained from a
>>global variable, incremented each time a new domain (represented by
>>struct pci_controller) is added on the system. The domain addition
>>process happens during boot or due to PCI device hotplug.
>>
>>As recent kernels are using predictable naming for network interfaces,
>>the network stack is more tied to PCI naming. This can be a problem in
>>hotplug scenarios, because PCI addresses will change if devices are
>>removed and then re-added. This situation seems unusual, but it can
>>happen if a user wants to replace a NIC without rebooting the machine,
>>for example.
>>
>>This patch changes the way PCI domain values are generated: now, we use
>>device-tree properties to assign fixed PHB numbers to PCI addresses
>>when available (meaning pSeries and PowerNV cases). We also use a bitmap
>>to allow dynamic PHB numbering when device-tree properties are not
>>used. This bitmap keeps track of used PHB numbers and if a PHB is
>>released (by hotplug operations for example), it allows the reuse of
>>this PHB number, avoiding PCI address to change in case of device remove
>>and re-add soon after. No functional changes were introduced.
>>
>>Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
>
>Apart from below minor issues to be fixed, it looks good to me.
>
>Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>
>>---
>> arch/powerpc/kernel/pci-common.c | 41 +++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 38 insertions(+), 3 deletions(-)
>>
>>diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
>>index 0f7a60f..8777614 100644
>>--- a/arch/powerpc/kernel/pci-common.c
>>+++ b/arch/powerpc/kernel/pci-common.c
>>@@ -44,8 +44,11 @@
>> static DEFINE_SPINLOCK(hose_spinlock);
>> LIST_HEAD(hose_list);
>>
>>-/* XXX kill that some day ... */
>>-static int global_phb_number;		/* Global phb counter */
>>+/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */
>>+#define	MAX_PHBS	8192
>>+
>>+/* For dynamic PHB numbering: used/free PHBs tracking bitmap. */
>>+static DECLARE_BITMAP(phb_bitmap, MAX_PHBS);
>>
>> /* ISA Memory physical address */
>> resource_size_t isa_mem_base;
>>@@ -64,6 +67,33 @@ struct dma_map_ops *get_pci_dma_ops(void)
>> }
>> EXPORT_SYMBOL(get_pci_dma_ops);
>>
>>+static int get_phb_number(struct device_node *dn)
>>+{
>>+	const __be64 *prop64;
>>+	const __be32 *regs;
>>+	int phb_id = 0;
>>+
>>+	/* try fixed PHB numbering first, by checking archs and reading
>>+	 * the respective device-tree property. */
>>+	if (machine_is(pseries)) {
>>+		regs = of_get_property(dn, "reg", NULL);
>>+		if (regs)
>>+			return (int)(be32_to_cpu(regs[1]) & 0xFFFF);
>>+	} else {
>>+		if (machine_is(powernv)) {
>>+			prop64 = of_get_property(dn, "ibm,opal-phbid", NULL);
>>+			if (prop64)
>>+				return (int)(be64_to_cpup(prop64) & 0xFFFF);
>>+		}
>>+	}
>>+
>
>The nested statements can be merged to one with "else if (machine_is(powernv))".
>
>>+	/* if not pSeries nor PowerNV, fallback to dynamic PHB numbering */
>>+	phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS);
>>+	BUG_ON(phb_id >= MAX_PHBS); /* reached maximum number of PHBs */
>>+	set_bit(phb_id, phb_bitmap);
>>+	return phb_id;
>
>It's possible another CPU sets this bit before current CPU updates it, which
>will cause same domain number used by two PHBs though it's very rare. I guess
>it might be worthwhile to check if the bit is reserved by somebody else to
>avoid the issue.
>

Please ignore this comment: there is a lock (hose_spinlock) avoiding the issue.

>>+}
>>+
>> struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
>> {
>> 	struct pci_controller *phb;
>>@@ -72,7 +102,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
>> 	if (phb == NULL)
>> 		return NULL;
>> 	spin_lock(&hose_spinlock);
>>-	phb->global_number = global_phb_number++;
>>+	phb->global_number = get_phb_number(dev);
>> 	list_add_tail(&phb->list_node, &hose_list);
>> 	spin_unlock(&hose_spinlock);
>> 	phb->dn = dev;
>>@@ -94,6 +124,11 @@ EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
>> void pcibios_free_controller(struct pci_controller *phb)
>> {
>> 	spin_lock(&hose_spinlock);
>>+
>>+	/* clear bit of phb_bitmap to allow reuse of this phb number */
>>+	if (phb->global_number < MAX_PHBS)
>>+		clear_bit(phb->global_number, phb_bitmap);
>>+
>> 	list_del(&phb->list_node);
>> 	spin_unlock(&hose_spinlock);
>>
>
>Thanks,
>Gavin

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guilherme G. Piccoli March 18, 2016, 3:27 p.m. UTC | #3
On 03/18/2016 01:00 AM, Gavin Shan wrote:
>> Apart from below minor issues to be fixed, it looks good to me.
>>
>> Reviewed-by: Gavin Shan <gwshan@linux.vnet.ibm.com>

Thanks for the review Gavin, I'll fix the else/if detail and send a v4.


>>> +	} else {
>>> +		if (machine_is(powernv)) {
>>> +			prop64 = of_get_property(dn, "ibm,opal-phbid", NULL);
>>> +			if (prop64)
>>> +				return (int)(be64_to_cpup(prop64) & 0xFFFF);
>>> +		}
>>> +	}
>>> +
>>
>> The nested statements can be merged to one with "else if (machine_is(powernv))".
>>
>>> +	/* if not pSeries nor PowerNV, fallback to dynamic PHB numbering */
>>> +	phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS);
>>> +	BUG_ON(phb_id >= MAX_PHBS); /* reached maximum number of PHBs */
>>> +	set_bit(phb_id, phb_bitmap);
>>> +	return phb_id;
>>
>> It's possible another CPU sets this bit before current CPU updates it, which
>> will cause same domain number used by two PHBs though it's very rare. I guess
>> it might be worthwhile to check if the bit is reserved by somebody else to
>> avoid the issue.
>>
>
> Please ignore this comment: there is a lock (hose_spinlock) avoiding the issue.
>

Ok =)


>> Thanks,
>> Gavin

Cheers,


Guilherme

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 0f7a60f..8777614 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -44,8 +44,11 @@ 
 static DEFINE_SPINLOCK(hose_spinlock);
 LIST_HEAD(hose_list);
 
-/* XXX kill that some day ... */
-static int global_phb_number;		/* Global phb counter */
+/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */
+#define	MAX_PHBS	8192
+
+/* For dynamic PHB numbering: used/free PHBs tracking bitmap. */
+static DECLARE_BITMAP(phb_bitmap, MAX_PHBS);
 
 /* ISA Memory physical address */
 resource_size_t isa_mem_base;
@@ -64,6 +67,33 @@  struct dma_map_ops *get_pci_dma_ops(void)
 }
 EXPORT_SYMBOL(get_pci_dma_ops);
 
+static int get_phb_number(struct device_node *dn)
+{
+	const __be64 *prop64;
+	const __be32 *regs;
+	int phb_id = 0;
+
+	/* try fixed PHB numbering first, by checking archs and reading
+	 * the respective device-tree property. */
+	if (machine_is(pseries)) {
+		regs = of_get_property(dn, "reg", NULL);
+		if (regs)
+			return (int)(be32_to_cpu(regs[1]) & 0xFFFF);
+	} else {
+		if (machine_is(powernv)) {
+			prop64 = of_get_property(dn, "ibm,opal-phbid", NULL);
+			if (prop64)
+				return (int)(be64_to_cpup(prop64) & 0xFFFF);
+		}
+	}
+
+	/* if not pSeries nor PowerNV, fallback to dynamic PHB numbering */
+	phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS);
+	BUG_ON(phb_id >= MAX_PHBS); /* reached maximum number of PHBs */
+	set_bit(phb_id, phb_bitmap);
+	return phb_id;
+}
+
 struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 {
 	struct pci_controller *phb;
@@ -72,7 +102,7 @@  struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 	if (phb == NULL)
 		return NULL;
 	spin_lock(&hose_spinlock);
-	phb->global_number = global_phb_number++;
+	phb->global_number = get_phb_number(dev);
 	list_add_tail(&phb->list_node, &hose_list);
 	spin_unlock(&hose_spinlock);
 	phb->dn = dev;
@@ -94,6 +124,11 @@  EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
 void pcibios_free_controller(struct pci_controller *phb)
 {
 	spin_lock(&hose_spinlock);
+
+	/* clear bit of phb_bitmap to allow reuse of this phb number */
+	if (phb->global_number < MAX_PHBS)
+		clear_bit(phb->global_number, phb_bitmap);
+
 	list_del(&phb->list_node);
 	spin_unlock(&hose_spinlock);