diff mbox

powerpc/pci: Reuse PHB number on pci_controller add if available

Message ID 1457633505-19857-2-git-send-email-gpiccoli@linux.vnet.ibm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Guilherme G. Piccoli March 10, 2016, 6:11 p.m. UTC
The domain/PHB field of PCI addresses has its value obtained from a
global variable, incremented each time a new domain (represented by
struct pci_controller) is added on the system. The domain addition
process happens during boot or due to PCI device hotplug.

As recent kernels are using predictable naming for network interfaces,
the network stack is more tied to PCI naming. This can be a problem in
hotplug scenarios, because PCI addresses will change if a device is
removed and then re-added. This situation seems unusual, but it can
happen if a user wants to replace a NIC without rebooting the machine,
for example.

This patch changes the way PCI domain values are generated: the global
variable is no longer used; instead, a list is introduced, containing
a flag that indicates whenever a domain is in use or was released, for
example by a PCI hotplug remove. If the new PHB being added finds a
free domain, it will use its number; otherwise a new number is
generated incrementing the higher domain value present on the system.
No functional changes were introduced.

Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/pci-common.c | 47 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

Comments

Benjamin Herrenschmidt March 10, 2016, 9:34 p.m. UTC | #1
On Thu, 2016-03-10 at 15:11 -0300, Guilherme G. Piccoli wrote:
> The domain/PHB field of PCI addresses has its value obtained from a
> global variable, incremented each time a new domain (represented by
> struct pci_controller) is added on the system. The domain addition
> process happens during boot or due to PCI device hotplug.

I'd rather we use a fixed numbering for domains, for example, under
OPAL, use the opal-id, under phyp, there is also a number in the DT we
can use (forgot what it's called) etc...

> As recent kernels are using predictable naming for network
> interfaces,
> the network stack is more tied to PCI naming. This can be a problem
> in
> hotplug scenarios, because PCI addresses will change if a device is
> removed and then re-added. This situation seems unusual, but it can
> happen if a user wants to replace a NIC without rebooting the
> machine,
> for example.
> 
> This patch changes the way PCI domain values are generated: the
> global
> variable is no longer used; instead, a list is introduced, containing
> a flag that indicates whenever a domain is in use or was released,
> for
> example by a PCI hotplug remove. If the new PHB being added finds a
> free domain, it will use its number; otherwise a new number is
> generated incrementing the higher domain value present on the system.
> No functional changes were introduced.
> 
> Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/pci-common.c | 47
> +++++++++++++++++++++++++++++++++++++---
>  1 file changed, 44 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/pci-common.c
> b/arch/powerpc/kernel/pci-common.c
> index 0f7a60f..6eac0dd 100644
> --- a/arch/powerpc/kernel/pci-common.c
> +++ b/arch/powerpc/kernel/pci-common.c
> @@ -44,8 +44,12 @@
>  static DEFINE_SPINLOCK(hose_spinlock);
>  LIST_HEAD(hose_list);
>  
> -/* XXX kill that some day ... */
> -static int global_phb_number;		/* Global phb counter
> */
> +/* Global PHB counter list */
> +struct global_phb_number {
> +	bool used;
> +	struct list_head node;
> +};
> +LIST_HEAD(global_phb_number_list);
>  
>  /* ISA Memory physical address */
>  resource_size_t isa_mem_base;
> @@ -64,6 +68,42 @@ struct dma_map_ops *get_pci_dma_ops(void)
>  }
>  EXPORT_SYMBOL(get_pci_dma_ops);
>  
> +static int get_phb_number(void)
> +{
> +	struct global_phb_number *ptr;
> +	int latest_number = 0;
> +
> +	list_for_each_entry(ptr, &global_phb_number_list, node) {
> +		if (!ptr->used) {
> +			ptr->used = true;
> +			return latest_number;
> +		}
> +		latest_number++;
> +	}
> +
> +	ptr = zalloc_maybe_bootmem(sizeof(struct global_phb_number),
> GFP_KERNEL);
> +	BUG_ON(ptr == NULL);
> +
> +	ptr->used = true;
> +	list_add_tail(&ptr->node, &global_phb_number_list);
> +
> +	return latest_number;
> +}
> +
> +static void release_phb_number(int phb_number)
> +{
> +	struct global_phb_number *ptr;
> +	int current_number = 0;
> +
> +	list_for_each_entry(ptr, &global_phb_number_list, node) {
> +		if (current_number == phb_number) {
> +			ptr->used = false;
> +			return;
> +		}
> +		current_number++;
> +	}
> +}
> +
>  struct pci_controller *pcibios_alloc_controller(struct device_node
> *dev)
>  {
>  	struct pci_controller *phb;
> @@ -72,7 +112,7 @@ struct pci_controller
> *pcibios_alloc_controller(struct device_node *dev)
>  	if (phb == NULL)
>  		return NULL;
>  	spin_lock(&hose_spinlock);
> -	phb->global_number = global_phb_number++;
> +	phb->global_number = get_phb_number();
>  	list_add_tail(&phb->list_node, &hose_list);
>  	spin_unlock(&hose_spinlock);
>  	phb->dn = dev;
> @@ -94,6 +134,7 @@ EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
>  void pcibios_free_controller(struct pci_controller *phb)
>  {
>  	spin_lock(&hose_spinlock);
> +	release_phb_number(phb->global_number);
>  	list_del(&phb->list_node);
>  	spin_unlock(&hose_spinlock);
>  
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan March 10, 2016, 11:18 p.m. UTC | #2
On Fri, Mar 11, 2016 at 08:34:58AM +1100, Benjamin Herrenschmidt wrote:
>On Thu, 2016-03-10 at 15:11 -0300, Guilherme G. Piccoli wrote:
>> The domain/PHB field of PCI addresses has its value obtained from a
>> global variable, incremented each time a new domain (represented by
>> struct pci_controller) is added on the system. The domain addition
>> process happens during boot or due to PCI device hotplug.
>
>I'd rather we use a fixed numbering for domains, for example, under
>OPAL, use the opal-id, under phyp, there is also a number in the DT we
>can use (forgot what it's called) etc...
>


guest on phyp has something like below:

ibm,fw-phb-id = <0x8000000 0x20000200>;

However, PowerKVM guest doesn't have it. I guess it's a problem, right?

Thanks,
Gavin

>> As recent kernels are using predictable naming for network
>> interfaces,
>> the network stack is more tied to PCI naming. This can be a problem
>> in
>> hotplug scenarios, because PCI addresses will change if a device is
>> removed and then re-added. This situation seems unusual, but it can
>> happen if a user wants to replace a NIC without rebooting the
>> machine,
>> for example.
>> 
>> This patch changes the way PCI domain values are generated: the
>> global
>> variable is no longer used; instead, a list is introduced, containing
>> a flag that indicates whenever a domain is in use or was released,
>> for
>> example by a PCI hotplug remove. If the new PHB being added finds a
>> free domain, it will use its number; otherwise a new number is
>> generated incrementing the higher domain value present on the system.
>> No functional changes were introduced.
>> 
>> Signed-off-by: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
>> ---
>>  arch/powerpc/kernel/pci-common.c | 47
>> +++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 44 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/powerpc/kernel/pci-common.c
>> b/arch/powerpc/kernel/pci-common.c
>> index 0f7a60f..6eac0dd 100644
>> --- a/arch/powerpc/kernel/pci-common.c
>> +++ b/arch/powerpc/kernel/pci-common.c
>> @@ -44,8 +44,12 @@
>>  static DEFINE_SPINLOCK(hose_spinlock);
>>  LIST_HEAD(hose_list);
>>  
>> -/* XXX kill that some day ... */
>> -static int global_phb_number;		/* Global phb counter
>> */
>> +/* Global PHB counter list */
>> +struct global_phb_number {
>> +	bool used;
>> +	struct list_head node;
>> +};
>> +LIST_HEAD(global_phb_number_list);
>>  
>>  /* ISA Memory physical address */
>>  resource_size_t isa_mem_base;
>> @@ -64,6 +68,42 @@ struct dma_map_ops *get_pci_dma_ops(void)
>>  }
>>  EXPORT_SYMBOL(get_pci_dma_ops);
>>  
>> +static int get_phb_number(void)
>> +{
>> +	struct global_phb_number *ptr;
>> +	int latest_number = 0;
>> +
>> +	list_for_each_entry(ptr, &global_phb_number_list, node) {
>> +		if (!ptr->used) {
>> +			ptr->used = true;
>> +			return latest_number;
>> +		}
>> +		latest_number++;
>> +	}
>> +
>> +	ptr = zalloc_maybe_bootmem(sizeof(struct global_phb_number),
>> GFP_KERNEL);
>> +	BUG_ON(ptr == NULL);
>> +
>> +	ptr->used = true;
>> +	list_add_tail(&ptr->node, &global_phb_number_list);
>> +
>> +	return latest_number;
>> +}
>> +
>> +static void release_phb_number(int phb_number)
>> +{
>> +	struct global_phb_number *ptr;
>> +	int current_number = 0;
>> +
>> +	list_for_each_entry(ptr, &global_phb_number_list, node) {
>> +		if (current_number == phb_number) {
>> +			ptr->used = false;
>> +			return;
>> +		}
>> +		current_number++;
>> +	}
>> +}
>> +
>>  struct pci_controller *pcibios_alloc_controller(struct device_node
>> *dev)
>>  {
>>  	struct pci_controller *phb;
>> @@ -72,7 +112,7 @@ struct pci_controller
>> *pcibios_alloc_controller(struct device_node *dev)
>>  	if (phb == NULL)
>>  		return NULL;
>>  	spin_lock(&hose_spinlock);
>> -	phb->global_number = global_phb_number++;
>> +	phb->global_number = get_phb_number();
>>  	list_add_tail(&phb->list_node, &hose_list);
>>  	spin_unlock(&hose_spinlock);
>>  	phb->dn = dev;
>> @@ -94,6 +134,7 @@ EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
>>  void pcibios_free_controller(struct pci_controller *phb)
>>  {
>>  	spin_lock(&hose_spinlock);
>> +	release_phb_number(phb->global_number);
>>  	list_del(&phb->list_node);
>>  	spin_unlock(&hose_spinlock);
>>  
>

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt March 11, 2016, 10:52 a.m. UTC | #3
On Fri, 2016-03-11 at 10:18 +1100, Gavin Shan wrote:
> However, PowerKVM guest doesn't have it. I guess it's a problem,
> right?

What is the number we use for the TCE h-calls ? I think it's from the
first part of the dma window property no ?

Cheers,
Ben.
 
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan March 11, 2016, 1:14 p.m. UTC | #4
On Fri, Mar 11, 2016 at 09:52:13PM +1100, Benjamin Herrenschmidt wrote:
>On Fri, 2016-03-11 at 10:18 +1100, Gavin Shan wrote:
>> However, PowerKVM guest doesn't have it. I guess it's a problem,
>> right?
>
>What is the number we use for the TCE h-calls ? I think it's from the
>first part of the dma window property no ?
>

Yeah, the BUID is PHB's identifier, but we only need the low 16-bits
as its base is 0x800000020000000ULL in QEMU or pHyp. I think other platforms
(like freescale's) still need dynamically allocated domain number, which
could be managed by a bitmap.

Thanks,
Gavin

>Cheers,
>Ben.


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guilherme G. Piccoli March 11, 2016, 1:29 p.m. UTC | #5
On 03/11/2016 10:14 AM, Gavin Shan wrote:
> Yeah, the BUID is PHB's identifier, but we only need the low 16-bits
> as its base is 0x800000020000000ULL in QEMU or pHyp. I think other platforms
> (like freescale's) still need dynamically allocated domain number, which
> could be managed by a bitmap.

Thanks very much for the suggestions Benjamin and Gavin.
Will improve the patch and send a v2.

Cheers,


Guilherme

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 0f7a60f..6eac0dd 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -44,8 +44,12 @@ 
 static DEFINE_SPINLOCK(hose_spinlock);
 LIST_HEAD(hose_list);
 
-/* XXX kill that some day ... */
-static int global_phb_number;		/* Global phb counter */
+/* Global PHB counter list */
+struct global_phb_number {
+	bool used;
+	struct list_head node;
+};
+LIST_HEAD(global_phb_number_list);
 
 /* ISA Memory physical address */
 resource_size_t isa_mem_base;
@@ -64,6 +68,42 @@  struct dma_map_ops *get_pci_dma_ops(void)
 }
 EXPORT_SYMBOL(get_pci_dma_ops);
 
+static int get_phb_number(void)
+{
+	struct global_phb_number *ptr;
+	int latest_number = 0;
+
+	list_for_each_entry(ptr, &global_phb_number_list, node) {
+		if (!ptr->used) {
+			ptr->used = true;
+			return latest_number;
+		}
+		latest_number++;
+	}
+
+	ptr = zalloc_maybe_bootmem(sizeof(struct global_phb_number), GFP_KERNEL);
+	BUG_ON(ptr == NULL);
+
+	ptr->used = true;
+	list_add_tail(&ptr->node, &global_phb_number_list);
+
+	return latest_number;
+}
+
+static void release_phb_number(int phb_number)
+{
+	struct global_phb_number *ptr;
+	int current_number = 0;
+
+	list_for_each_entry(ptr, &global_phb_number_list, node) {
+		if (current_number == phb_number) {
+			ptr->used = false;
+			return;
+		}
+		current_number++;
+	}
+}
+
 struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 {
 	struct pci_controller *phb;
@@ -72,7 +112,7 @@  struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 	if (phb == NULL)
 		return NULL;
 	spin_lock(&hose_spinlock);
-	phb->global_number = global_phb_number++;
+	phb->global_number = get_phb_number();
 	list_add_tail(&phb->list_node, &hose_list);
 	spin_unlock(&hose_spinlock);
 	phb->dn = dev;
@@ -94,6 +134,7 @@  EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
 void pcibios_free_controller(struct pci_controller *phb)
 {
 	spin_lock(&hose_spinlock);
+	release_phb_number(phb->global_number);
 	list_del(&phb->list_node);
 	spin_unlock(&hose_spinlock);