diff mbox

[V9,11/11] powerpc/powernv: compound PE for VFs

Message ID 1437112961-17275-12-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Wei Yang July 17, 2015, 6:02 a.m. UTC
When VF BAR size is larger than 64MB, we group VFs in terms of M64 BAR,
which means those VFs in a group should form a compound PE.

This patch links those VF PEs into compound PE in this case.

[gwshan: code refactoring for a bit]
Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   46 +++++++++++++++++++++++++----
 arch/powerpc/platforms/powernv/pci.c      |   17 +++++++++--
 2 files changed, 56 insertions(+), 7 deletions(-)

Comments

Wei Yang July 29, 2015, 3:17 a.m. UTC | #1
Hi, Michael

Hope you didn't take this yet. We may change this patch a little.

On Fri, Jul 17, 2015 at 02:02:41PM +0800, Wei Yang wrote:
>When VF BAR size is larger than 64MB, we group VFs in terms of M64 BAR,
>which means those VFs in a group should form a compound PE.
>
>This patch links those VF PEs into compound PE in this case.
>
>[gwshan: code refactoring for a bit]
>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>---
> arch/powerpc/platforms/powernv/pci-ioda.c |   46 +++++++++++++++++++++++++----
> arch/powerpc/platforms/powernv/pci.c      |   17 +++++++++--
> 2 files changed, 56 insertions(+), 7 deletions(-)
>
>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>index 5738d31..d1530cb 100644
>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>@@ -1359,9 +1359,20 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
> 	}
>
> 	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
>+		struct pnv_ioda_pe *s, *sn;
> 		if (pe->parent_dev != pdev)
> 			continue;
>
>+		if ((pe->flags & PNV_IODA_PE_MASTER) &&
>+		    (pe->flags & PNV_IODA_PE_VF)) {
>+			list_for_each_entry_safe(s, sn, &pe->slaves, list) {
>+				pnv_pci_ioda2_release_dma_pe(pdev, s);
>+				list_del(&s->list);
>+				pnv_ioda_deconfigure_pe(phb, s);
>+				pnv_ioda_free_pe(phb, s->pe_number);
>+			}
>+		}
>+
> 		pnv_pci_ioda2_release_dma_pe(pdev, pe);
>
> 		/* Remove from list */
>@@ -1414,7 +1425,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
> 	struct pci_bus        *bus;
> 	struct pci_controller *hose;
> 	struct pnv_phb        *phb;
>-	struct pnv_ioda_pe    *pe;
>+	struct pnv_ioda_pe    *pe, *master_pe;
> 	int                    pe_num;
> 	u16                    vf_index;
> 	struct pci_dn         *pdn;
>@@ -1456,10 +1467,13 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
> 			continue;
> 		}
>
>-		/* Put PE to the list */
>-		mutex_lock(&phb->ioda.pe_list_mutex);
>-		list_add_tail(&pe->list, &phb->ioda.pe_list);
>-		mutex_unlock(&phb->ioda.pe_list_mutex);
>+		/* Put PE to the list, or postpone it for compound PEs */
>+		if ((pdn->m64_per_iov != M64_PER_IOV) ||
>+		    (num_vfs <= M64_PER_IOV)) {
>+			mutex_lock(&phb->ioda.pe_list_mutex);
>+			list_add_tail(&pe->list, &phb->ioda.pe_list);
>+			mutex_unlock(&phb->ioda.pe_list_mutex);
>+		}
>
> 		pnv_pci_ioda2_setup_dma_pe(phb, pe);
> 	}
>@@ -1472,10 +1486,32 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
> 		vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
>
> 		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
>+			master_pe = NULL;
>+
> 			for (vf_index = vf_group * vf_per_group;
> 			     vf_index < (vf_group + 1) * vf_per_group &&
> 			     vf_index < num_vfs;
> 			     vf_index++) {
>+
>+				/*
>+				 * Figure out the master PE and put all slave
>+				 * PEs to master PE's list.
>+				 */
>+				pe = &phb->ioda.pe_array[pdn->offset + vf_index];
>+				if (!master_pe) {
>+					pe->flags |= PNV_IODA_PE_MASTER;
>+					INIT_LIST_HEAD(&pe->slaves);
>+					master_pe = pe;
>+					mutex_lock(&phb->ioda.pe_list_mutex);
>+					list_add_tail(&pe->list, &phb->ioda.pe_list);
>+					mutex_unlock(&phb->ioda.pe_list_mutex);
>+				} else {
>+					pe->flags |= PNV_IODA_PE_SLAVE;
>+					pe->master = master_pe;
>+					list_add_tail(&pe->list,
>+						&master_pe->slaves);
>+				}
>+
> 				for (vf_index1 = vf_group * vf_per_group;
> 				     vf_index1 < (vf_group + 1) * vf_per_group &&
> 				     vf_index1 < num_vfs;
>diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>index 0e4f42e..f3aead0 100644
>--- a/arch/powerpc/platforms/powernv/pci.c
>+++ b/arch/powerpc/platforms/powernv/pci.c
>@@ -739,7 +739,7 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
> 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
> 	struct pnv_phb *phb = hose->private_data;
> #ifdef CONFIG_PCI_IOV
>-	struct pnv_ioda_pe *pe;
>+	struct pnv_ioda_pe *pe, *slave;
> 	struct pci_dn *pdn;
>
> 	/* Fix the VF pdn PE number */
>@@ -751,10 +751,23 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
> 			    (pdev->devfn & 0xff))) {
> 				pdn->pe_number = pe->pe_number;
> 				pe->pdev = pdev;
>-				break;
>+				goto found;
>+			}
>+
>+			if ((pe->flags & PNV_IODA_PE_MASTER) &&
>+			    (pe->flags & PNV_IODA_PE_VF)) {
>+				list_for_each_entry(slave, &pe->slaves, list) {
>+					if (slave->rid == ((pdev->bus->number << 8)
>+					   | (pdev->devfn & 0xff))) {
>+						pdn->pe_number = slave->pe_number;
>+						slave->pdev = pdev;
>+						goto found;
>+					}
>+				}
> 			}
> 		}
> 	}
>+found:
> #endif /* CONFIG_PCI_IOV */
>
> 	if (phb && phb->dma_dev_setup)
>-- 
>1.7.9.5
Gavin Shan Sept. 9, 2015, 2:48 a.m. UTC | #2
On Wed, Jul 29, 2015 at 11:17:18AM +0800, Wei Yang wrote:
>Hi, Michael
>
>Hope you didn't take this yet. We may change this patch a little.
>

[Cc Alexey who might concern the SRIOV status]

Richard, do you have a plan to get it upstream? It seems it has been
hanging here for a long time.

>On Fri, Jul 17, 2015 at 02:02:41PM +0800, Wei Yang wrote:
>>When VF BAR size is larger than 64MB, we group VFs in terms of M64 BAR,
>>which means those VFs in a group should form a compound PE.
>>
>>This patch links those VF PEs into compound PE in this case.
>>
>>[gwshan: code refactoring for a bit]
>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>---
>> arch/powerpc/platforms/powernv/pci-ioda.c |   46 +++++++++++++++++++++++++----
>> arch/powerpc/platforms/powernv/pci.c      |   17 +++++++++--
>> 2 files changed, 56 insertions(+), 7 deletions(-)
>>
>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>index 5738d31..d1530cb 100644
>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>@@ -1359,9 +1359,20 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>> 	}
>>
>> 	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
>>+		struct pnv_ioda_pe *s, *sn;
>> 		if (pe->parent_dev != pdev)
>> 			continue;
>>
>>+		if ((pe->flags & PNV_IODA_PE_MASTER) &&
>>+		    (pe->flags & PNV_IODA_PE_VF)) {
>>+			list_for_each_entry_safe(s, sn, &pe->slaves, list) {
>>+				pnv_pci_ioda2_release_dma_pe(pdev, s);
>>+				list_del(&s->list);
>>+				pnv_ioda_deconfigure_pe(phb, s);
>>+				pnv_ioda_free_pe(phb, s->pe_number);
>>+			}
>>+		}
>>+
>> 		pnv_pci_ioda2_release_dma_pe(pdev, pe);
>>
>> 		/* Remove from list */
>>@@ -1414,7 +1425,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>> 	struct pci_bus        *bus;
>> 	struct pci_controller *hose;
>> 	struct pnv_phb        *phb;
>>-	struct pnv_ioda_pe    *pe;
>>+	struct pnv_ioda_pe    *pe, *master_pe;
>> 	int                    pe_num;
>> 	u16                    vf_index;
>> 	struct pci_dn         *pdn;
>>@@ -1456,10 +1467,13 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>> 			continue;
>> 		}
>>
>>-		/* Put PE to the list */
>>-		mutex_lock(&phb->ioda.pe_list_mutex);
>>-		list_add_tail(&pe->list, &phb->ioda.pe_list);
>>-		mutex_unlock(&phb->ioda.pe_list_mutex);
>>+		/* Put PE to the list, or postpone it for compound PEs */
>>+		if ((pdn->m64_per_iov != M64_PER_IOV) ||
>>+		    (num_vfs <= M64_PER_IOV)) {
>>+			mutex_lock(&phb->ioda.pe_list_mutex);
>>+			list_add_tail(&pe->list, &phb->ioda.pe_list);
>>+			mutex_unlock(&phb->ioda.pe_list_mutex);
>>+		}
>>
>> 		pnv_pci_ioda2_setup_dma_pe(phb, pe);
>> 	}
>>@@ -1472,10 +1486,32 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>> 		vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
>>
>> 		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
>>+			master_pe = NULL;
>>+
>> 			for (vf_index = vf_group * vf_per_group;
>> 			     vf_index < (vf_group + 1) * vf_per_group &&
>> 			     vf_index < num_vfs;
>> 			     vf_index++) {
>>+
>>+				/*
>>+				 * Figure out the master PE and put all slave
>>+				 * PEs to master PE's list.
>>+				 */
>>+				pe = &phb->ioda.pe_array[pdn->offset + vf_index];
>>+				if (!master_pe) {
>>+					pe->flags |= PNV_IODA_PE_MASTER;
>>+					INIT_LIST_HEAD(&pe->slaves);
>>+					master_pe = pe;
>>+					mutex_lock(&phb->ioda.pe_list_mutex);
>>+					list_add_tail(&pe->list, &phb->ioda.pe_list);
>>+					mutex_unlock(&phb->ioda.pe_list_mutex);
>>+				} else {
>>+					pe->flags |= PNV_IODA_PE_SLAVE;
>>+					pe->master = master_pe;
>>+					list_add_tail(&pe->list,
>>+						&master_pe->slaves);
>>+				}
>>+
>> 				for (vf_index1 = vf_group * vf_per_group;
>> 				     vf_index1 < (vf_group + 1) * vf_per_group &&
>> 				     vf_index1 < num_vfs;
>>diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>>index 0e4f42e..f3aead0 100644
>>--- a/arch/powerpc/platforms/powernv/pci.c
>>+++ b/arch/powerpc/platforms/powernv/pci.c
>>@@ -739,7 +739,7 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>> 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>> 	struct pnv_phb *phb = hose->private_data;
>> #ifdef CONFIG_PCI_IOV
>>-	struct pnv_ioda_pe *pe;
>>+	struct pnv_ioda_pe *pe, *slave;
>> 	struct pci_dn *pdn;
>>
>> 	/* Fix the VF pdn PE number */
>>@@ -751,10 +751,23 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>> 			    (pdev->devfn & 0xff))) {
>> 				pdn->pe_number = pe->pe_number;
>> 				pe->pdev = pdev;
>>-				break;
>>+				goto found;
>>+			}
>>+
>>+			if ((pe->flags & PNV_IODA_PE_MASTER) &&
>>+			    (pe->flags & PNV_IODA_PE_VF)) {
>>+				list_for_each_entry(slave, &pe->slaves, list) {
>>+					if (slave->rid == ((pdev->bus->number << 8)
>>+					   | (pdev->devfn & 0xff))) {
>>+						pdn->pe_number = slave->pe_number;
>>+						slave->pdev = pdev;
>>+						goto found;
>>+					}
>>+				}
>> 			}
>> 		}
>> 	}
>>+found:
>> #endif /* CONFIG_PCI_IOV */
>>
>> 	if (phb && phb->dma_dev_setup)
>>-- 
>>1.7.9.5
>
>-- 
>Richard Yang
>Help you, Help me

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wei Yang Sept. 9, 2015, 3:36 a.m. UTC | #3
On Wed, Sep 09, 2015 at 12:48:21PM +1000, Gavin Shan wrote:
>On Wed, Jul 29, 2015 at 11:17:18AM +0800, Wei Yang wrote:
>>Hi, Michael
>>
>>Hope you didn't take this yet. We may change this patch a little.
>>
>
>[Cc Alexey who might concern the SRIOV status]
>
>Richard, do you have plan to get it upstream? It seems it's hanged
>over here for long time.
>

The VF EEH work has been on hold since we redesigned SRIOV. After the
redesign, we no longer have VF groups.

My plan is to push the VF EEH patch set after the SRIOV Redesign is accepted.

>>On Fri, Jul 17, 2015 at 02:02:41PM +0800, Wei Yang wrote:
>>>When VF BAR size is larger than 64MB, we group VFs in terms of M64 BAR,
>>>which means those VFs in a group should form a compound PE.
>>>
>>>This patch links those VF PEs into compound PE in this case.
>>>
>>>[gwshan: code refactoring for a bit]
>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>---
>>> arch/powerpc/platforms/powernv/pci-ioda.c |   46 +++++++++++++++++++++++++----
>>> arch/powerpc/platforms/powernv/pci.c      |   17 +++++++++--
>>> 2 files changed, 56 insertions(+), 7 deletions(-)
>>>
>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>index 5738d31..d1530cb 100644
>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>@@ -1359,9 +1359,20 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>> 	}
>>>
>>> 	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
>>>+		struct pnv_ioda_pe *s, *sn;
>>> 		if (pe->parent_dev != pdev)
>>> 			continue;
>>>
>>>+		if ((pe->flags & PNV_IODA_PE_MASTER) &&
>>>+		    (pe->flags & PNV_IODA_PE_VF)) {
>>>+			list_for_each_entry_safe(s, sn, &pe->slaves, list) {
>>>+				pnv_pci_ioda2_release_dma_pe(pdev, s);
>>>+				list_del(&s->list);
>>>+				pnv_ioda_deconfigure_pe(phb, s);
>>>+				pnv_ioda_free_pe(phb, s->pe_number);
>>>+			}
>>>+		}
>>>+
>>> 		pnv_pci_ioda2_release_dma_pe(pdev, pe);
>>>
>>> 		/* Remove from list */
>>>@@ -1414,7 +1425,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>> 	struct pci_bus        *bus;
>>> 	struct pci_controller *hose;
>>> 	struct pnv_phb        *phb;
>>>-	struct pnv_ioda_pe    *pe;
>>>+	struct pnv_ioda_pe    *pe, *master_pe;
>>> 	int                    pe_num;
>>> 	u16                    vf_index;
>>> 	struct pci_dn         *pdn;
>>>@@ -1456,10 +1467,13 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>> 			continue;
>>> 		}
>>>
>>>-		/* Put PE to the list */
>>>-		mutex_lock(&phb->ioda.pe_list_mutex);
>>>-		list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>-		mutex_unlock(&phb->ioda.pe_list_mutex);
>>>+		/* Put PE to the list, or postpone it for compound PEs */
>>>+		if ((pdn->m64_per_iov != M64_PER_IOV) ||
>>>+		    (num_vfs <= M64_PER_IOV)) {
>>>+			mutex_lock(&phb->ioda.pe_list_mutex);
>>>+			list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>+			mutex_unlock(&phb->ioda.pe_list_mutex);
>>>+		}
>>>
>>> 		pnv_pci_ioda2_setup_dma_pe(phb, pe);
>>> 	}
>>>@@ -1472,10 +1486,32 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>> 		vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
>>>
>>> 		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
>>>+			master_pe = NULL;
>>>+
>>> 			for (vf_index = vf_group * vf_per_group;
>>> 			     vf_index < (vf_group + 1) * vf_per_group &&
>>> 			     vf_index < num_vfs;
>>> 			     vf_index++) {
>>>+
>>>+				/*
>>>+				 * Figure out the master PE and put all slave
>>>+				 * PEs to master PE's list.
>>>+				 */
>>>+				pe = &phb->ioda.pe_array[pdn->offset + vf_index];
>>>+				if (!master_pe) {
>>>+					pe->flags |= PNV_IODA_PE_MASTER;
>>>+					INIT_LIST_HEAD(&pe->slaves);
>>>+					master_pe = pe;
>>>+					mutex_lock(&phb->ioda.pe_list_mutex);
>>>+					list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>+					mutex_unlock(&phb->ioda.pe_list_mutex);
>>>+				} else {
>>>+					pe->flags |= PNV_IODA_PE_SLAVE;
>>>+					pe->master = master_pe;
>>>+					list_add_tail(&pe->list,
>>>+						&master_pe->slaves);
>>>+				}
>>>+
>>> 				for (vf_index1 = vf_group * vf_per_group;
>>> 				     vf_index1 < (vf_group + 1) * vf_per_group &&
>>> 				     vf_index1 < num_vfs;
>>>diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>>>index 0e4f42e..f3aead0 100644
>>>--- a/arch/powerpc/platforms/powernv/pci.c
>>>+++ b/arch/powerpc/platforms/powernv/pci.c
>>>@@ -739,7 +739,7 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>>> 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>>> 	struct pnv_phb *phb = hose->private_data;
>>> #ifdef CONFIG_PCI_IOV
>>>-	struct pnv_ioda_pe *pe;
>>>+	struct pnv_ioda_pe *pe, *slave;
>>> 	struct pci_dn *pdn;
>>>
>>> 	/* Fix the VF pdn PE number */
>>>@@ -751,10 +751,23 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>>> 			    (pdev->devfn & 0xff))) {
>>> 				pdn->pe_number = pe->pe_number;
>>> 				pe->pdev = pdev;
>>>-				break;
>>>+				goto found;
>>>+			}
>>>+
>>>+			if ((pe->flags & PNV_IODA_PE_MASTER) &&
>>>+			    (pe->flags & PNV_IODA_PE_VF)) {
>>>+				list_for_each_entry(slave, &pe->slaves, list) {
>>>+					if (slave->rid == ((pdev->bus->number << 8)
>>>+					   | (pdev->devfn & 0xff))) {
>>>+						pdn->pe_number = slave->pe_number;
>>>+						slave->pdev = pdev;
>>>+						goto found;
>>>+					}
>>>+				}
>>> 			}
>>> 		}
>>> 	}
>>>+found:
>>> #endif /* CONFIG_PCI_IOV */
>>>
>>> 	if (phb && phb->dma_dev_setup)
>>>-- 
>>>1.7.9.5
>>
>>-- 
>>Richard Yang
>>Help you, Help me
Gavin Shan Sept. 9, 2015, 3:52 a.m. UTC | #4
On Wed, Sep 09, 2015 at 11:36:16AM +0800, Richard Yang wrote:
>On Wed, Sep 09, 2015 at 12:48:21PM +1000, Gavin Shan wrote:
>>On Wed, Jul 29, 2015 at 11:17:18AM +0800, Wei Yang wrote:
>>>Hi, Michael
>>>
>>>Hope you didn't take this yet. We may change this patch a little.
>>>
>>
>>[Cc Alexey who might concern the SRIOV status]
>>
>>Richard, do you have plan to get it upstream? It seems it's hanged
>>over here for long time.
>>
>
>The VF EEH is hung since we re-designed the SRIOV. After the re-design, we
>don't have VF groups.
>

How can this SRIOV redesign patchset affect the EEH part so greatly? The EEH
VF patchset already supports a VF PE which contains only one VF.

>My plan is to push the VF EEH patch set after the SRIOV Redesign is accepted.
>

That SRIOV redesign patchset missed 4.3 merge window obviously. I think the
code has been reviewed by Alexey and me. If Alexey isn't going to have more
comments about it, you can refresh the series (EEH support for VF) based on
it and send the updated series. I don't think there are any dependencies.

>>>On Fri, Jul 17, 2015 at 02:02:41PM +0800, Wei Yang wrote:
>>>>When VF BAR size is larger than 64MB, we group VFs in terms of M64 BAR,
>>>>which means those VFs in a group should form a compound PE.
>>>>
>>>>This patch links those VF PEs into compound PE in this case.
>>>>
>>>>[gwshan: code refactoring for a bit]
>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>>---
>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   46 +++++++++++++++++++++++++----
>>>> arch/powerpc/platforms/powernv/pci.c      |   17 +++++++++--
>>>> 2 files changed, 56 insertions(+), 7 deletions(-)
>>>>
>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>index 5738d31..d1530cb 100644
>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>@@ -1359,9 +1359,20 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>> 	}
>>>>
>>>> 	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
>>>>+		struct pnv_ioda_pe *s, *sn;
>>>> 		if (pe->parent_dev != pdev)
>>>> 			continue;
>>>>
>>>>+		if ((pe->flags & PNV_IODA_PE_MASTER) &&
>>>>+		    (pe->flags & PNV_IODA_PE_VF)) {
>>>>+			list_for_each_entry_safe(s, sn, &pe->slaves, list) {
>>>>+				pnv_pci_ioda2_release_dma_pe(pdev, s);
>>>>+				list_del(&s->list);
>>>>+				pnv_ioda_deconfigure_pe(phb, s);
>>>>+				pnv_ioda_free_pe(phb, s->pe_number);
>>>>+			}
>>>>+		}
>>>>+
>>>> 		pnv_pci_ioda2_release_dma_pe(pdev, pe);
>>>>
>>>> 		/* Remove from list */
>>>>@@ -1414,7 +1425,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>> 	struct pci_bus        *bus;
>>>> 	struct pci_controller *hose;
>>>> 	struct pnv_phb        *phb;
>>>>-	struct pnv_ioda_pe    *pe;
>>>>+	struct pnv_ioda_pe    *pe, *master_pe;
>>>> 	int                    pe_num;
>>>> 	u16                    vf_index;
>>>> 	struct pci_dn         *pdn;
>>>>@@ -1456,10 +1467,13 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>> 			continue;
>>>> 		}
>>>>
>>>>-		/* Put PE to the list */
>>>>-		mutex_lock(&phb->ioda.pe_list_mutex);
>>>>-		list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>>-		mutex_unlock(&phb->ioda.pe_list_mutex);
>>>>+		/* Put PE to the list, or postpone it for compound PEs */
>>>>+		if ((pdn->m64_per_iov != M64_PER_IOV) ||
>>>>+		    (num_vfs <= M64_PER_IOV)) {
>>>>+			mutex_lock(&phb->ioda.pe_list_mutex);
>>>>+			list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>>+			mutex_unlock(&phb->ioda.pe_list_mutex);
>>>>+		}
>>>>
>>>> 		pnv_pci_ioda2_setup_dma_pe(phb, pe);
>>>> 	}
>>>>@@ -1472,10 +1486,32 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>> 		vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
>>>>
>>>> 		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
>>>>+			master_pe = NULL;
>>>>+
>>>> 			for (vf_index = vf_group * vf_per_group;
>>>> 			     vf_index < (vf_group + 1) * vf_per_group &&
>>>> 			     vf_index < num_vfs;
>>>> 			     vf_index++) {
>>>>+
>>>>+				/*
>>>>+				 * Figure out the master PE and put all slave
>>>>+				 * PEs to master PE's list.
>>>>+				 */
>>>>+				pe = &phb->ioda.pe_array[pdn->offset + vf_index];
>>>>+				if (!master_pe) {
>>>>+					pe->flags |= PNV_IODA_PE_MASTER;
>>>>+					INIT_LIST_HEAD(&pe->slaves);
>>>>+					master_pe = pe;
>>>>+					mutex_lock(&phb->ioda.pe_list_mutex);
>>>>+					list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>>+					mutex_unlock(&phb->ioda.pe_list_mutex);
>>>>+				} else {
>>>>+					pe->flags |= PNV_IODA_PE_SLAVE;
>>>>+					pe->master = master_pe;
>>>>+					list_add_tail(&pe->list,
>>>>+						&master_pe->slaves);
>>>>+				}
>>>>+
>>>> 				for (vf_index1 = vf_group * vf_per_group;
>>>> 				     vf_index1 < (vf_group + 1) * vf_per_group &&
>>>> 				     vf_index1 < num_vfs;
>>>>diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>>>>index 0e4f42e..f3aead0 100644
>>>>--- a/arch/powerpc/platforms/powernv/pci.c
>>>>+++ b/arch/powerpc/platforms/powernv/pci.c
>>>>@@ -739,7 +739,7 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>>>> 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>>>> 	struct pnv_phb *phb = hose->private_data;
>>>> #ifdef CONFIG_PCI_IOV
>>>>-	struct pnv_ioda_pe *pe;
>>>>+	struct pnv_ioda_pe *pe, *slave;
>>>> 	struct pci_dn *pdn;
>>>>
>>>> 	/* Fix the VF pdn PE number */
>>>>@@ -751,10 +751,23 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>>>> 			    (pdev->devfn & 0xff))) {
>>>> 				pdn->pe_number = pe->pe_number;
>>>> 				pe->pdev = pdev;
>>>>-				break;
>>>>+				goto found;
>>>>+			}
>>>>+
>>>>+			if ((pe->flags & PNV_IODA_PE_MASTER) &&
>>>>+			    (pe->flags & PNV_IODA_PE_VF)) {
>>>>+				list_for_each_entry(slave, &pe->slaves, list) {
>>>>+					if (slave->rid == ((pdev->bus->number << 8)
>>>>+					   | (pdev->devfn & 0xff))) {
>>>>+						pdn->pe_number = slave->pe_number;
>>>>+						slave->pdev = pdev;
>>>>+						goto found;
>>>>+					}
>>>>+				}
>>>> 			}
>>>> 		}
>>>> 	}
>>>>+found:
>>>> #endif /* CONFIG_PCI_IOV */
>>>>
>>>> 	if (phb && phb->dma_dev_setup)
>>>>-- 
>>>>1.7.9.5
>>>
>>>-- 
>>>Richard Yang
>>>Help you, Help me
>
>-- 
>Richard Yang
>Help you, Help me

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Sept. 9, 2015, 4:59 a.m. UTC | #5
On Wed, 2015-09-09 at 11:36 +0800, Richard Yang wrote:
> The VF EEH is hung since we re-designed the SRIOV. After the re
> -design, we
> don't have VF groups.
> 
> My plan is to push the VF EEH patch set after the SRIOV Redesign is
> accepted.

What do you mean that we don't have VF groups?

If we don't have IOMMU groups per VF that means we can't assign them to
KVM partitions -> they are completely useless.

Ben.

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexey Kardashevskiy Sept. 9, 2015, 5:22 a.m. UTC | #6
On 09/09/2015 01:36 PM, Richard Yang wrote:
> On Wed, Sep 09, 2015 at 12:48:21PM +1000, Gavin Shan wrote:
>> On Wed, Jul 29, 2015 at 11:17:18AM +0800, Wei Yang wrote:
>>> Hi, Michael
>>>
>>> Hope you didn't take this yet. We may change this patch a little.
>>>
>>
>> [Cc Alexey who might concern the SRIOV status]
>>
>> Richard, do you have plan to get it upstream? It seems it's hanged
>> over here for long time.
>>
>
> The VF EEH is hung since we re-designed the SRIOV. After the re-design, we
> don't have VF groups.
>
> My plan is to push the VF EEH patch set after the SRIOV Redesign is accepted.


Can you please rebase on v4.2 (or v4.2 + sriov rework) and repost VF EEH 
patchset just to me? Or share the tree somewhere where I can pull it from? 
Thanks.

As for now, I cannot tell what difference your SRIOV patchset actually makes.
Wei Yang Sept. 9, 2015, 5:53 a.m. UTC | #7
On Wed, Sep 09, 2015 at 02:59:11PM +1000, Benjamin Herrenschmidt wrote:
>On Wed, 2015-09-09 at 11:36 +0800, Richard Yang wrote:
>> The VF EEH is hung since we re-designed the SRIOV. After the re
>> -design, we
>> don't have VF groups.
>> 
>> My plan is to push the VF EEH patch set after the SRIOV Redesign is
>> accepted.
>
>What do you mean taht we don't have VF groups ?
>

Before the SRIOV redesign, several VFs could share one M64 segment, which
introduced the compound PE for VFs. The "VF group" in my previous mail means
such a compound PE, composed of a master VF PE and several slave VF PEs.

>If we don't have IOMMU groups per VF that means we can't assign them to
>KVM partitions -> they are completely useless.

Yes, I didn't mean the IOMMU group.

>
>Ben.
Wei Yang Sept. 9, 2015, 6 a.m. UTC | #8
On Wed, Sep 09, 2015 at 01:52:32PM +1000, Gavin Shan wrote:
>Status: O
>Content-Length: 6303
>Lines: 176
>
>On Wed, Sep 09, 2015 at 11:36:16AM +0800, Richard Yang wrote:
>>On Wed, Sep 09, 2015 at 12:48:21PM +1000, Gavin Shan wrote:
>>>On Wed, Jul 29, 2015 at 11:17:18AM +0800, Wei Yang wrote:
>>>>Hi, Michael
>>>>
>>>>Hope you didn't take this yet. We may change this patch a little.
>>>>
>>>
>>>[Cc Alexey who might concern the SRIOV status]
>>>
>>>Richard, do you have plan to get it upstream? It seems it's hanged
>>>over here for long time.
>>>
>>
>>The VF EEH is hung since we re-designed the SRIOV. After the re-design, we
>>don't have VF groups.
>>
>
>How can this SRIOV redesign patchset affect EEH part greatly? The EEH
>VF patchset already support VF PE which contains only one VF.
>

Yes, not greatly. The main difference after the SRIOV redesign is that the
last patch, "powerpc/powernv: compound PE for VFs", will be removed.

>>My plan is to push the VF EEH patch set after the SRIOV Redesign is accepted.
>>
>
>That SRIOV redesign patchset missed 4.3 merge window obviously. I think the
>code has been reviewed by Alexey and me. If Alexey isn't going to have more
>comments about it, you can refresh the series (EEH support for VF) based on
>it and send the updated series. I don't think there is any dependencies.
>

The difference is simple, but we can't apply the patch series to the current
upstream without the last patch in this thread. The upstream version has the
compound VF PE, while after the SRIOV redesign we don't.

>>>>On Fri, Jul 17, 2015 at 02:02:41PM +0800, Wei Yang wrote:
>>>>>When VF BAR size is larger than 64MB, we group VFs in terms of M64 BAR,
>>>>>which means those VFs in a group should form a compound PE.
>>>>>
>>>>>This patch links those VF PEs into compound PE in this case.
>>>>>
>>>>>[gwshan: code refactoring for a bit]
>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>>>>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>>>>---
>>>>> arch/powerpc/platforms/powernv/pci-ioda.c |   46 +++++++++++++++++++++++++----
>>>>> arch/powerpc/platforms/powernv/pci.c      |   17 +++++++++--
>>>>> 2 files changed, 56 insertions(+), 7 deletions(-)
>>>>>
>>>>>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>index 5738d31..d1530cb 100644
>>>>>--- a/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>>>>>@@ -1359,9 +1359,20 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>>> 	}
>>>>>
>>>>> 	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
>>>>>+		struct pnv_ioda_pe *s, *sn;
>>>>> 		if (pe->parent_dev != pdev)
>>>>> 			continue;
>>>>>
>>>>>+		if ((pe->flags & PNV_IODA_PE_MASTER) &&
>>>>>+		    (pe->flags & PNV_IODA_PE_VF)) {
>>>>>+			list_for_each_entry_safe(s, sn, &pe->slaves, list) {
>>>>>+				pnv_pci_ioda2_release_dma_pe(pdev, s);
>>>>>+				list_del(&s->list);
>>>>>+				pnv_ioda_deconfigure_pe(phb, s);
>>>>>+				pnv_ioda_free_pe(phb, s->pe_number);
>>>>>+			}
>>>>>+		}
>>>>>+
>>>>> 		pnv_pci_ioda2_release_dma_pe(pdev, pe);
>>>>>
>>>>> 		/* Remove from list */
>>>>>@@ -1414,7 +1425,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>>> 	struct pci_bus        *bus;
>>>>> 	struct pci_controller *hose;
>>>>> 	struct pnv_phb        *phb;
>>>>>-	struct pnv_ioda_pe    *pe;
>>>>>+	struct pnv_ioda_pe    *pe, *master_pe;
>>>>> 	int                    pe_num;
>>>>> 	u16                    vf_index;
>>>>> 	struct pci_dn         *pdn;
>>>>>@@ -1456,10 +1467,13 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>>> 			continue;
>>>>> 		}
>>>>>
>>>>>-		/* Put PE to the list */
>>>>>-		mutex_lock(&phb->ioda.pe_list_mutex);
>>>>>-		list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>>>-		mutex_unlock(&phb->ioda.pe_list_mutex);
>>>>>+		/* Put PE to the list, or postpone it for compound PEs */
>>>>>+		if ((pdn->m64_per_iov != M64_PER_IOV) ||
>>>>>+		    (num_vfs <= M64_PER_IOV)) {
>>>>>+			mutex_lock(&phb->ioda.pe_list_mutex);
>>>>>+			list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>>>+			mutex_unlock(&phb->ioda.pe_list_mutex);
>>>>>+		}
>>>>>
>>>>> 		pnv_pci_ioda2_setup_dma_pe(phb, pe);
>>>>> 	}
>>>>>@@ -1472,10 +1486,32 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>>>>> 		vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
>>>>>
>>>>> 		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
>>>>>+			master_pe = NULL;
>>>>>+
>>>>> 			for (vf_index = vf_group * vf_per_group;
>>>>> 			     vf_index < (vf_group + 1) * vf_per_group &&
>>>>> 			     vf_index < num_vfs;
>>>>> 			     vf_index++) {
>>>>>+
>>>>>+				/*
>>>>>+				 * Figure out the master PE and put all slave
>>>>>+				 * PEs to master PE's list.
>>>>>+				 */
>>>>>+				pe = &phb->ioda.pe_array[pdn->offset + vf_index];
>>>>>+				if (!master_pe) {
>>>>>+					pe->flags |= PNV_IODA_PE_MASTER;
>>>>>+					INIT_LIST_HEAD(&pe->slaves);
>>>>>+					master_pe = pe;
>>>>>+					mutex_lock(&phb->ioda.pe_list_mutex);
>>>>>+					list_add_tail(&pe->list, &phb->ioda.pe_list);
>>>>>+					mutex_unlock(&phb->ioda.pe_list_mutex);
>>>>>+				} else {
>>>>>+					pe->flags |= PNV_IODA_PE_SLAVE;
>>>>>+					pe->master = master_pe;
>>>>>+					list_add_tail(&pe->list,
>>>>>+						&master_pe->slaves);
>>>>>+				}
>>>>>+
>>>>> 				for (vf_index1 = vf_group * vf_per_group;
>>>>> 				     vf_index1 < (vf_group + 1) * vf_per_group &&
>>>>> 				     vf_index1 < num_vfs;
>>>>>diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>>>>>index 0e4f42e..f3aead0 100644
>>>>>--- a/arch/powerpc/platforms/powernv/pci.c
>>>>>+++ b/arch/powerpc/platforms/powernv/pci.c
>>>>>@@ -739,7 +739,7 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>>>>> 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>>>>> 	struct pnv_phb *phb = hose->private_data;
>>>>> #ifdef CONFIG_PCI_IOV
>>>>>-	struct pnv_ioda_pe *pe;
>>>>>+	struct pnv_ioda_pe *pe, *slave;
>>>>> 	struct pci_dn *pdn;
>>>>>
>>>>> 	/* Fix the VF pdn PE number */
>>>>>@@ -751,10 +751,23 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>>>>> 			    (pdev->devfn & 0xff))) {
>>>>> 				pdn->pe_number = pe->pe_number;
>>>>> 				pe->pdev = pdev;
>>>>>-				break;
>>>>>+				goto found;
>>>>>+			}
>>>>>+
>>>>>+			if ((pe->flags & PNV_IODA_PE_MASTER) &&
>>>>>+			    (pe->flags & PNV_IODA_PE_VF)) {
>>>>>+				list_for_each_entry(slave, &pe->slaves, list) {
>>>>>+					if (slave->rid == ((pdev->bus->number << 8)
>>>>>+					   | (pdev->devfn & 0xff))) {
>>>>>+						pdn->pe_number = slave->pe_number;
>>>>>+						slave->pdev = pdev;
>>>>>+						goto found;
>>>>>+					}
>>>>>+				}
>>>>> 			}
>>>>> 		}
>>>>> 	}
>>>>>+found:
>>>>> #endif /* CONFIG_PCI_IOV */
>>>>>
>>>>> 	if (phb && phb->dma_dev_setup)
>>>>>-- 
>>>>>1.7.9.5
>>>>
>>>>-- 
>>>>Richard Yang
>>>>Help you, Help me
Wei Yang Sept. 9, 2015, 6:01 a.m. UTC | #9
On Wed, Sep 09, 2015 at 03:22:08PM +1000, Alexey Kardashevskiy wrote:
>On 09/09/2015 01:36 PM, Richard Yang wrote:
>>On Wed, Sep 09, 2015 at 12:48:21PM +1000, Gavin Shan wrote:
>>>On Wed, Jul 29, 2015 at 11:17:18AM +0800, Wei Yang wrote:
>>>>Hi, Michael
>>>>
>>>>Hope you didn't take this yet. We may change this patch a little.
>>>>
>>>
>>>[Cc Alexey who might concern the SRIOV status]
>>>
>>>Richard, do you have plan to get it upstream? It seems it's hanged
>>>over here for long time.
>>>
>>
>>The VF EEH is hung since we re-designed the SRIOV. After the re-design, we
>>don't have VF groups.
>>
>>My plan is to push the VF EEH patch set after the SRIOV Redesign is accepted.
>
>
>Can you please rebase on v4.2 (or v4.2 + sriov rework) and repost VF EEH
>patchset just to me? Or share the tree somewhere where I can pull it from?
>Thanks.
>

Yep, this is what I am planning to do.

>As for now, I cannot tell what difference your SRIOV patchset actually makes.
>
>-- 
>Alexey
Gavin Shan Sept. 17, 2015, 12:28 a.m. UTC | #10
On Wed, Sep 09, 2015 at 02:01:29PM +0800, Richard Yang wrote:
>On Wed, Sep 09, 2015 at 03:22:08PM +1000, Alexey Kardashevskiy wrote:
>>On 09/09/2015 01:36 PM, Richard Yang wrote:
>>>On Wed, Sep 09, 2015 at 12:48:21PM +1000, Gavin Shan wrote:
>>>>On Wed, Jul 29, 2015 at 11:17:18AM +0800, Wei Yang wrote:
>>>>>Hi, Michael
>>>>>
>>>>>Hope you didn't take this yet. We may change this patch a little.
>>>>>
>>>>
>>>>[Cc Alexey who might concern the SRIOV status]
>>>>
>>>>Richard, do you have plan to get it upstream? It seems it's hanged
>>>>over here for long time.
>>>>
>>>
>>>The VF EEH is hung since we re-designed the SRIOV. After the re-design, we
>>>don't have VF groups.
>>>
>>>My plan is to push the VF EEH patch set after the SRIOV Redesign is accepted.
>>
>>
>>Can you please rebase on v4.2 (or v4.2 + sriov rework) and repost VF EEH
>>patchset just to me? Or share the tree somewhere where I can pull it from?
>>Thanks.
>>
>
>Yep, this is what I am planning to do.
>

Can you rebase your patchset on v4.3-rc1 + SRIOV rework and then repost it
to the linuxppc-dev mailing list?


>>As for now, I cannot tell what difference your SRIOV patchset actually makes.
>>
>>-- 
>>Alexey
>
>-- 
>Richard Yang
>Help you, Help me

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 5738d31..d1530cb 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1359,9 +1359,20 @@  static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 	}
 
 	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
+		struct pnv_ioda_pe *s, *sn;
 		if (pe->parent_dev != pdev)
 			continue;
 
+		if ((pe->flags & PNV_IODA_PE_MASTER) &&
+		    (pe->flags & PNV_IODA_PE_VF)) {
+			list_for_each_entry_safe(s, sn, &pe->slaves, list) {
+				pnv_pci_ioda2_release_dma_pe(pdev, s);
+				list_del(&s->list);
+				pnv_ioda_deconfigure_pe(phb, s);
+				pnv_ioda_free_pe(phb, s->pe_number);
+			}
+		}
+
 		pnv_pci_ioda2_release_dma_pe(pdev, pe);
 
 		/* Remove from list */
@@ -1414,7 +1425,7 @@  static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 	struct pci_bus        *bus;
 	struct pci_controller *hose;
 	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
+	struct pnv_ioda_pe    *pe, *master_pe;
 	int                    pe_num;
 	u16                    vf_index;
 	struct pci_dn         *pdn;
@@ -1456,10 +1467,13 @@  static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 			continue;
 		}
 
-		/* Put PE to the list */
-		mutex_lock(&phb->ioda.pe_list_mutex);
-		list_add_tail(&pe->list, &phb->ioda.pe_list);
-		mutex_unlock(&phb->ioda.pe_list_mutex);
+		/* Put PE to the list, or postpone it for compound PEs */
+		if ((pdn->m64_per_iov != M64_PER_IOV) ||
+		    (num_vfs <= M64_PER_IOV)) {
+			mutex_lock(&phb->ioda.pe_list_mutex);
+			list_add_tail(&pe->list, &phb->ioda.pe_list);
+			mutex_unlock(&phb->ioda.pe_list_mutex);
+		}
 
 		pnv_pci_ioda2_setup_dma_pe(phb, pe);
 	}
@@ -1472,10 +1486,32 @@  static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 		vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov;
 
 		for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) {
+			master_pe = NULL;
+
 			for (vf_index = vf_group * vf_per_group;
 			     vf_index < (vf_group + 1) * vf_per_group &&
 			     vf_index < num_vfs;
 			     vf_index++) {
+
+				/*
+				 * Figure out the master PE and put all slave
+				 * PEs to master PE's list.
+				 */
+				pe = &phb->ioda.pe_array[pdn->offset + vf_index];
+				if (!master_pe) {
+					pe->flags |= PNV_IODA_PE_MASTER;
+					INIT_LIST_HEAD(&pe->slaves);
+					master_pe = pe;
+					mutex_lock(&phb->ioda.pe_list_mutex);
+					list_add_tail(&pe->list, &phb->ioda.pe_list);
+					mutex_unlock(&phb->ioda.pe_list_mutex);
+				} else {
+					pe->flags |= PNV_IODA_PE_SLAVE;
+					pe->master = master_pe;
+					list_add_tail(&pe->list,
+						&master_pe->slaves);
+				}
+
 				for (vf_index1 = vf_group * vf_per_group;
 				     vf_index1 < (vf_group + 1) * vf_per_group &&
 				     vf_index1 < num_vfs;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 0e4f42e..f3aead0 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -739,7 +739,7 @@  void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
 	struct pnv_phb *phb = hose->private_data;
 #ifdef CONFIG_PCI_IOV
-	struct pnv_ioda_pe *pe;
+	struct pnv_ioda_pe *pe, *slave;
 	struct pci_dn *pdn;
 
 	/* Fix the VF pdn PE number */
@@ -751,10 +751,23 @@  void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 			    (pdev->devfn & 0xff))) {
 				pdn->pe_number = pe->pe_number;
 				pe->pdev = pdev;
-				break;
+				goto found;
+			}
+
+			if ((pe->flags & PNV_IODA_PE_MASTER) &&
+			    (pe->flags & PNV_IODA_PE_VF)) {
+				list_for_each_entry(slave, &pe->slaves, list) {
+					if (slave->rid == ((pdev->bus->number << 8)
+					   | (pdev->devfn & 0xff))) {
+						pdn->pe_number = slave->pe_number;
+						slave->pdev = pdev;
+						goto found;
+					}
+				}
 			}
 		}
 	}
+found:
 #endif /* CONFIG_PCI_IOV */
 
 	if (phb && phb->dma_dev_setup)