Message ID | 1438834307-26960-9-git-send-email-gwshan@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
On 08/06/2015 02:11 PM, Gavin Shan wrote: > For P7IOC, the whole available DMA32 space, which is below the > MEM32 space, is divided evenly into 256MB segments. The number > of continuous segments assigned to one particular PE depends on > the PE's DMA weight that is calculated based on the type of each > PCI devices contained in the PE, and PHB's DMA weight which is > accumulative DMA weight of PEs contained in the PHB. It means > that the PHB's DMA weight calculation depends on existing PEs, > which works perfectly now, but not hotplug friendly. As the > whole available DMA32 space can be assigned to one PE on PHB3, > so we don't have the issue on PHB3. > > The patch calculates PHB's DMA weight based on the PCI devices > contained in the PHB dynamically so that it's hotplug friendly. It does not look like the patch changed anything about when to calculate weights, it was and is pnv_ioda_setup_dma(). What the patch seems to be doing is changing weights by multiplying them by phb->ioda.tce32_count but it is unclear why you do this. > > Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> > --- > arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++++++++++---------------- > arch/powerpc/platforms/powernv/pci.h | 6 --- > 2 files changed, 43 insertions(+), 51 deletions(-) > > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c > index 713f4b4..7342cfd 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -927,6 +927,9 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, > > static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) > { > + struct pci_controller *hose = pci_bus_to_host(dev->bus); > + struct pnv_phb *phb = hose->private_data; > + > /* This is quite simplistic. The "base" weight of a device > * is 10. 0 means no DMA is to be accounted for it. > */ > @@ -939,14 +942,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) > if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || > dev->class == PCI_CLASS_SERIAL_USB_OHCI || > dev->class == PCI_CLASS_SERIAL_USB_EHCI) > - return 3; > + return 3 * phb->ioda.tce32_count; > > /* Increase the weight of RAID (includes Obsidian) */ > if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) > - return 15; > + return 15 * phb->ioda.tce32_count; > > /* Default */ > - return 10; > + return 10 * phb->ioda.tce32_count; > +} > + > +static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data) > +{ > + unsigned int *dma_weight = data; > + > + *dma_weight += pnv_ioda_dma_weight(pdev); > + return 0; > +} > + > +static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb) > +{ > + unsigned int dma_weight = 0; > + > + if (!phb->hose->bus) > + return 0; > + > + pci_walk_bus(phb->hose->bus, > + __pnv_ioda_phb_dma_weight, &dma_weight); > + return dma_weight; > } > > #ifdef CONFIG_PCI_IOV > @@ -1097,14 +1120,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) > /* Put PE to the list */ > list_add_tail(&pe->list, &phb->ioda.pe_list); > > - /* Account for one DMA PE if at least one DMA capable device exist > - * below the bridge > - */ > - if (pe->dma_weight != 0) { > - phb->ioda.dma_weight += pe->dma_weight; > - phb->ioda.dma_pe_count++; > - } > - > /* Link the PE */ > pnv_ioda_link_pe_by_weight(phb, pe); > } > @@ -2431,24 +2446,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, > static void pnv_ioda_setup_dma(struct pnv_phb *phb) > { > struct pci_controller *hose = phb->hose; > - unsigned int residual, remaining, segs, tw, base; > struct pnv_ioda_pe *pe; > + unsigned int dma_weight; > > - /* If we have more PE# than segments available, hand out one > - * per PE until we run out and let the rest fail. If not, > - * then we assign at least one segment per PE, plus more based > - * on the amount of devices under that PE > - */ > - if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) > - residual = 0; > - else > - residual = phb->ioda.tce32_count - > - phb->ioda.dma_pe_count; > - > - pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", > - hose->global_number, phb->ioda.tce32_count); > - pr_info("PCI: %d PE# for a total weight of %d\n", > - phb->ioda.dma_pe_count, phb->ioda.dma_weight); > + /* Calculate the PHB's DMA weight */ > + dma_weight = pnv_ioda_phb_dma_weight(phb); > + pr_info("PCI%04x has %ld DMA32 segments, total weight %d\n", > + hose->global_number, phb->ioda.tce32_count, dma_weight); > > pnv_pci_ioda_setup_opal_tce_kill(phb); > > @@ -2456,22 +2460,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) > * out one base segment plus any residual segments based on > * weight > */ > - remaining = phb->ioda.tce32_count; > - tw = phb->ioda.dma_weight; > - base = 0; > list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { > if (!pe->dma_weight) > continue; > - if (!remaining) { > - pe_warn(pe, "No DMA32 resources available\n"); > - continue; > - } > - segs = 1; > - if (residual) { > - segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; > - if (segs > remaining) > - segs = remaining; > - } > > /* > * For IODA2 compliant PHB3, we needn't care about the weight. > @@ -2479,17 +2470,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) > * the specific PE. > */ > if (phb->type == PNV_PHB_IODA1) { > - pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", > + unsigned int segs, base = 0; > + > + if (pe->dma_weight < > + dma_weight / phb->ioda.tce32_count) > + segs = 1; > + else > + segs = (pe->dma_weight * > + phb->ioda.tce32_count) / dma_weight; > + > + pe_info(pe, "DMA32 weight %d, assigned %d segments\n", > pe->dma_weight, segs); > pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); > + > + base += segs; > } else { > pe_info(pe, "Assign DMA32 space\n"); > - segs = 0; > pnv_pci_ioda2_setup_dma_pe(phb, pe); > } > - > - remaining -= segs; > - base += segs; > } > } > > diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h > index 08a4e57..addd3f7 100644 > --- a/arch/powerpc/platforms/powernv/pci.h > +++ b/arch/powerpc/platforms/powernv/pci.h > @@ -183,12 +183,6 @@ struct pnv_phb { > /* 32-bit TCE tables allocation */ > unsigned long tce32_count; > > - /* Total "weight" for the sake of DMA resources > - * allocation > - */ > - unsigned int dma_weight; > - unsigned int dma_pe_count; > - > /* Sorted list of used PE's, sorted at > * boot for resource allocation purposes > */ >
On 08/06/2015 02:11 PM, Gavin Shan wrote: > For P7IOC, the whole available DMA32 space, which is below the > MEM32 space, is divided evenly into 256MB segments. The number > of continuous segments assigned to one particular PE depends on > the PE's DMA weight that is calculated based on the type of each > PCI devices contained in the PE, and PHB's DMA weight which is > accumulative DMA weight of PEs contained in the PHB. It means > that the PHB's DMA weight calculation depends on existing PEs, > which works perfectly now, but not hotplug friendly. As the > whole available DMA32 space can be assigned to one PE on PHB3, > so we don't have the issue on PHB3. > > The patch calculates PHB's DMA weight based on the PCI devices > contained in the PHB dynamically so that it's hotplug friendly. > > Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> > --- > arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++++++++++---------------- > arch/powerpc/platforms/powernv/pci.h | 6 --- > 2 files changed, 43 insertions(+), 51 deletions(-) > > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c > index 713f4b4..7342cfd 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -927,6 +927,9 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, > > static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) > { > + struct pci_controller *hose = pci_bus_to_host(dev->bus); > + struct pnv_phb *phb = hose->private_data; > + > /* This is quite simplistic. The "base" weight of a device > * is 10. 0 means no DMA is to be accounted for it. > */ > @@ -939,14 +942,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) > if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || > dev->class == PCI_CLASS_SERIAL_USB_OHCI || > dev->class == PCI_CLASS_SERIAL_USB_EHCI) > - return 3; > + return 3 * phb->ioda.tce32_count; > > /* Increase the weight of RAID (includes Obsidian) */ > if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) > - return 15; > + return 15 * phb->ioda.tce32_count; > > /* Default */ > - return 10; > + return 10 * phb->ioda.tce32_count; > +} > + > +static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data) > +{ > + unsigned int *dma_weight = data; > + > + *dma_weight += pnv_ioda_dma_weight(pdev); > + return 0; > +} > + > +static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb) > +{ > + unsigned int dma_weight = 0; > + > + if (!phb->hose->bus) > + return 0; > + > + pci_walk_bus(phb->hose->bus, > + __pnv_ioda_phb_dma_weight, &dma_weight); > + return dma_weight; > } > > #ifdef CONFIG_PCI_IOV > @@ -1097,14 +1120,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) > /* Put PE to the list */ > list_add_tail(&pe->list, &phb->ioda.pe_list); > > - /* Account for one DMA PE if at least one DMA capable device exist > - * below the bridge > - */ > - if (pe->dma_weight != 0) { > - phb->ioda.dma_weight += pe->dma_weight; > - phb->ioda.dma_pe_count++; > - } > - > /* Link the PE */ > pnv_ioda_link_pe_by_weight(phb, pe); > } > @@ -2431,24 +2446,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, > static void pnv_ioda_setup_dma(struct pnv_phb *phb) > { > struct pci_controller *hose = phb->hose; > - unsigned int residual, remaining, segs, tw, base; > struct pnv_ioda_pe *pe; > + unsigned int dma_weight; > > - /* If we have more PE# than segments available, hand out one > - * per PE until we run out and let the rest fail. If not, > - * then we assign at least one segment per PE, plus more based > - * on the amount of devices under that PE > - */ > - if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) > - residual = 0; > - else > - residual = phb->ioda.tce32_count - > - phb->ioda.dma_pe_count; > - > - pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", > - hose->global_number, phb->ioda.tce32_count); > - pr_info("PCI: %d PE# for a total weight of %d\n", > - phb->ioda.dma_pe_count, phb->ioda.dma_weight); > + /* Calculate the PHB's DMA weight */ > + dma_weight = pnv_ioda_phb_dma_weight(phb); > + pr_info("PCI%04x has %ld DMA32 segments, total weight %d\n", > + hose->global_number, phb->ioda.tce32_count, dma_weight); > > pnv_pci_ioda_setup_opal_tce_kill(phb); > > @@ -2456,22 +2460,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) > * out one base segment plus any residual segments based on > * weight > */ > - remaining = phb->ioda.tce32_count; > - tw = phb->ioda.dma_weight; > - base = 0; > list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { > if (!pe->dma_weight) > continue; > - if (!remaining) { > - pe_warn(pe, "No DMA32 resources available\n"); > - continue; > - } > - segs = 1; > - if (residual) { > - segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; > - if (segs > remaining) > - segs = remaining; > - } > > /* > * For IODA2 compliant PHB3, we needn't care about the weight. > @@ -2479,17 +2470,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) > * the specific PE. > */ > if (phb->type == PNV_PHB_IODA1) { > - pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", > + unsigned int segs, base = 0; > + > + if (pe->dma_weight < > + dma_weight / phb->ioda.tce32_count) > + segs = 1; > + else > + segs = (pe->dma_weight * > + phb->ioda.tce32_count) / dma_weight; > + > + pe_info(pe, "DMA32 weight %d, assigned %d segments\n", > pe->dma_weight, segs); > pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); > + > + base += segs; This is not right. @base here is a local variable in the scope, pnv_pci_ioda_setup_dma_pe() will always be called with base==0. Sorry for commenting the same patch twice. > } else { > pe_info(pe, "Assign DMA32 space\n"); > - segs = 0; > pnv_pci_ioda2_setup_dma_pe(phb, pe); > } > - > - remaining -= segs; > - base += segs; > } > } > > diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h > index 08a4e57..addd3f7 100644 > --- a/arch/powerpc/platforms/powernv/pci.h > +++ b/arch/powerpc/platforms/powernv/pci.h > @@ -183,12 +183,6 @@ struct pnv_phb { > /* 32-bit TCE tables allocation */ > unsigned long tce32_count; > > - /* Total "weight" for the sake of DMA resources > - * allocation > - */ > - unsigned int dma_weight; > - unsigned int dma_pe_count; > - > /* Sorted list of used PE's, sorted at > * boot for resource allocation purposes > */ >
On Mon, Aug 10, 2015 at 07:21:12PM +1000, Alexey Kardashevskiy wrote: >On 08/06/2015 02:11 PM, Gavin Shan wrote: >>For P7IOC, the whole available DMA32 space, which is below the >>MEM32 space, is divided evenly into 256MB segments. The number >>of continuous segments assigned to one particular PE depends on >>the PE's DMA weight that is calculated based on the type of each >>PCI devices contained in the PE, and PHB's DMA weight which is >>accumulative DMA weight of PEs contained in the PHB. It means >>that the PHB's DMA weight calculation depends on existing PEs, >>which works perfectly now, but not hotplug friendly. As the >>whole available DMA32 space can be assigned to one PE on PHB3, >>so we don't have the issue on PHB3. >> >>The patch calculates PHB's DMA weight based on the PCI devices >>contained in the PHB dynamically so that it's hotplug friendly. >> >>Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> >>--- >> arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++++++++++---------------- >> arch/powerpc/platforms/powernv/pci.h | 6 --- >> 2 files changed, 43 insertions(+), 51 deletions(-) >> >>diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c >>index 713f4b4..7342cfd 100644 >>--- a/arch/powerpc/platforms/powernv/pci-ioda.c >>+++ b/arch/powerpc/platforms/powernv/pci-ioda.c >>@@ -927,6 +927,9 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, >> >> static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) >> { >>+ struct pci_controller *hose = pci_bus_to_host(dev->bus); >>+ struct pnv_phb *phb = hose->private_data; >>+ >> /* This is quite simplistic. The "base" weight of a device >> * is 10. 0 means no DMA is to be accounted for it. >> */ >>@@ -939,14 +942,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) >> if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || >> dev->class == PCI_CLASS_SERIAL_USB_OHCI || >> dev->class == PCI_CLASS_SERIAL_USB_EHCI) >>- return 3; >>+ return 3 * phb->ioda.tce32_count; >> >> /* Increase the weight of RAID (includes Obsidian) */ >> if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) >>- return 15; >>+ return 15 * phb->ioda.tce32_count; >> >> /* Default */ >>- return 10; >>+ return 10 * phb->ioda.tce32_count; >>+} >>+ >>+static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data) >>+{ >>+ unsigned int *dma_weight = data; >>+ >>+ *dma_weight += pnv_ioda_dma_weight(pdev); >>+ return 0; >>+} >>+ >>+static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb) >>+{ >>+ unsigned int dma_weight = 0; >>+ >>+ if (!phb->hose->bus) >>+ return 0; >>+ >>+ pci_walk_bus(phb->hose->bus, >>+ __pnv_ioda_phb_dma_weight, &dma_weight); >>+ return dma_weight; >> } >> >> #ifdef CONFIG_PCI_IOV >>@@ -1097,14 +1120,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) >> /* Put PE to the list */ >> list_add_tail(&pe->list, &phb->ioda.pe_list); >> >>- /* Account for one DMA PE if at least one DMA capable device exist >>- * below the bridge >>- */ >>- if (pe->dma_weight != 0) { >>- phb->ioda.dma_weight += pe->dma_weight; >>- phb->ioda.dma_pe_count++; >>- } >>- >> /* Link the PE */ >> pnv_ioda_link_pe_by_weight(phb, pe); >> } >>@@ -2431,24 +2446,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, >> static void pnv_ioda_setup_dma(struct pnv_phb *phb) >> { >> struct pci_controller *hose = phb->hose; >>- unsigned int residual, remaining, segs, tw, base; >> struct pnv_ioda_pe *pe; >>+ unsigned int dma_weight; >> >>- /* If we have more PE# than segments available, hand out one >>- * per PE until we run out and let the rest fail. If not, >>- * then we assign at least one segment per PE, plus more based >>- * on the amount of devices under that PE >>- */ >>- if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) >>- residual = 0; >>- else >>- residual = phb->ioda.tce32_count - >>- phb->ioda.dma_pe_count; >>- >>- pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", >>- hose->global_number, phb->ioda.tce32_count); >>- pr_info("PCI: %d PE# for a total weight of %d\n", >>- phb->ioda.dma_pe_count, phb->ioda.dma_weight); >>+ /* Calculate the PHB's DMA weight */ >>+ dma_weight = pnv_ioda_phb_dma_weight(phb); >>+ pr_info("PCI%04x has %ld DMA32 segments, total weight %d\n", >>+ hose->global_number, phb->ioda.tce32_count, dma_weight); >> >> pnv_pci_ioda_setup_opal_tce_kill(phb); >> >>@@ -2456,22 +2460,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) >> * out one base segment plus any residual segments based on >> * weight >> */ >>- remaining = phb->ioda.tce32_count; >>- tw = phb->ioda.dma_weight; >>- base = 0; >> list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { >> if (!pe->dma_weight) >> continue; >>- if (!remaining) { >>- pe_warn(pe, "No DMA32 resources available\n"); >>- continue; >>- } >>- segs = 1; >>- if (residual) { >>- segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; >>- if (segs > remaining) >>- segs = remaining; >>- } >> >> /* >> * For IODA2 compliant PHB3, we needn't care about the weight. >>@@ -2479,17 +2470,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) >> * the specific PE. >> */ >> if (phb->type == PNV_PHB_IODA1) { >>- pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", >>+ unsigned int segs, base = 0; >>+ >>+ if (pe->dma_weight < >>+ dma_weight / phb->ioda.tce32_count) >>+ segs = 1; >>+ else >>+ segs = (pe->dma_weight * >>+ phb->ioda.tce32_count) / dma_weight; >>+ >>+ pe_info(pe, "DMA32 weight %d, assigned %d segments\n", >> pe->dma_weight, segs); >> pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); >>+ >>+ base += segs; > > >This is not right. @base here is a local variable in the scope, >pnv_pci_ioda_setup_dma_pe() will always be called with base==0. > > >Sorry for commenting the same patch twice. > That's ok to comment for twice on same patch. But I don't see how it's wrong. The function (pnv_ioda_setup_dma()) is called as below and it iterate all PEs in the PHB's DMA32 list. That means the function is affects PHB, not every PE yet. It's out of problem with "base=0". pnv_pci_ioda_fixup pnv_pci_ioda_setup_DMA pnv_ioda_setup_dma > >> } else { >> pe_info(pe, "Assign DMA32 space\n"); >>- segs = 0; >> pnv_pci_ioda2_setup_dma_pe(phb, pe); >> } >>- >>- remaining -= segs; >>- base += segs; >> } >> } >> >>diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h >>index 08a4e57..addd3f7 100644 >>--- a/arch/powerpc/platforms/powernv/pci.h >>+++ b/arch/powerpc/platforms/powernv/pci.h >>@@ -183,12 +183,6 @@ struct pnv_phb { >> /* 32-bit TCE tables allocation */ >> unsigned long tce32_count; >> >>- /* Total "weight" for the sake of DMA resources >>- * allocation >>- */ >>- unsigned int dma_weight; >>- unsigned int dma_pe_count; >>- >> /* Sorted list of used PE's, sorted at >> * boot for resource allocation purposes >> */ >> Thanks, Gavin -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 713f4b4..7342cfd 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -927,6 +927,9 @@ static void pnv_ioda_link_pe_by_weight(struct pnv_phb *phb, static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) { + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + /* This is quite simplistic. The "base" weight of a device * is 10. 0 means no DMA is to be accounted for it. */ @@ -939,14 +942,34 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev) if (dev->class == PCI_CLASS_SERIAL_USB_UHCI || dev->class == PCI_CLASS_SERIAL_USB_OHCI || dev->class == PCI_CLASS_SERIAL_USB_EHCI) - return 3; + return 3 * phb->ioda.tce32_count; /* Increase the weight of RAID (includes Obsidian) */ if ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID) - return 15; + return 15 * phb->ioda.tce32_count; /* Default */ - return 10; + return 10 * phb->ioda.tce32_count; +} + +static int __pnv_ioda_phb_dma_weight(struct pci_dev *pdev, void *data) +{ + unsigned int *dma_weight = data; + + *dma_weight += pnv_ioda_dma_weight(pdev); + return 0; +} + +static unsigned int pnv_ioda_phb_dma_weight(struct pnv_phb *phb) +{ + unsigned int dma_weight = 0; + + if (!phb->hose->bus) + return 0; + + pci_walk_bus(phb->hose->bus, + __pnv_ioda_phb_dma_weight, &dma_weight); + return dma_weight; } #ifdef CONFIG_PCI_IOV @@ -1097,14 +1120,6 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) /* Put PE to the list */ list_add_tail(&pe->list, &phb->ioda.pe_list); - /* Account for one DMA PE if at least one DMA capable device exist - * below the bridge - */ - if (pe->dma_weight != 0) { - phb->ioda.dma_weight += pe->dma_weight; - phb->ioda.dma_pe_count++; - } - /* Link the PE */ pnv_ioda_link_pe_by_weight(phb, pe); } @@ -2431,24 +2446,13 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, static void pnv_ioda_setup_dma(struct pnv_phb *phb) { struct pci_controller *hose = phb->hose; - unsigned int residual, remaining, segs, tw, base; struct pnv_ioda_pe *pe; + unsigned int dma_weight; - /* If we have more PE# than segments available, hand out one - * per PE until we run out and let the rest fail. If not, - * then we assign at least one segment per PE, plus more based - * on the amount of devices under that PE - */ - if (phb->ioda.dma_pe_count > phb->ioda.tce32_count) - residual = 0; - else - residual = phb->ioda.tce32_count - - phb->ioda.dma_pe_count; - - pr_info("PCI: Domain %04x has %ld available 32-bit DMA segments\n", - hose->global_number, phb->ioda.tce32_count); - pr_info("PCI: %d PE# for a total weight of %d\n", - phb->ioda.dma_pe_count, phb->ioda.dma_weight); + /* Calculate the PHB's DMA weight */ + dma_weight = pnv_ioda_phb_dma_weight(phb); + pr_info("PCI%04x has %ld DMA32 segments, total weight %d\n", + hose->global_number, phb->ioda.tce32_count, dma_weight); pnv_pci_ioda_setup_opal_tce_kill(phb); @@ -2456,22 +2460,9 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) * out one base segment plus any residual segments based on * weight */ - remaining = phb->ioda.tce32_count; - tw = phb->ioda.dma_weight; - base = 0; list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) { if (!pe->dma_weight) continue; - if (!remaining) { - pe_warn(pe, "No DMA32 resources available\n"); - continue; - } - segs = 1; - if (residual) { - segs += ((pe->dma_weight * residual) + (tw / 2)) / tw; - if (segs > remaining) - segs = remaining; - } /* * For IODA2 compliant PHB3, we needn't care about the weight. @@ -2479,17 +2470,24 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb) * the specific PE. */ if (phb->type == PNV_PHB_IODA1) { - pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n", + unsigned int segs, base = 0; + + if (pe->dma_weight < + dma_weight / phb->ioda.tce32_count) + segs = 1; + else + segs = (pe->dma_weight * + phb->ioda.tce32_count) / dma_weight; + + pe_info(pe, "DMA32 weight %d, assigned %d segments\n", pe->dma_weight, segs); pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs); + + base += segs; } else { pe_info(pe, "Assign DMA32 space\n"); - segs = 0; pnv_pci_ioda2_setup_dma_pe(phb, pe); } - - remaining -= segs; - base += segs; } } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 08a4e57..addd3f7 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -183,12 +183,6 @@ struct pnv_phb { /* 32-bit TCE tables allocation */ unsigned long tce32_count; - /* Total "weight" for the sake of DMA resources - * allocation - */ - unsigned int dma_weight; - unsigned int dma_pe_count; - /* Sorted list of used PE's, sorted at * boot for resource allocation purposes */
For P7IOC, the whole available DMA32 space, which is below the MEM32 space, is divided evenly into 256MB segments. The number of continuous segments assigned to one particular PE depends on the PE's DMA weight that is calculated based on the type of each PCI devices contained in the PE, and PHB's DMA weight which is accumulative DMA weight of PEs contained in the PHB. It means that the PHB's DMA weight calculation depends on existing PEs, which works perfectly now, but not hotplug friendly. As the whole available DMA32 space can be assigned to one PE on PHB3, so we don't have the issue on PHB3. The patch calculates PHB's DMA weight based on the PCI devices contained in the PHB dynamically so that it's hotplug friendly. Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> --- arch/powerpc/platforms/powernv/pci-ioda.c | 88 +++++++++++++++---------------- arch/powerpc/platforms/powernv/pci.h | 6 --- 2 files changed, 43 insertions(+), 51 deletions(-)