Message ID | 1445829362-2738-9-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
On 10/26/2015 02:15 PM, Wei Yang wrote: > PEs for VFs don't have primary bus. So they have to have their own reset > backend, which is used during EEH recovery. The patch implements the reset > backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained > in the PE. > > [gwshan: changelog and code refactoring] > Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> > Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com> > --- > arch/powerpc/include/asm/eeh.h | 1 + > arch/powerpc/platforms/powernv/eeh-powernv.c | 134 ++++++++++++++++++++++++++- > 2 files changed, 134 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h > index ec21f8f..331c856 100644 > --- a/arch/powerpc/include/asm/eeh.h > +++ b/arch/powerpc/include/asm/eeh.h > @@ -136,6 +136,7 @@ struct eeh_dev { > int pcix_cap; /* Saved PCIx capability */ > int pcie_cap; /* Saved PCIe capability */ > int aer_cap; /* Saved AER capability */ > + int af_cap; /* Saved AF capability */ > struct eeh_pe *pe; /* Associated PE */ > struct list_head list; /* Form link list in the PE */ > struct pci_controller *phb; /* Associated PHB */ > diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c > index cfd55dd..017cd72 100644 > --- a/arch/powerpc/platforms/powernv/eeh-powernv.c > +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c > @@ -404,6 +404,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) > edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); > edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); > edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); > + edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); > if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { > edev->mode |= EEH_DEV_BRIDGE; > if (edev->pcie_cap) { > @@ -893,6 +894,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) > return 0; > } > > +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos, > + u16 mask, bool af_flr_rst) > +{ > + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); > + int status, i; > + > + /* Wait for Transaction Pending bit to be cleared */ > + for (i = 0; i < 4; i++) { > + eeh_ops->read_config(pdn, pos, 2, &status); gcc should have complained on using uninitialized @status here. > + if (!(status & mask)) > + return; > + > + msleep((1 << i) * 100); > + } > + > + pr_warn("%s: Pending transaction while issuing %s FLR to " > + "%04x:%02x:%02x.%01x\n", Do not wrap user-visible strings. > + __func__, af_flr_rst ? "AF" : "", > + edev->phb->global_number, pdn->busno, > + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); > +} > + > +static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) > +{ > + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); > + u32 reg; > + > + if (!edev->pcie_cap) > + return -ENOTTY; Can pnv_eeh_do_flr() be really called on a non PCIe device, can we get that far? WARN_ON_ONCE() may be? > + > + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); ... and here about uninitialized @reg. > + if (!(reg & PCI_EXP_DEVCAP_FLR)) > + return -ENOTTY; > + > + switch (option) { > + case EEH_RESET_HOT: > + case EEH_RESET_FUNDAMENTAL: > + pnv_eeh_wait_for_pending(pdn, edev->pcie_cap + PCI_EXP_DEVSTA, > + PCI_EXP_DEVSTA_TRPND, false); > + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, > + 4, ®); > + reg |= PCI_EXP_DEVCTL_BCR_FLR; > + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, > + 4, reg); > + msleep(EEH_PE_RST_HOLD_TIME); > + break; > + case EEH_RESET_DEACTIVATE: > + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, > + 4, ®); > + reg &= ~PCI_EXP_DEVCTL_BCR_FLR; > + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, > + 4, reg); > + msleep(EEH_PE_RST_SETTLE_TIME); > + break; > + } > + > + return 0; > +} > + > +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) > +{ > + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); > + u32 cap; > + > + if (!edev->af_cap) > + return -ENOTTY; > + > + eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); ... and here about @cap. > + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) > + return -ENOTTY; > + > + switch (option) { > + case EEH_RESET_HOT: > + case EEH_RESET_FUNDAMENTAL: > + /* > + * Wait for Transaction Pending bit to clear. A word-aligned > + * test is used, so we use the conrol offset rather than status > + * and shift the test bit to match. Why word-aligned (not byte or double word)? > + */ > + pnv_eeh_wait_for_pending(pdn, edev->af_cap + PCI_AF_CTRL, > + PCI_AF_STATUS_TP << 8, true); > + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, > + 1, PCI_AF_CTRL_FLR); > + msleep(EEH_PE_RST_HOLD_TIME); > + break; > + case EEH_RESET_DEACTIVATE: > + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); > + msleep(EEH_PE_RST_SETTLE_TIME); btw there is an unrelated issue with EEH_PE_RST_SETTLE_TIME which is defined as 1800 which is A LOT (+250ms from EEH_PE_RST_HOLD_TIME and for some reason this is actually doubled so there is another reset somewhere). Booting a guest with 63 VFs takes 6 minutes or so, is there a good reason for such a huge timeout? > + break; > + } > + > + return 0; > +} > + > +static int pnv_eeh_reset_vf(struct pci_dn *pdn, int option) > +{ > + int ret; > + > + ret = pnv_eeh_do_flr(pdn, option); > + if (ret != -ENOTTY) > + return ret; > + > + return pnv_eeh_do_af_flr(pdn, option); > +} > + > +static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option) > +{ > + struct eeh_dev *edev, *tmp; > + struct pci_dn *pdn; > + int ret; > + > + eeh_pe_for_each_dev(pe, edev, tmp) { > + pdn = eeh_dev_to_pdn(edev); > + ret = pnv_eeh_reset_vf(pdn, option); > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > void pnv_pci_reset_secondary_bus(struct pci_dev *dev) > { > struct pci_controller *hose; > @@ -968,7 +1090,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) > } > > bus = eeh_pe_bus_get(pe); > - if (pci_is_root_bus(bus) || > + if (pe->type & EEH_PE_VF) > + ret = pnv_eeh_vf_pe_reset(pe, option); > + else if (pci_is_root_bus(bus) || > pci_is_root_bus(bus->parent)) > ret = pnv_eeh_root_reset(hose, option); > else > @@ -1108,6 +1232,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) > if (!edev || !edev->pe) > return false; > > + /* > + * We will issue FLR or AF FLR to all VFs, which are contained > + * in VF PE. It relies on the EEH PCI config accessors. So we > + * can't block them during the window. > + */ > + if ((edev->physfn) && (edev->pe->state & EEH_PE_RESET)) Extra braces around edev->physfn. > + return false; > + > if (edev->pe->state & EEH_PE_CFG_BLOCKED) > return true; > >
On Fri, Oct 30, 2015 at 03:11:20PM +1100, Alexey Kardashevskiy wrote: >On 10/26/2015 02:15 PM, Wei Yang wrote: >>PEs for VFs don't have primary bus. So they have to have their own reset >>backend, which is used during EEH recovery. The patch implements the reset >>backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained >>in the PE. >> >>[gwshan: changelog and code refactoring] >>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com> >>--- >> arch/powerpc/include/asm/eeh.h | 1 + >> arch/powerpc/platforms/powernv/eeh-powernv.c | 134 ++++++++++++++++++++++++++- >> 2 files changed, 134 insertions(+), 1 deletion(-) >> >>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h >>index ec21f8f..331c856 100644 >>--- a/arch/powerpc/include/asm/eeh.h >>+++ b/arch/powerpc/include/asm/eeh.h >>@@ -136,6 +136,7 @@ struct eeh_dev { >> int pcix_cap; /* Saved PCIx capability */ >> int pcie_cap; /* Saved PCIe capability */ >> int aer_cap; /* Saved AER capability */ >>+ int af_cap; /* Saved AF capability */ >> struct eeh_pe *pe; /* Associated PE */ >> struct list_head list; /* Form link list in the PE */ >> struct pci_controller *phb; /* Associated PHB */ >>diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c >>index cfd55dd..017cd72 100644 >>--- a/arch/powerpc/platforms/powernv/eeh-powernv.c >>+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c >>@@ -404,6 +404,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) >> edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); >> edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); >> edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); >>+ edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); >> if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { >> edev->mode |= EEH_DEV_BRIDGE; >> if (edev->pcie_cap) { >>@@ -893,6 +894,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) >> return 0; >> } >> >>+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos, >>+ u16 mask, bool af_flr_rst) >>+{ >>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>+ int status, i; >>+ >>+ /* Wait for Transaction Pending bit to be cleared */ >>+ for (i = 0; i < 4; i++) { >>+ eeh_ops->read_config(pdn, pos, 2, &status); > > >gcc should have complained on using uninitialized @status here. > I remove the obj file and re-compile the file, not the warning. And took a look at other places where read_config() is called. The laster parameter is not initialized before called. You see the error during build? > >>+ if (!(status & mask)) >>+ return; >>+ >>+ msleep((1 << i) * 100); >>+ } >>+ >>+ pr_warn("%s: Pending transaction while issuing %s FLR to " >>+ "%04x:%02x:%02x.%01x\n", > >Do not wrap user-visible strings. > Will change this. > >>+ __func__, af_flr_rst ? "AF" : "", >>+ edev->phb->global_number, pdn->busno, >>+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); >>+} >>+ >>+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) >>+{ >>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>+ u32 reg; >>+ >>+ if (!edev->pcie_cap) >>+ return -ENOTTY; > > >Can pnv_eeh_do_flr() be really called on a non PCIe device, can we get that >far? WARN_ON_ONCE() may be? > So you suggest to add a WARN_ON_ONCE() in this condition, right? > >>+ >>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); > > >... and here about uninitialized @reg. > > >>+ if (!(reg & PCI_EXP_DEVCAP_FLR)) >>+ return -ENOTTY; >>+ >>+ switch (option) { >>+ case EEH_RESET_HOT: >>+ case EEH_RESET_FUNDAMENTAL: >>+ pnv_eeh_wait_for_pending(pdn, edev->pcie_cap + PCI_EXP_DEVSTA, >>+ PCI_EXP_DEVSTA_TRPND, false); >>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>+ 4, ®); >>+ reg |= PCI_EXP_DEVCTL_BCR_FLR; >>+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>+ 4, reg); >>+ msleep(EEH_PE_RST_HOLD_TIME); >>+ break; >>+ case EEH_RESET_DEACTIVATE: >>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>+ 4, ®); >>+ reg &= ~PCI_EXP_DEVCTL_BCR_FLR; >>+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>+ 4, reg); >>+ msleep(EEH_PE_RST_SETTLE_TIME); >>+ break; >>+ } >>+ >>+ return 0; >>+} >>+ >>+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) >>+{ >>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>+ u32 cap; >>+ >>+ if (!edev->af_cap) >>+ return -ENOTTY; >>+ >>+ eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); > > >... and here about @cap. > >>+ if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) >>+ return -ENOTTY; >>+ >>+ switch (option) { >>+ case EEH_RESET_HOT: >>+ case EEH_RESET_FUNDAMENTAL: >>+ /* >>+ * Wait for Transaction Pending bit to clear. A word-aligned >>+ * test is used, so we use the conrol offset rather than status >>+ * and shift the test bit to match. > > >Why word-aligned (not byte or double word)? > I copied this words from pci_af_flr(). Actually, I don't tried to understand this reason. >>+ */ >>+ pnv_eeh_wait_for_pending(pdn, edev->af_cap + PCI_AF_CTRL, >>+ PCI_AF_STATUS_TP << 8, true); >>+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, >>+ 1, PCI_AF_CTRL_FLR); >>+ msleep(EEH_PE_RST_HOLD_TIME); >>+ break; >>+ case EEH_RESET_DEACTIVATE: >>+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); >>+ msleep(EEH_PE_RST_SETTLE_TIME); > > >btw there is an unrelated issue with EEH_PE_RST_SETTLE_TIME which is defined >as 1800 which is A LOT (+250ms from EEH_PE_RST_HOLD_TIME and for some reason >this is actually doubled so there is another reset somewhere). > I don't know the reason for this value. This code keeps aligned with other reset functions, like pnv_eeh_bridge_reset(). >Booting a guest with 63 VFs takes 6 minutes or so, is there a good reason for >such a huge timeout? > > >>+ break; >>+ } >>+ >>+ return 0; >>+} >>+ >>+static int pnv_eeh_reset_vf(struct pci_dn *pdn, int option) >>+{ >>+ int ret; >>+ >>+ ret = pnv_eeh_do_flr(pdn, option); >>+ if (ret != -ENOTTY) >>+ return ret; >>+ >>+ return pnv_eeh_do_af_flr(pdn, option); >>+} >>+ >>+static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option) >>+{ >>+ struct eeh_dev *edev, *tmp; >>+ struct pci_dn *pdn; >>+ int ret; >>+ >>+ eeh_pe_for_each_dev(pe, edev, tmp) { >>+ pdn = eeh_dev_to_pdn(edev); >>+ ret = pnv_eeh_reset_vf(pdn, option); >>+ if (ret) >>+ return ret; >>+ } >>+ >>+ return 0; >>+} >>+ >> void pnv_pci_reset_secondary_bus(struct pci_dev *dev) >> { >> struct pci_controller *hose; >>@@ -968,7 +1090,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) >> } >> >> bus = eeh_pe_bus_get(pe); >>- if (pci_is_root_bus(bus) || >>+ if (pe->type & EEH_PE_VF) >>+ ret = pnv_eeh_vf_pe_reset(pe, option); >>+ else if (pci_is_root_bus(bus) || >> pci_is_root_bus(bus->parent)) >> ret = pnv_eeh_root_reset(hose, option); >> else >>@@ -1108,6 +1232,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) >> if (!edev || !edev->pe) >> return false; >> >>+ /* >>+ * We will issue FLR or AF FLR to all VFs, which are contained >>+ * in VF PE. It relies on the EEH PCI config accessors. So we >>+ * can't block them during the window. >>+ */ >>+ if ((edev->physfn) && (edev->pe->state & EEH_PE_RESET)) > > >Extra braces around edev->physfn. > Will remove it. > > >>+ return false; >>+ >> if (edev->pe->state & EEH_PE_CFG_BLOCKED) >> return true; >> >> > > >-- >Alexey
On 10/30/2015 06:18 PM, Wei Yang wrote: > On Fri, Oct 30, 2015 at 03:11:20PM +1100, Alexey Kardashevskiy wrote: >> On 10/26/2015 02:15 PM, Wei Yang wrote: >>> PEs for VFs don't have primary bus. So they have to have their own reset >>> backend, which is used during EEH recovery. The patch implements the reset >>> backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained >>> in the PE. >>> >>> [gwshan: changelog and code refactoring] >>> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>> Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com> >>> --- >>> arch/powerpc/include/asm/eeh.h | 1 + >>> arch/powerpc/platforms/powernv/eeh-powernv.c | 134 ++++++++++++++++++++++++++- >>> 2 files changed, 134 insertions(+), 1 deletion(-) >>> >>> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h >>> index ec21f8f..331c856 100644 >>> --- a/arch/powerpc/include/asm/eeh.h >>> +++ b/arch/powerpc/include/asm/eeh.h >>> @@ -136,6 +136,7 @@ struct eeh_dev { >>> int pcix_cap; /* Saved PCIx capability */ >>> int pcie_cap; /* Saved PCIe capability */ >>> int aer_cap; /* Saved AER capability */ >>> + int af_cap; /* Saved AF capability */ >>> struct eeh_pe *pe; /* Associated PE */ >>> struct list_head list; /* Form link list in the PE */ >>> struct pci_controller *phb; /* Associated PHB */ >>> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c >>> index cfd55dd..017cd72 100644 >>> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c >>> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c >>> @@ -404,6 +404,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) >>> edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); >>> edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); >>> edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); >>> + edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); >>> if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { >>> edev->mode |= EEH_DEV_BRIDGE; >>> if (edev->pcie_cap) { >>> @@ -893,6 +894,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) >>> return 0; >>> } >>> >>> +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos, >>> + u16 mask, bool af_flr_rst) Missed this - @af_flr_rst is only used for warnings so better do: s/bool af_flr_rst/const char *reset_type/ to make it explicit. >>> +{ >>> + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>> + int status, i; >>> + >>> + /* Wait for Transaction Pending bit to be cleared */ >>> + for (i = 0; i < 4; i++) { >>> + eeh_ops->read_config(pdn, pos, 2, &status); >> >> >> gcc should have complained on using uninitialized @status here. >> > > I remove the obj file and re-compile the file, not the warning. Hm. Does not warn me either. > And took a look at other places where read_config() is called. The laster > parameter is not initialized before called. So? It does not make it right. > You see the error during build? Why does it matter? We have an undefined behavior here which we should not. You could test the return values from read_config() but you do not so at least initialize local variables. > >> >>> + if (!(status & mask)) >>> + return; >>> + >>> + msleep((1 << i) * 100); >>> + } >>> + >>> + pr_warn("%s: Pending transaction while issuing %s FLR to " >>> + "%04x:%02x:%02x.%01x\n", >> >> Do not wrap user-visible strings. >> > > Will change this. > >> >>> + __func__, af_flr_rst ? "AF" : "", >>> + edev->phb->global_number, pdn->busno, >>> + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); >>> +} >>> + >>> +static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) >>> +{ >>> + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>> + u32 reg; >>> + >>> + if (!edev->pcie_cap) >>> + return -ENOTTY; >> >> >> Can pnv_eeh_do_flr() be really called on a non PCIe device, can we get that >> far? WARN_ON_ONCE() may be? >> > > So you suggest to add a WARN_ON_ONCE() in this condition, right? I am asking a question here whether it makes sense or not to add a WARN_ON_ONCE or replace "if" with WARN_ON_ONCE or not having pcie_cap initialized is possible in this code - which one is it? > >> >>> + >>> + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); >> >> >> ... and here about uninitialized @reg. >> >> >>> + if (!(reg & PCI_EXP_DEVCAP_FLR)) >>> + return -ENOTTY; >>> + >>> + switch (option) { >>> + case EEH_RESET_HOT: >>> + case EEH_RESET_FUNDAMENTAL: >>> + pnv_eeh_wait_for_pending(pdn, edev->pcie_cap + PCI_EXP_DEVSTA, >>> + PCI_EXP_DEVSTA_TRPND, false); >>> + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>> + 4, ®); >>> + reg |= PCI_EXP_DEVCTL_BCR_FLR; >>> + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>> + 4, reg); >>> + msleep(EEH_PE_RST_HOLD_TIME); >>> + break; >>> + case EEH_RESET_DEACTIVATE: >>> + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>> + 4, ®); >>> + reg &= ~PCI_EXP_DEVCTL_BCR_FLR; >>> + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>> + 4, reg); >>> + msleep(EEH_PE_RST_SETTLE_TIME); >>> + break; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) >>> +{ >>> + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>> + u32 cap; >>> + >>> + if (!edev->af_cap) >>> + return -ENOTTY; >>> + >>> + eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); >> >> >> ... and here about @cap. >> >>> + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) >>> + return -ENOTTY; >>> + >>> + switch (option) { >>> + case EEH_RESET_HOT: >>> + case EEH_RESET_FUNDAMENTAL: >>> + /* >>> + * Wait for Transaction Pending bit to clear. A word-aligned >>> + * test is used, so we use the conrol offset rather than status >>> + * and shift the test bit to match. >> >> >> Why word-aligned (not byte or double word)? >> > > I copied this words from pci_af_flr(). Actually, I don't tried to understand > this reason. Ok. I looked at pci_af_flr(). In this patch, the comment before pnv_eeh_wait_for_pending() is missing, something like "pnv_eeh_wait_for_pending() uses a word-size accessor so @pos must be work-aligned". > >>> + */ >>> + pnv_eeh_wait_for_pending(pdn, edev->af_cap + PCI_AF_CTRL, >>> + PCI_AF_STATUS_TP << 8, true); >>> + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, >>> + 1, PCI_AF_CTRL_FLR); >>> + msleep(EEH_PE_RST_HOLD_TIME); >>> + break; >>> + case EEH_RESET_DEACTIVATE: >>> + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); >>> + msleep(EEH_PE_RST_SETTLE_TIME); >> >> >> btw there is an unrelated issue with EEH_PE_RST_SETTLE_TIME which is defined >> as 1800 which is A LOT (+250ms from EEH_PE_RST_HOLD_TIME and for some reason >> this is actually doubled so there is another reset somewhere). >> > > I don't know the reason for this value. This code keeps aligned with other > reset functions, like pnv_eeh_bridge_reset(). Are they all in POWERNV/EEH or generic PCI uses same values on, for example, x86? >> Booting a guest with 63 VFs takes 6 minutes or so, is there a good reason for >> such a huge timeout? >> >> >>> + break; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> +static int pnv_eeh_reset_vf(struct pci_dn *pdn, int option) >>> +{ >>> + int ret; >>> + >>> + ret = pnv_eeh_do_flr(pdn, option); >>> + if (ret != -ENOTTY) >>> + return ret; >>> + >>> + return pnv_eeh_do_af_flr(pdn, option); >>> +} >>> + >>> +static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option) >>> +{ >>> + struct eeh_dev *edev, *tmp; >>> + struct pci_dn *pdn; >>> + int ret; >>> + >>> + eeh_pe_for_each_dev(pe, edev, tmp) { >>> + pdn = eeh_dev_to_pdn(edev); >>> + ret = pnv_eeh_reset_vf(pdn, option); >>> + if (ret) >>> + return ret; >>> + } >>> + >>> + return 0; >>> +} >>> + >>> void pnv_pci_reset_secondary_bus(struct pci_dev *dev) >>> { >>> struct pci_controller *hose; >>> @@ -968,7 +1090,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) >>> } >>> >>> bus = eeh_pe_bus_get(pe); >>> - if (pci_is_root_bus(bus) || >>> + if (pe->type & EEH_PE_VF) >>> + ret = pnv_eeh_vf_pe_reset(pe, option); >>> + else if (pci_is_root_bus(bus) || >>> pci_is_root_bus(bus->parent)) >>> ret = pnv_eeh_root_reset(hose, option); >>> else >>> @@ -1108,6 +1232,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) >>> if (!edev || !edev->pe) >>> return false; >>> >>> + /* >>> + * We will issue FLR or AF FLR to all VFs, which are contained >>> + * in VF PE. It relies on the EEH PCI config accessors. So we >>> + * can't block them during the window. >>> + */ >>> + if ((edev->physfn) && (edev->pe->state & EEH_PE_RESET)) >> >> >> Extra braces around edev->physfn. >> > > Will remove it. > >> >> >>> + return false; >>> + >>> if (edev->pe->state & EEH_PE_CFG_BLOCKED) >>> return true; >>> >>>
On Fri, Oct 30, 2015 at 07:05:05PM +1100, Alexey Kardashevskiy wrote: >On 10/30/2015 06:18 PM, Wei Yang wrote: >>On Fri, Oct 30, 2015 at 03:11:20PM +1100, Alexey Kardashevskiy wrote: >>>On 10/26/2015 02:15 PM, Wei Yang wrote: >>>>PEs for VFs don't have primary bus. So they have to have their own reset >>>>backend, which is used during EEH recovery. The patch implements the reset >>>>backend for VF's PE by issuing FLR or AF FLR to the VFs, which are contained >>>>in the PE. >>>> >>>>[gwshan: changelog and code refactoring] >>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>>>Acked-by: Gavin Shan <gwshan@linux.vnet.ibm.com> >>>>--- >>>> arch/powerpc/include/asm/eeh.h | 1 + >>>> arch/powerpc/platforms/powernv/eeh-powernv.c | 134 ++++++++++++++++++++++++++- >>>> 2 files changed, 134 insertions(+), 1 deletion(-) >>>> >>>>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h >>>>index ec21f8f..331c856 100644 >>>>--- a/arch/powerpc/include/asm/eeh.h >>>>+++ b/arch/powerpc/include/asm/eeh.h >>>>@@ -136,6 +136,7 @@ struct eeh_dev { >>>> int pcix_cap; /* Saved PCIx capability */ >>>> int pcie_cap; /* Saved PCIe capability */ >>>> int aer_cap; /* Saved AER capability */ >>>>+ int af_cap; /* Saved AF capability */ >>>> struct eeh_pe *pe; /* Associated PE */ >>>> struct list_head list; /* Form link list in the PE */ >>>> struct pci_controller *phb; /* Associated PHB */ >>>>diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c >>>>index cfd55dd..017cd72 100644 >>>>--- a/arch/powerpc/platforms/powernv/eeh-powernv.c >>>>+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c >>>>@@ -404,6 +404,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) >>>> edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); >>>> edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); >>>> edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); >>>>+ edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); >>>> if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { >>>> edev->mode |= EEH_DEV_BRIDGE; >>>> if (edev->pcie_cap) { >>>>@@ -893,6 +894,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) >>>> return 0; >>>> } >>>> >>>>+static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos, >>>>+ u16 mask, bool af_flr_rst) > >Missed this - @af_flr_rst is only used for warnings so better do: >s/bool af_flr_rst/const char *reset_type/ >to make it explicit. > Looks good, will change in next version. > >>>>+{ >>>>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>>>+ int status, i; >>>>+ >>>>+ /* Wait for Transaction Pending bit to be cleared */ >>>>+ for (i = 0; i < 4; i++) { >>>>+ eeh_ops->read_config(pdn, pos, 2, &status); >>> >>> >>>gcc should have complained on using uninitialized @status here. >>> >> >>I remove the obj file and re-compile the file, not the warning. > >Hm. Does not warn me either. > >>And took a look at other places where read_config() is called. The laster >>parameter is not initialized before called. > >So? It does not make it right. > >>You see the error during build? > >Why does it matter? We have an undefined behavior here which we should not. >You could test the return values from read_config() but you do not so at >least initialize local variables. > I believe your concern is reasonable. I suggest to have a separate patch to fix the read_config() by initialize the last parameter. > >> >>> >>>>+ if (!(status & mask)) >>>>+ return; >>>>+ >>>>+ msleep((1 << i) * 100); >>>>+ } >>>>+ >>>>+ pr_warn("%s: Pending transaction while issuing %s FLR to " >>>>+ "%04x:%02x:%02x.%01x\n", >>> >>>Do not wrap user-visible strings. >>> >> >>Will change this. >> >>> >>>>+ __func__, af_flr_rst ? "AF" : "", >>>>+ edev->phb->global_number, pdn->busno, >>>>+ PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); >>>>+} >>>>+ >>>>+static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) >>>>+{ >>>>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>>>+ u32 reg; >>>>+ >>>>+ if (!edev->pcie_cap) >>>>+ return -ENOTTY; >>> >>> >>>Can pnv_eeh_do_flr() be really called on a non PCIe device, can we get that >>>far? WARN_ON_ONCE() may be? >>> >> >>So you suggest to add a WARN_ON_ONCE() in this condition, right? > >I am asking a question here whether it makes sense or not to add a >WARN_ON_ONCE or replace "if" with WARN_ON_ONCE or not having pcie_cap >initialized is possible in this code - which one is it? > I think the check here is reasonable. In the body of this function, pcie_cap is used to access the config space. If we remove this, it would be a chance to access a not correct area. > >> >>> >>>>+ >>>>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); >>> >>> >>>... and here about uninitialized @reg. >>> >>> >>>>+ if (!(reg & PCI_EXP_DEVCAP_FLR)) >>>>+ return -ENOTTY; >>>>+ >>>>+ switch (option) { >>>>+ case EEH_RESET_HOT: >>>>+ case EEH_RESET_FUNDAMENTAL: >>>>+ pnv_eeh_wait_for_pending(pdn, edev->pcie_cap + PCI_EXP_DEVSTA, >>>>+ PCI_EXP_DEVSTA_TRPND, false); >>>>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>>>+ 4, ®); >>>>+ reg |= PCI_EXP_DEVCTL_BCR_FLR; >>>>+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>>>+ 4, reg); >>>>+ msleep(EEH_PE_RST_HOLD_TIME); >>>>+ break; >>>>+ case EEH_RESET_DEACTIVATE: >>>>+ eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>>>+ 4, ®); >>>>+ reg &= ~PCI_EXP_DEVCTL_BCR_FLR; >>>>+ eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, >>>>+ 4, reg); >>>>+ msleep(EEH_PE_RST_SETTLE_TIME); >>>>+ break; >>>>+ } >>>>+ >>>>+ return 0; >>>>+} >>>>+ >>>>+static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) >>>>+{ >>>>+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn); >>>>+ u32 cap; >>>>+ >>>>+ if (!edev->af_cap) >>>>+ return -ENOTTY; >>>>+ >>>>+ eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); >>> >>> >>>... and here about @cap. >>> >>>>+ if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) >>>>+ return -ENOTTY; >>>>+ >>>>+ switch (option) { >>>>+ case EEH_RESET_HOT: >>>>+ case EEH_RESET_FUNDAMENTAL: >>>>+ /* >>>>+ * Wait for Transaction Pending bit to clear. A word-aligned >>>>+ * test is used, so we use the conrol offset rather than status >>>>+ * and shift the test bit to match. >>> >>> >>>Why word-aligned (not byte or double word)? >>> >> >>I copied this words from pci_af_flr(). Actually, I don't tried to understand >>this reason. > >Ok. I looked at pci_af_flr(). > >In this patch, the comment before pnv_eeh_wait_for_pending() is missing, >something like "pnv_eeh_wait_for_pending() uses a word-size accessor so @pos >must be work-aligned". > > >> >>>>+ */ >>>>+ pnv_eeh_wait_for_pending(pdn, edev->af_cap + PCI_AF_CTRL, >>>>+ PCI_AF_STATUS_TP << 8, true); >>>>+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, >>>>+ 1, PCI_AF_CTRL_FLR); >>>>+ msleep(EEH_PE_RST_HOLD_TIME); >>>>+ break; >>>>+ case EEH_RESET_DEACTIVATE: >>>>+ eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); >>>>+ msleep(EEH_PE_RST_SETTLE_TIME); >>> >>> >>>btw there is an unrelated issue with EEH_PE_RST_SETTLE_TIME which is defined >>>as 1800 which is A LOT (+250ms from EEH_PE_RST_HOLD_TIME and for some reason >>>this is actually doubled so there is another reset somewhere). >>> >> >>I don't know the reason for this value. This code keeps aligned with other >>reset functions, like pnv_eeh_bridge_reset(). > > >Are they all in POWERNV/EEH or generic PCI uses same values on, for example, >x86? > > > >>>Booting a guest with 63 VFs takes 6 minutes or so, is there a good reason for >>>such a huge timeout? >>> >>> >>>>+ break; >>>>+ } >>>>+ >>>>+ return 0; >>>>+} >>>>+ >>>>+static int pnv_eeh_reset_vf(struct pci_dn *pdn, int option) >>>>+{ >>>>+ int ret; >>>>+ >>>>+ ret = pnv_eeh_do_flr(pdn, option); >>>>+ if (ret != -ENOTTY) >>>>+ return ret; >>>>+ >>>>+ return pnv_eeh_do_af_flr(pdn, option); >>>>+} >>>>+ >>>>+static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option) >>>>+{ >>>>+ struct eeh_dev *edev, *tmp; >>>>+ struct pci_dn *pdn; >>>>+ int ret; >>>>+ >>>>+ eeh_pe_for_each_dev(pe, edev, tmp) { >>>>+ pdn = eeh_dev_to_pdn(edev); >>>>+ ret = pnv_eeh_reset_vf(pdn, option); >>>>+ if (ret) >>>>+ return ret; >>>>+ } >>>>+ >>>>+ return 0; >>>>+} >>>>+ >>>> void pnv_pci_reset_secondary_bus(struct pci_dev *dev) >>>> { >>>> struct pci_controller *hose; >>>>@@ -968,7 +1090,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) >>>> } >>>> >>>> bus = eeh_pe_bus_get(pe); >>>>- if (pci_is_root_bus(bus) || >>>>+ if (pe->type & EEH_PE_VF) >>>>+ ret = pnv_eeh_vf_pe_reset(pe, option); >>>>+ else if (pci_is_root_bus(bus) || >>>> pci_is_root_bus(bus->parent)) >>>> ret = pnv_eeh_root_reset(hose, option); >>>> else >>>>@@ -1108,6 +1232,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) >>>> if (!edev || !edev->pe) >>>> return false; >>>> >>>>+ /* >>>>+ * We will issue FLR or AF FLR to all VFs, which are contained >>>>+ * in VF PE. It relies on the EEH PCI config accessors. So we >>>>+ * can't block them during the window. >>>>+ */ >>>>+ if ((edev->physfn) && (edev->pe->state & EEH_PE_RESET)) >>> >>> >>>Extra braces around edev->physfn. >>> >> >>Will remove it. >> >>> >>> >>>>+ return false; >>>>+ >>>> if (edev->pe->state & EEH_PE_CFG_BLOCKED) >>>> return true; >>>> >>>> > > >-- >Alexey
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index ec21f8f..331c856 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -136,6 +136,7 @@ struct eeh_dev { int pcix_cap; /* Saved PCIx capability */ int pcie_cap; /* Saved PCIe capability */ int aer_cap; /* Saved AER capability */ + int af_cap; /* Saved AF capability */ struct eeh_pe *pe; /* Associated PE */ struct list_head list; /* Form link list in the PE */ struct pci_controller *phb; /* Associated PHB */ diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index cfd55dd..017cd72 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -404,6 +404,7 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); + edev->af_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF); if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; if (edev->pcie_cap) { @@ -893,6 +894,127 @@ static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) return 0; } +static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, int pos, + u16 mask, bool af_flr_rst) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int status, i; + + /* Wait for Transaction Pending bit to be cleared */ + for (i = 0; i < 4; i++) { + eeh_ops->read_config(pdn, pos, 2, &status); + if (!(status & mask)) + return; + + msleep((1 << i) * 100); + } + + pr_warn("%s: Pending transaction while issuing %s FLR to " + "%04x:%02x:%02x.%01x\n", + __func__, af_flr_rst ? "AF" : "", + edev->phb->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); +} + +static int pnv_eeh_do_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 reg; + + if (!edev->pcie_cap) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, ®); + if (!(reg & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + pnv_eeh_wait_for_pending(pdn, edev->pcie_cap + PCI_EXP_DEVSTA, + PCI_EXP_DEVSTA_TRPND, false); + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg |= PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, ®); + reg &= ~PCI_EXP_DEVCTL_BCR_FLR; + eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, + 4, reg); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_do_af_flr(struct pci_dn *pdn, int option) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 cap; + + if (!edev->af_cap) + return -ENOTTY; + + eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap); + if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR)) + return -ENOTTY; + + switch (option) { + case EEH_RESET_HOT: + case EEH_RESET_FUNDAMENTAL: + /* + * Wait for Transaction Pending bit to clear. A word-aligned + * test is used, so we use the conrol offset rather than status + * and shift the test bit to match. + */ + pnv_eeh_wait_for_pending(pdn, edev->af_cap + PCI_AF_CTRL, + PCI_AF_STATUS_TP << 8, true); + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, + 1, PCI_AF_CTRL_FLR); + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0); + msleep(EEH_PE_RST_SETTLE_TIME); + break; + } + + return 0; +} + +static int pnv_eeh_reset_vf(struct pci_dn *pdn, int option) +{ + int ret; + + ret = pnv_eeh_do_flr(pdn, option); + if (ret != -ENOTTY) + return ret; + + return pnv_eeh_do_af_flr(pdn, option); +} + +static int pnv_eeh_vf_pe_reset(struct eeh_pe *pe, int option) +{ + struct eeh_dev *edev, *tmp; + struct pci_dn *pdn; + int ret; + + eeh_pe_for_each_dev(pe, edev, tmp) { + pdn = eeh_dev_to_pdn(edev); + ret = pnv_eeh_reset_vf(pdn, option); + if (ret) + return ret; + } + + return 0; +} + void pnv_pci_reset_secondary_bus(struct pci_dev *dev) { struct pci_controller *hose; @@ -968,7 +1090,9 @@ static int pnv_eeh_reset(struct eeh_pe *pe, int option) } bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus) || + if (pe->type & EEH_PE_VF) + ret = pnv_eeh_vf_pe_reset(pe, option); + else if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) ret = pnv_eeh_root_reset(hose, option); else @@ -1108,6 +1232,14 @@ static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) if (!edev || !edev->pe) return false; + /* + * We will issue FLR or AF FLR to all VFs, which are contained + * in VF PE. It relies on the EEH PCI config accessors. So we + * can't block them during the window. + */ + if ((edev->physfn) && (edev->pe->state & EEH_PE_RESET)) + return false; + if (edev->pe->state & EEH_PE_CFG_BLOCKED) return true;