Message ID | 1416624759-13543-1-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >affect the offset and stride. Current implementation doesn't refresh the >offset/stride cached in pci_sriov structure. > >This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >value after NumVFs is written. > >Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >--- > drivers/pci/iov.c | 17 +++++++++++++---- > 1 file changed, 13 insertions(+), 4 deletions(-) > >diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >index 4d109c0..c7010c5 100644 >--- a/drivers/pci/iov.c >+++ b/drivers/pci/iov.c >@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) > dev->sriov->stride * id) & 0xff; > } > >+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >+{ >+ struct pci_sriov *iov = dev->sriov; >+ >+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take effect. >+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >+} >+ > static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) > { > struct pci_bus *child; >@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) > return rc; > } > >- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >+ pci_iov_set_numvfs(dev, nr_virtfn); > iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; > pci_cfg_access_lock(dev); > pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >@@ -272,7 +281,7 @@ failed: > iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); > pci_cfg_access_lock(dev); > pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >+ pci_iov_set_numvfs(dev, 0); > ssleep(1); The 1 second delay here might be for waiting VFs to be ready. > pci_cfg_access_unlock(dev); > >@@ -303,7 +312,7 @@ static void sriov_disable(struct pci_dev *dev) > sysfs_remove_link(&dev->dev.kobj, "dep_link"); > > iov->num_VFs = 0; >- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >+ pci_iov_set_numvfs(dev, 0); > } > > static int sriov_init(struct pci_dev *dev, int pos) >@@ -439,7 +448,7 @@ static void sriov_restore_state(struct pci_dev *dev) > pci_update_resource(dev, i); > > pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); >- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs); >+ pci_iov_set_numvfs(dev, iov->num_VFs); > pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); > if (iov->ctrl & PCI_SRIOV_CTRL_VFE) > msleep(100); Here's another delay if PCI_SRIOV_CTRL_VFE isn't set previously. Thanks, Gavin >-- >1.7.9.5 > >-- >To unsubscribe from this list: send the line "unsubscribe linux-pci" in >the body of a message to majordomo@vger.kernel.org >More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote: >On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >>affect the offset and stride. Current implementation doesn't refresh the >>offset/stride cached in pci_sriov structure. >> >>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >>value after NumVFs is written. >> >>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>--- >> drivers/pci/iov.c | 17 +++++++++++++---- >> 1 file changed, 13 insertions(+), 4 deletions(-) >> >>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >>index 4d109c0..c7010c5 100644 >>--- a/drivers/pci/iov.c >>+++ b/drivers/pci/iov.c >>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) >> dev->sriov->stride * id) & 0xff; >> } >> >>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >>+{ >>+ struct pci_sriov *iov = dev->sriov; >>+ >>+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); > >I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take >effect. > >>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >>+} >>+ >> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) >> { >> struct pci_bus *child; >>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) >> return rc; >> } >> >>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>+ pci_iov_set_numvfs(dev, nr_virtfn); >> iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; >> pci_cfg_access_lock(dev); >> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>@@ -272,7 +281,7 @@ failed: >> iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); >> pci_cfg_access_lock(dev); >> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>+ pci_iov_set_numvfs(dev, 0); >> ssleep(1); > >The 1 second delay here might be for waiting VFs to be ready. > Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? >> pci_cfg_access_unlock(dev); >> >>@@ -303,7 +312,7 @@ static void sriov_disable(struct pci_dev *dev) >> sysfs_remove_link(&dev->dev.kobj, "dep_link"); >> >> iov->num_VFs = 0; >>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>+ pci_iov_set_numvfs(dev, 0); >> } >> >> static int sriov_init(struct pci_dev *dev, int pos) >>@@ -439,7 +448,7 @@ static void sriov_restore_state(struct pci_dev *dev) >> pci_update_resource(dev, i); >> >> pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); >>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs); >>+ pci_iov_set_numvfs(dev, iov->num_VFs); >> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >> if (iov->ctrl & PCI_SRIOV_CTRL_VFE) >> msleep(100); > >Here's another delay if PCI_SRIOV_CTRL_VFE isn't set previously. The logic here will not be changed. > >Thanks, >Gavin > >>-- >>1.7.9.5 >> >>-- >>To unsubscribe from this list: send the line "unsubscribe linux-pci" in >>the body of a message to majordomo@vger.kernel.org >>More majordomo info at http://vger.kernel.org/majordomo-info.html >>
On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote: >On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote: >>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >>>affect the offset and stride. Current implementation doesn't refresh the >>>offset/stride cached in pci_sriov structure. >>> >>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >>>value after NumVFs is written. >>> >>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>>--- >>> drivers/pci/iov.c | 17 +++++++++++++---- >>> 1 file changed, 13 insertions(+), 4 deletions(-) >>> >>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >>>index 4d109c0..c7010c5 100644 >>>--- a/drivers/pci/iov.c >>>+++ b/drivers/pci/iov.c >>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) >>> dev->sriov->stride * id) & 0xff; >>> } >>> >>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >>>+{ >>>+ struct pci_sriov *iov = dev->sriov; >>>+ >>>+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >> >>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take >>effect. >> >>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >>>+} >>>+ >>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) >>> { >>> struct pci_bus *child; >>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) >>> return rc; >>> } >>> >>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>+ pci_iov_set_numvfs(dev, nr_virtfn); >>> iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; >>> pci_cfg_access_lock(dev); >>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>@@ -272,7 +281,7 @@ failed: >>> iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); >>> pci_cfg_access_lock(dev); >>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>>+ pci_iov_set_numvfs(dev, 0); >>> ssleep(1); >> >>The 1 second delay here might be for waiting VFs to be ready. >> > >Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? > I was not suggesting to do that. I just raised the concern for you to look into. >>> pci_cfg_access_unlock(dev); >>> >>>@@ -303,7 +312,7 @@ static void sriov_disable(struct pci_dev *dev) >>> sysfs_remove_link(&dev->dev.kobj, "dep_link"); >>> >>> iov->num_VFs = 0; >>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>>+ pci_iov_set_numvfs(dev, 0); >>> } >>> >>> static int sriov_init(struct pci_dev *dev, int pos) >>>@@ -439,7 +448,7 @@ static void sriov_restore_state(struct pci_dev *dev) >>> pci_update_resource(dev, i); >>> >>> pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); >>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs); >>>+ pci_iov_set_numvfs(dev, iov->num_VFs); >>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>> if (iov->ctrl & PCI_SRIOV_CTRL_VFE) >>> msleep(100); >> >>Here's another delay if PCI_SRIOV_CTRL_VFE isn't set previously. > >The logic here will not be changed. > >> >>Thanks, >>Gavin >> >>>-- >>>1.7.9.5 >>> >>>-- >>>To unsubscribe from this list: send the line "unsubscribe linux-pci" in >>>the body of a message to majordomo@vger.kernel.org >>>More majordomo info at http://vger.kernel.org/majordomo-info.html >>> > >-- >Richard Yang >Help you, Help me -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote: >On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote: >>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote: >>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >>>>affect the offset and stride. Current implementation doesn't refresh the >>>>offset/stride cached in pci_sriov structure. >>>> >>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >>>>value after NumVFs is written. >>>> >>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>>>--- >>>> drivers/pci/iov.c | 17 +++++++++++++---- >>>> 1 file changed, 13 insertions(+), 4 deletions(-) >>>> >>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >>>>index 4d109c0..c7010c5 100644 >>>>--- a/drivers/pci/iov.c >>>>+++ b/drivers/pci/iov.c >>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) >>>> dev->sriov->stride * id) & 0xff; >>>> } >>>> >>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >>>>+{ >>>>+ struct pci_sriov *iov = dev->sriov; >>>>+ >>>>+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>> >>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take >>>effect. >>> >>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >>>>+} >>>>+ >>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) >>>> { >>>> struct pci_bus *child; >>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) >>>> return rc; >>>> } >>>> >>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>+ pci_iov_set_numvfs(dev, nr_virtfn); >>>> iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; >>>> pci_cfg_access_lock(dev); >>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>@@ -272,7 +281,7 @@ failed: >>>> iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); >>>> pci_cfg_access_lock(dev); >>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>>>+ pci_iov_set_numvfs(dev, 0); >>>> ssleep(1); >>> >>>The 1 second delay here might be for waiting VFs to be ready. >>> >> >>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? >> > >I was not suggesting to do that. I just raised the concern for you >to look into. > I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this: To allow components to perform internal initialization, system software must wait for at least 100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to return a Successful Completion status for a valid Configuration Request is broken. After Setting the VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after having previously returned a Successful Completion without an intervening VF disable or other valid reset condition. As my understanding, it will take 1ms or 1s after VF Enable bit is set. Actually I am confused with the two different time, in which case we needs to wait for different time? And some place we add a lock, but no lock in others. While from the SPEC, I don't see some description the NumVFs field will take some time to be effective.
On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote: >On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote: >>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote: >>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote: >>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >>>>>affect the offset and stride. Current implementation doesn't refresh the >>>>>offset/stride cached in pci_sriov structure. >>>>> >>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >>>>>value after NumVFs is written. >>>>> >>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>>>>--- >>>>> drivers/pci/iov.c | 17 +++++++++++++---- >>>>> 1 file changed, 13 insertions(+), 4 deletions(-) >>>>> >>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >>>>>index 4d109c0..c7010c5 100644 >>>>>--- a/drivers/pci/iov.c >>>>>+++ b/drivers/pci/iov.c >>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) >>>>> dev->sriov->stride * id) & 0xff; >>>>> } >>>>> >>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >>>>>+{ >>>>>+ struct pci_sriov *iov = dev->sriov; >>>>>+ >>>>>+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>> >>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take >>>>effect. >>>> >>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >>>>>+} >>>>>+ >>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) >>>>> { >>>>> struct pci_bus *child; >>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) >>>>> return rc; >>>>> } >>>>> >>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>>+ pci_iov_set_numvfs(dev, nr_virtfn); >>>>> iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; >>>>> pci_cfg_access_lock(dev); >>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>@@ -272,7 +281,7 @@ failed: >>>>> iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); >>>>> pci_cfg_access_lock(dev); >>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>>>>+ pci_iov_set_numvfs(dev, 0); >>>>> ssleep(1); >>>> >>>>The 1 second delay here might be for waiting VFs to be ready. >>>> >>> >>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? >>> >> >>I was not suggesting to do that. I just raised the concern for you >>to look into. >> > >I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this: > >To allow components to perform internal initialization, system software must wait for at least >100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to >the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must >allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to >return a Successful Completion status for a valid Configuration Request is broken. After Setting the >VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to >configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion >status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after >having previously returned a Successful Completion without an intervening VF disable or other valid >reset condition. > >As my understanding, it will take 1ms or 1s after VF Enable bit is set. >Actually I am confused with the two different time, in which case we needs to >wait for different time? And some place we add a lock, but no lock in others. > Are you talking about 1ms or 100ms? I assume it's 100ms. If I understand things correctly, 100ms delay before issuing config read request, CRS can be returned before it reaches the timeout (1 second). >While from the SPEC, I don't see some description the NumVFs field will take >some time to be effective. > If I'm correct, we can't change NumVFs without disabling/reanbling VFs. Thanks, Gavin -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote: >On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote: >>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote: >>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote: >>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote: >>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >>>>>>affect the offset and stride. Current implementation doesn't refresh the >>>>>>offset/stride cached in pci_sriov structure. >>>>>> >>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >>>>>>value after NumVFs is written. >>>>>> >>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>>>>>--- >>>>>> drivers/pci/iov.c | 17 +++++++++++++---- >>>>>> 1 file changed, 13 insertions(+), 4 deletions(-) >>>>>> >>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >>>>>>index 4d109c0..c7010c5 100644 >>>>>>--- a/drivers/pci/iov.c >>>>>>+++ b/drivers/pci/iov.c >>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) >>>>>> dev->sriov->stride * id) & 0xff; >>>>>> } >>>>>> >>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >>>>>>+{ >>>>>>+ struct pci_sriov *iov = dev->sriov; >>>>>>+ >>>>>>+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>> >>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take >>>>>effect. >>>>> >>>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >>>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >>>>>>+} >>>>>>+ >>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) >>>>>> { >>>>>> struct pci_bus *child; >>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) >>>>>> return rc; >>>>>> } >>>>>> >>>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>>>+ pci_iov_set_numvfs(dev, nr_virtfn); >>>>>> iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; >>>>>> pci_cfg_access_lock(dev); >>>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>>@@ -272,7 +281,7 @@ failed: >>>>>> iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); >>>>>> pci_cfg_access_lock(dev); >>>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>>>>>+ pci_iov_set_numvfs(dev, 0); >>>>>> ssleep(1); >>>>> >>>>>The 1 second delay here might be for waiting VFs to be ready. >>>>> >>>> >>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? >>>> >>> >>>I was not suggesting to do that. I just raised the concern for you >>>to look into. >>> >> >>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this: >> >>To allow components to perform internal initialization, system software must wait for at least >>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to >>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must >>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to >>return a Successful Completion status for a valid Configuration Request is broken. After Setting the >>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to >>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion >>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after >>having previously returned a Successful Completion without an intervening VF disable or other valid >>reset condition. >> >>As my understanding, it will take 1ms or 1s after VF Enable bit is set. >>Actually I am confused with the two different time, in which case we needs to >>wait for different time? And some place we add a lock, but no lock in others. >> > >Are you talking about 1ms or 100ms? I assume it's 100ms. If I >understand things correctly, 100ms delay before issuing config >read request, CRS can be returned before it reaches the timeout >(1 second). Thanks for your explanation. I took a look at the CRS definition in SPEC and get more understanding on this concept. If my understanding is correct, we need to wait 1s in all case. 100ms is the minimum requirement before any Request to the VFs after VF Enable is set to 1. But the VF may not ready until 1s passed. For example in sriov_enable(), after msleep(100) it will call virtfn_add() in which will send Request to the VF. It is not for sure at this moment, 1s has passed. If we want to use the CRS mechanism, we need to call pci_bus_read_dev_vendor_id() on the VF to determine VF is ready. But this would be complecated on powernv platform now. > >>While from the SPEC, I don't see some description the NumVFs field will take >>some time to be effective. >> > >If I'm correct, we can't change NumVFs without disabling/reanbling VFs. > I found this in SRIOV SPEC sec 3.3.7 NumVFs may only be written while VF Enable is Clear. If NumVFs is written when VF Enable is Set, the results are undefined. Will take a close look in the current implementation to see whether they comply with the SPEC. >Thanks, >Gavin
On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote: >On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote: >>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote: >>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote: >>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote: >>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >>>>>>affect the offset and stride. Current implementation doesn't refresh the >>>>>>offset/stride cached in pci_sriov structure. >>>>>> >>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >>>>>>value after NumVFs is written. >>>>>> >>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>>>>>--- >>>>>> drivers/pci/iov.c | 17 +++++++++++++---- >>>>>> 1 file changed, 13 insertions(+), 4 deletions(-) >>>>>> >>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >>>>>>index 4d109c0..c7010c5 100644 >>>>>>--- a/drivers/pci/iov.c >>>>>>+++ b/drivers/pci/iov.c >>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) >>>>>> dev->sriov->stride * id) & 0xff; >>>>>> } >>>>>> >>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >>>>>>+{ >>>>>>+ struct pci_sriov *iov = dev->sriov; >>>>>>+ >>>>>>+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>> >>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take >>>>>effect. >>>>> >>>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >>>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >>>>>>+} >>>>>>+ >>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) >>>>>> { >>>>>> struct pci_bus *child; >>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) >>>>>> return rc; >>>>>> } >>>>>> >>>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>>>+ pci_iov_set_numvfs(dev, nr_virtfn); >>>>>> iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; >>>>>> pci_cfg_access_lock(dev); >>>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>>@@ -272,7 +281,7 @@ failed: >>>>>> iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); >>>>>> pci_cfg_access_lock(dev); >>>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>>>>>+ pci_iov_set_numvfs(dev, 0); >>>>>> ssleep(1); >>>>> >>>>>The 1 second delay here might be for waiting VFs to be ready. >>>>> >>>> >>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? >>>> >>> >>>I was not suggesting to do that. I just raised the concern for you >>>to look into. >>> >> >>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this: >> >>To allow components to perform internal initialization, system software must wait for at least >>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to >>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must >>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to >>return a Successful Completion status for a valid Configuration Request is broken. After Setting the >>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to >>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion >>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after >>having previously returned a Successful Completion without an intervening VF disable or other valid >>reset condition. >> >>As my understanding, it will take 1ms or 1s after VF Enable bit is set. >>Actually I am confused with the two different time, in which case we needs to >>wait for different time? And some place we add a lock, but no lock in others. >> > >Are you talking about 1ms or 100ms? I assume it's 100ms. If I >understand things correctly, 100ms delay before issuing config >read request, CRS can be returned before it reaches the timeout >(1 second). > >>While from the SPEC, I don't see some description the NumVFs field will take >>some time to be effective. >> > >If I'm correct, we can't change NumVFs without disabling/reanbling VFs. > In current implementation, NumVFs is written when VF Enable is not set in all cases. While I guess what you mean is whether offset/stride will change immediately after NumVFs is changed when VF Enable is not set. From the SRIOV SPEC, I don't see some statement that the offset/stride will be effective after VF Enable is set. And in current implementation, offset/stride is retrieved when VF Enable is not set, both in sriov_init() and sriov_enable(). So my conclusion is the offset/stride will take effect immediately after NumVFs is written. >Thanks, >Gavin
On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote: >On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote: >>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote: >>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote: >>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote: >>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >>>>>>affect the offset and stride. Current implementation doesn't refresh the >>>>>>offset/stride cached in pci_sriov structure. >>>>>> >>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >>>>>>value after NumVFs is written. >>>>>> >>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>>>>>--- >>>>>> drivers/pci/iov.c | 17 +++++++++++++---- >>>>>> 1 file changed, 13 insertions(+), 4 deletions(-) >>>>>> >>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >>>>>>index 4d109c0..c7010c5 100644 >>>>>>--- a/drivers/pci/iov.c >>>>>>+++ b/drivers/pci/iov.c >>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) >>>>>> dev->sriov->stride * id) & 0xff; >>>>>> } >>>>>> >>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >>>>>>+{ >>>>>>+ struct pci_sriov *iov = dev->sriov; >>>>>>+ >>>>>>+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>> >>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take >>>>>effect. >>>>> >>>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >>>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >>>>>>+} >>>>>>+ >>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) >>>>>> { >>>>>> struct pci_bus *child; >>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) >>>>>> return rc; >>>>>> } >>>>>> >>>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>>>+ pci_iov_set_numvfs(dev, nr_virtfn); >>>>>> iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; >>>>>> pci_cfg_access_lock(dev); >>>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>>@@ -272,7 +281,7 @@ failed: >>>>>> iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); >>>>>> pci_cfg_access_lock(dev); >>>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>>>>>+ pci_iov_set_numvfs(dev, 0); >>>>>> ssleep(1); >>>>> >>>>>The 1 second delay here might be for waiting VFs to be ready. >>>>> >>>> >>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? >>>> >>> >>>I was not suggesting to do that. I just raised the concern for you >>>to look into. >>> >> >>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this: >> >>To allow components to perform internal initialization, system software must wait for at least >>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to >>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must >>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to >>return a Successful Completion status for a valid Configuration Request is broken. After Setting the >>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to >>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion >>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after >>having previously returned a Successful Completion without an intervening VF disable or other valid >>reset condition. >> >>As my understanding, it will take 1ms or 1s after VF Enable bit is set. >>Actually I am confused with the two different time, in which case we needs to >>wait for different time? And some place we add a lock, but no lock in others. >> > >Are you talking about 1ms or 100ms? I assume it's 100ms. If I >understand things correctly, 100ms delay before issuing config >read request, CRS can be returned before it reaches the timeout >(1 second). Took a look at the sleep again. There are five sleep in iov.c, two are 100ms sleep and three are 1s sleep. 100ms sleep are both after VF Enable is set. 1s sleep are all after VF Enable is cleared. These are stated in SRIOV SPEC 3.3.3.1. While I still suggest to wait 1s after VF Enable is set. Otherwise we need to add some logic like pci_bus_read_dev_vendor_id() on VF to check whether VF is ready, when CRS Software Visibility is enabled on the Bridge. Like in sriov_enable(), the msleep() needs to be replaced by a call on a VF, while the VF is not created in the system yet. If my understanding is not correct, please let me know. > >Thanks, >Gavin
On Wed, Nov 26, 2014 at 04:40:51PM +0800, Wei Yang wrote: >On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote: >>On Tue, Nov 25, 2014 at 05:11:14PM +0800, Wei Yang wrote: >>>On Tue, Nov 25, 2014 at 02:46:52PM +1100, Gavin Shan wrote: >>>>On Tue, Nov 25, 2014 at 11:14:55AM +0800, Wei Yang wrote: >>>>>On Tue, Nov 25, 2014 at 10:01:08AM +1100, Gavin Shan wrote: >>>>>>On Sat, Nov 22, 2014 at 10:52:39AM +0800, Wei Yang wrote: >>>>>>>According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will >>>>>>>affect the offset and stride. Current implementation doesn't refresh the >>>>>>>offset/stride cached in pci_sriov structure. >>>>>>> >>>>>>>This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two >>>>>>>value after NumVFs is written. >>>>>>> >>>>>>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> >>>>>>>--- >>>>>>> drivers/pci/iov.c | 17 +++++++++++++---- >>>>>>> 1 file changed, 13 insertions(+), 4 deletions(-) >>>>>>> >>>>>>>diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c >>>>>>>index 4d109c0..c7010c5 100644 >>>>>>>--- a/drivers/pci/iov.c >>>>>>>+++ b/drivers/pci/iov.c >>>>>>>@@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) >>>>>>> dev->sriov->stride * id) & 0xff; >>>>>>> } >>>>>>> >>>>>>>+static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) >>>>>>>+{ >>>>>>>+ struct pci_sriov *iov = dev->sriov; >>>>>>>+ >>>>>>>+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>>> >>>>>>I'm suspecting writing to PCI_SRIOV_NUM_VF would take some time to take >>>>>>effect. >>>>>> >>>>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); >>>>>>>+ pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); >>>>>>>+} >>>>>>>+ >>>>>>> static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) >>>>>>> { >>>>>>> struct pci_bus *child; >>>>>>>@@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) >>>>>>> return rc; >>>>>>> } >>>>>>> >>>>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); >>>>>>>+ pci_iov_set_numvfs(dev, nr_virtfn); >>>>>>> iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; >>>>>>> pci_cfg_access_lock(dev); >>>>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>>>@@ -272,7 +281,7 @@ failed: >>>>>>> iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); >>>>>>> pci_cfg_access_lock(dev); >>>>>>> pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); >>>>>>>- pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); >>>>>>>+ pci_iov_set_numvfs(dev, 0); >>>>>>> ssleep(1); >>>>>> >>>>>>The 1 second delay here might be for waiting VFs to be ready. >>>>>> >>>>> >>>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? >>>>> >>>> >>>>I was not suggesting to do that. I just raised the concern for you >>>>to look into. >>>> >>> >>>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this: >>> >>>To allow components to perform internal initialization, system software must wait for at least >>>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to >>>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must >>>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to >>>return a Successful Completion status for a valid Configuration Request is broken. After Setting the >>>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to >>>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion >>>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after >>>having previously returned a Successful Completion without an intervening VF disable or other valid >>>reset condition. >>> >>>As my understanding, it will take 1ms or 1s after VF Enable bit is set. >>>Actually I am confused with the two different time, in which case we needs to >>>wait for different time? And some place we add a lock, but no lock in others. >>> >> >>Are you talking about 1ms or 100ms? I assume it's 100ms. If I >>understand things correctly, 100ms delay before issuing config >>read request, CRS can be returned before it reaches the timeout >>(1 second). > >Took a look at the sleep again. There are five sleep in iov.c, two are 100ms >sleep and three are 1s sleep. 100ms sleep are both after VF Enable is set. 1s >sleep are all after VF Enable is cleared. These are stated in SRIOV SPEC >3.3.3.1. > >While I still suggest to wait 1s after VF Enable is set. Otherwise we need to >add some logic like pci_bus_read_dev_vendor_id() on VF to check whether VF is >ready, when CRS Software Visibility is enabled on the Bridge. Like in >sriov_enable(), the msleep() needs to be replaced by a call on a VF, while >the VF is not created in the system yet. > >If my understanding is not correct, please let me know. > Hi, Gavin, Do you agree with my opinion in this mail? or it is still not correct? >> >>Thanks, >>Gavin > >-- >Richard Yang >Help you, Help me
On Wed, Nov 26, 2014 at 04:40:51PM +0800, Wei Yang wrote: >On Wed, Nov 26, 2014 at 10:03:59AM +1100, Gavin Shan wrote: >>>>>> >>>>>>The 1 second delay here might be for waiting VFs to be ready. >>>>>> >>>>> >>>>>Hmm... so add this ssleep() in pci_iov_set_numvfs() would be better? >>>>> >>>> >>>>I was not suggesting to do that. I just raised the concern for you >>>>to look into. >>>> >>> >>>I looked in the SPEC sec 3.3.3.1 VF Enable. In this section, it says this: >>> >>>To allow components to perform internal initialization, system software must wait for at least >>>100 ms after changing the VF Enable bit from a 0 to a 1, before it is permitted to issue Requests to >>>the VFs which are enabled by that VF Enable bit. The Root Complex and/or system software must >>>allow at least 1.0 s after Setting the VF Enable bit, before it may determine that a VF which fails to >>>return a Successful Completion status for a valid Configuration Request is broken. After Setting the >>>VF Enable bit, the VFs enabled by that VF Enable bit are permitted to return a CRS status to >>>configuration requests up to the 1.0 s limit, if they are not ready to provide a Successful Completion >>>status for a valid Configuration Request. Additionally, a VF is not permitted to return CRS after >>>having previously returned a Successful Completion without an intervening VF disable or other valid >>>reset condition. >>> >>>As my understanding, it will take 1ms or 1s after VF Enable bit is set. >>>Actually I am confused with the two different time, in which case we needs to >>>wait for different time? And some place we add a lock, but no lock in others. >>> >> >>Are you talking about 1ms or 100ms? I assume it's 100ms. If I >>understand things correctly, 100ms delay before issuing config >>read request, CRS can be returned before it reaches the timeout >>(1 second). > >Took a look at the sleep again. There are five sleep in iov.c, two are 100ms >sleep and three are 1s sleep. 100ms sleep are both after VF Enable is set. 1s >sleep are all after VF Enable is cleared. These are stated in SRIOV SPEC >3.3.3.1. > >While I still suggest to wait 1s after VF Enable is set. Otherwise we need to >add some logic like pci_bus_read_dev_vendor_id() on VF to check whether VF is >ready, when CRS Software Visibility is enabled on the Bridge. Like in >sriov_enable(), the msleep() needs to be replaced by a call on a VF, while >the VF is not created in the system yet. > >If my understanding is not correct, please let me know. Bjorn, After discussion with Gavin, I think it is ok to refresh the offset/stride after NumVFs is written. 1. NumVFs are written all with VFE cleared (comply with SPEC) 2. The time to wait, like 100ms and 1s are the time to wait before accessing VF not the PF. So to me, it is not necessary to wait before retrieve the offset/stride after writing NumVFs. If my understanding is not correct, please let me know. > >> >>Thanks, >>Gavin > >-- >Richard Yang >Help you, Help me
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 4d109c0..c7010c5 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -31,6 +31,15 @@ static inline u8 virtfn_devfn(struct pci_dev *dev, int id) dev->sriov->stride * id) & 0xff; } +static inline void pci_iov_set_numvfs(struct pci_dev *dev, int nr_virtfn) +{ + struct pci_sriov *iov = dev->sriov; + + pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &iov->offset); + pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &iov->stride); +} + static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) { struct pci_bus *child; @@ -243,7 +252,7 @@ static int sriov_enable(struct pci_dev *dev, int nr_virtfn) return rc; } - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); + pci_iov_set_numvfs(dev, nr_virtfn); iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); @@ -272,7 +281,7 @@ failed: iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); pci_cfg_access_lock(dev); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); + pci_iov_set_numvfs(dev, 0); ssleep(1); pci_cfg_access_unlock(dev); @@ -303,7 +312,7 @@ static void sriov_disable(struct pci_dev *dev) sysfs_remove_link(&dev->dev.kobj, "dep_link"); iov->num_VFs = 0; - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); + pci_iov_set_numvfs(dev, 0); } static int sriov_init(struct pci_dev *dev, int pos) @@ -439,7 +448,7 @@ static void sriov_restore_state(struct pci_dev *dev) pci_update_resource(dev, i); pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); - pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs); + pci_iov_set_numvfs(dev, iov->num_VFs); pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); if (iov->ctrl & PCI_SRIOV_CTRL_VFE) msleep(100);
According to SR-IOV spec sec 3.3.9, 3.3.10, the NumVFs setting change will affect the offset and stride. Current implementation doesn't refresh the offset/stride cached in pci_sriov structure. This patch introduces a wrapper pci_iov_set_numvfs(), which refresh these two value after NumVFs is written. Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com> --- drivers/pci/iov.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-)