Message ID | 20240802-reuse-v11-8-fb83bb8c19fb@daynix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | hw/pci: SR-IOV related fixes and improvements | expand |
On Fri, Aug 02, 2024 at 02:17:58PM +0900, Akihiko Odaki wrote: > num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF > instead. > > Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> > --- > include/hw/pci/pcie_sriov.h | 1 - > hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++-------- > hw/pci/trace-events | 2 +- > 3 files changed, 21 insertions(+), 10 deletions(-) > > diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h > index 70649236c18a..5148c5b77dd1 100644 > --- a/include/hw/pci/pcie_sriov.h > +++ b/include/hw/pci/pcie_sriov.h > @@ -16,7 +16,6 @@ > #include "hw/pci/pci.h" > > typedef struct PCIESriovPF { > - uint16_t num_vfs; /* Number of virtual functions created */ > uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ > PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ > } PCIESriovPF; > diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c > index 9bd7f8acc3f4..fae6acea4acb 100644 > --- a/hw/pci/pcie_sriov.c > +++ b/hw/pci/pcie_sriov.c > @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, > pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, > offset, PCI_EXT_CAP_SRIOV_SIZEOF); > dev->exp.sriov_cap = offset; > - dev->exp.sriov_pf.num_vfs = 0; > dev->exp.sriov_pf.vf = NULL; > > pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); > @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, > } > } > > +static void clear_ctrl_vfe(PCIDevice *dev) > +{ > + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; space here, after definition > + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); > +} > + Pls use pci_word_test_and_clear_mask > static void register_vfs(PCIDevice *dev) > { > uint16_t num_vfs; > @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev) > assert(sriov_cap > 0); > num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); > if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { > + clear_ctrl_vfe(dev); > return; > } > > @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev) > for (i = 0; i < num_vfs; i++) { > pci_set_enabled(dev->exp.sriov_pf.vf[i], true); > } > - dev->exp.sriov_pf.num_vfs = num_vfs; > } > > static void unregister_vfs(PCIDevice *dev) > { > - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; > uint16_t i; > + uint8_t *cfg = dev->config + dev->exp.sriov_cap; > > trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), > - PCI_FUNC(dev->devfn), num_vfs); > - for (i = 0; i < num_vfs; i++) { > + PCI_FUNC(dev->devfn)); > + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { Why PCI_SRIOV_TOTAL_VF not PCI_SRIOV_NUM_VF/pcie_sriov_num_vfs? > pci_set_enabled(dev->exp.sriov_pf.vf[i], false); > } > - dev->exp.sriov_pf.num_vfs = 0; > } > > void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, > @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, > } else { > unregister_vfs(dev); > } > + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { > + clear_ctrl_vfe(dev); > + unregister_vfs(dev); So any write into PCI_SRIOV_NUM_VF automatically clears VFE? Yes writing into PCI_SRIOV_NUM_VF should not happen when VFE is set, but spec does not say we need to clear it automatically. Why come up with random rules? just don't special case it, whatever happens, let it happen. And what does this change have to do with getting rid of num_vfs? > } > } > > @@ -304,7 +311,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) > PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) > { > assert(!pci_is_vf(dev)); > - if (n < dev->exp.sriov_pf.num_vfs) { > + if (n < pcie_sriov_num_vfs(dev)) { > return dev->exp.sriov_pf.vf[n]; > } > return NULL; > @@ -312,5 +319,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) > > uint16_t pcie_sriov_num_vfs(PCIDevice *dev) > { > - return dev->exp.sriov_pf.num_vfs; > + uint16_t sriov_cap = dev->exp.sriov_cap; > + uint8_t *cfg = dev->config + sriov_cap; > + > + return sriov_cap && > + (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ? > + pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0; > } > diff --git a/hw/pci/trace-events b/hw/pci/trace-events > index 19643aa8c6b0..e98f575a9d19 100644 > --- a/hw/pci/trace-events > +++ b/hw/pci/trace-events > @@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask > > # hw/pci/pcie_sriov.c > sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs" > -sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs" > +sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs" > sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d" > > # pcie.c > > -- > 2.45.2
On 2024/08/02 21:58, Michael S. Tsirkin wrote: > On Fri, Aug 02, 2024 at 02:17:58PM +0900, Akihiko Odaki wrote: >> num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF >> instead. >> >> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> >> --- >> include/hw/pci/pcie_sriov.h | 1 - >> hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++-------- >> hw/pci/trace-events | 2 +- >> 3 files changed, 21 insertions(+), 10 deletions(-) >> >> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h >> index 70649236c18a..5148c5b77dd1 100644 >> --- a/include/hw/pci/pcie_sriov.h >> +++ b/include/hw/pci/pcie_sriov.h >> @@ -16,7 +16,6 @@ >> #include "hw/pci/pci.h" >> >> typedef struct PCIESriovPF { >> - uint16_t num_vfs; /* Number of virtual functions created */ >> uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ >> PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ >> } PCIESriovPF; >> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c >> index 9bd7f8acc3f4..fae6acea4acb 100644 >> --- a/hw/pci/pcie_sriov.c >> +++ b/hw/pci/pcie_sriov.c >> @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, >> pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, >> offset, PCI_EXT_CAP_SRIOV_SIZEOF); >> dev->exp.sriov_cap = offset; >> - dev->exp.sriov_pf.num_vfs = 0; >> dev->exp.sriov_pf.vf = NULL; >> >> pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); >> @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, >> } >> } >> >> +static void clear_ctrl_vfe(PCIDevice *dev) >> +{ >> + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; > > space here, after definition > >> + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); >> +} >> + > > Pls use pci_word_test_and_clear_mask That sounds good. I'll do so with the next version. > > >> static void register_vfs(PCIDevice *dev) >> { >> uint16_t num_vfs; >> @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev) >> assert(sriov_cap > 0); >> num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); >> if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { >> + clear_ctrl_vfe(dev); >> return; >> } >> >> @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev) >> for (i = 0; i < num_vfs; i++) { >> pci_set_enabled(dev->exp.sriov_pf.vf[i], true); >> } >> - dev->exp.sriov_pf.num_vfs = num_vfs; >> } >> >> static void unregister_vfs(PCIDevice *dev) >> { >> - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; >> uint16_t i; >> + uint8_t *cfg = dev->config + dev->exp.sriov_cap; >> >> trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), >> - PCI_FUNC(dev->devfn), num_vfs); >> - for (i = 0; i < num_vfs; i++) { >> + PCI_FUNC(dev->devfn)); >> + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { > > Why PCI_SRIOV_TOTAL_VF not PCI_SRIOV_NUM_VF/pcie_sriov_num_vfs? Because PCI_SRIOV_NUM_VF is overwritten when unregister_vfs() is called. > > >> pci_set_enabled(dev->exp.sriov_pf.vf[i], false); >> } >> - dev->exp.sriov_pf.num_vfs = 0; >> } >> >> void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, >> @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, >> } else { >> unregister_vfs(dev); >> } >> + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { >> + clear_ctrl_vfe(dev); >> + unregister_vfs(dev); > > So any write into PCI_SRIOV_NUM_VF automatically clears VFE? > > Yes writing into PCI_SRIOV_NUM_VF should not happen when VFE > is set, but spec does not say we need to clear it automatically. > Why come up with random rules? just don't special case it, > whatever happens, let it happen. > > And what does this change have to do with getting rid of > num_vfs? Keeping VFs working requires to know the number of VFs, but we do no longer know it because PCI_SRIOV_NUM_VF is overwritten. This disables all VFs instead of trying to keep VFs alive. Regards, Akihiko Odaki
On Sat, Aug 03, 2024 at 12:38:10AM +0900, Akihiko Odaki wrote: > On 2024/08/02 21:58, Michael S. Tsirkin wrote: > > On Fri, Aug 02, 2024 at 02:17:58PM +0900, Akihiko Odaki wrote: > > > num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF > > > instead. > > > > > > Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> > > > --- > > > include/hw/pci/pcie_sriov.h | 1 - > > > hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++-------- > > > hw/pci/trace-events | 2 +- > > > 3 files changed, 21 insertions(+), 10 deletions(-) > > > > > > diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h > > > index 70649236c18a..5148c5b77dd1 100644 > > > --- a/include/hw/pci/pcie_sriov.h > > > +++ b/include/hw/pci/pcie_sriov.h > > > @@ -16,7 +16,6 @@ > > > #include "hw/pci/pci.h" > > > typedef struct PCIESriovPF { > > > - uint16_t num_vfs; /* Number of virtual functions created */ > > > uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ > > > PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ > > > } PCIESriovPF; > > > diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c > > > index 9bd7f8acc3f4..fae6acea4acb 100644 > > > --- a/hw/pci/pcie_sriov.c > > > +++ b/hw/pci/pcie_sriov.c > > > @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, > > > pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, > > > offset, PCI_EXT_CAP_SRIOV_SIZEOF); > > > dev->exp.sriov_cap = offset; > > > - dev->exp.sriov_pf.num_vfs = 0; > > > dev->exp.sriov_pf.vf = NULL; > > > pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); > > > @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, > > > } > > > } > > > +static void clear_ctrl_vfe(PCIDevice *dev) > > > +{ > > > + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; > > > > space here, after definition > > > > > + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); > > > +} > > > + > > > > Pls use pci_word_test_and_clear_mask > > That sounds good. I'll do so with the next version. > > > > > > > > static void register_vfs(PCIDevice *dev) > > > { > > > uint16_t num_vfs; > > > @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev) > > > assert(sriov_cap > 0); > > > num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); > > > if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { > > > + clear_ctrl_vfe(dev); > > > return; > > > } > > > @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev) > > > for (i = 0; i < num_vfs; i++) { > > > pci_set_enabled(dev->exp.sriov_pf.vf[i], true); > > > } > > > - dev->exp.sriov_pf.num_vfs = num_vfs; > > > } > > > static void unregister_vfs(PCIDevice *dev) > > > { > > > - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; > > > uint16_t i; > > > + uint8_t *cfg = dev->config + dev->exp.sriov_cap; > > > trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), > > > - PCI_FUNC(dev->devfn), num_vfs); > > > - for (i = 0; i < num_vfs; i++) { > > > + PCI_FUNC(dev->devfn)); > > > + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { > > > > Why PCI_SRIOV_TOTAL_VF not PCI_SRIOV_NUM_VF/pcie_sriov_num_vfs? > > Because PCI_SRIOV_NUM_VF is overwritten when unregister_vfs() is called. maybe this function should get the range of VFs to unregister, then. > > > > > > > pci_set_enabled(dev->exp.sriov_pf.vf[i], false); > > > } > > > - dev->exp.sriov_pf.num_vfs = 0; > > > } > > > void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, > > > @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, > > > } else { > > > unregister_vfs(dev); > > > } > > > + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { > > > + clear_ctrl_vfe(dev); > > > + unregister_vfs(dev); > > > > So any write into PCI_SRIOV_NUM_VF automatically clears VFE? > > > > Yes writing into PCI_SRIOV_NUM_VF should not happen when VFE > > is set, but spec does not say we need to clear it automatically. > > Why come up with random rules? just don't special case it, > > whatever happens, let it happen. > > > > And what does this change have to do with getting rid of > > num_vfs? > > Keeping VFs working requires to know the number of VFs, but we do no longer > know it because PCI_SRIOV_NUM_VF is overwritten. This disables all VFs > instead of trying to keep VFs alive. > > Regards, > Akihiko Odaki However, we then get into a situation where VFE is set but PCI_SRIOV_NUM_VF no longer reflects the # of registered VFs. Given you removed num_vfs which was exactly the # of registered VFs, it is hard to say if that will lead to confusion now or later.
On 2024/08/03 1:52, Michael S. Tsirkin wrote: > On Sat, Aug 03, 2024 at 12:38:10AM +0900, Akihiko Odaki wrote: >> On 2024/08/02 21:58, Michael S. Tsirkin wrote: >>> On Fri, Aug 02, 2024 at 02:17:58PM +0900, Akihiko Odaki wrote: >>>> num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF >>>> instead. >>>> >>>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> >>>> --- >>>> include/hw/pci/pcie_sriov.h | 1 - >>>> hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++-------- >>>> hw/pci/trace-events | 2 +- >>>> 3 files changed, 21 insertions(+), 10 deletions(-) >>>> >>>> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h >>>> index 70649236c18a..5148c5b77dd1 100644 >>>> --- a/include/hw/pci/pcie_sriov.h >>>> +++ b/include/hw/pci/pcie_sriov.h >>>> @@ -16,7 +16,6 @@ >>>> #include "hw/pci/pci.h" >>>> typedef struct PCIESriovPF { >>>> - uint16_t num_vfs; /* Number of virtual functions created */ >>>> uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ >>>> PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ >>>> } PCIESriovPF; >>>> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c >>>> index 9bd7f8acc3f4..fae6acea4acb 100644 >>>> --- a/hw/pci/pcie_sriov.c >>>> +++ b/hw/pci/pcie_sriov.c >>>> @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, >>>> pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, >>>> offset, PCI_EXT_CAP_SRIOV_SIZEOF); >>>> dev->exp.sriov_cap = offset; >>>> - dev->exp.sriov_pf.num_vfs = 0; >>>> dev->exp.sriov_pf.vf = NULL; >>>> pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); >>>> @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, >>>> } >>>> } >>>> +static void clear_ctrl_vfe(PCIDevice *dev) >>>> +{ >>>> + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; >>> >>> space here, after definition >>> >>>> + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); >>>> +} >>>> + >>> >>> Pls use pci_word_test_and_clear_mask >> >> That sounds good. I'll do so with the next version. >> >>> >>> >>>> static void register_vfs(PCIDevice *dev) >>>> { >>>> uint16_t num_vfs; >>>> @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev) >>>> assert(sriov_cap > 0); >>>> num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); >>>> if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { >>>> + clear_ctrl_vfe(dev); >>>> return; >>>> } >>>> @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev) >>>> for (i = 0; i < num_vfs; i++) { >>>> pci_set_enabled(dev->exp.sriov_pf.vf[i], true); >>>> } >>>> - dev->exp.sriov_pf.num_vfs = num_vfs; >>>> } >>>> static void unregister_vfs(PCIDevice *dev) >>>> { >>>> - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; >>>> uint16_t i; >>>> + uint8_t *cfg = dev->config + dev->exp.sriov_cap; >>>> trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), >>>> - PCI_FUNC(dev->devfn), num_vfs); >>>> - for (i = 0; i < num_vfs; i++) { >>>> + PCI_FUNC(dev->devfn)); >>>> + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { >>> >>> Why PCI_SRIOV_TOTAL_VF not PCI_SRIOV_NUM_VF/pcie_sriov_num_vfs? >> >> Because PCI_SRIOV_NUM_VF is overwritten when unregister_vfs() is called. > > > maybe this function should get the range of VFs to unregister, then. PCI_SRIOV_TOTAL_VF gives always a valid value, and it is the only value known to valid when PCI_SRIOV_CTRL gets written with a value clearing PCI_SRIOV_CTRL_VFE. > >>> >>> >>>> pci_set_enabled(dev->exp.sriov_pf.vf[i], false); >>>> } >>>> - dev->exp.sriov_pf.num_vfs = 0; >>>> } >>>> void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, >>>> @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, >>>> } else { >>>> unregister_vfs(dev); >>>> } >>>> + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { >>>> + clear_ctrl_vfe(dev); >>>> + unregister_vfs(dev); >>> >>> So any write into PCI_SRIOV_NUM_VF automatically clears VFE? >>> >>> Yes writing into PCI_SRIOV_NUM_VF should not happen when VFE >>> is set, but spec does not say we need to clear it automatically. >>> Why come up with random rules? just don't special case it, >>> whatever happens, let it happen. >>> >>> And what does this change have to do with getting rid of >>> num_vfs? >> >> Keeping VFs working requires to know the number of VFs, but we do no longer >> know it because PCI_SRIOV_NUM_VF is overwritten. This disables all VFs >> instead of trying to keep VFs alive. >> >> Regards, >> Akihiko Odaki > > However, we then get into a situation where VFE is set but > PCI_SRIOV_NUM_VF no longer reflects the # of registered VFs. > Given you removed num_vfs which was exactly > the # of registered VFs, it is hard to say if that will lead to > confusion now or later. I masked writes to PCI_SRIOV_NUM_VF when PCI_SRIOV_CTRL_VFE is set in v12. Regards, Akihiko Odaki
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index 70649236c18a..5148c5b77dd1 100644 --- a/include/hw/pci/pcie_sriov.h +++ b/include/hw/pci/pcie_sriov.h @@ -16,7 +16,6 @@ #include "hw/pci/pci.h" typedef struct PCIESriovPF { - uint16_t num_vfs; /* Number of virtual functions created */ uint8_t vf_bar_type[PCI_NUM_REGIONS]; /* Store type for each VF bar */ PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */ } PCIESriovPF; diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index 9bd7f8acc3f4..fae6acea4acb 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -57,7 +57,6 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset, pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1, offset, PCI_EXT_CAP_SRIOV_SIZEOF); dev->exp.sriov_cap = offset; - dev->exp.sriov_pf.num_vfs = 0; dev->exp.sriov_pf.vf = NULL; pci_set_word(cfg + PCI_SRIOV_VF_OFFSET, vf_offset); @@ -186,6 +185,12 @@ void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num, } } +static void clear_ctrl_vfe(PCIDevice *dev) +{ + uint8_t *ctrl = dev->config + dev->exp.sriov_cap + PCI_SRIOV_CTRL; + pci_set_word(ctrl, pci_get_word(ctrl) & ~PCI_SRIOV_CTRL_VFE); +} + static void register_vfs(PCIDevice *dev) { uint16_t num_vfs; @@ -195,6 +200,7 @@ static void register_vfs(PCIDevice *dev) assert(sriov_cap > 0); num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF); if (num_vfs > pci_get_word(dev->config + sriov_cap + PCI_SRIOV_TOTAL_VF)) { + clear_ctrl_vfe(dev); return; } @@ -203,20 +209,18 @@ static void register_vfs(PCIDevice *dev) for (i = 0; i < num_vfs; i++) { pci_set_enabled(dev->exp.sriov_pf.vf[i], true); } - dev->exp.sriov_pf.num_vfs = num_vfs; } static void unregister_vfs(PCIDevice *dev) { - uint16_t num_vfs = dev->exp.sriov_pf.num_vfs; uint16_t i; + uint8_t *cfg = dev->config + dev->exp.sriov_cap; trace_sriov_unregister_vfs(dev->name, PCI_SLOT(dev->devfn), - PCI_FUNC(dev->devfn), num_vfs); - for (i = 0; i < num_vfs; i++) { + PCI_FUNC(dev->devfn)); + for (i = 0; i < pci_get_word(cfg + PCI_SRIOV_TOTAL_VF); i++) { pci_set_enabled(dev->exp.sriov_pf.vf[i], false); } - dev->exp.sriov_pf.num_vfs = 0; } void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, @@ -242,6 +246,9 @@ void pcie_sriov_config_write(PCIDevice *dev, uint32_t address, } else { unregister_vfs(dev); } + } else if (range_covers_byte(off, len, PCI_SRIOV_NUM_VF)) { + clear_ctrl_vfe(dev); + unregister_vfs(dev); } } @@ -304,7 +311,7 @@ PCIDevice *pcie_sriov_get_pf(PCIDevice *dev) PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) { assert(!pci_is_vf(dev)); - if (n < dev->exp.sriov_pf.num_vfs) { + if (n < pcie_sriov_num_vfs(dev)) { return dev->exp.sriov_pf.vf[n]; } return NULL; @@ -312,5 +319,10 @@ PCIDevice *pcie_sriov_get_vf_at_index(PCIDevice *dev, int n) uint16_t pcie_sriov_num_vfs(PCIDevice *dev) { - return dev->exp.sriov_pf.num_vfs; + uint16_t sriov_cap = dev->exp.sriov_cap; + uint8_t *cfg = dev->config + sriov_cap; + + return sriov_cap && + (pci_get_word(cfg + PCI_SRIOV_CTRL) & PCI_SRIOV_CTRL_VFE) ? + pci_get_word(cfg + PCI_SRIOV_NUM_VF) : 0; } diff --git a/hw/pci/trace-events b/hw/pci/trace-events index 19643aa8c6b0..e98f575a9d19 100644 --- a/hw/pci/trace-events +++ b/hw/pci/trace-events @@ -14,7 +14,7 @@ msix_write_config(char *name, bool enabled, bool masked) "dev %s enabled %d mask # hw/pci/pcie_sriov.c sriov_register_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: creating %d vf devs" -sriov_unregister_vfs(const char *name, int slot, int function, int num_vfs) "%s %02x:%x: Unregistering %d vf devs" +sriov_unregister_vfs(const char *name, int slot, int function) "%s %02x:%x: Unregistering vf devs" sriov_config_write(const char *name, int slot, int fun, uint32_t offset, uint32_t val, uint32_t len) "%s %02x:%x: sriov offset 0x%x val 0x%x len %d" # pcie.c
num_vfs is not migrated so use PCI_SRIOV_CTRL_VFE and PCI_SRIOV_NUM_VF instead. Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> --- include/hw/pci/pcie_sriov.h | 1 - hw/pci/pcie_sriov.c | 28 ++++++++++++++++++++-------- hw/pci/trace-events | 2 +- 3 files changed, 21 insertions(+), 10 deletions(-)