diff mbox series

[v2,10/11] vpci: Add initial support for virtual PCI bus topology

Message ID 20210923125501.234252-11-andr2000@gmail.com (mailing list archive)
State Superseded
Headers show
Series PCI devices passthrough on Arm, part 3 | expand

Commit Message

Oleksandr Andrushchenko Sept. 23, 2021, 12:55 p.m. UTC
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

Assign SBDF to the PCI devices being passed through with bus 0.
The resulting topology is where PCIe devices reside on the bus 0 of the
root complex itself (embedded endpoints).
This implementation is limited to 32 devices which are allowed on
a single PCI bus.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

---
New in v2
---
 xen/common/domain.c           |  1 +
 xen/drivers/passthrough/pci.c | 57 +++++++++++++++++++++++++++++++++++
 xen/drivers/vpci/vpci.c       | 18 +++++++++--
 xen/include/xen/pci.h         | 18 +++++++++++
 xen/include/xen/sched.h       |  6 ++++
 5 files changed, 97 insertions(+), 3 deletions(-)

Comments

Michal Orzel Sept. 28, 2021, 7:48 a.m. UTC | #1
Hi Oleksandr,

On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> 
> Assign SBDF to the PCI devices being passed through with bus 0.
> The resulting topology is where PCIe devices reside on the bus 0 of the
> root complex itself (embedded endpoints).
> This implementation is limited to 32 devices which are allowed on
> a single PCI bus.
> 
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> 
> ---
> New in v2
> ---
>  xen/common/domain.c           |  1 +
>  xen/drivers/passthrough/pci.c | 57 +++++++++++++++++++++++++++++++++++
>  xen/drivers/vpci/vpci.c       | 18 +++++++++--
>  xen/include/xen/pci.h         | 18 +++++++++++
>  xen/include/xen/sched.h       |  6 ++++
>  5 files changed, 97 insertions(+), 3 deletions(-)
> 
> diff --git a/xen/common/domain.c b/xen/common/domain.c
> index 40d67ec34232..b80ff2e5e2e6 100644
> --- a/xen/common/domain.c
> +++ b/xen/common/domain.c
> @@ -601,6 +601,7 @@ struct domain *domain_create(domid_t domid,
>  
>  #ifdef CONFIG_HAS_PCI
>      INIT_LIST_HEAD(&d->pdev_list);
> +    INIT_LIST_HEAD(&d->vdev_list);
>  #endif
>  
>      /* All error paths can depend on the above setup. */
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index e1da283d73ad..4552ace855e0 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -833,6 +833,63 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
>      return ret;
>  }
>  
> +static struct vpci_dev *pci_find_virtual_device(const struct domain *d,
> +                                                const struct pci_dev *pdev)
> +{
> +    struct vpci_dev *vdev;
> +
> +    list_for_each_entry ( vdev, &d->vdev_list, list )
> +        if ( vdev->pdev == pdev )
> +            return vdev;
> +    return NULL;
> +}
> +
> +int pci_add_virtual_device(struct domain *d, const struct pci_dev *pdev)
> +{
> +    struct vpci_dev *vdev;
> +
> +    ASSERT(!pci_find_virtual_device(d, pdev));
> +
> +    /* Each PCI bus supports 32 devices/slots at max. */
> +    if ( d->vpci_dev_next > 31 )
> +        return -ENOSPC;
> +
> +    vdev = xzalloc(struct vpci_dev);
> +    if ( !vdev )
> +        return -ENOMEM;
> +
> +    /* We emulate a single host bridge for the guest, so segment is always 0. */
> +    *(u16*) &vdev->seg = 0;
Empty line hear would improve readability due to the asterisks being so close to each other.
Apart from that:
Reviewed-by: Michal Orzel <michal.orzel@arm.com>
> +    /*
> +     * The bus number is set to 0, so virtual devices are seen
> +     * as embedded endpoints behind the root complex.
> +     */
> +    *((u8*) &vdev->bus) = 0;
> +    *((u8*) &vdev->devfn) = PCI_DEVFN(d->vpci_dev_next++, 0);
> +
> +    vdev->pdev = pdev;
> +    vdev->domain = d;
> +
> +    pcidevs_lock();
> +    list_add_tail(&vdev->list, &d->vdev_list);
> +    pcidevs_unlock();
> +
> +    return 0;
> +}
> +
> +int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
> +{
> +    struct vpci_dev *vdev;
> +
> +    pcidevs_lock();
> +    vdev = pci_find_virtual_device(d, pdev);
> +    if ( vdev )
> +        list_del(&vdev->list);
> +    pcidevs_unlock();
> +    xfree(vdev);
> +    return 0;
> +}
> +
>  /* Caller should hold the pcidevs_lock */
>  static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
>                             uint8_t devfn)
> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
> index 0cdc0c3f75c4..a40a138a14f7 100644
> --- a/xen/drivers/vpci/vpci.c
> +++ b/xen/drivers/vpci/vpci.c
> @@ -90,24 +90,36 @@ int __hwdom_init vpci_add_handlers(struct pci_dev *pdev)
>  /* Notify vPCI that device is assigned to guest. */
>  int vpci_assign_device(struct domain *d, const struct pci_dev *dev)
>  {
> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
> +    int rc;
> +#endif
> +
>      /* It only makes sense to assign for hwdom or guest domain. */
>      if ( is_system_domain(d) || !has_vpci(d) )
>          return 0;
>  
>  #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
> -    return vpci_bar_add_handlers(d, dev);
> -#else
> -    return 0;
> +    rc = vpci_bar_add_handlers(d, dev);
> +    if ( rc )
> +        return rc;
>  #endif
> +
> +    return pci_add_virtual_device(d, dev);
>  }
>  
>  /* Notify vPCI that device is de-assigned from guest. */
>  int vpci_deassign_device(struct domain *d, const struct pci_dev *dev)
>  {
> +    int rc;
> +
>      /* It only makes sense to de-assign from hwdom or guest domain. */
>      if ( is_system_domain(d) || !has_vpci(d) )
>          return 0;
>  
> +    rc = pci_remove_virtual_device(d, dev);
> +    if ( rc )
> +        return rc;
> +
>  #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
>      return vpci_bar_remove_handlers(d, dev);
>  #else
> diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
> index 2b2dfb6f1b49..35ae1d093921 100644
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -136,6 +136,22 @@ struct pci_dev {
>      struct vpci *vpci;
>  };
>  
> +struct vpci_dev {
> +    struct list_head list;
> +    /* Physical PCI device this virtual device is connected to. */
> +    const struct pci_dev *pdev;
> +    /* Virtual SBDF of the device. */
> +    const union {
> +        struct {
> +            uint8_t devfn;
> +            uint8_t bus;
> +            uint16_t seg;
> +        };
> +        pci_sbdf_t sbdf;
> +    };
> +    struct domain *domain;
> +};
> +
>  #define for_each_pdev(domain, pdev) \
>      list_for_each_entry(pdev, &(domain)->pdev_list, domain_list)
>  
> @@ -165,7 +181,9 @@ int pci_add_segment(u16 seg);
>  const unsigned long *pci_get_ro_map(u16 seg);
>  int pci_add_device(u16 seg, u8 bus, u8 devfn,
>                     const struct pci_dev_info *, nodeid_t node);
> +int pci_add_virtual_device(struct domain *d, const struct pci_dev *pdev);
>  int pci_remove_device(u16 seg, u8 bus, u8 devfn);
> +int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev);
>  int pci_ro_device(int seg, int bus, int devfn);
>  int pci_hide_device(unsigned int seg, unsigned int bus, unsigned int devfn);
>  struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
> diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
> index 28146ee404e6..d304c7ebe766 100644
> --- a/xen/include/xen/sched.h
> +++ b/xen/include/xen/sched.h
> @@ -444,6 +444,12 @@ struct domain
>  
>  #ifdef CONFIG_HAS_PCI
>      struct list_head pdev_list;
> +    struct list_head vdev_list;
> +    /*
> +     * Current device number used by the virtual PCI bus topology
> +     * to assign a unique SBDF to a passed through virtual PCI device.
> +     */
> +    int vpci_dev_next;
>  #endif
>  
>  #ifdef CONFIG_HAS_PASSTHROUGH
>
Jan Beulich Sept. 28, 2021, 7:59 a.m. UTC | #2
On 28.09.2021 09:48, Michal Orzel wrote:
> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -833,6 +833,63 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
>>      return ret;
>>  }
>>  
>> +static struct vpci_dev *pci_find_virtual_device(const struct domain *d,
>> +                                                const struct pci_dev *pdev)
>> +{
>> +    struct vpci_dev *vdev;
>> +
>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>> +        if ( vdev->pdev == pdev )
>> +            return vdev;
>> +    return NULL;
>> +}
>> +
>> +int pci_add_virtual_device(struct domain *d, const struct pci_dev *pdev)
>> +{
>> +    struct vpci_dev *vdev;
>> +
>> +    ASSERT(!pci_find_virtual_device(d, pdev));
>> +
>> +    /* Each PCI bus supports 32 devices/slots at max. */
>> +    if ( d->vpci_dev_next > 31 )
>> +        return -ENOSPC;
>> +
>> +    vdev = xzalloc(struct vpci_dev);
>> +    if ( !vdev )
>> +        return -ENOMEM;
>> +
>> +    /* We emulate a single host bridge for the guest, so segment is always 0. */
>> +    *(u16*) &vdev->seg = 0;
> Empty line hear would improve readability due to the asterisks being so close to each other.
> Apart from that:
> Reviewed-by: Michal Orzel <michal.orzel@arm.com>
>> +    /*
>> +     * The bus number is set to 0, so virtual devices are seen
>> +     * as embedded endpoints behind the root complex.
>> +     */
>> +    *((u8*) &vdev->bus) = 0;
>> +    *((u8*) &vdev->devfn) = PCI_DEVFN(d->vpci_dev_next++, 0);

All of these casts are (a) malformed and (b) unnecessary in the first
place, afaics at least.

Jan
Michal Orzel Sept. 28, 2021, 8:17 a.m. UTC | #3
On 28.09.2021 09:59, Jan Beulich wrote:
> On 28.09.2021 09:48, Michal Orzel wrote:
>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>> --- a/xen/drivers/passthrough/pci.c
>>> +++ b/xen/drivers/passthrough/pci.c
>>> @@ -833,6 +833,63 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
>>>      return ret;
>>>  }
>>>  
>>> +static struct vpci_dev *pci_find_virtual_device(const struct domain *d,
>>> +                                                const struct pci_dev *pdev)
>>> +{
>>> +    struct vpci_dev *vdev;
>>> +
>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>> +        if ( vdev->pdev == pdev )
>>> +            return vdev;
>>> +    return NULL;
>>> +}
>>> +
>>> +int pci_add_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>> +{
>>> +    struct vpci_dev *vdev;
>>> +
>>> +    ASSERT(!pci_find_virtual_device(d, pdev));
>>> +
>>> +    /* Each PCI bus supports 32 devices/slots at max. */
>>> +    if ( d->vpci_dev_next > 31 )
>>> +        return -ENOSPC;
>>> +
>>> +    vdev = xzalloc(struct vpci_dev);
>>> +    if ( !vdev )
>>> +        return -ENOMEM;
>>> +
>>> +    /* We emulate a single host bridge for the guest, so segment is always 0. */
>>> +    *(u16*) &vdev->seg = 0;
>> Empty line hear would improve readability due to the asterisks being so close to each other.
>> Apart from that:
>> Reviewed-by: Michal Orzel <michal.orzel@arm.com>
>>> +    /*
>>> +     * The bus number is set to 0, so virtual devices are seen
>>> +     * as embedded endpoints behind the root complex.
>>> +     */
>>> +    *((u8*) &vdev->bus) = 0;
>>> +    *((u8*) &vdev->devfn) = PCI_DEVFN(d->vpci_dev_next++, 0);
> 
> All of these casts are (a) malformed and (b) unnecessary in the first
> place, afaics at least.
> 
Agree.
*((u8*) &vdev->bus) = 0;
is the same as:
vdev->bus = 0;
> Jan
>
Oleksandr Andrushchenko Sept. 28, 2021, 12:58 p.m. UTC | #4
On 28.09.21 11:17, Michal Orzel wrote:
>
> On 28.09.2021 09:59, Jan Beulich wrote:
>> On 28.09.2021 09:48, Michal Orzel wrote:
>>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>>> --- a/xen/drivers/passthrough/pci.c
>>>> +++ b/xen/drivers/passthrough/pci.c
>>>> @@ -833,6 +833,63 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
>>>>       return ret;
>>>>   }
>>>>   
>>>> +static struct vpci_dev *pci_find_virtual_device(const struct domain *d,
>>>> +                                                const struct pci_dev *pdev)
>>>> +{
>>>> +    struct vpci_dev *vdev;
>>>> +
>>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>>> +        if ( vdev->pdev == pdev )
>>>> +            return vdev;
>>>> +    return NULL;
>>>> +}
>>>> +
>>>> +int pci_add_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>>> +{
>>>> +    struct vpci_dev *vdev;
>>>> +
>>>> +    ASSERT(!pci_find_virtual_device(d, pdev));
>>>> +
>>>> +    /* Each PCI bus supports 32 devices/slots at max. */
>>>> +    if ( d->vpci_dev_next > 31 )
>>>> +        return -ENOSPC;
>>>> +
>>>> +    vdev = xzalloc(struct vpci_dev);
>>>> +    if ( !vdev )
>>>> +        return -ENOMEM;
>>>> +
>>>> +    /* We emulate a single host bridge for the guest, so segment is always 0. */
>>>> +    *(u16*) &vdev->seg = 0;
>>> Empty line hear would improve readability due to the asterisks being so close to each other.
Will add
>>> Apart from that:
>>> Reviewed-by: Michal Orzel <michal.orzel@arm.com>
>>>> +    /*
>>>> +     * The bus number is set to 0, so virtual devices are seen
>>>> +     * as embedded endpoints behind the root complex.
>>>> +     */
>>>> +    *((u8*) &vdev->bus) = 0;
>>>> +    *((u8*) &vdev->devfn) = PCI_DEVFN(d->vpci_dev_next++, 0);
>> All of these casts are (a) malformed and (b) unnecessary in the first
>> place, afaics at least.
>>
> Agree.
> *((u8*) &vdev->bus) = 0;
> is the same as:
> vdev->bus = 0;

Overengineering at its best ;)

Will fix that

>> Jan
>>
Thank you,

Oleksandr
Oleksandr Andrushchenko Sept. 29, 2021, 9:03 a.m. UTC | #5
Hi, Jan!

Sorry for top posting, but this is a general question on this patch/functionality.

Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT

as this renders in somewhat dead code for x86 for now? I do think this still

needs to be in the common code though.

Thank you in advance,

Oleksandr

On 28.09.21 15:58, Oleksandr Andrushchenko wrote:
> On 28.09.21 11:17, Michal Orzel wrote:
>> On 28.09.2021 09:59, Jan Beulich wrote:
>>> On 28.09.2021 09:48, Michal Orzel wrote:
>>>> On 23.09.2021 14:55, Oleksandr Andrushchenko wrote:
>>>>> --- a/xen/drivers/passthrough/pci.c
>>>>> +++ b/xen/drivers/passthrough/pci.c
>>>>> @@ -833,6 +833,63 @@ int pci_remove_device(u16 seg, u8 bus, u8 devfn)
>>>>>        return ret;
>>>>>    }
>>>>>    
>>>>> +static struct vpci_dev *pci_find_virtual_device(const struct domain *d,
>>>>> +                                                const struct pci_dev *pdev)
>>>>> +{
>>>>> +    struct vpci_dev *vdev;
>>>>> +
>>>>> +    list_for_each_entry ( vdev, &d->vdev_list, list )
>>>>> +        if ( vdev->pdev == pdev )
>>>>> +            return vdev;
>>>>> +    return NULL;
>>>>> +}
>>>>> +
>>>>> +int pci_add_virtual_device(struct domain *d, const struct pci_dev *pdev)
>>>>> +{
>>>>> +    struct vpci_dev *vdev;
>>>>> +
>>>>> +    ASSERT(!pci_find_virtual_device(d, pdev));
>>>>> +
>>>>> +    /* Each PCI bus supports 32 devices/slots at max. */
>>>>> +    if ( d->vpci_dev_next > 31 )
>>>>> +        return -ENOSPC;
>>>>> +
>>>>> +    vdev = xzalloc(struct vpci_dev);
>>>>> +    if ( !vdev )
>>>>> +        return -ENOMEM;
>>>>> +
>>>>> +    /* We emulate a single host bridge for the guest, so segment is always 0. */
>>>>> +    *(u16*) &vdev->seg = 0;
>>>> Empty line hear would improve readability due to the asterisks being so close to each other.
> Will add
>>>> Apart from that:
>>>> Reviewed-by: Michal Orzel <michal.orzel@arm.com>
>>>>> +    /*
>>>>> +     * The bus number is set to 0, so virtual devices are seen
>>>>> +     * as embedded endpoints behind the root complex.
>>>>> +     */
>>>>> +    *((u8*) &vdev->bus) = 0;
>>>>> +    *((u8*) &vdev->devfn) = PCI_DEVFN(d->vpci_dev_next++, 0);
>>> All of these casts are (a) malformed and (b) unnecessary in the first
>>> place, afaics at least.
>>>
>> Agree.
>> *((u8*) &vdev->bus) = 0;
>> is the same as:
>> vdev->bus = 0;
> Overengineering at its best ;)
>
> Will fix that
>
>>> Jan
>>>
> Thank you,
>
> Oleksandr
Jan Beulich Sept. 29, 2021, 9:09 a.m. UTC | #6
On 29.09.2021 11:03, Oleksandr Andrushchenko wrote:
> Sorry for top posting, but this is a general question on this patch/functionality.
> 
> Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT
> as this renders in somewhat dead code for x86 for now? I do think this still
> needs to be in the common code though.

I agree it wants to live in common code, but I'd still like the code to
not bloat x86 binaries. Hence yes, I think there want to be
"if ( !IS_ENABLED() )" early bailout paths or, whenever this isn't
possible without breaking the build, respective #ifdef-s.

Jan
Oleksandr Andrushchenko Sept. 29, 2021, 11:56 a.m. UTC | #7
On 29.09.21 12:09, Jan Beulich wrote:
> On 29.09.2021 11:03, Oleksandr Andrushchenko wrote:
>> Sorry for top posting, but this is a general question on this patch/functionality.
>>
>> Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT
>> as this renders in somewhat dead code for x86 for now? I do think this still
>> needs to be in the common code though.
> I agree it wants to live in common code, but I'd still like the code to
> not bloat x86 binaries. Hence yes, I think there want to be
> "if ( !IS_ENABLED() )" early bailout paths or, whenever this isn't
> possible without breaking the build, respective #ifdef-s.

Then it needs to be defined as (xen/drivers/Kconfig):

config HAS_VPCI_GUEST_SUPPORT
     # vPCI guest support is only enabled for Arm now
     def_bool y if ARM
     depends on HAS_VPCI

Because it needs to be defined as "y" for Arm with vPCI support.

Otherwise it breaks the PCI passthrough feature, e.g. it compiles,

but the resulting binary behaves wrong.

Do you see this as an acceptable solution?

>
> Jan
>
>
Thank you,

Oleksandr
Jan Beulich Sept. 29, 2021, 12:54 p.m. UTC | #8
On 29.09.2021 13:56, Oleksandr Andrushchenko wrote:
> 
> On 29.09.21 12:09, Jan Beulich wrote:
>> On 29.09.2021 11:03, Oleksandr Andrushchenko wrote:
>>> Sorry for top posting, but this is a general question on this patch/functionality.
>>>
>>> Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT
>>> as this renders in somewhat dead code for x86 for now? I do think this still
>>> needs to be in the common code though.
>> I agree it wants to live in common code, but I'd still like the code to
>> not bloat x86 binaries. Hence yes, I think there want to be
>> "if ( !IS_ENABLED() )" early bailout paths or, whenever this isn't
>> possible without breaking the build, respective #ifdef-s.
> 
> Then it needs to be defined as (xen/drivers/Kconfig):
> 
> config HAS_VPCI_GUEST_SUPPORT
>      # vPCI guest support is only enabled for Arm now
>      def_bool y if ARM
>      depends on HAS_VPCI
> 
> Because it needs to be defined as "y" for Arm with vPCI support.
> 
> Otherwise it breaks the PCI passthrough feature, e.g. it compiles,
> 
> but the resulting binary behaves wrong.
> 
> Do you see this as an acceptable solution?

Like all (or at least the majority) of other HAS_*, it ought to be
"select"-ed (by arm/Kconfig). Is there a reason this isn't possible?
(I don't mind the "depends on", as long as it's clear that it exists
solely to allow kconfig to warn about bogus "select"s.)

Jan
Oleksandr Andrushchenko Sept. 29, 2021, 1:16 p.m. UTC | #9
On 29.09.21 15:54, Jan Beulich wrote:
> On 29.09.2021 13:56, Oleksandr Andrushchenko wrote:
>> On 29.09.21 12:09, Jan Beulich wrote:
>>> On 29.09.2021 11:03, Oleksandr Andrushchenko wrote:
>>>> Sorry for top posting, but this is a general question on this patch/functionality.
>>>>
>>>> Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT
>>>> as this renders in somewhat dead code for x86 for now? I do think this still
>>>> needs to be in the common code though.
>>> I agree it wants to live in common code, but I'd still like the code to
>>> not bloat x86 binaries. Hence yes, I think there want to be
>>> "if ( !IS_ENABLED() )" early bailout paths or, whenever this isn't
>>> possible without breaking the build, respective #ifdef-s.
>> Then it needs to be defined as (xen/drivers/Kconfig):
>>
>> config HAS_VPCI_GUEST_SUPPORT
>>       # vPCI guest support is only enabled for Arm now
>>       def_bool y if ARM
>>       depends on HAS_VPCI
>>
>> Because it needs to be defined as "y" for Arm with vPCI support.
>>
>> Otherwise it breaks the PCI passthrough feature, e.g. it compiles,
>>
>> but the resulting binary behaves wrong.
>>
>> Do you see this as an acceptable solution?
> Like all (or at least the majority) of other HAS_*, it ought to be
> "select"-ed (by arm/Kconfig). Is there a reason this isn't possible?
> (I don't mind the "depends on", as long as it's clear that it exists
> solely to allow kconfig to warn about bogus "select"s.)

There is yet no Kconfig exists (even for Arm) that enables HAS_VPCI,

thus I can't do that at the moment even if I want to. Yes, this can be deferred

to the later stage when we enable VPCI for Arm, bit this config option is still

considered as "common code" as the functionality being added

to the common code is just gated with CONFIG_HAS_VPCI_GUEST_SUPPORT.

With this defined now and not later the code seems to be complete and more clean

as it is seen what is this configuration option and how it is enabled and used in the

code.

So, I see no problem if it is defined in this Kconfig and evaluates to "n" for x86

and eventually will be "y" for Arm when it enables HAS_VPCI.


>
> Jan
>
Thank you,

Oleksandr
Jan Beulich Sept. 29, 2021, 1:23 p.m. UTC | #10
On 29.09.2021 15:16, Oleksandr Andrushchenko wrote:
> 
> On 29.09.21 15:54, Jan Beulich wrote:
>> On 29.09.2021 13:56, Oleksandr Andrushchenko wrote:
>>> On 29.09.21 12:09, Jan Beulich wrote:
>>>> On 29.09.2021 11:03, Oleksandr Andrushchenko wrote:
>>>>> Sorry for top posting, but this is a general question on this patch/functionality.
>>>>>
>>>>> Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT
>>>>> as this renders in somewhat dead code for x86 for now? I do think this still
>>>>> needs to be in the common code though.
>>>> I agree it wants to live in common code, but I'd still like the code to
>>>> not bloat x86 binaries. Hence yes, I think there want to be
>>>> "if ( !IS_ENABLED() )" early bailout paths or, whenever this isn't
>>>> possible without breaking the build, respective #ifdef-s.
>>> Then it needs to be defined as (xen/drivers/Kconfig):
>>>
>>> config HAS_VPCI_GUEST_SUPPORT
>>>       # vPCI guest support is only enabled for Arm now
>>>       def_bool y if ARM
>>>       depends on HAS_VPCI
>>>
>>> Because it needs to be defined as "y" for Arm with vPCI support.
>>>
>>> Otherwise it breaks the PCI passthrough feature, e.g. it compiles,
>>>
>>> but the resulting binary behaves wrong.
>>>
>>> Do you see this as an acceptable solution?
>> Like all (or at least the majority) of other HAS_*, it ought to be
>> "select"-ed (by arm/Kconfig). Is there a reason this isn't possible?
>> (I don't mind the "depends on", as long as it's clear that it exists
>> solely to allow kconfig to warn about bogus "select"s.)
> 
> There is yet no Kconfig exists (even for Arm) that enables HAS_VPCI,
> 
> thus I can't do that at the moment even if I want to. Yes, this can be deferred
> 
> to the later stage when we enable VPCI for Arm, bit this config option is still
> 
> considered as "common code" as the functionality being added
> 
> to the common code is just gated with CONFIG_HAS_VPCI_GUEST_SUPPORT.
> 
> With this defined now and not later the code seems to be complete and more clean
> 
> as it is seen what is this configuration option and how it is enabled and used in the
> 
> code.
> 
> So, I see no problem if it is defined in this Kconfig and evaluates to "n" for x86
> 
> and eventually will be "y" for Arm when it enables HAS_VPCI.

I'm afraid I don't view this as a reply reflecting that you have
understood what I'm asking for. The primary request is for there to
not be "def_bool y" but just "bool" accompanied by a "select" in
Arm's Kconfig. If HAS_VPCI doesn't exist as an option yet, simply
omit the (questionable) "depends on".

Jan

PS: The more replies I get from you, the more annoying I find the
insertion of blank lines between every two lines of text. Blank
lines are usually used to separate paragraphs. If it is your mail
program which inserts these, can you please try to do something
about this? Thanks.
Oleksandr Andrushchenko Sept. 29, 2021, 1:49 p.m. UTC | #11
On 29.09.21 16:23, Jan Beulich wrote:
> On 29.09.2021 15:16, Oleksandr Andrushchenko wrote:
>> On 29.09.21 15:54, Jan Beulich wrote:
>>> On 29.09.2021 13:56, Oleksandr Andrushchenko wrote:
>>>> On 29.09.21 12:09, Jan Beulich wrote:
>>>>> On 29.09.2021 11:03, Oleksandr Andrushchenko wrote:
>>>>>> Sorry for top posting, but this is a general question on this patch/functionality.
>>>>>>
>>>>>> Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT
>>>>>> as this renders in somewhat dead code for x86 for now? I do think this still
>>>>>> needs to be in the common code though.
>>>>> I agree it wants to live in common code, but I'd still like the code to
>>>>> not bloat x86 binaries. Hence yes, I think there want to be
>>>>> "if ( !IS_ENABLED() )" early bailout paths or, whenever this isn't
>>>>> possible without breaking the build, respective #ifdef-s.
>>>> Then it needs to be defined as (xen/drivers/Kconfig):
>>>>
>>>> config HAS_VPCI_GUEST_SUPPORT
>>>>        # vPCI guest support is only enabled for Arm now
>>>>        def_bool y if ARM
>>>>        depends on HAS_VPCI
>>>>
>>>> Because it needs to be defined as "y" for Arm with vPCI support.
>>>>
>>>> Otherwise it breaks the PCI passthrough feature, e.g. it compiles,
>>>>
>>>> but the resulting binary behaves wrong.
>>>>
>>>> Do you see this as an acceptable solution?
>>> Like all (or at least the majority) of other HAS_*, it ought to be
>>> "select"-ed (by arm/Kconfig). Is there a reason this isn't possible?
>>> (I don't mind the "depends on", as long as it's clear that it exists
>>> solely to allow kconfig to warn about bogus "select"s.)
>> There is yet no Kconfig exists (even for Arm) that enables HAS_VPCI,
>>
>> thus I can't do that at the moment even if I want to. Yes, this can be deferred
>>
>> to the later stage when we enable VPCI for Arm, bit this config option is still
>>
>> considered as "common code" as the functionality being added
>>
>> to the common code is just gated with CONFIG_HAS_VPCI_GUEST_SUPPORT.
>>
>> With this defined now and not later the code seems to be complete and more clean
>>
>> as it is seen what is this configuration option and how it is enabled and used in the
>>
>> code.
>>
>> So, I see no problem if it is defined in this Kconfig and evaluates to "n" for x86
>>
>> and eventually will be "y" for Arm when it enables HAS_VPCI.
> I'm afraid I don't view this as a reply reflecting that you have
> understood what I'm asking for. The primary request is for there to
> not be "def_bool y" but just "bool" accompanied by a "select" in
> Arm's Kconfig. If HAS_VPCI doesn't exist as an option yet, simply
> omit the (questionable) "depends on".
I understood that, but was trying to make sure we don't miss
this option while enabling vPCI on Arm, but ok, I'll have the following:
config HAS_VPCI
     bool

config HAS_VPCI_GUEST_SUPPORT
     bool
     depends on HAS_VPCI
and select it for Arm xen/arch/arm/Kconfig
>
> Jan

> PS: The more replies I get from you, the more annoying I find the
> insertion of blank lines between every two lines of text. Blank
> lines are usually used to separate paragraphs. If it is your mail
> program which inserts these, can you please try to do something
> about this? Thanks.
>
I first thought that this is how Thunderbird started showing
my replies and was also curious about the distance between the lines
which seemed to be as double-line, but I couldn't delete or edit
those. I thought this is only visible to me...
It came out that this was because of some Thunderbird settings which
made my replies with those double-liners. Hope it is fixed now.

I am really sorry about that and do understand how annoying it was.

Best regards,
Oleksandr
Jan Beulich Sept. 29, 2021, 2:07 p.m. UTC | #12
On 29.09.2021 15:49, Oleksandr Andrushchenko wrote:
> 
> 
> On 29.09.21 16:23, Jan Beulich wrote:
>> On 29.09.2021 15:16, Oleksandr Andrushchenko wrote:
>>> On 29.09.21 15:54, Jan Beulich wrote:
>>>> On 29.09.2021 13:56, Oleksandr Andrushchenko wrote:
>>>>> On 29.09.21 12:09, Jan Beulich wrote:
>>>>>> On 29.09.2021 11:03, Oleksandr Andrushchenko wrote:
>>>>>>> Sorry for top posting, but this is a general question on this patch/functionality.
>>>>>>>
>>>>>>> Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT
>>>>>>> as this renders in somewhat dead code for x86 for now? I do think this still
>>>>>>> needs to be in the common code though.
>>>>>> I agree it wants to live in common code, but I'd still like the code to
>>>>>> not bloat x86 binaries. Hence yes, I think there want to be
>>>>>> "if ( !IS_ENABLED() )" early bailout paths or, whenever this isn't
>>>>>> possible without breaking the build, respective #ifdef-s.
>>>>> Then it needs to be defined as (xen/drivers/Kconfig):
>>>>>
>>>>> config HAS_VPCI_GUEST_SUPPORT
>>>>>        # vPCI guest support is only enabled for Arm now
>>>>>        def_bool y if ARM
>>>>>        depends on HAS_VPCI
>>>>>
>>>>> Because it needs to be defined as "y" for Arm with vPCI support.
>>>>>
>>>>> Otherwise it breaks the PCI passthrough feature, e.g. it compiles,
>>>>>
>>>>> but the resulting binary behaves wrong.
>>>>>
>>>>> Do you see this as an acceptable solution?
>>>> Like all (or at least the majority) of other HAS_*, it ought to be
>>>> "select"-ed (by arm/Kconfig). Is there a reason this isn't possible?
>>>> (I don't mind the "depends on", as long as it's clear that it exists
>>>> solely to allow kconfig to warn about bogus "select"s.)
>>> There is yet no Kconfig exists (even for Arm) that enables HAS_VPCI,
>>>
>>> thus I can't do that at the moment even if I want to. Yes, this can be deferred
>>>
>>> to the later stage when we enable VPCI for Arm, bit this config option is still
>>>
>>> considered as "common code" as the functionality being added
>>>
>>> to the common code is just gated with CONFIG_HAS_VPCI_GUEST_SUPPORT.
>>>
>>> With this defined now and not later the code seems to be complete and more clean
>>>
>>> as it is seen what is this configuration option and how it is enabled and used in the
>>>
>>> code.
>>>
>>> So, I see no problem if it is defined in this Kconfig and evaluates to "n" for x86
>>>
>>> and eventually will be "y" for Arm when it enables HAS_VPCI.
>> I'm afraid I don't view this as a reply reflecting that you have
>> understood what I'm asking for. The primary request is for there to
>> not be "def_bool y" but just "bool" accompanied by a "select" in
>> Arm's Kconfig. If HAS_VPCI doesn't exist as an option yet, simply
>> omit the (questionable) "depends on".
> I understood that, but was trying to make sure we don't miss
> this option while enabling vPCI on Arm, but ok, I'll have the following:
> config HAS_VPCI
>      bool
> 
> config HAS_VPCI_GUEST_SUPPORT
>      bool
>      depends on HAS_VPCI
> and select it for Arm xen/arch/arm/Kconfig

Btw you could also have

config HAS_VPCI
     bool

config HAS_VPCI_GUEST_SUPPORT
     bool
     select HAS_VPCI

which would require arm/Kconfig to only select the latter, while
x86/Kconfig would only select the former.

>> PS: The more replies I get from you, the more annoying I find the
>> insertion of blank lines between every two lines of text. Blank
>> lines are usually used to separate paragraphs. If it is your mail
>> program which inserts these, can you please try to do something
>> about this? Thanks.
>>
> I first thought that this is how Thunderbird started showing
> my replies and was also curious about the distance between the lines
> which seemed to be as double-line, but I couldn't delete or edit
> those. I thought this is only visible to me...
> It came out that this was because of some Thunderbird settings which
> made my replies with those double-liners. Hope it is fixed now.

Indeed, thanks - I did not remove any blank lines from context above.

Jan
Oleksandr Andrushchenko Sept. 29, 2021, 2:16 p.m. UTC | #13
On 29.09.21 17:07, Jan Beulich wrote:
> On 29.09.2021 15:49, Oleksandr Andrushchenko wrote:
>>
>> On 29.09.21 16:23, Jan Beulich wrote:
>>> On 29.09.2021 15:16, Oleksandr Andrushchenko wrote:
>>>> On 29.09.21 15:54, Jan Beulich wrote:
>>>>> On 29.09.2021 13:56, Oleksandr Andrushchenko wrote:
>>>>>> On 29.09.21 12:09, Jan Beulich wrote:
>>>>>>> On 29.09.2021 11:03, Oleksandr Andrushchenko wrote:
>>>>>>>> Sorry for top posting, but this is a general question on this patch/functionality.
>>>>>>>>
>>>>>>>> Do you see we need to gate all this with CONFIG_HAS_VPCI_GUEST_SUPPORT
>>>>>>>> as this renders in somewhat dead code for x86 for now? I do think this still
>>>>>>>> needs to be in the common code though.
>>>>>>> I agree it wants to live in common code, but I'd still like the code to
>>>>>>> not bloat x86 binaries. Hence yes, I think there want to be
>>>>>>> "if ( !IS_ENABLED() )" early bailout paths or, whenever this isn't
>>>>>>> possible without breaking the build, respective #ifdef-s.
>>>>>> Then it needs to be defined as (xen/drivers/Kconfig):
>>>>>>
>>>>>> config HAS_VPCI_GUEST_SUPPORT
>>>>>>         # vPCI guest support is only enabled for Arm now
>>>>>>         def_bool y if ARM
>>>>>>         depends on HAS_VPCI
>>>>>>
>>>>>> Because it needs to be defined as "y" for Arm with vPCI support.
>>>>>>
>>>>>> Otherwise it breaks the PCI passthrough feature, e.g. it compiles,
>>>>>>
>>>>>> but the resulting binary behaves wrong.
>>>>>>
>>>>>> Do you see this as an acceptable solution?
>>>>> Like all (or at least the majority) of other HAS_*, it ought to be
>>>>> "select"-ed (by arm/Kconfig). Is there a reason this isn't possible?
>>>>> (I don't mind the "depends on", as long as it's clear that it exists
>>>>> solely to allow kconfig to warn about bogus "select"s.)
>>>> There is yet no Kconfig exists (even for Arm) that enables HAS_VPCI,
>>>>
>>>> thus I can't do that at the moment even if I want to. Yes, this can be deferred
>>>>
>>>> to the later stage when we enable VPCI for Arm, bit this config option is still
>>>>
>>>> considered as "common code" as the functionality being added
>>>>
>>>> to the common code is just gated with CONFIG_HAS_VPCI_GUEST_SUPPORT.
>>>>
>>>> With this defined now and not later the code seems to be complete and more clean
>>>>
>>>> as it is seen what is this configuration option and how it is enabled and used in the
>>>>
>>>> code.
>>>>
>>>> So, I see no problem if it is defined in this Kconfig and evaluates to "n" for x86
>>>>
>>>> and eventually will be "y" for Arm when it enables HAS_VPCI.
>>> I'm afraid I don't view this as a reply reflecting that you have
>>> understood what I'm asking for. The primary request is for there to
>>> not be "def_bool y" but just "bool" accompanied by a "select" in
>>> Arm's Kconfig. If HAS_VPCI doesn't exist as an option yet, simply
>>> omit the (questionable) "depends on".
>> I understood that, but was trying to make sure we don't miss
>> this option while enabling vPCI on Arm, but ok, I'll have the following:
>> config HAS_VPCI
>>       bool
>>
>> config HAS_VPCI_GUEST_SUPPORT
>>       bool
>>       depends on HAS_VPCI
>> and select it for Arm xen/arch/arm/Kconfig
> Btw you could also have
>
> config HAS_VPCI
>       bool
>
> config HAS_VPCI_GUEST_SUPPORT
>       bool
>       select HAS_VPCI
>
> which would require arm/Kconfig to only select the latter, while
> x86/Kconfig would only select the former.
I'll probably leave it as I wrote before, because it then looks like
a sub-feature enables the parent feature and doesn't seem right
Although it may still look right...
>
>>> PS: The more replies I get from you, the more annoying I find the
>>> insertion of blank lines between every two lines of text. Blank
>>> lines are usually used to separate paragraphs. If it is your mail
>>> program which inserts these, can you please try to do something
>>> about this? Thanks.
>>>
>> I first thought that this is how Thunderbird started showing
>> my replies and was also curious about the distance between the lines
>> which seemed to be as double-line, but I couldn't delete or edit
>> those. I thought this is only visible to me...
>> It came out that this was because of some Thunderbird settings which
>> made my replies with those double-liners. Hope it is fixed now.
> Indeed, thanks - I did not remove any blank lines from context above.
>
> Jan
>
>
Thank you,
Oleksandr
diff mbox series

Patch

diff --git a/xen/common/domain.c b/xen/common/domain.c
index 40d67ec34232..b80ff2e5e2e6 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -601,6 +601,7 @@  struct domain *domain_create(domid_t domid,
 
 #ifdef CONFIG_HAS_PCI
     INIT_LIST_HEAD(&d->pdev_list);
+    INIT_LIST_HEAD(&d->vdev_list);
 #endif
 
     /* All error paths can depend on the above setup. */
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index e1da283d73ad..4552ace855e0 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -833,6 +833,63 @@  int pci_remove_device(u16 seg, u8 bus, u8 devfn)
     return ret;
 }
 
+static struct vpci_dev *pci_find_virtual_device(const struct domain *d,
+                                                const struct pci_dev *pdev)
+{
+    struct vpci_dev *vdev;
+
+    list_for_each_entry ( vdev, &d->vdev_list, list )
+        if ( vdev->pdev == pdev )
+            return vdev;
+    return NULL;
+}
+
+int pci_add_virtual_device(struct domain *d, const struct pci_dev *pdev)
+{
+    struct vpci_dev *vdev;
+
+    ASSERT(!pci_find_virtual_device(d, pdev));
+
+    /* Each PCI bus supports 32 devices/slots at max. */
+    if ( d->vpci_dev_next > 31 )
+        return -ENOSPC;
+
+    vdev = xzalloc(struct vpci_dev);
+    if ( !vdev )
+        return -ENOMEM;
+
+    /* We emulate a single host bridge for the guest, so segment is always 0. */
+    *(u16*) &vdev->seg = 0;
+    /*
+     * The bus number is set to 0, so virtual devices are seen
+     * as embedded endpoints behind the root complex.
+     */
+    *((u8*) &vdev->bus) = 0;
+    *((u8*) &vdev->devfn) = PCI_DEVFN(d->vpci_dev_next++, 0);
+
+    vdev->pdev = pdev;
+    vdev->domain = d;
+
+    pcidevs_lock();
+    list_add_tail(&vdev->list, &d->vdev_list);
+    pcidevs_unlock();
+
+    return 0;
+}
+
+int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev)
+{
+    struct vpci_dev *vdev;
+
+    pcidevs_lock();
+    vdev = pci_find_virtual_device(d, pdev);
+    if ( vdev )
+        list_del(&vdev->list);
+    pcidevs_unlock();
+    xfree(vdev);
+    return 0;
+}
+
 /* Caller should hold the pcidevs_lock */
 static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus,
                            uint8_t devfn)
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 0cdc0c3f75c4..a40a138a14f7 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -90,24 +90,36 @@  int __hwdom_init vpci_add_handlers(struct pci_dev *pdev)
 /* Notify vPCI that device is assigned to guest. */
 int vpci_assign_device(struct domain *d, const struct pci_dev *dev)
 {
+#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
+    int rc;
+#endif
+
     /* It only makes sense to assign for hwdom or guest domain. */
     if ( is_system_domain(d) || !has_vpci(d) )
         return 0;
 
 #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
-    return vpci_bar_add_handlers(d, dev);
-#else
-    return 0;
+    rc = vpci_bar_add_handlers(d, dev);
+    if ( rc )
+        return rc;
 #endif
+
+    return pci_add_virtual_device(d, dev);
 }
 
 /* Notify vPCI that device is de-assigned from guest. */
 int vpci_deassign_device(struct domain *d, const struct pci_dev *dev)
 {
+    int rc;
+
     /* It only makes sense to de-assign from hwdom or guest domain. */
     if ( is_system_domain(d) || !has_vpci(d) )
         return 0;
 
+    rc = pci_remove_virtual_device(d, dev);
+    if ( rc )
+        return rc;
+
 #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
     return vpci_bar_remove_handlers(d, dev);
 #else
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 2b2dfb6f1b49..35ae1d093921 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -136,6 +136,22 @@  struct pci_dev {
     struct vpci *vpci;
 };
 
+struct vpci_dev {
+    struct list_head list;
+    /* Physical PCI device this virtual device is connected to. */
+    const struct pci_dev *pdev;
+    /* Virtual SBDF of the device. */
+    const union {
+        struct {
+            uint8_t devfn;
+            uint8_t bus;
+            uint16_t seg;
+        };
+        pci_sbdf_t sbdf;
+    };
+    struct domain *domain;
+};
+
 #define for_each_pdev(domain, pdev) \
     list_for_each_entry(pdev, &(domain)->pdev_list, domain_list)
 
@@ -165,7 +181,9 @@  int pci_add_segment(u16 seg);
 const unsigned long *pci_get_ro_map(u16 seg);
 int pci_add_device(u16 seg, u8 bus, u8 devfn,
                    const struct pci_dev_info *, nodeid_t node);
+int pci_add_virtual_device(struct domain *d, const struct pci_dev *pdev);
 int pci_remove_device(u16 seg, u8 bus, u8 devfn);
+int pci_remove_virtual_device(struct domain *d, const struct pci_dev *pdev);
 int pci_ro_device(int seg, int bus, int devfn);
 int pci_hide_device(unsigned int seg, unsigned int bus, unsigned int devfn);
 struct pci_dev *pci_get_pdev(int seg, int bus, int devfn);
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 28146ee404e6..d304c7ebe766 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -444,6 +444,12 @@  struct domain
 
 #ifdef CONFIG_HAS_PCI
     struct list_head pdev_list;
+    struct list_head vdev_list;
+    /*
+     * Current device number used by the virtual PCI bus topology
+     * to assign a unique SBDF to a passed through virtual PCI device.
+     */
+    int vpci_dev_next;
 #endif
 
 #ifdef CONFIG_HAS_PASSTHROUGH