diff mbox series

[10/11] xen/arm: Do not map PCI ECAM space to Domain-0's p2m

Message ID 20210903083347.131786-11-andr2000@gmail.com (mailing list archive)
State Superseded
Headers show
Series PCI devices passthrough on Arm, part 2 | expand

Commit Message

Oleksandr Andrushchenko Sept. 3, 2021, 8:33 a.m. UTC
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

Host bridge controller's ECAM space is mapped into Domain-0's p2m,
thus it is not possible to trap the same for vPCI via MMIO handlers.
For this to work we need to not map those while constructing the domain.

Note, that during Domain-0 creation there is no pci_dev yet allocated for
host bridges, thus we cannot match PCI host and its associated
bridge by SBDF. Use dt_device_node field for checks instead.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
---
 xen/arch/arm/domain_build.c        |  3 +++
 xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
 xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
 xen/include/asm-arm/pci.h          | 12 ++++++++++++
 4 files changed, 54 insertions(+)

Comments

Julien Grall Sept. 9, 2021, 5:58 p.m. UTC | #1
Hi Oleksandr,

On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> 
> Host bridge controller's ECAM space is mapped into Domain-0's p2m,
> thus it is not possible to trap the same for vPCI via MMIO handlers.
> For this to work we need to not map those while constructing the domain.
> 
> Note, that during Domain-0 creation there is no pci_dev yet allocated for
> host bridges, thus we cannot match PCI host and its associated
> bridge by SBDF. Use dt_device_node field for checks instead.
> 
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> ---
>   xen/arch/arm/domain_build.c        |  3 +++
>   xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
>   xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
>   xen/include/asm-arm/pci.h          | 12 ++++++++++++
>   4 files changed, 54 insertions(+)
> 
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index da427f399711..76f5b513280c 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -1257,6 +1257,9 @@ static int __init map_range_to_domain(const struct dt_device_node *dev,
>           }
>       }
>   
> +    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI) ) > +        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev, 
addr, len);

AFAICT, with device_get_class(dev), you know whether the hostbridge is 
used by Xen. Therefore, I would expect that we don't want to map all the 
regions of the hostbridges in dom0 (including the BARs).

Can you clarify it?

> + >       if ( need_mapping )
>       {
>           res = map_regions_p2mt(d,
> diff --git a/xen/arch/arm/pci/ecam.c b/xen/arch/arm/pci/ecam.c
> index 92ecb2e0762b..d32efb7fcbd0 100644
> --- a/xen/arch/arm/pci/ecam.c
> +++ b/xen/arch/arm/pci/ecam.c
> @@ -52,6 +52,22 @@ static int pci_ecam_register_mmio_handler(struct domain *d,
>       return 0;
>   }
>   
> +static int pci_ecam_need_p2m_mapping(struct domain *d,
> +                                     struct pci_host_bridge *bridge,
> +                                     uint64_t addr, uint64_t len)
> +{
> +    struct pci_config_window *cfg = bridge->sysdata;
> +
> +    if ( !is_hardware_domain(d) )
> +        return true;

I am a bit puzzled with this check. If the ECAM has been initialized by 
Xen, then I believe we cannot expose it to any domain at all.

> +
> +    /*
> +     * We do not want ECAM address space to be mapped in domain's p2m,
> +     * so we can trap access to it.
> +     */
> +    return cfg->phys_addr != addr;
> +}
> +
>   /* ECAM ops */
>   const struct pci_ecam_ops pci_generic_ecam_ops = {
>       .bus_shift  = 20,
> @@ -60,6 +76,7 @@ const struct pci_ecam_ops pci_generic_ecam_ops = {
>           .read                   = pci_generic_config_read,
>           .write                  = pci_generic_config_write,
>           .register_mmio_handler  = pci_ecam_register_mmio_handler,
> +        .need_p2m_mapping       = pci_ecam_need_p2m_mapping,
>       }
>   };
>   
> diff --git a/xen/arch/arm/pci/pci-host-common.c b/xen/arch/arm/pci/pci-host-common.c
> index a89112bfbb7c..c04be636452d 100644
> --- a/xen/arch/arm/pci/pci-host-common.c
> +++ b/xen/arch/arm/pci/pci-host-common.c
> @@ -334,6 +334,28 @@ int pci_host_iterate_bridges(struct domain *d,
>       }
>       return 0;
>   }
> +
> +bool pci_host_bridge_need_p2m_mapping(struct domain *d,
> +                                      const struct dt_device_node *node,
> +                                      uint64_t addr, uint64_t len)
> +{
> +    struct pci_host_bridge *bridge;
> +
> +    list_for_each_entry( bridge, &pci_host_bridges, node )
> +    {
> +        if ( bridge->dt_node != node )
> +            continue;
> +
> +        if ( !bridge->ops->need_p2m_mapping )
> +            return true;
> +
> +        return bridge->ops->need_p2m_mapping(d, bridge, addr, len);
> +    }
> +    printk(XENLOG_ERR "Unable to find PCI bridge for %s segment %d, addr %lx\n",
> +           node->full_name, bridge->segment, addr);
> +    return true;
> +}

If you really need to map the hostbridges, then I would suggest to defer 
the P2M mappings for all of them and then walk all the bridge in one go 
to do the mappings.

This would avoid going through the bridges every time during setup.

> +
>   /*
>    * Local variables:
>    * mode: C
> diff --git a/xen/include/asm-arm/pci.h b/xen/include/asm-arm/pci.h
> index 2c7c7649e00f..9c28a4bdc4b7 100644
> --- a/xen/include/asm-arm/pci.h
> +++ b/xen/include/asm-arm/pci.h
> @@ -82,6 +82,8 @@ struct pci_ops {
>       int (*register_mmio_handler)(struct domain *d,
>                                    struct pci_host_bridge *bridge,
>                                    const struct mmio_handler_ops *ops);
> +    int (*need_p2m_mapping)(struct domain *d, struct pci_host_bridge *bridge,
> +                            uint64_t addr, uint64_t len);
>   };
>   
>   /*
> @@ -115,9 +117,19 @@ struct dt_device_node *pci_find_host_bridge_node(struct device *dev);
>   int pci_host_iterate_bridges(struct domain *d,
>                                int (*clb)(struct domain *d,
>                                           struct pci_host_bridge *bridge));
> +bool pci_host_bridge_need_p2m_mapping(struct domain *d,
> +                                      const struct dt_device_node *node,
> +                                      uint64_t addr, uint64_t len);
>   #else   /*!CONFIG_HAS_PCI*/
>   
>   struct arch_pci_dev { };
>   
> +static inline bool
> +pci_host_bridge_need_p2m_mapping(struct domain *d,
> +                                 const struct dt_device_node *node,
> +                                 uint64_t addr, uint64_t len)
> +{
> +    return true;
> +}
>   #endif  /*!CONFIG_HAS_PCI*/
>   #endif /* __ARM_PCI_H__ */
> 

Cheers,
Oleksandr Andrushchenko Sept. 10, 2021, 12:37 p.m. UTC | #2
Hi, Julien!

On 09.09.21 20:58, Julien Grall wrote:
> Hi Oleksandr,
>
> On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>
>> Host bridge controller's ECAM space is mapped into Domain-0's p2m,
>> thus it is not possible to trap the same for vPCI via MMIO handlers.
>> For this to work we need to not map those while constructing the domain.
>>
>> Note, that during Domain-0 creation there is no pci_dev yet allocated for
>> host bridges, thus we cannot match PCI host and its associated
>> bridge by SBDF. Use dt_device_node field for checks instead.
>>
>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>> ---
>>   xen/arch/arm/domain_build.c        |  3 +++
>>   xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
>>   xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
>>   xen/include/asm-arm/pci.h          | 12 ++++++++++++
>>   4 files changed, 54 insertions(+)
>>
>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> index da427f399711..76f5b513280c 100644
>> --- a/xen/arch/arm/domain_build.c
>> +++ b/xen/arch/arm/domain_build.c
>> @@ -1257,6 +1257,9 @@ static int __init map_range_to_domain(const struct dt_device_node *dev,
>>           }
>>       }
>>   +    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI) ) > +        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev, 
> addr, len);
>
> AFAICT, with device_get_class(dev), you know whether the hostbridge is used by Xen. Therefore, I would expect that we don't want to map all the regions of the hostbridges in dom0 (including the BARs).
>
> Can you clarify it?
We only want to trap ECAM, not MMIOs and any other memory regions as the bridge is

initialized and used by Domain-0 completely. But at the same time we want to trap all

configuration space access, so we can see what is happening to it.

>
>> + >       if ( need_mapping )
>>       {
>>           res = map_regions_p2mt(d,
>> diff --git a/xen/arch/arm/pci/ecam.c b/xen/arch/arm/pci/ecam.c
>> index 92ecb2e0762b..d32efb7fcbd0 100644
>> --- a/xen/arch/arm/pci/ecam.c
>> +++ b/xen/arch/arm/pci/ecam.c
>> @@ -52,6 +52,22 @@ static int pci_ecam_register_mmio_handler(struct domain *d,
>>       return 0;
>>   }
>>   +static int pci_ecam_need_p2m_mapping(struct domain *d,
>> +                                     struct pci_host_bridge *bridge,
>> +                                     uint64_t addr, uint64_t len)
>> +{
>> +    struct pci_config_window *cfg = bridge->sysdata;
>> +
>> +    if ( !is_hardware_domain(d) )
>> +        return true;
>
> I am a bit puzzled with this check. If the ECAM has been initialized by Xen, then I believe we cannot expose it to any domain at all.
You are right, this check needs to be removed as this function is only called for Domain-0

>
>> +
>> +    /*
>> +     * We do not want ECAM address space to be mapped in domain's p2m,
>> +     * so we can trap access to it.
>> +     */
>> +    return cfg->phys_addr != addr;
>> +}
>> +
>>   /* ECAM ops */
>>   const struct pci_ecam_ops pci_generic_ecam_ops = {
>>       .bus_shift  = 20,
>> @@ -60,6 +76,7 @@ const struct pci_ecam_ops pci_generic_ecam_ops = {
>>           .read                   = pci_generic_config_read,
>>           .write                  = pci_generic_config_write,
>>           .register_mmio_handler  = pci_ecam_register_mmio_handler,
>> +        .need_p2m_mapping       = pci_ecam_need_p2m_mapping,
>>       }
>>   };
>>   diff --git a/xen/arch/arm/pci/pci-host-common.c b/xen/arch/arm/pci/pci-host-common.c
>> index a89112bfbb7c..c04be636452d 100644
>> --- a/xen/arch/arm/pci/pci-host-common.c
>> +++ b/xen/arch/arm/pci/pci-host-common.c
>> @@ -334,6 +334,28 @@ int pci_host_iterate_bridges(struct domain *d,
>>       }
>>       return 0;
>>   }
>> +
>> +bool pci_host_bridge_need_p2m_mapping(struct domain *d,
>> +                                      const struct dt_device_node *node,
>> +                                      uint64_t addr, uint64_t len)
>> +{
>> +    struct pci_host_bridge *bridge;
>> +
>> +    list_for_each_entry( bridge, &pci_host_bridges, node )
>> +    {
>> +        if ( bridge->dt_node != node )
>> +            continue;
>> +
>> +        if ( !bridge->ops->need_p2m_mapping )
>> +            return true;
>> +
>> +        return bridge->ops->need_p2m_mapping(d, bridge, addr, len);
>> +    }
>> +    printk(XENLOG_ERR "Unable to find PCI bridge for %s segment %d, addr %lx\n",
>> +           node->full_name, bridge->segment, addr);
>> +    return true;
>> +}
>
> If you really need to map the hostbridges, then I would suggest to defer the P2M mappings for all of them and then walk all the bridge in one go to do the mappings.
>
> This would avoid going through the bridges every time during setup.

Well, this can be done, but: my implementation prevents mappings and what

you suggest will require unmapping. So, the cost in my solution is we need

to go over all the bridges (until we find the one we need) and in what you

suggest we'll need to unmap what we have just mapped.

I think preventing unneeded mappings is cheaper than unmapping afterwards.

>
>> +
>>   /*
>>    * Local variables:
>>    * mode: C
>> diff --git a/xen/include/asm-arm/pci.h b/xen/include/asm-arm/pci.h
>> index 2c7c7649e00f..9c28a4bdc4b7 100644
>> --- a/xen/include/asm-arm/pci.h
>> +++ b/xen/include/asm-arm/pci.h
>> @@ -82,6 +82,8 @@ struct pci_ops {
>>       int (*register_mmio_handler)(struct domain *d,
>>                                    struct pci_host_bridge *bridge,
>>                                    const struct mmio_handler_ops *ops);
>> +    int (*need_p2m_mapping)(struct domain *d, struct pci_host_bridge *bridge,
>> +                            uint64_t addr, uint64_t len);
>>   };
>>     /*
>> @@ -115,9 +117,19 @@ struct dt_device_node *pci_find_host_bridge_node(struct device *dev);
>>   int pci_host_iterate_bridges(struct domain *d,
>>                                int (*clb)(struct domain *d,
>>                                           struct pci_host_bridge *bridge));
>> +bool pci_host_bridge_need_p2m_mapping(struct domain *d,
>> +                                      const struct dt_device_node *node,
>> +                                      uint64_t addr, uint64_t len);
>>   #else   /*!CONFIG_HAS_PCI*/
>>     struct arch_pci_dev { };
>>   +static inline bool
>> +pci_host_bridge_need_p2m_mapping(struct domain *d,
>> +                                 const struct dt_device_node *node,
>> +                                 uint64_t addr, uint64_t len)
>> +{
>> +    return true;
>> +}
>>   #endif  /*!CONFIG_HAS_PCI*/
>>   #endif /* __ARM_PCI_H__ */
>>
>
> Cheers,
>
Thank you,

Oleksandr
Julien Grall Sept. 10, 2021, 1:18 p.m. UTC | #3
On 10/09/2021 13:37, Oleksandr Andrushchenko wrote:
> Hi, Julien!

Hi Oleksandr,

> On 09.09.21 20:58, Julien Grall wrote:
>> On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>
>>> Host bridge controller's ECAM space is mapped into Domain-0's p2m,
>>> thus it is not possible to trap the same for vPCI via MMIO handlers.
>>> For this to work we need to not map those while constructing the domain.
>>>
>>> Note, that during Domain-0 creation there is no pci_dev yet allocated for
>>> host bridges, thus we cannot match PCI host and its associated
>>> bridge by SBDF. Use dt_device_node field for checks instead.
>>>
>>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>> ---
>>>    xen/arch/arm/domain_build.c        |  3 +++
>>>    xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
>>>    xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
>>>    xen/include/asm-arm/pci.h          | 12 ++++++++++++
>>>    4 files changed, 54 insertions(+)
>>>
>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>>> index da427f399711..76f5b513280c 100644
>>> --- a/xen/arch/arm/domain_build.c
>>> +++ b/xen/arch/arm/domain_build.c
>>> @@ -1257,6 +1257,9 @@ static int __init map_range_to_domain(const struct dt_device_node *dev,
>>>            }
>>>        }
>>>    +    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI) ) > +        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev,
>> addr, len);
>>
>> AFAICT, with device_get_class(dev), you know whether the hostbridge is used by Xen. Therefore, I would expect that we don't want to map all the regions of the hostbridges in dom0 (including the BARs).
>>
>> Can you clarify it?
> We only want to trap ECAM, not MMIOs and any other memory regions as the bridge is
> 
> initialized and used by Domain-0 completely. 

What do you mean by "used by Domain-0 completely"? The hostbridge is 
owned by Xen so I don't think we can let dom0 access any MMIO regions by
default.

In particular, we may want to hide a device from dom0 for security 
reasons. This is not going to be possible if you map by default 
everything to dom0.

Instead, the BARs should be mapped on demand when dom0 when we trap 
access to the configuration space.

For other regions, could you provide an example of what you are 
referring too?

>>> +
>>> +    /*
>>> +     * We do not want ECAM address space to be mapped in domain's p2m,
>>> +     * so we can trap access to it.
>>> +     */
>>> +    return cfg->phys_addr != addr;
>>> +}
>>> +
>>>    /* ECAM ops */
>>>    const struct pci_ecam_ops pci_generic_ecam_ops = {
>>>        .bus_shift  = 20,
>>> @@ -60,6 +76,7 @@ const struct pci_ecam_ops pci_generic_ecam_ops = {
>>>            .read                   = pci_generic_config_read,
>>>            .write                  = pci_generic_config_write,
>>>            .register_mmio_handler  = pci_ecam_register_mmio_handler,
>>> +        .need_p2m_mapping       = pci_ecam_need_p2m_mapping,
>>>        }
>>>    };
>>>    diff --git a/xen/arch/arm/pci/pci-host-common.c b/xen/arch/arm/pci/pci-host-common.c
>>> index a89112bfbb7c..c04be636452d 100644
>>> --- a/xen/arch/arm/pci/pci-host-common.c
>>> +++ b/xen/arch/arm/pci/pci-host-common.c
>>> @@ -334,6 +334,28 @@ int pci_host_iterate_bridges(struct domain *d,
>>>        }
>>>        return 0;
>>>    }
>>> +
>>> +bool pci_host_bridge_need_p2m_mapping(struct domain *d,
>>> +                                      const struct dt_device_node *node,
>>> +                                      uint64_t addr, uint64_t len)
>>> +{
>>> +    struct pci_host_bridge *bridge;
>>> +
>>> +    list_for_each_entry( bridge, &pci_host_bridges, node )
>>> +    {
>>> +        if ( bridge->dt_node != node )
>>> +            continue;
>>> +
>>> +        if ( !bridge->ops->need_p2m_mapping )
>>> +            return true;
>>> +
>>> +        return bridge->ops->need_p2m_mapping(d, bridge, addr, len);
>>> +    }
>>> +    printk(XENLOG_ERR "Unable to find PCI bridge for %s segment %d, addr %lx\n",
>>> +           node->full_name, bridge->segment, addr);
>>> +    return true;
>>> +}
>>
>> If you really need to map the hostbridges, then I would suggest to defer the P2M mappings for all of them and then walk all the bridge in one go to do the mappings.
>>
>> This would avoid going through the bridges every time during setup.
> 
> Well, this can be done, but: my implementation prevents mappings and what
> 
> you suggest will require unmapping. So, the cost in my solution is we need
> 
> to go over all the bridges (until we find the one we need) and in what you
> 
> suggest we'll need to unmap what we have just mapped.
> 
> I think preventing unneeded mappings is cheaper than unmapping afterwards.

I think you misundertood what I am suggesting. What I said is you could 
defer the mappings (IOW not do the mapping) until later for the 
hostbridges. And then you can walk all the hostbridges to decide how to 
map them.

The regions will only mapped once and never be unmapped.

Cheers,
Oleksandr Andrushchenko Sept. 10, 2021, 2:01 p.m. UTC | #4
On 10.09.21 16:18, Julien Grall wrote:
>
>
> On 10/09/2021 13:37, Oleksandr Andrushchenko wrote:
>> Hi, Julien!
>
> Hi Oleksandr,
>
>> On 09.09.21 20:58, Julien Grall wrote:
>>> On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>
>>>> Host bridge controller's ECAM space is mapped into Domain-0's p2m,
>>>> thus it is not possible to trap the same for vPCI via MMIO handlers.
>>>> For this to work we need to not map those while constructing the domain.
>>>>
>>>> Note, that during Domain-0 creation there is no pci_dev yet allocated for
>>>> host bridges, thus we cannot match PCI host and its associated
>>>> bridge by SBDF. Use dt_device_node field for checks instead.
>>>>
>>>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>> ---
>>>>    xen/arch/arm/domain_build.c        |  3 +++
>>>>    xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
>>>>    xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
>>>>    xen/include/asm-arm/pci.h          | 12 ++++++++++++
>>>>    4 files changed, 54 insertions(+)
>>>>
>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>>>> index da427f399711..76f5b513280c 100644
>>>> --- a/xen/arch/arm/domain_build.c
>>>> +++ b/xen/arch/arm/domain_build.c
>>>> @@ -1257,6 +1257,9 @@ static int __init map_range_to_domain(const struct dt_device_node *dev,
>>>>            }
>>>>        }
>>>>    +    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI) ) > +        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev,
>>> addr, len);
>>>
>>> AFAICT, with device_get_class(dev), you know whether the hostbridge is used by Xen. Therefore, I would expect that we don't want to map all the regions of the hostbridges in dom0 (including the BARs).
>>>
>>> Can you clarify it?
>> We only want to trap ECAM, not MMIOs and any other memory regions as the bridge is
>>
>> initialized and used by Domain-0 completely. 
>
> What do you mean by "used by Domain-0 completely"? The hostbridge is owned by Xen so I don't think we can let dom0 access any MMIO regions by
> default.

Now it's my time to ask why do you think Xen owns the bridge?

All the bridges are passed to Domain-0, are fully visible to Domain-0, initialized in Domain-0.

Enumeration etc. is done in Domain-0. So how comes that Xen owns the bridge? In what way it does?

Xen just accesses the ECAM when it needs it, but that's it. Xen traps ECAM access - that is true.

But it in no way uses MMIOs etc. of the bridge - those under direct control of Domain-0

>
> In particular, we may want to hide a device from dom0 for security reasons. This is not going to be possible if you map by default everything to dom0.

Then the bridge most probably will become unusable as we do not have relevant drivers in Xen.

At best we may rely on the firmware doing the initialization for us, then yes, we can control an ECAM bridge in Xen.

But this is not the case now: we rely on Domain-0 to initialize and setup the bridge

>
> Instead, the BARs should be mapped on demand when dom0 when we trap access to the configuration space.
>
> For other regions, could you provide an example of what you are referring too?
Registers of the bridge for example, all what is listed in "reg" property
>
>>>> +
>>>> +    /*
>>>> +     * We do not want ECAM address space to be mapped in domain's p2m,
>>>> +     * so we can trap access to it.
>>>> +     */
>>>> +    return cfg->phys_addr != addr;
>>>> +}
>>>> +
>>>>    /* ECAM ops */
>>>>    const struct pci_ecam_ops pci_generic_ecam_ops = {
>>>>        .bus_shift  = 20,
>>>> @@ -60,6 +76,7 @@ const struct pci_ecam_ops pci_generic_ecam_ops = {
>>>>            .read                   = pci_generic_config_read,
>>>>            .write                  = pci_generic_config_write,
>>>>            .register_mmio_handler  = pci_ecam_register_mmio_handler,
>>>> +        .need_p2m_mapping       = pci_ecam_need_p2m_mapping,
>>>>        }
>>>>    };
>>>>    diff --git a/xen/arch/arm/pci/pci-host-common.c b/xen/arch/arm/pci/pci-host-common.c
>>>> index a89112bfbb7c..c04be636452d 100644
>>>> --- a/xen/arch/arm/pci/pci-host-common.c
>>>> +++ b/xen/arch/arm/pci/pci-host-common.c
>>>> @@ -334,6 +334,28 @@ int pci_host_iterate_bridges(struct domain *d,
>>>>        }
>>>>        return 0;
>>>>    }
>>>> +
>>>> +bool pci_host_bridge_need_p2m_mapping(struct domain *d,
>>>> +                                      const struct dt_device_node *node,
>>>> +                                      uint64_t addr, uint64_t len)
>>>> +{
>>>> +    struct pci_host_bridge *bridge;
>>>> +
>>>> +    list_for_each_entry( bridge, &pci_host_bridges, node )
>>>> +    {
>>>> +        if ( bridge->dt_node != node )
>>>> +            continue;
>>>> +
>>>> +        if ( !bridge->ops->need_p2m_mapping )
>>>> +            return true;
>>>> +
>>>> +        return bridge->ops->need_p2m_mapping(d, bridge, addr, len);
>>>> +    }
>>>> +    printk(XENLOG_ERR "Unable to find PCI bridge for %s segment %d, addr %lx\n",
>>>> +           node->full_name, bridge->segment, addr);
>>>> +    return true;
>>>> +}
>>>
>>> If you really need to map the hostbridges, then I would suggest to defer the P2M mappings for all of them and then walk all the bridge in one go to do the mappings.
>>>
>>> This would avoid going through the bridges every time during setup.
>>
>> Well, this can be done, but: my implementation prevents mappings and what
>>
>> you suggest will require unmapping. So, the cost in my solution is we need
>>
>> to go over all the bridges (until we find the one we need) and in what you
>>
>> suggest we'll need to unmap what we have just mapped.
>>
>> I think preventing unneeded mappings is cheaper than unmapping afterwards.
>
> I think you misundertood what I am suggesting. What I said is you could defer the mappings (IOW not do the mapping) until later for the hostbridges.

For each device tree device we call

static int __init map_range_to_domain(const struct dt_device_node *dev,
                                       u64 addr, u64 len,
                                       void *data)
which will call

         res = map_regions_p2mt(d,
So, ECAM will be mapped and then we'll need to unmap it

> And then you can walk all the hostbridges to decide how to map them.
We don't want to map ECAM, we want to trap it
>
> The regions will only mapped once and never be unmapped.
>
> Cheers,
>
Thank you,

Oleksandr
Julien Grall Sept. 10, 2021, 2:18 p.m. UTC | #5
On 10/09/2021 15:01, Oleksandr Andrushchenko wrote:
> 
> On 10.09.21 16:18, Julien Grall wrote:
>>
>>
>> On 10/09/2021 13:37, Oleksandr Andrushchenko wrote:
>>> Hi, Julien!
>>
>> Hi Oleksandr,
>>
>>> On 09.09.21 20:58, Julien Grall wrote:
>>>> On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
>>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>>
>>>>> Host bridge controller's ECAM space is mapped into Domain-0's p2m,
>>>>> thus it is not possible to trap the same for vPCI via MMIO handlers.
>>>>> For this to work we need to not map those while constructing the domain.
>>>>>
>>>>> Note, that during Domain-0 creation there is no pci_dev yet allocated for
>>>>> host bridges, thus we cannot match PCI host and its associated
>>>>> bridge by SBDF. Use dt_device_node field for checks instead.
>>>>>
>>>>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>> ---
>>>>>     xen/arch/arm/domain_build.c        |  3 +++
>>>>>     xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
>>>>>     xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
>>>>>     xen/include/asm-arm/pci.h          | 12 ++++++++++++
>>>>>     4 files changed, 54 insertions(+)
>>>>>
>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>>>>> index da427f399711..76f5b513280c 100644
>>>>> --- a/xen/arch/arm/domain_build.c
>>>>> +++ b/xen/arch/arm/domain_build.c
>>>>> @@ -1257,6 +1257,9 @@ static int __init map_range_to_domain(const struct dt_device_node *dev,
>>>>>             }
>>>>>         }
>>>>>     +    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI) ) > +        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev,
>>>> addr, len);
>>>>
>>>> AFAICT, with device_get_class(dev), you know whether the hostbridge is used by Xen. Therefore, I would expect that we don't want to map all the regions of the hostbridges in dom0 (including the BARs).
>>>>
>>>> Can you clarify it?
>>> We only want to trap ECAM, not MMIOs and any other memory regions as the bridge is
>>>
>>> initialized and used by Domain-0 completely.
>>
>> What do you mean by "used by Domain-0 completely"? The hostbridge is owned by Xen so I don't think we can let dom0 access any MMIO regions by
>> default.
> 
> Now it's my time to ask why do you think Xen owns the bridge?

Because nothing in this series indicates either way. So I assumed this 
should be owned by Xen because it will drive it.

 From what you wrote below, it sounds like this is not the case. I think 
you want to have a design document sent along the series so we can 
easily know what's the expected design and validate that the code match 
the agreement.

There was already a design document sent a few months ago. So you may 
want to refresh it (if needed) and send it again with this series.

> 
> All the bridges are passed to Domain-0, are fully visible to Domain-0, initialized in Domain-0.
> 
> Enumeration etc. is done in Domain-0. So how comes that Xen owns the bridge? In what way it does?
> 
> Xen just accesses the ECAM when it needs it, but that's it. Xen traps ECAM access - that is true.
> 
> But it in no way uses MMIOs etc. of the bridge - those under direct control of Domain-0
> 
>>
>> In particular, we may want to hide a device from dom0 for security reasons. This is not going to be possible if you map by default everything to dom0.
> 
> Then the bridge most probably will become unusable as we do not have relevant drivers in Xen.
> 
> At best we may rely on the firmware doing the initialization for us, then yes, we can control an ECAM bridge in Xen.
> 
> But this is not the case now: we rely on Domain-0 to initialize and setup the bridge
> 
>>
>> Instead, the BARs should be mapped on demand when dom0 when we trap access to the configuration space.
>>
>> For other regions, could you provide an example of what you are referring too?
> Registers of the bridge for example, all what is listed in "reg" property
>>
>>>>> +
>>>>> +    /*
>>>>> +     * We do not want ECAM address space to be mapped in domain's p2m,
>>>>> +     * so we can trap access to it.
>>>>> +     */
>>>>> +    return cfg->phys_addr != addr;
>>>>> +}
>>>>> +
>>>>>     /* ECAM ops */
>>>>>     const struct pci_ecam_ops pci_generic_ecam_ops = {
>>>>>         .bus_shift  = 20,
>>>>> @@ -60,6 +76,7 @@ const struct pci_ecam_ops pci_generic_ecam_ops = {
>>>>>             .read                   = pci_generic_config_read,
>>>>>             .write                  = pci_generic_config_write,
>>>>>             .register_mmio_handler  = pci_ecam_register_mmio_handler,
>>>>> +        .need_p2m_mapping       = pci_ecam_need_p2m_mapping,
>>>>>         }
>>>>>     };
>>>>>     diff --git a/xen/arch/arm/pci/pci-host-common.c b/xen/arch/arm/pci/pci-host-common.c
>>>>> index a89112bfbb7c..c04be636452d 100644
>>>>> --- a/xen/arch/arm/pci/pci-host-common.c
>>>>> +++ b/xen/arch/arm/pci/pci-host-common.c
>>>>> @@ -334,6 +334,28 @@ int pci_host_iterate_bridges(struct domain *d,
>>>>>         }
>>>>>         return 0;
>>>>>     }
>>>>> +
>>>>> +bool pci_host_bridge_need_p2m_mapping(struct domain *d,
>>>>> +                                      const struct dt_device_node *node,
>>>>> +                                      uint64_t addr, uint64_t len)
>>>>> +{
>>>>> +    struct pci_host_bridge *bridge;
>>>>> +
>>>>> +    list_for_each_entry( bridge, &pci_host_bridges, node )
>>>>> +    {
>>>>> +        if ( bridge->dt_node != node )
>>>>> +            continue;
>>>>> +
>>>>> +        if ( !bridge->ops->need_p2m_mapping )
>>>>> +            return true;
>>>>> +
>>>>> +        return bridge->ops->need_p2m_mapping(d, bridge, addr, len);
>>>>> +    }
>>>>> +    printk(XENLOG_ERR "Unable to find PCI bridge for %s segment %d, addr %lx\n",
>>>>> +           node->full_name, bridge->segment, addr);
>>>>> +    return true;
>>>>> +}
>>>>
>>>> If you really need to map the hostbridges, then I would suggest to defer the P2M mappings for all of them and then walk all the bridge in one go to do the mappings.
>>>>
>>>> This would avoid going through the bridges every time during setup.
>>>
>>> Well, this can be done, but: my implementation prevents mappings and what
>>>
>>> you suggest will require unmapping. So, the cost in my solution is we need
>>>
>>> to go over all the bridges (until we find the one we need) and in what you
>>>
>>> suggest we'll need to unmap what we have just mapped.
>>>
>>> I think preventing unneeded mappings is cheaper than unmapping afterwards.
>>
>> I think you misundertood what I am suggesting. What I said is you could defer the mappings (IOW not do the mapping) until later for the hostbridges.
> 
> For each device tree device we call
> 
> static int __init map_range_to_domain(const struct dt_device_node *dev,
>                                         u64 addr, u64 len,
>                                         void *data)
> which will call
> 
>           res = map_regions_p2mt(d,
> So, ECAM will be mapped and then we'll need to unmap it

Well yes in the current code. But I specifically wrote "defer" as in 
adding a check "if hostbridge then return 0".

> 
>> And then you can walk all the hostbridges to decide how to map them.
> We don't want to map ECAM, we want to trap it

I know that... But as you wrote above, you want to map other regions of 
the hostbridge. So if you defer *all* the mappings for the hostbridge, 
then you can walk the hostbridges and check decide which regions should 
be mapped.

Cheers,
Oleksandr Andrushchenko Sept. 10, 2021, 2:38 p.m. UTC | #6
[snip]


>>> What do you mean by "used by Domain-0 completely"? The hostbridge is owned by Xen so I don't think we can let dom0 access any MMIO regions by
>>> default.
>>
>> Now it's my time to ask why do you think Xen owns the bridge?
>
> Because nothing in this series indicates either way. So I assumed this should be owned by Xen because it will drive it.
>
> From what you wrote below, it sounds like this is not the case. I think you want to have a design document sent along the series so we can easily know what's the expected design and validate that the code match the agreement.
>
> There was already a design document sent a few months ago. So you may want to refresh it (if needed) and send it again with this series.
>
Please see [1] which is the design document we use to implement PCI passthrough on Arm.

For your convenience:

"

# Problem statement:
[snip]

Only Dom0 and Xen will have access to the real PCI bus,​ guest will have a
direct access to the assigned device itself​. IOMEM memory will be mapped to
the guest ​and interrupt will be redirected to the guest. SMMU has to be
configured correctly to have DMA transaction."

"

# Discovering PCI Host Bridge in XEN:

In order to support the PCI passthrough XEN should be aware of all the PCI host
bridges available on the system and should be able to access the PCI
configuration space. ECAM configuration access is supported as of now. XEN
during boot will read the PCI device tree node “reg” property and will map the
ECAM space to the XEN memory using the “ioremap_nocache ()” function.

[snip]

When Dom0 tries to access the PCI config space of the device, XEN will find the
corresponding host bridge based on segment number and access the corresponding
config space assigned to that bridge.

Limitation:
* Only PCI ECAM configuration space access is supported.
* Device tree binding is supported as of now, ACPI is not supported.
* Need to port the PCI host bridge access code to XEN to access the
configuration space (generic one works but lots of platforms will required
some specific code or quirks).

"

Unfortunately the document had not been updated since then, but the question

being discussed has not changed in the design: Domain-0 has full access to the bridge,

Xen traps ECAM. (I am taking dom0less aside as that was not the target for the first phase)

Thank you,

Oleksandr

[1] https://lists.xenproject.org/archives/html/xen-devel/2020-07/msg00777.html
Julien Grall Sept. 10, 2021, 2:52 p.m. UTC | #7
On 10/09/2021 15:38, Oleksandr Andrushchenko wrote:
> [snip]
> 
> 
>>>> What do you mean by "used by Domain-0 completely"? The hostbridge is owned by Xen so I don't think we can let dom0 access any MMIO regions by
>>>> default.
>>>
>>> Now it's my time to ask why do you think Xen owns the bridge?
>>
>> Because nothing in this series indicates either way. So I assumed this should be owned by Xen because it will drive it.
>>
>>  From what you wrote below, it sounds like this is not the case. I think you want to have a design document sent along the series so we can easily know what's the expected design and validate that the code match the agreement.
>>
>> There was already a design document sent a few months ago. So you may want to refresh it (if needed) and send it again with this series.
>>
> Please see [1] which is the design document we use to implement PCI passthrough on Arm.

Thank. Can you make sure to include at least in a link in your cover 
letter next time?

> 
> For your convenience:
> 
> "
> 
> # Problem statement:
> [snip]
> 
> Only Dom0 and Xen will have access to the real PCI bus,​ guest will have a
> direct access to the assigned device itself​. IOMEM memory will be mapped to
> the guest ​and interrupt will be redirected to the guest. SMMU has to be
> configured correctly to have DMA transaction."
> 
> "
> 
> # Discovering PCI Host Bridge in XEN:
> 
> In order to support the PCI passthrough XEN should be aware of all the PCI host
> bridges available on the system and should be able to access the PCI
> configuration space. ECAM configuration access is supported as of now. XEN
> during boot will read the PCI device tree node “reg” property and will map the
> ECAM space to the XEN memory using the “ioremap_nocache ()” function.
> 
> [snip]
> 
> When Dom0 tries to access the PCI config space of the device, XEN will find the
> corresponding host bridge based on segment number and access the corresponding
> config space assigned to that bridge.
> 
> Limitation:
> * Only PCI ECAM configuration space access is supported.
> * Device tree binding is supported as of now, ACPI is not supported.
> * Need to port the PCI host bridge access code to XEN to access the
> configuration space (generic one works but lots of platforms will required
> some specific code or quirks).
> 
> "
> 
> Unfortunately the document had not been updated since then, but the question
> 
> being discussed has not changed in the design: Domain-0 has full access to the bridge,
> 
> Xen traps ECAM. (I am taking dom0less aside as that was not the target for the first phase)

Having an update design document is quite important. This will allow 
reviewer to comment easily on overall approach and speed up the review 
as we can match to the agreed approach.

So can this please be updated and sent along the work?

In addition to that, it feels to me that the commit message should 
contain a summary of what you just pasted as this helps understanding 
the goal and approach of this patch.

Cheers,
Oleksandr Andrushchenko Sept. 10, 2021, 3:01 p.m. UTC | #8
On 10.09.21 17:52, Julien Grall wrote:
>
>
> On 10/09/2021 15:38, Oleksandr Andrushchenko wrote:
>> [snip]
>>
>>
>>>>> What do you mean by "used by Domain-0 completely"? The hostbridge is owned by Xen so I don't think we can let dom0 access any MMIO regions by
>>>>> default.
>>>>
>>>> Now it's my time to ask why do you think Xen owns the bridge?
>>>
>>> Because nothing in this series indicates either way. So I assumed this should be owned by Xen because it will drive it.
>>>
>>>  From what you wrote below, it sounds like this is not the case. I think you want to have a design document sent along the series so we can easily know what's the expected design and validate that the code match the agreement.
>>>
>>> There was already a design document sent a few months ago. So you may want to refresh it (if needed) and send it again with this series.
>>>
>> Please see [1] which is the design document we use to implement PCI passthrough on Arm.
>
> Thank. Can you make sure to include at least in a link in your cover letter next time?
I will update the commit message to have more description on the design aspects
>
>>
>> For your convenience:
>>
>> "
>>
>> # Problem statement:
>> [snip]
>>
>> Only Dom0 and Xen will have access to the real PCI bus,​ guest will have a
>> direct access to the assigned device itself​. IOMEM memory will be mapped to
>> the guest ​and interrupt will be redirected to the guest. SMMU has to be
>> configured correctly to have DMA transaction."
>>
>> "
>>
>> # Discovering PCI Host Bridge in XEN:
>>
>> In order to support the PCI passthrough XEN should be aware of all the PCI host
>> bridges available on the system and should be able to access the PCI
>> configuration space. ECAM configuration access is supported as of now. XEN
>> during boot will read the PCI device tree node “reg” property and will map the
>> ECAM space to the XEN memory using the “ioremap_nocache ()” function.
>>
>> [snip]
>>
>> When Dom0 tries to access the PCI config space of the device, XEN will find the
>> corresponding host bridge based on segment number and access the corresponding
>> config space assigned to that bridge.
>>
>> Limitation:
>> * Only PCI ECAM configuration space access is supported.
>> * Device tree binding is supported as of now, ACPI is not supported.
>> * Need to port the PCI host bridge access code to XEN to access the
>> configuration space (generic one works but lots of platforms will required
>> some specific code or quirks).
>>
>> "
>>
>> Unfortunately the document had not been updated since then, but the question
>>
>> being discussed has not changed in the design: Domain-0 has full access to the bridge,
>>
>> Xen traps ECAM. (I am taking dom0less aside as that was not the target for the first phase)
>
> Having an update design document is quite important. This will allow reviewer to comment easily on overall approach and speed up the review as we can match to the agreed approach.
>
> So can this please be updated and sent along the work?
>
> In addition to that, it feels to me that the commit message should contain a summary of what you just pasted as this helps understanding the goal and approach of this patch.
>
If we are on the same page now will you be able to review the patch

with respect to the design from RFC?

> Cheers,
>
Thank you,

Oleksandr
Julien Grall Sept. 10, 2021, 3:04 p.m. UTC | #9
Hi Oleksandr,

On 10/09/2021 15:01, Oleksandr Andrushchenko wrote:
> 
> On 10.09.21 16:18, Julien Grall wrote:
>>
>>
>> On 10/09/2021 13:37, Oleksandr Andrushchenko wrote:
>>> Hi, Julien!
>>
>> Hi Oleksandr,
>>
>>> On 09.09.21 20:58, Julien Grall wrote:
>>>> On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
>>>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>>
>>>>> Host bridge controller's ECAM space is mapped into Domain-0's p2m,
>>>>> thus it is not possible to trap the same for vPCI via MMIO handlers.
>>>>> For this to work we need to not map those while constructing the domain.
>>>>>
>>>>> Note, that during Domain-0 creation there is no pci_dev yet allocated for
>>>>> host bridges, thus we cannot match PCI host and its associated
>>>>> bridge by SBDF. Use dt_device_node field for checks instead.
>>>>>
>>>>> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>>>>> ---
>>>>>     xen/arch/arm/domain_build.c        |  3 +++
>>>>>     xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
>>>>>     xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
>>>>>     xen/include/asm-arm/pci.h          | 12 ++++++++++++
>>>>>     4 files changed, 54 insertions(+)
>>>>>
>>>>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>>>>> index da427f399711..76f5b513280c 100644
>>>>> --- a/xen/arch/arm/domain_build.c
>>>>> +++ b/xen/arch/arm/domain_build.c
>>>>> @@ -1257,6 +1257,9 @@ static int __init map_range_to_domain(const struct dt_device_node *dev,
>>>>>             }
>>>>>         }
>>>>>     +    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI) ) > +        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev,
>>>> addr, len);
>>>>
>>>> AFAICT, with device_get_class(dev), you know whether the hostbridge is used by Xen. Therefore, I would expect that we don't want to map all the regions of the hostbridges in dom0 (including the BARs).
>>>>
>>>> Can you clarify it?
>>> We only want to trap ECAM, not MMIOs and any other memory regions as the bridge is
>>>
>>> initialized and used by Domain-0 completely.
>>
>> What do you mean by "used by Domain-0 completely"? The hostbridge is owned by Xen so I don't think we can let dom0 access any MMIO regions by
>> default.
> 
> Now it's my time to ask why do you think Xen owns the bridge?
> 
> All the bridges are passed to Domain-0, are fully visible to Domain-0, initialized in Domain-0.
> 
> Enumeration etc. is done in Domain-0. So how comes that Xen owns the bridge? In what way it does?
> 
> Xen just accesses the ECAM when it needs it, but that's it. Xen traps ECAM access - that is true.
> 
> But it in no way uses MMIOs etc. of the bridge - those under direct control of Domain-0

So I looked on the snipped of the design document you posted. I think 
you are instead referring to a different part:

" PCI-PCIe enumeration is a process of detecting devices connected to 
its host.
It is the responsibility of the hardware domain or boot firmware to do 
the PCI enumeration and configure the BAR, PCI capabilities, and 
MSI/MSI-X configuration."

But I still don't see why it means we have to map the MMIOs right now. 
Dom0 should not need to access the MMIOs (aside the hostbridge 
registers) until the BARs are configured.

You can do the mapping when the BAR is accessed. In fact, AFAICT, there 
are code to do the identity mapping in the vPCI. So to me, the mappings 
for the MMIOs are rather pointless if not confusing.

>>
>> In particular, we may want to hide a device from dom0 for security reasons. This is not going to be possible if you map by default everything to dom0.
> 
> Then the bridge most probably will become unusable as we do not have relevant drivers in Xen.
> 
> At best we may rely on the firmware doing the initialization for us, then yes, we can control an ECAM bridge in Xen.
> 
> But this is not the case now: we rely on Domain-0 to initialize and setup the bridge
> 
>>
>> Instead, the BARs should be mapped on demand when dom0 when we trap access to the configuration space.
>>
>> For other regions, could you provide an example of what you are referring too?
> Registers of the bridge for example, all what is listed in "reg" property

I was more after a real life example.

Cheers,
Julien Grall Sept. 10, 2021, 3:05 p.m. UTC | #10
Hi Oleksandr,

On 10/09/2021 16:01, Oleksandr Andrushchenko wrote:
> 
> On 10.09.21 17:52, Julien Grall wrote:
>>
>>
>> On 10/09/2021 15:38, Oleksandr Andrushchenko wrote:
>>> [snip]
>>>
>>>
>>>>>> What do you mean by "used by Domain-0 completely"? The hostbridge is owned by Xen so I don't think we can let dom0 access any MMIO regions by
>>>>>> default.
>>>>>
>>>>> Now it's my time to ask why do you think Xen owns the bridge?
>>>>
>>>> Because nothing in this series indicates either way. So I assumed this should be owned by Xen because it will drive it.
>>>>
>>>>   From what you wrote below, it sounds like this is not the case. I think you want to have a design document sent along the series so we can easily know what's the expected design and validate that the code match the agreement.
>>>>
>>>> There was already a design document sent a few months ago. So you may want to refresh it (if needed) and send it again with this series.
>>>>
>>> Please see [1] which is the design document we use to implement PCI passthrough on Arm.
>>
>> Thank. Can you make sure to include at least in a link in your cover letter next time?
> I will update the commit message to have more description on the design aspects
>>
>>>
>>> For your convenience:
>>>
>>> "
>>>
>>> # Problem statement:
>>> [snip]
>>>
>>> Only Dom0 and Xen will have access to the real PCI bus,​ guest will have a
>>> direct access to the assigned device itself​. IOMEM memory will be mapped to
>>> the guest ​and interrupt will be redirected to the guest. SMMU has to be
>>> configured correctly to have DMA transaction."
>>>
>>> "
>>>
>>> # Discovering PCI Host Bridge in XEN:
>>>
>>> In order to support the PCI passthrough XEN should be aware of all the PCI host
>>> bridges available on the system and should be able to access the PCI
>>> configuration space. ECAM configuration access is supported as of now. XEN
>>> during boot will read the PCI device tree node “reg” property and will map the
>>> ECAM space to the XEN memory using the “ioremap_nocache ()” function.
>>>
>>> [snip]
>>>
>>> When Dom0 tries to access the PCI config space of the device, XEN will find the
>>> corresponding host bridge based on segment number and access the corresponding
>>> config space assigned to that bridge.
>>>
>>> Limitation:
>>> * Only PCI ECAM configuration space access is supported.
>>> * Device tree binding is supported as of now, ACPI is not supported.
>>> * Need to port the PCI host bridge access code to XEN to access the
>>> configuration space (generic one works but lots of platforms will required
>>> some specific code or quirks).
>>>
>>> "
>>>
>>> Unfortunately the document had not been updated since then, but the question
>>>
>>> being discussed has not changed in the design: Domain-0 has full access to the bridge,
>>>
>>> Xen traps ECAM. (I am taking dom0less aside as that was not the target for the first phase)
>>
>> Having an update design document is quite important. This will allow reviewer to comment easily on overall approach and speed up the review as we can match to the agreed approach.
>>
>> So can this please be updated and sent along the work?
>>
>> In addition to that, it feels to me that the commit message should contain a summary of what you just pasted as this helps understanding the goal and approach of this patch.
>>
> If we are on the same page now will you be able to review the patch
> 
> with respect to the design from RFC?

I believe this was already done as I covered both side in my review.

Cheers,
Stefano Stabellini Sept. 10, 2021, 8:30 p.m. UTC | #11
On Fri, 10 Sep 2021, Julien Grall wrote:
> On 10/09/2021 15:01, Oleksandr Andrushchenko wrote:
> > On 10.09.21 16:18, Julien Grall wrote:
> > > On 10/09/2021 13:37, Oleksandr Andrushchenko wrote:
> > > > Hi, Julien!
> > > 
> > > Hi Oleksandr,
> > > 
> > > > On 09.09.21 20:58, Julien Grall wrote:
> > > > > On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
> > > > > > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
> > > > > > 
> > > > > > Host bridge controller's ECAM space is mapped into Domain-0's p2m,
> > > > > > thus it is not possible to trap the same for vPCI via MMIO handlers.
> > > > > > For this to work we need to not map those while constructing the
> > > > > > domain.
> > > > > > 
> > > > > > Note, that during Domain-0 creation there is no pci_dev yet
> > > > > > allocated for
> > > > > > host bridges, thus we cannot match PCI host and its associated
> > > > > > bridge by SBDF. Use dt_device_node field for checks instead.
> > > > > > 
> > > > > > Signed-off-by: Oleksandr Andrushchenko
> > > > > > <oleksandr_andrushchenko@epam.com>
> > > > > > ---
> > > > > >     xen/arch/arm/domain_build.c        |  3 +++
> > > > > >     xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
> > > > > >     xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
> > > > > >     xen/include/asm-arm/pci.h          | 12 ++++++++++++
> > > > > >     4 files changed, 54 insertions(+)
> > > > > > 
> > > > > > diff --git a/xen/arch/arm/domain_build.c
> > > > > > b/xen/arch/arm/domain_build.c
> > > > > > index da427f399711..76f5b513280c 100644
> > > > > > --- a/xen/arch/arm/domain_build.c
> > > > > > +++ b/xen/arch/arm/domain_build.c
> > > > > > @@ -1257,6 +1257,9 @@ static int __init map_range_to_domain(const
> > > > > > struct dt_device_node *dev,
> > > > > >             }
> > > > > >         }
> > > > > >     +    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI)
> > > > > > ) > +        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev,
> > > > > addr, len);
> > > > > 
> > > > > AFAICT, with device_get_class(dev), you know whether the hostbridge is
> > > > > used by Xen. Therefore, I would expect that we don't want to map all
> > > > > the regions of the hostbridges in dom0 (including the BARs).
> > > > > 
> > > > > Can you clarify it?
> > > > We only want to trap ECAM, not MMIOs and any other memory regions as the
> > > > bridge is
> > > > 
> > > > initialized and used by Domain-0 completely.
> > > 
> > > What do you mean by "used by Domain-0 completely"? The hostbridge is owned
> > > by Xen so I don't think we can let dom0 access any MMIO regions by
> > > default.
> > 
> > Now it's my time to ask why do you think Xen owns the bridge?
> > 
> > All the bridges are passed to Domain-0, are fully visible to Domain-0,
> > initialized in Domain-0.
> > 
> > Enumeration etc. is done in Domain-0. So how comes that Xen owns the bridge?
> > In what way it does?
> > 
> > Xen just accesses the ECAM when it needs it, but that's it. Xen traps ECAM
> > access - that is true.
> > 
> > But it in no way uses MMIOs etc. of the bridge - those under direct control
> > of Domain-0
> 
> So I looked on the snipped of the design document you posted. I think you are
> instead referring to a different part:
> 
> " PCI-PCIe enumeration is a process of detecting devices connected to its
> host.
> It is the responsibility of the hardware domain or boot firmware to do the PCI
> enumeration and configure the BAR, PCI capabilities, and MSI/MSI-X
> configuration."
> 
> But I still don't see why it means we have to map the MMIOs right now. Dom0
> should not need to access the MMIOs (aside the hostbridge registers) until the
> BARs are configured.

This is true especially when we are going to assign a specific PCIe
device to a DomU. In that case, the related MMIO regions are going to be
mapped to the DomU and it would be a bad idea to also keep them mapped
in Dom0. Once we do PCIe assignment, the MMIO region of the PCIe device
being assigned should only be mapped in the DomU, right?

If so, it would be better if the MMIO region in question was never
mapped into Dom0 at all rather having to unmap it.

With the current approach, given that the entire PCIe aperture is mapped
to Dom0 since boot, we would have to identify the relevant subset region
and unmap it from Dom0 when doing assignment.
Julien Grall Sept. 10, 2021, 9:41 p.m. UTC | #12
On Fri, 10 Sep 2021, 21:30 Stefano Stabellini, <sstabellini@kernel.org>
wrote:

> On Fri, 10 Sep 2021, Julien Grall wrote:
> > On 10/09/2021 15:01, Oleksandr Andrushchenko wrote:
> > > On 10.09.21 16:18, Julien Grall wrote:
> > > > On 10/09/2021 13:37, Oleksandr Andrushchenko wrote:
> > > > > Hi, Julien!
> > > >
> > > > Hi Oleksandr,
> > > >
> > > > > On 09.09.21 20:58, Julien Grall wrote:
> > > > > > On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
> > > > > > > From: Oleksandr Andrushchenko <
> oleksandr_andrushchenko@epam.com>
> > > > > > >
> > > > > > > Host bridge controller's ECAM space is mapped into Domain-0's
> p2m,
> > > > > > > thus it is not possible to trap the same for vPCI via MMIO
> handlers.
> > > > > > > For this to work we need to not map those while constructing
> the
> > > > > > > domain.
> > > > > > >
> > > > > > > Note, that during Domain-0 creation there is no pci_dev yet
> > > > > > > allocated for
> > > > > > > host bridges, thus we cannot match PCI host and its associated
> > > > > > > bridge by SBDF. Use dt_device_node field for checks instead.
> > > > > > >
> > > > > > > Signed-off-by: Oleksandr Andrushchenko
> > > > > > > <oleksandr_andrushchenko@epam.com>
> > > > > > > ---
> > > > > > >     xen/arch/arm/domain_build.c        |  3 +++
> > > > > > >     xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
> > > > > > >     xen/arch/arm/pci/pci-host-common.c | 22
> ++++++++++++++++++++++
> > > > > > >     xen/include/asm-arm/pci.h          | 12 ++++++++++++
> > > > > > >     4 files changed, 54 insertions(+)
> > > > > > >
> > > > > > > diff --git a/xen/arch/arm/domain_build.c
> > > > > > > b/xen/arch/arm/domain_build.c
> > > > > > > index da427f399711..76f5b513280c 100644
> > > > > > > --- a/xen/arch/arm/domain_build.c
> > > > > > > +++ b/xen/arch/arm/domain_build.c
> > > > > > > @@ -1257,6 +1257,9 @@ static int __init
> map_range_to_domain(const
> > > > > > > struct dt_device_node *dev,
> > > > > > >             }
> > > > > > >         }
> > > > > > >     +    if ( need_mapping && (device_get_class(dev) ==
> DEVICE_PCI)
> > > > > > > ) > +        need_mapping =
> pci_host_bridge_need_p2m_mapping(d, dev,
> > > > > > addr, len);
> > > > > >
> > > > > > AFAICT, with device_get_class(dev), you know whether the
> hostbridge is
> > > > > > used by Xen. Therefore, I would expect that we don't want to map
> all
> > > > > > the regions of the hostbridges in dom0 (including the BARs).
> > > > > >
> > > > > > Can you clarify it?
> > > > > We only want to trap ECAM, not MMIOs and any other memory regions
> as the
> > > > > bridge is
> > > > >
> > > > > initialized and used by Domain-0 completely.
> > > >
> > > > What do you mean by "used by Domain-0 completely"? The hostbridge is
> owned
> > > > by Xen so I don't think we can let dom0 access any MMIO regions by
> > > > default.
> > >
> > > Now it's my time to ask why do you think Xen owns the bridge?
> > >
> > > All the bridges are passed to Domain-0, are fully visible to Domain-0,
> > > initialized in Domain-0.
> > >
> > > Enumeration etc. is done in Domain-0. So how comes that Xen owns the
> bridge?
> > > In what way it does?
> > >
> > > Xen just accesses the ECAM when it needs it, but that's it. Xen traps
> ECAM
> > > access - that is true.
> > >
> > > But it in no way uses MMIOs etc. of the bridge - those under direct
> control
> > > of Domain-0
> >
> > So I looked on the snipped of the design document you posted. I think
> you are
> > instead referring to a different part:
> >
> > " PCI-PCIe enumeration is a process of detecting devices connected to its
> > host.
> > It is the responsibility of the hardware domain or boot firmware to do
> the PCI
> > enumeration and configure the BAR, PCI capabilities, and MSI/MSI-X
> > configuration."
> >
> > But I still don't see why it means we have to map the MMIOs right now.
> Dom0
> > should not need to access the MMIOs (aside the hostbridge registers)
> until the
> > BARs are configured.
>
> This is true especially when we are going to assign a specific PCIe
> device to a DomU. In that case, the related MMIO regions are going to be
> mapped to the DomU and it would be a bad idea to also keep them mapped
> in Dom0. Once we do PCIe assignment, the MMIO region of the PCIe device
> being assigned should only be mapped in the DomU, right?
>

That's actually a good point. This is a recipe for disaster because dom0
and the domU may map the BARs with conflicting caching attributes.

So we ought to unmap the BAR from dom0. When the device is assigned to the
domU


> If so, it would be better if the MMIO region in question was never
> mapped into Dom0 at all rather having to unmap it.
>
> With the current approach, given that the entire PCIe aperture is mapped
> to Dom0 since boot, we would have to identify the relevant subset region
> and unmap it from Dom0 when doing assignment.
Oleksandr Andrushchenko Sept. 13, 2021, 6:27 a.m. UTC | #13
On 11.09.21 00:41, Julien Grall wrote:
>
>
> On Fri, 10 Sep 2021, 21:30 Stefano Stabellini, <sstabellini@kernel.org <mailto:sstabellini@kernel.org>> wrote:
>
>     On Fri, 10 Sep 2021, Julien Grall wrote:
>     > On 10/09/2021 15:01, Oleksandr Andrushchenko wrote:
>     > > On 10.09.21 16:18, Julien Grall wrote:
>     > > > On 10/09/2021 13:37, Oleksandr Andrushchenko wrote:
>     > > > > Hi, Julien!
>     > > >
>     > > > Hi Oleksandr,
>     > > >
>     > > > > On 09.09.21 20:58, Julien Grall wrote:
>     > > > > > On 03/09/2021 09:33, Oleksandr Andrushchenko wrote:
>     > > > > > > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com <mailto:oleksandr_andrushchenko@epam.com>>
>     > > > > > >
>     > > > > > > Host bridge controller's ECAM space is mapped into Domain-0's p2m,
>     > > > > > > thus it is not possible to trap the same for vPCI via MMIO handlers.
>     > > > > > > For this to work we need to not map those while constructing the
>     > > > > > > domain.
>     > > > > > >
>     > > > > > > Note, that during Domain-0 creation there is no pci_dev yet
>     > > > > > > allocated for
>     > > > > > > host bridges, thus we cannot match PCI host and its associated
>     > > > > > > bridge by SBDF. Use dt_device_node field for checks instead.
>     > > > > > >
>     > > > > > > Signed-off-by: Oleksandr Andrushchenko
>     > > > > > > <oleksandr_andrushchenko@epam.com <mailto:oleksandr_andrushchenko@epam.com>>
>     > > > > > > ---
>     > > > > > > xen/arch/arm/domain_build.c        |  3 +++
>     > > > > > > xen/arch/arm/pci/ecam.c            | 17 +++++++++++++++++
>     > > > > > > xen/arch/arm/pci/pci-host-common.c | 22 ++++++++++++++++++++++
>     > > > > > > xen/include/asm-arm/pci.h          | 12 ++++++++++++
>     > > > > > >     4 files changed, 54 insertions(+)
>     > > > > > >
>     > > > > > > diff --git a/xen/arch/arm/domain_build.c
>     > > > > > > b/xen/arch/arm/domain_build.c
>     > > > > > > index da427f399711..76f5b513280c 100644
>     > > > > > > --- a/xen/arch/arm/domain_build.c
>     > > > > > > +++ b/xen/arch/arm/domain_build.c
>     > > > > > > @@ -1257,6 +1257,9 @@ static int __init map_range_to_domain(const
>     > > > > > > struct dt_device_node *dev,
>     > > > > > >             }
>     > > > > > >         }
>     > > > > > >     +    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI)
>     > > > > > > ) > +        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev,
>     > > > > > addr, len);
>     > > > > >
>     > > > > > AFAICT, with device_get_class(dev), you know whether the hostbridge is
>     > > > > > used by Xen. Therefore, I would expect that we don't want to map all
>     > > > > > the regions of the hostbridges in dom0 (including the BARs).
>     > > > > >
>     > > > > > Can you clarify it?
>     > > > > We only want to trap ECAM, not MMIOs and any other memory regions as the
>     > > > > bridge is
>     > > > >
>     > > > > initialized and used by Domain-0 completely.
>     > > >
>     > > > What do you mean by "used by Domain-0 completely"? The hostbridge is owned
>     > > > by Xen so I don't think we can let dom0 access any MMIO regions by
>     > > > default.
>     > >
>     > > Now it's my time to ask why do you think Xen owns the bridge?
>     > >
>     > > All the bridges are passed to Domain-0, are fully visible to Domain-0,
>     > > initialized in Domain-0.
>     > >
>     > > Enumeration etc. is done in Domain-0. So how comes that Xen owns the bridge?
>     > > In what way it does?
>     > >
>     > > Xen just accesses the ECAM when it needs it, but that's it. Xen traps ECAM
>     > > access - that is true.
>     > >
>     > > But it in no way uses MMIOs etc. of the bridge - those under direct control
>     > > of Domain-0
>     >
>     > So I looked on the snipped of the design document you posted. I think you are
>     > instead referring to a different part:
>     >
>     > " PCI-PCIe enumeration is a process of detecting devices connected to its
>     > host.
>     > It is the responsibility of the hardware domain or boot firmware to do the PCI
>     > enumeration and configure the BAR, PCI capabilities, and MSI/MSI-X
>     > configuration."
>     >
>     > But I still don't see why it means we have to map the MMIOs right now. Dom0
>     > should not need to access the MMIOs (aside the hostbridge registers) until the
>     > BARs are configured.
>
>     This is true especially when we are going to assign a specific PCIe
>     device to a DomU. In that case, the related MMIO regions are going to be
>     mapped to the DomU and it would be a bad idea to also keep them mapped
>     in Dom0. Once we do PCIe assignment, the MMIO region of the PCIe device
>     being assigned should only be mapped in the DomU, right?
>
>
> That's actually a good point. This is a recipe for disaster because dom0 and the domU may map the BARs with conflicting caching attributes.
>
> So we ought to unmap the BAR from dom0. When the device is assigned to the domU
1. Yes, currently we map MMIOs to Dom0 from the beginning (the whole aperture actually)

2. When a PCIe device assigned to a DomU we do unmap the relevant MMIOs

from Dom0 and map them to DomU


>
>     If so, it would be better if the MMIO region in question was never
>     mapped into Dom0 at all rather having to unmap it.
>
Ok, I'll do that
>
>
>     With the current approach, given that the entire PCIe aperture is mapped
>     to Dom0 since boot, we would have to identify the relevant subset region
>     and unmap it from Dom0 when doing assignment.
>
It is already in vPCI code (with non-identity mappings in the PCI devices passthrough on Arm, part 3)
Oleksandr Andrushchenko Sept. 14, 2021, 10:03 a.m. UTC | #14
>
>>
>>     If so, it would be better if the MMIO region in question was never
>>     mapped into Dom0 at all rather having to unmap it.
>>
> Ok, I'll do that

Sorry for pasting here quite a big patch, but I feel I need clarification if this is

the way we want it. Please note I had to modify setup.h.


 From 6eee96bc046efb41ec25f87362b1f6e973a4e6c2 Mon Sep 17 00:00:00 2001
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Date: Tue, 14 Sep 2021 12:14:43 +0300
Subject: [PATCH] Fixes: a57dc84da5fd ("xen/arm: Do not map PCI ECAM space to
  Domain-0's p2m")

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
---
  xen/arch/arm/domain_build.c        | 37 +++++++++++++--------
  xen/arch/arm/pci/ecam.c            | 11 +++----
  xen/arch/arm/pci/pci-host-common.c | 53 ++++++++++++++++++++++--------
  xen/include/asm-arm/pci.h          | 18 ++++------
  xen/include/asm-arm/setup.h        | 13 ++++++++
  5 files changed, 86 insertions(+), 46 deletions(-)

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 76f5b513280c..b4bfda9d5b5a 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -10,7 +10,6 @@
  #include <asm/regs.h>
  #include <xen/errno.h>
  #include <xen/err.h>
-#include <xen/device_tree.h>
  #include <xen/libfdt/libfdt.h>
  #include <xen/guest_access.h>
  #include <xen/iocap.h>
@@ -47,12 +46,6 @@ static int __init parse_dom0_mem(const char *s)
  }
  custom_param("dom0_mem", parse_dom0_mem);

-struct map_range_data
-{
-    struct domain *d;
-    p2m_type_t p2mt;
-};
-
  /* Override macros from asm/page.h to make them work with mfn_t */
  #undef virt_to_mfn
  #define virt_to_mfn(va) _mfn(__virt_to_mfn(va))
@@ -1228,9 +1221,8 @@ static int __init map_dt_irq_to_domain(const struct dt_device_node *dev,
      return 0;
  }

-static int __init map_range_to_domain(const struct dt_device_node *dev,
-                                      u64 addr, u64 len,
-                                      void *data)
+int __init map_range_to_domain(const struct dt_device_node *dev,
+                               u64 addr, u64 len, void *data)
  {
      struct map_range_data *mr_data = data;
      struct domain *d = mr_data->d;
@@ -1257,8 +1249,10 @@ static int __init map_range_to_domain(const struct dt_device_node *dev,
          }
      }

-    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI) )
-        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev, addr, len);
+#ifdef CONFIG_HAS_PCI
+    if ( (device_get_class(dev) == DEVICE_PCI) && !mr_data->map_pci_bridge )
+        need_mapping = false;
+#endif

      if ( need_mapping )
      {
@@ -1293,7 +1287,11 @@ static int __init map_device_children(struct domain *d,
                                        const struct dt_device_node *dev,
                                        p2m_type_t p2mt)
  {
-    struct map_range_data mr_data = { .d = d, .p2mt = p2mt };
+    struct map_range_data mr_data = {
+        .d = d,
+        .p2mt = p2mt,
+        .map_pci_bridge = false
+    };
      int ret;

      if ( dt_device_type_is_equal(dev, "pci") )
@@ -1425,7 +1423,11 @@ static int __init handle_device(struct domain *d, struct dt_device_node *dev,
      /* Give permission and map MMIOs */
      for ( i = 0; i < naddr; i++ )
      {
-        struct map_range_data mr_data = { .d = d, .p2mt = p2mt };
+        struct map_range_data mr_data = {
+            .d = d,
+            .p2mt = p2mt,
+            .map_pci_bridge = false
+        };
          res = dt_device_get_address(dev, i, &addr, &size);
          if ( res )
          {
@@ -2594,7 +2596,14 @@ static int __init construct_dom0(struct domain *d)
          return rc;

      if ( acpi_disabled )
+    {
          rc = prepare_dtb_hwdom(d, &kinfo);
+#ifdef CONFIG_HAS_PCI
+        if ( rc < 0 )
+            return rc;
+        rc = pci_host_bridge_mappings(d, p2m_mmio_direct_c);
+#endif
+    }
      else
          rc = prepare_acpi(d, &kinfo);

diff --git a/xen/arch/arm/pci/ecam.c b/xen/arch/arm/pci/ecam.c
index d32efb7fcbd0..e08b9c6909b6 100644
--- a/xen/arch/arm/pci/ecam.c
+++ b/xen/arch/arm/pci/ecam.c
@@ -52,17 +52,14 @@ static int pci_ecam_register_mmio_handler(struct domain *d,
      return 0;
  }

-static int pci_ecam_need_p2m_mapping(struct domain *d,
-                                     struct pci_host_bridge *bridge,
-                                     uint64_t addr, uint64_t len)
+static bool pci_ecam_need_p2m_mapping(struct domain *d,
+                                      struct pci_host_bridge *bridge,
+                                      uint64_t addr)
  {
      struct pci_config_window *cfg = bridge->sysdata;

-    if ( !is_hardware_domain(d) )
-        return true;
-
      /*
-     * We do not want ECAM address space to be mapped in domain's p2m,
+     * We do not want ECAM address space to be mapped in Domain-0's p2m,
       * so we can trap access to it.
       */
      return cfg->phys_addr != addr;
diff --git a/xen/arch/arm/pci/pci-host-common.c b/xen/arch/arm/pci/pci-host-common.c
index c04be636452d..74077dec8c72 100644
--- a/xen/arch/arm/pci/pci-host-common.c
+++ b/xen/arch/arm/pci/pci-host-common.c
@@ -25,6 +25,7 @@
  #include <xen/init.h>
  #include <xen/pci.h>
  #include <asm/pci.h>
+#include <asm/setup.h>
  #include <xen/rwlock.h>
  #include <xen/sched.h>
  #include <xen/vmap.h>
@@ -335,25 +336,51 @@ int pci_host_iterate_bridges(struct domain *d,
      return 0;
  }

-bool pci_host_bridge_need_p2m_mapping(struct domain *d,
-                                      const struct dt_device_node *node,
-                                      uint64_t addr, uint64_t len)
+int __init pci_host_bridge_mappings(struct domain *d, p2m_type_t p2mt)
  {
      struct pci_host_bridge *bridge;
+    struct map_range_data mr_data = {
+        .d = d,
+        .p2mt = p2mt,
+        .map_pci_bridge = true
+    };

+    /*
+     * For each PCI host bridge we need to only map those ranges
+     * which are used by Domain-0 to properly initialize the bridge,
+     * e.g. we do not want to map ECAM configuration space which lives in
+     * "reg" or "assigned-addresses" device tree property.
+     * Neither we want to map any of the MMIO ranges found in the  "ranges"
+     * device tree property.
+     */
      list_for_each_entry( bridge, &pci_host_bridges, node )
      {
-        if ( bridge->dt_node != node )
-            continue;
-
-        if ( !bridge->ops->need_p2m_mapping )
-            return true;
-
-        return bridge->ops->need_p2m_mapping(d, bridge, addr, len);
+        const struct dt_device_node *dev = bridge->dt_node;
+        int i;
+
+        for ( i = 0; i < dt_number_of_address(dev); i++ )
+        {
+            uint64_t addr, size;
+            int err;
+
+            err = dt_device_get_address(dev, i, &addr, &size);
+            if ( err )
+            {
+                printk(XENLOG_ERR "Unable to retrieve address %u for %s\n",
+                       i, dt_node_full_name(dev));
+                return err;
+            }
+
+            if ( bridge->ops->need_p2m_mapping(d, bridge, addr) )
+            {
+                err = map_range_to_domain(dev, addr, size, &mr_data);
+                if ( err )
+                    return err;
+            }
+        }
      }
-    printk(XENLOG_ERR "Unable to find PCI bridge for %s segment %d, addr %lx\n",
-           node->full_name, bridge->segment, addr);
-    return true;
+
+    return 0;
  }

  /*
diff --git a/xen/include/asm-arm/pci.h b/xen/include/asm-arm/pci.h
index 9c28a4bdc4b7..97fbaac01370 100644
--- a/xen/include/asm-arm/pci.h
+++ b/xen/include/asm-arm/pci.h
@@ -21,6 +21,8 @@

  #ifdef CONFIG_HAS_PCI

+#include <asm/p2m.h>
+
  #define pci_to_dev(pcidev) (&(pcidev)->arch.dev)
  #define PRI_pci "%04x:%02x:%02x.%u"

@@ -82,8 +84,9 @@ struct pci_ops {
      int (*register_mmio_handler)(struct domain *d,
                                   struct pci_host_bridge *bridge,
                                   const struct mmio_handler_ops *ops);
-    int (*need_p2m_mapping)(struct domain *d, struct pci_host_bridge *bridge,
-                            uint64_t addr, uint64_t len);
+    bool (*need_p2m_mapping)(struct domain *d,
+                             struct pci_host_bridge *bridge,
+                             uint64_t addr);
  };

  /*
@@ -117,19 +120,10 @@ struct dt_device_node *pci_find_host_bridge_node(struct device *dev);
  int pci_host_iterate_bridges(struct domain *d,
                               int (*clb)(struct domain *d,
                                          struct pci_host_bridge *bridge));
-bool pci_host_bridge_need_p2m_mapping(struct domain *d,
-                                      const struct dt_device_node *node,
-                                      uint64_t addr, uint64_t len);
+int pci_host_bridge_mappings(struct domain *d, p2m_type_t p2mt);
  #else   /*!CONFIG_HAS_PCI*/

  struct arch_pci_dev { };

-static inline bool
-pci_host_bridge_need_p2m_mapping(struct domain *d,
-                                 const struct dt_device_node *node,
-                                 uint64_t addr, uint64_t len)
-{
-    return true;
-}
  #endif  /*!CONFIG_HAS_PCI*/
  #endif /* __ARM_PCI_H__ */
diff --git a/xen/include/asm-arm/setup.h b/xen/include/asm-arm/setup.h
index c4b6af602995..a1c31c0bb024 100644
--- a/xen/include/asm-arm/setup.h
+++ b/xen/include/asm-arm/setup.h
@@ -2,6 +2,8 @@
  #define __ARM_SETUP_H_

  #include <public/version.h>
+#include <asm/p2m.h>
+#include <xen/device_tree.h>

  #define MIN_FDT_ALIGN 8
  #define MAX_FDT_SIZE SZ_2M
@@ -76,6 +78,14 @@ struct bootinfo {
  #endif
  };

+struct map_range_data
+{
+    struct domain *d;
+    p2m_type_t p2mt;
+    /* Set if mappings for PCI host bridges must not be skipped. */
+    bool map_pci_bridge;
+};
+
  extern struct bootinfo bootinfo;

  extern domid_t max_init_domid;
@@ -123,6 +133,9 @@ void device_tree_get_reg(const __be32 **cell, u32 address_cells,
  u32 device_tree_get_u32(const void *fdt, int node,
                          const char *prop_name, u32 dflt);

+int map_range_to_domain(const struct dt_device_node *dev,
+                        u64 addr, u64 len, void *data);
+
  #endif
  /*
   * Local variables:
Stefano Stabellini Sept. 15, 2021, 12:36 a.m. UTC | #15
On Tue, 14 Sep 2021, Oleksandr Andrushchenko wrote:
> With the patch above I have the following log in Domain-0:
> 
> generic-armv8-xt-dom0 login: root
> root@generic-armv8-xt-dom0:~# (XEN) *** Serial input to Xen (type 'CTRL-a' three times to switch input)
> (XEN) ==== PCI devices ====
> (XEN) ==== segment 0000 ====
> (XEN) 0000:03:00.0 - d0 - node -1
> (XEN) 0000:02:02.0 - d0 - node -1
> (XEN) 0000:02:01.0 - d0 - node -1
> (XEN) 0000:02:00.0 - d0 - node -1
> (XEN) 0000:01:00.0 - d0 - node -1
> (XEN) 0000:00:00.0 - d0 - node -1
> (XEN) *** Serial input to DOM0 (type 'CTRL-a' three times to switch input)
> 
> root@generic-armv8-xt-dom0:~# modprobe e1000e
> [   46.104729] e1000e: Intel(R) PRO/1000 Network Driver
> [   46.105479] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
> [   46.107297] e1000e 0000:03:00.0: enabling device (0000 -> 0002)
> (XEN) map [e0000, e001f] -> 0xe0000 for d0
> (XEN) map [e0020, e003f] -> 0xe0020 for d0
> [   46.178513] e1000e 0000:03:00.0: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
> [   46.189668] pci_msi_setup_msi_irqs
> [   46.191016] nwl_compose_msi_msg msg at fe440000
> (XEN) traps.c:2014:d0v0 HSR=0x00000093810047 pc=0xffff8000104b4b00 gva=0xffff800010fa5000 gpa=0x000000e0040000
> [   46.200455] Unhandled fault at 0xffff800010fa5000
> 
> [snip]
> 
> [   46.233079] Call trace:
> [   46.233559]  __pci_write_msi_msg+0x70/0x180
> [   46.234149]  pci_msi_domain_write_msg+0x28/0x30
> [   46.234869]  msi_domain_activate+0x5c/0x88
> 
>  From the above you can see that BARs are mapped for Domain-0 now
> 
> only when an assigned PCI device gets enabled in Domain-0.
> 
> Another thing to note is that we crash on MSI-X access as BARs are mapped
> 
> to the domain while enabling memory decoding in the COMMAND register,
> 
> but MSI-X are handled differently, e.g. we have MSI-X holes in the mappings.
> 
> This is because before this change the whole PCI aperture was mapped into
> 
> Domain-0 and it is not. Thus, MSI-X holes are left unmapped now and there
> 
> was no need to do so, e.g. they were always mapped into Domain-0 and
> 
> emulated for guests.
> 
> Please note that one cannot use xl pci-attach in this case to attach the PCI device
> 
> in question to Domain-0 as (please see the log) that device is already attached.
> 
> Neither it can be detached and re-attached. So, without mapping MSI-X holes for
> 
> Domain-0 the device becomes unusable in Domain-0. At the same time the device
> 
> can be successfully passed to DomU.
> 
> 
> Julien, Stefano! Please let me know how can we proceed with this.

What was the plan for MSI-X in Dom0?

Given that Dom0 interacts with a virtual-ITS and virtual-GIC rather than
a physical-ITS and physical-GIC, I imagine that it wasn't correct for
Dom0 to write to the real MSI-X table directly?

Shouldn't Dom0 get emulated MSI-X tables like any DomU?

Otherwise, if Dom0 is expected to have the real MSI-X tables mapped, then
I would suggest to map them at the same time as the BARs. But I am
thinking that Dom0 should get emulated MSI-X tables, not physical MSI-X
tables.
Oleksandr Andrushchenko Sept. 15, 2021, 5:35 a.m. UTC | #16
Hi, Stefano, Rahul!

On 15.09.21 03:36, Stefano Stabellini wrote:
> On Tue, 14 Sep 2021, Oleksandr Andrushchenko wrote:
>> With the patch above I have the following log in Domain-0:
>>
>> generic-armv8-xt-dom0 login: root
>> root@generic-armv8-xt-dom0:~# (XEN) *** Serial input to Xen (type 'CTRL-a' three times to switch input)
>> (XEN) ==== PCI devices ====
>> (XEN) ==== segment 0000 ====
>> (XEN) 0000:03:00.0 - d0 - node -1
>> (XEN) 0000:02:02.0 - d0 - node -1
>> (XEN) 0000:02:01.0 - d0 - node -1
>> (XEN) 0000:02:00.0 - d0 - node -1
>> (XEN) 0000:01:00.0 - d0 - node -1
>> (XEN) 0000:00:00.0 - d0 - node -1
>> (XEN) *** Serial input to DOM0 (type 'CTRL-a' three times to switch input)
>>
>> root@generic-armv8-xt-dom0:~# modprobe e1000e
>> [   46.104729] e1000e: Intel(R) PRO/1000 Network Driver
>> [   46.105479] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
>> [   46.107297] e1000e 0000:03:00.0: enabling device (0000 -> 0002)
>> (XEN) map [e0000, e001f] -> 0xe0000 for d0
>> (XEN) map [e0020, e003f] -> 0xe0020 for d0
>> [   46.178513] e1000e 0000:03:00.0: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
>> [   46.189668] pci_msi_setup_msi_irqs
>> [   46.191016] nwl_compose_msi_msg msg at fe440000
>> (XEN) traps.c:2014:d0v0 HSR=0x00000093810047 pc=0xffff8000104b4b00 gva=0xffff800010fa5000 gpa=0x000000e0040000
>> [   46.200455] Unhandled fault at 0xffff800010fa5000
>>
>> [snip]
>>
>> [   46.233079] Call trace:
>> [   46.233559]  __pci_write_msi_msg+0x70/0x180
>> [   46.234149]  pci_msi_domain_write_msg+0x28/0x30
>> [   46.234869]  msi_domain_activate+0x5c/0x88
>>
>>   From the above you can see that BARs are mapped for Domain-0 now
>>
>> only when an assigned PCI device gets enabled in Domain-0.
>>
>> Another thing to note is that we crash on MSI-X access as BARs are mapped
>>
>> to the domain while enabling memory decoding in the COMMAND register,
>>
>> but MSI-X are handled differently, e.g. we have MSI-X holes in the mappings.
>>
>> This is because before this change the whole PCI aperture was mapped into
>>
>> Domain-0 and it is not. Thus, MSI-X holes are left unmapped now and there
>>
>> was no need to do so, e.g. they were always mapped into Domain-0 and
>>
>> emulated for guests.
>>
>> Please note that one cannot use xl pci-attach in this case to attach the PCI device
>>
>> in question to Domain-0 as (please see the log) that device is already attached.
>>
>> Neither it can be detached and re-attached. So, without mapping MSI-X holes for
>>
>> Domain-0 the device becomes unusable in Domain-0. At the same time the device
>>
>> can be successfully passed to DomU.
>>
>>
>> Julien, Stefano! Please let me know how can we proceed with this.
> What was the plan for MSI-X in Dom0?
It just worked because we mapped everything
>
> Given that Dom0 interacts with a virtual-ITS and virtual-GIC rather than
> a physical-ITS and physical-GIC, I imagine that it wasn't correct for
> Dom0 to write to the real MSI-X table directly?
>
> Shouldn't Dom0 get emulated MSI-X tables like any DomU?
>
> Otherwise, if Dom0 is expected to have the real MSI-X tables mapped, then
> I would suggest to map them at the same time as the BARs. But I am
> thinking that Dom0 should get emulated MSI-X tables, not physical MSI-X
> tables.

Yes, it seems more than reasonable to enable emulation for Domain-0

as well. Other than that, Stefano, do you think we are good to go with

the changes I did in order to unmap everything for Domain-0?

Rahul, it seems we will need a change to vPCI/MSI-X so we can also

trap Domain-0 for MSI-X tables.

Thank you,

Oleksandr
Rahul Singh Sept. 15, 2021, 4:42 p.m. UTC | #17
Hi Oleksandr,

> On 15 Sep 2021, at 6:35 am, Oleksandr Andrushchenko <Oleksandr_Andrushchenko@epam.com> wrote:
> 
> Hi, Stefano, Rahul!
> 
> On 15.09.21 03:36, Stefano Stabellini wrote:
>> On Tue, 14 Sep 2021, Oleksandr Andrushchenko wrote:
>>> With the patch above I have the following log in Domain-0:
>>> 
>>> generic-armv8-xt-dom0 login: root
>>> root@generic-armv8-xt-dom0:~# (XEN) *** Serial input to Xen (type 'CTRL-a' three times to switch input)
>>> (XEN) ==== PCI devices ====
>>> (XEN) ==== segment 0000 ====
>>> (XEN) 0000:03:00.0 - d0 - node -1
>>> (XEN) 0000:02:02.0 - d0 - node -1
>>> (XEN) 0000:02:01.0 - d0 - node -1
>>> (XEN) 0000:02:00.0 - d0 - node -1
>>> (XEN) 0000:01:00.0 - d0 - node -1
>>> (XEN) 0000:00:00.0 - d0 - node -1
>>> (XEN) *** Serial input to DOM0 (type 'CTRL-a' three times to switch input)
>>> 
>>> root@generic-armv8-xt-dom0:~# modprobe e1000e
>>> [   46.104729] e1000e: Intel(R) PRO/1000 Network Driver
>>> [   46.105479] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
>>> [   46.107297] e1000e 0000:03:00.0: enabling device (0000 -> 0002)
>>> (XEN) map [e0000, e001f] -> 0xe0000 for d0
>>> (XEN) map [e0020, e003f] -> 0xe0020 for d0
>>> [   46.178513] e1000e 0000:03:00.0: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
>>> [   46.189668] pci_msi_setup_msi_irqs
>>> [   46.191016] nwl_compose_msi_msg msg at fe440000
>>> (XEN) traps.c:2014:d0v0 HSR=0x00000093810047 pc=0xffff8000104b4b00 gva=0xffff800010fa5000 gpa=0x000000e0040000
>>> [   46.200455] Unhandled fault at 0xffff800010fa5000
>>> 
>>> [snip]
>>> 
>>> [   46.233079] Call trace:
>>> [   46.233559]  __pci_write_msi_msg+0x70/0x180
>>> [   46.234149]  pci_msi_domain_write_msg+0x28/0x30
>>> [   46.234869]  msi_domain_activate+0x5c/0x88
>>> 
>>>  From the above you can see that BARs are mapped for Domain-0 now
>>> 
>>> only when an assigned PCI device gets enabled in Domain-0.
>>> 
>>> Another thing to note is that we crash on MSI-X access as BARs are mapped
>>> 
>>> to the domain while enabling memory decoding in the COMMAND register,
>>> 
>>> but MSI-X are handled differently, e.g. we have MSI-X holes in the mappings.
>>> 
>>> This is because before this change the whole PCI aperture was mapped into
>>> 
>>> Domain-0 and it is not. Thus, MSI-X holes are left unmapped now and there
>>> 
>>> was no need to do so, e.g. they were always mapped into Domain-0 and
>>> 
>>> emulated for guests.
>>> 
>>> Please note that one cannot use xl pci-attach in this case to attach the PCI device
>>> 
>>> in question to Domain-0 as (please see the log) that device is already attached.
>>> 
>>> Neither it can be detached and re-attached. So, without mapping MSI-X holes for
>>> 
>>> Domain-0 the device becomes unusable in Domain-0. At the same time the device
>>> 
>>> can be successfully passed to DomU.
>>> 
>>> 
>>> Julien, Stefano! Please let me know how can we proceed with this.
>> What was the plan for MSI-X in Dom0?
> It just worked because we mapped everything
>> 
>> Given that Dom0 interacts with a virtual-ITS and virtual-GIC rather than
>> a physical-ITS and physical-GIC, I imagine that it wasn't correct for
>> Dom0 to write to the real MSI-X table directly?
>> 
>> Shouldn't Dom0 get emulated MSI-X tables like any DomU?
>> 
>> Otherwise, if Dom0 is expected to have the real MSI-X tables mapped, then
>> I would suggest to map them at the same time as the BARs. But I am
>> thinking that Dom0 should get emulated MSI-X tables, not physical MSI-X
>> tables.
> 
> Yes, it seems more than reasonable to enable emulation for Domain-0
> 
> as well. Other than that, Stefano, do you think we are good to go with
> 
> the changes I did in order to unmap everything for Domain-0?
> 
> Rahul, it seems we will need a change to vPCI/MSI-X so we can also
> 
> trap Domain-0 for MSI-X tables.

I agree that we need emulated MSI-X tables for Dom0 also. Let me check on this and come back to you.

Regards,
Rahul
> 
> Thank you,
> 
> Oleksandr
Stefano Stabellini Sept. 15, 2021, 8:09 p.m. UTC | #18
On Wed, 15 Sep 2021, Oleksandr Andrushchenko wrote:
> On 15.09.21 03:36, Stefano Stabellini wrote:
> > On Tue, 14 Sep 2021, Oleksandr Andrushchenko wrote:
> >> With the patch above I have the following log in Domain-0:
> >>
> >> generic-armv8-xt-dom0 login: root
> >> root@generic-armv8-xt-dom0:~# (XEN) *** Serial input to Xen (type 'CTRL-a' three times to switch input)
> >> (XEN) ==== PCI devices ====
> >> (XEN) ==== segment 0000 ====
> >> (XEN) 0000:03:00.0 - d0 - node -1
> >> (XEN) 0000:02:02.0 - d0 - node -1
> >> (XEN) 0000:02:01.0 - d0 - node -1
> >> (XEN) 0000:02:00.0 - d0 - node -1
> >> (XEN) 0000:01:00.0 - d0 - node -1
> >> (XEN) 0000:00:00.0 - d0 - node -1
> >> (XEN) *** Serial input to DOM0 (type 'CTRL-a' three times to switch input)
> >>
> >> root@generic-armv8-xt-dom0:~# modprobe e1000e
> >> [   46.104729] e1000e: Intel(R) PRO/1000 Network Driver
> >> [   46.105479] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
> >> [   46.107297] e1000e 0000:03:00.0: enabling device (0000 -> 0002)
> >> (XEN) map [e0000, e001f] -> 0xe0000 for d0
> >> (XEN) map [e0020, e003f] -> 0xe0020 for d0
> >> [   46.178513] e1000e 0000:03:00.0: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
> >> [   46.189668] pci_msi_setup_msi_irqs
> >> [   46.191016] nwl_compose_msi_msg msg at fe440000
> >> (XEN) traps.c:2014:d0v0 HSR=0x00000093810047 pc=0xffff8000104b4b00 gva=0xffff800010fa5000 gpa=0x000000e0040000
> >> [   46.200455] Unhandled fault at 0xffff800010fa5000
> >>
> >> [snip]
> >>
> >> [   46.233079] Call trace:
> >> [   46.233559]  __pci_write_msi_msg+0x70/0x180
> >> [   46.234149]  pci_msi_domain_write_msg+0x28/0x30
> >> [   46.234869]  msi_domain_activate+0x5c/0x88
> >>
> >>   From the above you can see that BARs are mapped for Domain-0 now
> >>
> >> only when an assigned PCI device gets enabled in Domain-0.
> >>
> >> Another thing to note is that we crash on MSI-X access as BARs are mapped
> >>
> >> to the domain while enabling memory decoding in the COMMAND register,
> >>
> >> but MSI-X are handled differently, e.g. we have MSI-X holes in the mappings.
> >>
> >> This is because before this change the whole PCI aperture was mapped into
> >>
> >> Domain-0 and it is not. Thus, MSI-X holes are left unmapped now and there
> >>
> >> was no need to do so, e.g. they were always mapped into Domain-0 and
> >>
> >> emulated for guests.
> >>
> >> Please note that one cannot use xl pci-attach in this case to attach the PCI device
> >>
> >> in question to Domain-0 as (please see the log) that device is already attached.
> >>
> >> Neither it can be detached and re-attached. So, without mapping MSI-X holes for
> >>
> >> Domain-0 the device becomes unusable in Domain-0. At the same time the device
> >>
> >> can be successfully passed to DomU.
> >>
> >>
> >> Julien, Stefano! Please let me know how can we proceed with this.
> > What was the plan for MSI-X in Dom0?
> It just worked because we mapped everything
> >
> > Given that Dom0 interacts with a virtual-ITS and virtual-GIC rather than
> > a physical-ITS and physical-GIC, I imagine that it wasn't correct for
> > Dom0 to write to the real MSI-X table directly?
> >
> > Shouldn't Dom0 get emulated MSI-X tables like any DomU?
> >
> > Otherwise, if Dom0 is expected to have the real MSI-X tables mapped, then
> > I would suggest to map them at the same time as the BARs. But I am
> > thinking that Dom0 should get emulated MSI-X tables, not physical MSI-X
> > tables.
> 
> Yes, it seems more than reasonable to enable emulation for Domain-0
> 
> as well. Other than that, Stefano, do you think we are good to go with
> 
> the changes I did in order to unmap everything for Domain-0?


It might be better to resend the series with the patch in it, because it
is difficult to review the patch like this. Nonetheless I tried, but I
might have missed something.

Previously the whole PCIe bridge aperture was mapped to Dom0, and
it was done by map_range_to_domain, is that correct?

Now this patch, to avoid mapping the entire aperture to Dom0, is
skipping any operations for PCIe devices in map_range_to_domain by
setting need_mapping = false.

The idea is that instead, we'll only map things when needed and not the
whole aperture. However, looking at the changes to
pci_host_bridge_mappings (formerly known as
pci_host_bridge_need_p2m_mapping), it is still going through the full
list of address ranges of the PCIe bridge and map each range one by one
using map_range_to_domain. Also, pci_host_bridge_mappings is still
called unconditionally at boot for Dom0.

So I am missing the part where the aperture is actually *not* mapped to
Dom0. What's the difference between the loop in
pci_host_bridge_mappings:

  for ( i = 0; i < dt_number_of_address(dev); i++ )
    map_range_to_domain...

and the previous code in map_range_to_domain? I think I am missing
something but as mentioned it is difficult to review the patch like this
out of order.

Also, and this is minor, even if currently unused, it might be good to
keep a length parameter to pci_host_bridge_need_p2m_mapping.
Stefano Stabellini Sept. 15, 2021, 8:19 p.m. UTC | #19
On Wed, 15 Sep 2021, Stefano Stabellini wrote:
> On Wed, 15 Sep 2021, Oleksandr Andrushchenko wrote:
> > On 15.09.21 03:36, Stefano Stabellini wrote:
> > > On Tue, 14 Sep 2021, Oleksandr Andrushchenko wrote:
> > >> With the patch above I have the following log in Domain-0:
> > >>
> > >> generic-armv8-xt-dom0 login: root
> > >> root@generic-armv8-xt-dom0:~# (XEN) *** Serial input to Xen (type 'CTRL-a' three times to switch input)
> > >> (XEN) ==== PCI devices ====
> > >> (XEN) ==== segment 0000 ====
> > >> (XEN) 0000:03:00.0 - d0 - node -1
> > >> (XEN) 0000:02:02.0 - d0 - node -1
> > >> (XEN) 0000:02:01.0 - d0 - node -1
> > >> (XEN) 0000:02:00.0 - d0 - node -1
> > >> (XEN) 0000:01:00.0 - d0 - node -1
> > >> (XEN) 0000:00:00.0 - d0 - node -1
> > >> (XEN) *** Serial input to DOM0 (type 'CTRL-a' three times to switch input)
> > >>
> > >> root@generic-armv8-xt-dom0:~# modprobe e1000e
> > >> [   46.104729] e1000e: Intel(R) PRO/1000 Network Driver
> > >> [   46.105479] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
> > >> [   46.107297] e1000e 0000:03:00.0: enabling device (0000 -> 0002)
> > >> (XEN) map [e0000, e001f] -> 0xe0000 for d0
> > >> (XEN) map [e0020, e003f] -> 0xe0020 for d0
> > >> [   46.178513] e1000e 0000:03:00.0: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
> > >> [   46.189668] pci_msi_setup_msi_irqs
> > >> [   46.191016] nwl_compose_msi_msg msg at fe440000
> > >> (XEN) traps.c:2014:d0v0 HSR=0x00000093810047 pc=0xffff8000104b4b00 gva=0xffff800010fa5000 gpa=0x000000e0040000
> > >> [   46.200455] Unhandled fault at 0xffff800010fa5000
> > >>
> > >> [snip]
> > >>
> > >> [   46.233079] Call trace:
> > >> [   46.233559]  __pci_write_msi_msg+0x70/0x180
> > >> [   46.234149]  pci_msi_domain_write_msg+0x28/0x30
> > >> [   46.234869]  msi_domain_activate+0x5c/0x88
> > >>
> > >>   From the above you can see that BARs are mapped for Domain-0 now
> > >>
> > >> only when an assigned PCI device gets enabled in Domain-0.
> > >>
> > >> Another thing to note is that we crash on MSI-X access as BARs are mapped
> > >>
> > >> to the domain while enabling memory decoding in the COMMAND register,
> > >>
> > >> but MSI-X are handled differently, e.g. we have MSI-X holes in the mappings.
> > >>
> > >> This is because before this change the whole PCI aperture was mapped into
> > >>
> > >> Domain-0 and it is not. Thus, MSI-X holes are left unmapped now and there
> > >>
> > >> was no need to do so, e.g. they were always mapped into Domain-0 and
> > >>
> > >> emulated for guests.
> > >>
> > >> Please note that one cannot use xl pci-attach in this case to attach the PCI device
> > >>
> > >> in question to Domain-0 as (please see the log) that device is already attached.
> > >>
> > >> Neither it can be detached and re-attached. So, without mapping MSI-X holes for
> > >>
> > >> Domain-0 the device becomes unusable in Domain-0. At the same time the device
> > >>
> > >> can be successfully passed to DomU.
> > >>
> > >>
> > >> Julien, Stefano! Please let me know how can we proceed with this.
> > > What was the plan for MSI-X in Dom0?
> > It just worked because we mapped everything
> > >
> > > Given that Dom0 interacts with a virtual-ITS and virtual-GIC rather than
> > > a physical-ITS and physical-GIC, I imagine that it wasn't correct for
> > > Dom0 to write to the real MSI-X table directly?
> > >
> > > Shouldn't Dom0 get emulated MSI-X tables like any DomU?
> > >
> > > Otherwise, if Dom0 is expected to have the real MSI-X tables mapped, then
> > > I would suggest to map them at the same time as the BARs. But I am
> > > thinking that Dom0 should get emulated MSI-X tables, not physical MSI-X
> > > tables.
> > 
> > Yes, it seems more than reasonable to enable emulation for Domain-0
> > 
> > as well. Other than that, Stefano, do you think we are good to go with
> > 
> > the changes I did in order to unmap everything for Domain-0?
> 
> 
> It might be better to resend the series with the patch in it, because it
> is difficult to review the patch like this. Nonetheless I tried, but I
> might have missed something.
> 
> Previously the whole PCIe bridge aperture was mapped to Dom0, and
> it was done by map_range_to_domain, is that correct?
> 
> Now this patch, to avoid mapping the entire aperture to Dom0, is
> skipping any operations for PCIe devices in map_range_to_domain by
> setting need_mapping = false.
> 
> The idea is that instead, we'll only map things when needed and not the
> whole aperture. However, looking at the changes to
> pci_host_bridge_mappings (formerly known as
> pci_host_bridge_need_p2m_mapping), it is still going through the full
> list of address ranges of the PCIe bridge and map each range one by one
> using map_range_to_domain. Also, pci_host_bridge_mappings is still
> called unconditionally at boot for Dom0.
> 
> So I am missing the part where the aperture is actually *not* mapped to
> Dom0. What's the difference between the loop in
> pci_host_bridge_mappings:
> 
>   for ( i = 0; i < dt_number_of_address(dev); i++ )
>     map_range_to_domain...
> 
> and the previous code in map_range_to_domain? I think I am missing
> something but as mentioned it is difficult to review the patch like this
> out of order.
> 
> Also, and this is minor, even if currently unused, it might be good to
> keep a length parameter to pci_host_bridge_need_p2m_mapping.

It looks like the filtering is done based on:

return cfg->phys_addr != addr

in pci_ecam_need_p2m_mapping that is expected to filter out the address
ranges we don't want to map because it comes from
xen/arch/arm/pci/pci-host-common.c:gen_pci_init:

    /* Parse our PCI ecam register address*/
    err = dt_device_get_address(dev, ecam_reg_idx, &addr, &size);
    if ( err )
        goto err_exit;

In pci_host_bridge_mappings, instead of parsing device tree again, can't
we just fetch the address and length we need to map straight from
bridge->sysdata->phys_addr/size ?

At the point when pci_host_bridge_mappings is called in your new patch,
we have already initialized all the PCIe-related data structures. It
seems a bit useless to have to go via device tree again.
Oleksandr Andrushchenko Sept. 16, 2021, 7:16 a.m. UTC | #20
Hi, Stefano!

On 15.09.21 23:19, Stefano Stabellini wrote:
> On Wed, 15 Sep 2021, Stefano Stabellini wrote:
>> On Wed, 15 Sep 2021, Oleksandr Andrushchenko wrote:
>>> On 15.09.21 03:36, Stefano Stabellini wrote:
>>>> On Tue, 14 Sep 2021, Oleksandr Andrushchenko wrote:
>>>>> With the patch above I have the following log in Domain-0:
>>>>>
>>>>> generic-armv8-xt-dom0 login: root
>>>>> root@generic-armv8-xt-dom0:~# (XEN) *** Serial input to Xen (type 'CTRL-a' three times to switch input)
>>>>> (XEN) ==== PCI devices ====
>>>>> (XEN) ==== segment 0000 ====
>>>>> (XEN) 0000:03:00.0 - d0 - node -1
>>>>> (XEN) 0000:02:02.0 - d0 - node -1
>>>>> (XEN) 0000:02:01.0 - d0 - node -1
>>>>> (XEN) 0000:02:00.0 - d0 - node -1
>>>>> (XEN) 0000:01:00.0 - d0 - node -1
>>>>> (XEN) 0000:00:00.0 - d0 - node -1
>>>>> (XEN) *** Serial input to DOM0 (type 'CTRL-a' three times to switch input)
>>>>>
>>>>> root@generic-armv8-xt-dom0:~# modprobe e1000e
>>>>> [   46.104729] e1000e: Intel(R) PRO/1000 Network Driver
>>>>> [   46.105479] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
>>>>> [   46.107297] e1000e 0000:03:00.0: enabling device (0000 -> 0002)
>>>>> (XEN) map [e0000, e001f] -> 0xe0000 for d0
>>>>> (XEN) map [e0020, e003f] -> 0xe0020 for d0
>>>>> [   46.178513] e1000e 0000:03:00.0: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
>>>>> [   46.189668] pci_msi_setup_msi_irqs
>>>>> [   46.191016] nwl_compose_msi_msg msg at fe440000
>>>>> (XEN) traps.c:2014:d0v0 HSR=0x00000093810047 pc=0xffff8000104b4b00 gva=0xffff800010fa5000 gpa=0x000000e0040000
>>>>> [   46.200455] Unhandled fault at 0xffff800010fa5000
>>>>>
>>>>> [snip]
>>>>>
>>>>> [   46.233079] Call trace:
>>>>> [   46.233559]  __pci_write_msi_msg+0x70/0x180
>>>>> [   46.234149]  pci_msi_domain_write_msg+0x28/0x30
>>>>> [   46.234869]  msi_domain_activate+0x5c/0x88
>>>>>
>>>>>    From the above you can see that BARs are mapped for Domain-0 now
>>>>>
>>>>> only when an assigned PCI device gets enabled in Domain-0.
>>>>>
>>>>> Another thing to note is that we crash on MSI-X access as BARs are mapped
>>>>>
>>>>> to the domain while enabling memory decoding in the COMMAND register,
>>>>>
>>>>> but MSI-X are handled differently, e.g. we have MSI-X holes in the mappings.
>>>>>
>>>>> This is because before this change the whole PCI aperture was mapped into
>>>>>
>>>>> Domain-0 and it is not. Thus, MSI-X holes are left unmapped now and there
>>>>>
>>>>> was no need to do so, e.g. they were always mapped into Domain-0 and
>>>>>
>>>>> emulated for guests.
>>>>>
>>>>> Please note that one cannot use xl pci-attach in this case to attach the PCI device
>>>>>
>>>>> in question to Domain-0 as (please see the log) that device is already attached.
>>>>>
>>>>> Neither it can be detached and re-attached. So, without mapping MSI-X holes for
>>>>>
>>>>> Domain-0 the device becomes unusable in Domain-0. At the same time the device
>>>>>
>>>>> can be successfully passed to DomU.
>>>>>
>>>>>
>>>>> Julien, Stefano! Please let me know how can we proceed with this.
>>>> What was the plan for MSI-X in Dom0?
>>> It just worked because we mapped everything
>>>> Given that Dom0 interacts with a virtual-ITS and virtual-GIC rather than
>>>> a physical-ITS and physical-GIC, I imagine that it wasn't correct for
>>>> Dom0 to write to the real MSI-X table directly?
>>>>
>>>> Shouldn't Dom0 get emulated MSI-X tables like any DomU?
>>>>
>>>> Otherwise, if Dom0 is expected to have the real MSI-X tables mapped, then
>>>> I would suggest to map them at the same time as the BARs. But I am
>>>> thinking that Dom0 should get emulated MSI-X tables, not physical MSI-X
>>>> tables.
>>> Yes, it seems more than reasonable to enable emulation for Domain-0
>>>
>>> as well. Other than that, Stefano, do you think we are good to go with
>>>
>>> the changes I did in order to unmap everything for Domain-0?
>>
>> It might be better to resend the series with the patch in it, because it
>> is difficult to review the patch like this.

This is true. Taking into account Xen release plan I am just trying to

minimize the turn around here. Sorry about this

>>   Nonetheless I tried, but I
>> might have missed something.
Thank you for your time!!
>>
>> Previously the whole PCIe bridge aperture was mapped to Dom0, and
>> it was done by map_range_to_domain, is that correct?

Yes, but not only the aperture: please see below.


>>
>> Now this patch, to avoid mapping the entire aperture to Dom0, is
>> skipping any operations for PCIe devices in map_range_to_domain by
>> setting need_mapping = false.
>>
>> The idea is that instead, we'll only map things when needed and not the
>> whole aperture. However, looking at the changes to
>> pci_host_bridge_mappings (formerly known as
>> pci_host_bridge_need_p2m_mapping), it is still going through the full
>> list of address ranges of the PCIe bridge and map each range one by one
>> using map_range_to_domain. Also, pci_host_bridge_mappings is still
>> called unconditionally at boot for Dom0.
>>
>> So I am missing the part where the aperture is actually *not* mapped to
>> Dom0.
With map_range_to_domain we also mapped all the entries

of the "reg" and "ranges" properties. Let's have a look at [1]:

- ranges         : As described in IEEE Std 1275-1994, but must provide
                    at least a definition of non-prefetchable memory. One
                    or both of prefetchable Memory and IO Space may also
                    be provided.

- reg            : The Configuration Space base address and size, as accessed
                    from the parent bus.  The base address corresponds to
                    the first bus in the "bus-range" property.  If no
                    "bus-range" is specified, this will be bus 0 (the default).

The most interesting comes when "reg" also has other then configuration

space addresses, for example [2]:

- reg: Should contain rc_dbi, config registers location and length.
- reg-names: Must include the following entries:
   "rc_dbi": controller configuration registers;
   "config": PCIe configuration space registers.

So, we don't need to map "ranges" and *config* from the "reg", but all

the rest from "reg" still needs to be mapped to Domain-0, so the PCIe

bridge can remain functional in Domain-0.

>>   What's the difference between the loop in
>> pci_host_bridge_mappings:
>>
>>    for ( i = 0; i < dt_number_of_address(dev); i++ )
>>      map_range_to_domain...
>>
>> and the previous code in map_range_to_domain? I think I am missing
>> something but as mentioned it is difficult to review the patch like this
>> out of order.
>>
>> Also, and this is minor, even if currently unused, it might be good to
>> keep a length parameter to pci_host_bridge_need_p2m_mapping.
> It looks like the filtering is done based on:
>
> return cfg->phys_addr != addr

As I explained above it is *now* the only range that we *do not want* to

be mapped to Domain-0. Other "reg" entries still need to be mapped.

>
> in pci_ecam_need_p2m_mapping that is expected to filter out the address
> ranges we don't want to map because it comes from
> xen/arch/arm/pci/pci-host-common.c:gen_pci_init:
>
>      /* Parse our PCI ecam register address*/
>      err = dt_device_get_address(dev, ecam_reg_idx, &addr, &size);
>      if ( err )
>          goto err_exit;
>
> In pci_host_bridge_mappings, instead of parsing device tree again, can't
> we just fetch the address and length we need to map straight from
> bridge->sysdata->phys_addr/size ?

We can't as the address describes the configuration space which we

*do not* want to be mapped, but what we want is other than configuration

entries in the "reg" property.

>
> At the point when pci_host_bridge_mappings is called in your new patch,
> we have already initialized all the PCIe-related data structures. It
> seems a bit useless to have to go via device tree again.

Bottom line: we do need to go over the "reg" property and map the regions

of which PCIe bridge is dependent and only skip "config" part of it.

Thank you,

Oleksandr


[1] https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/host-generic-pci.txt

[2] https://www.kernel.org/doc/Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
Stefano Stabellini Sept. 16, 2021, 8:22 p.m. UTC | #21
On Thu, 16 Sep 2021, Oleksandr Andrushchenko wrote:
> On 15.09.21 23:19, Stefano Stabellini wrote:
> > On Wed, 15 Sep 2021, Stefano Stabellini wrote:
> >> On Wed, 15 Sep 2021, Oleksandr Andrushchenko wrote:
> >>> On 15.09.21 03:36, Stefano Stabellini wrote:
> >>>> On Tue, 14 Sep 2021, Oleksandr Andrushchenko wrote:
> >>>>> With the patch above I have the following log in Domain-0:
> >>>>>
> >>>>> generic-armv8-xt-dom0 login: root
> >>>>> root@generic-armv8-xt-dom0:~# (XEN) *** Serial input to Xen (type 'CTRL-a' three times to switch input)
> >>>>> (XEN) ==== PCI devices ====
> >>>>> (XEN) ==== segment 0000 ====
> >>>>> (XEN) 0000:03:00.0 - d0 - node -1
> >>>>> (XEN) 0000:02:02.0 - d0 - node -1
> >>>>> (XEN) 0000:02:01.0 - d0 - node -1
> >>>>> (XEN) 0000:02:00.0 - d0 - node -1
> >>>>> (XEN) 0000:01:00.0 - d0 - node -1
> >>>>> (XEN) 0000:00:00.0 - d0 - node -1
> >>>>> (XEN) *** Serial input to DOM0 (type 'CTRL-a' three times to switch input)
> >>>>>
> >>>>> root@generic-armv8-xt-dom0:~# modprobe e1000e
> >>>>> [   46.104729] e1000e: Intel(R) PRO/1000 Network Driver
> >>>>> [   46.105479] e1000e: Copyright(c) 1999 - 2015 Intel Corporation.
> >>>>> [   46.107297] e1000e 0000:03:00.0: enabling device (0000 -> 0002)
> >>>>> (XEN) map [e0000, e001f] -> 0xe0000 for d0
> >>>>> (XEN) map [e0020, e003f] -> 0xe0020 for d0
> >>>>> [   46.178513] e1000e 0000:03:00.0: Interrupt Throttling Rate (ints/sec) set to dynamic conservative mode
> >>>>> [   46.189668] pci_msi_setup_msi_irqs
> >>>>> [   46.191016] nwl_compose_msi_msg msg at fe440000
> >>>>> (XEN) traps.c:2014:d0v0 HSR=0x00000093810047 pc=0xffff8000104b4b00 gva=0xffff800010fa5000 gpa=0x000000e0040000
> >>>>> [   46.200455] Unhandled fault at 0xffff800010fa5000
> >>>>>
> >>>>> [snip]
> >>>>>
> >>>>> [   46.233079] Call trace:
> >>>>> [   46.233559]  __pci_write_msi_msg+0x70/0x180
> >>>>> [   46.234149]  pci_msi_domain_write_msg+0x28/0x30
> >>>>> [   46.234869]  msi_domain_activate+0x5c/0x88
> >>>>>
> >>>>>    From the above you can see that BARs are mapped for Domain-0 now
> >>>>>
> >>>>> only when an assigned PCI device gets enabled in Domain-0.
> >>>>>
> >>>>> Another thing to note is that we crash on MSI-X access as BARs are mapped
> >>>>>
> >>>>> to the domain while enabling memory decoding in the COMMAND register,
> >>>>>
> >>>>> but MSI-X are handled differently, e.g. we have MSI-X holes in the mappings.
> >>>>>
> >>>>> This is because before this change the whole PCI aperture was mapped into
> >>>>>
> >>>>> Domain-0 and it is not. Thus, MSI-X holes are left unmapped now and there
> >>>>>
> >>>>> was no need to do so, e.g. they were always mapped into Domain-0 and
> >>>>>
> >>>>> emulated for guests.
> >>>>>
> >>>>> Please note that one cannot use xl pci-attach in this case to attach the PCI device
> >>>>>
> >>>>> in question to Domain-0 as (please see the log) that device is already attached.
> >>>>>
> >>>>> Neither it can be detached and re-attached. So, without mapping MSI-X holes for
> >>>>>
> >>>>> Domain-0 the device becomes unusable in Domain-0. At the same time the device
> >>>>>
> >>>>> can be successfully passed to DomU.
> >>>>>
> >>>>>
> >>>>> Julien, Stefano! Please let me know how can we proceed with this.
> >>>> What was the plan for MSI-X in Dom0?
> >>> It just worked because we mapped everything
> >>>> Given that Dom0 interacts with a virtual-ITS and virtual-GIC rather than
> >>>> a physical-ITS and physical-GIC, I imagine that it wasn't correct for
> >>>> Dom0 to write to the real MSI-X table directly?
> >>>>
> >>>> Shouldn't Dom0 get emulated MSI-X tables like any DomU?
> >>>>
> >>>> Otherwise, if Dom0 is expected to have the real MSI-X tables mapped, then
> >>>> I would suggest to map them at the same time as the BARs. But I am
> >>>> thinking that Dom0 should get emulated MSI-X tables, not physical MSI-X
> >>>> tables.
> >>> Yes, it seems more than reasonable to enable emulation for Domain-0
> >>>
> >>> as well. Other than that, Stefano, do you think we are good to go with
> >>>
> >>> the changes I did in order to unmap everything for Domain-0?
> >>
> >> It might be better to resend the series with the patch in it, because it
> >> is difficult to review the patch like this.
> 
> This is true. Taking into account Xen release plan I am just trying to
> 
> minimize the turn around here. Sorry about this
> 
> >>   Nonetheless I tried, but I
> >> might have missed something.
> Thank you for your time!!
> >>
> >> Previously the whole PCIe bridge aperture was mapped to Dom0, and
> >> it was done by map_range_to_domain, is that correct?
> 
> Yes, but not only the aperture: please see below.
> 
> 
> >>
> >> Now this patch, to avoid mapping the entire aperture to Dom0, is
> >> skipping any operations for PCIe devices in map_range_to_domain by
> >> setting need_mapping = false.
> >>
> >> The idea is that instead, we'll only map things when needed and not the
> >> whole aperture. However, looking at the changes to
> >> pci_host_bridge_mappings (formerly known as
> >> pci_host_bridge_need_p2m_mapping), it is still going through the full
> >> list of address ranges of the PCIe bridge and map each range one by one
> >> using map_range_to_domain. Also, pci_host_bridge_mappings is still
> >> called unconditionally at boot for Dom0.
> >>
> >> So I am missing the part where the aperture is actually *not* mapped to
> >> Dom0.
> With map_range_to_domain we also mapped all the entries
> 
> of the "reg" and "ranges" properties. Let's have a look at [1]:
> 
> - ranges         : As described in IEEE Std 1275-1994, but must provide
>                     at least a definition of non-prefetchable memory. One
>                     or both of prefetchable Memory and IO Space may also
>                     be provided.
> 
> - reg            : The Configuration Space base address and size, as accessed
>                     from the parent bus.  The base address corresponds to
>                     the first bus in the "bus-range" property.  If no
>                     "bus-range" is specified, this will be bus 0 (the default).
> 
> The most interesting comes when "reg" also has other then configuration
> 
> space addresses, for example [2]:
> 
> - reg: Should contain rc_dbi, config registers location and length.
> - reg-names: Must include the following entries:
>    "rc_dbi": controller configuration registers;
>    "config": PCIe configuration space registers.
> 
> So, we don't need to map "ranges" and *config* from the "reg", but all
> 
> the rest from "reg" still needs to be mapped to Domain-0, so the PCIe
> 
> bridge can remain functional in Domain-0.
> 
> >>   What's the difference between the loop in
> >> pci_host_bridge_mappings:
> >>
> >>    for ( i = 0; i < dt_number_of_address(dev); i++ )
> >>      map_range_to_domain...
> >>
> >> and the previous code in map_range_to_domain? I think I am missing
> >> something but as mentioned it is difficult to review the patch like this
> >> out of order.
> >>
> >> Also, and this is minor, even if currently unused, it might be good to
> >> keep a length parameter to pci_host_bridge_need_p2m_mapping.
> > It looks like the filtering is done based on:
> >
> > return cfg->phys_addr != addr
> 
> As I explained above it is *now* the only range that we *do not want* to
> 
> be mapped to Domain-0. Other "reg" entries still need to be mapped.
> 
> >
> > in pci_ecam_need_p2m_mapping that is expected to filter out the address
> > ranges we don't want to map because it comes from
> > xen/arch/arm/pci/pci-host-common.c:gen_pci_init:
> >
> >      /* Parse our PCI ecam register address*/
> >      err = dt_device_get_address(dev, ecam_reg_idx, &addr, &size);
> >      if ( err )
> >          goto err_exit;
> >
> > In pci_host_bridge_mappings, instead of parsing device tree again, can't
> > we just fetch the address and length we need to map straight from
> > bridge->sysdata->phys_addr/size ?
> 
> We can't as the address describes the configuration space which we
> 
> *do not* want to be mapped, but what we want is other than configuration
> 
> entries in the "reg" property.
> 
> >
> > At the point when pci_host_bridge_mappings is called in your new patch,
> > we have already initialized all the PCIe-related data structures. It
> > seems a bit useless to have to go via device tree again.
> 
> Bottom line: we do need to go over the "reg" property and map the regions
> 
> of which PCIe bridge is dependent and only skip "config" part of it.

OK, thanks for the explanation, please add it to the commit message
and/or as an in-code comment when you resend the patch
diff mbox series

Patch

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index da427f399711..76f5b513280c 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -1257,6 +1257,9 @@  static int __init map_range_to_domain(const struct dt_device_node *dev,
         }
     }
 
+    if ( need_mapping && (device_get_class(dev) == DEVICE_PCI) )
+        need_mapping = pci_host_bridge_need_p2m_mapping(d, dev, addr, len);
+
     if ( need_mapping )
     {
         res = map_regions_p2mt(d,
diff --git a/xen/arch/arm/pci/ecam.c b/xen/arch/arm/pci/ecam.c
index 92ecb2e0762b..d32efb7fcbd0 100644
--- a/xen/arch/arm/pci/ecam.c
+++ b/xen/arch/arm/pci/ecam.c
@@ -52,6 +52,22 @@  static int pci_ecam_register_mmio_handler(struct domain *d,
     return 0;
 }
 
+static int pci_ecam_need_p2m_mapping(struct domain *d,
+                                     struct pci_host_bridge *bridge,
+                                     uint64_t addr, uint64_t len)
+{
+    struct pci_config_window *cfg = bridge->sysdata;
+
+    if ( !is_hardware_domain(d) )
+        return true;
+
+    /*
+     * We do not want ECAM address space to be mapped in domain's p2m,
+     * so we can trap access to it.
+     */
+    return cfg->phys_addr != addr;
+}
+
 /* ECAM ops */
 const struct pci_ecam_ops pci_generic_ecam_ops = {
     .bus_shift  = 20,
@@ -60,6 +76,7 @@  const struct pci_ecam_ops pci_generic_ecam_ops = {
         .read                   = pci_generic_config_read,
         .write                  = pci_generic_config_write,
         .register_mmio_handler  = pci_ecam_register_mmio_handler,
+        .need_p2m_mapping       = pci_ecam_need_p2m_mapping,
     }
 };
 
diff --git a/xen/arch/arm/pci/pci-host-common.c b/xen/arch/arm/pci/pci-host-common.c
index a89112bfbb7c..c04be636452d 100644
--- a/xen/arch/arm/pci/pci-host-common.c
+++ b/xen/arch/arm/pci/pci-host-common.c
@@ -334,6 +334,28 @@  int pci_host_iterate_bridges(struct domain *d,
     }
     return 0;
 }
+
+bool pci_host_bridge_need_p2m_mapping(struct domain *d,
+                                      const struct dt_device_node *node,
+                                      uint64_t addr, uint64_t len)
+{
+    struct pci_host_bridge *bridge;
+
+    list_for_each_entry( bridge, &pci_host_bridges, node )
+    {
+        if ( bridge->dt_node != node )
+            continue;
+
+        if ( !bridge->ops->need_p2m_mapping )
+            return true;
+
+        return bridge->ops->need_p2m_mapping(d, bridge, addr, len);
+    }
+    printk(XENLOG_ERR "Unable to find PCI bridge for %s segment %d, addr %lx\n",
+           node->full_name, bridge->segment, addr);
+    return true;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/asm-arm/pci.h b/xen/include/asm-arm/pci.h
index 2c7c7649e00f..9c28a4bdc4b7 100644
--- a/xen/include/asm-arm/pci.h
+++ b/xen/include/asm-arm/pci.h
@@ -82,6 +82,8 @@  struct pci_ops {
     int (*register_mmio_handler)(struct domain *d,
                                  struct pci_host_bridge *bridge,
                                  const struct mmio_handler_ops *ops);
+    int (*need_p2m_mapping)(struct domain *d, struct pci_host_bridge *bridge,
+                            uint64_t addr, uint64_t len);
 };
 
 /*
@@ -115,9 +117,19 @@  struct dt_device_node *pci_find_host_bridge_node(struct device *dev);
 int pci_host_iterate_bridges(struct domain *d,
                              int (*clb)(struct domain *d,
                                         struct pci_host_bridge *bridge));
+bool pci_host_bridge_need_p2m_mapping(struct domain *d,
+                                      const struct dt_device_node *node,
+                                      uint64_t addr, uint64_t len);
 #else   /*!CONFIG_HAS_PCI*/
 
 struct arch_pci_dev { };
 
+static inline bool
+pci_host_bridge_need_p2m_mapping(struct domain *d,
+                                 const struct dt_device_node *node,
+                                 uint64_t addr, uint64_t len)
+{
+    return true;
+}
 #endif  /*!CONFIG_HAS_PCI*/
 #endif /* __ARM_PCI_H__ */