diff mbox series

[v2,3/4] vpci: use pcidevs locking to protect MMIO handlers

Message ID 20220718211521.664729-4-volodymyr_babchuk@epam.com (mailing list archive)
State New, archived
Headers show
Series vpci: first series in preparation for vpci on ARM | expand

Commit Message

Volodymyr Babchuk July 18, 2022, 9:15 p.m. UTC
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

vPCI MMIO handlers are accessing pdevs without protecting this access
with pcidevs_{lock|unlock}. This is not a problem as of now as these
are only used by Dom0. But, towards vPCI is used also for guests, we need
to properly protect pdev and pdev->vpci from being removed while still
in use.

For that use pcidevs_read_{un}lock helpers.

This patch adds ASSERTs in the code to check that the rwlock is taken
and in appropriate mode. Some of such checks require changes to the
initialization of local variables which may be accessed before the
ASSERT checks the locking. For example see init_bars and mask_write.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>

---
Since v1:
- move pcidevs_read_{lock|unlock} into patch 1
---
 xen/arch/x86/hvm/vmsi.c   | 24 ++++++++++++++---
 xen/drivers/vpci/header.c | 24 +++++++++++++++--
 xen/drivers/vpci/msi.c    | 21 ++++++++++-----
 xen/drivers/vpci/msix.c   | 55 ++++++++++++++++++++++++++++++++++-----
 xen/drivers/vpci/vpci.c   | 16 +++++++++---
 xen/include/xen/pci.h     |  1 +
 xen/include/xen/vpci.h    |  2 +-
 7 files changed, 121 insertions(+), 22 deletions(-)

Comments

Jan Beulich Aug. 1, 2022, 11:40 a.m. UTC | #1
On 18.07.2022 23:15, Volodymyr Babchuk wrote:
> --- a/xen/arch/x86/hvm/vmsi.c
> +++ b/xen/arch/x86/hvm/vmsi.c
> @@ -891,10 +891,16 @@ void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
>      entry->arch.pirq = INVALID_PIRQ;
>  }
>  
> -int vpci_msix_arch_print(const struct vpci_msix *msix)
> +int vpci_msix_arch_print(const struct domain *d, const struct vpci_msix *msix)

I don't think the extra parameter is needed:

> @@ -911,11 +917,23 @@ int vpci_msix_arch_print(const struct vpci_msix *msix)
>          if ( i && !(i % 64) )
>          {
>              struct pci_dev *pdev = msix->pdev;

You get hold of pdev here, and hence you can take the domain from pdev.

> +            pci_sbdf_t sbdf = pdev->sbdf;
>  
>              spin_unlock(&msix->pdev->vpci->lock);
> +            pcidevs_read_unlock();
> +
> +            /* NB: we still hold rcu_read_lock(&domlist_read_lock); here. */
>              process_pending_softirqs();
> -            /* NB: we assume that pdev cannot go away for an alive domain. */

I think this comment wants retaining, as the new one you add is about
a different aspect.

> -            if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
> +
> +            if ( !pcidevs_read_trylock() )
> +                return -EBUSY;
> +            pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
> +            /*
> +             * FIXME: we may find a re-allocated pdev's copy here.
> +             * Even occupying the same address as before. Do our best.
> +             */
> +            if ( !pdev || (pdev != msix->pdev) || !pdev->vpci ||

Despite the comment: What guarantees that msix isn't a dangling pointer
at this point? At the very least I think you need to check !pdev->vpci
first. And I'm afraid I don't view "do our best" as good enough here
(considering the patch doesn't carry an RFC tag). And no, I don't have
any good suggestion other than "our PCI device locking needs a complete
overhaul". Quite likely what we need is a refcounter per device, which
- as long as non-zero - prevents removal.

> +                 !spin_trylock(&pdev->vpci->lock) )
>                  return -EBUSY;

Don't you need to drop the pcidevs lock on this error path?

> @@ -450,10 +465,15 @@ static int cf_check init_bars(struct pci_dev *pdev)
>      uint16_t cmd;
>      uint64_t addr, size;
>      unsigned int i, num_bars, rom_reg;
> -    struct vpci_header *header = &pdev->vpci->header;
> -    struct vpci_bar *bars = header->bars;
> +    struct vpci_header *header;
> +    struct vpci_bar *bars;
>      int rc;
>  
> +    ASSERT(pcidevs_write_locked());
> +
> +    header = &pdev->vpci->header;
> +    bars = header->bars;

I'm not convinced the code movement here does us any good. (Same
apparently elsewhere below.)

> @@ -277,6 +282,9 @@ void vpci_dump_msi(void)
>  
>          printk("vPCI MSI/MSI-X d%d\n", d->domain_id);
>  
> +        if ( !pcidevs_read_trylock() )
> +            continue;

Note how this lives ahead of ...

>          for_each_pdev ( d, pdev )
>          {

... the loop, while ...

> @@ -310,7 +318,7 @@ void vpci_dump_msi(void)
>                  printk("  entries: %u maskall: %d enabled: %d\n",
>                         msix->max_entries, msix->masked, msix->enabled);
>  
> -                rc = vpci_msix_arch_print(msix);
> +                rc = vpci_msix_arch_print(d, msix);
>                  if ( rc )
>                  {
>                      /*
> @@ -318,12 +326,13 @@ void vpci_dump_msi(void)
>                       * holding the lock.
>                       */
>                      printk("unable to print all MSI-X entries: %d\n", rc);
> -                    process_pending_softirqs();
> -                    continue;
> +                    goto pdev_done;
>                  }
>              }
>  
>              spin_unlock(&pdev->vpci->lock);
> + pdev_done:
> +            pcidevs_read_unlock();

... this is still inside the loop body.

> @@ -332,10 +334,14 @@ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
>          return data;
>      }
>  
> +    pcidevs_read_lock();
>      /* Find the PCI dev matching the address. */
>      pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
> -    if ( !pdev )
> +    if ( !pdev || (pdev && !pdev->vpci) )

Simpler

    if ( !pdev || !pdev->vpci )

?

> @@ -381,6 +387,7 @@ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
>          ASSERT(data_offset < size);
>      }
>      spin_unlock(&pdev->vpci->lock);
> +    pcidevs_read_unlock();

I guess this is too early and wants to come after ...

>      if ( data_offset < size )
>      {

... this if, which - even if it doesn't use pdev - still accesses the
device.

Both comments equally apply to vpci_write().

> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -161,6 +161,7 @@ void pcidevs_unlock(void);
>  bool __must_check pcidevs_locked(void);
>  
>  void pcidevs_read_lock(void);
> +int pcidevs_read_trylock(void);

This declaration wants adding alongside the introduction of the
function or, if the series was structured that way, at the time of the
dropping of "static" from the function (which from a Misra perspective
would likely be better).

Jan
Volodymyr Babchuk Aug. 9, 2022, 8:33 p.m. UTC | #2
Hello Jan,

Jan Beulich <jbeulich@suse.com> writes:

> On 18.07.2022 23:15, Volodymyr Babchuk wrote:
>> --- a/xen/arch/x86/hvm/vmsi.c
>> +++ b/xen/arch/x86/hvm/vmsi.c
>> @@ -891,10 +891,16 @@ void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
>>      entry->arch.pirq = INVALID_PIRQ;
>>  }
>>  
>> -int vpci_msix_arch_print(const struct vpci_msix *msix)
>> +int vpci_msix_arch_print(const struct domain *d, const struct vpci_msix *msix)
>
> I don't think the extra parameter is needed:
>
>> @@ -911,11 +917,23 @@ int vpci_msix_arch_print(const struct vpci_msix *msix)
>>          if ( i && !(i % 64) )
>>          {
>>              struct pci_dev *pdev = msix->pdev;
>
> You get hold of pdev here, and hence you can take the domain from pdev.

Yes, makes sense.

>> +            pci_sbdf_t sbdf = pdev->sbdf;
>>  
>>              spin_unlock(&msix->pdev->vpci->lock);
>> +            pcidevs_read_unlock();
>> +
>> +            /* NB: we still hold rcu_read_lock(&domlist_read_lock); here. */
>>              process_pending_softirqs();
>> -            /* NB: we assume that pdev cannot go away for an alive domain. */
>
> I think this comment wants retaining, as the new one you add is about
> a different aspect.
>
>> -            if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
>> +
>> +            if ( !pcidevs_read_trylock() )
>> +                return -EBUSY;
>> +            pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
>> +            /*
>> +             * FIXME: we may find a re-allocated pdev's copy here.
>> +             * Even occupying the same address as before. Do our best.
>> +             */
>> +            if ( !pdev || (pdev != msix->pdev) || !pdev->vpci ||
>
> Despite the comment: What guarantees that msix isn't a dangling pointer
> at this point? At the very least I think you need to check !pdev->vpci
> first. And I'm afraid I don't view "do our best" as good enough here
> (considering the patch doesn't carry an RFC tag). And no, I don't have
> any good suggestion other than "our PCI device locking needs a complete
> overhaul". Quite likely what we need is a refcounter per device, which
> - as long as non-zero - prevents removal.

Refcounter itself is a good idea, but I'm not liking where all this
goes. We already are reworking locking by adding rw-locks with counters,
adding refcounter on top of this will complicate things even further.

I'm starting to think that complete PCI device locking rework may be
simpler solution, actually. By any chance, were there any prior
discussion on how proper locking should look like? 

>
>> +                 !spin_trylock(&pdev->vpci->lock) )
>>                  return -EBUSY;
>
> Don't you need to drop the pcidevs lock on this error path?

Yeah, you are right.

>
>> @@ -450,10 +465,15 @@ static int cf_check init_bars(struct pci_dev *pdev)
>>      uint16_t cmd;
>>      uint64_t addr, size;
>>      unsigned int i, num_bars, rom_reg;
>> -    struct vpci_header *header = &pdev->vpci->header;
>> -    struct vpci_bar *bars = header->bars;
>> +    struct vpci_header *header;
>> +    struct vpci_bar *bars;
>>      int rc;
>>  
>> +    ASSERT(pcidevs_write_locked());
>> +
>> +    header = &pdev->vpci->header;
>> +    bars = header->bars;
>
> I'm not convinced the code movement here does us any good. (Same
> apparently elsewhere below.)
>
>> @@ -277,6 +282,9 @@ void vpci_dump_msi(void)
>>  
>>          printk("vPCI MSI/MSI-X d%d\n", d->domain_id);
>>  
>> +        if ( !pcidevs_read_trylock() )
>> +            continue;
>
> Note how this lives ahead of ...
>
>>          for_each_pdev ( d, pdev )
>>          {
>
> ... the loop, while ...
>
>> @@ -310,7 +318,7 @@ void vpci_dump_msi(void)
>>                  printk("  entries: %u maskall: %d enabled: %d\n",
>>                         msix->max_entries, msix->masked, msix->enabled);
>>  
>> -                rc = vpci_msix_arch_print(msix);
>> +                rc = vpci_msix_arch_print(d, msix);
>>                  if ( rc )
>>                  {
>>                      /*
>> @@ -318,12 +326,13 @@ void vpci_dump_msi(void)
>>                       * holding the lock.
>>                       */
>>                      printk("unable to print all MSI-X entries: %d\n", rc);
>> -                    process_pending_softirqs();
>> -                    continue;
>> +                    goto pdev_done;
>>                  }
>>              }
>>  
>>              spin_unlock(&pdev->vpci->lock);
>> + pdev_done:
>> +            pcidevs_read_unlock();
>
> ... this is still inside the loop body.
>
>> @@ -332,10 +334,14 @@ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
>>          return data;
>>      }
>>  
>> +    pcidevs_read_lock();
>>      /* Find the PCI dev matching the address. */
>>      pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
>> -    if ( !pdev )
>> +    if ( !pdev || (pdev && !pdev->vpci) )
>
> Simpler
>
>     if ( !pdev || !pdev->vpci )
>
> ?
>
>> @@ -381,6 +387,7 @@ uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
>>          ASSERT(data_offset < size);
>>      }
>>      spin_unlock(&pdev->vpci->lock);
>> +    pcidevs_read_unlock();
>
> I guess this is too early and wants to come after ...
>
>>      if ( data_offset < size )
>>      {
>
> ... this if, which - even if it doesn't use pdev - still accesses the
> device.
>
> Both comments equally apply to vpci_write().
>
>> --- a/xen/include/xen/pci.h
>> +++ b/xen/include/xen/pci.h
>> @@ -161,6 +161,7 @@ void pcidevs_unlock(void);
>>  bool __must_check pcidevs_locked(void);
>>  
>>  void pcidevs_read_lock(void);
>> +int pcidevs_read_trylock(void);
>
> This declaration wants adding alongside the introduction of the
> function or, if the series was structured that way, at the time of the
> dropping of "static" from the function (which from a Misra perspective
> would likely be better).
>
> Jan
Jan Beulich Aug. 10, 2022, 6:46 a.m. UTC | #3
On 09.08.2022 22:33, Volodymyr Babchuk wrote:
> Jan Beulich <jbeulich@suse.com> writes:
>> On 18.07.2022 23:15, Volodymyr Babchuk wrote:
>>> -            if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
>>> +
>>> +            if ( !pcidevs_read_trylock() )
>>> +                return -EBUSY;
>>> +            pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
>>> +            /*
>>> +             * FIXME: we may find a re-allocated pdev's copy here.
>>> +             * Even occupying the same address as before. Do our best.
>>> +             */
>>> +            if ( !pdev || (pdev != msix->pdev) || !pdev->vpci ||
>>
>> Despite the comment: What guarantees that msix isn't a dangling pointer
>> at this point? At the very least I think you need to check !pdev->vpci
>> first. And I'm afraid I don't view "do our best" as good enough here
>> (considering the patch doesn't carry an RFC tag). And no, I don't have
>> any good suggestion other than "our PCI device locking needs a complete
>> overhaul". Quite likely what we need is a refcounter per device, which
>> - as long as non-zero - prevents removal.
> 
> Refcounter itself is a good idea, but I'm not liking where all this
> goes. We already are reworking locking by adding rw-locks with counters,
> adding refcounter on top of this will complicate things even further.

I'm of quite the opposite opinion: A lot of the places will no longer
need to hold the pcidevs lock when instead they hold a reference; the
lock will only be needed to acquire a reference. Therefore refcounting
is likely to simplify things, presumably to the point where at least
recursive locking (and probably also converting to some r/w locking
scheme) won't be necessary. The main complicating factor is that all
places where a reference is needed will have to be located, and (quite
obviously I'm inclined to say) in particular all involved error paths
will need to be covered when it comes to dropping references no longer
needed.

> I'm starting to think that complete PCI device locking rework may be
> simpler solution, actually. By any chance, were there any prior
> discussion on how proper locking should look like? 

Well, there were prior discussions (you'd need to search the list, as
I have no pointers to hand), but I'm not sure a clear picture had
surfaced how "proper locking" would look like. I guess that's part of
the reason why the currently proposed locking model actually makes
things quite a bit more complicated.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index c1ede676d0..3f250f81a4 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -891,10 +891,16 @@  void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
     entry->arch.pirq = INVALID_PIRQ;
 }
 
-int vpci_msix_arch_print(const struct vpci_msix *msix)
+int vpci_msix_arch_print(const struct domain *d, const struct vpci_msix *msix)
 {
     unsigned int i;
 
+    /*
+     * FIXME: this is not immediately correct, as the lock can be grabbed
+     * by a different CPU. But this is better then nothing.
+     */
+    ASSERT(pcidevs_read_locked());
+
     for ( i = 0; i < msix->max_entries; i++ )
     {
         const struct vpci_msix_entry *entry = &msix->entries[i];
@@ -911,11 +917,23 @@  int vpci_msix_arch_print(const struct vpci_msix *msix)
         if ( i && !(i % 64) )
         {
             struct pci_dev *pdev = msix->pdev;
+            pci_sbdf_t sbdf = pdev->sbdf;
 
             spin_unlock(&msix->pdev->vpci->lock);
+            pcidevs_read_unlock();
+
+            /* NB: we still hold rcu_read_lock(&domlist_read_lock); here. */
             process_pending_softirqs();
-            /* NB: we assume that pdev cannot go away for an alive domain. */
-            if ( !pdev->vpci || !spin_trylock(&pdev->vpci->lock) )
+
+            if ( !pcidevs_read_trylock() )
+                return -EBUSY;
+            pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
+            /*
+             * FIXME: we may find a re-allocated pdev's copy here.
+             * Even occupying the same address as before. Do our best.
+             */
+            if ( !pdev || (pdev != msix->pdev) || !pdev->vpci ||
+                 !spin_trylock(&pdev->vpci->lock) )
                 return -EBUSY;
             if ( pdev->vpci->msix != msix )
             {
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index a1c928a0d2..e0461b1139 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -142,16 +142,19 @@  bool vpci_process_pending(struct vcpu *v)
         if ( rc == -ERESTART )
             return true;
 
+        pcidevs_read_lock();
         spin_lock(&v->vpci.pdev->vpci->lock);
         /* Disable memory decoding unconditionally on failure. */
         modify_decoding(v->vpci.pdev,
                         rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd,
                         !rc && v->vpci.rom_only);
         spin_unlock(&v->vpci.pdev->vpci->lock);
+        pcidevs_read_unlock();
 
         rangeset_destroy(v->vpci.mem);
         v->vpci.mem = NULL;
         if ( rc )
+        {
             /*
              * FIXME: in case of failure remove the device from the domain.
              * Note that there might still be leftover mappings. While this is
@@ -159,7 +162,10 @@  bool vpci_process_pending(struct vcpu *v)
              * killed in order to avoid leaking stale p2m mappings on
              * failure.
              */
+            pcidevs_write_lock();
             vpci_remove_device(v->vpci.pdev);
+            pcidevs_write_unlock();
+        }
     }
 
     return false;
@@ -172,7 +178,16 @@  static int __init apply_map(struct domain *d, const struct pci_dev *pdev,
     int rc;
 
     while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == -ERESTART )
+    {
+        /*
+         * It's safe to drop and re-acquire the lock in this context
+         * without risking pdev disappearing because devices cannot be
+         * removed until the initial domain has been started.
+         */
+        pcidevs_write_unlock();
         process_pending_softirqs();
+        pcidevs_write_lock();
+    }
     rangeset_destroy(mem);
     if ( !rc )
         modify_decoding(pdev, cmd, false);
@@ -450,10 +465,15 @@  static int cf_check init_bars(struct pci_dev *pdev)
     uint16_t cmd;
     uint64_t addr, size;
     unsigned int i, num_bars, rom_reg;
-    struct vpci_header *header = &pdev->vpci->header;
-    struct vpci_bar *bars = header->bars;
+    struct vpci_header *header;
+    struct vpci_bar *bars;
     int rc;
 
+    ASSERT(pcidevs_write_locked());
+
+    header = &pdev->vpci->header;
+    bars = header->bars;
+
     switch ( pci_conf_read8(pdev->sbdf, PCI_HEADER_TYPE) & 0x7f )
     {
     case PCI_HEADER_TYPE_NORMAL:
diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
index 8f2b59e61a..d864f740cf 100644
--- a/xen/drivers/vpci/msi.c
+++ b/xen/drivers/vpci/msi.c
@@ -184,12 +184,17 @@  static void cf_check mask_write(
 
 static int cf_check init_msi(struct pci_dev *pdev)
 {
-    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
-    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func,
-                                           PCI_CAP_ID_MSI);
+    uint8_t slot, func;
+    unsigned int pos;
     uint16_t control;
     int ret;
 
+    ASSERT(pcidevs_write_locked());
+
+    slot = PCI_SLOT(pdev->devfn);
+    func = PCI_FUNC(pdev->devfn);
+    pos = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func, PCI_CAP_ID_MSI);
+
     if ( !pos )
         return 0;
 
@@ -277,6 +282,9 @@  void vpci_dump_msi(void)
 
         printk("vPCI MSI/MSI-X d%d\n", d->domain_id);
 
+        if ( !pcidevs_read_trylock() )
+            continue;
+
         for_each_pdev ( d, pdev )
         {
             const struct vpci_msi *msi;
@@ -310,7 +318,7 @@  void vpci_dump_msi(void)
                 printk("  entries: %u maskall: %d enabled: %d\n",
                        msix->max_entries, msix->masked, msix->enabled);
 
-                rc = vpci_msix_arch_print(msix);
+                rc = vpci_msix_arch_print(d, msix);
                 if ( rc )
                 {
                     /*
@@ -318,12 +326,13 @@  void vpci_dump_msi(void)
                      * holding the lock.
                      */
                     printk("unable to print all MSI-X entries: %d\n", rc);
-                    process_pending_softirqs();
-                    continue;
+                    goto pdev_done;
                 }
             }
 
             spin_unlock(&pdev->vpci->lock);
+ pdev_done:
+            pcidevs_read_unlock();
             process_pending_softirqs();
         }
     }
diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index bea0cc7aed..35cc9280f4 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -144,9 +144,13 @@  static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
 
     list_for_each_entry ( msix, &d->arch.hvm.msix_tables, next )
     {
-        const struct vpci_bar *bars = msix->pdev->vpci->header.bars;
+        const struct vpci_bar *bars;
         unsigned int i;
 
+        if ( !msix->pdev->vpci )
+            continue;
+
+        bars = msix->pdev->vpci->header.bars;
         for ( i = 0; i < ARRAY_SIZE(msix->tables); i++ )
             if ( bars[msix->tables[i] & PCI_MSIX_BIRMASK].enabled &&
                  VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, i) )
@@ -158,7 +162,13 @@  static struct vpci_msix *msix_find(const struct domain *d, unsigned long addr)
 
 static int cf_check msix_accept(struct vcpu *v, unsigned long addr)
 {
-    return !!msix_find(v->domain, addr);
+    int rc;
+
+    pcidevs_read_lock();
+    rc = !!msix_find(v->domain, addr);
+    pcidevs_read_unlock();
+
+    return rc;
 }
 
 static bool access_allowed(const struct pci_dev *pdev, unsigned long addr,
@@ -218,17 +228,26 @@  static int cf_check msix_read(
     struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
 {
     const struct domain *d = v->domain;
-    struct vpci_msix *msix = msix_find(d, addr);
+    struct vpci_msix *msix;
     const struct vpci_msix_entry *entry;
     unsigned int offset;
 
     *data = ~0ul;
 
+    pcidevs_read_lock();
+
+    msix = msix_find(d, addr);
     if ( !msix )
+    {
+        pcidevs_read_unlock();
         return X86EMUL_RETRY;
+    }
 
     if ( !access_allowed(msix->pdev, addr, len) )
+    {
+        pcidevs_read_unlock();
         return X86EMUL_OKAY;
+    }
 
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
@@ -299,6 +318,7 @@  static int cf_check msix_read(
         break;
     }
     spin_unlock(&msix->pdev->vpci->lock);
+    pcidevs_read_unlock();
 
     return X86EMUL_OKAY;
 }
@@ -307,15 +327,24 @@  static int cf_check msix_write(
     struct vcpu *v, unsigned long addr, unsigned int len, unsigned long data)
 {
     const struct domain *d = v->domain;
-    struct vpci_msix *msix = msix_find(d, addr);
+    struct vpci_msix *msix;
     struct vpci_msix_entry *entry;
     unsigned int offset;
 
+    pcidevs_read_lock();
+
+    msix = msix_find(d, addr);
     if ( !msix )
+    {
+        pcidevs_read_unlock();
         return X86EMUL_RETRY;
+    }
 
     if ( !access_allowed(msix->pdev, addr, len) )
+    {
+        pcidevs_read_unlock();
         return X86EMUL_OKAY;
+    }
 
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
@@ -351,6 +380,7 @@  static int cf_check msix_write(
             break;
         }
 
+        pcidevs_read_unlock();
         return X86EMUL_OKAY;
     }
 
@@ -428,6 +458,7 @@  static int cf_check msix_write(
         break;
     }
     spin_unlock(&msix->pdev->vpci->lock);
+    pcidevs_read_unlock();
 
     return X86EMUL_OKAY;
 }
@@ -440,9 +471,13 @@  static const struct hvm_mmio_ops vpci_msix_table_ops = {
 
 int vpci_make_msix_hole(const struct pci_dev *pdev)
 {
-    struct domain *d = pdev->domain;
+    struct domain *d;
     unsigned int i;
 
+    ASSERT(pcidevs_read_locked());
+
+    d = pdev->domain;
+
     if ( !pdev->vpci->msix )
         return 0;
 
@@ -487,13 +522,19 @@  int vpci_make_msix_hole(const struct pci_dev *pdev)
 
 static int cf_check init_msix(struct pci_dev *pdev)
 {
-    struct domain *d = pdev->domain;
-    uint8_t slot = PCI_SLOT(pdev->devfn), func = PCI_FUNC(pdev->devfn);
+    struct domain *d;
+    uint8_t slot, func;
     unsigned int msix_offset, i, max_entries;
     uint16_t control;
     struct vpci_msix *msix;
     int rc;
 
+    ASSERT(pcidevs_write_locked());
+
+    d = pdev->domain;
+    slot = PCI_SLOT(pdev->devfn);
+    func = PCI_FUNC(pdev->devfn);
+
     msix_offset = pci_find_cap_offset(pdev->seg, pdev->bus, slot, func,
                                       PCI_CAP_ID_MSIX);
     if ( !msix_offset )
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index c7a40a2f41..1559763479 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -37,7 +37,9 @@  extern vpci_register_init_t *const __end_vpci_array[];
 
 void vpci_remove_device(struct pci_dev *pdev)
 {
-    if ( !has_vpci(pdev->domain) )
+    ASSERT(pcidevs_write_locked());
+
+    if ( !has_vpci(pdev->domain) || !pdev->vpci )
         return;
 
     spin_lock(&pdev->vpci->lock);
@@ -332,10 +334,14 @@  uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
         return data;
     }
 
+    pcidevs_read_lock();
     /* Find the PCI dev matching the address. */
     pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
-    if ( !pdev )
+    if ( !pdev || (pdev && !pdev->vpci) )
+    {
+        pcidevs_read_unlock();
         return vpci_read_hw(sbdf, reg, size);
+    }
 
     spin_lock(&pdev->vpci->lock);
 
@@ -381,6 +387,7 @@  uint32_t vpci_read(pci_sbdf_t sbdf, unsigned int reg, unsigned int size)
         ASSERT(data_offset < size);
     }
     spin_unlock(&pdev->vpci->lock);
+    pcidevs_read_unlock();
 
     if ( data_offset < size )
     {
@@ -443,9 +450,11 @@  void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size,
      * Find the PCI dev matching the address.
      * Passthrough everything that's not trapped.
      */
+    pcidevs_read_lock();
     pdev = pci_get_pdev_by_domain(d, sbdf.seg, sbdf.bus, sbdf.devfn);
-    if ( !pdev )
+    if ( !pdev || (pdev && !pdev->vpci) )
     {
+        pcidevs_read_unlock();
         vpci_write_hw(sbdf, reg, size, data);
         return;
     }
@@ -486,6 +495,7 @@  void vpci_write(pci_sbdf_t sbdf, unsigned int reg, unsigned int size,
         ASSERT(data_offset < size);
     }
     spin_unlock(&pdev->vpci->lock);
+    pcidevs_read_unlock();
 
     if ( data_offset < size )
         /* Tailing gap, write the remaining. */
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 052b2ddf9f..c974ebdc94 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -161,6 +161,7 @@  void pcidevs_unlock(void);
 bool __must_check pcidevs_locked(void);
 
 void pcidevs_read_lock(void);
+int pcidevs_read_trylock(void);
 void pcidevs_read_unlock(void);
 bool __must_check pcidevs_read_locked(void);
 
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 67c9a0c631..7ab39839ff 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -175,7 +175,7 @@  int __must_check vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
 int __must_check vpci_msix_arch_disable_entry(struct vpci_msix_entry *entry,
                                               const struct pci_dev *pdev);
 void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry);
-int vpci_msix_arch_print(const struct vpci_msix *msix);
+int vpci_msix_arch_print(const struct domain *d, const struct vpci_msix *msix);
 
 /*
  * Helper functions to fetch MSIX related data. They are used by both the