diff mbox series

[v2,1/4] pci: add rwlock to pcidevs_lock machinery

Message ID 20220718211521.664729-2-volodymyr_babchuk@epam.com (mailing list archive)
State New, archived
Headers show
Series vpci: first series in preparation for vpci on ARM | expand

Commit Message

Volodymyr Babchuk July 18, 2022, 9:15 p.m. UTC
From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

Currently pcidevs lock is a global recursive spinlock which is fine for
the existing use cases. It is used to both protect pdev instances
themselves from being removed while in use and to make sure the update
of the relevant pdev properties is synchronized.

Moving towards vPCI being used for guests, this becomes problematic in terms
of lock contention. For example, during vpci_{read|write} the access to
pdev must be protected to prevent pdev disappearing under our feet.
This needs to be done with the help of pcidevs_{lock|unlock}.
On the other hand it is highly undesirable to lock all other pdev accesses
which only use pdevs in read mode, e.g. those which do not remove or
add pdevs.

For the above reasons, introduce a read/write lock which will help
prevent lock contention between pdev readers and writers. This
read/write lock replaces current recursive spinlock.

By default all existing code uses pcidevs_lock() which takes write
lock. This ensures that all existing locking logic stays the same.

To justify this change, replace locks in (V)MSI code with read
locks. This code is a perfect target, as (V)MSI code only requires
that pdevs will not disappear during handling, while (V)MSI state
is protected by its own locks.

This is in preparation for vPCI to be used for guests.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>

---
Since v1:
- use a per-CPU variable to track recursive readers and writers
- use read locks in vmsi code
---
 xen/arch/x86/hvm/vmsi.c       | 22 ++++++------
 xen/arch/x86/irq.c            |  8 ++---
 xen/arch/x86/msi.c            | 16 ++++-----
 xen/drivers/passthrough/pci.c | 65 +++++++++++++++++++++++++++++++----
 xen/include/xen/pci.h         | 10 +++++-
 5 files changed, 91 insertions(+), 30 deletions(-)

Comments

Wei Chen July 19, 2022, 6:20 a.m. UTC | #1
Hi Volodymyr,

On 2022/7/19 5:15, Volodymyr Babchuk wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>

>   
>       if ( !use_msi )
>           return -EOPNOTSUPP;
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 938821e593..f93922acc8 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -50,21 +50,74 @@ struct pci_seg {
>       } bus2bridge[MAX_BUSES];
>   };
>   
> -static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
> +static DEFINE_RWLOCK(_pcidevs_rwlock);
> +static DEFINE_PER_CPU(unsigned int, pcidevs_read_cnt);
> +static DEFINE_PER_CPU(unsigned int, pcidevs_write_cnt);
>   
>   void pcidevs_lock(void)
>   {
> -    spin_lock_recursive(&_pcidevs_lock);
> +    pcidevs_write_lock();
>   }
>   
>   void pcidevs_unlock(void)
>   {
> -    spin_unlock_recursive(&_pcidevs_lock);
> +    pcidevs_write_unlock();
>   }
>   
> -bool_t pcidevs_locked(void)
> +bool pcidevs_locked(void)
>   {
> -    return !!spin_is_locked(&_pcidevs_lock);
> +    return pcidevs_write_locked();
> +}
> +
> +void pcidevs_read_lock(void)
> +{
> +    if ( this_cpu(pcidevs_read_cnt)++ == 0 )
> +        read_lock(&_pcidevs_rwlock);
> +}
> +

For my understanding, if pcidevs_read_cnt > 0, pcidevs_read_lock
will not block. I am not sure if this behavior is consistent with
the original lock? According to my understanding, the original spinlock 
should be blocked all the time, if the lock is not acquired. Maybe
I have misunderstanding something, I am not very familiar with PCI
subsystem.

Cheers,
Wei Chen

> +int pcidevs_read_trylock(void)
> +{
> +    int ret = 1;
> +
> +    if ( this_cpu(pcidevs_read_cnt) == 0 )
> +        ret = read_trylock(&_pcidevs_rwlock);
> +    if ( ret )
> +        this_cpu(pcidevs_read_cnt)++;
> +
> +    return ret;
> +}
> +
> +void pcidevs_read_unlock(void)
> +{
> +    ASSERT(this_cpu(pcidevs_read_cnt));
> +
> +    if ( --this_cpu(pcidevs_read_cnt) == 0 )
> +        read_unlock(&_pcidevs_rwlock);
> +}
> +
> +bool pcidevs_read_locked(void)
> +{
> +    /* Write lock implies read lock */
> +    return this_cpu(pcidevs_read_cnt) || this_cpu(pcidevs_write_cnt);
> +}
> +
> +void pcidevs_write_lock(void)
> +{
> +    if ( this_cpu(pcidevs_write_cnt)++ == 0 )
> +        write_lock(&_pcidevs_rwlock);
> +}
> +
> +void pcidevs_write_unlock(void)
> +{
> +    ASSERT(this_cpu(pcidevs_write_cnt));
> +
> +    if ( --this_cpu(pcidevs_write_cnt) == 0 )
> +        write_unlock(&_pcidevs_rwlock);
> +}
> +
> +bool pcidevs_write_locked(void)
> +{
> +    return rw_is_write_locked(&_pcidevs_rwlock);
>   }
>   
>   static struct radix_tree_root pci_segments;
> @@ -581,7 +634,7 @@ struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
>       struct pci_seg *pseg = get_pseg(seg);
>       struct pci_dev *pdev = NULL;
>   
> -    ASSERT(pcidevs_locked());
> +    ASSERT(pcidevs_read_locked());
>       ASSERT(seg != -1 || bus == -1);
>       ASSERT(bus != -1 || devfn == -1);
>   
> diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
> index f34368643c..052b2ddf9f 100644
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -158,7 +158,15 @@ struct pci_dev {
>   
>   void pcidevs_lock(void);
>   void pcidevs_unlock(void);
> -bool_t __must_check pcidevs_locked(void);
> +bool __must_check pcidevs_locked(void);
> +
> +void pcidevs_read_lock(void);
> +void pcidevs_read_unlock(void);
> +bool __must_check pcidevs_read_locked(void);
> +
> +void pcidevs_write_lock(void);
> +void pcidevs_write_unlock(void);
> +bool __must_check pcidevs_write_locked(void);
>   
>   bool_t pci_known_segment(u16 seg);
>   bool_t pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func);
Volodymyr Babchuk July 20, 2022, 6:43 p.m. UTC | #2
Hello Wei,

Wei Chen <Wei.Chen@arm.com> writes:

> Hi Volodymyr,
>
> On 2022/7/19 5:15, Volodymyr Babchuk wrote:
>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
>
>>         if ( !use_msi )
>>           return -EOPNOTSUPP;
>> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
>> index 938821e593..f93922acc8 100644
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -50,21 +50,74 @@ struct pci_seg {
>>       } bus2bridge[MAX_BUSES];
>>   };
>>   -static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
>> +static DEFINE_RWLOCK(_pcidevs_rwlock);
>> +static DEFINE_PER_CPU(unsigned int, pcidevs_read_cnt);
>> +static DEFINE_PER_CPU(unsigned int, pcidevs_write_cnt);
>>     void pcidevs_lock(void)
>>   {
>> -    spin_lock_recursive(&_pcidevs_lock);
>> +    pcidevs_write_lock();
>>   }
>>     void pcidevs_unlock(void)
>>   {
>> -    spin_unlock_recursive(&_pcidevs_lock);
>> +    pcidevs_write_unlock();
>>   }
>>   -bool_t pcidevs_locked(void)
>> +bool pcidevs_locked(void)
>>   {
>> -    return !!spin_is_locked(&_pcidevs_lock);
>> +    return pcidevs_write_locked();
>> +}
>> +
>> +void pcidevs_read_lock(void)
>> +{
>> +    if ( this_cpu(pcidevs_read_cnt)++ == 0 )
>> +        read_lock(&_pcidevs_rwlock);
>> +}
>> +
>
> For my understanding, if pcidevs_read_cnt > 0, pcidevs_read_lock
> will not block. I am not sure if this behavior is consistent with
> the original lock? According to my understanding, the original
> spinlock should be blocked all the time, if the lock is not
> acquired. Maybe

The original spinlock was a recursive one. As read-write locks are
non-recursive in Xen, we need to implement some other mechanism to
support recursion. This code ensures that a pCPU will not deadlock itself
if it calls pcidevs_read_lock() twice. The per-CPU counter ensures that
read_unlock() is only called once the pcidevs_read_unlock() calls are
balanced with the pcidevs_read_lock() calls.

> I have misunderstanding something, I am not very familiar with PCI
> subsystem.
>
> Cheers,
> Wei Chen

[...]
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index d4a8c953e2..c1ede676d0 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -466,7 +466,7 @@  int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable)
     struct msixtbl_entry *entry, *new_entry;
     int r = -EINVAL;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     ASSERT(spin_is_locked(&d->event_lock));
 
     if ( !msixtbl_initialised(d) )
@@ -536,7 +536,7 @@  void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq)
     struct pci_dev *pdev;
     struct msixtbl_entry *entry;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     ASSERT(spin_is_locked(&d->event_lock));
 
     if ( !msixtbl_initialised(d) )
@@ -682,7 +682,7 @@  static int vpci_msi_update(const struct pci_dev *pdev, uint32_t data,
 {
     unsigned int i;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
 
     if ( (address & MSI_ADDR_BASE_MASK) != MSI_ADDR_HEADER )
     {
@@ -724,7 +724,7 @@  void vpci_msi_arch_update(struct vpci_msi *msi, const struct pci_dev *pdev)
 
     ASSERT(msi->arch.pirq != INVALID_PIRQ);
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     for ( i = 0; i < msi->vectors && msi->arch.bound; i++ )
     {
         struct xen_domctl_bind_pt_irq unbind = {
@@ -743,7 +743,7 @@  void vpci_msi_arch_update(struct vpci_msi *msi, const struct pci_dev *pdev)
 
     msi->arch.bound = !vpci_msi_update(pdev, msi->data, msi->address,
                                        msi->vectors, msi->arch.pirq, msi->mask);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 }
 
 static int vpci_msi_enable(const struct pci_dev *pdev, unsigned int nr,
@@ -783,10 +783,10 @@  int vpci_msi_arch_enable(struct vpci_msi *msi, const struct pci_dev *pdev,
         return rc;
     msi->arch.pirq = rc;
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     msi->arch.bound = !vpci_msi_update(pdev, msi->data, msi->address, vectors,
                                        msi->arch.pirq, msi->mask);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 
     return 0;
 }
@@ -798,7 +798,7 @@  static void vpci_msi_disable(const struct pci_dev *pdev, int pirq,
 
     ASSERT(pirq != INVALID_PIRQ);
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     for ( i = 0; i < nr && bound; i++ )
     {
         struct xen_domctl_bind_pt_irq bind = {
@@ -814,7 +814,7 @@  static void vpci_msi_disable(const struct pci_dev *pdev, int pirq,
     spin_lock(&pdev->domain->event_lock);
     unmap_domain_pirq(pdev->domain, pirq);
     spin_unlock(&pdev->domain->event_lock);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 }
 
 void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
@@ -861,7 +861,7 @@  int vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
 
     entry->arch.pirq = rc;
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     rc = vpci_msi_update(pdev, entry->data, entry->addr, 1, entry->arch.pirq,
                          entry->masked);
     if ( rc )
@@ -869,7 +869,7 @@  int vpci_msix_arch_enable_entry(struct vpci_msix_entry *entry,
         vpci_msi_disable(pdev, entry->arch.pirq, 1, false);
         entry->arch.pirq = INVALID_PIRQ;
     }
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 
     return rc;
 }
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index de30ee7779..7b4832ffb1 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -2156,7 +2156,7 @@  int map_domain_pirq(
         struct pci_dev *pdev;
         unsigned int nr = 0;
 
-        ASSERT(pcidevs_locked());
+        ASSERT(pcidevs_read_locked());
 
         ret = -ENODEV;
         if ( !cpu_has_apic )
@@ -2313,7 +2313,7 @@  int unmap_domain_pirq(struct domain *d, int pirq)
     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
         return -EINVAL;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     ASSERT(spin_is_locked(&d->event_lock));
 
     info = pirq_info(d, pirq);
@@ -2907,7 +2907,7 @@  int allocate_and_map_msi_pirq(struct domain *d, int index, int *pirq_p,
 
     msi->irq = irq;
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     /* Verify or get pirq. */
     spin_lock(&d->event_lock);
     pirq = allocate_pirq(d, index, *pirq_p, irq, type, &msi->entry_nr);
@@ -2923,7 +2923,7 @@  int allocate_and_map_msi_pirq(struct domain *d, int index, int *pirq_p,
 
  done:
     spin_unlock(&d->event_lock);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
     if ( ret )
     {
         switch ( type )
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 6be81e6c3b..6ce7b5523a 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -613,7 +613,7 @@  static int msi_capability_init(struct pci_dev *dev,
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
     if ( !pos )
         return -ENODEV;
@@ -782,7 +782,7 @@  static int msix_capability_init(struct pci_dev *dev,
     if ( !pos )
         return -ENODEV;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
 
     control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
     /*
@@ -999,7 +999,7 @@  static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
     struct pci_dev *pdev;
     struct msi_desc *old_desc;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
@@ -1054,7 +1054,7 @@  static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
     struct pci_dev *pdev;
     struct msi_desc *old_desc;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
     if ( !pdev || !pdev->msix )
         return -ENODEV;
@@ -1145,7 +1145,7 @@  int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
     if ( !use_msi )
         return 0;
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     pdev = pci_get_pdev(seg, bus, devfn);
     if ( !pdev )
         rc = -ENODEV;
@@ -1158,7 +1158,7 @@  int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
     }
     else
         rc = msix_capability_init(pdev, NULL, NULL);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 
     return rc;
 }
@@ -1169,7 +1169,7 @@  int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
  */
 int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
 {
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
 
     if ( !use_msi )
         return -EPERM;
@@ -1305,7 +1305,7 @@  int pci_restore_msi_state(struct pci_dev *pdev)
     unsigned int type = 0, pos = 0;
     u16 control = 0;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
 
     if ( !use_msi )
         return -EOPNOTSUPP;
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 938821e593..f93922acc8 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -50,21 +50,74 @@  struct pci_seg {
     } bus2bridge[MAX_BUSES];
 };
 
-static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(_pcidevs_rwlock);
+static DEFINE_PER_CPU(unsigned int, pcidevs_read_cnt);
+static DEFINE_PER_CPU(unsigned int, pcidevs_write_cnt);
 
 void pcidevs_lock(void)
 {
-    spin_lock_recursive(&_pcidevs_lock);
+    pcidevs_write_lock();
 }
 
 void pcidevs_unlock(void)
 {
-    spin_unlock_recursive(&_pcidevs_lock);
+    pcidevs_write_unlock();
 }
 
-bool_t pcidevs_locked(void)
+bool pcidevs_locked(void)
 {
-    return !!spin_is_locked(&_pcidevs_lock);
+    return pcidevs_write_locked();
+}
+
+void pcidevs_read_lock(void)
+{
+    if ( this_cpu(pcidevs_read_cnt)++ == 0 )
+        read_lock(&_pcidevs_rwlock);
+}
+
+int pcidevs_read_trylock(void)
+{
+    int ret = 1;
+
+    if ( this_cpu(pcidevs_read_cnt) == 0 )
+        ret = read_trylock(&_pcidevs_rwlock);
+    if ( ret )
+        this_cpu(pcidevs_read_cnt)++;
+
+    return ret;
+}
+
+void pcidevs_read_unlock(void)
+{
+    ASSERT(this_cpu(pcidevs_read_cnt));
+
+    if ( --this_cpu(pcidevs_read_cnt) == 0 )
+        read_unlock(&_pcidevs_rwlock);
+}
+
+bool pcidevs_read_locked(void)
+{
+    /* Write lock implies read lock */
+    return this_cpu(pcidevs_read_cnt) || this_cpu(pcidevs_write_cnt);
+}
+
+void pcidevs_write_lock(void)
+{
+    if ( this_cpu(pcidevs_write_cnt)++ == 0 )
+        write_lock(&_pcidevs_rwlock);
+}
+
+void pcidevs_write_unlock(void)
+{
+    ASSERT(this_cpu(pcidevs_write_cnt));
+
+    if ( --this_cpu(pcidevs_write_cnt) == 0 )
+        write_unlock(&_pcidevs_rwlock);
+}
+
+bool pcidevs_write_locked(void)
+{
+    return rw_is_write_locked(&_pcidevs_rwlock);
 }
 
 static struct radix_tree_root pci_segments;
@@ -581,7 +634,7 @@  struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
     struct pci_seg *pseg = get_pseg(seg);
     struct pci_dev *pdev = NULL;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     ASSERT(seg != -1 || bus == -1);
     ASSERT(bus != -1 || devfn == -1);
 
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index f34368643c..052b2ddf9f 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -158,7 +158,15 @@  struct pci_dev {
 
 void pcidevs_lock(void);
 void pcidevs_unlock(void);
-bool_t __must_check pcidevs_locked(void);
+bool __must_check pcidevs_locked(void);
+
+void pcidevs_read_lock(void);
+void pcidevs_read_unlock(void);
+bool __must_check pcidevs_read_locked(void);
+
+void pcidevs_write_lock(void);
+void pcidevs_write_unlock(void);
+bool __must_check pcidevs_write_locked(void);
 
 bool_t pci_known_segment(u16 seg);
 bool_t pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func);