
[RFC,03/10] xen: pci: introduce ats_list_lock

Message ID 20220831141040.13231-4-volodymyr_babchuk@epam.com (mailing list archive)
State New, archived
Series Rework PCI locking

Commit Message

Volodymyr Babchuk Aug. 31, 2022, 2:10 p.m. UTC
The ATS subsystem has its own list of PCI devices. As we are going to
remove the global pcidevs_lock() in favor of more granular locking, we
need to ensure that this list is protected somehow. To do this, we need
to add an additional lock to each IOMMU, as the list to be protected is
also part of the IOMMU.

Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
---
 xen/drivers/passthrough/amd/iommu.h         |  1 +
 xen/drivers/passthrough/amd/iommu_detect.c  |  1 +
 xen/drivers/passthrough/amd/pci_amd_iommu.c |  8 ++++++++
 xen/drivers/passthrough/pci.c               |  1 +
 xen/drivers/passthrough/vtd/iommu.c         | 11 +++++++++++
 xen/drivers/passthrough/vtd/iommu.h         |  1 +
 xen/drivers/passthrough/vtd/qinval.c        |  3 +++
 xen/drivers/passthrough/vtd/x86/ats.c       |  3 +++
 8 files changed, 29 insertions(+)

Comments

Stefano Stabellini Jan. 26, 2023, 11:56 p.m. UTC | #1
On Wed, 31 Aug 2022, Volodymyr Babchuk wrote:
> The ATS subsystem has its own list of PCI devices. As we are going to
> remove the global pcidevs_lock() in favor of more granular locking, we
> need to ensure that this list is protected somehow. To do this, we need
> to add an additional lock to each IOMMU, as the list to be protected is
> also part of the IOMMU.
> 
> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
> ---
>  xen/drivers/passthrough/amd/iommu.h         |  1 +
>  xen/drivers/passthrough/amd/iommu_detect.c  |  1 +
>  xen/drivers/passthrough/amd/pci_amd_iommu.c |  8 ++++++++
>  xen/drivers/passthrough/pci.c               |  1 +
>  xen/drivers/passthrough/vtd/iommu.c         | 11 +++++++++++
>  xen/drivers/passthrough/vtd/iommu.h         |  1 +
>  xen/drivers/passthrough/vtd/qinval.c        |  3 +++
>  xen/drivers/passthrough/vtd/x86/ats.c       |  3 +++
>  8 files changed, 29 insertions(+)
> 
> diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
> index 8bc3c35b1b..edd6eb52b3 100644
> --- a/xen/drivers/passthrough/amd/iommu.h
> +++ b/xen/drivers/passthrough/amd/iommu.h
> @@ -106,6 +106,7 @@ struct amd_iommu {
>      int enabled;
>  
>      struct list_head ats_devices;
> +    spinlock_t ats_list_lock;
>  };
>  
>  struct ivrs_unity_map {
> diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c
> index 2317fa6a7d..1d6f4f2168 100644
> --- a/xen/drivers/passthrough/amd/iommu_detect.c
> +++ b/xen/drivers/passthrough/amd/iommu_detect.c
> @@ -160,6 +160,7 @@ int __init amd_iommu_detect_one_acpi(
>      }
>  
>      spin_lock_init(&iommu->lock);
> +    spin_lock_init(&iommu->ats_list_lock);
>      INIT_LIST_HEAD(&iommu->ats_devices);
>  
>      iommu->seg = ivhd_block->pci_segment_group;
> diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> index 64c016491d..955f3af57a 100644
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -276,7 +276,11 @@ static int __must_check amd_iommu_setup_domain_device(
>           !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
>      {
>          if ( devfn == pdev->devfn )
> +	{
> +	    spin_lock(&iommu->ats_list_lock);
>              enable_ats_device(pdev, &iommu->ats_devices);
> +	    spin_unlock(&iommu->ats_list_lock);

code style (hard tabs used for indentation; Xen style wants 4 spaces — same for the instances below)


> +	}
>  
>          amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
>      }
> @@ -416,7 +420,11 @@ static void amd_iommu_disable_domain_device(const struct domain *domain,
>  
>      if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
>           pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
> +    {
> +	spin_lock(&iommu->ats_list_lock);
>          disable_ats_device(pdev);
> +	spin_unlock(&iommu->ats_list_lock);

code style


> +    }
>  
>      BUG_ON ( iommu->dev_table.buffer == NULL );
>      req_id = get_dma_requestor_id(iommu->seg, PCI_BDF(bus, devfn));
> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
> index 2dfa1c2875..b5db5498a1 100644
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -1641,6 +1641,7 @@ void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
>  {
>      pcidevs_lock();
>  
> +    /* iommu->ats_list_lock is taken by the caller of this function */

This is a locking inversion. Everywhere else we take pcidevs_lock
first, then ats_list_lock. For instance, look at
xen/drivers/passthrough/pci.c:deassign_device, which is called with
pcidevs_lock held and then calls iommu_call(... reassign_device ...),
which ends up taking ats_list_lock.

This is the only exception. I think we need to move the
spin_lock(ats_list_lock) from qinval.c to here.
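
To make the inversion concrete, here is a minimal user-space model of
the two call chains (pthread mutexes standing in for the two Xen locks;
purely an illustration, not tree code):

#include <pthread.h>
#include <stdio.h>

/* Build: gcc abba.c -o abba -lpthread */

static pthread_mutex_t pcidevs_lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ats_list_lock = PTHREAD_MUTEX_INITIALIZER;

/* deassign_device(): takes pcidevs_lock, then (via reassign_device ->
 * domain_context_unmap -> disable_ats_device) ats_list_lock. */
static void *deassign_path(void *arg)
{
    pthread_mutex_lock(&pcidevs_lock);
    pthread_mutex_lock(&ats_list_lock);
    pthread_mutex_unlock(&ats_list_lock);
    pthread_mutex_unlock(&pcidevs_lock);
    return NULL;
}

/* dev_invalidate_sync(): takes ats_list_lock, then (via
 * iommu_dev_iotlb_flush_timeout) pcidevs_lock -- the opposite order. */
static void *flush_timeout_path(void *arg)
{
    pthread_mutex_lock(&ats_list_lock);
    pthread_mutex_lock(&pcidevs_lock);
    pthread_mutex_unlock(&pcidevs_lock);
    pthread_mutex_unlock(&ats_list_lock);
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;

    pthread_create(&t1, NULL, deassign_path, NULL);
    pthread_create(&t2, NULL, flush_timeout_path, NULL);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    puts("lucky scheduling -- no deadlock this run");
    return 0;
}

If each thread manages to take its first lock before the other takes
its second, both block forever. Taking ats_list_lock after the
pcidevs_lock() call in iommu_dev_iotlb_flush_timeout() instead would
restore a single global order.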



>      disable_ats_device(pdev);
>  
>      ASSERT(pdev->domain);
> diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
> index fff1442265..42661f22f4 100644
> --- a/xen/drivers/passthrough/vtd/iommu.c
> +++ b/xen/drivers/passthrough/vtd/iommu.c
> @@ -1281,6 +1281,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
>      spin_lock_init(&iommu->lock);
>      spin_lock_init(&iommu->register_lock);
>      spin_lock_init(&iommu->intremap.lock);
> +    spin_lock_init(&iommu->ats_list_lock);
>  
>      iommu->drhd = drhd;
>      drhd->iommu = iommu;
> @@ -1769,7 +1770,11 @@ static int domain_context_mapping(struct domain *domain, u8 devfn,
>          if ( ret > 0 )
>              ret = 0;
>          if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
> +        {
> +            spin_lock(&drhd->iommu->ats_list_lock);
>              enable_ats_device(pdev, &drhd->iommu->ats_devices);
> +            spin_unlock(&drhd->iommu->ats_list_lock);
> +        }
>  
>          break;
>  
> @@ -1977,7 +1982,11 @@ static const struct acpi_drhd_unit *domain_context_unmap(
>                     domain, &PCI_SBDF(seg, bus, devfn));
>          ret = domain_context_unmap_one(domain, iommu, bus, devfn);
>          if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
> +        {
> +            spin_lock(&iommu->ats_list_lock);
>              disable_ats_device(pdev);
> +            spin_unlock(&iommu->ats_list_lock);
> +        }
>  
>          break;
>  
> @@ -2374,7 +2383,9 @@ static int cf_check intel_iommu_enable_device(struct pci_dev *pdev)
>      if ( ret <= 0 )
>          return ret;
>  
> +    spin_lock(&drhd->iommu->ats_list_lock);
>      ret = enable_ats_device(pdev, &drhd->iommu->ats_devices);
> +    spin_unlock(&drhd->iommu->ats_list_lock);
>  
>      return ret >= 0 ? 0 : ret;
>  }
> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
> index 78aa8a96f5..2a7a4c1b58 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -506,6 +506,7 @@ struct vtd_iommu {
>      } flush;
>  
>      struct list_head ats_devices;
> +    spinlock_t ats_list_lock;
>      unsigned long *pseudo_domid_map; /* "pseudo" domain id bitmap */
>      unsigned long *domid_bitmap;  /* domain id bitmap */
>      domid_t *domid_map;           /* domain id mapping array */
> diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c
> index 4f9ad136b9..6e876348db 100644
> --- a/xen/drivers/passthrough/vtd/qinval.c
> +++ b/xen/drivers/passthrough/vtd/qinval.c
> @@ -238,7 +238,10 @@ static int __must_check dev_invalidate_sync(struct vtd_iommu *iommu,
>          if ( d == NULL )
>              return rc;
>  
> +	spin_lock(&iommu->ats_list_lock);
>          iommu_dev_iotlb_flush_timeout(d, pdev);
> +	spin_unlock(&iommu->ats_list_lock);

code style


>          rcu_unlock_domain(d);
>      }
>      else if ( rc == -ETIMEDOUT )
> diff --git a/xen/drivers/passthrough/vtd/x86/ats.c b/xen/drivers/passthrough/vtd/x86/ats.c
> index 04d702b1d6..55e991183b 100644
> --- a/xen/drivers/passthrough/vtd/x86/ats.c
> +++ b/xen/drivers/passthrough/vtd/x86/ats.c
> @@ -117,6 +117,7 @@ int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
>      if ( !ecap_dev_iotlb(iommu->ecap) )
>          return ret;
>  
> +    spin_lock(&iommu->ats_list_lock);
>      list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
>      {
>          bool_t sbit;
> @@ -155,12 +156,14 @@ int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
>              break;
>          default:
>              dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
> +	    spin_unlock(&iommu->ats_list_lock);

code style


>              return -EOPNOTSUPP;
>          }
>  
>          if ( !ret )
>              ret = rc;
>      }
> +    spin_unlock(&iommu->ats_list_lock);
>  
>      return ret;
>  }
> -- 
> 2.36.1
>
Jan Beulich Jan. 27, 2023, 8:13 a.m. UTC | #2
On 27.01.2023 00:56, Stefano Stabellini wrote:
> On Wed, 31 Aug 2022, Volodymyr Babchuk wrote:
>> --- a/xen/drivers/passthrough/pci.c
>> +++ b/xen/drivers/passthrough/pci.c
>> @@ -1641,6 +1641,7 @@ void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
>>  {
>>      pcidevs_lock();
>>  
>> +    /* iommu->ats_list_lock is taken by the caller of this function */
> 
> This is a locking inversion. Everywhere else we take pcidevs_lock
> first, then ats_list_lock. For instance, look at
> xen/drivers/passthrough/pci.c:deassign_device, which is called with
> pcidevs_lock held and then calls iommu_call(... reassign_device ...),
> which ends up taking ats_list_lock.
> 
> This is the only exception. I think we need to move the
> spin_lock(ats_list_lock) from qinval.c to here.

First question here is what the lock is meant to protect: Just the list,
or also the ATS state change (i.e. serializing enable and disable against
one another). In the latter case the lock also wants naming differently.

Second question is who is to acquire the lock. Why isn't this done _in_
{en,dis}able_ats_device() themselves? That would then allow further
reducing the locked range, because at least the pci_find_ext_capability()
call and the final logging can occur without the lock held.

Jan
Volodymyr Babchuk Feb. 17, 2023, 1:20 a.m. UTC | #3
Hello Jan,

Jan Beulich <jbeulich@suse.com> writes:

> On 27.01.2023 00:56, Stefano Stabellini wrote:
>> On Wed, 31 Aug 2022, Volodymyr Babchuk wrote:
>>> --- a/xen/drivers/passthrough/pci.c
>>> +++ b/xen/drivers/passthrough/pci.c
>>> @@ -1641,6 +1641,7 @@ void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
>>>  {
>>>      pcidevs_lock();
>>>  
>>> +    /* iommu->ats_list_lock is taken by the caller of this function */
>> 
>> This is a locking inversion. Everywhere else we take pcidevs_lock
>> first, then ats_list_lock. For instance, look at
>> xen/drivers/passthrough/pci.c:deassign_device, which is called with
>> pcidevs_lock held and then calls iommu_call(... reassign_device ...),
>> which ends up taking ats_list_lock.
>> 
>> This is the only exception. I think we need to move the
>> spin_lock(ats_list_lock) from qinval.c to here.
>
> First question here is what the lock is meant to protect: Just the list,
> or also the ATS state change (i.e. serializing enable and disable against
> one another). In the latter case the lock also wants naming differently.

My intention was to protect the list only. But I believe you are right and
we should protect the whole state. I'll rename the lock to ats_lock.

> Second question is who is to acquire the lock. Why isn't this done _in_
> {en,dis}able_ats_device() themselves? That would then allow further
> reducing the locked range, because at least the pci_find_ext_capability()
> call and the final logging can occur without the lock held.

You are right, I'll extend the {en,dis}able_ats_device() API to pass a
pointer to the lock.
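
Something along these lines (only a sketch: the queue depth setup,
error handling and logging of the real function are omitted, and the
current helper signatures and ATS_REG_CTL/ATS_ENABLE definitions are
assumed unchanged):

int enable_ats_device(struct pci_dev *pdev, struct list_head *ats_list,
                      spinlock_t *ats_lock)
{
    unsigned int pos = pci_find_ext_capability(pdev->seg, pdev->bus,
                                               pdev->devfn,
                                               PCI_EXT_CAP_ID_ATS);
    uint16_t ctl;

    if ( !pos )
        return -ENODEV;

    /* Capability lookup happens before taking the lock. */
    spin_lock(ats_lock);

    ctl = pci_conf_read16(pdev->sbdf, pos + ATS_REG_CTL);
    if ( !(ctl & ATS_ENABLE) )
    {
        pci_conf_write16(pdev->sbdf, pos + ATS_REG_CTL, ctl | ATS_ENABLE);
        list_add(&pdev->ats.list, ats_list);
    }

    spin_unlock(ats_lock);

    /* Logging can then happen with the lock already dropped. */
    return pos;
}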
Jan Beulich Feb. 17, 2023, 7:39 a.m. UTC | #4
On 17.02.2023 02:20, Volodymyr Babchuk wrote:
> Jan Beulich <jbeulich@suse.com> writes:
>> On 27.01.2023 00:56, Stefano Stabellini wrote:
>>> On Wed, 31 Aug 2022, Volodymyr Babchuk wrote:
>>>> --- a/xen/drivers/passthrough/pci.c
>>>> +++ b/xen/drivers/passthrough/pci.c
>>>> @@ -1641,6 +1641,7 @@ void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
>>>>  {
>>>>      pcidevs_lock();
>>>>  
>>>> +    /* iommu->ats_list_lock is taken by the caller of this function */
>>>
>>> This is a locking inversion. Everywhere else we take pcidevs_lock
>>> first, then ats_list_lock. For instance, look at
>>> xen/drivers/passthrough/pci.c:deassign_device, which is called with
>>> pcidevs_lock held and then calls iommu_call(... reassign_device ...),
>>> which ends up taking ats_list_lock.
>>> 
>>> This is the only exception. I think we need to move the
>>> spin_lock(ats_list_lock) from qinval.c to here.
>>
>> First question here is what the lock is meant to protect: Just the list,
>> or also the ATS state change (i.e. serializing enable and disable against
>> one another). In the latter case the lock also wants naming differently.
> 
> My intention was to protect the list only. But I believe you are right and
> we should protect the whole state. I'll rename the lock to ats_lock.
> 
>> Second question is who is to acquire the lock. Why isn't this done _in_
>> {en,dis}able_ats_device() themselves? That would then allow further
>> reducing the locked range, because at least the pci_find_ext_capability()
>> call and the final logging can occur without the lock held.
> 
> You are right, I'll extend the {en,dis}able_ats_device() API to pass a
> pointer to the lock.

Hmm, that'll make for an odd interface. I was wondering in the past
already why we don't have a backref from the PCI dev to its controlling
IOMMU (might be ambiguous and hence better left unset for bridges,
especially host ones, but I think ATS is being fiddled with only for
leaf devices; would need double checking of course).
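
I.e. something like the below (purely illustrative: neither the field
nor the code to initialise it exists today, and the name is made up):

struct pci_dev {
    /* ... existing members unchanged ... */

    /*
     * Backref to the controlling IOMMU's ATS lock: set when the device
     * is added, left NULL for bridges where the association may be
     * ambiguous.  Storing just the lock also sidesteps the lack of a
     * vendor-neutral IOMMU type.
     */
    spinlock_t *ats_lock;
};

With that in place the extra parameter proposed above would go away
again: {en,dis}able_ats_device() would simply wrap the control register
and list manipulation in spin_lock(pdev->ats_lock) /
spin_unlock(pdev->ats_lock).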

Jan

Patch

diff --git a/xen/drivers/passthrough/amd/iommu.h b/xen/drivers/passthrough/amd/iommu.h
index 8bc3c35b1b..edd6eb52b3 100644
--- a/xen/drivers/passthrough/amd/iommu.h
+++ b/xen/drivers/passthrough/amd/iommu.h
@@ -106,6 +106,7 @@  struct amd_iommu {
     int enabled;
 
     struct list_head ats_devices;
+    spinlock_t ats_list_lock;
 };
 
 struct ivrs_unity_map {
diff --git a/xen/drivers/passthrough/amd/iommu_detect.c b/xen/drivers/passthrough/amd/iommu_detect.c
index 2317fa6a7d..1d6f4f2168 100644
--- a/xen/drivers/passthrough/amd/iommu_detect.c
+++ b/xen/drivers/passthrough/amd/iommu_detect.c
@@ -160,6 +160,7 @@  int __init amd_iommu_detect_one_acpi(
     }
 
     spin_lock_init(&iommu->lock);
+    spin_lock_init(&iommu->ats_list_lock);
     INIT_LIST_HEAD(&iommu->ats_devices);
 
     iommu->seg = ivhd_block->pci_segment_group;
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 64c016491d..955f3af57a 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -276,7 +276,11 @@  static int __must_check amd_iommu_setup_domain_device(
          !pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
     {
         if ( devfn == pdev->devfn )
+	{
+	    spin_lock(&iommu->ats_list_lock);
             enable_ats_device(pdev, &iommu->ats_devices);
+	    spin_unlock(&iommu->ats_list_lock);
+	}
 
         amd_iommu_flush_iotlb(devfn, pdev, INV_IOMMU_ALL_PAGES_ADDRESS, 0);
     }
@@ -416,7 +420,11 @@  static void amd_iommu_disable_domain_device(const struct domain *domain,
 
     if ( pci_ats_device(iommu->seg, bus, pdev->devfn) &&
          pci_ats_enabled(iommu->seg, bus, pdev->devfn) )
+    {
+	spin_lock(&iommu->ats_list_lock);
         disable_ats_device(pdev);
+	spin_unlock(&iommu->ats_list_lock);
+    }
 
     BUG_ON ( iommu->dev_table.buffer == NULL );
     req_id = get_dma_requestor_id(iommu->seg, PCI_BDF(bus, devfn));
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 2dfa1c2875..b5db5498a1 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1641,6 +1641,7 @@  void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
 {
     pcidevs_lock();
 
+    /* iommu->ats_list_lock is taken by the caller of this function */
     disable_ats_device(pdev);
 
     ASSERT(pdev->domain);
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index fff1442265..42661f22f4 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1281,6 +1281,7 @@  int __init iommu_alloc(struct acpi_drhd_unit *drhd)
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
     spin_lock_init(&iommu->intremap.lock);
+    spin_lock_init(&iommu->ats_list_lock);
 
     iommu->drhd = drhd;
     drhd->iommu = iommu;
@@ -1769,7 +1770,11 @@  static int domain_context_mapping(struct domain *domain, u8 devfn,
         if ( ret > 0 )
             ret = 0;
         if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
+        {
+            spin_lock(&drhd->iommu->ats_list_lock);
             enable_ats_device(pdev, &drhd->iommu->ats_devices);
+            spin_unlock(&drhd->iommu->ats_list_lock);
+        }
 
         break;
 
@@ -1977,7 +1982,11 @@  static const struct acpi_drhd_unit *domain_context_unmap(
                    domain, &PCI_SBDF(seg, bus, devfn));
         ret = domain_context_unmap_one(domain, iommu, bus, devfn);
         if ( !ret && devfn == pdev->devfn && ats_device(pdev, drhd) > 0 )
+        {
+            spin_lock(&iommu->ats_list_lock);
             disable_ats_device(pdev);
+            spin_unlock(&iommu->ats_list_lock);
+        }
 
         break;
 
@@ -2374,7 +2383,9 @@  static int cf_check intel_iommu_enable_device(struct pci_dev *pdev)
     if ( ret <= 0 )
         return ret;
 
+    spin_lock(&drhd->iommu->ats_list_lock);
     ret = enable_ats_device(pdev, &drhd->iommu->ats_devices);
+    spin_unlock(&drhd->iommu->ats_list_lock);
 
     return ret >= 0 ? 0 : ret;
 }
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index 78aa8a96f5..2a7a4c1b58 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -506,6 +506,7 @@  struct vtd_iommu {
     } flush;
 
     struct list_head ats_devices;
+    spinlock_t ats_list_lock;
     unsigned long *pseudo_domid_map; /* "pseudo" domain id bitmap */
     unsigned long *domid_bitmap;  /* domain id bitmap */
     domid_t *domid_map;           /* domain id mapping array */
diff --git a/xen/drivers/passthrough/vtd/qinval.c b/xen/drivers/passthrough/vtd/qinval.c
index 4f9ad136b9..6e876348db 100644
--- a/xen/drivers/passthrough/vtd/qinval.c
+++ b/xen/drivers/passthrough/vtd/qinval.c
@@ -238,7 +238,10 @@  static int __must_check dev_invalidate_sync(struct vtd_iommu *iommu,
         if ( d == NULL )
             return rc;
 
+	spin_lock(&iommu->ats_list_lock);
         iommu_dev_iotlb_flush_timeout(d, pdev);
+	spin_unlock(&iommu->ats_list_lock);
+
         rcu_unlock_domain(d);
     }
     else if ( rc == -ETIMEDOUT )
diff --git a/xen/drivers/passthrough/vtd/x86/ats.c b/xen/drivers/passthrough/vtd/x86/ats.c
index 04d702b1d6..55e991183b 100644
--- a/xen/drivers/passthrough/vtd/x86/ats.c
+++ b/xen/drivers/passthrough/vtd/x86/ats.c
@@ -117,6 +117,7 @@  int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
     if ( !ecap_dev_iotlb(iommu->ecap) )
         return ret;
 
+    spin_lock(&iommu->ats_list_lock);
     list_for_each_entry_safe( pdev, temp, &iommu->ats_devices, ats.list )
     {
         bool_t sbit;
@@ -155,12 +156,14 @@  int dev_invalidate_iotlb(struct vtd_iommu *iommu, u16 did,
             break;
         default:
             dprintk(XENLOG_WARNING VTDPREFIX, "invalid vt-d flush type\n");
+	    spin_unlock(&iommu->ats_list_lock);
             return -EOPNOTSUPP;
         }
 
         if ( !ret )
             ret = rc;
     }
+    spin_unlock(&iommu->ats_list_lock);
 
     return ret;
 }