
[v2.1,2/2] vpci/msix: fix PBA accesses

Message ID 20220226100554.2664-1-roger.pau@citrix.com (mailing list archive)

Commit Message

Roger Pau Monné Feb. 26, 2022, 10:05 a.m. UTC
Map the PBA in order to access it from the MSI-X read and write
handlers. Note that previously the handlers would pass the host
physical address to the {read,write}{l,q} helpers, which is wrong, as
those expect a linear address.

Map the PBA using ioremap() when the first access is performed. Note
that 32-bit arches might want to abstract the call to ioremap() into a
vPCI arch handler, so they can use a fixmap range to map the PBA.
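
For illustration only, such an arch hook could look like this
(vpci_arch_ioremap() is a made-up name, not part of this patch):

    /* On 64-bit arches the hook can simply wrap ioremap(). */
    void __iomem *vpci_arch_ioremap(paddr_t pa, size_t size)
    {
        return ioremap(pa, size);
    }

A 32-bit port would instead reserve a dedicated fixmap slot, map the
PBA page(s) there, and return the fixmap virtual address plus the page
offset of pa.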

Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Alex Olson <this.is.a0lson@gmail.com>
---
Changes since v1:
 - Also handle writes.

I don't seem to have a box with a driver that will try to access the
PBA, so I would consider this specific code path build-tested only. At
least it doesn't seem to regress the current state of vPCI.
---
 xen/drivers/vpci/msix.c | 56 ++++++++++++++++++++++++++++++++++++-----
 xen/drivers/vpci/vpci.c |  2 ++
 xen/include/xen/vpci.h  |  2 ++
 3 files changed, 54 insertions(+), 6 deletions(-)

Comments

Jan Beulich March 1, 2022, 8:46 a.m. UTC | #1
On 26.02.2022 11:05, Roger Pau Monne wrote:
> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -198,8 +198,13 @@ static int cf_check msix_read(
>      if ( !access_allowed(msix->pdev, addr, len) )
>          return X86EMUL_OKAY;
>  
> +    spin_lock(&msix->pdev->vpci->lock);
>      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>      {
> +        struct vpci *vpci = msix->pdev->vpci;
> +        paddr_t base = vmsix_table_addr(vpci, VPCI_MSIX_PBA);
> +        unsigned int idx = addr - base;
> +
>          /*
>           * Access to PBA.
>           *
> @@ -207,25 +212,43 @@ static int cf_check msix_read(
>           * guest address space. If this changes the address will need to be
>           * translated.
>           */
> +
> +        if ( !msix->pba )
> +        {
> +            msix->pba = ioremap(base, vmsix_table_size(vpci, VPCI_MSIX_PBA));
> +            if ( !msix->pba )
> +            {
> +                /*
> +                 * If unable to map the PBA return all 1s (all pending): it's
> +                 * likely better to trigger spurious events than drop them.
> +                 */
> +                spin_unlock(&vpci->lock);
> +                gprintk(XENLOG_WARNING,
> +                        "%pp: unable to map MSI-X PBA, report all pending\n",
> +                        msix->pdev);
> +                return X86EMUL_OKAY;

Hmm, this may report more set bits than there are vectors. Which is
probably fine, but the comment may want adjusting a little to make
clear this is understood and meant to be that way.

> +           }
> +        }

Imo it would make sense to limit the locked region to around just this
check-and-map logic. There's no need for ...

>          switch ( len )
>          {
>          case 4:
> -            *data = readl(addr);
> +            *data = readl(msix->pba + idx);
>              break;
>  
>          case 8:
> -            *data = readq(addr);
> +            *data = readq(msix->pba + idx);
>              break;
>  
>          default:
>              ASSERT_UNREACHABLE();
>              break;
>          }
> +        spin_unlock(&vpci->lock);

... the actual access to happen under lock, as you remove the mapping
only when the device is being removed. I'm inclined to suggest making
a helper function, which does an unlocked check, then the ioremap(),
then takes the lock and re-checks whether the field's still NULL, and
either installs the mapping or (after unlocking) iounmap()s it.
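
Roughly (untested sketch; get_pba() and the exact accessors are made
up here, just to illustrate the shape):

    static void __iomem *get_pba(struct vpci *vpci)
    {
        struct vpci_msix *msix = vpci->msix;
        /*
         * The mapping is only torn down at device removal, so checking
         * the field without holding the lock is fine.
         */
        void __iomem *pba = read_atomic(&msix->pba);

        if ( pba )
            return pba;

        pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
                      vmsix_table_size(vpci, VPCI_MSIX_PBA));
        if ( !pba )
            return NULL;

        spin_lock(&vpci->lock);
        if ( !msix->pba )
        {
            write_atomic(&msix->pba, pba);
            spin_unlock(&vpci->lock);
        }
        else
        {
            /* Someone else beat us to it: drop our mapping. */
            spin_unlock(&vpci->lock);
            iounmap(pba);
        }

        return read_atomic(&msix->pba);
    }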

> --- a/xen/include/xen/vpci.h
> +++ b/xen/include/xen/vpci.h
> @@ -127,6 +127,8 @@ struct vpci {
>          bool enabled         : 1;
>          /* Masked? */
>          bool masked          : 1;
> +        /* PBA map */
> +        void *pba;

Here (and elsewhere as/if applicable) you want to add __iomem, even
if this is merely for documentation purposes right now.
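
I.e. (just annotating the declaration):

        /* PBA map */
        void __iomem *pba;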

I think you did mention this elsewhere: Don't we also need to deal
with accesses to MMIO covered by the same BAR / page, but falling
outside of the MSI-X table and PBA?

Jan
Roger Pau Monné March 1, 2022, 9:08 a.m. UTC | #2
On Tue, Mar 01, 2022 at 09:46:13AM +0100, Jan Beulich wrote:
> On 26.02.2022 11:05, Roger Pau Monne wrote:
> > --- a/xen/drivers/vpci/msix.c
> > +++ b/xen/drivers/vpci/msix.c
> > @@ -198,8 +198,13 @@ static int cf_check msix_read(
> >      if ( !access_allowed(msix->pdev, addr, len) )
> >          return X86EMUL_OKAY;
> >  
> > +    spin_lock(&msix->pdev->vpci->lock);
> >      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
> >      {
> > +        struct vpci *vpci = msix->pdev->vpci;
> > +        paddr_t base = vmsix_table_addr(vpci, VPCI_MSIX_PBA);
> > +        unsigned int idx = addr - base;
> > +
> >          /*
> >           * Access to PBA.
> >           *
> > @@ -207,25 +212,43 @@ static int cf_check msix_read(
> >           * guest address space. If this changes the address will need to be
> >           * translated.
> >           */
> > +
> > +        if ( !msix->pba )
> > +        {
> > +            msix->pba = ioremap(base, vmsix_table_size(vpci, VPCI_MSIX_PBA));
> > +            if ( !msix->pba )
> > +            {
> > +                /*
> > +                 * If unable to map the PBA return all 1s (all pending): it's
> > +                 * likely better to trigger spurious events than drop them.
> > +                 */
> > +                spin_unlock(&vpci->lock);
> > +                gprintk(XENLOG_WARNING,
> > +                        "%pp: unable to map MSI-X PBA, report all pending\n",
> > +                        msix->pdev);
> > +                return X86EMUL_OKAY;
> 
> Hmm, this may report more set bits than there are vectors. Which is
> probably fine, but the comment may want adjusting a little to make
> clear this is understood and meant to be that way.

Yes, it could return more bits than vectors, but that area is also
part of the PBA (as the end is aligned to 8 bytes). I will adjust the
comment.
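
For example (with the PBA size rounded up to an 8-byte boundary): a
device with 36 vectors needs DIV_ROUND_UP(36, 8) = 5 bytes of PBA,
rounded up to 8, so a 64-bit read of the last qword covers bits 36-63
even though there are no vectors behind them.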

> > +           }
> > +        }
> 
> Imo it would make sense to limit the locked region to around just this
> check-and-map logic. There's no need for ...
> 
> >          switch ( len )
> >          {
> >          case 4:
> > -            *data = readl(addr);
> > +            *data = readl(msix->pba + idx);
> >              break;
> >  
> >          case 8:
> > -            *data = readq(addr);
> > +            *data = readq(msix->pba + idx);
> >              break;
> >  
> >          default:
> >              ASSERT_UNREACHABLE();
> >              break;
> >          }
> > +        spin_unlock(&vpci->lock);
> 
> ... the actual access to happen under lock, as you remove the mapping
> only when the device is being removed. I'm inclined to suggest making
> a helper function, which does an unlocked check, then the ioremap(),
> then takes the lock and re-checks whether the field's still NULL, and
> either installs the mapping or (after unlocking) iounmap()s it.

I'm fine with dropping the lock earlier, but I'm not sure there's much
point in placing this in a separate helper, as it's the mapping of at
most 2 pages (the PBA is at most 2048 bits, i.e. 256 bytes, in size,
64-bit aligned).

I guess you are suggesting this in preparation for adding support to
access the non-PBA area falling into the same page(s)?

> > --- a/xen/include/xen/vpci.h
> > +++ b/xen/include/xen/vpci.h
> > @@ -127,6 +127,8 @@ struct vpci {
> >          bool enabled         : 1;
> >          /* Masked? */
> >          bool masked          : 1;
> > +        /* PBA map */
> > +        void *pba;
> 
> Here (and elsewhere as/if applicable) you want to add __iomem, even
> if this is merely for documentation purposes right now.

Will add.

> I think you did mention this elsewhere: Don't we also need to deal
> with accesses to MMIO covered by the same BAR / page, but falling
> outside of the MSI-X table and PBA?

Yes, I did mention it in a reply to Alex:

https://lore.kernel.org/xen-devel/Yhj58BIIN2p4bYJ8@Air-de-Roger/

So far we seem to have gotten away without needing it, but I might as
well try to implement it; it shouldn't be too complicated.

Thanks, Roger.
Jan Beulich March 1, 2022, 10:32 a.m. UTC | #3
On 01.03.2022 10:08, Roger Pau Monné wrote:
> On Tue, Mar 01, 2022 at 09:46:13AM +0100, Jan Beulich wrote:
>> On 26.02.2022 11:05, Roger Pau Monne wrote:
>>> --- a/xen/drivers/vpci/msix.c
>>> +++ b/xen/drivers/vpci/msix.c
>>> @@ -198,8 +198,13 @@ static int cf_check msix_read(
>>>      if ( !access_allowed(msix->pdev, addr, len) )
>>>          return X86EMUL_OKAY;
>>>  
>>> +    spin_lock(&msix->pdev->vpci->lock);
>>>      if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
>>>      {
>>> +        struct vpci *vpci = msix->pdev->vpci;
>>> +        paddr_t base = vmsix_table_addr(vpci, VPCI_MSIX_PBA);
>>> +        unsigned int idx = addr - base;
>>> +
>>>          /*
>>>           * Access to PBA.
>>>           *
>>> @@ -207,25 +212,43 @@ static int cf_check msix_read(
>>>           * guest address space. If this changes the address will need to be
>>>           * translated.
>>>           */
>>> +
>>> +        if ( !msix->pba )
>>> +        {
>>> +            msix->pba = ioremap(base, vmsix_table_size(vpci, VPCI_MSIX_PBA));
>>> +            if ( !msix->pba )
>>> +            {
>>> +                /*
>>> +                 * If unable to map the PBA return all 1s (all pending): it's
>>> +                 * likely better to trigger spurious events than drop them.
>>> +                 */
>>> +                spin_unlock(&vpci->lock);
>>> +                gprintk(XENLOG_WARNING,
>>> +                        "%pp: unable to map MSI-X PBA, report all pending\n",
>>> +                        msix->pdev);
>>> +                return X86EMUL_OKAY;
>>
>> Hmm, this may report more set bits than there are vectors. Which is
>> probably fine, but the comment may want adjusting a little to make
>> clear this is understood and meant to be that way.
> 
> Yes, it could return more bits than vectors, but that area is also
> part of the PBA (as the end is aligned to 8 bytes). I will adjust the
> comment.
> 
>>> +           }
>>> +        }
>>
>> Imo it would make sense to limit the locked region to around just this
>> check-and-map logic. There's no need for ...
>>
>>>          switch ( len )
>>>          {
>>>          case 4:
>>> -            *data = readl(addr);
>>> +            *data = readl(msix->pba + idx);
>>>              break;
>>>  
>>>          case 8:
>>> -            *data = readq(addr);
>>> +            *data = readq(msix->pba + idx);
>>>              break;
>>>  
>>>          default:
>>>              ASSERT_UNREACHABLE();
>>>              break;
>>>          }
>>> +        spin_unlock(&vpci->lock);
>>
>> ... the actual access to happen under lock, as you remove the mapping
>> only when the device is being removed. I'm inclined to suggest making
>> a helper function, which does an unlocked check, then the ioremap(),
>> then takes the lock and re-checks whether the field's still NULL, and
>> either installs the mapping or (after unlocking) iounmap()s it.
> 
> I'm fine with dropping the lock earlier, but I'm not sure there's much
> point in placing this in a separate helper, as it's the mapping of at
> most 2 pages (the PBA is at most 2048 bits, i.e. 256 bytes, in size,
> 64-bit aligned).
> 
> I guess you are suggesting this in preparation for adding support to
> access the non-PBA area falling into the same page(s)?

Not just. The write path wants to use the same logic, and with it
becoming a little more involved I think it would be better to have it
in just one place.
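
Both paths could then boil down to something like (sketch, reusing the
made-up get_pba() from above):

    void __iomem *pba = get_pba(vpci);

    if ( !pba )
        /*
         * Read path: report all pending (*data is preset to all ones);
         * write path: drop the access.
         */
        return X86EMUL_OKAY;

    /* ... followed by the readl/readq or writel/writeq on pba + idx. */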

Jan

Patch

diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index a1fa7a5f13..4775f88e1f 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -198,8 +198,13 @@  static int cf_check msix_read(
     if ( !access_allowed(msix->pdev, addr, len) )
         return X86EMUL_OKAY;
 
+    spin_lock(&msix->pdev->vpci->lock);
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
+        struct vpci *vpci = msix->pdev->vpci;
+        paddr_t base = vmsix_table_addr(vpci, VPCI_MSIX_PBA);
+        unsigned int idx = addr - base;
+
         /*
          * Access to PBA.
          *
@@ -207,25 +212,43 @@  static int cf_check msix_read(
          * guest address space. If this changes the address will need to be
          * translated.
          */
+
+        if ( !msix->pba )
+        {
+            msix->pba = ioremap(base, vmsix_table_size(vpci, VPCI_MSIX_PBA));
+            if ( !msix->pba )
+            {
+                /*
+                 * If unable to map the PBA return all 1s (all pending): it's
+                 * likely better to trigger spurious events than drop them.
+                 */
+                spin_unlock(&vpci->lock);
+                gprintk(XENLOG_WARNING,
+                        "%pp: unable to map MSI-X PBA, report all pending\n",
+                        msix->pdev);
+                return X86EMUL_OKAY;
+           }
+        }
+
         switch ( len )
         {
         case 4:
-            *data = readl(addr);
+            *data = readl(msix->pba + idx);
             break;
 
         case 8:
-            *data = readq(addr);
+            *data = readq(msix->pba + idx);
             break;
 
         default:
             ASSERT_UNREACHABLE();
             break;
         }
+        spin_unlock(&vpci->lock);
 
         return X86EMUL_OKAY;
     }
 
-    spin_lock(&msix->pdev->vpci->lock);
     entry = get_entry(msix, addr);
     offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
 
@@ -273,32 +296,53 @@  static int cf_check msix_write(
     if ( !access_allowed(msix->pdev, addr, len) )
         return X86EMUL_OKAY;
 
+    spin_lock(&msix->pdev->vpci->lock);
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
+        struct vpci *vpci = msix->pdev->vpci;
+        paddr_t base = vmsix_table_addr(vpci, VPCI_MSIX_PBA);
+        unsigned int idx = addr - base;
 
         if ( !is_hardware_domain(d) )
+        {
             /* Ignore writes to PBA for DomUs, its behavior is undefined. */
+            spin_unlock(&vpci->lock);
             return X86EMUL_OKAY;
+        }
+
+        if ( !msix->pba )
+        {
+            msix->pba = ioremap(base, vmsix_table_size(vpci, VPCI_MSIX_PBA));
+            if ( !msix->pba )
+            {
+                /* Unable to map the PBA, ignore write. */
+                spin_unlock(&vpci->lock);
+                gprintk(XENLOG_WARNING,
+                        "%pp: unable to map MSI-X PBA, write ignored\n",
+                        msix->pdev);
+                return X86EMUL_OKAY;
+           }
+        }
 
         switch ( len )
         {
         case 4:
-            writel(data, addr);
+            writel(data, msix->pba + idx);
             break;
 
         case 8:
-            writeq(data, addr);
+            writeq(data, msix->pba + idx);
             break;
 
         default:
             ASSERT_UNREACHABLE();
             break;
         }
+        spin_unlock(&vpci->lock);
 
         return X86EMUL_OKAY;
     }
 
-    spin_lock(&msix->pdev->vpci->lock);
     entry = get_entry(msix, addr);
     offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
 
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index f3b32d66cb..9fb3c05b2b 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -51,6 +51,8 @@  void vpci_remove_device(struct pci_dev *pdev)
         xfree(r);
     }
     spin_unlock(&pdev->vpci->lock);
+    if ( pdev->vpci->msix && pdev->vpci->msix->pba )
+        iounmap(pdev->vpci->msix->pba);
     xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
     xfree(pdev->vpci);
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index bcad1516ae..c399b101ee 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -127,6 +127,8 @@  struct vpci {
         bool enabled         : 1;
         /* Masked? */
         bool masked          : 1;
+        /* PBA map */
+        void *pba;
         /* Entries. */
         struct vpci_msix_entry {
             uint64_t addr;