[v4] vpci/msix: fix PBA accesses

Message ID 20220307163744.74030-1-roger.pau@citrix.com (mailing list archive)
State New, archived
Series [v4] vpci/msix: fix PBA accesses

Commit Message

Roger Pau Monné March 7, 2022, 4:37 p.m. UTC
Map the PBA in order to access it from the MSI-X read and write
handlers. Note that previously the handlers would pass the physical
host address into the {read,write}{l,q} handlers, which is wrong as
those expect a linear address.

Map the PBA using ioremap when the first access is performed. Note
that 32bit arches might want to abstract the call to ioremap into a
vPCI arch handler, so they can use a fixmap range to map the PBA.

Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Alex Olson <this.is.a0lson@gmail.com>
---
Changes since v3:
 - Use {read,write}_atomic for accessing msix pba field.
 - Shrink locked section.
 - Constify pba.

Changes since v2:
 - Use helper function to map PBA.
 - Mark memory as iomem.

Changes since v1:
 - Also handle writes.

I don't seem to have a box with a driver that will try to access the
PBA, so I would consider this specific code path only build tested. At
least it doesn't seem to regress the current state of vPCI.
---
 xen/drivers/vpci/msix.c | 64 ++++++++++++++++++++++++++++++++++++++---
 xen/drivers/vpci/vpci.c |  2 ++
 xen/include/xen/vpci.h  |  2 ++
 3 files changed, 64 insertions(+), 4 deletions(-)
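
As context for the fix: readl()/writel() dereference their argument as a linear (virtual) address, while the handlers previously passed the PBA's host physical address straight through. A minimal before/after sketch (pba_paddr, pba_size and offset are illustrative names, not identifiers from the patch):

    /* Before: addr is a host physical address, but readl() expects a
     * linear address, so the access is wrong. */
    *data = readl(addr);

    /* After: map the physical range once with ioremap(), then access
     * the PBA through the returned linear address. */
    void __iomem *pba = ioremap(pba_paddr, pba_size);

    if ( pba )
        *data = readl(pba + offset);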

Comments

Jan Beulich March 8, 2022, 8:31 a.m. UTC | #1
On 07.03.2022 17:37, Roger Pau Monne wrote:
> Map the PBA in order to access it from the MSI-X read and write
> handlers. Note that previously the handlers would pass the physical
> host address into the {read,write}{l,q} handlers, which is wrong as
> those expect a linear address.
> 
> Map the PBA using ioremap when the first access is performed. Note
> that 32bit arches might want to abstract the call to ioremap into a
> vPCI arch handler, so they can use a fixmap range to map the PBA.
> 
> Reported-by: Jan Beulich <jbeulich@suse.com>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>

> Cc: Alex Olson <this.is.a0lson@gmail.com>

I'll wait a little with committing, in the hope for Alex to re-provide
a Tested-by.

> --- a/xen/drivers/vpci/msix.c
> +++ b/xen/drivers/vpci/msix.c
> @@ -182,6 +182,38 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
>  }
>  
> +static void __iomem *get_pba(struct vpci *vpci)
> +{
> +    struct vpci_msix *msix = vpci->msix;
> +    /*
> +     * PBA will only be unmapped when the device is deassigned, so access it
> +     * without holding the vpci lock.
> +     */
> +    void __iomem *pba = read_atomic(&msix->pba);
> +
> +    if ( likely(pba) )
> +        return pba;
> +
> +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> +    if ( !pba )
> +        return read_atomic(&msix->pba);
> +
> +    spin_lock(&vpci->lock);
> +    if ( !msix->pba )
> +    {
> +        write_atomic(&msix->pba, pba);
> +        spin_unlock(&vpci->lock);
> +    }
> +    else
> +    {
> +        spin_unlock(&vpci->lock);
> +        iounmap(pba);
> +    }

TBH I had been hoping for just a single spin_unlock(), but you're
the maintainer of this code ...

Jan
Roger Pau Monné March 8, 2022, 9:05 a.m. UTC | #2
On Tue, Mar 08, 2022 at 09:31:34AM +0100, Jan Beulich wrote:
> On 07.03.2022 17:37, Roger Pau Monne wrote:
> > Map the PBA in order to access it from the MSI-X read and write
> > handlers. Note that previously the handlers would pass the physical
> > host address into the {read,write}{l,q} handlers, which is wrong as
> > those expect a linear address.
> > 
> > Map the PBA using ioremap when the first access is performed. Note
> > that 32bit arches might want to abstract the call to ioremap into a
> > vPCI arch handler, so they can use a fixmap range to map the PBA.
> > 
> > Reported-by: Jan Beulich <jbeulich@suse.com>
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> 
> > Cc: Alex Olson <this.is.a0lson@gmail.com>
> 
> I'll wait a little with committing, in the hope for Alex to re-provide
> a Tested-by.
> 
> > --- a/xen/drivers/vpci/msix.c
> > +++ b/xen/drivers/vpci/msix.c
> > @@ -182,6 +182,38 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
> >      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
> >  }
> >  
> > +static void __iomem *get_pba(struct vpci *vpci)
> > +{
> > +    struct vpci_msix *msix = vpci->msix;
> > +    /*
> > +     * PBA will only be unmapped when the device is deassigned, so access it
> > +     * without holding the vpci lock.
> > +     */
> > +    void __iomem *pba = read_atomic(&msix->pba);
> > +
> > +    if ( likely(pba) )
> > +        return pba;
> > +
> > +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> > +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> > +    if ( !pba )
> > +        return read_atomic(&msix->pba);
> > +
> > +    spin_lock(&vpci->lock);
> > +    if ( !msix->pba )
> > +    {
> > +        write_atomic(&msix->pba, pba);
> > +        spin_unlock(&vpci->lock);
> > +    }
> > +    else
> > +    {
> > +        spin_unlock(&vpci->lock);
> > +        iounmap(pba);
> > +    }
> 
> TBH I had been hoping for just a single spin_unlock(), but you're
> the maintainer of this code ...

Would you prefer something like:

spin_lock(&vpci->lock);
if ( !msix->pba )
    write_atomic(&msix->pba, pba);
spin_unlock(&vpci->lock);

if ( read_atomic(&msix->pba) != pba )
    iounmap(pba);

?

Thanks, Roger.
Jan Beulich March 8, 2022, 10:46 a.m. UTC | #3
On 08.03.2022 10:05, Roger Pau Monné wrote:
> On Tue, Mar 08, 2022 at 09:31:34AM +0100, Jan Beulich wrote:
>> On 07.03.2022 17:37, Roger Pau Monne wrote:
>>> Map the PBA in order to access it from the MSI-X read and write
>>> handlers. Note that previously the handlers would pass the physical
>>> host address into the {read,write}{l,q} handlers, which is wrong as
>>> those expect a linear address.
>>>
>>> Map the PBA using ioremap when the first access is performed. Note
>>> that 32bit arches might want to abstract the call to ioremap into a
>>> vPCI arch handler, so they can use a fixmap range to map the PBA.
>>>
>>> Reported-by: Jan Beulich <jbeulich@suse.com>
>>> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
>>
>> Reviewed-by: Jan Beulich <jbeulich@suse.com>
>>
>>> Cc: Alex Olson <this.is.a0lson@gmail.com>
>>
>> I'll wait a little with committing, in the hope for Alex to re-provide
>> a Tested-by.
>>
>>> --- a/xen/drivers/vpci/msix.c
>>> +++ b/xen/drivers/vpci/msix.c
>>> @@ -182,6 +182,38 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
>>>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
>>>  }
>>>  
>>> +static void __iomem *get_pba(struct vpci *vpci)
>>> +{
>>> +    struct vpci_msix *msix = vpci->msix;
>>> +    /*
>>> +     * PBA will only be unmapped when the device is deassigned, so access it
>>> +     * without holding the vpci lock.
>>> +     */
>>> +    void __iomem *pba = read_atomic(&msix->pba);
>>> +
>>> +    if ( likely(pba) )
>>> +        return pba;
>>> +
>>> +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
>>> +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
>>> +    if ( !pba )
>>> +        return read_atomic(&msix->pba);
>>> +
>>> +    spin_lock(&vpci->lock);
>>> +    if ( !msix->pba )
>>> +    {
>>> +        write_atomic(&msix->pba, pba);
>>> +        spin_unlock(&vpci->lock);
>>> +    }
>>> +    else
>>> +    {
>>> +        spin_unlock(&vpci->lock);
>>> +        iounmap(pba);
>>> +    }
>>
>> TBH I had been hoping for just a single spin_unlock(), but you're
>> the maintainer of this code ...
> 
> Would you prefer something like:
> 
> spin_lock(&vpci->lock);
> if ( !msix->pba )
>     write_atomic(&msix->pba, pba);
> spin_unlock(&vpci->lock);
> 
> if ( read_atomic(&msix->pba) != pba )
>     iounmap(pba);

This or (to avoid the re-read)

    spin_lock(&vpci->lock);
    if ( !msix->pba )
    {
        write_atomic(&msix->pba, pba);
        pba = NULL;
    }
    spin_unlock(&vpci->lock);

    if ( pba )
        iounmap(pba);

Iirc we have similar constructs elsewhere in a few places.

Jan
Roger Pau Monné March 8, 2022, 12:37 p.m. UTC | #4
On Tue, Mar 08, 2022 at 11:46:20AM +0100, Jan Beulich wrote:
> On 08.03.2022 10:05, Roger Pau Monné wrote:
> > On Tue, Mar 08, 2022 at 09:31:34AM +0100, Jan Beulich wrote:
> >> On 07.03.2022 17:37, Roger Pau Monne wrote:
> >>> Map the PBA in order to access it from the MSI-X read and write
> >>> handlers. Note that previously the handlers would pass the physical
> >>> host address into the {read,write}{l,q} handlers, which is wrong as
> >>> those expect a linear address.
> >>>
> >>> Map the PBA using ioremap when the first access is performed. Note
> >>> that 32bit arches might want to abstract the call to ioremap into a
> >>> vPCI arch handler, so they can use a fixmap range to map the PBA.
> >>>
> >>> Reported-by: Jan Beulich <jbeulich@suse.com>
> >>> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> >>
> >> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> >>
> >>> Cc: Alex Olson <this.is.a0lson@gmail.com>
> >>
> >> I'll wait a little with committing, in the hope for Alex to re-provide
> >> a Tested-by.
> >>
> >>> --- a/xen/drivers/vpci/msix.c
> >>> +++ b/xen/drivers/vpci/msix.c
> >>> @@ -182,6 +182,38 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
> >>>      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
> >>>  }
> >>>  
> >>> +static void __iomem *get_pba(struct vpci *vpci)
> >>> +{
> >>> +    struct vpci_msix *msix = vpci->msix;
> >>> +    /*
> >>> +     * PBA will only be unmapped when the device is deassigned, so access it
> >>> +     * without holding the vpci lock.
> >>> +     */
> >>> +    void __iomem *pba = read_atomic(&msix->pba);
> >>> +
> >>> +    if ( likely(pba) )
> >>> +        return pba;
> >>> +
> >>> +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> >>> +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> >>> +    if ( !pba )
> >>> +        return read_atomic(&msix->pba);
> >>> +
> >>> +    spin_lock(&vpci->lock);
> >>> +    if ( !msix->pba )
> >>> +    {
> >>> +        write_atomic(&msix->pba, pba);
> >>> +        spin_unlock(&vpci->lock);
> >>> +    }
> >>> +    else
> >>> +    {
> >>> +        spin_unlock(&vpci->lock);
> >>> +        iounmap(pba);
> >>> +    }
> >>
> >> TBH I had been hoping for just a single spin_unlock(), but you're
> >> the maintainer of this code ...
> > 
> > Would you prefer something like:
> > 
> > spin_lock(&vpci->lock);
> > if ( !msix->pba )
> >     write_atomic(&msix->pba, pba);
> > spin_unlock(&vpci->lock);
> > 
> > if ( read_atomic(&msix->pba) != pba )
> >     iounmap(pba);
> 
> This or (to avoid the re-read)
> 
>     spin_lock(&vpci->lock);
>     if ( !msix->pba )
>     {
>         write_atomic(&msix->pba, pba);
>         pba = NULL;
>     }
>     spin_unlock(&vpci->lock);
> 
>     if ( pba )
>         iounmap(pba);
> 
> Iirc we have similar constructs elsewhere in a few places.

I don't really have a strong opinion. I agree the duplicated
spin_unlock() call is not nice, but I think the logic is easier to
follow by using a single if ... else ... section.

Feel free to adjust at commit, or else I can resend if you prefer.

Thanks, Roger.
Alex Olson March 8, 2022, 6:19 p.m. UTC | #5
On Tue, 2022-03-08 at 09:31 +0100, Jan Beulich wrote:
> On 07.03.2022 17:37, Roger Pau Monne wrote:
> > Map the PBA in order to access it from the MSI-X read and write
> > handlers. Note that previously the handlers would pass the physical
> > host address into the {read,write}{l,q} handlers, which is wrong as
> > those expect a linear address.
> > 
> > Map the PBA using ioremap when the first access is performed. Note
> > that 32bit arches might want to abstract the call to ioremap into a
> > vPCI arch handler, so they can use a fixmap range to map the PBA.
> > 
> > Reported-by: Jan Beulich <jbeulich@suse.com>
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> 
> > Cc: Alex Olson <this.is.a0lson@gmail.com>
> 
> I'll wait a little with committing, in the hope for Alex to re-provide
> a Tested-by.

It works fine for me; you can add "Tested-by: Alex.Olson@starlab.io" to the
commit.

> 
> > --- a/xen/drivers/vpci/msix.c
> > +++ b/xen/drivers/vpci/msix.c
> > @@ -182,6 +182,38 @@ static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
> >      return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
> >  }
> >  
> > +static void __iomem *get_pba(struct vpci *vpci)
> > +{
> > +    struct vpci_msix *msix = vpci->msix;
> > +    /*
> > +     * PBA will only be unmapped when the device is deassigned, so access it
> > +     * without holding the vpci lock.
> > +     */
> > +    void __iomem *pba = read_atomic(&msix->pba);
> > +
> > +    if ( likely(pba) )
> > +        return pba;
> > +
> > +    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
> > +                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
> > +    if ( !pba )
> > +        return read_atomic(&msix->pba);
> > +
> > +    spin_lock(&vpci->lock);
> > +    if ( !msix->pba )
> > +    {
> > +        write_atomic(&msix->pba, pba);
> > +        spin_unlock(&vpci->lock);
> > +    }
> > +    else
> > +    {
> > +        spin_unlock(&vpci->lock);
> > +        iounmap(pba);
> > +    }
> 
> TBH I had been hoping for just a single spin_unlock(), but you're
> the maintainer of this code ...
> 
> Jan
> 
Thanks

-Alex
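
For reference, here is a sketch of get_pba() with Jan's single-unlock variant folded into the v4 code; it is purely illustrative of the alternative discussed above, not necessarily what was committed:

    static void __iomem *get_pba(struct vpci *vpci)
    {
        struct vpci_msix *msix = vpci->msix;
        void __iomem *pba = read_atomic(&msix->pba);

        if ( likely(pba) )
            return pba;

        pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
                      vmsix_table_size(vpci, VPCI_MSIX_PBA));
        if ( !pba )
            return read_atomic(&msix->pba);

        spin_lock(&vpci->lock);
        if ( !msix->pba )
        {
            /* Won the race: publish this mapping. */
            write_atomic(&msix->pba, pba);
            pba = NULL;
        }
        spin_unlock(&vpci->lock);

        /* Lost the race: another CPU published a mapping first. */
        if ( pba )
            iounmap(pba);

        return read_atomic(&msix->pba);
    }

The unlock then appears exactly once, and a losing mapping is torn down outside the locked region.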

Patch

diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index a1fa7a5f13..63f162cf5a 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -182,6 +182,38 @@  static struct vpci_msix_entry *get_entry(struct vpci_msix *msix,
     return &msix->entries[(addr - start) / PCI_MSIX_ENTRY_SIZE];
 }
 
+static void __iomem *get_pba(struct vpci *vpci)
+{
+    struct vpci_msix *msix = vpci->msix;
+    /*
+     * PBA will only be unmapped when the device is deassigned, so access it
+     * without holding the vpci lock.
+     */
+    void __iomem *pba = read_atomic(&msix->pba);
+
+    if ( likely(pba) )
+        return pba;
+
+    pba = ioremap(vmsix_table_addr(vpci, VPCI_MSIX_PBA),
+                  vmsix_table_size(vpci, VPCI_MSIX_PBA));
+    if ( !pba )
+        return read_atomic(&msix->pba);
+
+    spin_lock(&vpci->lock);
+    if ( !msix->pba )
+    {
+        write_atomic(&msix->pba, pba);
+        spin_unlock(&vpci->lock);
+    }
+    else
+    {
+        spin_unlock(&vpci->lock);
+        iounmap(pba);
+    }
+
+    return read_atomic(&msix->pba);
+}
+
 static int cf_check msix_read(
     struct vcpu *v, unsigned long addr, unsigned int len, unsigned long *data)
 {
@@ -200,6 +232,10 @@  static int cf_check msix_read(
 
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
+        struct vpci *vpci = msix->pdev->vpci;
+        unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
+        const void __iomem *pba = get_pba(vpci);
+
         /*
          * Access to PBA.
          *
@@ -207,14 +243,22 @@  static int cf_check msix_read(
          * guest address space. If this changes the address will need to be
          * translated.
          */
+        if ( !pba )
+        {
+            gprintk(XENLOG_WARNING,
+                    "%pp: unable to map MSI-X PBA, report all pending\n",
+                    msix->pdev);
+            return X86EMUL_OKAY;
+        }
+
         switch ( len )
         {
         case 4:
-            *data = readl(addr);
+            *data = readl(pba + idx);
             break;
 
         case 8:
-            *data = readq(addr);
+            *data = readq(pba + idx);
             break;
 
         default:
@@ -275,19 +319,31 @@  static int cf_check msix_write(
 
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
+        struct vpci *vpci = msix->pdev->vpci;
+        unsigned int idx = addr - vmsix_table_addr(vpci, VPCI_MSIX_PBA);
+        const void __iomem *pba = get_pba(vpci);
 
         if ( !is_hardware_domain(d) )
             /* Ignore writes to PBA for DomUs, its behavior is undefined. */
             return X86EMUL_OKAY;
 
+        if ( !pba )
+        {
+            /* Unable to map the PBA, ignore write. */
+            gprintk(XENLOG_WARNING,
+                    "%pp: unable to map MSI-X PBA, write ignored\n",
+                    msix->pdev);
+            return X86EMUL_OKAY;
+        }
+
         switch ( len )
         {
         case 4:
-            writel(data, addr);
+            writel(data, pba + idx);
             break;
 
         case 8:
-            writeq(data, addr);
+            writeq(data, pba + idx);
             break;
 
         default:
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index f3b32d66cb..9fb3c05b2b 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -51,6 +51,8 @@  void vpci_remove_device(struct pci_dev *pdev)
         xfree(r);
     }
     spin_unlock(&pdev->vpci->lock);
+    if ( pdev->vpci->msix && pdev->vpci->msix->pba )
+        iounmap(pdev->vpci->msix->pba);
     xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
     xfree(pdev->vpci);
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index bcad1516ae..67c9a0c631 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -127,6 +127,8 @@  struct vpci {
         bool enabled         : 1;
         /* Masked? */
         bool masked          : 1;
+        /* PBA map */
+        void __iomem *pba;
         /* Entries. */
         struct vpci_msix_entry {
             uint64_t addr;