diff mbox series

[v2] x86/msi: dynamically map pages for MSI-X tables

Message ID 20230814161502.88394-1-hakor@amazon.com (mailing list archive)
State New, archived
Headers show
Series [v2] x86/msi: dynamically map pages for MSI-X tables | expand

Commit Message

Ruben Hakobyan Aug. 14, 2023, 4:15 p.m. UTC
Xen reserves a constant number of pages that can be used for mapping
MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h.

Reserving a fixed number of pages could result in an -ENOMEM if a
device requests a new page when the fixmap limit is exhausted and will
necessitate manually adjusting the limit before compilation.

To avoid the issues with the current fixmap implementation, we modify
the MSI-X page mapping logic to instead dynamically map new pages when
they are needed by making use of ioremap().

Note that this approach is not suitable for 32-bit architectures, where
the virtual address space is considerably smaller.

Signed-off-by: Ruben Hakobyan <hakor@amazon.com>
---
Changelog:
 * v2:
  - Updated the commit message to talk about the potential use on 32-bit architectures
  - Addressed the various comments for improving the code
---
 xen/arch/x86/include/asm/fixmap.h |  2 -
 xen/arch/x86/include/asm/msi.h    |  5 +--
 xen/arch/x86/msi.c                | 69 ++++++++-----------------------
 3 files changed, 18 insertions(+), 58 deletions(-)

Comments

Jan Beulich Aug. 16, 2023, 12:33 p.m. UTC | #1
On 14.08.2023 18:15, Ruben Hakobyan wrote:
> Xen reserves a constant number of pages that can be used for mapping
> MSI-X tables. This limit is defined by FIX_MSIX_MAX_PAGES in fixmap.h.
> 
> Reserving a fixed number of pages could result in an -ENOMEM if a
> device requests a new page when the fixmap limit is exhausted and will
> necessitate manually adjusting the limit before compilation.
> 
> To avoid the issues with the current fixmap implementation, we modify
> the MSI-X page mapping logic to instead dynamically map new pages when
> they are needed by making use of ioremap().
> 
> Note that this approach is not suitable for 32-bit architectures, where
> the virtual address space is considerably smaller.

This addresses one of the issues raised on v1. There was also the concern
of the mapping now potentially involving memory allocation (if page tables
need populating). I wonder whether, alongside emitting a warning, we
couldn't fall back to using fixmap in such an event.

Furthermore there's the concern regarding VA space use: If the 512 entries
we presently permit don't suffice, we talk about more than 2M worth of
mappings, i.e. more than 4M worth of VA space (due to guard pages).
That's not entirely negligible, even if still only a fairly small share
out of the 64G that we set aside right now, and hence probably wants at
least mentioning.

> -static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr,
> +static void __iomem *msix_get_entry(struct arch_msix *msix, u64 table_paddr,
>                             u64 entry_paddr)
>  {
>      long nr_page;
> -    int idx;
> +    void __iomem *va = NULL;

Unnecessary initializer.

>      nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);
>  
>      if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
> -        return -EINVAL;
> +        return NULL;

Please don't lose the error code; callers should not need to blindly (and
hence possibly wrongly) assume -ENOMEM when getting back NULL. See
xen/err.h.

>      spin_lock(&msix->table_lock);
>      if ( msix->table_refcnt[nr_page]++ == 0 )
>      {
> -        idx = msix_fixmap_alloc();
> -        if ( idx < 0 )
> +        va = ioremap(round_pgdown(entry_paddr), PAGE_SIZE);
> +        if ( va == NULL )
>          {
>              msix->table_refcnt[nr_page]--;
>              goto out;
>          }
> -        set_fixmap_nocache(idx, entry_paddr);
> -        msix->table_idx[nr_page] = idx;
> +        msix->table_va[nr_page] = va;
>      }
>      else
> -        idx = msix->table_idx[nr_page];
> +        va = msix->table_va[nr_page];
>  
>   out:
>      spin_unlock(&msix->table_lock);
> -    return idx;
> +    return va + (entry_paddr & ~PAGE_MASK);

This is wrong in the error case, i.e. when va is NULL.

>  }
>  
> -static void msix_put_fixmap(struct arch_msix *msix, int idx)
> +static void msix_put_entry(struct arch_msix *msix, const void __iomem *entry_va)
>  {
>      int i;
> +    void __iomem *va = (void*)round_pgdown((unsigned long)entry_va);

Nit (style): Blank after * please.

>      spin_lock(&msix->table_lock);
>      for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
>      {
> -        if ( msix->table_idx[i] == idx )
> +        if ( msix->table_va[i] == va )
>              break;
>      }
>      if ( i == MAX_MSIX_TABLE_PAGES )
> @@ -115,9 +84,8 @@ static void msix_put_fixmap(struct arch_msix *msix, int idx)
>  
>      if ( --msix->table_refcnt[i] == 0 )
>      {
> -        clear_fixmap(idx);
> -        msix_fixmap_free(idx);
> -        msix->table_idx[i] = 0;
> +        iounmap(va);
> +        msix->table_va[i] = NULL;

While possibly benign here, please clear the field before unmapping,
such that there's no doubt about a transiently dangling pointer.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/include/asm/fixmap.h b/xen/arch/x86/include/asm/fixmap.h
index 516ec3fa6c..139c3e2dcc 100644
--- a/xen/arch/x86/include/asm/fixmap.h
+++ b/xen/arch/x86/include/asm/fixmap.h
@@ -61,8 +61,6 @@  enum fixed_addresses {
     FIX_ACPI_END = FIX_ACPI_BEGIN + NUM_FIXMAP_ACPI_PAGES - 1,
     FIX_HPET_BASE,
     FIX_TBOOT_SHARED_BASE,
-    FIX_MSIX_IO_RESERV_BASE,
-    FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1,
     FIX_TBOOT_MAP_ADDRESS,
     FIX_APEI_RANGE_BASE,
     FIX_APEI_RANGE_END = FIX_APEI_RANGE_BASE + FIX_APEI_RANGE_MAX -1,
diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
index a53ade95c9..16c80c9883 100644
--- a/xen/arch/x86/include/asm/msi.h
+++ b/xen/arch/x86/include/asm/msi.h
@@ -55,9 +55,6 @@ 
 #define	 MSI_ADDR_DEST_ID_MASK		0x00ff000
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
 
-/* MAX fixed pages reserved for mapping MSIX tables. */
-#define FIX_MSIX_MAX_PAGES              512
-
 struct msi_info {
     pci_sbdf_t sbdf;
     int irq;
@@ -213,7 +210,7 @@  struct arch_msix {
         unsigned long first, last;
     } table, pba;
     int table_refcnt[MAX_MSIX_TABLE_PAGES];
-    int table_idx[MAX_MSIX_TABLE_PAGES];
+    void __iomem *table_va[MAX_MSIX_TABLE_PAGES];
     spinlock_t table_lock;
     bool host_maskall, guest_maskall;
     domid_t warned;
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index d0bf63df1d..57a84b394a 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -24,7 +24,6 @@ 
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/msi.h>
-#include <asm/fixmap.h>
 #include <asm/p2m.h>
 #include <mach_apic.h>
 #include <io_ports.h>
@@ -39,75 +38,45 @@  boolean_param("msi", use_msi);
 
 static void __pci_disable_msix(struct msi_desc *);
 
-/* bitmap indicate which fixed map is free */
-static DEFINE_SPINLOCK(msix_fixmap_lock);
-static DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
-
-static int msix_fixmap_alloc(void)
-{
-    int i, rc = -ENOMEM;
-
-    spin_lock(&msix_fixmap_lock);
-    for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
-        if ( !test_bit(i, &msix_fixmap_pages) )
-            break;
-    if ( i == FIX_MSIX_MAX_PAGES )
-        goto out;
-    rc = FIX_MSIX_IO_RESERV_BASE + i;
-    set_bit(i, &msix_fixmap_pages);
-
- out:
-    spin_unlock(&msix_fixmap_lock);
-    return rc;
-}
-
-static void msix_fixmap_free(int idx)
-{
-    spin_lock(&msix_fixmap_lock);
-    if ( idx >= FIX_MSIX_IO_RESERV_BASE )
-        clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
-    spin_unlock(&msix_fixmap_lock);
-}
-
-static int msix_get_fixmap(struct arch_msix *msix, u64 table_paddr,
+static void __iomem *msix_get_entry(struct arch_msix *msix, u64 table_paddr,
                            u64 entry_paddr)
 {
     long nr_page;
-    int idx;
+    void __iomem *va = NULL;
 
     nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);
 
     if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
-        return -EINVAL;
+        return NULL;
 
     spin_lock(&msix->table_lock);
     if ( msix->table_refcnt[nr_page]++ == 0 )
     {
-        idx = msix_fixmap_alloc();
-        if ( idx < 0 )
+        va = ioremap(round_pgdown(entry_paddr), PAGE_SIZE);
+        if ( va == NULL )
         {
             msix->table_refcnt[nr_page]--;
             goto out;
         }
-        set_fixmap_nocache(idx, entry_paddr);
-        msix->table_idx[nr_page] = idx;
+        msix->table_va[nr_page] = va;
     }
     else
-        idx = msix->table_idx[nr_page];
+        va = msix->table_va[nr_page];
 
  out:
     spin_unlock(&msix->table_lock);
-    return idx;
+    return va + (entry_paddr & ~PAGE_MASK);
 }
 
-static void msix_put_fixmap(struct arch_msix *msix, int idx)
+static void msix_put_entry(struct arch_msix *msix, const void __iomem *entry_va)
 {
     int i;
+    void __iomem *va = (void*)round_pgdown((unsigned long)entry_va);
 
     spin_lock(&msix->table_lock);
     for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
     {
-        if ( msix->table_idx[i] == idx )
+        if ( msix->table_va[i] == va )
             break;
     }
     if ( i == MAX_MSIX_TABLE_PAGES )
@@ -115,9 +84,8 @@  static void msix_put_fixmap(struct arch_msix *msix, int idx)
 
     if ( --msix->table_refcnt[i] == 0 )
     {
-        clear_fixmap(idx);
-        msix_fixmap_free(idx);
-        msix->table_idx[i] = 0;
+        iounmap(va);
+        msix->table_va[i] = NULL;
     }
 
  out:
@@ -568,8 +536,7 @@  int msi_free_irq(struct msi_desc *entry)
     }
 
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
-        msix_put_fixmap(entry->dev->msix,
-                        virt_to_fix((unsigned long)entry->mask_base));
+        msix_put_entry(entry->dev->msix, entry->mask_base);
 
     list_del(&entry->list);
     xfree(entry);
@@ -892,10 +859,9 @@  static int msix_capability_init(struct pci_dev *dev,
     {
         /* Map MSI-X table region */
         u64 entry_paddr = table_paddr + msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
-        int idx = msix_get_fixmap(msix, table_paddr, entry_paddr);
-        void __iomem *base;
+        void __iomem *base = msix_get_entry(msix, table_paddr, entry_paddr);
 
-        if ( idx < 0 )
+        if ( base == NULL )
         {
             if ( zap_on_error )
             {
@@ -907,9 +873,8 @@  static int msix_capability_init(struct pci_dev *dev,
 
             pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
             xfree(entry);
-            return idx;
+            return -ENOMEM;
         }
-        base = fix_to_virt(idx) + (entry_paddr & (PAGE_SIZE - 1));
 
         /* Mask interrupt here */
         writel(1, base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);