diff mbox series

[v2,3/5] x86/vmsi: add support for extended destination ID in address field

Message ID 20220216103026.11533-4-roger.pau@citrix.com (mailing list archive)
State New, archived
Headers show
Series x86: extended destination ID support | expand

Commit Message

Roger Pau Monné Feb. 16, 2022, 10:30 a.m. UTC
Both QEMU/KVM and Hyper-V support using bits 11:5 from the MSI address
field in order to store the high part of the target APIC ID. This
extends the destination ID from 8 to 15 bits, raising the maximum APIC
ID usable without interrupt remapping support from 255 to 32767.

Note the interface used by QEMU for emulated devices (via the
XEN_DMOP_inject_msi hypercall) already passes both the address and
data fields into Xen for processing, so there's no need for any change
to QEMU there.

However, for PCI passthrough devices QEMU uses the
XEN_DOMCTL_bind_pt_irq hypercall, which does need a modification to
the gflags field in order to pass an APIC destination ID greater than
255.

Take the opportunity to make the domain parameter of
hvm_girq_dest_2_vcpu_id const while modifying the other function
parameters. Also adjust dest_mode when touching related code to make
it bool.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Changes since v1:
 - Do not expose extended destination ID support.
 - Use d->arch.ext_dest_id.
 - Add comment clarifying the usage of MSI_ADDR_VIRT_EXT_DEST_ID_MASK.
---
 xen/arch/x86/hvm/irq.c             |  5 +++-
 xen/arch/x86/hvm/vmsi.c            | 43 +++++++++++++++++++++---------
 xen/arch/x86/include/asm/hvm/hvm.h |  5 ++--
 xen/arch/x86/include/asm/msi.h     |  7 +++++
 xen/drivers/passthrough/x86/hvm.c  | 11 +++++++-
 xen/drivers/vpci/msi.c             |  2 +-
 xen/include/public/domctl.h        |  1 +
 xen/include/xen/vpci.h             |  2 +-
 8 files changed, 58 insertions(+), 18 deletions(-)

Comments

Jan Beulich Feb. 16, 2022, 3:57 p.m. UTC | #1
On 16.02.2022 11:30, Roger Pau Monne wrote:
> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -588,6 +588,7 @@ struct xen_domctl_bind_pt_irq {
>  #define XEN_DOMCTL_VMSI_X86_DELIV_MASK   0x007000
>  #define XEN_DOMCTL_VMSI_X86_TRIG_MASK    0x008000
>  #define XEN_DOMCTL_VMSI_X86_UNMASKED     0x010000
> +#define XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK 0xfe0000

I think this is what requires the interface version bump. With that
moved here:
Reviewed-by: Jan Beulich <jbeulich@suse.com>

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
index 52aae4565f..e10e085a55 100644
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -383,7 +383,7 @@  int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq)
 int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data)
 {
     uint32_t tmp = (uint32_t) addr;
-    uint8_t  dest = (tmp & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT;
+    unsigned int dest = MASK_EXTR(tmp, MSI_ADDR_DEST_ID_MASK);
     uint8_t  dest_mode = !!(tmp & MSI_ADDR_DESTMODE_MASK);
     uint8_t  delivery_mode = (data & MSI_DATA_DELIVERY_MODE_MASK)
         >> MSI_DATA_DELIVERY_MODE_SHIFT;
@@ -391,6 +391,9 @@  int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data)
         >> MSI_DATA_TRIGGER_SHIFT;
     uint8_t vector = data & MSI_DATA_VECTOR_MASK;
 
+    if ( vector && d->arch.ext_dest_id )
+        dest |= MASK_EXTR(tmp, MSI_ADDR_VIRT_EXT_DEST_ID_MASK) << 8;
+
     if ( !vector )
     {
         int pirq = ((addr >> 32) & 0xffffff00) | dest;
diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index 13e2a190b4..4af550cc2a 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -66,7 +66,7 @@  static void vmsi_inj_irq(
 
 int vmsi_deliver(
     struct domain *d, int vector,
-    uint8_t dest, uint8_t dest_mode,
+    unsigned int dest, bool dest_mode,
     uint8_t delivery_mode, uint8_t trig_mode)
 {
     struct vlapic *target;
@@ -107,11 +107,14 @@  void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
 {
     uint32_t flags = pirq_dpci->gmsi.gflags;
     int vector = pirq_dpci->gmsi.gvec;
-    uint8_t dest = (uint8_t)flags;
+    unsigned int dest = MASK_EXTR(flags, XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
     bool dest_mode = flags & XEN_DOMCTL_VMSI_X86_DM_MASK;
     uint8_t delivery_mode = MASK_EXTR(flags, XEN_DOMCTL_VMSI_X86_DELIV_MASK);
     bool trig_mode = flags & XEN_DOMCTL_VMSI_X86_TRIG_MASK;
 
+    if ( d->arch.ext_dest_id )
+        dest |= MASK_EXTR(flags, XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK);
+
     HVM_DBG_LOG(DBG_LEVEL_IOAPIC,
                 "msi: dest=%x dest_mode=%x delivery_mode=%x "
                 "vector=%x trig_mode=%x\n",
@@ -123,7 +126,8 @@  void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
 }
 
 /* Return value, -1 : multi-dests, non-negative value: dest_vcpu_id */
-int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
+int hvm_girq_dest_2_vcpu_id(const struct domain *d, unsigned int dest,
+                            bool dest_mode)
 {
     int dest_vcpu_id = -1, w = 0;
     struct vcpu *v;
@@ -636,15 +640,21 @@  void msix_write_completion(struct vcpu *v)
 }
 
 #ifdef CONFIG_HAS_VPCI
-static unsigned int msi_gflags(uint16_t data, uint64_t addr, bool masked)
+static unsigned int msi_gflags(uint16_t data, uint64_t addr, bool masked,
+                               bool ext_dest_id)
 {
+    unsigned int dest = MASK_EXTR(addr, MSI_ADDR_DEST_ID_MASK);
+
+    if ( ext_dest_id )
+        dest |= MASK_EXTR(addr, MSI_ADDR_VIRT_EXT_DEST_ID_MASK) << 8;
+
     /*
      * We need to use the DOMCTL constants here because the output of this
      * function is used as input to pt_irq_create_bind, which also takes the
      * input from the DOMCTL itself.
      */
-    return MASK_INSR(MASK_EXTR(addr, MSI_ADDR_DEST_ID_MASK),
-                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK) |
+    return MASK_INSR(dest, XEN_DOMCTL_VMSI_X86_DEST_ID_MASK) |
+           MASK_INSR(dest, XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK) |
            MASK_INSR(MASK_EXTR(addr, MSI_ADDR_REDIRECTION_MASK),
                      XEN_DOMCTL_VMSI_X86_RH_MASK) |
            MASK_INSR(MASK_EXTR(addr, MSI_ADDR_DESTMODE_MASK),
@@ -698,7 +708,8 @@  static int vpci_msi_update(const struct pci_dev *pdev, uint32_t data,
             .irq_type = PT_IRQ_TYPE_MSI,
             .u.msi.gvec = (vector & ~vector_mask) |
                           ((vector + i) & vector_mask),
-            .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1),
+            .u.msi.gflags = msi_gflags(data, address, (mask >> i) & 1,
+                                       pdev->domain->arch.ext_dest_id),
         };
         int rc = pt_irq_create_bind(pdev->domain, &bind);
 
@@ -826,8 +837,13 @@  void vpci_msi_arch_init(struct vpci_msi *msi)
     msi->arch.pirq = INVALID_PIRQ;
 }
 
-void vpci_msi_arch_print(const struct vpci_msi *msi)
+void vpci_msi_arch_print(const struct vpci_msi *msi, const struct domain *d)
 {
+    unsigned long dest = MASK_EXTR(msi->address, MSI_ADDR_DEST_ID_MASK);
+
+    if ( d->arch.ext_dest_id )
+        dest |= MASK_EXTR(msi->address, MSI_ADDR_VIRT_EXT_DEST_ID_MASK) << 8;
+
     printk("vec=%#02x%7s%6s%3sassert%5s%7s dest_id=%lu pirq: %d\n",
            MASK_EXTR(msi->data, MSI_DATA_VECTOR_MASK),
            msi->data & MSI_DATA_DELIVERY_LOWPRI ? "lowest" : "fixed",
@@ -835,8 +851,7 @@  void vpci_msi_arch_print(const struct vpci_msi *msi)
            msi->data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
            msi->address & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
            msi->address & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "fixed",
-           MASK_EXTR(msi->address, MSI_ADDR_DEST_ID_MASK),
-           msi->arch.pirq);
+           dest, msi->arch.pirq);
 }
 
 void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,
@@ -891,11 +906,16 @@  void vpci_msix_arch_init_entry(struct vpci_msix_entry *entry)
 
 int vpci_msix_arch_print(const struct vpci_msix *msix)
 {
+    const struct domain *d = msix->pdev->domain;
     unsigned int i;
 
     for ( i = 0; i < msix->max_entries; i++ )
     {
         const struct vpci_msix_entry *entry = &msix->entries[i];
+        unsigned long dest = MASK_EXTR(entry->addr, MSI_ADDR_DEST_ID_MASK);
+
+        if ( d->arch.ext_dest_id )
+            dest |= MASK_EXTR(entry->addr, MSI_ADDR_VIRT_EXT_DEST_ID_MASK) << 8;
 
         printk("%6u vec=%02x%7s%6s%3sassert%5s%7s dest_id=%lu mask=%u pirq: %d\n",
                i, MASK_EXTR(entry->data, MSI_DATA_VECTOR_MASK),
@@ -904,8 +924,7 @@  int vpci_msix_arch_print(const struct vpci_msix *msix)
                entry->data & MSI_DATA_LEVEL_ASSERT ? "" : "de",
                entry->addr & MSI_ADDR_DESTMODE_LOGIC ? "log" : "phys",
                entry->addr & MSI_ADDR_REDIRECTION_LOWPRI ? "lowest" : "fixed",
-               MASK_EXTR(entry->addr, MSI_ADDR_DEST_ID_MASK),
-               entry->masked, entry->arch.pirq);
+               dest, entry->masked, entry->arch.pirq);
         if ( i && !(i % 64) )
         {
             struct pci_dev *pdev = msix->pdev;
diff --git a/xen/arch/x86/include/asm/hvm/hvm.h b/xen/arch/x86/include/asm/hvm/hvm.h
index b44bbdeb21..37e9d4c0fc 100644
--- a/xen/arch/x86/include/asm/hvm/hvm.h
+++ b/xen/arch/x86/include/asm/hvm/hvm.h
@@ -270,11 +270,12 @@  uint64_t hvm_get_guest_time_fixed(const struct vcpu *v, uint64_t at_tsc);
 
 int vmsi_deliver(
     struct domain *d, int vector,
-    uint8_t dest, uint8_t dest_mode,
+    unsigned int dest, bool dest_mode,
     uint8_t delivery_mode, uint8_t trig_mode);
 struct hvm_pirq_dpci;
 void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *);
-int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode);
+int hvm_girq_dest_2_vcpu_id(const struct domain *d, unsigned int dest,
+                            bool dest_mode);
 
 enum hvm_intblk
 hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack);
diff --git a/xen/arch/x86/include/asm/msi.h b/xen/arch/x86/include/asm/msi.h
index e228b0f3f3..9d9509a368 100644
--- a/xen/arch/x86/include/asm/msi.h
+++ b/xen/arch/x86/include/asm/msi.h
@@ -54,6 +54,13 @@ 
 #define MSI_ADDR_DEST_ID_SHIFT		12
 #define	 MSI_ADDR_DEST_ID_MASK		0x00ff000
 #define  MSI_ADDR_DEST_ID(dest)		(((dest) << MSI_ADDR_DEST_ID_SHIFT) & MSI_ADDR_DEST_ID_MASK)
+/*
+ * Use the reserved bits 11:5 to store the high part of the APIC ID, that
+ * allows expanding the destination field from 8 to 15 bits. Note this is a
+ * feature only present in virtualized hardware and currently only exposed to
+ * guests but not used by the hypervisor itself.
+ */
+#define	 MSI_ADDR_VIRT_EXT_DEST_ID_MASK	0x0000fe0
 
 /* MAX fixed pages reserved for mapping MSIX tables. */
 #define FIX_MSIX_MAX_PAGES              512
diff --git a/xen/drivers/passthrough/x86/hvm.c b/xen/drivers/passthrough/x86/hvm.c
index 0b37cd145b..9c42ebe17a 100644
--- a/xen/drivers/passthrough/x86/hvm.c
+++ b/xen/drivers/passthrough/x86/hvm.c
@@ -269,7 +269,8 @@  int pt_irq_create_bind(
     {
     case PT_IRQ_TYPE_MSI:
     {
-        uint8_t dest, delivery_mode;
+        unsigned int dest;
+        bool delivery_mode;
         bool dest_mode;
         int dest_vcpu_id;
         const struct vcpu *vcpu;
@@ -346,6 +347,10 @@  int pt_irq_create_bind(
         /* Calculate dest_vcpu_id for MSI-type pirq migration. */
         dest = MASK_EXTR(pirq_dpci->gmsi.gflags,
                          XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
+        if ( d->arch.ext_dest_id )
+            dest |= MASK_EXTR(pirq_dpci->gmsi.gflags,
+                              XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK);
+
         dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
         delivery_mode = MASK_EXTR(pirq_dpci->gmsi.gflags,
                                   XEN_DOMCTL_VMSI_X86_DELIV_MASK);
@@ -789,6 +794,10 @@  static int _hvm_dpci_msi_eoi(struct domain *d,
                                       XEN_DOMCTL_VMSI_X86_DEST_ID_MASK);
         bool dest_mode = pirq_dpci->gmsi.gflags & XEN_DOMCTL_VMSI_X86_DM_MASK;
 
+        if ( d->arch.ext_dest_id )
+            dest |= MASK_EXTR(pirq_dpci->gmsi.gflags,
+                              XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK);
+
         if ( vlapic_match_dest(vcpu_vlapic(current), NULL, 0, dest,
                                dest_mode) )
         {
diff --git a/xen/drivers/vpci/msi.c b/xen/drivers/vpci/msi.c
index 5757a7aed2..e1d8c1d6f2 100644
--- a/xen/drivers/vpci/msi.c
+++ b/xen/drivers/vpci/msi.c
@@ -297,7 +297,7 @@  void vpci_dump_msi(void)
                 printk(" vectors max: %u enabled: %u\n",
                        pdev->msi_maxvec, msi->vectors);
 
-                vpci_msi_arch_print(msi);
+                vpci_msi_arch_print(msi, d);
             }
 
             msix = pdev->vpci->msix;
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 31ec083cb0..ba71ce1148 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -588,6 +588,7 @@  struct xen_domctl_bind_pt_irq {
 #define XEN_DOMCTL_VMSI_X86_DELIV_MASK   0x007000
 #define XEN_DOMCTL_VMSI_X86_TRIG_MASK    0x008000
 #define XEN_DOMCTL_VMSI_X86_UNMASKED     0x010000
+#define XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK 0xfe0000
 
             uint64_aligned_t gtable;
         } msi;
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index e8ac1eb395..354b37ef9c 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -162,7 +162,7 @@  int __must_check vpci_msi_arch_enable(struct vpci_msi *msi,
 void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev);
 void vpci_msi_arch_update(struct vpci_msi *msi, const struct pci_dev *pdev);
 void vpci_msi_arch_init(struct vpci_msi *msi);
-void vpci_msi_arch_print(const struct vpci_msi *msi);
+void vpci_msi_arch_print(const struct vpci_msi *msi, const struct domain *d);
 
 /* Arch-specific vPCI MSI-X helpers. */
 void vpci_msix_arch_mask_entry(struct vpci_msix_entry *entry,