Message ID | 1666288635-72591-13-git-send-email-mikelley@microsoft.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | Add PCI pass-thru support to Hyper-V Confidential VMs | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch |
On Thu, Oct 20, 2022 at 10:57:15AM -0700, Michael Kelley wrote: > For PCI pass-thru devices in a Confidential VM, Hyper-V requires > that PCI config space be accessed via hypercalls. In normal VMs, > config space accesses are trapped to the Hyper-V host and emulated. > But in a confidential VM, the host can't access guest memory to > decode the instruction for emulation, so an explicit hypercall must > be used. > > Update the PCI config space access functions to use the hypercalls > when such use is indicated by Hyper-V flags. Also, set the flag to > allow the Hyper-V PCI driver to be loaded and used in a Confidential > VM (a.k.a., "Isolation VM"). The driver has previously been hardened > against a malicious Hyper-V host[1]. > > [1] https://lore.kernel.org/all/20220511223207.3386-2-parri.andrea@gmail.com/ > > Co-developed-by: Dexuan Cui <decui@microsoft.com> > Signed-off-by: Dexuan Cui <decui@microsoft.com> > Signed-off-by: Michael Kelley <mikelley@microsoft.com> > --- > drivers/hv/channel_mgmt.c | 2 +- > drivers/pci/controller/pci-hyperv.c | 153 +++++++++++++++++++++--------------- > 2 files changed, 92 insertions(+), 63 deletions(-) > > diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c > index 5b12040..c0f9ac2 100644 > --- a/drivers/hv/channel_mgmt.c > +++ b/drivers/hv/channel_mgmt.c > @@ -67,7 +67,7 @@ > { .dev_type = HV_PCIE, > HV_PCIE_GUID, > .perf_device = false, > - .allowed_in_isolated = false, > + .allowed_in_isolated = true, > }, > > /* Synthetic Frame Buffer */ > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c > index 02ebf3e..9873296 100644 > --- a/drivers/pci/controller/pci-hyperv.c > +++ b/drivers/pci/controller/pci-hyperv.c > @@ -514,6 +514,7 @@ struct hv_pcibus_device { > > /* Highest slot of child device with resources allocated */ > int wslot_res_allocated; > + bool use_calls; /* Use hypercalls to access mmio cfg space */ > > /* hypercall arg, must not cross page boundary */ > struct hv_retarget_device_interrupt retarget_msi_interrupt_params; > @@ -1134,8 +1135,9 @@ static void hv_pci_write_mmio(phys_addr_t gpa, int size, u32 val) > static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, > int size, u32 *val) > { > + struct hv_pcibus_device *hbus = hpdev->hbus; > + int offset = where + CFG_PAGE_OFFSET; > unsigned long flags; > - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where; > > /* > * If the attempt is to read the IDs or the ROM BAR, simulate that. > @@ -1163,56 +1165,74 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, > */ > *val = 0; > } else if (where + size <= CFG_PAGE_SIZE) { > - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); > - /* Choose the function to be read. (See comment above) */ > - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); > - /* Make sure the function was chosen before we start reading. */ > - mb(); > - /* Read from that function's config space. */ > - switch (size) { > - case 1: > - *val = readb(addr); > - break; > - case 2: > - *val = readw(addr); > - break; > - default: > - *val = readl(addr); > - break; > + > + spin_lock_irqsave(&hbus->config_lock, flags); > + if (hbus->use_calls) { > + phys_addr_t addr = hbus->mem_config->start + offset; > + > + hv_pci_write_mmio(hbus->mem_config->start, 4, > + hpdev->desc.win_slot.slot); > + hv_pci_read_mmio(addr, size, val); > + } else { > + void __iomem *addr = hbus->cfg_addr + offset; > + > + /* Choose the function to be read. (See comment above) */ > + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); > + /* Make sure the function was chosen before reading. */ > + mb(); > + /* Read from that function's config space. */ > + switch (size) { > + case 1: > + *val = readb(addr); > + break; > + case 2: > + *val = readw(addr); > + break; > + default: > + *val = readl(addr); > + break; > + } An mb() is missing here? > } > - /* > - * Make sure the read was done before we release the spinlock > - * allowing consecutive reads/writes. > - */ > - mb(); > - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); > + spin_unlock_irqrestore(&hbus->config_lock, flags); > } else { > - dev_err(&hpdev->hbus->hdev->device, > + dev_err(&hbus->hdev->device, > "Attempt to read beyond a function's config space.\n"); > } > } > > static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) > { > + struct hv_pcibus_device *hbus = hpdev->hbus; > + u32 val; > u16 ret; > unsigned long flags; > - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + > - PCI_VENDOR_ID; > > - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); > + spin_lock_irqsave(&hbus->config_lock, flags); > > - /* Choose the function to be read. (See comment above) */ > - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); > - /* Make sure the function was chosen before we start reading. */ > - mb(); > - /* Read from that function's config space. */ > - ret = readw(addr); > - /* > - * mb() is not required here, because the spin_unlock_irqrestore() > - * is a barrier. > - */ > + if (hbus->use_calls) { > + phys_addr_t addr = hbus->mem_config->start + > + CFG_PAGE_OFFSET + PCI_VENDOR_ID; > + > + hv_pci_write_mmio(hbus->mem_config->start, 4, > + hpdev->desc.win_slot.slot); > + hv_pci_read_mmio(addr, 2, &val); > + ret = val; /* Truncates to 16 bits */ > + } else { > + void __iomem *addr = hbus->cfg_addr + CFG_PAGE_OFFSET + > + PCI_VENDOR_ID; > + /* Choose the function to be read. (See comment above) */ > + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); > + /* Make sure the function was chosen before we start reading. */ > + mb(); > + /* Read from that function's config space. */ > + ret = readw(addr); > + /* > + * mb() is not required here, because the > + * spin_unlock_irqrestore() is a barrier. > + */ > + } > > - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); > + spin_unlock_irqrestore(&hbus->config_lock, flags); > > return ret; > } > @@ -1227,38 +1247,45 @@ static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) > static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, > int size, u32 val) > { > + struct hv_pcibus_device *hbus = hpdev->hbus; > + int offset = where + CFG_PAGE_OFFSET; > unsigned long flags; > - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where; > > if (where >= PCI_SUBSYSTEM_VENDOR_ID && > where + size <= PCI_CAPABILITY_LIST) { > /* SSIDs and ROM BARs are read-only */ > } else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) { > - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); > - /* Choose the function to be written. (See comment above) */ > - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); > - /* Make sure the function was chosen before we start writing. */ > - wmb(); > - /* Write to that function's config space. */ > - switch (size) { > - case 1: > - writeb(val, addr); > - break; > - case 2: > - writew(val, addr); > - break; > - default: > - writel(val, addr); > - break; > + spin_lock_irqsave(&hbus->config_lock, flags); > + > + if (hbus->use_calls) { > + phys_addr_t addr = hbus->mem_config->start + offset; > + > + hv_pci_write_mmio(hbus->mem_config->start, 4, > + hpdev->desc.win_slot.slot); > + hv_pci_write_mmio(addr, size, val); > + } else { > + void __iomem *addr = hbus->cfg_addr + offset; > + > + /* Choose the function to write. (See comment above) */ > + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); > + /* Make sure the function was chosen before writing. */ > + wmb(); > + /* Write to that function's config space. */ > + switch (size) { > + case 1: > + writeb(val, addr); > + break; > + case 2: > + writew(val, addr); > + break; > + default: > + writel(val, addr); > + break; > + } Ditto, an mb() is needed here to align with the old code. With these fixed, feel free to add Reviewed-by: Boqun Feng <boqun.feng@gmail.com> Regards, BOqun > } > - /* > - * Make sure the write was done before we release the spinlock > - * allowing consecutive reads/writes. > - */ > - mb(); > - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); > + spin_unlock_irqrestore(&hbus->config_lock, flags); > } else { > - dev_err(&hpdev->hbus->hdev->device, > + dev_err(&hbus->hdev->device, > "Attempt to write beyond a function's config space.\n"); > } > } > @@ -3568,6 +3595,7 @@ static int hv_pci_probe(struct hv_device *hdev, > hbus->bridge->domain_nr = dom; > #ifdef CONFIG_X86 > hbus->sysdata.domain = dom; > + hbus->use_calls = !!(ms_hyperv.hints & HV_X64_USE_MMIO_HYPERCALLS); > #elif defined(CONFIG_ARM64) > /* > * Set the PCI bus parent to be the corresponding VMbus > @@ -3577,6 +3605,7 @@ static int hv_pci_probe(struct hv_device *hdev, > * information to devices created on the bus. > */ > hbus->sysdata.parent = hdev->device.parent; > + hbus->use_calls = false; > #endif > > hbus->hdev = hdev; > -- > 1.8.3.1 > >
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 5b12040..c0f9ac2 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -67,7 +67,7 @@ { .dev_type = HV_PCIE, HV_PCIE_GUID, .perf_device = false, - .allowed_in_isolated = false, + .allowed_in_isolated = true, }, /* Synthetic Frame Buffer */ diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 02ebf3e..9873296 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -514,6 +514,7 @@ struct hv_pcibus_device { /* Highest slot of child device with resources allocated */ int wslot_res_allocated; + bool use_calls; /* Use hypercalls to access mmio cfg space */ /* hypercall arg, must not cross page boundary */ struct hv_retarget_device_interrupt retarget_msi_interrupt_params; @@ -1134,8 +1135,9 @@ static void hv_pci_write_mmio(phys_addr_t gpa, int size, u32 val) static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size, u32 *val) { + struct hv_pcibus_device *hbus = hpdev->hbus; + int offset = where + CFG_PAGE_OFFSET; unsigned long flags; - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where; /* * If the attempt is to read the IDs or the ROM BAR, simulate that. @@ -1163,56 +1165,74 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, */ *val = 0; } else if (where + size <= CFG_PAGE_SIZE) { - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); - /* Choose the function to be read. (See comment above) */ - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); - /* Make sure the function was chosen before we start reading. */ - mb(); - /* Read from that function's config space. */ - switch (size) { - case 1: - *val = readb(addr); - break; - case 2: - *val = readw(addr); - break; - default: - *val = readl(addr); - break; + + spin_lock_irqsave(&hbus->config_lock, flags); + if (hbus->use_calls) { + phys_addr_t addr = hbus->mem_config->start + offset; + + hv_pci_write_mmio(hbus->mem_config->start, 4, + hpdev->desc.win_slot.slot); + hv_pci_read_mmio(addr, size, val); + } else { + void __iomem *addr = hbus->cfg_addr + offset; + + /* Choose the function to be read. (See comment above) */ + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); + /* Make sure the function was chosen before reading. */ + mb(); + /* Read from that function's config space. */ + switch (size) { + case 1: + *val = readb(addr); + break; + case 2: + *val = readw(addr); + break; + default: + *val = readl(addr); + break; + } } - /* - * Make sure the read was done before we release the spinlock - * allowing consecutive reads/writes. - */ - mb(); - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); + spin_unlock_irqrestore(&hbus->config_lock, flags); } else { - dev_err(&hpdev->hbus->hdev->device, + dev_err(&hbus->hdev->device, "Attempt to read beyond a function's config space.\n"); } } static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) { + struct hv_pcibus_device *hbus = hpdev->hbus; + u32 val; u16 ret; unsigned long flags; - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + - PCI_VENDOR_ID; - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); + spin_lock_irqsave(&hbus->config_lock, flags); - /* Choose the function to be read. (See comment above) */ - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); - /* Make sure the function was chosen before we start reading. */ - mb(); - /* Read from that function's config space. */ - ret = readw(addr); - /* - * mb() is not required here, because the spin_unlock_irqrestore() - * is a barrier. - */ + if (hbus->use_calls) { + phys_addr_t addr = hbus->mem_config->start + + CFG_PAGE_OFFSET + PCI_VENDOR_ID; + + hv_pci_write_mmio(hbus->mem_config->start, 4, + hpdev->desc.win_slot.slot); + hv_pci_read_mmio(addr, 2, &val); + ret = val; /* Truncates to 16 bits */ + } else { + void __iomem *addr = hbus->cfg_addr + CFG_PAGE_OFFSET + + PCI_VENDOR_ID; + /* Choose the function to be read. (See comment above) */ + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); + /* Make sure the function was chosen before we start reading. */ + mb(); + /* Read from that function's config space. */ + ret = readw(addr); + /* + * mb() is not required here, because the + * spin_unlock_irqrestore() is a barrier. + */ + } - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); + spin_unlock_irqrestore(&hbus->config_lock, flags); return ret; } @@ -1227,38 +1247,45 @@ static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev) static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size, u32 val) { + struct hv_pcibus_device *hbus = hpdev->hbus; + int offset = where + CFG_PAGE_OFFSET; unsigned long flags; - void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where; if (where >= PCI_SUBSYSTEM_VENDOR_ID && where + size <= PCI_CAPABILITY_LIST) { /* SSIDs and ROM BARs are read-only */ } else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) { - spin_lock_irqsave(&hpdev->hbus->config_lock, flags); - /* Choose the function to be written. (See comment above) */ - writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr); - /* Make sure the function was chosen before we start writing. */ - wmb(); - /* Write to that function's config space. */ - switch (size) { - case 1: - writeb(val, addr); - break; - case 2: - writew(val, addr); - break; - default: - writel(val, addr); - break; + spin_lock_irqsave(&hbus->config_lock, flags); + + if (hbus->use_calls) { + phys_addr_t addr = hbus->mem_config->start + offset; + + hv_pci_write_mmio(hbus->mem_config->start, 4, + hpdev->desc.win_slot.slot); + hv_pci_write_mmio(addr, size, val); + } else { + void __iomem *addr = hbus->cfg_addr + offset; + + /* Choose the function to write. (See comment above) */ + writel(hpdev->desc.win_slot.slot, hbus->cfg_addr); + /* Make sure the function was chosen before writing. */ + wmb(); + /* Write to that function's config space. */ + switch (size) { + case 1: + writeb(val, addr); + break; + case 2: + writew(val, addr); + break; + default: + writel(val, addr); + break; + } } - /* - * Make sure the write was done before we release the spinlock - * allowing consecutive reads/writes. - */ - mb(); - spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags); + spin_unlock_irqrestore(&hbus->config_lock, flags); } else { - dev_err(&hpdev->hbus->hdev->device, + dev_err(&hbus->hdev->device, "Attempt to write beyond a function's config space.\n"); } } @@ -3568,6 +3595,7 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->bridge->domain_nr = dom; #ifdef CONFIG_X86 hbus->sysdata.domain = dom; + hbus->use_calls = !!(ms_hyperv.hints & HV_X64_USE_MMIO_HYPERCALLS); #elif defined(CONFIG_ARM64) /* * Set the PCI bus parent to be the corresponding VMbus @@ -3577,6 +3605,7 @@ static int hv_pci_probe(struct hv_device *hdev, * information to devices created on the bus. */ hbus->sysdata.parent = hdev->device.parent; + hbus->use_calls = false; #endif hbus->hdev = hdev;