Message ID | 1474903724-383-1-git-send-email-keith.busch@intel.com (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
On Mon, Sep 26, 2016 at 09:28:44AM -0600, Keith Busch wrote: > This moves the driver source and Kconfig to the pci host bridge drivers > directory, relocating the config option to a more appropriate sub-menu > instead of occupying the top level location. > > The Kconfig option for VMD has been updated with its X86_64 dependency > that was implicitly included from the previous location, and added > missing information for building this driver as a loadable module. > > Cc: Jon Derrick <jonathan.derrick@intel.com> > Signed-off-by: Keith Busch <keith.busch@intel.com> Applied to pci/host-vmd for v4.9, thanks, Keith. > --- > arch/x86/Kconfig | 13 - > arch/x86/pci/Makefile | 2 - > arch/x86/pci/vmd.c | 766 ---------------------------------------------- > drivers/pci/host/Kconfig | 16 + > drivers/pci/host/Makefile | 1 + > drivers/pci/host/vmd.c | 766 ++++++++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 783 insertions(+), 781 deletions(-) > delete mode 100644 arch/x86/pci/vmd.c > create mode 100644 drivers/pci/host/vmd.c > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index 5c6e747..c320838 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -2744,19 +2744,6 @@ config PMC_ATOM > def_bool y > depends on PCI > > -config VMD > - depends on PCI_MSI > - tristate "Volume Management Device Driver" > - default N > - ---help--- > - Adds support for the Intel Volume Management Device (VMD). VMD is a > - secondary PCI host bridge that allows PCI Express root ports, > - and devices attached to them, to be removed from the default > - PCI domain and placed within the VMD domain. This provides > - more bus resources than are otherwise possible with a > - single domain. If you know your system provides one of these and > - has devices attached to it, say Y; if you are not sure, say N. 
> - > source "net/Kconfig" > > source "drivers/Kconfig" > diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile > index 97062a6..5c6fc35 100644 > --- a/arch/x86/pci/Makefile > +++ b/arch/x86/pci/Makefile > @@ -23,8 +23,6 @@ obj-y += bus_numa.o > obj-$(CONFIG_AMD_NB) += amd_bus.o > obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o > > -obj-$(CONFIG_VMD) += vmd.o > - > ifeq ($(CONFIG_PCI_DEBUG),y) > EXTRA_CFLAGS += -DDEBUG > endif > diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c > deleted file mode 100644 > index a021b7b..0000000 > --- a/arch/x86/pci/vmd.c > +++ /dev/null > @@ -1,766 +0,0 @@ > -/* > - * Volume Management Device driver > - * Copyright (c) 2015, Intel Corporation. > - * > - * This program is free software; you can redistribute it and/or modify it > - * under the terms and conditions of the GNU General Public License, > - * version 2, as published by the Free Software Foundation. > - * > - * This program is distributed in the hope it will be useful, but WITHOUT > - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > - * more details. > - */ > - > -#include <linux/device.h> > -#include <linux/interrupt.h> > -#include <linux/irq.h> > -#include <linux/kernel.h> > -#include <linux/module.h> > -#include <linux/msi.h> > -#include <linux/pci.h> > -#include <linux/rculist.h> > -#include <linux/rcupdate.h> > - > -#include <asm/irqdomain.h> > -#include <asm/device.h> > -#include <asm/msi.h> > -#include <asm/msidef.h> > - > -#define VMD_CFGBAR 0 > -#define VMD_MEMBAR1 2 > -#define VMD_MEMBAR2 4 > - > -/* > - * Lock for manipulating VMD IRQ lists. > - */ > -static DEFINE_RAW_SPINLOCK(list_lock); > - > -/** > - * struct vmd_irq - private data to map driver IRQ to the VMD shared vector > - * @node: list item for parent traversal. > - * @rcu: RCU callback item for freeing. > - * @irq: back pointer to parent. 
> - * @virq: the virtual IRQ value provided to the requesting driver. > - * > - * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to > - * a VMD IRQ using this structure. > - */ > -struct vmd_irq { > - struct list_head node; > - struct rcu_head rcu; > - struct vmd_irq_list *irq; > - unsigned int virq; > -}; > - > -/** > - * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector > - * @irq_list: the list of irq's the VMD one demuxes to. > - * @vmd_vector: the h/w IRQ assigned to the VMD. > - * @index: index into the VMD MSI-X table; used for message routing. > - * @count: number of child IRQs assigned to this vector; used to track > - * sharing. > - */ > -struct vmd_irq_list { > - struct list_head irq_list; > - struct vmd_dev *vmd; > - unsigned int vmd_vector; > - unsigned int index; > - unsigned int count; > -}; > - > -struct vmd_dev { > - struct pci_dev *dev; > - > - spinlock_t cfg_lock; > - char __iomem *cfgbar; > - > - int msix_count; > - struct msix_entry *msix_entries; > - struct vmd_irq_list *irqs; > - > - struct pci_sysdata sysdata; > - struct resource resources[3]; > - struct irq_domain *irq_domain; > - struct pci_bus *bus; > - > -#ifdef CONFIG_X86_DEV_DMA_OPS > - struct dma_map_ops dma_ops; > - struct dma_domain dma_domain; > -#endif > -}; > - > -static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus) > -{ > - return container_of(bus->sysdata, struct vmd_dev, sysdata); > -} > - > -/* > - * Drivers managing a device in a VMD domain allocate their own IRQs as before, > - * but the MSI entry for the hardware it's driving will be programmed with a > - * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its > - * domain into one of its own, and the VMD driver de-muxes these for the > - * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations > - * and irq_chip to set this up. 
> - */ > -static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) > -{ > - struct vmd_irq *vmdirq = data->chip_data; > - struct vmd_irq_list *irq = vmdirq->irq; > - > - msg->address_hi = MSI_ADDR_BASE_HI; > - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_ID(irq->index); > - msg->data = 0; > -} > - > -/* > - * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops. > - */ > -static void vmd_irq_enable(struct irq_data *data) > -{ > - struct vmd_irq *vmdirq = data->chip_data; > - unsigned long flags; > - > - raw_spin_lock_irqsave(&list_lock, flags); > - list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); > - raw_spin_unlock_irqrestore(&list_lock, flags); > - > - data->chip->irq_unmask(data); > -} > - > -static void vmd_irq_disable(struct irq_data *data) > -{ > - struct vmd_irq *vmdirq = data->chip_data; > - unsigned long flags; > - > - data->chip->irq_mask(data); > - > - raw_spin_lock_irqsave(&list_lock, flags); > - list_del_rcu(&vmdirq->node); > - INIT_LIST_HEAD_RCU(&vmdirq->node); > - raw_spin_unlock_irqrestore(&list_lock, flags); > -} > - > -/* > - * XXX: Stubbed until we develop acceptable way to not create conflicts with > - * other devices sharing the same vector. > - */ > -static int vmd_irq_set_affinity(struct irq_data *data, > - const struct cpumask *dest, bool force) > -{ > - return -EINVAL; > -} > - > -static struct irq_chip vmd_msi_controller = { > - .name = "VMD-MSI", > - .irq_enable = vmd_irq_enable, > - .irq_disable = vmd_irq_disable, > - .irq_compose_msi_msg = vmd_compose_msi_msg, > - .irq_set_affinity = vmd_irq_set_affinity, > -}; > - > -static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, > - msi_alloc_info_t *arg) > -{ > - return 0; > -} > - > -/* > - * XXX: We can be even smarter selecting the best IRQ once we solve the > - * affinity problem. 
> - */ > -static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc) > -{ > - int i, best = 1; > - unsigned long flags; > - > - if (!desc->msi_attrib.is_msix || vmd->msix_count == 1) > - return &vmd->irqs[0]; > - > - raw_spin_lock_irqsave(&list_lock, flags); > - for (i = 1; i < vmd->msix_count; i++) > - if (vmd->irqs[i].count < vmd->irqs[best].count) > - best = i; > - vmd->irqs[best].count++; > - raw_spin_unlock_irqrestore(&list_lock, flags); > - > - return &vmd->irqs[best]; > -} > - > -static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, > - unsigned int virq, irq_hw_number_t hwirq, > - msi_alloc_info_t *arg) > -{ > - struct msi_desc *desc = arg->desc; > - struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); > - struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); > - > - if (!vmdirq) > - return -ENOMEM; > - > - INIT_LIST_HEAD(&vmdirq->node); > - vmdirq->irq = vmd_next_irq(vmd, desc); > - vmdirq->virq = virq; > - > - irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, > - vmdirq, handle_untracked_irq, vmd, NULL); > - return 0; > -} > - > -static void vmd_msi_free(struct irq_domain *domain, > - struct msi_domain_info *info, unsigned int virq) > -{ > - struct vmd_irq *vmdirq = irq_get_chip_data(virq); > - unsigned long flags; > - > - /* XXX: Potential optimization to rebalance */ > - raw_spin_lock_irqsave(&list_lock, flags); > - vmdirq->irq->count--; > - raw_spin_unlock_irqrestore(&list_lock, flags); > - > - kfree_rcu(vmdirq, rcu); > -} > - > -static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev, > - int nvec, msi_alloc_info_t *arg) > -{ > - struct pci_dev *pdev = to_pci_dev(dev); > - struct vmd_dev *vmd = vmd_from_bus(pdev->bus); > - > - if (nvec > vmd->msix_count) > - return vmd->msix_count; > - > - memset(arg, 0, sizeof(*arg)); > - return 0; > -} > - > -static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) > -{ > - arg->desc = 
desc; > -} > - > -static struct msi_domain_ops vmd_msi_domain_ops = { > - .get_hwirq = vmd_get_hwirq, > - .msi_init = vmd_msi_init, > - .msi_free = vmd_msi_free, > - .msi_prepare = vmd_msi_prepare, > - .set_desc = vmd_set_desc, > -}; > - > -static struct msi_domain_info vmd_msi_domain_info = { > - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | > - MSI_FLAG_PCI_MSIX, > - .ops = &vmd_msi_domain_ops, > - .chip = &vmd_msi_controller, > -}; > - > -#ifdef CONFIG_X86_DEV_DMA_OPS > -/* > - * VMD replaces the requester ID with its own. DMA mappings for devices in a > - * VMD domain need to be mapped for the VMD, not the device requiring > - * the mapping. > - */ > -static struct device *to_vmd_dev(struct device *dev) > -{ > - struct pci_dev *pdev = to_pci_dev(dev); > - struct vmd_dev *vmd = vmd_from_bus(pdev->bus); > - > - return &vmd->dev->dev; > -} > - > -static struct dma_map_ops *vmd_dma_ops(struct device *dev) > -{ > - return get_dma_ops(to_vmd_dev(dev)); > -} > - > -static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, > - gfp_t flag, unsigned long attrs) > -{ > - return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, > - attrs); > -} > - > -static void vmd_free(struct device *dev, size_t size, void *vaddr, > - dma_addr_t addr, unsigned long attrs) > -{ > - return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, > - attrs); > -} > - > -static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, > - void *cpu_addr, dma_addr_t addr, size_t size, > - unsigned long attrs) > -{ > - return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, > - size, attrs); > -} > - > -static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, > - void *cpu_addr, dma_addr_t addr, size_t size, > - unsigned long attrs) > -{ > - return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, > - addr, size, attrs); > -} > - > -static dma_addr_t vmd_map_page(struct device *dev, struct page *page, > - 
unsigned long offset, size_t size, > - enum dma_data_direction dir, > - unsigned long attrs) > -{ > - return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, > - dir, attrs); > -} > - > -static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, > - enum dma_data_direction dir, unsigned long attrs) > -{ > - vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); > -} > - > -static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, > - enum dma_data_direction dir, unsigned long attrs) > -{ > - return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); > -} > - > -static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, > - enum dma_data_direction dir, unsigned long attrs) > -{ > - vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); > -} > - > -static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, > - size_t size, enum dma_data_direction dir) > -{ > - vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); > -} > - > -static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, > - size_t size, enum dma_data_direction dir) > -{ > - vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, > - dir); > -} > - > -static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, > - int nents, enum dma_data_direction dir) > -{ > - vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); > -} > - > -static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, > - int nents, enum dma_data_direction dir) > -{ > - vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); > -} > - > -static int vmd_mapping_error(struct device *dev, dma_addr_t addr) > -{ > - return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); > -} > - > -static int vmd_dma_supported(struct device *dev, u64 mask) > -{ > - return 
vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); > -} > - > -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK > -static u64 vmd_get_required_mask(struct device *dev) > -{ > - return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); > -} > -#endif > - > -static void vmd_teardown_dma_ops(struct vmd_dev *vmd) > -{ > - struct dma_domain *domain = &vmd->dma_domain; > - > - if (get_dma_ops(&vmd->dev->dev)) > - del_dma_domain(domain); > -} > - > -#define ASSIGN_VMD_DMA_OPS(source, dest, fn) \ > - do { \ > - if (source->fn) \ > - dest->fn = vmd_##fn; \ > - } while (0) > - > -static void vmd_setup_dma_ops(struct vmd_dev *vmd) > -{ > - const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev); > - struct dma_map_ops *dest = &vmd->dma_ops; > - struct dma_domain *domain = &vmd->dma_domain; > - > - domain->domain_nr = vmd->sysdata.domain; > - domain->dma_ops = dest; > - > - if (!source) > - return; > - ASSIGN_VMD_DMA_OPS(source, dest, alloc); > - ASSIGN_VMD_DMA_OPS(source, dest, free); > - ASSIGN_VMD_DMA_OPS(source, dest, mmap); > - ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable); > - ASSIGN_VMD_DMA_OPS(source, dest, map_page); > - ASSIGN_VMD_DMA_OPS(source, dest, unmap_page); > - ASSIGN_VMD_DMA_OPS(source, dest, map_sg); > - ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg); > - ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu); > - ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); > - ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); > - ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); > - ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); > - ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); > -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK > - ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); > -#endif > - add_dma_domain(domain); > -} > -#undef ASSIGN_VMD_DMA_OPS > -#else > -static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {} > -static void vmd_setup_dma_ops(struct vmd_dev *vmd) {} > -#endif > - > -static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct 
pci_bus *bus, > - unsigned int devfn, int reg, int len) > -{ > - char __iomem *addr = vmd->cfgbar + > - (bus->number << 20) + (devfn << 12) + reg; > - > - if ((addr - vmd->cfgbar) + len >= > - resource_size(&vmd->dev->resource[VMD_CFGBAR])) > - return NULL; > - > - return addr; > -} > - > -/* > - * CPU may deadlock if config space is not serialized on some versions of this > - * hardware, so all config space access is done under a spinlock. > - */ > -static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg, > - int len, u32 *value) > -{ > - struct vmd_dev *vmd = vmd_from_bus(bus); > - char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); > - unsigned long flags; > - int ret = 0; > - > - if (!addr) > - return -EFAULT; > - > - spin_lock_irqsave(&vmd->cfg_lock, flags); > - switch (len) { > - case 1: > - *value = readb(addr); > - break; > - case 2: > - *value = readw(addr); > - break; > - case 4: > - *value = readl(addr); > - break; > - default: > - ret = -EINVAL; > - break; > - } > - spin_unlock_irqrestore(&vmd->cfg_lock, flags); > - return ret; > -} > - > -/* > - * VMD h/w converts non-posted config writes to posted memory writes. The > - * read-back in this function forces the completion so it returns only after > - * the config space was written, as expected. 
> - */ > -static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, > - int len, u32 value) > -{ > - struct vmd_dev *vmd = vmd_from_bus(bus); > - char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); > - unsigned long flags; > - int ret = 0; > - > - if (!addr) > - return -EFAULT; > - > - spin_lock_irqsave(&vmd->cfg_lock, flags); > - switch (len) { > - case 1: > - writeb(value, addr); > - readb(addr); > - break; > - case 2: > - writew(value, addr); > - readw(addr); > - break; > - case 4: > - writel(value, addr); > - readl(addr); > - break; > - default: > - ret = -EINVAL; > - break; > - } > - spin_unlock_irqrestore(&vmd->cfg_lock, flags); > - return ret; > -} > - > -static struct pci_ops vmd_ops = { > - .read = vmd_pci_read, > - .write = vmd_pci_write, > -}; > - > -static void vmd_attach_resources(struct vmd_dev *vmd) > -{ > - vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; > - vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2]; > -} > - > -static void vmd_detach_resources(struct vmd_dev *vmd) > -{ > - vmd->dev->resource[VMD_MEMBAR1].child = NULL; > - vmd->dev->resource[VMD_MEMBAR2].child = NULL; > -} > - > -/* > - * VMD domains start at 0x1000 to not clash with ACPI _SEG domains. 
> - */ > -static int vmd_find_free_domain(void) > -{ > - int domain = 0xffff; > - struct pci_bus *bus = NULL; > - > - while ((bus = pci_find_next_bus(bus)) != NULL) > - domain = max_t(int, domain, pci_domain_nr(bus)); > - return domain + 1; > -} > - > -static int vmd_enable_domain(struct vmd_dev *vmd) > -{ > - struct pci_sysdata *sd = &vmd->sysdata; > - struct resource *res; > - u32 upper_bits; > - unsigned long flags; > - LIST_HEAD(resources); > - > - res = &vmd->dev->resource[VMD_CFGBAR]; > - vmd->resources[0] = (struct resource) { > - .name = "VMD CFGBAR", > - .start = 0, > - .end = (resource_size(res) >> 20) - 1, > - .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED, > - }; > - > - /* > - * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can > - * put 32-bit resources in the window. > - * > - * There's no hardware reason why a 64-bit window *couldn't* > - * contain a 32-bit resource, but pbus_size_mem() computes the > - * bridge window size assuming a 64-bit window will contain no > - * 32-bit resources. __pci_assign_resource() enforces that > - * artificial restriction to make sure everything will fit. > - * > - * The only way we could use a 64-bit non-prefechable MEMBAR is > - * if its address is <4GB so that we can convert it to a 32-bit > - * resource. To be visible to the host OS, all VMD endpoints must > - * be initially configured by platform BIOS, which includes setting > - * up these resources. We can assume the device is configured > - * according to the platform needs. 
> - */ > - res = &vmd->dev->resource[VMD_MEMBAR1]; > - upper_bits = upper_32_bits(res->end); > - flags = res->flags & ~IORESOURCE_SIZEALIGN; > - if (!upper_bits) > - flags &= ~IORESOURCE_MEM_64; > - vmd->resources[1] = (struct resource) { > - .name = "VMD MEMBAR1", > - .start = res->start, > - .end = res->end, > - .flags = flags, > - .parent = res, > - }; > - > - res = &vmd->dev->resource[VMD_MEMBAR2]; > - upper_bits = upper_32_bits(res->end); > - flags = res->flags & ~IORESOURCE_SIZEALIGN; > - if (!upper_bits) > - flags &= ~IORESOURCE_MEM_64; > - vmd->resources[2] = (struct resource) { > - .name = "VMD MEMBAR2", > - .start = res->start + 0x2000, > - .end = res->end, > - .flags = flags, > - .parent = res, > - }; > - > - sd->vmd_domain = true; > - sd->domain = vmd_find_free_domain(); > - if (sd->domain < 0) > - return sd->domain; > - > - sd->node = pcibus_to_node(vmd->dev->bus); > - > - vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info, > - x86_vector_domain); > - if (!vmd->irq_domain) > - return -ENODEV; > - > - pci_add_resource(&resources, &vmd->resources[0]); > - pci_add_resource(&resources, &vmd->resources[1]); > - pci_add_resource(&resources, &vmd->resources[2]); > - vmd->bus = pci_create_root_bus(&vmd->dev->dev, 0, &vmd_ops, sd, > - &resources); > - if (!vmd->bus) { > - pci_free_resource_list(&resources); > - irq_domain_remove(vmd->irq_domain); > - return -ENODEV; > - } > - > - vmd_attach_resources(vmd); > - vmd_setup_dma_ops(vmd); > - dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); > - pci_rescan_bus(vmd->bus); > - > - WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj, > - "domain"), "Can't create symlink to domain\n"); > - return 0; > -} > - > -static irqreturn_t vmd_irq(int irq, void *data) > -{ > - struct vmd_irq_list *irqs = data; > - struct vmd_irq *vmdirq; > - > - rcu_read_lock(); > - list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node) > - generic_handle_irq(vmdirq->virq); > - rcu_read_unlock(); > - > - return 
IRQ_HANDLED; > -} > - > -static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) > -{ > - struct vmd_dev *vmd; > - int i, err; > - > - if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) > - return -ENOMEM; > - > - vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL); > - if (!vmd) > - return -ENOMEM; > - > - vmd->dev = dev; > - err = pcim_enable_device(dev); > - if (err < 0) > - return err; > - > - vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0); > - if (!vmd->cfgbar) > - return -ENOMEM; > - > - pci_set_master(dev); > - if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) && > - dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) > - return -ENODEV; > - > - vmd->msix_count = pci_msix_vec_count(dev); > - if (vmd->msix_count < 0) > - return -ENODEV; > - > - vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), > - GFP_KERNEL); > - if (!vmd->irqs) > - return -ENOMEM; > - > - vmd->msix_entries = devm_kcalloc(&dev->dev, vmd->msix_count, > - sizeof(*vmd->msix_entries), > - GFP_KERNEL); > - if (!vmd->msix_entries) > - return -ENOMEM; > - for (i = 0; i < vmd->msix_count; i++) > - vmd->msix_entries[i].entry = i; > - > - vmd->msix_count = pci_enable_msix_range(vmd->dev, vmd->msix_entries, 1, > - vmd->msix_count); > - if (vmd->msix_count < 0) > - return vmd->msix_count; > - > - for (i = 0; i < vmd->msix_count; i++) { > - INIT_LIST_HEAD(&vmd->irqs[i].irq_list); > - vmd->irqs[i].vmd_vector = vmd->msix_entries[i].vector; > - vmd->irqs[i].index = i; > - > - err = devm_request_irq(&dev->dev, vmd->irqs[i].vmd_vector, > - vmd_irq, 0, "vmd", &vmd->irqs[i]); > - if (err) > - return err; > - } > - > - spin_lock_init(&vmd->cfg_lock); > - pci_set_drvdata(dev, vmd); > - err = vmd_enable_domain(vmd); > - if (err) > - return err; > - > - dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n", > - vmd->sysdata.domain); > - return 0; > -} > - > -static void vmd_remove(struct pci_dev *dev) > -{ > - struct vmd_dev *vmd = 
pci_get_drvdata(dev); > - > - vmd_detach_resources(vmd); > - pci_set_drvdata(dev, NULL); > - sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); > - pci_stop_root_bus(vmd->bus); > - pci_remove_root_bus(vmd->bus); > - vmd_teardown_dma_ops(vmd); > - irq_domain_remove(vmd->irq_domain); > -} > - > -#ifdef CONFIG_PM > -static int vmd_suspend(struct device *dev) > -{ > - struct pci_dev *pdev = to_pci_dev(dev); > - > - pci_save_state(pdev); > - return 0; > -} > - > -static int vmd_resume(struct device *dev) > -{ > - struct pci_dev *pdev = to_pci_dev(dev); > - > - pci_restore_state(pdev); > - return 0; > -} > -#endif > -static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume); > - > -static const struct pci_device_id vmd_ids[] = { > - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x201d),}, > - {0,} > -}; > -MODULE_DEVICE_TABLE(pci, vmd_ids); > - > -static struct pci_driver vmd_drv = { > - .name = "vmd", > - .id_table = vmd_ids, > - .probe = vmd_probe, > - .remove = vmd_remove, > - .driver = { > - .pm = &vmd_dev_pm_ops, > - }, > -}; > -module_pci_driver(vmd_drv); > - > -MODULE_AUTHOR("Intel Corporation"); > -MODULE_LICENSE("GPL v2"); > -MODULE_VERSION("0.6"); > diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig > index 9b485d8..93865eb 100644 > --- a/drivers/pci/host/Kconfig > +++ b/drivers/pci/host/Kconfig > @@ -274,4 +274,20 @@ config PCIE_ARTPEC6 > Say Y here to enable PCIe controller support on Axis ARTPEC-6 > SoCs. This PCIe controller uses the DesignWare core. > > +config VMD > + depends on PCI_MSI && X86_64 > + tristate "Intel Volume Management Device Driver" > + default N > + ---help--- > + Adds support for the Intel Volume Management Device (VMD). VMD is a > + secondary PCI host bridge that allows PCI Express root ports, > + and devices attached to them, to be removed from the default > + PCI domain and placed within the VMD domain. This provides > + more bus resources than are otherwise possible with a > + single domain. 
If you know your system provides one of these and > + has devices attached to it, say Y; if you are not sure, say N. > + > + To compile this driver as a module, choose M here: the > + module will be called vmd. > + > endmenu > diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile > index 8843410..afea1c6 100644 > --- a/drivers/pci/host/Makefile > +++ b/drivers/pci/host/Makefile > @@ -31,3 +31,4 @@ obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o > obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o > obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o > obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o > +obj-$(CONFIG_VMD) += vmd.o > diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c > new file mode 100644 > index 0000000..a021b7b > --- /dev/null > +++ b/drivers/pci/host/vmd.c > @@ -0,0 +1,766 @@ > +/* > + * Volume Management Device driver > + * Copyright (c) 2015, Intel Corporation. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + */ > + > +#include <linux/device.h> > +#include <linux/interrupt.h> > +#include <linux/irq.h> > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/msi.h> > +#include <linux/pci.h> > +#include <linux/rculist.h> > +#include <linux/rcupdate.h> > + > +#include <asm/irqdomain.h> > +#include <asm/device.h> > +#include <asm/msi.h> > +#include <asm/msidef.h> > + > +#define VMD_CFGBAR 0 > +#define VMD_MEMBAR1 2 > +#define VMD_MEMBAR2 4 > + > +/* > + * Lock for manipulating VMD IRQ lists. 
> + */ > +static DEFINE_RAW_SPINLOCK(list_lock); > + > +/** > + * struct vmd_irq - private data to map driver IRQ to the VMD shared vector > + * @node: list item for parent traversal. > + * @rcu: RCU callback item for freeing. > + * @irq: back pointer to parent. > + * @virq: the virtual IRQ value provided to the requesting driver. > + * > + * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to > + * a VMD IRQ using this structure. > + */ > +struct vmd_irq { > + struct list_head node; > + struct rcu_head rcu; > + struct vmd_irq_list *irq; > + unsigned int virq; > +}; > + > +/** > + * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector > + * @irq_list: the list of irq's the VMD one demuxes to. > + * @vmd_vector: the h/w IRQ assigned to the VMD. > + * @index: index into the VMD MSI-X table; used for message routing. > + * @count: number of child IRQs assigned to this vector; used to track > + * sharing. > + */ > +struct vmd_irq_list { > + struct list_head irq_list; > + struct vmd_dev *vmd; > + unsigned int vmd_vector; > + unsigned int index; > + unsigned int count; > +}; > + > +struct vmd_dev { > + struct pci_dev *dev; > + > + spinlock_t cfg_lock; > + char __iomem *cfgbar; > + > + int msix_count; > + struct msix_entry *msix_entries; > + struct vmd_irq_list *irqs; > + > + struct pci_sysdata sysdata; > + struct resource resources[3]; > + struct irq_domain *irq_domain; > + struct pci_bus *bus; > + > +#ifdef CONFIG_X86_DEV_DMA_OPS > + struct dma_map_ops dma_ops; > + struct dma_domain dma_domain; > +#endif > +}; > + > +static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus) > +{ > + return container_of(bus->sysdata, struct vmd_dev, sysdata); > +} > + > +/* > + * Drivers managing a device in a VMD domain allocate their own IRQs as before, > + * but the MSI entry for the hardware it's driving will be programmed with a > + * destination ID for the VMD MSI-X table. 
The VMD muxes interrupts in its > + * domain into one of its own, and the VMD driver de-muxes these for the > + * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations > + * and irq_chip to set this up. > + */ > +static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) > +{ > + struct vmd_irq *vmdirq = data->chip_data; > + struct vmd_irq_list *irq = vmdirq->irq; > + > + msg->address_hi = MSI_ADDR_BASE_HI; > + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_ID(irq->index); > + msg->data = 0; > +} > + > +/* > + * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops. > + */ > +static void vmd_irq_enable(struct irq_data *data) > +{ > + struct vmd_irq *vmdirq = data->chip_data; > + unsigned long flags; > + > + raw_spin_lock_irqsave(&list_lock, flags); > + list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); > + raw_spin_unlock_irqrestore(&list_lock, flags); > + > + data->chip->irq_unmask(data); > +} > + > +static void vmd_irq_disable(struct irq_data *data) > +{ > + struct vmd_irq *vmdirq = data->chip_data; > + unsigned long flags; > + > + data->chip->irq_mask(data); > + > + raw_spin_lock_irqsave(&list_lock, flags); > + list_del_rcu(&vmdirq->node); > + INIT_LIST_HEAD_RCU(&vmdirq->node); > + raw_spin_unlock_irqrestore(&list_lock, flags); > +} > + > +/* > + * XXX: Stubbed until we develop acceptable way to not create conflicts with > + * other devices sharing the same vector. 
> + */ > +static int vmd_irq_set_affinity(struct irq_data *data, > + const struct cpumask *dest, bool force) > +{ > + return -EINVAL; > +} > + > +static struct irq_chip vmd_msi_controller = { > + .name = "VMD-MSI", > + .irq_enable = vmd_irq_enable, > + .irq_disable = vmd_irq_disable, > + .irq_compose_msi_msg = vmd_compose_msi_msg, > + .irq_set_affinity = vmd_irq_set_affinity, > +}; > + > +static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, > + msi_alloc_info_t *arg) > +{ > + return 0; > +} > + > +/* > + * XXX: We can be even smarter selecting the best IRQ once we solve the > + * affinity problem. > + */ > +static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc) > +{ > + int i, best = 1; > + unsigned long flags; > + > + if (!desc->msi_attrib.is_msix || vmd->msix_count == 1) > + return &vmd->irqs[0]; > + > + raw_spin_lock_irqsave(&list_lock, flags); > + for (i = 1; i < vmd->msix_count; i++) > + if (vmd->irqs[i].count < vmd->irqs[best].count) > + best = i; > + vmd->irqs[best].count++; > + raw_spin_unlock_irqrestore(&list_lock, flags); > + > + return &vmd->irqs[best]; > +} > + > +static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, > + unsigned int virq, irq_hw_number_t hwirq, > + msi_alloc_info_t *arg) > +{ > + struct msi_desc *desc = arg->desc; > + struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); > + struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); > + > + if (!vmdirq) > + return -ENOMEM; > + > + INIT_LIST_HEAD(&vmdirq->node); > + vmdirq->irq = vmd_next_irq(vmd, desc); > + vmdirq->virq = virq; > + > + irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, > + vmdirq, handle_untracked_irq, vmd, NULL); > + return 0; > +} > + > +static void vmd_msi_free(struct irq_domain *domain, > + struct msi_domain_info *info, unsigned int virq) > +{ > + struct vmd_irq *vmdirq = irq_get_chip_data(virq); > + unsigned long flags; > + > + /* XXX: Potential 
optimization to rebalance */ > + raw_spin_lock_irqsave(&list_lock, flags); > + vmdirq->irq->count--; > + raw_spin_unlock_irqrestore(&list_lock, flags); > + > + kfree_rcu(vmdirq, rcu); > +} > + > +static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev, > + int nvec, msi_alloc_info_t *arg) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + struct vmd_dev *vmd = vmd_from_bus(pdev->bus); > + > + if (nvec > vmd->msix_count) > + return vmd->msix_count; > + > + memset(arg, 0, sizeof(*arg)); > + return 0; > +} > + > +static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) > +{ > + arg->desc = desc; > +} > + > +static struct msi_domain_ops vmd_msi_domain_ops = { > + .get_hwirq = vmd_get_hwirq, > + .msi_init = vmd_msi_init, > + .msi_free = vmd_msi_free, > + .msi_prepare = vmd_msi_prepare, > + .set_desc = vmd_set_desc, > +}; > + > +static struct msi_domain_info vmd_msi_domain_info = { > + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | > + MSI_FLAG_PCI_MSIX, > + .ops = &vmd_msi_domain_ops, > + .chip = &vmd_msi_controller, > +}; > + > +#ifdef CONFIG_X86_DEV_DMA_OPS > +/* > + * VMD replaces the requester ID with its own. DMA mappings for devices in a > + * VMD domain need to be mapped for the VMD, not the device requiring > + * the mapping. 
> + */ > +static struct device *to_vmd_dev(struct device *dev) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + struct vmd_dev *vmd = vmd_from_bus(pdev->bus); > + > + return &vmd->dev->dev; > +} > + > +static struct dma_map_ops *vmd_dma_ops(struct device *dev) > +{ > + return get_dma_ops(to_vmd_dev(dev)); > +} > + > +static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, > + gfp_t flag, unsigned long attrs) > +{ > + return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, > + attrs); > +} > + > +static void vmd_free(struct device *dev, size_t size, void *vaddr, > + dma_addr_t addr, unsigned long attrs) > +{ > + return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, > + attrs); > +} > + > +static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, > + void *cpu_addr, dma_addr_t addr, size_t size, > + unsigned long attrs) > +{ > + return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, > + size, attrs); > +} > + > +static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, > + void *cpu_addr, dma_addr_t addr, size_t size, > + unsigned long attrs) > +{ > + return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, > + addr, size, attrs); > +} > + > +static dma_addr_t vmd_map_page(struct device *dev, struct page *page, > + unsigned long offset, size_t size, > + enum dma_data_direction dir, > + unsigned long attrs) > +{ > + return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, > + dir, attrs); > +} > + > +static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, > + enum dma_data_direction dir, unsigned long attrs) > +{ > + vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); > +} > + > +static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, > + enum dma_data_direction dir, unsigned long attrs) > +{ > + return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); > +} > + > +static void 
vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, > + enum dma_data_direction dir, unsigned long attrs) > +{ > + vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); > +} > + > +static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, > + size_t size, enum dma_data_direction dir) > +{ > + vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); > +} > + > +static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, > + size_t size, enum dma_data_direction dir) > +{ > + vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, > + dir); > +} > + > +static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, > + int nents, enum dma_data_direction dir) > +{ > + vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); > +} > + > +static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, > + int nents, enum dma_data_direction dir) > +{ > + vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); > +} > + > +static int vmd_mapping_error(struct device *dev, dma_addr_t addr) > +{ > + return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); > +} > + > +static int vmd_dma_supported(struct device *dev, u64 mask) > +{ > + return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); > +} > + > +#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK > +static u64 vmd_get_required_mask(struct device *dev) > +{ > + return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); > +} > +#endif > + > +static void vmd_teardown_dma_ops(struct vmd_dev *vmd) > +{ > + struct dma_domain *domain = &vmd->dma_domain; > + > + if (get_dma_ops(&vmd->dev->dev)) > + del_dma_domain(domain); > +} > + > +#define ASSIGN_VMD_DMA_OPS(source, dest, fn) \ > + do { \ > + if (source->fn) \ > + dest->fn = vmd_##fn; \ > + } while (0) > + > +static void vmd_setup_dma_ops(struct vmd_dev *vmd) > +{ > + const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev); > + 
struct dma_map_ops *dest = &vmd->dma_ops; > + struct dma_domain *domain = &vmd->dma_domain; > + > + domain->domain_nr = vmd->sysdata.domain; > + domain->dma_ops = dest; > + > + if (!source) > + return; > + ASSIGN_VMD_DMA_OPS(source, dest, alloc); > + ASSIGN_VMD_DMA_OPS(source, dest, free); > + ASSIGN_VMD_DMA_OPS(source, dest, mmap); > + ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable); > + ASSIGN_VMD_DMA_OPS(source, dest, map_page); > + ASSIGN_VMD_DMA_OPS(source, dest, unmap_page); > + ASSIGN_VMD_DMA_OPS(source, dest, map_sg); > + ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg); > + ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu); > + ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); > + ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); > + ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); > + ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); > + ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); > +#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK > + ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); > +#endif > + add_dma_domain(domain); > +} > +#undef ASSIGN_VMD_DMA_OPS > +#else > +static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {} > +static void vmd_setup_dma_ops(struct vmd_dev *vmd) {} > +#endif > + > +static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus, > + unsigned int devfn, int reg, int len) > +{ > + char __iomem *addr = vmd->cfgbar + > + (bus->number << 20) + (devfn << 12) + reg; > + > + if ((addr - vmd->cfgbar) + len >= > + resource_size(&vmd->dev->resource[VMD_CFGBAR])) > + return NULL; > + > + return addr; > +} > + > +/* > + * CPU may deadlock if config space is not serialized on some versions of this > + * hardware, so all config space access is done under a spinlock. 
> + */ > +static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg, > + int len, u32 *value) > +{ > + struct vmd_dev *vmd = vmd_from_bus(bus); > + char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); > + unsigned long flags; > + int ret = 0; > + > + if (!addr) > + return -EFAULT; > + > + spin_lock_irqsave(&vmd->cfg_lock, flags); > + switch (len) { > + case 1: > + *value = readb(addr); > + break; > + case 2: > + *value = readw(addr); > + break; > + case 4: > + *value = readl(addr); > + break; > + default: > + ret = -EINVAL; > + break; > + } > + spin_unlock_irqrestore(&vmd->cfg_lock, flags); > + return ret; > +} > + > +/* > + * VMD h/w converts non-posted config writes to posted memory writes. The > + * read-back in this function forces the completion so it returns only after > + * the config space was written, as expected. > + */ > +static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, > + int len, u32 value) > +{ > + struct vmd_dev *vmd = vmd_from_bus(bus); > + char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); > + unsigned long flags; > + int ret = 0; > + > + if (!addr) > + return -EFAULT; > + > + spin_lock_irqsave(&vmd->cfg_lock, flags); > + switch (len) { > + case 1: > + writeb(value, addr); > + readb(addr); > + break; > + case 2: > + writew(value, addr); > + readw(addr); > + break; > + case 4: > + writel(value, addr); > + readl(addr); > + break; > + default: > + ret = -EINVAL; > + break; > + } > + spin_unlock_irqrestore(&vmd->cfg_lock, flags); > + return ret; > +} > + > +static struct pci_ops vmd_ops = { > + .read = vmd_pci_read, > + .write = vmd_pci_write, > +}; > + > +static void vmd_attach_resources(struct vmd_dev *vmd) > +{ > + vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; > + vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2]; > +} > + > +static void vmd_detach_resources(struct vmd_dev *vmd) > +{ > + vmd->dev->resource[VMD_MEMBAR1].child = NULL; > + 
vmd->dev->resource[VMD_MEMBAR2].child = NULL; > +} > + > +/* > + * VMD domains start at 0x1000 to not clash with ACPI _SEG domains. > + */ > +static int vmd_find_free_domain(void) > +{ > + int domain = 0xffff; > + struct pci_bus *bus = NULL; > + > + while ((bus = pci_find_next_bus(bus)) != NULL) > + domain = max_t(int, domain, pci_domain_nr(bus)); > + return domain + 1; > +} > + > +static int vmd_enable_domain(struct vmd_dev *vmd) > +{ > + struct pci_sysdata *sd = &vmd->sysdata; > + struct resource *res; > + u32 upper_bits; > + unsigned long flags; > + LIST_HEAD(resources); > + > + res = &vmd->dev->resource[VMD_CFGBAR]; > + vmd->resources[0] = (struct resource) { > + .name = "VMD CFGBAR", > + .start = 0, > + .end = (resource_size(res) >> 20) - 1, > + .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED, > + }; > + > + /* > + * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can > + * put 32-bit resources in the window. > + * > + * There's no hardware reason why a 64-bit window *couldn't* > + * contain a 32-bit resource, but pbus_size_mem() computes the > + * bridge window size assuming a 64-bit window will contain no > + * 32-bit resources. __pci_assign_resource() enforces that > + * artificial restriction to make sure everything will fit. > + * > + * The only way we could use a 64-bit non-prefechable MEMBAR is > + * if its address is <4GB so that we can convert it to a 32-bit > + * resource. To be visible to the host OS, all VMD endpoints must > + * be initially configured by platform BIOS, which includes setting > + * up these resources. We can assume the device is configured > + * according to the platform needs. 
> + */ > + res = &vmd->dev->resource[VMD_MEMBAR1]; > + upper_bits = upper_32_bits(res->end); > + flags = res->flags & ~IORESOURCE_SIZEALIGN; > + if (!upper_bits) > + flags &= ~IORESOURCE_MEM_64; > + vmd->resources[1] = (struct resource) { > + .name = "VMD MEMBAR1", > + .start = res->start, > + .end = res->end, > + .flags = flags, > + .parent = res, > + }; > + > + res = &vmd->dev->resource[VMD_MEMBAR2]; > + upper_bits = upper_32_bits(res->end); > + flags = res->flags & ~IORESOURCE_SIZEALIGN; > + if (!upper_bits) > + flags &= ~IORESOURCE_MEM_64; > + vmd->resources[2] = (struct resource) { > + .name = "VMD MEMBAR2", > + .start = res->start + 0x2000, > + .end = res->end, > + .flags = flags, > + .parent = res, > + }; > + > + sd->vmd_domain = true; > + sd->domain = vmd_find_free_domain(); > + if (sd->domain < 0) > + return sd->domain; > + > + sd->node = pcibus_to_node(vmd->dev->bus); > + > + vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info, > + x86_vector_domain); > + if (!vmd->irq_domain) > + return -ENODEV; > + > + pci_add_resource(&resources, &vmd->resources[0]); > + pci_add_resource(&resources, &vmd->resources[1]); > + pci_add_resource(&resources, &vmd->resources[2]); > + vmd->bus = pci_create_root_bus(&vmd->dev->dev, 0, &vmd_ops, sd, > + &resources); > + if (!vmd->bus) { > + pci_free_resource_list(&resources); > + irq_domain_remove(vmd->irq_domain); > + return -ENODEV; > + } > + > + vmd_attach_resources(vmd); > + vmd_setup_dma_ops(vmd); > + dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); > + pci_rescan_bus(vmd->bus); > + > + WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj, > + "domain"), "Can't create symlink to domain\n"); > + return 0; > +} > + > +static irqreturn_t vmd_irq(int irq, void *data) > +{ > + struct vmd_irq_list *irqs = data; > + struct vmd_irq *vmdirq; > + > + rcu_read_lock(); > + list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node) > + generic_handle_irq(vmdirq->virq); > + rcu_read_unlock(); > + > + return 
IRQ_HANDLED; > +} > + > +static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) > +{ > + struct vmd_dev *vmd; > + int i, err; > + > + if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) > + return -ENOMEM; > + > + vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL); > + if (!vmd) > + return -ENOMEM; > + > + vmd->dev = dev; > + err = pcim_enable_device(dev); > + if (err < 0) > + return err; > + > + vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0); > + if (!vmd->cfgbar) > + return -ENOMEM; > + > + pci_set_master(dev); > + if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) && > + dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) > + return -ENODEV; > + > + vmd->msix_count = pci_msix_vec_count(dev); > + if (vmd->msix_count < 0) > + return -ENODEV; > + > + vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), > + GFP_KERNEL); > + if (!vmd->irqs) > + return -ENOMEM; > + > + vmd->msix_entries = devm_kcalloc(&dev->dev, vmd->msix_count, > + sizeof(*vmd->msix_entries), > + GFP_KERNEL); > + if (!vmd->msix_entries) > + return -ENOMEM; > + for (i = 0; i < vmd->msix_count; i++) > + vmd->msix_entries[i].entry = i; > + > + vmd->msix_count = pci_enable_msix_range(vmd->dev, vmd->msix_entries, 1, > + vmd->msix_count); > + if (vmd->msix_count < 0) > + return vmd->msix_count; > + > + for (i = 0; i < vmd->msix_count; i++) { > + INIT_LIST_HEAD(&vmd->irqs[i].irq_list); > + vmd->irqs[i].vmd_vector = vmd->msix_entries[i].vector; > + vmd->irqs[i].index = i; > + > + err = devm_request_irq(&dev->dev, vmd->irqs[i].vmd_vector, > + vmd_irq, 0, "vmd", &vmd->irqs[i]); > + if (err) > + return err; > + } > + > + spin_lock_init(&vmd->cfg_lock); > + pci_set_drvdata(dev, vmd); > + err = vmd_enable_domain(vmd); > + if (err) > + return err; > + > + dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n", > + vmd->sysdata.domain); > + return 0; > +} > + > +static void vmd_remove(struct pci_dev *dev) > +{ > + struct vmd_dev *vmd = 
pci_get_drvdata(dev); > + > + vmd_detach_resources(vmd); > + pci_set_drvdata(dev, NULL); > + sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); > + pci_stop_root_bus(vmd->bus); > + pci_remove_root_bus(vmd->bus); > + vmd_teardown_dma_ops(vmd); > + irq_domain_remove(vmd->irq_domain); > +} > + > +#ifdef CONFIG_PM > +static int vmd_suspend(struct device *dev) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + > + pci_save_state(pdev); > + return 0; > +} > + > +static int vmd_resume(struct device *dev) > +{ > + struct pci_dev *pdev = to_pci_dev(dev); > + > + pci_restore_state(pdev); > + return 0; > +} > +#endif > +static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume); > + > +static const struct pci_device_id vmd_ids[] = { > + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x201d),}, > + {0,} > +}; > +MODULE_DEVICE_TABLE(pci, vmd_ids); > + > +static struct pci_driver vmd_drv = { > + .name = "vmd", > + .id_table = vmd_ids, > + .probe = vmd_probe, > + .remove = vmd_remove, > + .driver = { > + .pm = &vmd_dev_pm_ops, > + }, > +}; > +module_pci_driver(vmd_drv); > + > +MODULE_AUTHOR("Intel Corporation"); > +MODULE_LICENSE("GPL v2"); > +MODULE_VERSION("0.6"); > -- > 2.7.2 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-pci" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5c6e747..c320838 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2744,19 +2744,6 @@ config PMC_ATOM def_bool y depends on PCI -config VMD - depends on PCI_MSI - tristate "Volume Management Device Driver" - default N - ---help--- - Adds support for the Intel Volume Management Device (VMD). VMD is a - secondary PCI host bridge that allows PCI Express root ports, - and devices attached to them, to be removed from the default - PCI domain and placed within the VMD domain. This provides - more bus resources than are otherwise possible with a - single domain. If you know your system provides one of these and - has devices attached to it, say Y; if you are not sure, say N. - source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 97062a6..5c6fc35 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -23,8 +23,6 @@ obj-y += bus_numa.o obj-$(CONFIG_AMD_NB) += amd_bus.o obj-$(CONFIG_PCI_CNB20LE_QUIRK) += broadcom_bus.o -obj-$(CONFIG_VMD) += vmd.o - ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c deleted file mode 100644 index a021b7b..0000000 --- a/arch/x86/pci/vmd.c +++ /dev/null @@ -1,766 +0,0 @@ -/* - * Volume Management Device driver - * Copyright (c) 2015, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#include <linux/device.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/msi.h> -#include <linux/pci.h> -#include <linux/rculist.h> -#include <linux/rcupdate.h> - -#include <asm/irqdomain.h> -#include <asm/device.h> -#include <asm/msi.h> -#include <asm/msidef.h> - -#define VMD_CFGBAR 0 -#define VMD_MEMBAR1 2 -#define VMD_MEMBAR2 4 - -/* - * Lock for manipulating VMD IRQ lists. - */ -static DEFINE_RAW_SPINLOCK(list_lock); - -/** - * struct vmd_irq - private data to map driver IRQ to the VMD shared vector - * @node: list item for parent traversal. - * @rcu: RCU callback item for freeing. - * @irq: back pointer to parent. - * @virq: the virtual IRQ value provided to the requesting driver. - * - * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to - * a VMD IRQ using this structure. - */ -struct vmd_irq { - struct list_head node; - struct rcu_head rcu; - struct vmd_irq_list *irq; - unsigned int virq; -}; - -/** - * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector - * @irq_list: the list of irq's the VMD one demuxes to. - * @vmd_vector: the h/w IRQ assigned to the VMD. - * @index: index into the VMD MSI-X table; used for message routing. - * @count: number of child IRQs assigned to this vector; used to track - * sharing. 
- */ -struct vmd_irq_list { - struct list_head irq_list; - struct vmd_dev *vmd; - unsigned int vmd_vector; - unsigned int index; - unsigned int count; -}; - -struct vmd_dev { - struct pci_dev *dev; - - spinlock_t cfg_lock; - char __iomem *cfgbar; - - int msix_count; - struct msix_entry *msix_entries; - struct vmd_irq_list *irqs; - - struct pci_sysdata sysdata; - struct resource resources[3]; - struct irq_domain *irq_domain; - struct pci_bus *bus; - -#ifdef CONFIG_X86_DEV_DMA_OPS - struct dma_map_ops dma_ops; - struct dma_domain dma_domain; -#endif -}; - -static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus) -{ - return container_of(bus->sysdata, struct vmd_dev, sysdata); -} - -/* - * Drivers managing a device in a VMD domain allocate their own IRQs as before, - * but the MSI entry for the hardware it's driving will be programmed with a - * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its - * domain into one of its own, and the VMD driver de-muxes these for the - * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations - * and irq_chip to set this up. - */ -static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) -{ - struct vmd_irq *vmdirq = data->chip_data; - struct vmd_irq_list *irq = vmdirq->irq; - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_ID(irq->index); - msg->data = 0; -} - -/* - * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops. 
- */ -static void vmd_irq_enable(struct irq_data *data) -{ - struct vmd_irq *vmdirq = data->chip_data; - unsigned long flags; - - raw_spin_lock_irqsave(&list_lock, flags); - list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); - raw_spin_unlock_irqrestore(&list_lock, flags); - - data->chip->irq_unmask(data); -} - -static void vmd_irq_disable(struct irq_data *data) -{ - struct vmd_irq *vmdirq = data->chip_data; - unsigned long flags; - - data->chip->irq_mask(data); - - raw_spin_lock_irqsave(&list_lock, flags); - list_del_rcu(&vmdirq->node); - INIT_LIST_HEAD_RCU(&vmdirq->node); - raw_spin_unlock_irqrestore(&list_lock, flags); -} - -/* - * XXX: Stubbed until we develop acceptable way to not create conflicts with - * other devices sharing the same vector. - */ -static int vmd_irq_set_affinity(struct irq_data *data, - const struct cpumask *dest, bool force) -{ - return -EINVAL; -} - -static struct irq_chip vmd_msi_controller = { - .name = "VMD-MSI", - .irq_enable = vmd_irq_enable, - .irq_disable = vmd_irq_disable, - .irq_compose_msi_msg = vmd_compose_msi_msg, - .irq_set_affinity = vmd_irq_set_affinity, -}; - -static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, - msi_alloc_info_t *arg) -{ - return 0; -} - -/* - * XXX: We can be even smarter selecting the best IRQ once we solve the - * affinity problem. 
- */ -static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc) -{ - int i, best = 1; - unsigned long flags; - - if (!desc->msi_attrib.is_msix || vmd->msix_count == 1) - return &vmd->irqs[0]; - - raw_spin_lock_irqsave(&list_lock, flags); - for (i = 1; i < vmd->msix_count; i++) - if (vmd->irqs[i].count < vmd->irqs[best].count) - best = i; - vmd->irqs[best].count++; - raw_spin_unlock_irqrestore(&list_lock, flags); - - return &vmd->irqs[best]; -} - -static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, - unsigned int virq, irq_hw_number_t hwirq, - msi_alloc_info_t *arg) -{ - struct msi_desc *desc = arg->desc; - struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); - struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); - - if (!vmdirq) - return -ENOMEM; - - INIT_LIST_HEAD(&vmdirq->node); - vmdirq->irq = vmd_next_irq(vmd, desc); - vmdirq->virq = virq; - - irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, - vmdirq, handle_untracked_irq, vmd, NULL); - return 0; -} - -static void vmd_msi_free(struct irq_domain *domain, - struct msi_domain_info *info, unsigned int virq) -{ - struct vmd_irq *vmdirq = irq_get_chip_data(virq); - unsigned long flags; - - /* XXX: Potential optimization to rebalance */ - raw_spin_lock_irqsave(&list_lock, flags); - vmdirq->irq->count--; - raw_spin_unlock_irqrestore(&list_lock, flags); - - kfree_rcu(vmdirq, rcu); -} - -static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *arg) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct vmd_dev *vmd = vmd_from_bus(pdev->bus); - - if (nvec > vmd->msix_count) - return vmd->msix_count; - - memset(arg, 0, sizeof(*arg)); - return 0; -} - -static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) -{ - arg->desc = desc; -} - -static struct msi_domain_ops vmd_msi_domain_ops = { - .get_hwirq = vmd_get_hwirq, - .msi_init = vmd_msi_init, - .msi_free = 
vmd_msi_free, - .msi_prepare = vmd_msi_prepare, - .set_desc = vmd_set_desc, -}; - -static struct msi_domain_info vmd_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX, - .ops = &vmd_msi_domain_ops, - .chip = &vmd_msi_controller, -}; - -#ifdef CONFIG_X86_DEV_DMA_OPS -/* - * VMD replaces the requester ID with its own. DMA mappings for devices in a - * VMD domain need to be mapped for the VMD, not the device requiring - * the mapping. - */ -static struct device *to_vmd_dev(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct vmd_dev *vmd = vmd_from_bus(pdev->bus); - - return &vmd->dev->dev; -} - -static struct dma_map_ops *vmd_dma_ops(struct device *dev) -{ - return get_dma_ops(to_vmd_dev(dev)); -} - -static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, - gfp_t flag, unsigned long attrs) -{ - return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, - attrs); -} - -static void vmd_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t addr, unsigned long attrs) -{ - return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, - attrs); -} - -static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t addr, size_t size, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, - size, attrs); -} - -static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t addr, size_t size, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, - addr, size, attrs); -} - -static dma_addr_t vmd_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, - dir, attrs); -} - -static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, - enum 
dma_data_direction dir, unsigned long attrs) -{ - vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); -} - -static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); -} - -static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); -} - -static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); -} - -static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, - dir); -} - -static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); -} - -static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) -{ - vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); -} - -static int vmd_mapping_error(struct device *dev, dma_addr_t addr) -{ - return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); -} - -static int vmd_dma_supported(struct device *dev, u64 mask) -{ - return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); -} - -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK -static u64 vmd_get_required_mask(struct device *dev) -{ - return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); -} -#endif - -static void vmd_teardown_dma_ops(struct vmd_dev *vmd) -{ - struct dma_domain *domain = &vmd->dma_domain; - - if (get_dma_ops(&vmd->dev->dev)) - del_dma_domain(domain); -} - -#define 
ASSIGN_VMD_DMA_OPS(source, dest, fn) \ - do { \ - if (source->fn) \ - dest->fn = vmd_##fn; \ - } while (0) - -static void vmd_setup_dma_ops(struct vmd_dev *vmd) -{ - const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev); - struct dma_map_ops *dest = &vmd->dma_ops; - struct dma_domain *domain = &vmd->dma_domain; - - domain->domain_nr = vmd->sysdata.domain; - domain->dma_ops = dest; - - if (!source) - return; - ASSIGN_VMD_DMA_OPS(source, dest, alloc); - ASSIGN_VMD_DMA_OPS(source, dest, free); - ASSIGN_VMD_DMA_OPS(source, dest, mmap); - ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable); - ASSIGN_VMD_DMA_OPS(source, dest, map_page); - ASSIGN_VMD_DMA_OPS(source, dest, unmap_page); - ASSIGN_VMD_DMA_OPS(source, dest, map_sg); - ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg); - ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu); - ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); - ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); - ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); - ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); -#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK - ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); -#endif - add_dma_domain(domain); -} -#undef ASSIGN_VMD_DMA_OPS -#else -static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {} -static void vmd_setup_dma_ops(struct vmd_dev *vmd) {} -#endif - -static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus, - unsigned int devfn, int reg, int len) -{ - char __iomem *addr = vmd->cfgbar + - (bus->number << 20) + (devfn << 12) + reg; - - if ((addr - vmd->cfgbar) + len >= - resource_size(&vmd->dev->resource[VMD_CFGBAR])) - return NULL; - - return addr; -} - -/* - * CPU may deadlock if config space is not serialized on some versions of this - * hardware, so all config space access is done under a spinlock. 
- */ -static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg, - int len, u32 *value) -{ - struct vmd_dev *vmd = vmd_from_bus(bus); - char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); - unsigned long flags; - int ret = 0; - - if (!addr) - return -EFAULT; - - spin_lock_irqsave(&vmd->cfg_lock, flags); - switch (len) { - case 1: - *value = readb(addr); - break; - case 2: - *value = readw(addr); - break; - case 4: - *value = readl(addr); - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&vmd->cfg_lock, flags); - return ret; -} - -/* - * VMD h/w converts non-posted config writes to posted memory writes. The - * read-back in this function forces the completion so it returns only after - * the config space was written, as expected. - */ -static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, - int len, u32 value) -{ - struct vmd_dev *vmd = vmd_from_bus(bus); - char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); - unsigned long flags; - int ret = 0; - - if (!addr) - return -EFAULT; - - spin_lock_irqsave(&vmd->cfg_lock, flags); - switch (len) { - case 1: - writeb(value, addr); - readb(addr); - break; - case 2: - writew(value, addr); - readw(addr); - break; - case 4: - writel(value, addr); - readl(addr); - break; - default: - ret = -EINVAL; - break; - } - spin_unlock_irqrestore(&vmd->cfg_lock, flags); - return ret; -} - -static struct pci_ops vmd_ops = { - .read = vmd_pci_read, - .write = vmd_pci_write, -}; - -static void vmd_attach_resources(struct vmd_dev *vmd) -{ - vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; - vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2]; -} - -static void vmd_detach_resources(struct vmd_dev *vmd) -{ - vmd->dev->resource[VMD_MEMBAR1].child = NULL; - vmd->dev->resource[VMD_MEMBAR2].child = NULL; -} - -/* - * VMD domains start at 0x1000 to not clash with ACPI _SEG domains. 
- */ -static int vmd_find_free_domain(void) -{ - int domain = 0xffff; - struct pci_bus *bus = NULL; - - while ((bus = pci_find_next_bus(bus)) != NULL) - domain = max_t(int, domain, pci_domain_nr(bus)); - return domain + 1; -} - -static int vmd_enable_domain(struct vmd_dev *vmd) -{ - struct pci_sysdata *sd = &vmd->sysdata; - struct resource *res; - u32 upper_bits; - unsigned long flags; - LIST_HEAD(resources); - - res = &vmd->dev->resource[VMD_CFGBAR]; - vmd->resources[0] = (struct resource) { - .name = "VMD CFGBAR", - .start = 0, - .end = (resource_size(res) >> 20) - 1, - .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED, - }; - - /* - * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can - * put 32-bit resources in the window. - * - * There's no hardware reason why a 64-bit window *couldn't* - * contain a 32-bit resource, but pbus_size_mem() computes the - * bridge window size assuming a 64-bit window will contain no - * 32-bit resources. __pci_assign_resource() enforces that - * artificial restriction to make sure everything will fit. - * - * The only way we could use a 64-bit non-prefechable MEMBAR is - * if its address is <4GB so that we can convert it to a 32-bit - * resource. To be visible to the host OS, all VMD endpoints must - * be initially configured by platform BIOS, which includes setting - * up these resources. We can assume the device is configured - * according to the platform needs. 
- */ - res = &vmd->dev->resource[VMD_MEMBAR1]; - upper_bits = upper_32_bits(res->end); - flags = res->flags & ~IORESOURCE_SIZEALIGN; - if (!upper_bits) - flags &= ~IORESOURCE_MEM_64; - vmd->resources[1] = (struct resource) { - .name = "VMD MEMBAR1", - .start = res->start, - .end = res->end, - .flags = flags, - .parent = res, - }; - - res = &vmd->dev->resource[VMD_MEMBAR2]; - upper_bits = upper_32_bits(res->end); - flags = res->flags & ~IORESOURCE_SIZEALIGN; - if (!upper_bits) - flags &= ~IORESOURCE_MEM_64; - vmd->resources[2] = (struct resource) { - .name = "VMD MEMBAR2", - .start = res->start + 0x2000, - .end = res->end, - .flags = flags, - .parent = res, - }; - - sd->vmd_domain = true; - sd->domain = vmd_find_free_domain(); - if (sd->domain < 0) - return sd->domain; - - sd->node = pcibus_to_node(vmd->dev->bus); - - vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info, - x86_vector_domain); - if (!vmd->irq_domain) - return -ENODEV; - - pci_add_resource(&resources, &vmd->resources[0]); - pci_add_resource(&resources, &vmd->resources[1]); - pci_add_resource(&resources, &vmd->resources[2]); - vmd->bus = pci_create_root_bus(&vmd->dev->dev, 0, &vmd_ops, sd, - &resources); - if (!vmd->bus) { - pci_free_resource_list(&resources); - irq_domain_remove(vmd->irq_domain); - return -ENODEV; - } - - vmd_attach_resources(vmd); - vmd_setup_dma_ops(vmd); - dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); - pci_rescan_bus(vmd->bus); - - WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj, - "domain"), "Can't create symlink to domain\n"); - return 0; -} - -static irqreturn_t vmd_irq(int irq, void *data) -{ - struct vmd_irq_list *irqs = data; - struct vmd_irq *vmdirq; - - rcu_read_lock(); - list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node) - generic_handle_irq(vmdirq->virq); - rcu_read_unlock(); - - return IRQ_HANDLED; -} - -static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) -{ - struct vmd_dev *vmd; - int i, err; - - if 
(resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) - return -ENOMEM; - - vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL); - if (!vmd) - return -ENOMEM; - - vmd->dev = dev; - err = pcim_enable_device(dev); - if (err < 0) - return err; - - vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0); - if (!vmd->cfgbar) - return -ENOMEM; - - pci_set_master(dev); - if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) && - dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) - return -ENODEV; - - vmd->msix_count = pci_msix_vec_count(dev); - if (vmd->msix_count < 0) - return -ENODEV; - - vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), - GFP_KERNEL); - if (!vmd->irqs) - return -ENOMEM; - - vmd->msix_entries = devm_kcalloc(&dev->dev, vmd->msix_count, - sizeof(*vmd->msix_entries), - GFP_KERNEL); - if (!vmd->msix_entries) - return -ENOMEM; - for (i = 0; i < vmd->msix_count; i++) - vmd->msix_entries[i].entry = i; - - vmd->msix_count = pci_enable_msix_range(vmd->dev, vmd->msix_entries, 1, - vmd->msix_count); - if (vmd->msix_count < 0) - return vmd->msix_count; - - for (i = 0; i < vmd->msix_count; i++) { - INIT_LIST_HEAD(&vmd->irqs[i].irq_list); - vmd->irqs[i].vmd_vector = vmd->msix_entries[i].vector; - vmd->irqs[i].index = i; - - err = devm_request_irq(&dev->dev, vmd->irqs[i].vmd_vector, - vmd_irq, 0, "vmd", &vmd->irqs[i]); - if (err) - return err; - } - - spin_lock_init(&vmd->cfg_lock); - pci_set_drvdata(dev, vmd); - err = vmd_enable_domain(vmd); - if (err) - return err; - - dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n", - vmd->sysdata.domain); - return 0; -} - -static void vmd_remove(struct pci_dev *dev) -{ - struct vmd_dev *vmd = pci_get_drvdata(dev); - - vmd_detach_resources(vmd); - pci_set_drvdata(dev, NULL); - sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); - pci_stop_root_bus(vmd->bus); - pci_remove_root_bus(vmd->bus); - vmd_teardown_dma_ops(vmd); - irq_domain_remove(vmd->irq_domain); -} - -#ifdef CONFIG_PM -static int 
vmd_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - pci_save_state(pdev); - return 0; -} - -static int vmd_resume(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - - pci_restore_state(pdev); - return 0; -} -#endif -static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume); - -static const struct pci_device_id vmd_ids[] = { - {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x201d),}, - {0,} -}; -MODULE_DEVICE_TABLE(pci, vmd_ids); - -static struct pci_driver vmd_drv = { - .name = "vmd", - .id_table = vmd_ids, - .probe = vmd_probe, - .remove = vmd_remove, - .driver = { - .pm = &vmd_dev_pm_ops, - }, -}; -module_pci_driver(vmd_drv); - -MODULE_AUTHOR("Intel Corporation"); -MODULE_LICENSE("GPL v2"); -MODULE_VERSION("0.6"); diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig index 9b485d8..93865eb 100644 --- a/drivers/pci/host/Kconfig +++ b/drivers/pci/host/Kconfig @@ -274,4 +274,20 @@ config PCIE_ARTPEC6 Say Y here to enable PCIe controller support on Axis ARTPEC-6 SoCs. This PCIe controller uses the DesignWare core. +config VMD + depends on PCI_MSI && X86_64 + tristate "Intel Volume Management Device Driver" + default N + ---help--- + Adds support for the Intel Volume Management Device (VMD). VMD is a + secondary PCI host bridge that allows PCI Express root ports, + and devices attached to them, to be removed from the default + PCI domain and placed within the VMD domain. This provides + more bus resources than are otherwise possible with a + single domain. If you know your system provides one of these and + has devices attached to it, say Y; if you are not sure, say N. + + To compile this driver as a module, choose M here: the + module will be called vmd. 
+ endmenu diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile index 8843410..afea1c6 100644 --- a/drivers/pci/host/Makefile +++ b/drivers/pci/host/Makefile @@ -31,3 +31,4 @@ obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o obj-$(CONFIG_PCIE_ARMADA_8K) += pcie-armada8k.o obj-$(CONFIG_PCIE_ARTPEC6) += pcie-artpec6.o +obj-$(CONFIG_VMD) += vmd.o diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c new file mode 100644 index 0000000..a021b7b --- /dev/null +++ b/drivers/pci/host/vmd.c @@ -0,0 +1,766 @@ +/* + * Volume Management Device driver + * Copyright (c) 2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/device.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/msi.h> +#include <linux/pci.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> + +#include <asm/irqdomain.h> +#include <asm/device.h> +#include <asm/msi.h> +#include <asm/msidef.h> + +#define VMD_CFGBAR 0 +#define VMD_MEMBAR1 2 +#define VMD_MEMBAR2 4 + +/* + * Lock for manipulating VMD IRQ lists. + */ +static DEFINE_RAW_SPINLOCK(list_lock); + +/** + * struct vmd_irq - private data to map driver IRQ to the VMD shared vector + * @node: list item for parent traversal. + * @rcu: RCU callback item for freeing. + * @irq: back pointer to parent. + * @virq: the virtual IRQ value provided to the requesting driver. 
+ * + * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to + * a VMD IRQ using this structure. + */ +struct vmd_irq { + struct list_head node; + struct rcu_head rcu; + struct vmd_irq_list *irq; + unsigned int virq; +}; + +/** + * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector + * @irq_list: the list of irq's the VMD one demuxes to. + * @vmd_vector: the h/w IRQ assigned to the VMD. + * @index: index into the VMD MSI-X table; used for message routing. + * @count: number of child IRQs assigned to this vector; used to track + * sharing. + */ +struct vmd_irq_list { + struct list_head irq_list; + struct vmd_dev *vmd; + unsigned int vmd_vector; + unsigned int index; + unsigned int count; +}; + +struct vmd_dev { + struct pci_dev *dev; + + spinlock_t cfg_lock; + char __iomem *cfgbar; + + int msix_count; + struct msix_entry *msix_entries; + struct vmd_irq_list *irqs; + + struct pci_sysdata sysdata; + struct resource resources[3]; + struct irq_domain *irq_domain; + struct pci_bus *bus; + +#ifdef CONFIG_X86_DEV_DMA_OPS + struct dma_map_ops dma_ops; + struct dma_domain dma_domain; +#endif +}; + +static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus) +{ + return container_of(bus->sysdata, struct vmd_dev, sysdata); +} + +/* + * Drivers managing a device in a VMD domain allocate their own IRQs as before, + * but the MSI entry for the hardware it's driving will be programmed with a + * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its + * domain into one of its own, and the VMD driver de-muxes these for the + * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations + * and irq_chip to set this up. 
+ */ +static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct vmd_irq *vmdirq = data->chip_data; + struct vmd_irq_list *irq = vmdirq->irq; + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_ID(irq->index); + msg->data = 0; +} + +/* + * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops. + */ +static void vmd_irq_enable(struct irq_data *data) +{ + struct vmd_irq *vmdirq = data->chip_data; + unsigned long flags; + + raw_spin_lock_irqsave(&list_lock, flags); + list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); + raw_spin_unlock_irqrestore(&list_lock, flags); + + data->chip->irq_unmask(data); +} + +static void vmd_irq_disable(struct irq_data *data) +{ + struct vmd_irq *vmdirq = data->chip_data; + unsigned long flags; + + data->chip->irq_mask(data); + + raw_spin_lock_irqsave(&list_lock, flags); + list_del_rcu(&vmdirq->node); + INIT_LIST_HEAD_RCU(&vmdirq->node); + raw_spin_unlock_irqrestore(&list_lock, flags); +} + +/* + * XXX: Stubbed until we develop acceptable way to not create conflicts with + * other devices sharing the same vector. + */ +static int vmd_irq_set_affinity(struct irq_data *data, + const struct cpumask *dest, bool force) +{ + return -EINVAL; +} + +static struct irq_chip vmd_msi_controller = { + .name = "VMD-MSI", + .irq_enable = vmd_irq_enable, + .irq_disable = vmd_irq_disable, + .irq_compose_msi_msg = vmd_compose_msi_msg, + .irq_set_affinity = vmd_irq_set_affinity, +}; + +static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info, + msi_alloc_info_t *arg) +{ + return 0; +} + +/* + * XXX: We can be even smarter selecting the best IRQ once we solve the + * affinity problem. 
+ */ +static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc) +{ + int i, best = 1; + unsigned long flags; + + if (!desc->msi_attrib.is_msix || vmd->msix_count == 1) + return &vmd->irqs[0]; + + raw_spin_lock_irqsave(&list_lock, flags); + for (i = 1; i < vmd->msix_count; i++) + if (vmd->irqs[i].count < vmd->irqs[best].count) + best = i; + vmd->irqs[best].count++; + raw_spin_unlock_irqrestore(&list_lock, flags); + + return &vmd->irqs[best]; +} + +static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, + unsigned int virq, irq_hw_number_t hwirq, + msi_alloc_info_t *arg) +{ + struct msi_desc *desc = arg->desc; + struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); + struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); + + if (!vmdirq) + return -ENOMEM; + + INIT_LIST_HEAD(&vmdirq->node); + vmdirq->irq = vmd_next_irq(vmd, desc); + vmdirq->virq = virq; + + irq_domain_set_info(domain, virq, vmdirq->irq->vmd_vector, info->chip, + vmdirq, handle_untracked_irq, vmd, NULL); + return 0; +} + +static void vmd_msi_free(struct irq_domain *domain, + struct msi_domain_info *info, unsigned int virq) +{ + struct vmd_irq *vmdirq = irq_get_chip_data(virq); + unsigned long flags; + + /* XXX: Potential optimization to rebalance */ + raw_spin_lock_irqsave(&list_lock, flags); + vmdirq->irq->count--; + raw_spin_unlock_irqrestore(&list_lock, flags); + + kfree_rcu(vmdirq, rcu); +} + +static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *arg) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct vmd_dev *vmd = vmd_from_bus(pdev->bus); + + if (nvec > vmd->msix_count) + return vmd->msix_count; + + memset(arg, 0, sizeof(*arg)); + return 0; +} + +static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->desc = desc; +} + +static struct msi_domain_ops vmd_msi_domain_ops = { + .get_hwirq = vmd_get_hwirq, + .msi_init = vmd_msi_init, + .msi_free = 
vmd_msi_free, + .msi_prepare = vmd_msi_prepare, + .set_desc = vmd_set_desc, +}; + +static struct msi_domain_info vmd_msi_domain_info = { + .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_PCI_MSIX, + .ops = &vmd_msi_domain_ops, + .chip = &vmd_msi_controller, +}; + +#ifdef CONFIG_X86_DEV_DMA_OPS +/* + * VMD replaces the requester ID with its own. DMA mappings for devices in a + * VMD domain need to be mapped for the VMD, not the device requiring + * the mapping. + */ +static struct device *to_vmd_dev(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct vmd_dev *vmd = vmd_from_bus(pdev->bus); + + return &vmd->dev->dev; +} + +static struct dma_map_ops *vmd_dma_ops(struct device *dev) +{ + return get_dma_ops(to_vmd_dev(dev)); +} + +static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr, + gfp_t flag, unsigned long attrs) +{ + return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag, + attrs); +} + +static void vmd_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t addr, unsigned long attrs) +{ + return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr, + attrs); +} + +static int vmd_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t addr, size_t size, + unsigned long attrs) +{ + return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr, + size, attrs); +} + +static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t addr, size_t size, + unsigned long attrs) +{ + return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr, + addr, size, attrs); +} + +static dma_addr_t vmd_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size, + dir, attrs); +} + +static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size, + enum 
dma_data_direction dir, unsigned long attrs) +{ + vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs); +} + +static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs); +} + +static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) +{ + vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs); +} + +static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir); +} + +static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size, + dir); +} + +static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir); +} + +static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir); +} + +static int vmd_mapping_error(struct device *dev, dma_addr_t addr) +{ + return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr); +} + +static int vmd_dma_supported(struct device *dev, u64 mask) +{ + return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask); +} + +#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK +static u64 vmd_get_required_mask(struct device *dev) +{ + return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev)); +} +#endif + +static void vmd_teardown_dma_ops(struct vmd_dev *vmd) +{ + struct dma_domain *domain = &vmd->dma_domain; + + if (get_dma_ops(&vmd->dev->dev)) + del_dma_domain(domain); +} + +#define 
ASSIGN_VMD_DMA_OPS(source, dest, fn) \ + do { \ + if (source->fn) \ + dest->fn = vmd_##fn; \ + } while (0) + +static void vmd_setup_dma_ops(struct vmd_dev *vmd) +{ + const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev); + struct dma_map_ops *dest = &vmd->dma_ops; + struct dma_domain *domain = &vmd->dma_domain; + + domain->domain_nr = vmd->sysdata.domain; + domain->dma_ops = dest; + + if (!source) + return; + ASSIGN_VMD_DMA_OPS(source, dest, alloc); + ASSIGN_VMD_DMA_OPS(source, dest, free); + ASSIGN_VMD_DMA_OPS(source, dest, mmap); + ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable); + ASSIGN_VMD_DMA_OPS(source, dest, map_page); + ASSIGN_VMD_DMA_OPS(source, dest, unmap_page); + ASSIGN_VMD_DMA_OPS(source, dest, map_sg); + ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg); + ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu); + ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device); + ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu); + ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device); + ASSIGN_VMD_DMA_OPS(source, dest, mapping_error); + ASSIGN_VMD_DMA_OPS(source, dest, dma_supported); +#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK + ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask); +#endif + add_dma_domain(domain); +} +#undef ASSIGN_VMD_DMA_OPS +#else +static void vmd_teardown_dma_ops(struct vmd_dev *vmd) {} +static void vmd_setup_dma_ops(struct vmd_dev *vmd) {} +#endif + +static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus, + unsigned int devfn, int reg, int len) +{ + char __iomem *addr = vmd->cfgbar + + (bus->number << 20) + (devfn << 12) + reg; + + if ((addr - vmd->cfgbar) + len >= + resource_size(&vmd->dev->resource[VMD_CFGBAR])) + return NULL; + + return addr; +} + +/* + * CPU may deadlock if config space is not serialized on some versions of this + * hardware, so all config space access is done under a spinlock. 
+ */ +static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg, + int len, u32 *value) +{ + struct vmd_dev *vmd = vmd_from_bus(bus); + char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); + unsigned long flags; + int ret = 0; + + if (!addr) + return -EFAULT; + + spin_lock_irqsave(&vmd->cfg_lock, flags); + switch (len) { + case 1: + *value = readb(addr); + break; + case 2: + *value = readw(addr); + break; + case 4: + *value = readl(addr); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&vmd->cfg_lock, flags); + return ret; +} + +/* + * VMD h/w converts non-posted config writes to posted memory writes. The + * read-back in this function forces the completion so it returns only after + * the config space was written, as expected. + */ +static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg, + int len, u32 value) +{ + struct vmd_dev *vmd = vmd_from_bus(bus); + char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len); + unsigned long flags; + int ret = 0; + + if (!addr) + return -EFAULT; + + spin_lock_irqsave(&vmd->cfg_lock, flags); + switch (len) { + case 1: + writeb(value, addr); + readb(addr); + break; + case 2: + writew(value, addr); + readw(addr); + break; + case 4: + writel(value, addr); + readl(addr); + break; + default: + ret = -EINVAL; + break; + } + spin_unlock_irqrestore(&vmd->cfg_lock, flags); + return ret; +} + +static struct pci_ops vmd_ops = { + .read = vmd_pci_read, + .write = vmd_pci_write, +}; + +static void vmd_attach_resources(struct vmd_dev *vmd) +{ + vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1]; + vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2]; +} + +static void vmd_detach_resources(struct vmd_dev *vmd) +{ + vmd->dev->resource[VMD_MEMBAR1].child = NULL; + vmd->dev->resource[VMD_MEMBAR2].child = NULL; +} + +/* + * VMD domains start at 0x1000 to not clash with ACPI _SEG domains. 
+ */ +static int vmd_find_free_domain(void) +{ + int domain = 0xffff; + struct pci_bus *bus = NULL; + + while ((bus = pci_find_next_bus(bus)) != NULL) + domain = max_t(int, domain, pci_domain_nr(bus)); + return domain + 1; +} + +static int vmd_enable_domain(struct vmd_dev *vmd) +{ + struct pci_sysdata *sd = &vmd->sysdata; + struct resource *res; + u32 upper_bits; + unsigned long flags; + LIST_HEAD(resources); + + res = &vmd->dev->resource[VMD_CFGBAR]; + vmd->resources[0] = (struct resource) { + .name = "VMD CFGBAR", + .start = 0, + .end = (resource_size(res) >> 20) - 1, + .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED, + }; + + /* + * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can + * put 32-bit resources in the window. + * + * There's no hardware reason why a 64-bit window *couldn't* + * contain a 32-bit resource, but pbus_size_mem() computes the + * bridge window size assuming a 64-bit window will contain no + * 32-bit resources. __pci_assign_resource() enforces that + * artificial restriction to make sure everything will fit. + * + * The only way we could use a 64-bit non-prefechable MEMBAR is + * if its address is <4GB so that we can convert it to a 32-bit + * resource. To be visible to the host OS, all VMD endpoints must + * be initially configured by platform BIOS, which includes setting + * up these resources. We can assume the device is configured + * according to the platform needs. 
+ */ + res = &vmd->dev->resource[VMD_MEMBAR1]; + upper_bits = upper_32_bits(res->end); + flags = res->flags & ~IORESOURCE_SIZEALIGN; + if (!upper_bits) + flags &= ~IORESOURCE_MEM_64; + vmd->resources[1] = (struct resource) { + .name = "VMD MEMBAR1", + .start = res->start, + .end = res->end, + .flags = flags, + .parent = res, + }; + + res = &vmd->dev->resource[VMD_MEMBAR2]; + upper_bits = upper_32_bits(res->end); + flags = res->flags & ~IORESOURCE_SIZEALIGN; + if (!upper_bits) + flags &= ~IORESOURCE_MEM_64; + vmd->resources[2] = (struct resource) { + .name = "VMD MEMBAR2", + .start = res->start + 0x2000, + .end = res->end, + .flags = flags, + .parent = res, + }; + + sd->vmd_domain = true; + sd->domain = vmd_find_free_domain(); + if (sd->domain < 0) + return sd->domain; + + sd->node = pcibus_to_node(vmd->dev->bus); + + vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info, + x86_vector_domain); + if (!vmd->irq_domain) + return -ENODEV; + + pci_add_resource(&resources, &vmd->resources[0]); + pci_add_resource(&resources, &vmd->resources[1]); + pci_add_resource(&resources, &vmd->resources[2]); + vmd->bus = pci_create_root_bus(&vmd->dev->dev, 0, &vmd_ops, sd, + &resources); + if (!vmd->bus) { + pci_free_resource_list(&resources); + irq_domain_remove(vmd->irq_domain); + return -ENODEV; + } + + vmd_attach_resources(vmd); + vmd_setup_dma_ops(vmd); + dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain); + pci_rescan_bus(vmd->bus); + + WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj, + "domain"), "Can't create symlink to domain\n"); + return 0; +} + +static irqreturn_t vmd_irq(int irq, void *data) +{ + struct vmd_irq_list *irqs = data; + struct vmd_irq *vmdirq; + + rcu_read_lock(); + list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node) + generic_handle_irq(vmdirq->virq); + rcu_read_unlock(); + + return IRQ_HANDLED; +} + +static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct vmd_dev *vmd; + int i, err; + + if 
(resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20)) + return -ENOMEM; + + vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL); + if (!vmd) + return -ENOMEM; + + vmd->dev = dev; + err = pcim_enable_device(dev); + if (err < 0) + return err; + + vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0); + if (!vmd->cfgbar) + return -ENOMEM; + + pci_set_master(dev); + if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) && + dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) + return -ENODEV; + + vmd->msix_count = pci_msix_vec_count(dev); + if (vmd->msix_count < 0) + return -ENODEV; + + vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs), + GFP_KERNEL); + if (!vmd->irqs) + return -ENOMEM; + + vmd->msix_entries = devm_kcalloc(&dev->dev, vmd->msix_count, + sizeof(*vmd->msix_entries), + GFP_KERNEL); + if (!vmd->msix_entries) + return -ENOMEM; + for (i = 0; i < vmd->msix_count; i++) + vmd->msix_entries[i].entry = i; + + vmd->msix_count = pci_enable_msix_range(vmd->dev, vmd->msix_entries, 1, + vmd->msix_count); + if (vmd->msix_count < 0) + return vmd->msix_count; + + for (i = 0; i < vmd->msix_count; i++) { + INIT_LIST_HEAD(&vmd->irqs[i].irq_list); + vmd->irqs[i].vmd_vector = vmd->msix_entries[i].vector; + vmd->irqs[i].index = i; + + err = devm_request_irq(&dev->dev, vmd->irqs[i].vmd_vector, + vmd_irq, 0, "vmd", &vmd->irqs[i]); + if (err) + return err; + } + + spin_lock_init(&vmd->cfg_lock); + pci_set_drvdata(dev, vmd); + err = vmd_enable_domain(vmd); + if (err) + return err; + + dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n", + vmd->sysdata.domain); + return 0; +} + +static void vmd_remove(struct pci_dev *dev) +{ + struct vmd_dev *vmd = pci_get_drvdata(dev); + + vmd_detach_resources(vmd); + pci_set_drvdata(dev, NULL); + sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); + pci_stop_root_bus(vmd->bus); + pci_remove_root_bus(vmd->bus); + vmd_teardown_dma_ops(vmd); + irq_domain_remove(vmd->irq_domain); +} + +#ifdef CONFIG_PM +static int 
vmd_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + pci_save_state(pdev); + return 0; +} + +static int vmd_resume(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + pci_restore_state(pdev); + return 0; +} +#endif +static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume); + +static const struct pci_device_id vmd_ids[] = { + {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x201d),}, + {0,} +}; +MODULE_DEVICE_TABLE(pci, vmd_ids); + +static struct pci_driver vmd_drv = { + .name = "vmd", + .id_table = vmd_ids, + .probe = vmd_probe, + .remove = vmd_remove, + .driver = { + .pm = &vmd_dev_pm_ops, + }, +}; +module_pci_driver(vmd_drv); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION("0.6");
This moves the driver source and Kconfig to the PCI host bridge drivers directory, relocating the config option to a more appropriate sub-menu instead of occupying the top-level location. The Kconfig option for VMD has been updated with its X86_64 dependency, which was implicitly included from the previous location, and now includes the previously missing information on building this driver as a loadable module. Cc: Jon Derrick <jonathan.derrick@intel.com> Signed-off-by: Keith Busch <keith.busch@intel.com> --- arch/x86/Kconfig | 13 - arch/x86/pci/Makefile | 2 - arch/x86/pci/vmd.c | 766 ---------------------------------------------- drivers/pci/host/Kconfig | 16 + drivers/pci/host/Makefile | 1 + drivers/pci/host/vmd.c | 766 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 783 insertions(+), 781 deletions(-) delete mode 100644 arch/x86/pci/vmd.c create mode 100644 drivers/pci/host/vmd.c