Message ID | 41528182.16281476.1677879021033.JavaMail.zimbra@raptorengineeringinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Reenable VFIO support on POWER systems | expand |
On 04/03/2023 08:30, Timothy Pearson wrote: > allow blocking domains > > Up until now PPC64 managed to avoid using iommu_ops. The VFIO driver > uses a SPAPR TCE sub-driver and all iommu_ops uses were kept in > the Type1 VFIO driver. Recent development added 2 uses of iommu_ops to > the generic VFIO which broke POWER: > - a coherency capability check; > - blocking IOMMU domain - iommu_group_dma_owner_claimed()/... > > This adds a simple iommu_ops which reports support for cache > coherency and provides a basic support for blocking domains. No other > domain types are implemented so the default domain is NULL. > > Since now iommu_ops controls the group ownership, this takes it out of > VFIO. > > This adds an IOMMU device into a pci_controller (=PHB) and registers it > in the IOMMU subsystem, iommu_ops is registered at this point. > This setup is done in postcore_initcall_sync. > > This replaces iommu_group_add_device() with iommu_probe_device() as > the former misses necessary steps in connecting PCI devices to IOMMU > devices. This adds a comment about why explicit iommu_probe_device() > is still needed. > > The previous discussion is here: > https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20220707135552.3688927-1-aik@ozlabs.ru/ > https://patchwork.ozlabs.org/project/kvm-ppc/patch/20220701061751.1955857-1-aik@ozlabs.ru/ > > Fixes: e8ae0e140c05 ("vfio: Require that devices support DMA cache coherence") > Fixes: 70693f470848 ("vfio: Set DMA ownership for VFIO devices") > Cc: Deming Wang <wangdeming@inspur.com> > Cc: Robin Murphy <robin.murphy@arm.com> > Cc: Jason Gunthorpe <jgg@nvidia.com> > Cc: Alex Williamson <alex.williamson@redhat.com> > Cc: Daniel Henrique Barboza <danielhb413@gmail.com> > Cc: Fabiano Rosas <farosas@linux.ibm.com> > Cc: Murilo Opsfelder Araujo <muriloo@linux.ibm.com> > Cc: Nicholas Piggin <npiggin@gmail.com> > Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> > Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com> > --- > arch/powerpc/include/asm/pci-bridge.h | 7 + > arch/powerpc/kernel/iommu.c | 149 +++++++++++++++++++++- > arch/powerpc/platforms/powernv/pci-ioda.c | 30 +++++ > arch/powerpc/platforms/pseries/iommu.c | 24 ++++ > arch/powerpc/platforms/pseries/pseries.h | 4 + > arch/powerpc/platforms/pseries/setup.c | 3 + > drivers/vfio/vfio_iommu_spapr_tce.c | 8 -- > 7 files changed, 215 insertions(+), 10 deletions(-) > > diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h > index e18c95f4e1d4..fcab0e4b203b 100644 > --- a/arch/powerpc/include/asm/pci-bridge.h > +++ b/arch/powerpc/include/asm/pci-bridge.h > @@ -8,6 +8,7 @@ > #include <linux/list.h> > #include <linux/ioport.h> > #include <linux/numa.h> > +#include <linux/iommu.h> > > struct device_node; > > @@ -44,6 +45,9 @@ struct pci_controller_ops { > #endif > > void (*shutdown)(struct pci_controller *hose); > + > + struct iommu_group *(*device_group)(struct pci_controller *hose, > + struct pci_dev *pdev); > }; > > /* > @@ -131,6 +135,9 @@ struct pci_controller { > struct irq_domain *dev_domain; > struct irq_domain *msi_domain; > struct fwnode_handle *fwnode; > + > + /* iommu_ops support */ > + struct iommu_device iommu; > }; > > /* These are used for config access before all the PCI probing > diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c > index d873c123ab49..46c8912ae87c 100644 > --- a/arch/powerpc/kernel/iommu.c > +++ b/arch/powerpc/kernel/iommu.c > @@ -35,6 +35,7 @@ > #include <asm/vio.h> > #include <asm/tce.h> > #include <asm/mmu_context.h> > +#include <asm/ppc-pci.h> > > #define DBG(...) > > @@ -1158,8 +1159,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) > > pr_debug("%s: Adding %s to iommu group %d\n", > __func__, dev_name(dev), iommu_group_id(table_group->group)); > - > - return iommu_group_add_device(table_group->group, dev); > + /* > + * This is still not adding devices via the IOMMU bus notifier because > + * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls > + * pcibios_scan_phb() first (and this guy adds devices and triggers > + * the notifier) and only then it calls pci_bus_add_devices() which > + * configures DMA for buses which also creates PEs and IOMMU groups. > + */ > + return iommu_probe_device(dev); > } > EXPORT_SYMBOL_GPL(iommu_add_device); > > @@ -1239,6 +1246,7 @@ static long spapr_tce_take_ownership(struct iommu_table_group *table_group) > rc = iommu_take_ownership(tbl); > if (!rc) > continue; > + > for (j = 0; j < i; ++j) > iommu_release_ownership(table_group->tables[j]); > return rc; > @@ -1271,4 +1279,141 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = { > .release_ownership = spapr_tce_release_ownership, > }; > > +/* > + * A simple iommu_ops to allow less cruft in generic VFIO code. > + */ > +static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, > + struct device *dev) > +{ > + struct iommu_group *grp = iommu_group_get(dev); > + struct iommu_table_group *table_group; > + int ret = -EINVAL; > + > + if (!grp) > + return -ENODEV; > + > + table_group = iommu_group_get_iommudata(grp); > + ret = table_group->ops->take_ownership(table_group); > + iommu_group_put(grp); > + > + return ret; > +} > + > +static void spapr_tce_blocking_iommu_detach_dev(struct iommu_domain *dom, > + struct device *dev) > +{ > + struct iommu_group *grp = iommu_group_get(dev); > + struct iommu_table_group *table_group; > + > + table_group = iommu_group_get_iommudata(grp); > + table_group->ops->release_ownership(table_group); > +} > + > +static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = { > + .attach_dev = spapr_tce_blocking_iommu_attach_dev, > + .detach_dev = spapr_tce_blocking_iommu_detach_dev, This .detach_dev() is gone now, the upstream has it removed. I am not quite sure now what should be calling release_ownership(). Probably spapr_tce_iommu_ops is going to need default_domain_ops, which attach_dev() will call release_ownership(). Please send patches via "git send-email", it links the cover letter to the patches so they appear threaded and (more importantly) it adds all the people from cc: and sob: to the actual mails sent out so people get copied. In this case, we want Jason to see this, at least :) Thanks, > +}; > + > +static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) > +{ > + switch (cap) { > + case IOMMU_CAP_CACHE_COHERENCY: > + return true; > + default: > + break; > + } > + > + return false; > +} > + > +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) > +{ > + struct iommu_domain *dom; > + > + if (type != IOMMU_DOMAIN_BLOCKED) > + return NULL; > + > + dom = kzalloc(sizeof(*dom), GFP_KERNEL); > + if (!dom) > + return NULL; > + > + dom->ops = &spapr_tce_blocking_domain_ops; > + > + return dom; > +} > + > +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) > +{ > + struct pci_dev *pdev; > + struct pci_controller *hose; > + > + if (!dev_is_pci(dev)) > + return ERR_PTR(-EPERM); > + > + pdev = to_pci_dev(dev); > + hose = pdev->bus->sysdata; > + > + return &hose->iommu; > +} > + > +static void spapr_tce_iommu_release_device(struct device *dev) > +{ > +} > + > +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) > +{ > + struct pci_controller *hose; > + struct pci_dev *pdev; > + > + pdev = to_pci_dev(dev); > + hose = pdev->bus->sysdata; > + > + if (!hose->controller_ops.device_group) > + return ERR_PTR(-ENOENT); > + > + return hose->controller_ops.device_group(hose, pdev); > +} > + > +static const struct iommu_ops spapr_tce_iommu_ops = { > + .capable = spapr_tce_iommu_capable, > + .domain_alloc = spapr_tce_iommu_domain_alloc, > + .probe_device = spapr_tce_iommu_probe_device, > + .release_device = spapr_tce_iommu_release_device, > + .device_group = spapr_tce_iommu_device_group, > +}; > + > +static struct attribute *spapr_tce_iommu_attrs[] = { > + NULL, > +}; > + > +static struct attribute_group spapr_tce_iommu_group = { > + .name = "spapr-tce-iommu", > + .attrs = spapr_tce_iommu_attrs, > +}; > + > +static const struct attribute_group *spapr_tce_iommu_groups[] = { > + &spapr_tce_iommu_group, > + NULL, > +}; > + > +/* > + * This registers IOMMU devices of PHBs. This needs to happen > + * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and > + * before subsys_initcall(iommu_subsys_init). > + */ > +static int __init spapr_tce_setup_phb_iommus_initcall(void) > +{ > + struct pci_controller *hose; > + > + list_for_each_entry(hose, &hose_list, list_node) { > + iommu_device_sysfs_add(&hose->iommu, hose->parent, > + spapr_tce_iommu_groups, "iommu-phb%04x", > + hose->global_number); > + iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, > + hose->parent); > + } > + return 0; > +} > +postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall); > + > #endif /* CONFIG_IOMMU_API */ > diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c > index 7b371322f1ae..5f0b0bbfa90c 100644 > --- a/arch/powerpc/platforms/powernv/pci-ioda.c > +++ b/arch/powerpc/platforms/powernv/pci-ioda.c > @@ -1897,6 +1897,13 @@ static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group) > /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */ > struct iommu_table *tbl = pe->table_group.tables[0]; > > + /* > + * iommu_ops transfers the ownership per a device and we mode > + * the group ownership with the first device in the group. > + */ > + if (!tbl) > + return 0; > + > pnv_pci_ioda2_set_bypass(pe, false); > pnv_pci_ioda2_unset_window(&pe->table_group, 0); > if (pe->pbus) > @@ -1913,6 +1920,9 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) > struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, > table_group); > > + /* See the comment about iommu_ops above */ > + if (pe->table_group.tables[0]) > + return; > pnv_pci_ioda2_setup_default_config(pe); > if (pe->pbus) > pnv_ioda_setup_bus_dma(pe, pe->pbus); > @@ -2918,6 +2928,25 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus) > } > } > > +static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose, > + struct pci_dev *pdev) > +{ > + struct pnv_phb *phb = hose->private_data; > + struct pnv_ioda_pe *pe; > + > + if (WARN_ON(!phb)) > + return ERR_PTR(-ENODEV); > + > + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); > + if (!pe) > + return ERR_PTR(-ENODEV); > + > + if (!pe->table_group.group) > + return ERR_PTR(-ENODEV); > + > + return iommu_group_ref_get(pe->table_group.group); > +} > + > static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { > .dma_dev_setup = pnv_pci_ioda_dma_dev_setup, > .dma_bus_setup = pnv_pci_ioda_dma_bus_setup, > @@ -2928,6 +2957,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { > .setup_bridge = pnv_pci_fixup_bridge_resources, > .reset_secondary_bus = pnv_pci_reset_secondary_bus, > .shutdown = pnv_pci_ioda_shutdown, > + .device_group = pnv_pci_device_group, > }; > > static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { > diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c > index 6769d2c55187..1c911eedf8fb 100644 > --- a/arch/powerpc/platforms/pseries/iommu.c > +++ b/arch/powerpc/platforms/pseries/iommu.c > @@ -1727,3 +1727,27 @@ static int __init tce_iommu_bus_notifier_init(void) > return 0; > } > machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); > + > +#ifdef CONFIG_SPAPR_TCE_IOMMU > +struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose, > + struct pci_dev *pdev) > +{ > + struct device_node *pdn, *dn = pdev->dev.of_node; > + struct iommu_group *grp; > + struct pci_dn *pci; > + > + pdn = pci_dma_find(dn, NULL); > + if (!pdn || !PCI_DN(pdn)) > + return ERR_PTR(-ENODEV); > + > + pci = PCI_DN(pdn); > + if (!pci->table_group) > + return ERR_PTR(-ENODEV); > + > + grp = pci->table_group->group; > + if (!grp) > + return ERR_PTR(-ENODEV); > + > + return iommu_group_ref_get(grp); > +} > +#endif > diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h > index 1d75b7742ef0..f8bce40ebd0c 100644 > --- a/arch/powerpc/platforms/pseries/pseries.h > +++ b/arch/powerpc/platforms/pseries/pseries.h > @@ -123,5 +123,9 @@ static inline void pseries_lpar_read_hblkrm_characteristics(void) { } > #endif > > void pseries_rng_init(void); > +#ifdef CONFIG_SPAPR_TCE_IOMMU > +struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose, > + struct pci_dev *pdev); > +#endif > > #endif /* _PSERIES_PSERIES_H */ > diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c > index 8ef3270515a9..f16fcd93297a 100644 > --- a/arch/powerpc/platforms/pseries/setup.c > +++ b/arch/powerpc/platforms/pseries/setup.c > @@ -1125,6 +1125,9 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) > > struct pci_controller_ops pseries_pci_controller_ops = { > .probe_mode = pSeries_pci_probe_mode, > +#ifdef CONFIG_SPAPR_TCE_IOMMU > + .device_group = pSeries_pci_device_group, > +#endif > }; > > define_machine(pseries) { > diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c > index c3f8ae102ece..a94ec6225d31 100644 > --- a/drivers/vfio/vfio_iommu_spapr_tce.c > +++ b/drivers/vfio/vfio_iommu_spapr_tce.c > @@ -1200,8 +1200,6 @@ static void tce_iommu_release_ownership(struct tce_container *container, > for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) > if (container->tables[i]) > table_group->ops->unset_window(table_group, i); > - > - table_group->ops->release_ownership(table_group); > } > > static long tce_iommu_take_ownership(struct tce_container *container, > @@ -1209,10 +1207,6 @@ static long tce_iommu_take_ownership(struct tce_container *container, > { > long i, ret = 0; > > - ret = table_group->ops->take_ownership(table_group); > - if (ret) > - return ret; > - > /* Set all windows to the new group */ > for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { > struct iommu_table *tbl = container->tables[i]; > @@ -1231,8 +1225,6 @@ static long tce_iommu_take_ownership(struct tce_container *container, > for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) > table_group->ops->unset_window(table_group, i); > > - table_group->ops->release_ownership(table_group); > - > return ret; > } >
On Mon, Mar 06, 2023 at 09:44:00PM +1100, Alexey Kardashevskiy wrote: > > +static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = { > > + .attach_dev = spapr_tce_blocking_iommu_attach_dev, > > + .detach_dev = spapr_tce_blocking_iommu_detach_dev, > > > This .detach_dev() is gone now, the upstream has it removed. I am not quite > sure now what should be calling release_ownership(). Probably > spapr_tce_iommu_ops is going to need default_domain_ops, which attach_dev() > will call release_ownership(). set_platform_dma_ops is what you want here Jason
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index e18c95f4e1d4..fcab0e4b203b 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -8,6 +8,7 @@ #include <linux/list.h> #include <linux/ioport.h> #include <linux/numa.h> +#include <linux/iommu.h> struct device_node; @@ -44,6 +45,9 @@ struct pci_controller_ops { #endif void (*shutdown)(struct pci_controller *hose); + + struct iommu_group *(*device_group)(struct pci_controller *hose, + struct pci_dev *pdev); }; /* @@ -131,6 +135,9 @@ struct pci_controller { struct irq_domain *dev_domain; struct irq_domain *msi_domain; struct fwnode_handle *fwnode; + + /* iommu_ops support */ + struct iommu_device iommu; }; /* These are used for config access before all the PCI probing diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index d873c123ab49..46c8912ae87c 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -35,6 +35,7 @@ #include <asm/vio.h> #include <asm/tce.h> #include <asm/mmu_context.h> +#include <asm/ppc-pci.h> #define DBG(...) @@ -1158,8 +1159,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), iommu_group_id(table_group->group)); - - return iommu_group_add_device(table_group->group, dev); + /* + * This is still not adding devices via the IOMMU bus notifier because + * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls + * pcibios_scan_phb() first (and this guy adds devices and triggers + * the notifier) and only then it calls pci_bus_add_devices() which + * configures DMA for buses which also creates PEs and IOMMU groups. + */ + return iommu_probe_device(dev); } EXPORT_SYMBOL_GPL(iommu_add_device); @@ -1239,6 +1246,7 @@ static long spapr_tce_take_ownership(struct iommu_table_group *table_group) rc = iommu_take_ownership(tbl); if (!rc) continue; + for (j = 0; j < i; ++j) iommu_release_ownership(table_group->tables[j]); return rc; @@ -1271,4 +1279,141 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = { .release_ownership = spapr_tce_release_ownership, }; +/* + * A simple iommu_ops to allow less cruft in generic VFIO code. + */ +static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, + struct device *dev) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + int ret = -EINVAL; + + if (!grp) + return -ENODEV; + + table_group = iommu_group_get_iommudata(grp); + ret = table_group->ops->take_ownership(table_group); + iommu_group_put(grp); + + return ret; +} + +static void spapr_tce_blocking_iommu_detach_dev(struct iommu_domain *dom, + struct device *dev) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + + table_group = iommu_group_get_iommudata(grp); + table_group->ops->release_ownership(table_group); +} + +static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = { + .attach_dev = spapr_tce_blocking_iommu_attach_dev, + .detach_dev = spapr_tce_blocking_iommu_detach_dev, +}; + +static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + break; + } + + return false; +} + +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) +{ + struct iommu_domain *dom; + + if (type != IOMMU_DOMAIN_BLOCKED) + return NULL; + + dom = kzalloc(sizeof(*dom), GFP_KERNEL); + if (!dom) + return NULL; + + dom->ops = &spapr_tce_blocking_domain_ops; + + return dom; +} + +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) +{ + struct pci_dev *pdev; + struct pci_controller *hose; + + if (!dev_is_pci(dev)) + return ERR_PTR(-EPERM); + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + return &hose->iommu; +} + +static void spapr_tce_iommu_release_device(struct device *dev) +{ +} + +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) +{ + struct pci_controller *hose; + struct pci_dev *pdev; + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + if (!hose->controller_ops.device_group) + return ERR_PTR(-ENOENT); + + return hose->controller_ops.device_group(hose, pdev); +} + +static const struct iommu_ops spapr_tce_iommu_ops = { + .capable = spapr_tce_iommu_capable, + .domain_alloc = spapr_tce_iommu_domain_alloc, + .probe_device = spapr_tce_iommu_probe_device, + .release_device = spapr_tce_iommu_release_device, + .device_group = spapr_tce_iommu_device_group, +}; + +static struct attribute *spapr_tce_iommu_attrs[] = { + NULL, +}; + +static struct attribute_group spapr_tce_iommu_group = { + .name = "spapr-tce-iommu", + .attrs = spapr_tce_iommu_attrs, +}; + +static const struct attribute_group *spapr_tce_iommu_groups[] = { + &spapr_tce_iommu_group, + NULL, +}; + +/* + * This registers IOMMU devices of PHBs. This needs to happen + * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and + * before subsys_initcall(iommu_subsys_init). + */ +static int __init spapr_tce_setup_phb_iommus_initcall(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + iommu_device_sysfs_add(&hose->iommu, hose->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + hose->global_number); + iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, + hose->parent); + } + return 0; +} +postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall); + #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 7b371322f1ae..5f0b0bbfa90c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1897,6 +1897,13 @@ static long pnv_ioda2_take_ownership(struct iommu_table_group *table_group) /* Store @tbl as pnv_pci_ioda2_unset_window() resets it */ struct iommu_table *tbl = pe->table_group.tables[0]; + /* + * iommu_ops transfers the ownership per a device and we mode + * the group ownership with the first device in the group. + */ + if (!tbl) + return 0; + pnv_pci_ioda2_set_bypass(pe, false); pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (pe->pbus) @@ -1913,6 +1920,9 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group) struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe, table_group); + /* See the comment about iommu_ops above */ + if (pe->table_group.tables[0]) + return; pnv_pci_ioda2_setup_default_config(pe); if (pe->pbus) pnv_ioda_setup_bus_dma(pe, pe->pbus); @@ -2918,6 +2928,25 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus) } } +static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose, + struct pci_dev *pdev) +{ + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pe; + + if (WARN_ON(!phb)) + return ERR_PTR(-ENODEV); + + pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + if (!pe) + return ERR_PTR(-ENODEV); + + if (!pe->table_group.group) + return ERR_PTR(-ENODEV); + + return iommu_group_ref_get(pe->table_group.group); +} + static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .dma_dev_setup = pnv_pci_ioda_dma_dev_setup, .dma_bus_setup = pnv_pci_ioda_dma_bus_setup, @@ -2928,6 +2957,7 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = { .setup_bridge = pnv_pci_fixup_bridge_resources, .reset_secondary_bus = pnv_pci_reset_secondary_bus, .shutdown = pnv_pci_ioda_shutdown, + .device_group = pnv_pci_device_group, }; static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 6769d2c55187..1c911eedf8fb 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1727,3 +1727,27 @@ static int __init tce_iommu_bus_notifier_init(void) return 0; } machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init); + +#ifdef CONFIG_SPAPR_TCE_IOMMU +struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose, + struct pci_dev *pdev) +{ + struct device_node *pdn, *dn = pdev->dev.of_node; + struct iommu_group *grp; + struct pci_dn *pci; + + pdn = pci_dma_find(dn, NULL); + if (!pdn || !PCI_DN(pdn)) + return ERR_PTR(-ENODEV); + + pci = PCI_DN(pdn); + if (!pci->table_group) + return ERR_PTR(-ENODEV); + + grp = pci->table_group->group; + if (!grp) + return ERR_PTR(-ENODEV); + + return iommu_group_ref_get(grp); +} +#endif diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 1d75b7742ef0..f8bce40ebd0c 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -123,5 +123,9 @@ static inline void pseries_lpar_read_hblkrm_characteristics(void) { } #endif void pseries_rng_init(void); +#ifdef CONFIG_SPAPR_TCE_IOMMU +struct iommu_group *pSeries_pci_device_group(struct pci_controller *hose, + struct pci_dev *pdev); +#endif #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 8ef3270515a9..f16fcd93297a 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -1125,6 +1125,9 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) struct pci_controller_ops pseries_pci_controller_ops = { .probe_mode = pSeries_pci_probe_mode, +#ifdef CONFIG_SPAPR_TCE_IOMMU + .device_group = pSeries_pci_device_group, +#endif }; define_machine(pseries) { diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index c3f8ae102ece..a94ec6225d31 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1200,8 +1200,6 @@ static void tce_iommu_release_ownership(struct tce_container *container, for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) if (container->tables[i]) table_group->ops->unset_window(table_group, i); - - table_group->ops->release_ownership(table_group); } static long tce_iommu_take_ownership(struct tce_container *container, @@ -1209,10 +1207,6 @@ static long tce_iommu_take_ownership(struct tce_container *container, { long i, ret = 0; - ret = table_group->ops->take_ownership(table_group); - if (ret) - return ret; - /* Set all windows to the new group */ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { struct iommu_table *tbl = container->tables[i]; @@ -1231,8 +1225,6 @@ static long tce_iommu_take_ownership(struct tce_container *container, for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) table_group->ops->unset_window(table_group, i); - table_group->ops->release_ownership(table_group); - return ret; }