Message ID | 2a42664b61cef7cdd44688679b60a8c6c397b075.1650379269.git.jag.raman@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user server in QEMU | expand |
> On Apr 19, 2022, at 4:45 PM, Jag Raman <jag.raman@oracle.com> wrote: > > Assign separate address space for each device in the remote processes. > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > --- > include/hw/remote/iommu.h | 40 +++++++++++++ > hw/remote/iommu.c | 114 ++++++++++++++++++++++++++++++++++++++ > hw/remote/machine.c | 13 ++++- > MAINTAINERS | 2 + > hw/remote/meson.build | 1 + > 5 files changed, 169 insertions(+), 1 deletion(-) > create mode 100644 include/hw/remote/iommu.h > create mode 100644 hw/remote/iommu.c > > diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h > new file mode 100644 > index 0000000000..33b68a8f4b > --- /dev/null > +++ b/include/hw/remote/iommu.h > @@ -0,0 +1,40 @@ > +/** > + * Copyright © 2022 Oracle and/or its affiliates. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#ifndef REMOTE_IOMMU_H > +#define REMOTE_IOMMU_H > + > +#include "hw/pci/pci_bus.h" > +#include "hw/pci/pci.h" > + > +#ifndef INT2VOIDP > +#define INT2VOIDP(i) (void *)(uintptr_t)(i) > +#endif > + > +typedef struct RemoteIommuElem { > + MemoryRegion *mr; > + > + AddressSpace as; > +} RemoteIommuElem; > + > +#define TYPE_REMOTE_IOMMU "x-remote-iommu" > +OBJECT_DECLARE_SIMPLE_TYPE(RemoteIommu, REMOTE_IOMMU) > + > +struct RemoteIommu { > + Object parent; > + > + GHashTable *elem_by_devfn; > + > + QemuMutex lock; > +}; > + > +void remote_iommu_setup(PCIBus *pci_bus); > + > +void remote_iommu_unplug_dev(PCIDevice *pci_dev); > + > +#endif > diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c > new file mode 100644 > index 0000000000..16c6b0834e > --- /dev/null > +++ b/hw/remote/iommu.c > @@ -0,0 +1,114 @@ > +/** > + * IOMMU for remote device > + * > + * Copyright © 2022 Oracle and/or its affiliates. > + * > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > + * See the COPYING file in the top-level directory. > + * > + */ > + > +#include "qemu/osdep.h" > +#include "qemu-common.h" > + > +#include "hw/remote/iommu.h" > +#include "hw/pci/pci_bus.h" > +#include "hw/pci/pci.h" > +#include "exec/memory.h" > +#include "exec/address-spaces.h" > +#include "trace.h" > + > +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, > + void *opaque, int devfn) > +{ > + RemoteIommu *iommu = opaque; > + RemoteIommuElem *elem = NULL; > + > + qemu_mutex_lock(&iommu->lock); > + > + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); > + > + if (!elem) { > + elem = g_malloc0(sizeof(RemoteIommuElem)); > + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); > + } > + > + if (!elem->mr) { > + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); > + memory_region_set_size(elem->mr, UINT64_MAX); > + address_space_init(&elem->as, elem->mr, NULL); Hi, I’d like to add a note here. We tried to add "elem->mr” as a child of PCIDevice. That way, when PCIDevice is unplugged, the child is also finalized. However, there was some issue with hotplug. During the hotplug, there’s a window during initialization where we couldn’t lookup the PCIDevice by “devfn”. do_pci_register_device() -> pci_init_bus_master() -> pci_device_iommu_address_space() happens before do_pci_register_device() -> “bus->devices[devfn] = pci_dev”. As such, pci_find_device() doesn’t work at this time. Thank you! -- Jag > + } > + > + qemu_mutex_unlock(&iommu->lock); > + > + return &elem->as; > +} > + > +void remote_iommu_unplug_dev(PCIDevice *pci_dev) > +{ > + AddressSpace *as = pci_device_iommu_address_space(pci_dev); > + RemoteIommuElem *elem = NULL; > + > + if (as == &address_space_memory) { > + return; > + } > + > + elem = container_of(as, RemoteIommuElem, as); > + > + address_space_destroy(&elem->as); > + > + object_unref(elem->mr); > + > + elem->mr = NULL; > +} > + > +static void remote_iommu_init(Object *obj) > +{ > + RemoteIommu *iommu = REMOTE_IOMMU(obj); > + > + iommu->elem_by_devfn = g_hash_table_new_full(NULL, NULL, NULL, g_free); > + > + qemu_mutex_init(&iommu->lock); > +} > + > +static void remote_iommu_finalize(Object *obj) > +{ > + RemoteIommu *iommu = REMOTE_IOMMU(obj); > + > + qemu_mutex_destroy(&iommu->lock); > + > + if (iommu->elem_by_devfn) { > + g_hash_table_destroy(iommu->elem_by_devfn); > + iommu->elem_by_devfn = NULL; > + } > +} > + > +void remote_iommu_setup(PCIBus *pci_bus) > +{ > + RemoteIommu *iommu = NULL; > + > + g_assert(pci_bus); > + > + iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU)); > + > + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu); > + > + object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu)); > + > + object_unref(OBJECT(iommu)); > +} > + > +static const TypeInfo remote_iommu_info = { > + .name = TYPE_REMOTE_IOMMU, > + .parent = TYPE_OBJECT, > + .instance_size = sizeof(RemoteIommu), > + .instance_init = remote_iommu_init, > + .instance_finalize = remote_iommu_finalize, > +}; > + > +static void remote_iommu_register_types(void) > +{ > + type_register_static(&remote_iommu_info); > +} > + > +type_init(remote_iommu_register_types) > diff --git a/hw/remote/machine.c b/hw/remote/machine.c > index ed91659794..cca5d25f50 100644 > --- a/hw/remote/machine.c > +++ b/hw/remote/machine.c > @@ -21,6 +21,7 @@ > #include "qapi/error.h" > #include "hw/pci/pci_host.h" > #include "hw/remote/iohub.h" > +#include "hw/remote/iommu.h" > #include "hw/qdev-core.h" > > static void remote_machine_init(MachineState *machine) > @@ -100,6 +101,16 @@ static void remote_machine_instance_init(Object *obj) > s->auto_shutdown = true; > } > > +static void remote_machine_dev_unplug_cb(HotplugHandler *hotplug_dev, > + DeviceState *dev, Error **errp) > +{ > + qdev_unrealize(dev); > + > + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { > + remote_iommu_unplug_dev(PCI_DEVICE(dev)); > + } > +} > + > static void remote_machine_class_init(ObjectClass *oc, void *data) > { > MachineClass *mc = MACHINE_CLASS(oc); > @@ -108,7 +119,7 @@ static void remote_machine_class_init(ObjectClass *oc, void *data) > mc->init = remote_machine_init; > mc->desc = "Experimental remote machine"; > > - hc->unplug = qdev_simple_device_unplug_cb; > + hc->unplug = remote_machine_dev_unplug_cb; > > object_class_property_add_bool(oc, "vfio-user", > remote_machine_get_vfio_user, > diff --git a/MAINTAINERS b/MAINTAINERS > index 37afdecc61..3e62ccab0a 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -3599,6 +3599,8 @@ F: hw/remote/iohub.c > F: include/hw/remote/iohub.h > F: subprojects/libvfio-user > F: hw/remote/vfio-user-obj.c > +F: hw/remote/iommu.c > +F: include/hw/remote/iommu.h > > EBPF: > M: Jason Wang <jasowang@redhat.com> > diff --git a/hw/remote/meson.build b/hw/remote/meson.build > index 534ac5df79..bcef83c8cc 100644 > --- a/hw/remote/meson.build > +++ b/hw/remote/meson.build > @@ -6,6 +6,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c')) > remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c')) > remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) > remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c')) > +remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c')) > remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c')) > > remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: vfiouser) > -- > 2.20.1 >
On Tue, Apr 19, 2022 at 04:44:17PM -0400, Jagannathan Raman wrote: > +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, > + void *opaque, int devfn) > +{ > + RemoteIommu *iommu = opaque; > + RemoteIommuElem *elem = NULL; > + > + qemu_mutex_lock(&iommu->lock); > + > + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); > + > + if (!elem) { > + elem = g_malloc0(sizeof(RemoteIommuElem)); > + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); > + } > + > + if (!elem->mr) { > + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); > + memory_region_set_size(elem->mr, UINT64_MAX); > + address_space_init(&elem->as, elem->mr, NULL); > + } > + > + qemu_mutex_unlock(&iommu->lock); > + > + return &elem->as; > +} A few comments that can be added to this file to explain the design: - Each vfio-user server handles one PCIDevice on a PCIBus. There is one RemoteIommu per PCIBus, so the RemoteIommu tracks multiple PCIDevices by maintaining a ->elem_by_devfn mapping. - memory_region_init_iommu() is not used because vfio-user MemoryRegions will be added to the elem->mr container instead. This is more natural than implementing the IOMMUMemoryRegionClass APIs since vfio-user provides something that is close to a full-fledged MemoryRegion and not like an IOMMU mapping. - When a device is hot unplugged, the elem->mr reference is dropped so all vfio-user MemoryRegions associated with this vfio-user server are destroyed. > +static void remote_iommu_finalize(Object *obj) > +{ > + RemoteIommu *iommu = REMOTE_IOMMU(obj); > + > + qemu_mutex_destroy(&iommu->lock); > + > + if (iommu->elem_by_devfn) { ->init() and ->finalize() are a pair, so I don't think ->finalize() will ever be called with a NULL ->elem_by_devfn. If ->elem_by_devfn can be NULL then there would probably need to be a check around qemu_mutex_destroy(&iommu->lock) too. > + g_hash_table_destroy(iommu->elem_by_devfn); > + iommu->elem_by_devfn = NULL; > + } > +}
On Wed, Apr 20, 2022 at 11:15:16AM +0000, Jag Raman wrote: > > > > On Apr 19, 2022, at 4:45 PM, Jag Raman <jag.raman@oracle.com> wrote: > > > > Assign separate address space for each device in the remote processes. > > > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > > --- > > include/hw/remote/iommu.h | 40 +++++++++++++ > > hw/remote/iommu.c | 114 ++++++++++++++++++++++++++++++++++++++ > > hw/remote/machine.c | 13 ++++- > > MAINTAINERS | 2 + > > hw/remote/meson.build | 1 + > > 5 files changed, 169 insertions(+), 1 deletion(-) > > create mode 100644 include/hw/remote/iommu.h > > create mode 100644 hw/remote/iommu.c > > > > diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h > > new file mode 100644 > > index 0000000000..33b68a8f4b > > --- /dev/null > > +++ b/include/hw/remote/iommu.h > > @@ -0,0 +1,40 @@ > > +/** > > + * Copyright © 2022 Oracle and/or its affiliates. > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > > + * See the COPYING file in the top-level directory. > > + * > > + */ > > + > > +#ifndef REMOTE_IOMMU_H > > +#define REMOTE_IOMMU_H > > + > > +#include "hw/pci/pci_bus.h" > > +#include "hw/pci/pci.h" > > + > > +#ifndef INT2VOIDP > > +#define INT2VOIDP(i) (void *)(uintptr_t)(i) > > +#endif > > + > > +typedef struct RemoteIommuElem { > > + MemoryRegion *mr; > > + > > + AddressSpace as; > > +} RemoteIommuElem; > > + > > +#define TYPE_REMOTE_IOMMU "x-remote-iommu" > > +OBJECT_DECLARE_SIMPLE_TYPE(RemoteIommu, REMOTE_IOMMU) > > + > > +struct RemoteIommu { > > + Object parent; > > + > > + GHashTable *elem_by_devfn; > > + > > + QemuMutex lock; > > +}; > > + > > +void remote_iommu_setup(PCIBus *pci_bus); > > + > > +void remote_iommu_unplug_dev(PCIDevice *pci_dev); > > + > > +#endif > > diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c > > new file mode 100644 > > index 0000000000..16c6b0834e > > --- /dev/null > > +++ b/hw/remote/iommu.c > > @@ -0,0 +1,114 @@ > > +/** > > + * IOMMU for remote device > > + * > > + * Copyright © 2022 Oracle and/or its affiliates. > > + * > > + * This work is licensed under the terms of the GNU GPL, version 2 or later. > > + * See the COPYING file in the top-level directory. > > + * > > + */ > > + > > +#include "qemu/osdep.h" > > +#include "qemu-common.h" > > + > > +#include "hw/remote/iommu.h" > > +#include "hw/pci/pci_bus.h" > > +#include "hw/pci/pci.h" > > +#include "exec/memory.h" > > +#include "exec/address-spaces.h" > > +#include "trace.h" > > + > > +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, > > + void *opaque, int devfn) > > +{ > > + RemoteIommu *iommu = opaque; > > + RemoteIommuElem *elem = NULL; > > + > > + qemu_mutex_lock(&iommu->lock); > > + > > + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); > > + > > + if (!elem) { > > + elem = g_malloc0(sizeof(RemoteIommuElem)); > > + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); > > + } > > + > > + if (!elem->mr) { > > + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); > > + memory_region_set_size(elem->mr, UINT64_MAX); > > + address_space_init(&elem->as, elem->mr, NULL); > > Hi, > > I’d like to add a note here. > > We tried to add "elem->mr” as a child of PCIDevice. That way, when PCIDevice is > unplugged, the child is also finalized. Do you mean via a memory_region_init()-family function where a parent object is given? Or do you mean by adding a QOM child property? > However, there was some issue with hotplug. During the hotplug, there’s a window > during initialization where we couldn’t lookup the PCIDevice by “devfn”. > > do_pci_register_device() -> pci_init_bus_master() -> pci_device_iommu_address_space() > happens before do_pci_register_device() -> “bus->devices[devfn] = pci_dev”. As such, > pci_find_device() doesn’t work at this time. I don't follow. What calls pci_find_device()? Stefan
> On Apr 25, 2022, at 5:31 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > On Tue, Apr 19, 2022 at 04:44:17PM -0400, Jagannathan Raman wrote: >> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, >> + void *opaque, int devfn) >> +{ >> + RemoteIommu *iommu = opaque; >> + RemoteIommuElem *elem = NULL; >> + >> + qemu_mutex_lock(&iommu->lock); >> + >> + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); >> + >> + if (!elem) { >> + elem = g_malloc0(sizeof(RemoteIommuElem)); >> + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); >> + } >> + >> + if (!elem->mr) { >> + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); >> + memory_region_set_size(elem->mr, UINT64_MAX); >> + address_space_init(&elem->as, elem->mr, NULL); >> + } >> + >> + qemu_mutex_unlock(&iommu->lock); >> + >> + return &elem->as; >> +} > > A few comments that can be added to this file to explain the design: > > - Each vfio-user server handles one PCIDevice on a PCIBus. There is one > RemoteIommu per PCIBus, so the RemoteIommu tracks multiple PCIDevices > by maintaining a ->elem_by_devfn mapping. > > - memory_region_init_iommu() is not used because vfio-user MemoryRegions > will be added to the elem->mr container instead. This is more natural > than implementing the IOMMUMemoryRegionClass APIs since vfio-user > provides something that is close to a full-fledged MemoryRegion and > not like an IOMMU mapping. > > - When a device is hot unplugged, the elem->mr reference is dropped so > all vfio-user MemoryRegions associated with this vfio-user server are > destroyed. OK, will add this comment. > >> +static void remote_iommu_finalize(Object *obj) >> +{ >> + RemoteIommu *iommu = REMOTE_IOMMU(obj); >> + >> + qemu_mutex_destroy(&iommu->lock); >> + >> + if (iommu->elem_by_devfn) { > > ->init() and ->finalize() are a pair, so I don't think ->finalize() will > ever be called with a NULL ->elem_by_devfn. OK, got it. Thanks! -- Jag > > If ->elem_by_devfn can be NULL then there would probably need to be a > check around qemu_mutex_destroy(&iommu->lock) too. > >> + g_hash_table_destroy(iommu->elem_by_devfn); >> + iommu->elem_by_devfn = NULL; >> + } >> +}
> On Apr 25, 2022, at 5:38 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > On Wed, Apr 20, 2022 at 11:15:16AM +0000, Jag Raman wrote: >> >> >>> On Apr 19, 2022, at 4:45 PM, Jag Raman <jag.raman@oracle.com> wrote: >>> >>> Assign separate address space for each device in the remote processes. >>> >>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> >>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com> >>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> >>> --- >>> include/hw/remote/iommu.h | 40 +++++++++++++ >>> hw/remote/iommu.c | 114 ++++++++++++++++++++++++++++++++++++++ >>> hw/remote/machine.c | 13 ++++- >>> MAINTAINERS | 2 + >>> hw/remote/meson.build | 1 + >>> 5 files changed, 169 insertions(+), 1 deletion(-) >>> create mode 100644 include/hw/remote/iommu.h >>> create mode 100644 hw/remote/iommu.c >>> >>> diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h >>> new file mode 100644 >>> index 0000000000..33b68a8f4b >>> --- /dev/null >>> +++ b/include/hw/remote/iommu.h >>> @@ -0,0 +1,40 @@ >>> +/** >>> + * Copyright © 2022 Oracle and/or its affiliates. >>> + * >>> + * This work is licensed under the terms of the GNU GPL, version 2 or later. >>> + * See the COPYING file in the top-level directory. >>> + * >>> + */ >>> + >>> +#ifndef REMOTE_IOMMU_H >>> +#define REMOTE_IOMMU_H >>> + >>> +#include "hw/pci/pci_bus.h" >>> +#include "hw/pci/pci.h" >>> + >>> +#ifndef INT2VOIDP >>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i) >>> +#endif >>> + >>> +typedef struct RemoteIommuElem { >>> + MemoryRegion *mr; >>> + >>> + AddressSpace as; >>> +} RemoteIommuElem; >>> + >>> +#define TYPE_REMOTE_IOMMU "x-remote-iommu" >>> +OBJECT_DECLARE_SIMPLE_TYPE(RemoteIommu, REMOTE_IOMMU) >>> + >>> +struct RemoteIommu { >>> + Object parent; >>> + >>> + GHashTable *elem_by_devfn; >>> + >>> + QemuMutex lock; >>> +}; >>> + >>> +void remote_iommu_setup(PCIBus *pci_bus); >>> + >>> +void remote_iommu_unplug_dev(PCIDevice *pci_dev); >>> + >>> +#endif >>> diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c >>> new file mode 100644 >>> index 0000000000..16c6b0834e >>> --- /dev/null >>> +++ b/hw/remote/iommu.c >>> @@ -0,0 +1,114 @@ >>> +/** >>> + * IOMMU for remote device >>> + * >>> + * Copyright © 2022 Oracle and/or its affiliates. >>> + * >>> + * This work is licensed under the terms of the GNU GPL, version 2 or later. >>> + * See the COPYING file in the top-level directory. >>> + * >>> + */ >>> + >>> +#include "qemu/osdep.h" >>> +#include "qemu-common.h" >>> + >>> +#include "hw/remote/iommu.h" >>> +#include "hw/pci/pci_bus.h" >>> +#include "hw/pci/pci.h" >>> +#include "exec/memory.h" >>> +#include "exec/address-spaces.h" >>> +#include "trace.h" >>> + >>> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, >>> + void *opaque, int devfn) >>> +{ >>> + RemoteIommu *iommu = opaque; >>> + RemoteIommuElem *elem = NULL; >>> + >>> + qemu_mutex_lock(&iommu->lock); >>> + >>> + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); >>> + >>> + if (!elem) { >>> + elem = g_malloc0(sizeof(RemoteIommuElem)); >>> + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); >>> + } >>> + >>> + if (!elem->mr) { >>> + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); >>> + memory_region_set_size(elem->mr, UINT64_MAX); >>> + address_space_init(&elem->as, elem->mr, NULL); >> >> Hi, >> >> I’d like to add a note here. >> >> We tried to add "elem->mr” as a child of PCIDevice. That way, when PCIDevice is >> unplugged, the child is also finalized. > > Do you mean via a memory_region_init()-family function where a parent > object is given? Or do you mean by adding a QOM child property? I mean by adding “elem->mr” as a QOM child property of PCIDevice. > >> However, there was some issue with hotplug. During the hotplug, there’s a window >> during initialization where we couldn’t lookup the PCIDevice by “devfn”. >> >> do_pci_register_device() -> pci_init_bus_master() -> pci_device_iommu_address_space() >> happens before do_pci_register_device() -> “bus->devices[devfn] = pci_dev”. As such, >> pci_find_device() doesn’t work at this time. > > I don't follow. What calls pci_find_device()? To add the MemoryRegion as a child of PCIDevice, remote_iommu_find_add_as() would need to lookup the PCIDevice using devfn. The function that looks up PCIDevice by devfn is pci_find_device(). The above note explains why we didn’t lookup the PCIDevice using pci_find_device() and then adding the MemoryRegion as its child. Thank you! -- Jag > > Stefan
On Mon, Apr 25, 2022 at 05:30:19PM +0000, Jag Raman wrote: > > > > On Apr 25, 2022, at 5:38 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote: > > > > On Wed, Apr 20, 2022 at 11:15:16AM +0000, Jag Raman wrote: > >> > >> > >>> On Apr 19, 2022, at 4:45 PM, Jag Raman <jag.raman@oracle.com> wrote: > >>> > >>> Assign separate address space for each device in the remote processes. > >>> > >>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > >>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > >>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> > >>> --- > >>> include/hw/remote/iommu.h | 40 +++++++++++++ > >>> hw/remote/iommu.c | 114 ++++++++++++++++++++++++++++++++++++++ > >>> hw/remote/machine.c | 13 ++++- > >>> MAINTAINERS | 2 + > >>> hw/remote/meson.build | 1 + > >>> 5 files changed, 169 insertions(+), 1 deletion(-) > >>> create mode 100644 include/hw/remote/iommu.h > >>> create mode 100644 hw/remote/iommu.c > >>> > >>> diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h > >>> new file mode 100644 > >>> index 0000000000..33b68a8f4b > >>> --- /dev/null > >>> +++ b/include/hw/remote/iommu.h > >>> @@ -0,0 +1,40 @@ > >>> +/** > >>> + * Copyright © 2022 Oracle and/or its affiliates. > >>> + * > >>> + * This work is licensed under the terms of the GNU GPL, version 2 or later. > >>> + * See the COPYING file in the top-level directory. > >>> + * > >>> + */ > >>> + > >>> +#ifndef REMOTE_IOMMU_H > >>> +#define REMOTE_IOMMU_H > >>> + > >>> +#include "hw/pci/pci_bus.h" > >>> +#include "hw/pci/pci.h" > >>> + > >>> +#ifndef INT2VOIDP > >>> +#define INT2VOIDP(i) (void *)(uintptr_t)(i) > >>> +#endif > >>> + > >>> +typedef struct RemoteIommuElem { > >>> + MemoryRegion *mr; > >>> + > >>> + AddressSpace as; > >>> +} RemoteIommuElem; > >>> + > >>> +#define TYPE_REMOTE_IOMMU "x-remote-iommu" > >>> +OBJECT_DECLARE_SIMPLE_TYPE(RemoteIommu, REMOTE_IOMMU) > >>> + > >>> +struct RemoteIommu { > >>> + Object parent; > >>> + > >>> + GHashTable *elem_by_devfn; > >>> + > >>> + QemuMutex lock; > >>> +}; > >>> + > >>> +void remote_iommu_setup(PCIBus *pci_bus); > >>> + > >>> +void remote_iommu_unplug_dev(PCIDevice *pci_dev); > >>> + > >>> +#endif > >>> diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c > >>> new file mode 100644 > >>> index 0000000000..16c6b0834e > >>> --- /dev/null > >>> +++ b/hw/remote/iommu.c > >>> @@ -0,0 +1,114 @@ > >>> +/** > >>> + * IOMMU for remote device > >>> + * > >>> + * Copyright © 2022 Oracle and/or its affiliates. > >>> + * > >>> + * This work is licensed under the terms of the GNU GPL, version 2 or later. > >>> + * See the COPYING file in the top-level directory. > >>> + * > >>> + */ > >>> + > >>> +#include "qemu/osdep.h" > >>> +#include "qemu-common.h" > >>> + > >>> +#include "hw/remote/iommu.h" > >>> +#include "hw/pci/pci_bus.h" > >>> +#include "hw/pci/pci.h" > >>> +#include "exec/memory.h" > >>> +#include "exec/address-spaces.h" > >>> +#include "trace.h" > >>> + > >>> +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, > >>> + void *opaque, int devfn) > >>> +{ > >>> + RemoteIommu *iommu = opaque; > >>> + RemoteIommuElem *elem = NULL; > >>> + > >>> + qemu_mutex_lock(&iommu->lock); > >>> + > >>> + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); > >>> + > >>> + if (!elem) { > >>> + elem = g_malloc0(sizeof(RemoteIommuElem)); > >>> + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); > >>> + } > >>> + > >>> + if (!elem->mr) { > >>> + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); > >>> + memory_region_set_size(elem->mr, UINT64_MAX); > >>> + address_space_init(&elem->as, elem->mr, NULL); > >> > >> Hi, > >> > >> I’d like to add a note here. > >> > >> We tried to add "elem->mr” as a child of PCIDevice. That way, when PCIDevice is > >> unplugged, the child is also finalized. > > > > Do you mean via a memory_region_init()-family function where a parent > > object is given? Or do you mean by adding a QOM child property? > > I mean by adding “elem->mr” as a QOM child property of PCIDevice. > > > > >> However, there was some issue with hotplug. During the hotplug, there’s a window > >> during initialization where we couldn’t lookup the PCIDevice by “devfn”. > >> > >> do_pci_register_device() -> pci_init_bus_master() -> pci_device_iommu_address_space() > >> happens before do_pci_register_device() -> “bus->devices[devfn] = pci_dev”. As such, > >> pci_find_device() doesn’t work at this time. > > > > I don't follow. What calls pci_find_device()? > > To add the MemoryRegion as a child of PCIDevice, remote_iommu_find_add_as() > would need to lookup the PCIDevice using devfn. The function that looks up > PCIDevice by devfn is pci_find_device(). > > The above note explains why we didn’t lookup the PCIDevice using pci_find_device() > and then adding the MemoryRegion as its child. If I understand correctly you're saying it's not possible to use memory_region_init(&elem->mr, OBJECT(pci_dev), ...) inside remote_iommu_find_add_as() because there is no way to find the PCIDevice? It would be nice to automatically clean up the memory region but the AddressSpace needs to be destroyed too and there isn't an automatic way of doing that, so the approach in this patch is fine. Stefan
diff --git a/include/hw/remote/iommu.h b/include/hw/remote/iommu.h new file mode 100644 index 0000000000..33b68a8f4b --- /dev/null +++ b/include/hw/remote/iommu.h @@ -0,0 +1,40 @@ +/** + * Copyright © 2022 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef REMOTE_IOMMU_H +#define REMOTE_IOMMU_H + +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci.h" + +#ifndef INT2VOIDP +#define INT2VOIDP(i) (void *)(uintptr_t)(i) +#endif + +typedef struct RemoteIommuElem { + MemoryRegion *mr; + + AddressSpace as; +} RemoteIommuElem; + +#define TYPE_REMOTE_IOMMU "x-remote-iommu" +OBJECT_DECLARE_SIMPLE_TYPE(RemoteIommu, REMOTE_IOMMU) + +struct RemoteIommu { + Object parent; + + GHashTable *elem_by_devfn; + + QemuMutex lock; +}; + +void remote_iommu_setup(PCIBus *pci_bus); + +void remote_iommu_unplug_dev(PCIDevice *pci_dev); + +#endif diff --git a/hw/remote/iommu.c b/hw/remote/iommu.c new file mode 100644 index 0000000000..16c6b0834e --- /dev/null +++ b/hw/remote/iommu.c @@ -0,0 +1,114 @@ +/** + * IOMMU for remote device + * + * Copyright © 2022 Oracle and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" + +#include "hw/remote/iommu.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci.h" +#include "exec/memory.h" +#include "exec/address-spaces.h" +#include "trace.h" + +static AddressSpace *remote_iommu_find_add_as(PCIBus *pci_bus, + void *opaque, int devfn) +{ + RemoteIommu *iommu = opaque; + RemoteIommuElem *elem = NULL; + + qemu_mutex_lock(&iommu->lock); + + elem = g_hash_table_lookup(iommu->elem_by_devfn, INT2VOIDP(devfn)); + + if (!elem) { + elem = g_malloc0(sizeof(RemoteIommuElem)); + g_hash_table_insert(iommu->elem_by_devfn, INT2VOIDP(devfn), elem); + } + + if (!elem->mr) { + elem->mr = MEMORY_REGION(object_new(TYPE_MEMORY_REGION)); + memory_region_set_size(elem->mr, UINT64_MAX); + address_space_init(&elem->as, elem->mr, NULL); + } + + qemu_mutex_unlock(&iommu->lock); + + return &elem->as; +} + +void remote_iommu_unplug_dev(PCIDevice *pci_dev) +{ + AddressSpace *as = pci_device_iommu_address_space(pci_dev); + RemoteIommuElem *elem = NULL; + + if (as == &address_space_memory) { + return; + } + + elem = container_of(as, RemoteIommuElem, as); + + address_space_destroy(&elem->as); + + object_unref(elem->mr); + + elem->mr = NULL; +} + +static void remote_iommu_init(Object *obj) +{ + RemoteIommu *iommu = REMOTE_IOMMU(obj); + + iommu->elem_by_devfn = g_hash_table_new_full(NULL, NULL, NULL, g_free); + + qemu_mutex_init(&iommu->lock); +} + +static void remote_iommu_finalize(Object *obj) +{ + RemoteIommu *iommu = REMOTE_IOMMU(obj); + + qemu_mutex_destroy(&iommu->lock); + + if (iommu->elem_by_devfn) { + g_hash_table_destroy(iommu->elem_by_devfn); + iommu->elem_by_devfn = NULL; + } +} + +void remote_iommu_setup(PCIBus *pci_bus) +{ + RemoteIommu *iommu = NULL; + + g_assert(pci_bus); + + iommu = REMOTE_IOMMU(object_new(TYPE_REMOTE_IOMMU)); + + pci_setup_iommu(pci_bus, remote_iommu_find_add_as, iommu); + + object_property_add_child(OBJECT(pci_bus), "remote-iommu", OBJECT(iommu)); + + object_unref(OBJECT(iommu)); +} + +static const TypeInfo remote_iommu_info = { + .name = TYPE_REMOTE_IOMMU, + .parent = TYPE_OBJECT, + .instance_size = sizeof(RemoteIommu), + .instance_init = remote_iommu_init, + .instance_finalize = remote_iommu_finalize, +}; + +static void remote_iommu_register_types(void) +{ + type_register_static(&remote_iommu_info); +} + +type_init(remote_iommu_register_types) diff --git a/hw/remote/machine.c b/hw/remote/machine.c index ed91659794..cca5d25f50 100644 --- a/hw/remote/machine.c +++ b/hw/remote/machine.c @@ -21,6 +21,7 @@ #include "qapi/error.h" #include "hw/pci/pci_host.h" #include "hw/remote/iohub.h" +#include "hw/remote/iommu.h" #include "hw/qdev-core.h" static void remote_machine_init(MachineState *machine) @@ -100,6 +101,16 @@ static void remote_machine_instance_init(Object *obj) s->auto_shutdown = true; } +static void remote_machine_dev_unplug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + qdev_unrealize(dev); + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) { + remote_iommu_unplug_dev(PCI_DEVICE(dev)); + } +} + static void remote_machine_class_init(ObjectClass *oc, void *data) { MachineClass *mc = MACHINE_CLASS(oc); @@ -108,7 +119,7 @@ static void remote_machine_class_init(ObjectClass *oc, void *data) mc->init = remote_machine_init; mc->desc = "Experimental remote machine"; - hc->unplug = qdev_simple_device_unplug_cb; + hc->unplug = remote_machine_dev_unplug_cb; object_class_property_add_bool(oc, "vfio-user", remote_machine_get_vfio_user, diff --git a/MAINTAINERS b/MAINTAINERS index 37afdecc61..3e62ccab0a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3599,6 +3599,8 @@ F: hw/remote/iohub.c F: include/hw/remote/iohub.h F: subprojects/libvfio-user F: hw/remote/vfio-user-obj.c +F: hw/remote/iommu.c +F: include/hw/remote/iommu.h EBPF: M: Jason Wang <jasowang@redhat.com> diff --git a/hw/remote/meson.build b/hw/remote/meson.build index 534ac5df79..bcef83c8cc 100644 --- a/hw/remote/meson.build +++ b/hw/remote/meson.build @@ -6,6 +6,7 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('message.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c')) remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iohub.c')) +remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('iommu.c')) remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: files('vfio-user-obj.c')) remote_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_true: vfiouser)