Message ID | 20240812130606.90410-9-edgar.iglesias@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xen: pvh: Partial QOM:fication with new x86 PVH machine | expand |
On Mon, 12 Aug 2024, Edgar E. Iglesias wrote: > From: "Edgar E. Iglesias" <edgar.iglesias@amd.com> > > Add support for optionally creating a PCIe/GPEX controller. > > Signed-off-by: Edgar E. Iglesias <edgar.iglesias@amd.com> > --- > hw/xen/xen-pvh-common.c | 66 +++++++++++++++++++++++++++++++++ > include/hw/xen/xen-pvh-common.h | 10 ++++- > 2 files changed, 75 insertions(+), 1 deletion(-) > > diff --git a/hw/xen/xen-pvh-common.c b/hw/xen/xen-pvh-common.c > index 69a2dbdb6d..b1432e4bd9 100644 > --- a/hw/xen/xen-pvh-common.c > +++ b/hw/xen/xen-pvh-common.c > @@ -120,6 +120,59 @@ static void xen_enable_tpm(XenPVHCommonState *s) > } > #endif > > +static void xen_set_pci_intx_irq(void *opaque, int irq, int level) > +{ > + if (xen_set_pci_intx_level(xen_domid, 0, 0, 0, irq, level)) { Looking at the implementation of XEN_DMOP_set_pci_intx_level in xen/arch/x86/hvm/dm.c, it looks like the device parameter of xen_set_pci_intx_level is required? > + error_report("xendevicemodel_set_pci_intx_level failed"); > + } > +} > + > +static inline void xenpvh_gpex_init(XenPVHCommonState *s, > + MemoryRegion *sysmem, > + hwaddr ecam_base, hwaddr ecam_size, > + hwaddr mmio_base, hwaddr mmio_size, > + hwaddr mmio_high_base, > + hwaddr mmio_high_size, > + int intx_irq_base) > +{ > + MemoryRegion *ecam_reg; > + MemoryRegion *mmio_reg; > + DeviceState *dev; > + int i; > + > + object_initialize_child(OBJECT(s), "gpex", &s->pci.gpex, > + TYPE_GPEX_HOST); > + dev = DEVICE(&s->pci.gpex); > + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); > + > + ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); > + memory_region_add_subregion(sysmem, ecam_base, ecam_reg); I notice we don't use ecam_size anywhere? Is that because the size is standard? > + mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1); > + > + if (mmio_size) { > + memory_region_init_alias(&s->pci.mmio_alias, OBJECT(dev), "pcie-mmio", > + mmio_reg, mmio_base, mmio_size); > + memory_region_add_subregion(sysmem, mmio_base, &s->pci.mmio_alias); > + } > + > + if (mmio_high_size) { > + memory_region_init_alias(&s->pci.mmio_high_alias, OBJECT(dev), > + "pcie-mmio-high", > + mmio_reg, mmio_high_base, mmio_high_size); > + memory_region_add_subregion(sysmem, mmio_high_base, > + &s->pci.mmio_high_alias); > + } > + > + for (i = 0; i < GPEX_NUM_IRQS; i++) { > + qemu_irq irq = qemu_allocate_irq(xen_set_pci_intx_irq, s, i); > + > + sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq); > + gpex_set_irq_num(GPEX_HOST(dev), i, intx_irq_base + i); > + xen_set_pci_link_route(i, intx_irq_base + i); xen_set_pci_link_route is not currently implemented on ARM? Looking at hw/i386/pc_piix.c:piix_intx_routing_notifier_xen it seems that the routing is much more complex over there. But looking at other machines that use GPEX such as hw/arm/virt.c it looks like the routing is straightforward the same way as in this patch. I thought that PCI interrupt pin swizzling was required, but maybe not ? It is totally fine if we do something different, simpler, than hw/i386/pc_piix.c:piix_intx_routing_notifier_xen. I just want to make sure that things remain consistent between ARM and x86, and also between Xen and QEMU view of virtual PCI interrupt routing. > + } > +} > + > static void xen_pvh_realize(DeviceState *dev, Error **errp) > { > XenPVHCommonState *s = XEN_PVH_COMMON(dev); > @@ -152,6 +205,14 @@ static void xen_pvh_realize(DeviceState *dev, Error **errp) > warn_report("tpm-base-addr is not provided. TPM will not be enabled"); > } > #endif > + > + if (s->cfg.ecam.size) { > + xenpvh_gpex_init(s, sysmem, > + s->cfg.ecam.base, s->cfg.ecam.size, > + s->cfg.mmio.base, s->cfg.mmio.size, > + s->cfg.mmio_high.base, s->cfg.mmio_high.size, > + s->cfg.pci_intx_irq_base); > + } > } > > #define DEFINE_PROP_MEMMAP(n, f) \ > @@ -165,10 +226,15 @@ static Property xen_pvh_properties[] = { > DEFINE_PROP_MEMMAP("ram-high", ram_high), > DEFINE_PROP_MEMMAP("virtio-mmio", virtio_mmio), > DEFINE_PROP_MEMMAP("tpm", tpm), > + DEFINE_PROP_MEMMAP("pci-ecam", ecam), > + DEFINE_PROP_MEMMAP("pci-mmio", mmio), > + DEFINE_PROP_MEMMAP("pci-mmio-high", mmio_high), > DEFINE_PROP_UINT32("virtio-mmio-num", XenPVHCommonState, > cfg.virtio_mmio_num, 0), > DEFINE_PROP_UINT32("virtio-mmio-irq-base", XenPVHCommonState, > cfg.virtio_mmio_irq_base, 0), > + DEFINE_PROP_UINT32("pci-intx-irq-base", XenPVHCommonState, > + cfg.pci_intx_irq_base, 0), > DEFINE_PROP_END_OF_LIST() > }; > > diff --git a/include/hw/xen/xen-pvh-common.h b/include/hw/xen/xen-pvh-common.h > index e958b441fd..faacfca70e 100644 > --- a/include/hw/xen/xen-pvh-common.h > +++ b/include/hw/xen/xen-pvh-common.h > @@ -29,17 +29,25 @@ typedef struct XenPVHCommonState { > MemoryRegion high; > } ram; > > + struct { > + GPEXHost gpex; > + MemoryRegion mmio_alias; > + MemoryRegion mmio_high_alias; > + } pci; > + > struct { > uint64_t ram_size; > uint32_t max_cpus; > uint32_t virtio_mmio_num; > uint32_t virtio_mmio_irq_base; > + uint32_t pci_intx_irq_base; > struct { > uint64_t base; > uint64_t size; > } ram_low, ram_high, > virtio_mmio, > - tpm; > + tpm, > + ecam, mmio, mmio_high; > } cfg; > } XenPVHCommonState; > #endif > -- > 2.43.0 >
On Mon, Aug 12, 2024 at 06:48:37PM -0700, Stefano Stabellini wrote: > On Mon, 12 Aug 2024, Edgar E. Iglesias wrote: > > From: "Edgar E. Iglesias" <edgar.iglesias@amd.com> > > > > Add support for optionally creating a PCIe/GPEX controller. > > > > Signed-off-by: Edgar E. Iglesias <edgar.iglesias@amd.com> > > --- > > hw/xen/xen-pvh-common.c | 66 +++++++++++++++++++++++++++++++++ > > include/hw/xen/xen-pvh-common.h | 10 ++++- > > 2 files changed, 75 insertions(+), 1 deletion(-) > > > > diff --git a/hw/xen/xen-pvh-common.c b/hw/xen/xen-pvh-common.c > > index 69a2dbdb6d..b1432e4bd9 100644 > > --- a/hw/xen/xen-pvh-common.c > > +++ b/hw/xen/xen-pvh-common.c > > @@ -120,6 +120,59 @@ static void xen_enable_tpm(XenPVHCommonState *s) > > } > > #endif > > > > +static void xen_set_pci_intx_irq(void *opaque, int irq, int level) > > +{ > > + if (xen_set_pci_intx_level(xen_domid, 0, 0, 0, irq, level)) { > > Looking at the implementation of XEN_DMOP_set_pci_intx_level in > xen/arch/x86/hvm/dm.c, it looks like the device parameter of > xen_set_pci_intx_level is required? Yes, by setting device = 0, we're bypassing the irq swizzling in Xen. I'll try to clarify below. > > > > + error_report("xendevicemodel_set_pci_intx_level failed"); > > + } > > +} > > + > > +static inline void xenpvh_gpex_init(XenPVHCommonState *s, > > + MemoryRegion *sysmem, > > + hwaddr ecam_base, hwaddr ecam_size, > > + hwaddr mmio_base, hwaddr mmio_size, > > + hwaddr mmio_high_base, > > + hwaddr mmio_high_size, > > + int intx_irq_base) > > +{ > > + MemoryRegion *ecam_reg; > > + MemoryRegion *mmio_reg; > > + DeviceState *dev; > > + int i; > > + > > + object_initialize_child(OBJECT(s), "gpex", &s->pci.gpex, > > + TYPE_GPEX_HOST); > > + dev = DEVICE(&s->pci.gpex); > > + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); > > + > > + ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); > > + memory_region_add_subregion(sysmem, ecam_base, ecam_reg); > > I notice we don't use ecam_size anywhere? Is that because the size is > standard? Yes. we could remove the size property, having it slightly simplifies the prop setting code (keeping these memmap prop-pairs alike) but it's not a big deal. > > > > + mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1); > > + > > + if (mmio_size) { > > + memory_region_init_alias(&s->pci.mmio_alias, OBJECT(dev), "pcie-mmio", > > + mmio_reg, mmio_base, mmio_size); > > + memory_region_add_subregion(sysmem, mmio_base, &s->pci.mmio_alias); > > + } > > + > > + if (mmio_high_size) { > > + memory_region_init_alias(&s->pci.mmio_high_alias, OBJECT(dev), > > + "pcie-mmio-high", > > + mmio_reg, mmio_high_base, mmio_high_size); > > + memory_region_add_subregion(sysmem, mmio_high_base, > > + &s->pci.mmio_high_alias); > > + } > > + > > + for (i = 0; i < GPEX_NUM_IRQS; i++) { > > + qemu_irq irq = qemu_allocate_irq(xen_set_pci_intx_irq, s, i); > > + > > + sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq); > > + gpex_set_irq_num(GPEX_HOST(dev), i, intx_irq_base + i); > > + xen_set_pci_link_route(i, intx_irq_base + i); > > xen_set_pci_link_route is not currently implemented on ARM? > > Looking at hw/i386/pc_piix.c:piix_intx_routing_notifier_xen it seems > that the routing is much more complex over there. But looking at other > machines that use GPEX such as hw/arm/virt.c it looks like the routing > is straightforward the same way as in this patch. > > I thought that PCI interrupt pin swizzling was required, but maybe not ? > > It is totally fine if we do something different, simpler, than > hw/i386/pc_piix.c:piix_intx_routing_notifier_xen. I just want to make > sure that things remain consistent between ARM and x86, and also between > Xen and QEMU view of virtual PCI interrupt routing. > Good questions. The following is the way I understand things but I may ofcourse be wrong. Yes, we're doing things differently than hw/i386/pc_piix.c mainly because we're using the GPEX PCIe host bridge with it's internal standard swizzling down to 4 INTX interrupts. Similar to microvm and the ARM virt machine. The swizzling for the GPEX is done inside the GPEX model and it's described by xl in the ACPI tables for PVH guests. We don't want Xen to do any additional swizzling in xen_set_pci_intx_level(), hence device=0. I haven't plumbed the GPEX connectinos for ARM yet but I think we could simply call xendevicemodel_set_irq_level() and not use the pci_intx calls that aren't implement (we wouldn't need them). For x86/pvh, I wonder if we should be using xen_set_pci_intx_level() / xen_set_pci_link_route() or some other API? since we're basically bypassing things? In one of the first implementations we used set_isa_irq_level() but that call only reaches into irqs < 16 so it messed things up. Does any one have any better ideas or suggestions? Cheers, Edgar
On Wed, 14 Aug 2024, Edgar E. Iglesias wrote: > On Mon, Aug 12, 2024 at 06:48:37PM -0700, Stefano Stabellini wrote: > > On Mon, 12 Aug 2024, Edgar E. Iglesias wrote: > > > From: "Edgar E. Iglesias" <edgar.iglesias@amd.com> > > > > > > Add support for optionally creating a PCIe/GPEX controller. > > > > > > Signed-off-by: Edgar E. Iglesias <edgar.iglesias@amd.com> > > > --- > > > hw/xen/xen-pvh-common.c | 66 +++++++++++++++++++++++++++++++++ > > > include/hw/xen/xen-pvh-common.h | 10 ++++- > > > 2 files changed, 75 insertions(+), 1 deletion(-) > > > > > > diff --git a/hw/xen/xen-pvh-common.c b/hw/xen/xen-pvh-common.c > > > index 69a2dbdb6d..b1432e4bd9 100644 > > > --- a/hw/xen/xen-pvh-common.c > > > +++ b/hw/xen/xen-pvh-common.c > > > @@ -120,6 +120,59 @@ static void xen_enable_tpm(XenPVHCommonState *s) > > > } > > > #endif > > > > > > +static void xen_set_pci_intx_irq(void *opaque, int irq, int level) > > > +{ > > > + if (xen_set_pci_intx_level(xen_domid, 0, 0, 0, irq, level)) { > > > > Looking at the implementation of XEN_DMOP_set_pci_intx_level in > > xen/arch/x86/hvm/dm.c, it looks like the device parameter of > > xen_set_pci_intx_level is required? > > Yes, by setting device = 0, we're bypassing the irq swizzling in Xen. > I'll try to clarify below. > > > > > > > > > + error_report("xendevicemodel_set_pci_intx_level failed"); > > > + } > > > +} > > > + > > > +static inline void xenpvh_gpex_init(XenPVHCommonState *s, > > > + MemoryRegion *sysmem, > > > + hwaddr ecam_base, hwaddr ecam_size, > > > + hwaddr mmio_base, hwaddr mmio_size, > > > + hwaddr mmio_high_base, > > > + hwaddr mmio_high_size, > > > + int intx_irq_base) > > > +{ > > > + MemoryRegion *ecam_reg; > > > + MemoryRegion *mmio_reg; > > > + DeviceState *dev; > > > + int i; > > > + > > > + object_initialize_child(OBJECT(s), "gpex", &s->pci.gpex, > > > + TYPE_GPEX_HOST); > > > + dev = DEVICE(&s->pci.gpex); > > > + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); > > > + > > > + ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); > > > + memory_region_add_subregion(sysmem, ecam_base, ecam_reg); > > > > I notice we don't use ecam_size anywhere? Is that because the size is > > standard? > > Yes. we could remove the size property, having it slightly simplifies the > prop setting code (keeping these memmap prop-pairs alike) but it's not a big deal. Not a big deal either way, up to you > > > + mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1); > > > + > > > + if (mmio_size) { > > > + memory_region_init_alias(&s->pci.mmio_alias, OBJECT(dev), "pcie-mmio", > > > + mmio_reg, mmio_base, mmio_size); > > > + memory_region_add_subregion(sysmem, mmio_base, &s->pci.mmio_alias); > > > + } > > > + > > > + if (mmio_high_size) { > > > + memory_region_init_alias(&s->pci.mmio_high_alias, OBJECT(dev), > > > + "pcie-mmio-high", > > > + mmio_reg, mmio_high_base, mmio_high_size); > > > + memory_region_add_subregion(sysmem, mmio_high_base, > > > + &s->pci.mmio_high_alias); > > > + } > > > + > > > + for (i = 0; i < GPEX_NUM_IRQS; i++) { > > > + qemu_irq irq = qemu_allocate_irq(xen_set_pci_intx_irq, s, i); > > > + > > > + sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq); > > > + gpex_set_irq_num(GPEX_HOST(dev), i, intx_irq_base + i); > > > + xen_set_pci_link_route(i, intx_irq_base + i); > > > > xen_set_pci_link_route is not currently implemented on ARM? > > > > Looking at hw/i386/pc_piix.c:piix_intx_routing_notifier_xen it seems > > that the routing is much more complex over there. But looking at other > > machines that use GPEX such as hw/arm/virt.c it looks like the routing > > is straightforward the same way as in this patch. > > > > I thought that PCI interrupt pin swizzling was required, but maybe not ? > > > > It is totally fine if we do something different, simpler, than > > hw/i386/pc_piix.c:piix_intx_routing_notifier_xen. I just want to make > > sure that things remain consistent between ARM and x86, and also between > > Xen and QEMU view of virtual PCI interrupt routing. > > > > Good questions. The following is the way I understand things but I may > ofcourse be wrong. > > Yes, we're doing things differently than hw/i386/pc_piix.c mainly > because we're using the GPEX PCIe host bridge with it's internal > standard swizzling down to 4 INTX interrupts. Similar to microvm and > the ARM virt machine. > > The swizzling for the GPEX is done inside the GPEX model and it's > described by xl in the ACPI tables for PVH guests. We don't want > Xen to do any additional swizzling in xen_set_pci_intx_level(), hence > device=0. OK > I haven't plumbed the GPEX connectinos for ARM yet but I think we could > simply call xendevicemodel_set_irq_level() and not use the pci_intx > calls that aren't implement (we wouldn't need them). > > For x86/pvh, I wonder if we should be using xen_set_pci_intx_level() / > xen_set_pci_link_route() or some other API? since we're basically > bypassing things? > In one of the first implementations we used set_isa_irq_level() but > that call only reaches into irqs < 16 so it messed things up. > > Does any one have any better ideas or suggestions? I think QEMU is free to call or not call any API at setup time. Given that the PVH interrupt controller is emulated by Xen, the important thing is that when QEMU raises an interrupt or an MSI with xen_set_isa_irq_level, xen_inject_msi and xen_set_pci_intx_level, Xen injects it into the guest as expected and the guest receives it appropriately. To oversimplify things, I was worried that QEMU tries to inject INTA but the guest receives INTD instead. Or QEMU tries to raise a level interrupt and Xen injects an edge interrupt instead. Also I think we should try to do things the same way between the PVH machine on ARM and X86. But we can (should?) do things differently from hw/i386/pc_piix.c.
diff --git a/hw/xen/xen-pvh-common.c b/hw/xen/xen-pvh-common.c index 69a2dbdb6d..b1432e4bd9 100644 --- a/hw/xen/xen-pvh-common.c +++ b/hw/xen/xen-pvh-common.c @@ -120,6 +120,59 @@ static void xen_enable_tpm(XenPVHCommonState *s) } #endif +static void xen_set_pci_intx_irq(void *opaque, int irq, int level) +{ + if (xen_set_pci_intx_level(xen_domid, 0, 0, 0, irq, level)) { + error_report("xendevicemodel_set_pci_intx_level failed"); + } +} + +static inline void xenpvh_gpex_init(XenPVHCommonState *s, + MemoryRegion *sysmem, + hwaddr ecam_base, hwaddr ecam_size, + hwaddr mmio_base, hwaddr mmio_size, + hwaddr mmio_high_base, + hwaddr mmio_high_size, + int intx_irq_base) +{ + MemoryRegion *ecam_reg; + MemoryRegion *mmio_reg; + DeviceState *dev; + int i; + + object_initialize_child(OBJECT(s), "gpex", &s->pci.gpex, + TYPE_GPEX_HOST); + dev = DEVICE(&s->pci.gpex); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + ecam_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0); + memory_region_add_subregion(sysmem, ecam_base, ecam_reg); + + mmio_reg = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 1); + + if (mmio_size) { + memory_region_init_alias(&s->pci.mmio_alias, OBJECT(dev), "pcie-mmio", + mmio_reg, mmio_base, mmio_size); + memory_region_add_subregion(sysmem, mmio_base, &s->pci.mmio_alias); + } + + if (mmio_high_size) { + memory_region_init_alias(&s->pci.mmio_high_alias, OBJECT(dev), + "pcie-mmio-high", + mmio_reg, mmio_high_base, mmio_high_size); + memory_region_add_subregion(sysmem, mmio_high_base, + &s->pci.mmio_high_alias); + } + + for (i = 0; i < GPEX_NUM_IRQS; i++) { + qemu_irq irq = qemu_allocate_irq(xen_set_pci_intx_irq, s, i); + + sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, irq); + gpex_set_irq_num(GPEX_HOST(dev), i, intx_irq_base + i); + xen_set_pci_link_route(i, intx_irq_base + i); + } +} + static void xen_pvh_realize(DeviceState *dev, Error **errp) { XenPVHCommonState *s = XEN_PVH_COMMON(dev); @@ -152,6 +205,14 @@ static void xen_pvh_realize(DeviceState *dev, Error **errp) warn_report("tpm-base-addr is not provided. TPM will not be enabled"); } #endif + + if (s->cfg.ecam.size) { + xenpvh_gpex_init(s, sysmem, + s->cfg.ecam.base, s->cfg.ecam.size, + s->cfg.mmio.base, s->cfg.mmio.size, + s->cfg.mmio_high.base, s->cfg.mmio_high.size, + s->cfg.pci_intx_irq_base); + } } #define DEFINE_PROP_MEMMAP(n, f) \ @@ -165,10 +226,15 @@ static Property xen_pvh_properties[] = { DEFINE_PROP_MEMMAP("ram-high", ram_high), DEFINE_PROP_MEMMAP("virtio-mmio", virtio_mmio), DEFINE_PROP_MEMMAP("tpm", tpm), + DEFINE_PROP_MEMMAP("pci-ecam", ecam), + DEFINE_PROP_MEMMAP("pci-mmio", mmio), + DEFINE_PROP_MEMMAP("pci-mmio-high", mmio_high), DEFINE_PROP_UINT32("virtio-mmio-num", XenPVHCommonState, cfg.virtio_mmio_num, 0), DEFINE_PROP_UINT32("virtio-mmio-irq-base", XenPVHCommonState, cfg.virtio_mmio_irq_base, 0), + DEFINE_PROP_UINT32("pci-intx-irq-base", XenPVHCommonState, + cfg.pci_intx_irq_base, 0), DEFINE_PROP_END_OF_LIST() }; diff --git a/include/hw/xen/xen-pvh-common.h b/include/hw/xen/xen-pvh-common.h index e958b441fd..faacfca70e 100644 --- a/include/hw/xen/xen-pvh-common.h +++ b/include/hw/xen/xen-pvh-common.h @@ -29,17 +29,25 @@ typedef struct XenPVHCommonState { MemoryRegion high; } ram; + struct { + GPEXHost gpex; + MemoryRegion mmio_alias; + MemoryRegion mmio_high_alias; + } pci; + struct { uint64_t ram_size; uint32_t max_cpus; uint32_t virtio_mmio_num; uint32_t virtio_mmio_irq_base; + uint32_t pci_intx_irq_base; struct { uint64_t base; uint64_t size; } ram_low, ram_high, virtio_mmio, - tpm; + tpm, + ecam, mmio, mmio_high; } cfg; } XenPVHCommonState; #endif