diff mbox series

[v11,1/2] hw/arm/virt: Basic CXL enablement on pci_expander_bridge instances pxb-cxl

Message ID 20220616141950.23374-2-Jonathan.Cameron@huawei.com
State Handled Elsewhere
Headers show
Series arm/virt: CXL support via pxb_cxl | expand

Commit Message

Jonathan Cameron June 16, 2022, 2:19 p.m. UTC
Code based on i386/pc enablement.
The memory layout places space for 16 host bridge register regions after
the GIC_REDIST2 in the extended memmap.
The CFMWs are placed above the extended memmap.

Only create the CEDT table if cxl=on set for the machine.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

---
 hw/arm/virt-acpi-build.c | 34 +++++++++++++++++++++++++++++++
 hw/arm/virt.c            | 44 ++++++++++++++++++++++++++++++++++++++++
 include/hw/arm/virt.h    |  3 +++
 3 files changed, 81 insertions(+)

Comments

Peter Maydell June 24, 2022, 10:48 a.m. UTC | #1
On Thu, 16 Jun 2022 at 15:20, Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> Code based on i386/pc enablement.
> The memory layout places space for 16 host bridge register regions after
> the GIC_REDIST2 in the extended memmap.
> The CFMWs are placed above the extended memmap.
>
> Only create the CEDT table if cxl=on set for the machine.
>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

This seems to be missing code to advertise the new devices in the
device tree.

Somebody else had better review the ACPI changes :-)

>  #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
> @@ -178,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
>  static MemMapEntry extended_memmap[] = {
>      /* Additional 64 MB redist region (can contain up to 512 redistributors) */
>      [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
> +    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */

Does this reshuffle the memory layout so that all the stuff after it
moves up ? If so, that's an incompatible change and would need
versioning somehow.

More generally, should this new addition to the machine be
versioned? What did we do for the pc machine types?

>      [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
>      /* Second PCIe window */
>      [VIRT_HIGH_PCIE_MMIO] =     { 0x0, 512 * GiB },
> @@ -1525,6 +1529,16 @@ static void create_pcie(VirtMachineState *vms)
>      }
>  }
>
> +static void create_cxl_host_reg_region(VirtMachineState *vms)
> +{
> +    MemoryRegion *sysmem = get_system_memory();
> +    MemoryRegion *mr = &vms->cxl_devices_state.host_mr;
> +
> +    memory_region_init(mr, OBJECT(vms), "cxl_host_reg",
> +                       vms->memmap[VIRT_CXL_HOST].size);
> +    memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr);
> +}

> @@ -1779,6 +1799,20 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
>          memory_region_init(&ms->device_memory->mr, OBJECT(vms),
>                             "device-memory", device_memory_size);
>      }
> +
> +    if (vms->cxl_devices_state.fixed_windows) {
> +        GList *it;
> +
> +        base = ROUND_UP(base, 256 * MiB);
> +        for (it = vms->cxl_devices_state.fixed_windows; it; it = it->next) {
> +            CXLFixedWindow *fw = it->data;
> +
> +            fw->base = base;
> +            memory_region_init_io(&fw->mr, OBJECT(vms), &cfmws_ops, fw,
> +                                  "cxl-fixed-memory-region", fw->size);

Why is board model code having to init memory regions for this
device? Shouldn't the device be creating the memory regions itself
and then exposing them for the board code to map them ?

> +            base += fw->size;
> +        }
> +    }
>  }
>
>  /*
> @@ -2215,6 +2249,14 @@ static void machvirt_init(MachineState *machine)
>          memory_region_add_subregion(sysmem, machine->device_memory->base,
>                                      &machine->device_memory->mr);
>      }
> +    if (vms->cxl_devices_state.fixed_windows) {
> +        GList *it;
> +        for (it = vms->cxl_devices_state.fixed_windows; it; it = it->next) {
> +            CXLFixedWindow *fw = it->data;
> +
> +            memory_region_add_subregion(sysmem, fw->base, &fw->mr);
> +        }
> +    }
>
>      virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
>
> @@ -2241,6 +2283,7 @@ static void machvirt_init(MachineState *machine)
>      create_rtc(vms);
>
>      create_pcie(vms);
> +    create_cxl_host_reg_region(vms);
>
>      if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
>          vms->acpi_dev = create_acpi_ged(vms);
> @@ -3070,6 +3113,7 @@ static void virt_instance_init(Object *obj)
>
>      vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
>      vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
> +    cxl_machine_init(obj, &vms->cxl_devices_state);
>  }
>
>  static const TypeInfo virt_machine_info = {
> diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> index 15feabac63..403c9107e5 100644
> --- a/include/hw/arm/virt.h
> +++ b/include/hw/arm/virt.h
> @@ -35,6 +35,7 @@
>  #include "hw/boards.h"
>  #include "hw/arm/boot.h"
>  #include "hw/block/flash.h"
> +#include "hw/cxl/cxl.h"
>  #include "sysemu/kvm.h"
>  #include "hw/intc/arm_gicv3_common.h"
>  #include "qom/object.h"
> @@ -92,6 +93,7 @@ enum {
>  /* indices of IO regions located after the RAM */
>  enum {
>      VIRT_HIGH_GIC_REDIST2 =  VIRT_LOWMEMMAP_LAST,
> +    VIRT_CXL_HOST,
>      VIRT_HIGH_PCIE_ECAM,
>      VIRT_HIGH_PCIE_MMIO,
>  };
> @@ -176,6 +178,7 @@ struct VirtMachineState {
>      PCIBus *bus;
>      char *oem_id;
>      char *oem_table_id;
> +    CXLState cxl_devices_state;
>  };

I just looked at the CXLState struct. Why isn't this a device object ?

thanks
-- PMM
Jonathan Cameron June 24, 2022, 12:39 p.m. UTC | #2
On Fri, 24 Jun 2022 11:48:47 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> On Thu, 16 Jun 2022 at 15:20, Jonathan Cameron
> <Jonathan.Cameron@huawei.com> wrote:
> >
> > Code based on i386/pc enablement.
> > The memory layout places space for 16 host bridge register regions after
> > the GIC_REDIST2 in the extended memmap.
> > The CFMWs are placed above the extended memmap.
> >
> > Only create the CEDT table if cxl=on set for the machine.
> >
> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>  

Hi Peter,

Thanks for the review,

> 
> This seems to be missing code to advertise the new devices in the
> device tree.

Intentionally. I am not aware of any current interest
in defining DT support for CXL or supporting it in operating systems.
Easy enough to add if anyone does the leg work to figure out the
bindings, but that needs to come from someone who cares and
would need to be driven by OS support and a use case. The ACPI
stuff here is defined as part of the main CXL spec because the
target class of machines simply doesn't generally use DT.

> 
> Somebody else had better review the ACPI changes :-)
> 
> >  #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
> > @@ -178,6 +181,7 @@ static const MemMapEntry base_memmap[] = {
> >  static MemMapEntry extended_memmap[] = {
> >      /* Additional 64 MB redist region (can contain up to 512 redistributors) */
> >      [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
> > +    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */  
> 
> Does this reshuffle the memory layout so that all the stuff after it
> moves up ? If so, that's an incompatible change and would need
> versioning somehow.

I slotted it into a hole in the existing memory map so unless I
have a bug, it shouldn't move any pre-existing memory windows.
The comment just above extended_memmap states that the floating area
starts at 256GiB and the entries after that will be naturally aligned.

Hence existing map will be
256GiB Start of redist2
256GiB + 64MiB Start of gap -  we slot in a 16 * 64KiB region at start of this gap.
256GiB + 256MiB Start of PCIE_ECAM
256GiB + 512MiB Start of gap 2
512GiB Start of PCIE_MMIO

> 
> More generally, should this new addition to the machine be
> versioned? What did we do for the pc machine types?

So far not versioned and concern wasn't raised. I didn't think it was an
issue because there should be no effect on older machines where cxl=on
can't be set. Given the user has to explicitly request more hardware
for anything to change, I don't see a need for versioning (yet).

Not sure it's relevant but note the emulation here is not suitable for
anything other than testing code against (it's very slow and provides
nothing you'd actually want in a VM) so migrating a VM with this support
seems unlikely to ever happen.

> 
> >      [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
> >      /* Second PCIe window */
> >      [VIRT_HIGH_PCIE_MMIO] =     { 0x0, 512 * GiB },
> > @@ -1525,6 +1529,16 @@ static void create_pcie(VirtMachineState *vms)
> >      }
> >  }
> >
> > +static void create_cxl_host_reg_region(VirtMachineState *vms)
> > +{
> > +    MemoryRegion *sysmem = get_system_memory();
> > +    MemoryRegion *mr = &vms->cxl_devices_state.host_mr;
> > +
> > +    memory_region_init(mr, OBJECT(vms), "cxl_host_reg",
> > +                       vms->memmap[VIRT_CXL_HOST].size);
> > +    memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr);
> > +}  
> 
> > @@ -1779,6 +1799,20 @@ static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
> >          memory_region_init(&ms->device_memory->mr, OBJECT(vms),
> >                             "device-memory", device_memory_size);
> >      }
> > +
> > +    if (vms->cxl_devices_state.fixed_windows) {
> > +        GList *it;
> > +
> > +        base = ROUND_UP(base, 256 * MiB);
> > +        for (it = vms->cxl_devices_state.fixed_windows; it; it = it->next) {
> > +            CXLFixedWindow *fw = it->data;
> > +
> > +            fw->base = base;
> > +            memory_region_init_io(&fw->mr, OBJECT(vms), &cfmws_ops, fw,
> > +                                  "cxl-fixed-memory-region", fw->size);  
> 
> Why is board model code having to init memory regions for this
> device? Shouldn't the device be creating the memory regions itself
> and then exposing them for the board code to map them ?

From a device point of view CFMWs are weird. They are a region of Host PA
space that has fixed routing to devices (CXL host bridges). On a real system
they are a characteristic of the interconnect routing and if programmable it
is done well before any standard software (so might as well be hard coded).
Making them a device would be a little bit like saying the base_memmap should be
a device - all they are doing is saying this address here should route to this
particular device. The flexibility here is to allow testing of different setups
without having to know the actual PA range which is safe to use.

They are very like the device memory or secure memory initialization.
The ops are there to do the interleave decoding. Interleave decoding
can't sensibly use a tree of memory windows because there would need to
be an infeasible number of them hence the memops work out the routing at
access time. If we ever optimize/specialize the non interleaved direct
connected CXL device case, or implement more general memory interleaving
support, it will just be a memory_region_init_ram() call like any other ram.

This went through various approaches but in the end the sequence of what we
need to set up for CXL fixed memory windows made representing them as a device
hard so we didn't. Perhaps it could be done, but the device part would
be very slim and we'd have a bunch of code having to search for the devices
to stitch together all the information they need once it's available as
the rest of the system is built (e.g. after the host bridges are created).

> 
> > +            base += fw->size;
> > +        }
> > +    }
> >  }
> >
> >  /*
> > @@ -2215,6 +2249,14 @@ static void machvirt_init(MachineState *machine)
> >          memory_region_add_subregion(sysmem, machine->device_memory->base,
> >                                      &machine->device_memory->mr);
> >      }
> > +    if (vms->cxl_devices_state.fixed_windows) {
> > +        GList *it;
> > +        for (it = vms->cxl_devices_state.fixed_windows; it; it = it->next) {
> > +            CXLFixedWindow *fw = it->data;
> > +
> > +            memory_region_add_subregion(sysmem, fw->base, &fw->mr);
> > +        }
> > +    }
> >
> >      virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
> >
> > @@ -2241,6 +2283,7 @@ static void machvirt_init(MachineState *machine)
> >      create_rtc(vms);
> >
> >      create_pcie(vms);
> > +    create_cxl_host_reg_region(vms);
> >
> >      if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
> >          vms->acpi_dev = create_acpi_ged(vms);
> > @@ -3070,6 +3113,7 @@ static void virt_instance_init(Object *obj)
> >
> >      vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
> >      vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
> > +    cxl_machine_init(obj, &vms->cxl_devices_state);
> >  }
> >
> >  static const TypeInfo virt_machine_info = {
> > diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
> > index 15feabac63..403c9107e5 100644
> > --- a/include/hw/arm/virt.h
> > +++ b/include/hw/arm/virt.h
> > @@ -35,6 +35,7 @@
> >  #include "hw/boards.h"
> >  #include "hw/arm/boot.h"
> >  #include "hw/block/flash.h"
> > +#include "hw/cxl/cxl.h"
> >  #include "sysemu/kvm.h"
> >  #include "hw/intc/arm_gicv3_common.h"
> >  #include "qom/object.h"
> > @@ -92,6 +93,7 @@ enum {
> >  /* indices of IO regions located after the RAM */
> >  enum {
> >      VIRT_HIGH_GIC_REDIST2 =  VIRT_LOWMEMMAP_LAST,
> > +    VIRT_CXL_HOST,
> >      VIRT_HIGH_PCIE_ECAM,
> >      VIRT_HIGH_PCIE_MMIO,
> >  };
> > @@ -176,6 +178,7 @@ struct VirtMachineState {
> >      PCIBus *bus;
> >      char *oem_id;
> >      char *oem_table_id;
> > +    CXLState cxl_devices_state;
> >  };  
> 
> I just looked at the CXLState struct. Why isn't this a device object ?

It can't be a pluggable device (e.g. added by -device ...)
because we need to know about it early enough to set up the underlying
PA regions (I tried that a long time back, and couldn't find a way to
make it work).

Possibly we could have machine parameters as today to specify
everything necessary to create a set of fixed memory window associated
devices. I'm not sure what the benefit would be however. When I originally
looked at this it seemed much closer to NumaState, NVDimmstate etc and
that's what it was modeled on (those are in MachineState but Paolo asked
we move CXLState down into the machine specific state structures because it
wasn't relevant to the majority of boards).

I guess the simple answer to this is that as it's not a 'device' in the
sense we would normally expect to be called device on a physical system
it never occurred to me to consider making it one.  If there are
benefits to doing so (and we can avoid breaking x86 support), then sure
I can look at doing so).

struct CXLState {
	/*
	 * Used to gate creation of ACPI tables etc. You could use presence
	 * or absence of a device but boolean seems much simpler and similar
	 * too all the bools in Virt MachineState.
	 */
	bool is_enabled; 
	
	/*
	 * At time of PA init we don't know what device are present
	 * so need a trivial allocator that can be used later to
	 * poke the addresses at the devices that have been created
	 * via -device 
	 * Could make it a device, but seems overkill and would complicate
	 * the acpi table building by making us do multiple levels of
	 * address decoding to get to the actual PA of unrelated devices.
	 * This could be avoided if we hard coded the PXBs in the machine
	 * file, but that's a non starter because of how they work today.
	 */
	MemoryRegion host_mr;
   	MemoryRegion next_mr_index;

	/* State for the parameter parser of machine parameters.
	 * CXLState was in MachineState where there are lots of
	 * similar items (NumaState etc) but Paolo asked we move it into
	 * the state of individual machines that we have added
	 * CXL support to.
	 * The split of PA map building and setup of devices means
	 * we need to read this multiple times.
	 */
	GList *fixed_windows;
	CXLFixedMemoryWindowOptionsList *cfmw_list;
};

I just noticed we have a left over cfmw_list in MachineState that
should have gone in the rework set that moved this stuff into the
individual board states.  I'll send a patch to clear that stray
pointer out separately.

Thanks,

Jonathan
> 
> thanks
> -- PMM
Peter Maydell June 24, 2022, 12:56 p.m. UTC | #3
On Fri, 24 Jun 2022 at 13:39, Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Fri, 24 Jun 2022 11:48:47 +0100
> Peter Maydell <peter.maydell@linaro.org> wrote:
> >
> > This seems to be missing code to advertise the new devices in the
> > device tree.
>
> Intentionally. I am not aware of any current interest
> in defining DT support CXL or supporting it in operating systems.
> Easy enough to add if anyone does the leg work to figure out the
> bindings, but that needs to come from someone who cares and
> would need to be driven by OS support and a usecase. The ACPI
> stuff here is defined as part of the main CXL spec because the
> target class of machines simply doesn't generally use DT.

I don't really want new devices in the virt board that aren't
usable in the common use case of "just pass a kernel with -kernel"...

-- PMM
Jonathan Cameron June 24, 2022, 2:08 p.m. UTC | #4
On Fri, 24 Jun 2022 13:56:32 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> On Fri, 24 Jun 2022 at 13:39, Jonathan Cameron
> <Jonathan.Cameron@huawei.com> wrote:
> >
> > On Fri, 24 Jun 2022 11:48:47 +0100
> > Peter Maydell <peter.maydell@linaro.org> wrote:  
> > >
> > > This seems to be missing code to advertise the new devices in the
> > > device tree.  
> >
> > Intentionally. I am not aware of any current interest
> > in defining DT support CXL or supporting it in operating systems.
> > Easy enough to add if anyone does the leg work to figure out the
> > bindings, but that needs to come from someone who cares and
> > would need to be driven by OS support and a usecase. The ACPI
> > stuff here is defined as part of the main CXL spec because the
> > target class of machines simply doesn't generally use DT.  
> 
> I don't really want new devices in the virt board that aren't
> usable in the common use case of "just pass a kernel with -kernel"...
> 
> -- PMM

Ok.  In that case, what do you suggest?  

Options I can think of:

1) I can come up with plausible DT bindings, but I'm not sure how
that will be received by the kernel community. It will add a bunch of
infrastructure to maintain that may be seen as useless at least in
the short to medium term. Hence is not in anyone's test matrices etc.

Dan, how open would you be to adding DT support? We'd have to ignore
some of the firmware query stuff like QTGs as there isn't an equivalent
in DT - or we'd have to go as far as defining actual devices with
mailboxes to query that info.

2) Add it to something like the SBSA machine, but that brings a large
burden in firmware etc and need to communicate everything via DT to
EDK2 that is needed to build the ACPI tables in a flexible fashion +
mass of EDK2 development.  Whilst the SBSA model is great for ARM
specific stuff, requiring the large additional complexity in
actually using it to test arch independent software is probably
going to just mean it bit rots.

3) Fork virt (obviously sharing as much as possible), potentially I
guess that could be pretty light weight by declaring a new
TypeInfo that is very nearly identical to virt with just the few extra
calls inserted. 

Any other routes open to me to getting this support available?

Jonathan
Jonathan Cameron June 24, 2022, 2:54 p.m. UTC | #5
On Fri, 24 Jun 2022 15:08:44 +0100
Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:

> On Fri, 24 Jun 2022 13:56:32 +0100
> Peter Maydell <peter.maydell@linaro.org> wrote:
> 
> > On Fri, 24 Jun 2022 at 13:39, Jonathan Cameron
> > <Jonathan.Cameron@huawei.com> wrote:  
> > >
> > > On Fri, 24 Jun 2022 11:48:47 +0100
> > > Peter Maydell <peter.maydell@linaro.org> wrote:    
> > > >
> > > > This seems to be missing code to advertise the new devices in the
> > > > device tree.    
> > >
> > > Intentionally. I am not aware of any current interest
> > > in defining DT support CXL or supporting it in operating systems.
> > > Easy enough to add if anyone does the leg work to figure out the
> > > bindings, but that needs to come from someone who cares and
> > > would need to be driven by OS support and a usecase. The ACPI
> > > stuff here is defined as part of the main CXL spec because the
> > > target class of machines simply doesn't generally use DT.    
> > 
> > I don't really want new devices in the virt board that aren't
> > usable in the common use case of "just pass a kernel with -kernel"...
> > 
> > -- PMM  
> 
> Ok.  In that case, what do you suggest?  
> 
> Options I can think of:
> 
> 1) I can come up with plausible DT bindings, but I'm not sure how
> that will be received by the kernel community, It will add a bunch of
> infrastructure to maintain that may be seen as useless at least in
> the short to medium term. Hence is not in anyone's test matrices etc.

Just occurred to me there is another barrier to an approach that adds
DT bindings.
I'm fairly sure hw/pci-bridge/pci_expander_bridge.c (PXB)
only works on ACPI platforms and is the only host bridge supported
for CXL emulation in QEMU.

> 
> Dan, how open would you be to adding DT support? We'd have to ignore
> some of the firmware query stuff like QTGs as there isn't an equivalent
> in DT - or we'd have to go as far as defining actual devices with
> mailboxes to query that info.
> 
> 2) Add it to something like the SBSA machine, but that brings a large
> burden in firmware etc and need to communicate everything via DT to
> EDK2 that is needed to build the ACPI tables in a flexible fashion +
> mass of EDK2 development.  Whilst the SBSA model is great for ARM
> specific stuff, requiring the large additional complexity in
> actually using it to test arch independent software is probably
> going to just mean it bit rots.
> 
> 3) Fork virt (obviously sharing as much as possible), potentially I
> guess that could be pretty light weight by declaring a new
> TypeInfo that is very nearly identical to virt with just the few extra
> calls inserted. 
> 
> Any other routes open to me to getting this support available?
> 
> Jonathan
> 
>
Peter Maydell June 24, 2022, 3:01 p.m. UTC | #6
On Fri, 24 Jun 2022 at 15:54, Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
> Just occurred to me there is another barrier to an approach that adds
> DT bindings.
> I fairly sure hw/pci-bridge/pci_expander_bridge.c (PXB)
> only works on ACPI platforms and is the only host bridge supported
> for CXL emulation in QEMU.

Isn't it probeable like any other PCI device/bridge ?

-- PMM
Jonathan Cameron June 24, 2022, 3:59 p.m. UTC | #7
On Fri, 24 Jun 2022 16:01:42 +0100
Peter Maydell <peter.maydell@linaro.org> wrote:

> On Fri, 24 Jun 2022 at 15:54, Jonathan Cameron
> <Jonathan.Cameron@huawei.com> wrote:
> > Just occurred to me there is another barrier to an approach that adds
> > DT bindings.
> > I fairly sure hw/pci-bridge/pci_expander_bridge.c (PXB)
> > only works on ACPI platforms and is the only host bridge supported
> > for CXL emulation in QEMU.  
> 
> Isn't it probeable like any other PCI device/bridge ?

Nope - PXB is a really weird device. (I tested it quickly in case I was
wrong and indeed, no sign of device on the downstream side without a
suitable BIOS / ACPI)
There is no driver support for it as such, rather it presents
as two things.
1) A EP on the main host bridge - which is used for interrupt routing
   and possibly a few other things.  Linux has no idea that's what it
   is though so attaches no driver to it.  lspci shows this as
   Red Hat, Inc, QEMU PCIe Expander Bridge

2) A host bridge with firmware described characteristics (bus number
   range and similar).  Host bridges as defined in ACPI are a concept
   rather than actual hardware and presented to the OS via firmware
   descriptions (ACPI DSDT stuff in this case).

You could probably add dt description via
pci-host-ecam-generic bindings though but it would be an interesting
late bit of dt addition in the virt_machine_done() function. Similar
to the fw_cfg and ACPI stuff done at that stage to deal with PXB
devices becoming visible.

So gut feeling is PXB could be made to work with DT, but doesn't today.
Given that the main use case for PXB is typically NUMA description, I guess no one
noticed on DT platforms.

Jonathan



 
> 
> -- PMM
diff mbox series

Patch

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 449fab0080..d47efcb297 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -39,9 +39,11 @@ 
 #include "hw/acpi/aml-build.h"
 #include "hw/acpi/utils.h"
 #include "hw/acpi/pci.h"
+#include "hw/acpi/cxl.h"
 #include "hw/acpi/memory_hotplug.h"
 #include "hw/acpi/generic_event_device.h"
 #include "hw/acpi/tpm.h"
+#include "hw/cxl/cxl.h"
 #include "hw/pci/pcie_host.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bus.h"
@@ -157,10 +159,29 @@  static void acpi_dsdt_add_virtio(Aml *scope,
     }
 }
 
+/* Uses local definition of AcpiBuildState so can't easily be common code */
+static void build_acpi0017(Aml *table)
+{
+    Aml *dev, *scope, *method;
+
+    scope =  aml_scope("_SB");
+    dev = aml_device("CXLM");
+    aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0017")));
+
+    method = aml_method("_STA", 0, AML_NOTSERIALIZED);
+    aml_append(method, aml_return(aml_int(0x01)));
+    aml_append(dev, method);
+
+    aml_append(scope, dev);
+    aml_append(table, scope);
+}
+
 static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
                               uint32_t irq, VirtMachineState *vms)
 {
     int ecam_id = VIRT_ECAM_ID(vms->highmem_ecam);
+    bool cxl_present = false;
+    PCIBus *bus = vms->bus;
     struct GPEXConfig cfg = {
         .mmio32 = memmap[VIRT_PCIE_MMIO],
         .pio    = memmap[VIRT_PCIE_PIO],
@@ -174,6 +195,14 @@  static void acpi_dsdt_add_pci(Aml *scope, const MemMapEntry *memmap,
     }
 
     acpi_dsdt_add_gpex(scope, &cfg);
+    QLIST_FOREACH(bus, &vms->bus->child, sibling) {
+        if (pci_bus_is_cxl(bus)) {
+            cxl_present = true;
+        }
+    }
+    if (cxl_present) {
+        build_acpi0017(scope);
+    }
 }
 
 static void acpi_dsdt_add_gpio(Aml *scope, const MemMapEntry *gpio_memmap,
@@ -992,6 +1021,11 @@  void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
         }
     }
 
+    if (vms->cxl_devices_state.is_enabled) {
+        cxl_build_cedt(table_offsets, tables_blob, tables->linker,
+                       vms->oem_id, vms->oem_table_id, &vms->cxl_devices_state);
+    }
+
     if (ms->nvdimms_state->is_enabled) {
         nvdimm_build_acpi(table_offsets, tables_blob, tables->linker,
                           ms->nvdimms_state, ms->ram_slots, vms->oem_id,
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 097238faa7..a8a2d79f19 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -55,6 +55,7 @@ 
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "hw/pci-host/gpex.h"
+#include "hw/pci-bridge/pci_expander_bridge.h"
 #include "hw/virtio/virtio-pci.h"
 #include "hw/core/sysbus-fdt.h"
 #include "hw/platform-bus.h"
@@ -78,6 +79,8 @@ 
 #include "hw/virtio/virtio-mem-pci.h"
 #include "hw/virtio/virtio-iommu.h"
 #include "hw/char/pl011.h"
+#include "hw/cxl/cxl.h"
+#include "hw/cxl/cxl_host.h"
 #include "qemu/guest-random.h"
 
 #define DEFINE_VIRT_MACHINE_LATEST(major, minor, latest) \
@@ -178,6 +181,7 @@  static const MemMapEntry base_memmap[] = {
 static MemMapEntry extended_memmap[] = {
     /* Additional 64 MB redist region (can contain up to 512 redistributors) */
     [VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
+    [VIRT_CXL_HOST] =           { 0x0, 64 * KiB * 16 }, /* 16 UID */
     [VIRT_HIGH_PCIE_ECAM] =     { 0x0, 256 * MiB },
     /* Second PCIe window */
     [VIRT_HIGH_PCIE_MMIO] =     { 0x0, 512 * GiB },
@@ -1525,6 +1529,16 @@  static void create_pcie(VirtMachineState *vms)
     }
 }
 
+static void create_cxl_host_reg_region(VirtMachineState *vms)
+{
+    MemoryRegion *sysmem = get_system_memory();
+    MemoryRegion *mr = &vms->cxl_devices_state.host_mr;
+
+    memory_region_init(mr, OBJECT(vms), "cxl_host_reg",
+                       vms->memmap[VIRT_CXL_HOST].size);
+    memory_region_add_subregion(sysmem, vms->memmap[VIRT_CXL_HOST].base, mr);
+}
+
 static void create_platform_bus(VirtMachineState *vms)
 {
     DeviceState *dev;
@@ -1638,6 +1652,12 @@  void virt_machine_done(Notifier *notifier, void *data)
     struct arm_boot_info *info = &vms->bootinfo;
     AddressSpace *as = arm_boot_address_space(cpu, info);
 
+    cxl_hook_up_pxb_registers(vms->bus, &vms->cxl_devices_state,
+                              &error_fatal);
+
+    if (vms->cxl_devices_state.is_enabled) {
+        cxl_fmws_link_targets(&vms->cxl_devices_state, &error_fatal);
+    }
     /*
      * If the user provided a dtb, we assume the dynamic sysbus nodes
      * already are integrated there. This corresponds to a use case where
@@ -1779,6 +1799,20 @@  static void virt_set_memmap(VirtMachineState *vms, int pa_bits)
         memory_region_init(&ms->device_memory->mr, OBJECT(vms),
                            "device-memory", device_memory_size);
     }
+
+    if (vms->cxl_devices_state.fixed_windows) {
+        GList *it;
+
+        base = ROUND_UP(base, 256 * MiB);
+        for (it = vms->cxl_devices_state.fixed_windows; it; it = it->next) {
+            CXLFixedWindow *fw = it->data;
+
+            fw->base = base;
+            memory_region_init_io(&fw->mr, OBJECT(vms), &cfmws_ops, fw,
+                                  "cxl-fixed-memory-region", fw->size);
+            base += fw->size;
+        }
+    }
 }
 
 /*
@@ -2215,6 +2249,14 @@  static void machvirt_init(MachineState *machine)
         memory_region_add_subregion(sysmem, machine->device_memory->base,
                                     &machine->device_memory->mr);
     }
+    if (vms->cxl_devices_state.fixed_windows) {
+        GList *it;
+        for (it = vms->cxl_devices_state.fixed_windows; it; it = it->next) {
+            CXLFixedWindow *fw = it->data;
+
+            memory_region_add_subregion(sysmem, fw->base, &fw->mr);
+        }
+    }
 
     virt_flash_fdt(vms, sysmem, secure_sysmem ?: sysmem);
 
@@ -2241,6 +2283,7 @@  static void machvirt_init(MachineState *machine)
     create_rtc(vms);
 
     create_pcie(vms);
+    create_cxl_host_reg_region(vms);
 
     if (has_ged && aarch64 && firmware_loaded && virt_is_acpi_enabled(vms)) {
         vms->acpi_dev = create_acpi_ged(vms);
@@ -3070,6 +3113,7 @@  static void virt_instance_init(Object *obj)
 
     vms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
     vms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
+    cxl_machine_init(obj, &vms->cxl_devices_state);
 }
 
 static const TypeInfo virt_machine_info = {
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 15feabac63..403c9107e5 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -35,6 +35,7 @@ 
 #include "hw/boards.h"
 #include "hw/arm/boot.h"
 #include "hw/block/flash.h"
+#include "hw/cxl/cxl.h"
 #include "sysemu/kvm.h"
 #include "hw/intc/arm_gicv3_common.h"
 #include "qom/object.h"
@@ -92,6 +93,7 @@  enum {
 /* indices of IO regions located after the RAM */
 enum {
     VIRT_HIGH_GIC_REDIST2 =  VIRT_LOWMEMMAP_LAST,
+    VIRT_CXL_HOST,
     VIRT_HIGH_PCIE_ECAM,
     VIRT_HIGH_PCIE_MMIO,
 };
@@ -176,6 +178,7 @@  struct VirtMachineState {
     PCIBus *bus;
     char *oem_id;
     char *oem_table_id;
+    CXLState cxl_devices_state;
 };
 
 #define VIRT_ECAM_ID(high) (high ? VIRT_HIGH_PCIE_ECAM : VIRT_PCIE_ECAM)