diff mbox series

[RFC,v1,3/3] hw/arm/virt-acpi-build.c: Enable CPU cache topology

Message ID 20240129081423.116615-4-jeeheng.sia@starfivetech.com (mailing list archive)
State New, archived
Headers show
Series Add cache structure table creation for PPTT table | expand

Commit Message

Sia Jee Heng Jan. 29, 2024, 8:14 a.m. UTC
Introduced a 3-layer cache for the ARM virtual machine.

Signed-off-by: Sia Jee Heng <jeeheng.sia@starfivetech.com>
---
 hw/arm/virt-acpi-build.c | 44 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

Comments

Jonathan Cameron Jan. 29, 2024, 11:08 a.m. UTC | #1
On Mon, 29 Jan 2024 00:14:23 -0800
Sia Jee Heng <jeeheng.sia@starfivetech.com> wrote:

> Introduced a 3-layer cache for the ARM virtual machine.
> 
> Signed-off-by: Sia Jee Heng <jeeheng.sia@starfivetech.com>

There are a bunch of CPU registers that also need updating to reflect the
described cache.
https://lore.kernel.org/qemu-devel/20230808115713.2613-3-Jonathan.Cameron@huawei.com/
It's called HACK for a reason ;)
But there is some discussion about this issue in the thread.

The l1 etc also needs to reflect the CPU model.  This stuff needs to match.
Wrong information being passed to a VM is probably worse than no information.

Whilst I plan to circle back to the MPAM support (perhaps next month) there
is a lot more to be done here before we have useful cache descriptions for
guests.

Jonathan

> ---
>  hw/arm/virt-acpi-build.c | 44 +++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 43 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 17aeec7a6f..c57067cd63 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -426,6 +426,48 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
>      g_array_free(its_idmaps, true);
>  }
>  
> +static void pptt_setup(GArray *table_data, BIOSLinker *linker, MachineState *ms,
> +                       const char *oem_id, const char *oem_table_id)
> +{
> +    CPUCaches default_cache_info = {
> +        .l1d_cache = &(CPUCacheInfo) {
> +            .type = DATA_CACHE,
> +            .size = 64 * KiB,
> +            .line_size = 64,
> +            .associativity = 4,
> +            .sets = 256,
> +            .attributes = 0x02,
> +        },
> +        .l1i_cache = &(CPUCacheInfo) {
> +            .type = INSTRUCTION_CACHE,
> +            .size = 64 * KiB,
> +            .line_size = 64,
> +            .associativity = 4,
> +            .sets = 256,
> +            .attributes = 0x04,

This is the duplication I commented on in patch 1.
The bit set there is the one to indicate it's an instruction
cache and we have type doing that as well.


> +        },
> +        .l2_cache = &(CPUCacheInfo) {
> +            .type = UNIFIED_CACHE,
> +            .size = 2048 * KiB,
> +            .line_size = 64,
> +            .associativity = 8,
> +            .sets = 4096,
> +            .attributes = 0x0a,
> +        },
> +        .l3_cache = &(CPUCacheInfo) {
> +            .type = UNIFIED_CACHE,
> +            .size = 4096 * KiB,
> +            .line_size = 64,
> +            .associativity = 8,
> +            .sets = 8192,
> +            .attributes = 0x0a,
> +        },
> +    };
> +
> +    build_pptt(table_data, linker, ms, oem_id, oem_table_id,
> +               &default_cache_info);
> +}
> +
>  /*
>   * Serial Port Console Redirection Table (SPCR)
>   * Rev: 1.07
> @@ -912,7 +954,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
>  
>      if (!vmc->no_cpu_topology) {
>          acpi_add_table(table_offsets, tables_blob);
> -        build_pptt(tables_blob, tables->linker, ms,
> +        pptt_setup(tables_blob, tables->linker, ms,
>                     vms->oem_id, vms->oem_table_id);
>      }
>
Sia Jee Heng Jan. 30, 2024, 5:03 a.m. UTC | #2
> -----Original Message-----
> From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> Sent: Monday, January 29, 2024 7:08 PM
> To: JeeHeng Sia <jeeheng.sia@starfivetech.com>
> Cc: qemu-devel@nongnu.org; qemu-arm@nongnu.org; qemu-riscv@nongnu.org; mst@redhat.com; imammedo@redhat.com;
> anisinha@redhat.com; shannon.zhaosl@gmail.com; peter.maydell@linaro.org; sunilvl@ventanamicro.com; palmer@dabbelt.com;
> alistair.francis@wdc.com; bin.meng@windriver.com; liwei1518@gmail.com; dbarboza@ventanamicro.com;
> zhiwei_liu@linux.alibaba.com
> Subject: Re: [RFC v1 3/3] hw/arm/virt-acpi-build.c: Enable CPU cache topology
> 
> On Mon, 29 Jan 2024 00:14:23 -0800
> Sia Jee Heng <jeeheng.sia@starfivetech.com> wrote:
> 
> > Introduced a 3-layer cache for the ARM virtual machine.
> >
> > Signed-off-by: Sia Jee Heng <jeeheng.sia@starfivetech.com>
> 
> There are a bunch of CPU registers that also need updating to reflect the
> described cache.
> https://lore.kernel.org/qemu-devel/20230808115713.2613-3-Jonathan.Cameron@huawei.com/
> It's called HACK for a reason ;)
> But there is some discussion about this issue in the thread.
> 
> The l1 etc also needs to reflect the CPU model.  This stuff needs to match.
> Wrong information being passed to a VM is probably worse than no information.
> 
> Whilst I plan to circle back to the MPAM support (perhaps next month) there
> is a lot more to be done here before we have useful cache descriptions for
> guests.
Thanks for the info. I will spend time to look into.
> 
> Jonathan
> 
> > ---
> >  hw/arm/virt-acpi-build.c | 44 +++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 43 insertions(+), 1 deletion(-)
> >
> > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> > index 17aeec7a6f..c57067cd63 100644
> > --- a/hw/arm/virt-acpi-build.c
> > +++ b/hw/arm/virt-acpi-build.c
> > @@ -426,6 +426,48 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> >      g_array_free(its_idmaps, true);
> >  }
> >
> > +static void pptt_setup(GArray *table_data, BIOSLinker *linker, MachineState *ms,
> > +                       const char *oem_id, const char *oem_table_id)
> > +{
> > +    CPUCaches default_cache_info = {
> > +        .l1d_cache = &(CPUCacheInfo) {
> > +            .type = DATA_CACHE,
> > +            .size = 64 * KiB,
> > +            .line_size = 64,
> > +            .associativity = 4,
> > +            .sets = 256,
> > +            .attributes = 0x02,
> > +        },
> > +        .l1i_cache = &(CPUCacheInfo) {
> > +            .type = INSTRUCTION_CACHE,
> > +            .size = 64 * KiB,
> > +            .line_size = 64,
> > +            .associativity = 4,
> > +            .sets = 256,
> > +            .attributes = 0x04,
> 
> This is the duplication I commented on in patch 1.
> The bit set there is the one to indicate it's an instruction
> cache and we have type doing that as well.
But this gives a great readability, no?
> 
> 
> > +        },
> > +        .l2_cache = &(CPUCacheInfo) {
> > +            .type = UNIFIED_CACHE,
> > +            .size = 2048 * KiB,
> > +            .line_size = 64,
> > +            .associativity = 8,
> > +            .sets = 4096,
> > +            .attributes = 0x0a,
> > +        },
> > +        .l3_cache = &(CPUCacheInfo) {
> > +            .type = UNIFIED_CACHE,
> > +            .size = 4096 * KiB,
> > +            .line_size = 64,
> > +            .associativity = 8,
> > +            .sets = 8192,
> > +            .attributes = 0x0a,
> > +        },
> > +    };
> > +
> > +    build_pptt(table_data, linker, ms, oem_id, oem_table_id,
> > +               &default_cache_info);
> > +}
> > +
> >  /*
> >   * Serial Port Console Redirection Table (SPCR)
> >   * Rev: 1.07
> > @@ -912,7 +954,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
> >
> >      if (!vmc->no_cpu_topology) {
> >          acpi_add_table(table_offsets, tables_blob);
> > -        build_pptt(tables_blob, tables->linker, ms,
> > +        pptt_setup(tables_blob, tables->linker, ms,
> >                     vms->oem_id, vms->oem_table_id);
> >      }
> >
Jonathan Cameron Jan. 31, 2024, 3:56 p.m. UTC | #3
On Tue, 30 Jan 2024 05:03:15 +0000
JeeHeng Sia <jeeheng.sia@starfivetech.com> wrote:

> > -----Original Message-----
> > From: Jonathan Cameron <Jonathan.Cameron@Huawei.com>
> > Sent: Monday, January 29, 2024 7:08 PM
> > To: JeeHeng Sia <jeeheng.sia@starfivetech.com>
> > Cc: qemu-devel@nongnu.org; qemu-arm@nongnu.org; qemu-riscv@nongnu.org; mst@redhat.com; imammedo@redhat.com;
> > anisinha@redhat.com; shannon.zhaosl@gmail.com; peter.maydell@linaro.org; sunilvl@ventanamicro.com; palmer@dabbelt.com;
> > alistair.francis@wdc.com; bin.meng@windriver.com; liwei1518@gmail.com; dbarboza@ventanamicro.com;
> > zhiwei_liu@linux.alibaba.com
> > Subject: Re: [RFC v1 3/3] hw/arm/virt-acpi-build.c: Enable CPU cache topology
> > 
> > On Mon, 29 Jan 2024 00:14:23 -0800
> > Sia Jee Heng <jeeheng.sia@starfivetech.com> wrote:
> >   
> > > Introduced a 3-layer cache for the ARM virtual machine.
> > >
> > > Signed-off-by: Sia Jee Heng <jeeheng.sia@starfivetech.com>  
> > 
> > There are a bunch of CPU registers that also need updating to reflect the
> > described cache.
> > https://lore.kernel.org/qemu-devel/20230808115713.2613-3-Jonathan.Cameron@huawei.com/
> > It's called HACK for a reason ;)
> > But there is some discussion about this issue in the thread.
> > 
> > The l1 etc also needs to reflect the CPU model.  This stuff needs to match.
> > Wrong information being passed to a VM is probably worse than no information.
> > 
> > Whilst I plan to circle back to the MPAM support (perhaps next month) there
> > is a lot more to be done here before we have useful cache descriptions for
> > guests.  
> Thanks for the info. I will spend time to look into.
> > 
> > Jonathan
> >   
> > > ---
> > >  hw/arm/virt-acpi-build.c | 44 +++++++++++++++++++++++++++++++++++++++-
> > >  1 file changed, 43 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> > > index 17aeec7a6f..c57067cd63 100644
> > > --- a/hw/arm/virt-acpi-build.c
> > > +++ b/hw/arm/virt-acpi-build.c
> > > @@ -426,6 +426,48 @@ build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
> > >      g_array_free(its_idmaps, true);
> > >  }
> > >
> > > +static void pptt_setup(GArray *table_data, BIOSLinker *linker, MachineState *ms,
> > > +                       const char *oem_id, const char *oem_table_id)
> > > +{
> > > +    CPUCaches default_cache_info = {
> > > +        .l1d_cache = &(CPUCacheInfo) {
> > > +            .type = DATA_CACHE,
> > > +            .size = 64 * KiB,
> > > +            .line_size = 64,
> > > +            .associativity = 4,
> > > +            .sets = 256,
> > > +            .attributes = 0x02,
> > > +        },
> > > +        .l1i_cache = &(CPUCacheInfo) {
> > > +            .type = INSTRUCTION_CACHE,
> > > +            .size = 64 * KiB,
> > > +            .line_size = 64,
> > > +            .associativity = 4,
> > > +            .sets = 256,
> > > +            .attributes = 0x04,  
> > 
> > This is the duplication I commented on in patch 1.
> > The bit set there is the one to indicate it's an instruction
> > cache and we have type doing that as well.  
> But this gives a great readability, no?

Not really no. If .type and attributes end up out of agreement with each
other it will be non obvious.

You could do 
		.attributes {
			.type = INSTRUCTION_CACHE,
			.other things ...
		}

if you want to list the type clearly and still maintain the info that
this ends up in attributes.


> > 
> >   
> > > +        },
> > > +        .l2_cache = &(CPUCacheInfo) {
> > > +            .type = UNIFIED_CACHE,
> > > +            .size = 2048 * KiB,
> > > +            .line_size = 64,
> > > +            .associativity = 8,
> > > +            .sets = 4096,
> > > +            .attributes = 0x0a,
> > > +        },
> > > +        .l3_cache = &(CPUCacheInfo) {
> > > +            .type = UNIFIED_CACHE,
> > > +            .size = 4096 * KiB,
> > > +            .line_size = 64,
> > > +            .associativity = 8,
> > > +            .sets = 8192,
> > > +            .attributes = 0x0a,
> > > +        },
> > > +    };
> > > +
> > > +    build_pptt(table_data, linker, ms, oem_id, oem_table_id,
> > > +               &default_cache_info);
> > > +}
> > > +
> > >  /*
> > >   * Serial Port Console Redirection Table (SPCR)
> > >   * Rev: 1.07
> > > @@ -912,7 +954,7 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
> > >
> > >      if (!vmc->no_cpu_topology) {
> > >          acpi_add_table(table_offsets, tables_blob);
> > > -        build_pptt(tables_blob, tables->linker, ms,
> > > +        pptt_setup(tables_blob, tables->linker, ms,
> > >                     vms->oem_id, vms->oem_table_id);
> > >      }
> > >  
>
Peter Maydell Feb. 1, 2024, 4:06 p.m. UTC | #4
On Mon, 29 Jan 2024 at 11:08, Jonathan Cameron
<Jonathan.Cameron@huawei.com> wrote:
>
> On Mon, 29 Jan 2024 00:14:23 -0800
> Sia Jee Heng <jeeheng.sia@starfivetech.com> wrote:
>
> > Introduced a 3-layer cache for the ARM virtual machine.
> >
> > Signed-off-by: Sia Jee Heng <jeeheng.sia@starfivetech.com>
>
> There are a bunch of CPU registers that also need updating to reflect the
> described cache.
> https://lore.kernel.org/qemu-devel/20230808115713.2613-3-Jonathan.Cameron@huawei.com/
> It's called HACK for a reason ;)
> But there is some discussion about this issue in the thread.
>
> The l1 etc also needs to reflect the CPU model.  This stuff needs to match.
> Wrong information being passed to a VM is probably worse than no information.

Yes. The ACPI table information, if we provide it, should be
being generated from the CPU cache ID registers.

But I'm a bit confused about why the ACPI table has the cache
topology in it -- can't the guest read the cache ID registers
and figure it out for itself?

thanks
-- PMM
Jonathan Cameron Feb. 2, 2024, 5:30 p.m. UTC | #5
On Thu, 1 Feb 2024 16:06:55 +0000
Peter Maydell <peter.maydell@linaro.org> wrote:

> On Mon, 29 Jan 2024 at 11:08, Jonathan Cameron
> <Jonathan.Cameron@huawei.com> wrote:
> >
> > On Mon, 29 Jan 2024 00:14:23 -0800
> > Sia Jee Heng <jeeheng.sia@starfivetech.com> wrote:
> >  
> > > Introduced a 3-layer cache for the ARM virtual machine.
> > >
> > > Signed-off-by: Sia Jee Heng <jeeheng.sia@starfivetech.com>  
> >
> > There are a bunch of CPU registers that also need updating to reflect the
> > described cache.
> > https://lore.kernel.org/qemu-devel/20230808115713.2613-3-Jonathan.Cameron@huawei.com/
> > It's called HACK for a reason ;)
> > But there is some discussion about this issue in the thread.
> >
> > The l1 etc also needs to reflect the CPU model.  This stuff needs to match.
> > Wrong information being passed to a VM is probably worse than no information.  
> 
> Yes. The ACPI table information, if we provide it, should be
> being generated from the CPU cache ID registers.

That + some additional information I think.

> 
> But I'm a bit confused about why the ACPI table has the cache
> topology in it -- can't the guest read the cache ID registers
> and figure it out for itself?

That's a philosophy question for the ARM architects :)

The registers focus on correctness (so what you need to flush etc,
where the point of coherence is and other fun)
not all the info needed for performance tuning. There is some stuff
on the cache type that I guess is more perf tuning related (sets etc)


They probably could have expanded the CPU registers to provide a lot
more information (which is what x86 effectively does). IIRC what is
there today for ARM is pretty much useless for anything placement
decision related (scheduling etc)  You can't tell what shares a cache
(or more fun cluster topology structures). 

It is hard in registers to define a nice flexible graph where
all sorts of fun topology of the system can be described.
PPTT provides that opportunity for a richer description.
Sure you could squirt out the equivalent table via registers,
but what's the point given we have firmware tables for this
sort of thing..

Jonathan


 

> 
> thanks
> -- PMM
diff mbox series

Patch

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 17aeec7a6f..c57067cd63 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -426,6 +426,48 @@  build_iort(GArray *table_data, BIOSLinker *linker, VirtMachineState *vms)
     g_array_free(its_idmaps, true);
 }
 
+static void pptt_setup(GArray *table_data, BIOSLinker *linker, MachineState *ms,
+                       const char *oem_id, const char *oem_table_id)
+{
+    CPUCaches default_cache_info = {
+        .l1d_cache = &(CPUCacheInfo) {
+            .type = DATA_CACHE,
+            .size = 64 * KiB,
+            .line_size = 64,
+            .associativity = 4,
+            .sets = 256,
+            .attributes = 0x02,
+        },
+        .l1i_cache = &(CPUCacheInfo) {
+            .type = INSTRUCTION_CACHE,
+            .size = 64 * KiB,
+            .line_size = 64,
+            .associativity = 4,
+            .sets = 256,
+            .attributes = 0x04,
+        },
+        .l2_cache = &(CPUCacheInfo) {
+            .type = UNIFIED_CACHE,
+            .size = 2048 * KiB,
+            .line_size = 64,
+            .associativity = 8,
+            .sets = 4096,
+            .attributes = 0x0a,
+        },
+        .l3_cache = &(CPUCacheInfo) {
+            .type = UNIFIED_CACHE,
+            .size = 4096 * KiB,
+            .line_size = 64,
+            .associativity = 8,
+            .sets = 8192,
+            .attributes = 0x0a,
+        },
+    };
+
+    build_pptt(table_data, linker, ms, oem_id, oem_table_id,
+               &default_cache_info);
+}
+
 /*
  * Serial Port Console Redirection Table (SPCR)
  * Rev: 1.07
@@ -912,7 +954,7 @@  void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables)
 
     if (!vmc->no_cpu_topology) {
         acpi_add_table(table_offsets, tables_blob);
-        build_pptt(tables_blob, tables->linker, ms,
+        pptt_setup(tables_blob, tables->linker, ms,
                    vms->oem_id, vms->oem_table_id);
     }