Message ID | 20220922101454.1069462-1-kraxel@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v4] x86: add etc/phys-bits fw_cfg file | expand |
On Thu, Sep 22, 2022 at 12:14:54PM +0200, Gerd Hoffmann wrote: > In case phys bits are functional and can be used by the guest (aka > host-phys-bits=on) add a fw_cfg file carrying the value. This can > be used by the guest firmware for address space configuration. > > The value in the etc/phys-bits fw_cfg file should be identical to > the phys bits value published via cpuid leaf 0x80000008. > > This is only enabled for 7.2+ machine types for live migration > compatibility reasons. Is this going to have any implications for what mgmt apps must take into account when selecting valid migration target hosts ? Historically, apps have tended to ignore any checks for phys bits between src/dst migration hosts and hoped for the best. Will this new behaviour introduce / change any failure scenarios where the target host has fewer phys bits than the src host, that mgmt apps need to be made aware of ? > > Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> > Reviewed-by: Michael S. Tsirkin <mst@redhat.com> > --- > hw/i386/fw_cfg.h | 1 + > include/hw/i386/pc.h | 1 + > hw/i386/fw_cfg.c | 12 ++++++++++++ > hw/i386/pc.c | 5 +++++ > hw/i386/pc_piix.c | 2 ++ > hw/i386/pc_q35.c | 2 ++ > 6 files changed, 23 insertions(+) > > diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h > index 275f15c1c5e8..6ff198a6cb85 100644 > --- a/hw/i386/fw_cfg.h > +++ b/hw/i386/fw_cfg.h > @@ -26,5 +26,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms, > void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg); > void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); > void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); > +void fw_cfg_phys_bits(FWCfgState *fw_cfg); > > #endif > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > index c95333514ed3..bedef1ee13c1 100644 > --- a/include/hw/i386/pc.h > +++ b/include/hw/i386/pc.h > @@ -119,6 +119,7 @@ struct PCMachineClass { > bool enforce_aligned_dimm; > bool broken_reserved_end; > bool enforce_amd_1tb_hole; > + bool 
phys_bits_in_fw_cfg; > > /* generate legacy CPU hotplug AML */ > bool legacy_cpu_hotplug; > diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c > index a283785a8de4..6a1f18925725 100644 > --- a/hw/i386/fw_cfg.c > +++ b/hw/i386/fw_cfg.c > @@ -219,3 +219,15 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) > aml_append(dev, aml_name_decl("_CRS", crs)); > aml_append(scope, dev); > } > + > +void fw_cfg_phys_bits(FWCfgState *fw_cfg) > +{ > + X86CPU *cpu = X86_CPU(first_cpu); > + uint64_t phys_bits = cpu->phys_bits; > + > + if (cpu->host_phys_bits) { > + fw_cfg_add_file(fw_cfg, "etc/phys-bits", > + g_memdup2(&phys_bits, sizeof(phys_bits)), > + sizeof(phys_bits)); > + } > +} > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > index 566accf7e60a..17ecc7fe4331 100644 > --- a/hw/i386/pc.c > +++ b/hw/i386/pc.c > @@ -744,6 +744,7 @@ void pc_machine_done(Notifier *notifier, void *data) > { > PCMachineState *pcms = container_of(notifier, > PCMachineState, machine_done); > + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > X86MachineState *x86ms = X86_MACHINE(pcms); > > cxl_hook_up_pxb_registers(pcms->bus, &pcms->cxl_devices_state, > @@ -764,6 +765,9 @@ void pc_machine_done(Notifier *notifier, void *data) > fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); > /* update FW_CFG_NB_CPUS to account for -device added CPUs */ > fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); > + if (pcmc->phys_bits_in_fw_cfg) { > + fw_cfg_phys_bits(x86ms->fw_cfg); > + } > } > } > > @@ -1907,6 +1911,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) > pcmc->kvmclock_enabled = true; > pcmc->enforce_aligned_dimm = true; > pcmc->enforce_amd_1tb_hole = true; > + pcmc->phys_bits_in_fw_cfg = true; > /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported > * to be used at the moment, 32K should be enough for a while. 
*/ > pcmc->acpi_data_size = 0x20000 + 0x8000; > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > index 8043a250adf3..c6a4dbd5c0b0 100644 > --- a/hw/i386/pc_piix.c > +++ b/hw/i386/pc_piix.c > @@ -447,9 +447,11 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL, > > static void pc_i440fx_7_1_machine_options(MachineClass *m) > { > + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); > pc_i440fx_7_2_machine_options(m); > m->alias = NULL; > m->is_default = false; > + pcmc->phys_bits_in_fw_cfg = false; > compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); > compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); > } > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c > index 53eda50e818c..c2b56daa1550 100644 > --- a/hw/i386/pc_q35.c > +++ b/hw/i386/pc_q35.c > @@ -384,8 +384,10 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL, > > static void pc_q35_7_1_machine_options(MachineClass *m) > { > + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); > pc_q35_7_2_machine_options(m); > m->alias = NULL; > + pcmc->phys_bits_in_fw_cfg = false; > compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); > compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); > } > -- > 2.37.3 > > With regards, Daniel
On Thu, Sep 22, 2022 at 12:24:09PM +0100, Daniel P. Berrangé wrote: > On Thu, Sep 22, 2022 at 12:14:54PM +0200, Gerd Hoffmann wrote: > > In case phys bits are functional and can be used by the guest (aka > > host-phys-bits=on) add a fw_cfg file carrying the value. This can > > be used by the guest firmware for address space configuration. > > > > The value in the etc/phys-bits fw_cfg file should be identical to > > the phys bits value published via cpuid leaf 0x80000008. > > > > This is only enabled for 7.2+ machine types for live migration > > compatibility reasons. > > Is this going to have any implications for what mgmt apps must > take into account when selecting valid migration target hosts ? I don't think this does anything by itself. It just tells the firmware which value to use, since historically it ignored CPUID. I am still debating with myself whether a boolean would be better. Would appreciate KVM maintainer's take on this. But in any case guests already sometimes use CPUID (e.g. just grep for cpuid_maxphyaddr). This value is just for firmware use. > Historically, apps have tended to ignore any checks for phys > bits between src/dst migration hosts and hoped for the best. > > Will this new behaviour introduce / change any failure scenarios > where the target host has fewer phys bits than the src host, that > mgmt apps need to be made aware of ? > > > > > Signed-off-by: Gerd Hoffmann <kraxel@redhat.com> > > Reviewed-by: Michael S. 
Tsirkin <mst@redhat.com> > > --- > > hw/i386/fw_cfg.h | 1 + > > include/hw/i386/pc.h | 1 + > > hw/i386/fw_cfg.c | 12 ++++++++++++ > > hw/i386/pc.c | 5 +++++ > > hw/i386/pc_piix.c | 2 ++ > > hw/i386/pc_q35.c | 2 ++ > > 6 files changed, 23 insertions(+) > > > > diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h > > index 275f15c1c5e8..6ff198a6cb85 100644 > > --- a/hw/i386/fw_cfg.h > > +++ b/hw/i386/fw_cfg.h > > @@ -26,5 +26,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms, > > void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg); > > void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); > > void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); > > +void fw_cfg_phys_bits(FWCfgState *fw_cfg); > > > > #endif > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > > index c95333514ed3..bedef1ee13c1 100644 > > --- a/include/hw/i386/pc.h > > +++ b/include/hw/i386/pc.h > > @@ -119,6 +119,7 @@ struct PCMachineClass { > > bool enforce_aligned_dimm; > > bool broken_reserved_end; > > bool enforce_amd_1tb_hole; > > + bool phys_bits_in_fw_cfg; > > > > /* generate legacy CPU hotplug AML */ > > bool legacy_cpu_hotplug; > > diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c > > index a283785a8de4..6a1f18925725 100644 > > --- a/hw/i386/fw_cfg.c > > +++ b/hw/i386/fw_cfg.c > > @@ -219,3 +219,15 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) > > aml_append(dev, aml_name_decl("_CRS", crs)); > > aml_append(scope, dev); > > } > > + > > +void fw_cfg_phys_bits(FWCfgState *fw_cfg) > > +{ > > + X86CPU *cpu = X86_CPU(first_cpu); > > + uint64_t phys_bits = cpu->phys_bits; > > + > > + if (cpu->host_phys_bits) { > > + fw_cfg_add_file(fw_cfg, "etc/phys-bits", > > + g_memdup2(&phys_bits, sizeof(phys_bits)), > > + sizeof(phys_bits)); > > + } > > +} > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > > index 566accf7e60a..17ecc7fe4331 100644 > > --- a/hw/i386/pc.c > > +++ b/hw/i386/pc.c > > @@ -744,6 +744,7 @@ void pc_machine_done(Notifier *notifier, 
void *data) > > { > > PCMachineState *pcms = container_of(notifier, > > PCMachineState, machine_done); > > + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); > > X86MachineState *x86ms = X86_MACHINE(pcms); > > > > cxl_hook_up_pxb_registers(pcms->bus, &pcms->cxl_devices_state, > > @@ -764,6 +765,9 @@ void pc_machine_done(Notifier *notifier, void *data) > > fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); > > /* update FW_CFG_NB_CPUS to account for -device added CPUs */ > > fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); > > + if (pcmc->phys_bits_in_fw_cfg) { > > + fw_cfg_phys_bits(x86ms->fw_cfg); > > + } > > } > > } > > > > @@ -1907,6 +1911,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) > > pcmc->kvmclock_enabled = true; > > pcmc->enforce_aligned_dimm = true; > > pcmc->enforce_amd_1tb_hole = true; > > + pcmc->phys_bits_in_fw_cfg = true; > > /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported > > * to be used at the moment, 32K should be enough for a while. 
*/ > > pcmc->acpi_data_size = 0x20000 + 0x8000; > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > > index 8043a250adf3..c6a4dbd5c0b0 100644 > > --- a/hw/i386/pc_piix.c > > +++ b/hw/i386/pc_piix.c > > @@ -447,9 +447,11 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL, > > > > static void pc_i440fx_7_1_machine_options(MachineClass *m) > > { > > + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); > > pc_i440fx_7_2_machine_options(m); > > m->alias = NULL; > > m->is_default = false; > > + pcmc->phys_bits_in_fw_cfg = false; > > compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); > > compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); > > } > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c > > index 53eda50e818c..c2b56daa1550 100644 > > --- a/hw/i386/pc_q35.c > > +++ b/hw/i386/pc_q35.c > > @@ -384,8 +384,10 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL, > > > > static void pc_q35_7_1_machine_options(MachineClass *m) > > { > > + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); > > pc_q35_7_2_machine_options(m); > > m->alias = NULL; > > + pcmc->phys_bits_in_fw_cfg = false; > > compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); > > compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); > > } > > -- > > 2.37.3 > > > > > > With regards, > Daniel > -- > |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| > |: https://libvirt.org -o- https://fstop138.berrange.com :| > |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On Thu, Sep 22, 2022 at 12:24:09PM +0100, Daniel P. Berrangé wrote: > On Thu, Sep 22, 2022 at 12:14:54PM +0200, Gerd Hoffmann wrote: > > In case phys bits are functional and can be used by the guest (aka > > host-phys-bits=on) add a fw_cfg file carrying the value. This can > > be used by the guest firmware for address space configuration. > > > > The value in the etc/phys-bits fw_cfg file should be identical to > > the phys bits value published via cpuid leaf 0x80000008. > > > > This is only enabled for 7.2+ machine types for live migration > > compatibility reasons. > > Is this going to have any implications for what mgmt apps must > take into account when selecting valid migration target hosts ? > > Historically, apps have tended to ignore any checks for phys > bits between src/dst migration hosts and hoped for the best. > > Will this new behaviour introduce / change any failure scenarios > where the target host has fewer phys bits than the src host, that > mgmt apps need to be made aware of ? No. This will basically inform the guest that host-phys-bits has been enabled (and pass the number of bits). So the firmware can make use of the available address space instead of trying to be as conservative as possible to avoid going beyond the (unknown) limit. The phys-bits config itself is not touched. take care, Gerd
On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote: > No. This will basically inform the guest that host-phys-bits has been > enabled (and pass the number of bits). So the firmware can make use of > the available address space instead of trying to be as conservative as > possible to avoid going beyond the (unknown) limit. Intel processors that are not extremely old have host-phys-bits equal to 39, 46 or 52. Older processors that had 36, in all likelihood, didn't have IOMMUs (so no big 64-bit BARs). AMD processors have had 48 for a while, though older consumer processors had 40. QEMU has always used 40, though many downstream packages (IIRC RHEL and Ubuntu) just use host-phys-bits = true when using KVM. Would it work to: 1) set host-phys-bits to true on new machine types when not using TCG (i.e. KVM / HVF / WHPX) 2) in the firmware treat 40 as if it were 39, to support old machine types? Paolo
On Thu, Sep 22, 2022 at 02:38:02PM +0200, Paolo Bonzini wrote: > On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote: > > No. This will basically inform the guest that host-phys-bits has been > > enabled (and pass the number of bits). So the firmware can make use of > > the available address space instead of trying to be as conservative as > > possible to avoid going beyond the (unknown) limit. > > Intel processors that are not extremely old have host-phys-bits equal > to 39, 46 or 52. Older processors that had 36, in all likelihood, > didn't have IOMMUs (so no big 64-bit BARs). Well, I happen to have an Intel box with 36 phys-bits + IOMMU. > 1) set host-phys-bits to true on new machine types when not using TCG > (i.e. KVM / HVF / WHPX) That is probably a good idea, but an independent problem. It has live migration problems (when hosts have different phys bits), which is IIRC the reason this hasn't happened yet. Maybe that has been solved in the meantime one way or another, I've seen some phys-bits changes in libvirt recently ... > 2) in the firmware treat 40 as if it were 39, to support old machine > types? The background of all this is that devices need more and more memory, and the very conservative edk2 defaults are becoming increasingly problematic. So what I want to do is scale things up with the address space size. Use 1/4 or 1/8 of the physical address space as the 64-bit PCI MMIO window. Likewise scale up the default PCIe root port window sizes, to have more room for hotplug. For that to work the firmware obviously needs to know how much address space it actually has, which is not the case. Yes, the problematic cases are Intel machines with 36 or 39. Treating 40 as if it were 39 will explode on CPUs with 36 phys-bits. Treating 40 as if it were 36 will mostly work. Will leave a big chunk of address space unused. Will cause regressions on guests with > 32G of RAM. Treating 40 as invalid and continue to use the current conservative heuristic, otherwise treat phys-bits as valid might work. 
Obvious corner case is that it'll not catch broken manual configurations (host-phys-bits=off,phys-bits=<larger-than-host>), only the broken default. Not sure how much of a problem that is in practice, maybe it isn't. I think I still prefer to explicitly communicate a reliable phys-bits value to the guest somehow. take care, Gerd
On Thu, Sep 22, 2022 at 7:16 AM Gerd Hoffmann <kraxel@redhat.com> wrote: > > On Thu, Sep 22, 2022 at 02:38:02PM +0200, Paolo Bonzini wrote: > > On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote: > > > No. This will basically inform the guest that host-phys-bits has been > > > enabled (and pass the number of bits). So the firmware can make use of > > > the available address space instead of trying to be as conservative as > > > possible to avoid going beyond the (unknown) limit. > > > > Intel processors that are not extremely old have host-phys-bits equal > > to 39, 46 or 52. Older processors that had 36, in all likelihood, > > didn't have IOMMUs (so no big 64-bit BARs). > > Well, I happen to have a intel box with 36 physbits + iommu. > > > 1) set host-phys-bits to true on new machine types when not using TCG > > (i.e. KVM / HVF / WHPX) > > That is probably a good idea, but an independent problem. > > Has live migration problems (when hosts have different phys bits), > which is IIRC the reason this hasn't happen yet. Maybe that is solved > meanwhile the one way or another, I've seen some phys-bits changes in > libvirt recently ... > > > 2) in the firmware treat 40 as if it were 39, to support old machine > > types? > > The background of all this is that devices need more and more memory, > and the very conservative edk2 defaults are becoming increasingly > problematic. So what I want do is scale things up with the address > space size. Use 1/4 or 1/8 of the physical address space as 64bit > pci mmio window. Likewise scale up the default pcie root port window > sizes, to have more room for hotplug. > > For that to work the firmware obviously needs to know how much it > actually has, which is not the case. > > Yes, the problematic cases are intel machines with 36 or 39. > > Treating 40 as if it were 39 will explode with 36 cpus. > > Treating 40 as if it were 36 will mostly work. Will leave a big > chunk of address space unused. 
Will cause regressions on guests > with > 32G of RAM. > > Treating 40 as invalid and continue to use the current conservative > heuristic, otherwise treat phys-bits as valid might work. Obvious > corner case is that it'll not catch broken manual configurations > (host-phys-bits=off,phys-bits=<larger-than-host>), only the broken > default. Not sure how much of a problem that is in practice, maybe > it isn't. > > I think I still prefer to explicitly communicate a reliable phys-bits > value to the guest somehow. On x86 hardware, KVM is incapable of emulating a guest physical width that differs from the host physical width. There isn't support in the hardware for it.
On Thu, Sep 22, 2022 at 7:13 PM Jim Mattson <jmattson@google.com> wrote: > > Treating 40 as invalid and continue to use the current conservative > > heuristic, otherwise treat phys-bits as valid might work. Obvious > > corner case is that it'll not catch broken manual configurations > > (host-phys-bits=off,phys-bits=<larger-than-host>), only the broken > > default. Not sure how much of a problem that is in practice, maybe > > it isn't. > > > > I think I still prefer to explicitly communicate a reliable phys-bits > > value to the guest somehow. > > On x86 hardware, KVM is incapable of emulating a guest physical width > that differs from the host physical width. There isn't support in the > hardware for it. Indeed, everything else is a userspace bug. Especially since here we're talking of host_maxphyaddr < guest_maxphyaddr, which is completely impossible. Paolo
On Thu, Sep 22, 2022 at 02:38:02PM +0200, Paolo Bonzini wrote: > On Thu, Sep 22, 2022 at 2:21 PM Gerd Hoffmann <kraxel@redhat.com> wrote: > > No. This will basically inform the guest that host-phys-bits has been > > enabled (and pass the number of bits). So the firmware can make use of > > the available address space instead of trying to be as conservative as > > possible to avoid going beyond the (unknown) limit. > > Intel processors that are not extremely old have host-phys-bits equal > to 39, 46 or 52. Older processors that had 36, in all likelihood, > didn't have IOMMUs (so no big 64-bit BARs). > > AMD processors have had 48 for a while, though older consumer processors had 40. How reliable is the vendorid? Given newer processors have more than 40 and for older ones we know the possible values for the two relevant x86 vendors we could do something along the lines of: phys-bits >= 41 -> valid phys-bits == 40 + AuthenticAMD -> valid phys-bits == 36,39 + GenuineIntel -> valid everything else -> invalid Does that look sensible to you? take care, Gerd
Hi, > > Given newer processors have more than 40 and for older ones we know > > the possible values for the two relevant x86 vendors we could do > > something along the lines of: > > > > phys-bits >= 41 -> valid > > phys-bits == 40 + AuthenticAMD -> valid > > phys-bits == 36,39 + GenuineIntel -> valid > > everything else -> invalid > > > > Does that look sensible to you? > > > > Yes, it does! Is phys-bits == 36 the same as invalid? 'invalid' would continue to use the current guesswork codepath for phys-bits, which will end up with phys-bits = 36 for smaller VMs, but it can go beyond that in VMs with a lot (32G or more) of memory. That logic assumes that physical machines with enough RAM for 32G+ guests have a physical address space > 64G. 'phys-bits = 36' would be a hard limit. So, it's not exactly the same but small VMs wouldn't see a difference. take care, Gerd
On Fri, Sep 23, 2022 at 08:23:12AM +0200, Gerd Hoffmann wrote: > Hi, > > > > Given newer processors have more than 40 and for older ones we know > > > the possible values for the two relevant x86 vendors we could do > > > something along the lines of: > > > > > > phys-bits >= 41 -> valid > > > phys-bits == 40 + AuthenticAMD -> valid > > > phys-bits == 36,39 + GenuineIntel -> valid > > > everything else -> invalid > > > > > > Does that look sensible to you? > > > > > > > Yes, it does! Is phys-bits == 36 the same as invalid? > > 'invalid' would continue to use the current guesswork codepath for > phys-bits. Which will end up with phys-bits = 36 for smaller VMs, but > it can go beyond that in VMs with alot (32G or more) of memory. That > logic assumes that physical machines with enough RAM for 32G+ guests > have a physical address space > 64G. > > 'phys-bits = 36' would be a hard limit. > > So, it's not exactly the same but small VMs wouldn't see a difference. > > take care, > Gerd I dropped the patch for now.
Hi, > > > > Given newer processors have more than 40 and for older ones we know > > > > the possible values for the two relevant x86 vendors we could do > > > > something along the lines of: > > > > > > > > phys-bits >= 41 -> valid > > > > phys-bits == 40 + AuthenticAMD -> valid > > > > phys-bits == 36,39 + GenuineIntel -> valid > > > > everything else -> invalid > I dropped the patch for now. You can drop it forever. For the mail archives and anyone interested: The approach outlined above appears to work well, patches just landed in edk2 master branch. Next edk2 stable tag (2022-11) will have it. take care, Gerd
diff --git a/hw/i386/fw_cfg.h b/hw/i386/fw_cfg.h index 275f15c1c5e8..6ff198a6cb85 100644 --- a/hw/i386/fw_cfg.h +++ b/hw/i386/fw_cfg.h @@ -26,5 +26,6 @@ FWCfgState *fw_cfg_arch_create(MachineState *ms, void fw_cfg_build_smbios(MachineState *ms, FWCfgState *fw_cfg); void fw_cfg_build_feature_control(MachineState *ms, FWCfgState *fw_cfg); void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg); +void fw_cfg_phys_bits(FWCfgState *fw_cfg); #endif diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c95333514ed3..bedef1ee13c1 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -119,6 +119,7 @@ struct PCMachineClass { bool enforce_aligned_dimm; bool broken_reserved_end; bool enforce_amd_1tb_hole; + bool phys_bits_in_fw_cfg; /* generate legacy CPU hotplug AML */ bool legacy_cpu_hotplug; diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c index a283785a8de4..6a1f18925725 100644 --- a/hw/i386/fw_cfg.c +++ b/hw/i386/fw_cfg.c @@ -219,3 +219,15 @@ void fw_cfg_add_acpi_dsdt(Aml *scope, FWCfgState *fw_cfg) aml_append(dev, aml_name_decl("_CRS", crs)); aml_append(scope, dev); } + +void fw_cfg_phys_bits(FWCfgState *fw_cfg) +{ + X86CPU *cpu = X86_CPU(first_cpu); + uint64_t phys_bits = cpu->phys_bits; + + if (cpu->host_phys_bits) { + fw_cfg_add_file(fw_cfg, "etc/phys-bits", + g_memdup2(&phys_bits, sizeof(phys_bits)), + sizeof(phys_bits)); + } +} diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 566accf7e60a..17ecc7fe4331 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -744,6 +744,7 @@ void pc_machine_done(Notifier *notifier, void *data) { PCMachineState *pcms = container_of(notifier, PCMachineState, machine_done); + PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(pcms); cxl_hook_up_pxb_registers(pcms->bus, &pcms->cxl_devices_state, @@ -764,6 +765,9 @@ void pc_machine_done(Notifier *notifier, void *data) fw_cfg_build_feature_control(MACHINE(pcms), x86ms->fw_cfg); /* update FW_CFG_NB_CPUS to account for -device added CPUs */ 
fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus); + if (pcmc->phys_bits_in_fw_cfg) { + fw_cfg_phys_bits(x86ms->fw_cfg); + } } } @@ -1907,6 +1911,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->kvmclock_enabled = true; pcmc->enforce_aligned_dimm = true; pcmc->enforce_amd_1tb_hole = true; + pcmc->phys_bits_in_fw_cfg = true; /* BIOS ACPI tables: 128K. Other BIOS datastructures: less than 4K reported * to be used at the moment, 32K should be enough for a while. */ pcmc->acpi_data_size = 0x20000 + 0x8000; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 8043a250adf3..c6a4dbd5c0b0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -447,9 +447,11 @@ DEFINE_I440FX_MACHINE(v7_2, "pc-i440fx-7.2", NULL, static void pc_i440fx_7_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_i440fx_7_2_machine_options(m); m->alias = NULL; m->is_default = false; + pcmc->phys_bits_in_fw_cfg = false; compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 53eda50e818c..c2b56daa1550 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -384,8 +384,10 @@ DEFINE_Q35_MACHINE(v7_2, "pc-q35-7.2", NULL, static void pc_q35_7_1_machine_options(MachineClass *m) { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); pc_q35_7_2_machine_options(m); m->alias = NULL; + pcmc->phys_bits_in_fw_cfg = false; compat_props_add(m->compat_props, hw_compat_7_1, hw_compat_7_1_len); compat_props_add(m->compat_props, pc_compat_7_1, pc_compat_7_1_len); }