diff mbox series

[v2] accel/kvm: Specify default IPA size for arm64

Message ID 20230721062421.12017-1-akihiko.odaki@daynix.com (mailing list archive)
State New, archived
Headers show
Series [v2] accel/kvm: Specify default IPA size for arm64 | expand

Commit Message

Akihiko Odaki July 21, 2023, 6:24 a.m. UTC
libvirt uses "none" machine type to test KVM availability. Before this
change, QEMU used to pass 0 as machine type when calling KVM_CREATE_VM.

The kernel documentation says:
> On arm64, the physical address size for a VM (IPA Size limit) is
> limited to 40bits by default. The limit can be configured if the host
> supports the extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
> KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
> identifier, where IPA_Bits is the maximum width of any physical
> address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
> machine type identifier.
>
> e.g, to configure a guest to use 48bit physical address size::
>
>     vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
>
> The requested size (IPA_Bits) must be:
>
>  ==   =========================================================
>   0   Implies default size, 40bits (for backward compatibility)
>   N   Implies N bits, where N is a positive integer such that,
>       32 <= N <= Host_IPA_Limit
>  ==   =========================================================

> Host_IPA_Limit is the maximum possible value for IPA_Bits on the host
> and is dependent on the CPU capability and the kernel configuration.
> The limit can be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the
> KVM_CHECK_EXTENSION ioctl() at run-time.
>
> Creation of the VM will fail if the requested IPA size (whether it is
> implicit or explicit) is unsupported on the host.
https://docs.kernel.org/virt/kvm/api.html#kvm-create-vm

So if Host_IPA_Limit < 40, such a KVM_CREATE_VM call will fail, and
libvirt incorrectly concludes that KVM is not available. This actually
happened on an M2 MacBook Air.

Fix this by specifying 32 for IPA_Bits as any arm64 system should
support the value according to the documentation.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
V1 -> V2: Introduced an arch hook

 include/sysemu/kvm.h   | 1 +
 accel/kvm/kvm-all.c    | 2 +-
 target/arm/kvm.c       | 2 ++
 target/i386/kvm/kvm.c  | 2 ++
 target/mips/kvm.c      | 2 ++
 target/ppc/kvm.c       | 2 ++
 target/riscv/kvm.c     | 2 ++
 target/s390x/kvm/kvm.c | 2 ++
 8 files changed, 14 insertions(+), 1 deletion(-)

Comments

Philippe Mathieu-Daudé July 21, 2023, 7:30 a.m. UTC | #1
Hi Akihiko,

On 21/7/23 08:24, Akihiko Odaki wrote:
> libvirt uses "none" machine type to test KVM availability. Before this
> change, QEMU used to pass 0 as machine type when calling KVM_CREATE_VM.
> 
> The kernel documentation says:
>> On arm64, the physical address size for a VM (IPA Size limit) is
>> limited to 40bits by default. The limit can be configured if the host
>> supports the extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
>> KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
>> identifier, where IPA_Bits is the maximum width of any physical
>> address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
>> machine type identifier.
>>
>> e.g, to configure a guest to use 48bit physical address size::
>>
>>      vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
>>
>> The requested size (IPA_Bits) must be:
>>
>>   ==   =========================================================
>>    0   Implies default size, 40bits (for backward compatibility)
>>    N   Implies N bits, where N is a positive integer such that,
>>        32 <= N <= Host_IPA_Limit
>>   ==   =========================================================
> 
>> Host_IPA_Limit is the maximum possible value for IPA_Bits on the host
>> and is dependent on the CPU capability and the kernel configuration.
>> The limit can be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the
>> KVM_CHECK_EXTENSION ioctl() at run-time.
>>
>> Creation of the VM will fail if the requested IPA size (whether it is
>> implicit or explicit) is unsupported on the host.
> https://docs.kernel.org/virt/kvm/api.html#kvm-create-vm
> 
> So if Host_IPA_Limit < 40, such KVM_CREATE_VM will fail, and libvirt
> incorrectly thinks KVM is not available. This actually happened on M2
> MacBook Air.
> 
> Fix this by specifying 32 for IPA_Bits as any arm64 system should
> support the value according to the documentation.
> 
> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> ---
> V1 -> V2: Introduced an arch hook
> 
>   include/sysemu/kvm.h   | 1 +
>   accel/kvm/kvm-all.c    | 2 +-
>   target/arm/kvm.c       | 2 ++
>   target/i386/kvm/kvm.c  | 2 ++
>   target/mips/kvm.c      | 2 ++
>   target/ppc/kvm.c       | 2 ++
>   target/riscv/kvm.c     | 2 ++
>   target/s390x/kvm/kvm.c | 2 ++
>   8 files changed, 14 insertions(+), 1 deletion(-)

My understanding of Peter's suggestion would be something like:

-- >8 --
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 115f0cca79..c0af15eb6c 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -201,10 +201,15 @@ typedef struct KVMCapabilityInfo {

  struct KVMState;

+struct KVMClass {
+    AccelClass parent_class;
+
+    int default_vm_type;
+};
+
  #define TYPE_KVM_ACCEL ACCEL_CLASS_NAME("kvm")
  typedef struct KVMState KVMState;
-DECLARE_INSTANCE_CHECKER(KVMState, KVM_STATE,
-                         TYPE_KVM_ACCEL)
+OBJECT_DECLARE_TYPE(KVMState, KVMClass, KVM_ACCEL)

  extern KVMState *kvm_state;
  typedef struct Notifier Notifier;
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 373d876c05..fdd424e1a5 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2458,12 +2458,13 @@ static int kvm_init(MachineState *ms)
      KVMState *s;
      const KVMCapabilityInfo *missing_cap;
      int ret;
-    int type = 0;
+    int type;
      uint64_t dirty_log_manual_caps;

      qemu_mutex_init(&kml_slots_lock);

      s = KVM_STATE(ms->accelerator);
+    type = KVM_GET_CLASS(s)->default_vm_type;

      /*
       * On systems where the kernel can support different base page
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index b4c7654f49..5c13594fdf 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1064,4 +1064,8 @@ bool kvm_arch_cpu_check_are_resettable(void)

  void kvm_arch_accel_class_init(ObjectClass *oc)
  {
+    KVMClass *kc = KVM_CLASS(oc);
+
+    /* Host_IPA_Limit ... */
+    kc->default_vm_type = 32;
  }
---
Peter Maydell July 21, 2023, 12:28 p.m. UTC | #2
On Fri, 21 Jul 2023 at 08:30, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>
> Hi Akihiko,
>
> On 21/7/23 08:24, Akihiko Odaki wrote:
> > libvirt uses "none" machine type to test KVM availability. Before this
> > change, QEMU used to pass 0 as machine type when calling KVM_CREATE_VM.
> >
> > The kernel documentation says:
> >> On arm64, the physical address size for a VM (IPA Size limit) is
> >> limited to 40bits by default. The limit can be configured if the host
> >> supports the extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
> >> KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
> >> identifier, where IPA_Bits is the maximum width of any physical
> >> address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
> >> machine type identifier.
> >>
> >> e.g, to configure a guest to use 48bit physical address size::
> >>
> >>      vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
> >>
> >> The requested size (IPA_Bits) must be:
> >>
> >>   ==   =========================================================
> >>    0   Implies default size, 40bits (for backward compatibility)
> >>    N   Implies N bits, where N is a positive integer such that,
> >>        32 <= N <= Host_IPA_Limit
> >>   ==   =========================================================
> >
> >> Host_IPA_Limit is the maximum possible value for IPA_Bits on the host
> >> and is dependent on the CPU capability and the kernel configuration.
> >> The limit can be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the
> >> KVM_CHECK_EXTENSION ioctl() at run-time.
> >>
> >> Creation of the VM will fail if the requested IPA size (whether it is
> >> implicit or explicit) is unsupported on the host.
> > https://docs.kernel.org/virt/kvm/api.html#kvm-create-vm
> >
> > So if Host_IPA_Limit < 40, such KVM_CREATE_VM will fail, and libvirt
> > incorrectly thinks KVM is not available. This actually happened on M2
> > MacBook Air.
> >
> > Fix this by specifying 32 for IPA_Bits as any arm64 system should
> > support the value according to the documentation.
> >
> > Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> > ---
> > V1 -> V2: Introduced an arch hook
> >
> >   include/sysemu/kvm.h   | 1 +
> >   accel/kvm/kvm-all.c    | 2 +-
> >   target/arm/kvm.c       | 2 ++
> >   target/i386/kvm/kvm.c  | 2 ++
> >   target/mips/kvm.c      | 2 ++
> >   target/ppc/kvm.c       | 2 ++
> >   target/riscv/kvm.c     | 2 ++
> >   target/s390x/kvm/kvm.c | 2 ++
> >   8 files changed, 14 insertions(+), 1 deletion(-)
>
> My understanding of Peter's suggestion would be smth like:
>
> -- >8 --
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index 115f0cca79..c0af15eb6c 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -201,10 +201,15 @@ typedef struct KVMCapabilityInfo {
>
>   struct KVMState;
>
> +struct KVMClass {
> +    AccelClass parent_class;
> +
> +    int default_vm_type;

The kernel docs say you need to check for the
KVM_CAP_ARM_VM_IPA_SIZE capability before you can pass something
other than zero to the KVM_CREATE_VM ioctl, so this needs to be
a method, not just a value. (kvm_arm_get_max_vm_ipa_size()
will do this bit for you.)

If the machine doesn't provide a kvm_type method, we
should default to "largest the host supports", I think.

I was wondering if we could have one per-arch
method for "actually create the VM" that would both be
a place for arm to set the default vm type and
also let us get the TARGET_S390X and TARGET_PPC
ifdefs out of this bit of kvm-all.c, but maybe that would
look just a bit too awkward:

     if (kc->create_vm(s, board_sets_kvm_type, board_kvm_type) < 0) {
         goto err;
     }

where board_sets_kvm_type is a bool, true if board_kvm_type
is valid, and board_kvm_type is whatever the board's
mc->kvm_type method told us.

(Default impl of the method: call KVM_CREATE_VM ioctl
with retry-on-eintr, printing the simple error message;
PPC and s390 versions similar but with their arch
specific extra messages; arm version has a different
default type if board_sets_kvm_type is false.)

Not trying to do both of those things with one method
would result in a simpler
   type = kc->get_default_kvm_type(s);
API.

thanks
-- PMM
diff mbox series

Patch

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 115f0cca79..7cc2eb1a8c 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -347,6 +347,7 @@  bool kvm_device_supported(int vmfd, uint64_t type);
 
 /* Arch specific hooks */
 
+extern const int kvm_arch_default_type;
 extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
 
 void kvm_arch_accel_class_init(ObjectClass *oc);
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 373d876c05..914ade3ec3 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2458,7 +2458,7 @@  static int kvm_init(MachineState *ms)
     KVMState *s;
     const KVMCapabilityInfo *missing_cap;
     int ret;
-    int type = 0;
+    int type = kvm_arch_default_type;
     uint64_t dirty_log_manual_caps;
 
     qemu_mutex_init(&kml_slots_lock);
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index b4c7654f49..f8203b9915 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -32,6 +32,8 @@ 
 #include "hw/irq.h"
 #include "qemu/log.h"
 
+const int kvm_arch_default_type = 32;
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index ebfaf3d24c..00bccffdfc 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -87,6 +87,8 @@ 
 
 static void kvm_init_msrs(X86CPU *cpu);
 
+const int kvm_arch_default_type;
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_INFO(SET_TSS_ADDR),
     KVM_CAP_INFO(EXT_CPUID),
diff --git a/target/mips/kvm.c b/target/mips/kvm.c
index c14e8f550f..754366874e 100644
--- a/target/mips/kvm.c
+++ b/target/mips/kvm.c
@@ -33,6 +33,8 @@ 
 static int kvm_mips_fpu_cap;
 static int kvm_mips_msa_cap;
 
+const int kvm_arch_default_type;
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index a8a935e267..86f8b645a5 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -54,6 +54,8 @@ 
 #define DEBUG_RETURN_GUEST 0
 #define DEBUG_RETURN_GDB   1
 
+const int kvm_arch_default_type;
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 9d8a8982f9..fbe56ec3bb 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -764,6 +764,8 @@  void kvm_riscv_init_user_properties(Object *cpu_obj)
     kvm_riscv_destroy_scratch_vcpu(&kvmcpu);
 }
 
+const int kvm_arch_default_type;
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index a9e5880349..4b9347ffc8 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -144,6 +144,8 @@  static CPUWatchpoint hw_watchpoint;
 static struct kvm_hw_breakpoint *hw_breakpoints;
 static int nb_hw_breakpoints;
 
+const int kvm_arch_default_type;
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };