diff mbox series

[V2,01/10] accel/kvm: Extract common KVM vCPU {creation, parking} code

Message ID 20230930001933.2660-2-salil.mehta@huawei.com (mailing list archive)
State New, archived
Headers show
Series Add architecture agnostic code to support vCPU Hotplug | expand

Commit Message

Salil Mehta Sept. 30, 2023, 12:19 a.m. UTC
KVM vCPU creation is done once during the initialization of the VM when Qemu
threads are spawned. This is common to all the architectures.

Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but
the KVM vCPU objects in the Host KVM are not destroyed and their representative
KVM vCPU objects/context in Qemu are parked.

Refactor common logic so that some APIs could be reused by vCPU Hotplug code.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
---
 accel/kvm/kvm-all.c  | 63 +++++++++++++++++++++++++++++++++-----------
 include/sysemu/kvm.h | 14 ++++++++++
 2 files changed, 61 insertions(+), 16 deletions(-)

Comments

Jonathan Cameron Oct. 2, 2023, 3:53 p.m. UTC | #1
On Sat, 30 Sep 2023 01:19:24 +0100
Salil Mehta <salil.mehta@huawei.com> wrote:

> KVM vCPU creation is done once during the initialization of the VM when Qemu
> threads are spawned. This is common to all the architectures.
> 
> Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but
> the KVM vCPU objects in the Host KVM are not destroyed and their representative
> KVM vCPU objects/context in Qemu are parked.
> 
> Refactor common logic so that some APIs could be reused by vCPU Hotplug code.
> 
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>

Hi Salil,

A few trivial things inline, plus a question about why
cpu->cpu_index can now be used when kvm_arch_vcpu_id(cpu)
was previously needed.

Thanks,

Jonathan

> ---
>  accel/kvm/kvm-all.c  | 63 +++++++++++++++++++++++++++++++++-----------
>  include/sysemu/kvm.h | 14 ++++++++++
>  2 files changed, 61 insertions(+), 16 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index ff1578bb32..b8c36ba50a 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -80,7 +80,7 @@
>  #endif
>  
>  struct KVMParkedVcpu {
> -    unsigned long vcpu_id;
> +    int vcpu_id;
>      int kvm_fd;
>      QLIST_ENTRY(KVMParkedVcpu) node;
>  };
> @@ -137,6 +137,7 @@ static QemuMutex kml_slots_lock;
>  #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>  
>  static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
> +static int kvm_get_vcpu(KVMState *s, int vcpu_id);
>  
>  static inline void kvm_resample_fd_remove(int gsi)
>  {
> @@ -320,11 +321,49 @@ err:
>      return ret;
>  }
>  
> +void kvm_park_vcpu(CPUState *cpu)
> +{
> +    int vcpu_id = cpu->cpu_index;
> +    struct KVMParkedVcpu *vcpu;
> +
> +    vcpu = g_malloc0(sizeof(*vcpu));
> +    vcpu->vcpu_id = vcpu_id;

As vcpu_id is only used here why have the local variable?
Maybe that changes in later patches, in which case ignore this.

    vcpu->vcpu_id = cpu->cpu_index;

Why is kvm_arch_vcpu_id() not necessary here any more but was
before?

> +    vcpu->kvm_fd = cpu->kvm_fd;
> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +}
> +
> +int kvm_create_vcpu(CPUState *cpu)
> +{
> +    int vcpu_id = cpu->cpu_index;

See below. I'm not sure why it's safe not to use kvm_arch_vcpu_id().
A few architectures currently have non-trivial implementations of
that function.


> +    KVMState *s = kvm_state;
> +    int kvm_fd;
> +
> +    DPRINTF("kvm_create_vcpu\n");
> +
> +    /* check if the KVM vCPU already exist but is parked */
> +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
> +    if (kvm_fd < 0) {
> +        /* vCPU not parked: create a new KVM vCPU */
> +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
> +        if (kvm_fd < 0) {
> +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %d", vcpu_id);
> +            return kvm_fd;
> +        }
> +    }
> +
> +    cpu->vcpu_dirty = true;
> +    cpu->kvm_fd = kvm_fd;
> +    cpu->kvm_state = s;
> +    cpu->dirty_pages = 0;
> +    cpu->throttle_us_per_full = 0;

Trivial, but I would have maintained the order with respect to the code
removed below, just to avoid a reviewer having to check that the two bits
of code do the same thing after the reorder.

> +
> +    return 0;
> +}
> +
>  static int do_kvm_destroy_vcpu(CPUState *cpu)
>  {
>      KVMState *s = kvm_state;
>      long mmap_size;
> -    struct KVMParkedVcpu *vcpu = NULL;
>      int ret = 0;
>  
>      DPRINTF("kvm_destroy_vcpu\n");
> @@ -353,10 +392,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>          }
>      }
>  
> -    vcpu = g_malloc0(sizeof(*vcpu));
> -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> -    vcpu->kvm_fd = cpu->kvm_fd;
> -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +    kvm_park_vcpu(cpu);
>  err:
>      return ret;
>  }
> @@ -369,7 +405,7 @@ void kvm_destroy_vcpu(CPUState *cpu)
>      }
>  }
>  
> -static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
> +static int kvm_get_vcpu(KVMState *s, int vcpu_id)
>  {
>      struct KVMParkedVcpu *cpu;
>  
> @@ -384,7 +420,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>          }
>      }
>  
> -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
> +    return -1;
>  }
>  
>  int kvm_init_vcpu(CPUState *cpu, Error **errp)
> @@ -395,19 +431,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>  
>      trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  
> -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
> +    ret = kvm_create_vcpu(cpu);

The switch from kvm_arch_vcpu_id(cpu) to using 
int vcpu_id = cpu->cpu_index;

Seems like a functional change on some arch.
>      if (ret < 0) {
> -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
> +        error_setg_errno(errp, -ret,
> +                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",

The rewrap of the lines above seems like an unrelated change.

>                           kvm_arch_vcpu_id(cpu));
>          goto err;
>      }
>  
> -    cpu->kvm_fd = ret;
> -    cpu->kvm_state = s;
> -    cpu->vcpu_dirty = true;
> -    cpu->dirty_pages = 0;
> -    cpu->throttle_us_per_full = 0;
> -
>      mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>      if (mmap_size < 0) {
>          ret = mmap_size;
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index ee9025f8e9..785f3ed083 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -464,6 +464,20 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
>  
>  int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
>                                         hwaddr *phys_addr);
> +/**
> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
> + * @cpu:  QOM CPUState object for which KVM vCPU has to be created/fetched.

Extra space before QOM (same below)

> + *
> + * @returns: 0 when success, errno (<0) when failed.
> + */
> +int kvm_create_vcpu(CPUState *cpu);

Blank line here perhaps.

> +/**
> + * kvm_park_vcpu - Gets a parked KVM vCPU if it exists
> + * @cpu:  QOM CPUState object for which parked KVM vCPU has to be fetched.

We aren't returning anything, so why fetch?

> + *
> + * @returns: kvm_fd (>0) when success, -1 when failed.
> + */
> +void kvm_park_vcpu(CPUState *cpu);
>  
>  #endif /* NEED_CPU_H */
>
Gavin Shan Oct. 2, 2023, 11:17 p.m. UTC | #2
On 9/30/23 10:19, Salil Mehta wrote:
> KVM vCPU creation is done once during the initialization of the VM when Qemu
> threads are spawned. This is common to all the architectures.
   ^^^^^^^^^^^^^^^^^^^
   thread is spawned.

> 
> Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but
> the KVM vCPU objects in the Host KVM are not destroyed and their representative
> KVM vCPU objects/context in Qemu are parked.
> 
> Refactor common logic so that some APIs could be reused by vCPU Hotplug code.
> 
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> ---
>   accel/kvm/kvm-all.c  | 63 +++++++++++++++++++++++++++++++++-----------
>   include/sysemu/kvm.h | 14 ++++++++++
>   2 files changed, 61 insertions(+), 16 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index ff1578bb32..b8c36ba50a 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -80,7 +80,7 @@
>   #endif
>   
>   struct KVMParkedVcpu {
> -    unsigned long vcpu_id;
> +    int vcpu_id;

@vcpu_id now represents the vCPU index (CPUState::cpu_index) rather than the
architectural CPU ID. However, I don't understand how that works for x86;
more comments on this can be found below.

>       int kvm_fd;
>       QLIST_ENTRY(KVMParkedVcpu) node;
>   };
> @@ -137,6 +137,7 @@ static QemuMutex kml_slots_lock;
>   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>   
>   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
> +static int kvm_get_vcpu(KVMState *s, int vcpu_id);
>   
>   static inline void kvm_resample_fd_remove(int gsi)
>   {
> @@ -320,11 +321,49 @@ err:
>       return ret;
>   }
>   
> +void kvm_park_vcpu(CPUState *cpu)
> +{
> +    int vcpu_id = cpu->cpu_index;
> +    struct KVMParkedVcpu *vcpu;
> +
> +    vcpu = g_malloc0(sizeof(*vcpu));
> +    vcpu->vcpu_id = vcpu_id;
> +    vcpu->kvm_fd = cpu->kvm_fd;
> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +}
> +

@vcpu_id can be dropped as suggested previously.

        vcpu->vcpu_id = cpu->cpu_index;

> +int kvm_create_vcpu(CPUState *cpu)
> +{
> +    int vcpu_id = cpu->cpu_index;
> +    KVMState *s = kvm_state;
> +    int kvm_fd;
> +
> +    DPRINTF("kvm_create_vcpu\n");
> +
> +    /* check if the KVM vCPU already exist but is parked */
> +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
> +    if (kvm_fd < 0) {
> +        /* vCPU not parked: create a new KVM vCPU */
> +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
> +        if (kvm_fd < 0) {
> +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %d", vcpu_id);
> +            return kvm_fd;
> +        }
> +    }
> +
> +    cpu->vcpu_dirty = true;
> +    cpu->kvm_fd = kvm_fd;
> +    cpu->kvm_state = s;
> +    cpu->dirty_pages = 0;
> +    cpu->throttle_us_per_full = 0;
> +
> +    return 0;
> +}
> +

The comments here can be dropped since the code is self-explanatory.

@vcpu_id now represents the vCPU index rather than the architectural vCPU ID.
@vcpu_id is passed to the host through ioctl(KVM_CREATE_VCPU), and the host
expects it to be an architectural vCPU ID, not a vCPU index. This is indicated
by 'struct kvm_vcpu', as shown below.

struct kvm_vcpu {
	:
	int vcpu_id;  /* id given by userspace at creation */
         int vcpu_idx; /* index into kvm->vcpu_array */
};

Function kvm_arch_vcpu_id() converts the vCPU instance or vCPU index to
the architectural vCPU ID. All architectures except x86 simply return the
vCPU index (CPUState::cpu_index) as the architectural vCPU ID. x86 returns
the APIC ID. Treating them equally seems to break x86.

>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>   {
>       KVMState *s = kvm_state;
>       long mmap_size;
> -    struct KVMParkedVcpu *vcpu = NULL;
>       int ret = 0;
>   
>       DPRINTF("kvm_destroy_vcpu\n");
> @@ -353,10 +392,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>           }
>       }
>   
> -    vcpu = g_malloc0(sizeof(*vcpu));
> -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> -    vcpu->kvm_fd = cpu->kvm_fd;
> -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +    kvm_park_vcpu(cpu);
>   err:
>       return ret;
>   }
> @@ -369,7 +405,7 @@ void kvm_destroy_vcpu(CPUState *cpu)
>       }
>   }
>   
> -static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
> +static int kvm_get_vcpu(KVMState *s, int vcpu_id)
>   {
>       struct KVMParkedVcpu *cpu;
>   
> @@ -384,7 +420,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>           }
>       }
>   
> -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
> +    return -1;
>   }
>   

Why do we have -1 here? -ENOENT seems more descriptive.

>   int kvm_init_vcpu(CPUState *cpu, Error **errp)
> @@ -395,19 +431,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>   
>       trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>   
> -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
> +    ret = kvm_create_vcpu(cpu);
>       if (ret < 0) {
> -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
> +        error_setg_errno(errp, -ret,
> +                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>                            kvm_arch_vcpu_id(cpu));
>           goto err;
>       }
>   
> -    cpu->kvm_fd = ret;
> -    cpu->kvm_state = s;
> -    cpu->vcpu_dirty = true;
> -    cpu->dirty_pages = 0;
> -    cpu->throttle_us_per_full = 0;
> -
>       mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>       if (mmap_size < 0) {
>           ret = mmap_size;
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index ee9025f8e9..785f3ed083 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -464,6 +464,20 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
>   
>   int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
>                                          hwaddr *phys_addr);
> +/**
> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
> + * @cpu:  QOM CPUState object for which KVM vCPU has to be created/fetched.
> + *
> + * @returns: 0 when success, errno (<0) when failed.
> + */
> +int kvm_create_vcpu(CPUState *cpu);
> +/**
> + * kvm_park_vcpu - Gets a parked KVM vCPU if it exists
> + * @cpu:  QOM CPUState object for which parked KVM vCPU has to be fetched.
> + *
> + * @returns: kvm_fd (>0) when success, -1 when failed.
> + */
> +void kvm_park_vcpu(CPUState *cpu);
>   
>   #endif /* NEED_CPU_H */
>   

Thanks,
Gavin
Salil Mehta Oct. 3, 2023, 11:05 a.m. UTC | #3
Hi Jonathan,

> From: Jonathan Cameron <jonathan.cameron@huawei.com>
> Sent: Monday, October 2, 2023 4:53 PM
> To: Salil Mehta <salil.mehta@huawei.com>
> Cc: qemu-devel@nongnu.org; qemu-arm@nongnu.org; maz@kernel.org; jean-
> philippe@linaro.org; lpieralisi@kernel.org; peter.maydell@linaro.org;
> richard.henderson@linaro.org; imammedo@redhat.com; andrew.jones@linux.dev;
> david@redhat.com; philmd@linaro.org; eric.auger@redhat.com;
> oliver.upton@linux.dev; pbonzini@redhat.com; mst@redhat.com;
> will@kernel.org; gshan@redhat.com; rafael@kernel.org;
> alex.bennee@linaro.org; linux@armlinux.org.uk;
> darren@os.amperecomputing.com; ilkka@os.amperecomputing.com;
> vishnu@os.amperecomputing.com; karl.heubaum@oracle.com;
> miguel.luis@oracle.com; salil.mehta@opnsrc.net; zhukeqian
> <zhukeqian1@huawei.com>; wangxiongfeng (C) <wangxiongfeng2@huawei.com>;
> wangyanan (Y) <wangyanan55@huawei.com>; jiakernel2@gmail.com;
> maobibo@loongson.cn; lixianglai@loongson.cn; Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH V2 01/10] accel/kvm: Extract common KVM vCPU
> {creation,parking} code
> 
> On Sat, 30 Sep 2023 01:19:24 +0100
> Salil Mehta <salil.mehta@huawei.com> wrote:
> 
> > KVM vCPU creation is done once during the initialization of the VM when Qemu
> > threads are spawned. This is common to all the architectures.
> >
> > Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but
> > the KVM vCPU objects in the Host KVM are not destroyed and their representative
> > KVM vCPU objects/context in Qemu are parked.
> >
> > Refactor common logic so that some APIs could be reused by vCPU Hotplug code.
> >
> > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> 
> Hi Salil,
> 
> A few trivial things inline, plus a question about why
> cpu->cpu_index can now be used but kvm_arch_vcpu_id(cpu);
> was previously needed.

Good point. I used the API because it was returning
'unsigned long' and it was being used across the archs.
I thought maybe the size of the index could vary across
archs. For example, for PowerPC above API returns vcpu_id
which presumably could have different data type than
an 'integer'.

But after Alex's comment, I was made to believe that this
assumption might not be correct and CPU index is an
'integer' across archs and perhaps semantics of above
API is not correct.

But perhaps original code was functionally correct?


> >  accel/kvm/kvm-all.c  | 63 +++++++++++++++++++++++++++++++++-----------
> >  include/sysemu/kvm.h | 14 ++++++++++
> >  2 files changed, 61 insertions(+), 16 deletions(-)
> >
> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> > index ff1578bb32..b8c36ba50a 100644
> > --- a/accel/kvm/kvm-all.c
> > +++ b/accel/kvm/kvm-all.c
> > @@ -80,7 +80,7 @@
> >  #endif
> >
> >  struct KVMParkedVcpu {
> > -    unsigned long vcpu_id;
> > +    int vcpu_id;
> >      int kvm_fd;
> >      QLIST_ENTRY(KVMParkedVcpu) node;
> >  };
> > @@ -137,6 +137,7 @@ static QemuMutex kml_slots_lock;
> >  #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
> >
> >  static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
> > +static int kvm_get_vcpu(KVMState *s, int vcpu_id);
> >
> >  static inline void kvm_resample_fd_remove(int gsi)
> >  {
> > @@ -320,11 +321,49 @@ err:
> >      return ret;
> >  }
> >
> > +void kvm_park_vcpu(CPUState *cpu)
> > +{
> > +    int vcpu_id = cpu->cpu_index;
> > +    struct KVMParkedVcpu *vcpu;
> > +
> > +    vcpu = g_malloc0(sizeof(*vcpu));
> > +    vcpu->vcpu_id = vcpu_id;
> 
> As vcpu_id is only used here why have the local variable?
> Maybe that changes in later patches, in which case ignore this.
> 
>     vcpu->vcpu_id = cpu->cpu_index;


Yes, thanks.


> 
> Why is kvm_arch_vcpu_id() not necessary here any more but was
> before?


Because I have now changed the type of vcpu_id from 'unsigned long'
to an 'integer'.

> 
> > +    vcpu->kvm_fd = cpu->kvm_fd;
> > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> > +}
> > +
> > +int kvm_create_vcpu(CPUState *cpu)
> > +{
> > +    int vcpu_id = cpu->cpu_index;
> 
> See below. I'm not sure why it's safe not to use kvm_arch_vcpu_id()
> Seems a few architectures have less than trivial implementations of
> that function currently.

I have doubts about this as well. Do other architectures, like PowerPC,
return a different type?


> > +    KVMState *s = kvm_state;
> > +    int kvm_fd;
> > +
> > +    DPRINTF("kvm_create_vcpu\n");
> > +
> > +    /* check if the KVM vCPU already exist but is parked */
> > +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
> > +    if (kvm_fd < 0) {
> > +        /* vCPU not parked: create a new KVM vCPU */
> > +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
> > +        if (kvm_fd < 0) {
> > +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %d", vcpu_id);
> > +            return kvm_fd;
> > +        }
> > +    }
> > +
> > +    cpu->vcpu_dirty = true;
> > +    cpu->kvm_fd = kvm_fd;
> > +    cpu->kvm_state = s;
> > +    cpu->dirty_pages = 0;
> > +    cpu->throttle_us_per_full = 0;
> 
> Trivial but I would have maintained the order wrt to the code removed
> below just to avoid a reviewer having to check the two bits of code
> do the same thing after the reorder.


I can do that. No problem.


[...]

> >  int kvm_init_vcpu(CPUState *cpu, Error **errp)
> > @@ -395,19 +431,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
> >
> >      trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
> >
> > -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
> > +    ret = kvm_create_vcpu(cpu);
> 
> The switch from kvm_arch_vcpu_id(cpu) to using
> int vcpu_id = cpu->cpu_index;
> 
> Seems like a functional change on some arch.


Yes; in that case we need to restore the original kvm_arch_vcpu_id() usage
inside the new kvm_create_vcpu() API.


> >      if (ret < 0) {
> > -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed
> (%lu)",
> > +        error_setg_errno(errp, -ret,
> > +                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
> 
> The rewrap of the lines above seems like an unrelated change.

Function has changed from kvm_get_vcpu to kvm_create_vcpu

[...]

> > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> > index ee9025f8e9..785f3ed083 100644
> > --- a/include/sysemu/kvm.h
> > +++ b/include/sysemu/kvm.h
> > @@ -464,6 +464,20 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int
> sigmask_len);
> >
> >  int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
> >                                         hwaddr *phys_addr);
> > +/**
> > + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
> > + * @cpu:  QOM CPUState object for which KVM vCPU has to be
> created/fetched.
> 
> Extra space before QOM (same below)

:)

> 
> > + *
> > + * @returns: 0 when success, errno (<0) when failed.
> > + */
> > +int kvm_create_vcpu(CPUState *cpu);
> 
> Blank line here perhaps.

Ok.


> 
> > +/**
> > + * kvm_park_vcpu - Gets a parked KVM vCPU if it exists
> > + * @cpu:  QOM CPUState object for which parked KVM vCPU has to be
> fetched.
> 
> We aren't returning anything, so why fetch?

copy-paste comment error, I think. Thanks!

cheers
Salil.
Salil Mehta Oct. 3, 2023, 11:22 a.m. UTC | #4
Hi Gavin,

> From: Gavin Shan <gshan@redhat.com>
> Sent: Tuesday, October 3, 2023 12:18 AM
> To: Salil Mehta <salil.mehta@huawei.com>; qemu-devel@nongnu.org; qemu-
> arm@nongnu.org
> Cc: maz@kernel.org; jean-philippe@linaro.org; Jonathan Cameron
> <jonathan.cameron@huawei.com>; lpieralisi@kernel.org;
> peter.maydell@linaro.org; richard.henderson@linaro.org;
> imammedo@redhat.com; andrew.jones@linux.dev; david@redhat.com;
> philmd@linaro.org; eric.auger@redhat.com; oliver.upton@linux.dev;
> pbonzini@redhat.com; mst@redhat.com; will@kernel.org; rafael@kernel.org;
> alex.bennee@linaro.org; linux@armlinux.org.uk;
> darren@os.amperecomputing.com; ilkka@os.amperecomputing.com;
> vishnu@os.amperecomputing.com; karl.heubaum@oracle.com;
> miguel.luis@oracle.com; salil.mehta@opnsrc.net; zhukeqian
> <zhukeqian1@huawei.com>; wangxiongfeng (C) <wangxiongfeng2@huawei.com>;
> wangyanan (Y) <wangyanan55@huawei.com>; jiakernel2@gmail.com;
> maobibo@loongson.cn; lixianglai@loongson.cn; Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH V2 01/10] accel/kvm: Extract common KVM vCPU
> {creation,parking} code
> 
> On 9/30/23 10:19, Salil Mehta wrote:
> > KVM vCPU creation is done once during the initialization of the VM when Qemu
> > threads are spawned. This is common to all the architectures.
>    ^^^^^^^^^^^^^^^^^^^
>    thread is spawned.

Yes, will fix.

Thanks
Salil.


> > Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but
> > the KVM vCPU objects in the Host KVM are not destroyed and their
> representative
> > KVM vCPU objects/context in Qemu are parked.
> >
> > Refactor common logic so that some APIs could be reused by vCPU Hotplug code.
> >
> > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> > ---
> >   accel/kvm/kvm-all.c  | 63 +++++++++++++++++++++++++++++++++-----------
> >   include/sysemu/kvm.h | 14 ++++++++++
> >   2 files changed, 61 insertions(+), 16 deletions(-)
> >
> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> > index ff1578bb32..b8c36ba50a 100644
> > --- a/accel/kvm/kvm-all.c
> > +++ b/accel/kvm/kvm-all.c
> > @@ -80,7 +80,7 @@
> >   #endif
> >
> >   struct KVMParkedVcpu {
> > -    unsigned long vcpu_id;
> > +    int vcpu_id;
> 
> @vcpu_id represents the vCPU index (CPUState::cpu_index) instead of the
> architectural CPU ID any more. However, I don't understand how it works
> for x86, and more comments regarding it can be seen below.

I missed the reason why I had used 'unsigned long' everywhere in the
first change. You can check the PowerPC, it returns vcpu_id which might
not be an 'integer'. Hence, this change could actually create a problem.


> >       int kvm_fd;
> >       QLIST_ENTRY(KVMParkedVcpu) node;
> >   };
> > @@ -137,6 +137,7 @@ static QemuMutex kml_slots_lock;
> >   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
> >
> >   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
> > +static int kvm_get_vcpu(KVMState *s, int vcpu_id);
> >
> >   static inline void kvm_resample_fd_remove(int gsi)
> >   {
> > @@ -320,11 +321,49 @@ err:
> >       return ret;
> >   }
> >
> > +void kvm_park_vcpu(CPUState *cpu)
> > +{
> > +    int vcpu_id = cpu->cpu_index;
> > +    struct KVMParkedVcpu *vcpu;
> > +
> > +    vcpu = g_malloc0(sizeof(*vcpu));
> > +    vcpu->vcpu_id = vcpu_id;
> > +    vcpu->kvm_fd = cpu->kvm_fd;
> > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> > +}
> > +
> 
> @vcpu_id can be dropped as suggested previously.
> 
>         vcpu->vcpu_id = cpu->cpu_index;


Yes, agreed.

Thanks
Salil.


> > +int kvm_create_vcpu(CPUState *cpu)
> > +{
> > +    int vcpu_id = cpu->cpu_index;
> > +    KVMState *s = kvm_state;
> > +    int kvm_fd;
> > +
> > +    DPRINTF("kvm_create_vcpu\n");
> > +
> > +    /* check if the KVM vCPU already exist but is parked */
> > +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
> > +    if (kvm_fd < 0) {
> > +        /* vCPU not parked: create a new KVM vCPU */
> > +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
> > +        if (kvm_fd < 0) {
> > +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %d", vcpu_id);
> > +            return kvm_fd;
> > +        }
> > +    }
> > +
> > +    cpu->vcpu_dirty = true;
> > +    cpu->kvm_fd = kvm_fd;
> > +    cpu->kvm_state = s;
> > +    cpu->dirty_pages = 0;
> > +    cpu->throttle_us_per_full = 0;
> > +
> > +    return 0;
> > +}
> > +
> 
> The comments here can be dropped since the code is self-explaining.
> 
> @vcpu_id represents vCPU index, instead of the architecrual vCPU ID any
> more.
> @vcpu_id is passed to host through ioctl(KVM_CREATE_VCPU), which is
> expected
> as an architecrual vCPU ID instead of a vCPU index by host. It's indicated
> by 'struct kvm_vcpu' as below.


That should not be part of this change, I think.


> struct kvm_vcpu {
> 	:
> 	int vcpu_id;  /* id given by userspace at creation */
>          int vcpu_idx; /* index into kvm->vcpu_array */
> };
> 
> Function kvm_arch_vcpu_id() converts the vCPU instance or vCPU index to
> the architecrual vCPU ID. All architectures except x86 simply returns
> vCPU index (CPUState::cpu_index) as the architecrural vCPU ID. x86 returns
> the APIC ID. Treating them equally seems to break x86.

I think PowerPC returns a different Id as well.


[...]

> > -static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
> > +static int kvm_get_vcpu(KVMState *s, int vcpu_id)
> >   {
> >       struct KVMParkedVcpu *cpu;
> >
> > @@ -384,7 +420,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long
> vcpu_id)
> >           }
> >       }
> >
> > -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
> > +    return -1;
> >   }
> >
> 
> Why we have -1 here. -ENOENT seems more descriptive?

Agreed.

Thanks
Salil.
Jonathan Cameron Oct. 3, 2023, 11:51 a.m. UTC | #5
On Tue, 3 Oct 2023 12:05:11 +0100
Salil Mehta <salil.mehta@huawei.com> wrote:

> Hi Jonathan,
> 
> > From: Jonathan Cameron <jonathan.cameron@huawei.com>
> > Sent: Monday, October 2, 2023 4:53 PM
> > To: Salil Mehta <salil.mehta@huawei.com>
> > Cc: qemu-devel@nongnu.org; qemu-arm@nongnu.org; maz@kernel.org; jean-
> > philippe@linaro.org; lpieralisi@kernel.org; peter.maydell@linaro.org;
> > richard.henderson@linaro.org; imammedo@redhat.com; andrew.jones@linux.dev;
> > david@redhat.com; philmd@linaro.org; eric.auger@redhat.com;
> > oliver.upton@linux.dev; pbonzini@redhat.com; mst@redhat.com;
> > will@kernel.org; gshan@redhat.com; rafael@kernel.org;
> > alex.bennee@linaro.org; linux@armlinux.org.uk;
> > darren@os.amperecomputing.com; ilkka@os.amperecomputing.com;
> > vishnu@os.amperecomputing.com; karl.heubaum@oracle.com;
> > miguel.luis@oracle.com; salil.mehta@opnsrc.net; zhukeqian
> > <zhukeqian1@huawei.com>; wangxiongfeng (C) <wangxiongfeng2@huawei.com>;
> > wangyanan (Y) <wangyanan55@huawei.com>; jiakernel2@gmail.com;
> > maobibo@loongson.cn; lixianglai@loongson.cn; Linuxarm <linuxarm@huawei.com>
> > Subject: Re: [PATCH V2 01/10] accel/kvm: Extract common KVM vCPU
> > {creation,parking} code
> > 
> > On Sat, 30 Sep 2023 01:19:24 +0100
> > Salil Mehta <salil.mehta@huawei.com> wrote:
> >   
> > > KVM vCPU creation is done once during the initialization of the VM when Qemu
> > > threads are spawned. This is common to all the architectures.
> > >
> > > Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but
> > > the KVM vCPU objects in the Host KVM are not destroyed and their representative
> > > KVM vCPU objects/context in Qemu are parked.
> > >
> > > Refactor common logic so that some APIs could be reused by vCPU Hotplug code.
> > >
> > > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>  
> > 
> > Hi Salil,
> > 
> > A few trivial things inline, plus a question about why
> > cpu->cpu_index can now be used but kvm_arch_vcpu_id(cpu);
> > was previously needed.  
> 
> Good point. I used the API because it was returning
> 'unsigned long' and it was being used across the archs.
> I thought maybe the size of the index could vary across
> archs. For example, for PowerPC above API returns vcpu_id
> which presumably could have different data type than
> an 'integer'.
> 
> But after Alex's comment, I was made to believe that this
> assumption might not be correct and CPU index is an
> 'integer' across archs and perhaps semantics of above
> API is not correct.
> 
> But perhaps original code was functionally correct?

I wasn't concerned with the type, but rather that the
value comes from other places than cpu->cpu_index
on some architectures.
> 
> 
> > >  accel/kvm/kvm-all.c  | 63 +++++++++++++++++++++++++++++++++-----------
> > >  include/sysemu/kvm.h | 14 ++++++++++
> > >  2 files changed, 61 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> > > index ff1578bb32..b8c36ba50a 100644
> > > --- a/accel/kvm/kvm-all.c
> > > +++ b/accel/kvm/kvm-all.c
> > > @@ -80,7 +80,7 @@
> > >  #endif
> > >
> > >  struct KVMParkedVcpu {
> > > -    unsigned long vcpu_id;
> > > +    int vcpu_id;
> > >      int kvm_fd;
> > >      QLIST_ENTRY(KVMParkedVcpu) node;
> > >  };
> > > @@ -137,6 +137,7 @@ static QemuMutex kml_slots_lock;
> > >  #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
> > >
> > >  static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
> > > +static int kvm_get_vcpu(KVMState *s, int vcpu_id);
> > >
> > >  static inline void kvm_resample_fd_remove(int gsi)
> > >  {
> > > @@ -320,11 +321,49 @@ err:
> > >      return ret;
> > >  }
> > >
> > > +void kvm_park_vcpu(CPUState *cpu)
> > > +{
> > > +    int vcpu_id = cpu->cpu_index;
> > > +    struct KVMParkedVcpu *vcpu;
> > > +
> > > +    vcpu = g_malloc0(sizeof(*vcpu));
> > > +    vcpu->vcpu_id = vcpu_id;  
> > 
> > As vcpu_id is only used here why have the local variable?
> > Maybe that changes in later patches, in which case ignore this.
> > 
> >     vcpu->vcpu_id = cpu->cpu_index;  
> 
> 
> Yes, thanks.
> 
> 
> > 
> > Why is kvm_arch_vcpu_id() not necessary here any more but was
> > before?  
> 
> 
> Because I have now changed the type of vcpu_id from 'unsigned long'
> to an 'integer'.
> 
> >   
> > > +    vcpu->kvm_fd = cpu->kvm_fd;
> > > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> > > +}
> > > +
> > > +int kvm_create_vcpu(CPUState *cpu)
> > > +{
> > > +    int vcpu_id = cpu->cpu_index;  
> > 
> > See below. I'm not sure why it's safe not to use kvm_arch_vcpu_id()
> > Seems a few architectures have less than trivial implementations of
> > that function currently.  
> 
> I doubt this as well. Other architectures like PowerPC are returning
> different type?
> 
It wasn't the type that bothered me, but rather that the source of
the data isn't always cpu->cpu_index, so I have no idea if the values
are consistent.

> 
> 
> > >      if (ret < 0) {
> > > -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed  
> > (%lu)",  
> > > +        error_setg_errno(errp, -ret,
> > > +                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",  
> > 
> > The rewrap of the lines above seems like an unrelated change.  
> 
> Function has changed from kvm_get_vcpu to kvm_create_vcpu
> 
ah. Eyes jumped over that :)
Salil Mehta Oct. 3, 2023, 12:27 p.m. UTC | #6
> From: Jonathan Cameron <jonathan.cameron@huawei.com>
> Sent: Tuesday, October 3, 2023 12:51 PM
> To: Salil Mehta <salil.mehta@huawei.com>
> Cc: qemu-devel@nongnu.org; qemu-arm@nongnu.org; maz@kernel.org; jean-
> philippe@linaro.org; lpieralisi@kernel.org; peter.maydell@linaro.org;
> richard.henderson@linaro.org; imammedo@redhat.com; andrew.jones@linux.dev;
> david@redhat.com; philmd@linaro.org; eric.auger@redhat.com;
> oliver.upton@linux.dev; pbonzini@redhat.com; mst@redhat.com;
> will@kernel.org; gshan@redhat.com; rafael@kernel.org;
> alex.bennee@linaro.org; linux@armlinux.org.uk;
> darren@os.amperecomputing.com; ilkka@os.amperecomputing.com;
> vishnu@os.amperecomputing.com; karl.heubaum@oracle.com;
> miguel.luis@oracle.com; salil.mehta@opnsrc.net; zhukeqian
> <zhukeqian1@huawei.com>; wangxiongfeng (C) <wangxiongfeng2@huawei.com>;
> wangyanan (Y) <wangyanan55@huawei.com>; jiakernel2@gmail.com;
> maobibo@loongson.cn; lixianglai@loongson.cn; Linuxarm <linuxarm@huawei.com>
> Subject: Re: [PATCH V2 01/10] accel/kvm: Extract common KVM vCPU
> {creation,parking} code
> 
> On Tue, 3 Oct 2023 12:05:11 +0100
> Salil Mehta <salil.mehta@huawei.com> wrote:
> 
> > Hi Jonathan,
> >
> > > From: Jonathan Cameron <jonathan.cameron@huawei.com>
> > > Sent: Monday, October 2, 2023 4:53 PM
> > > To: Salil Mehta <salil.mehta@huawei.com>
> > > Cc: qemu-devel@nongnu.org; qemu-arm@nongnu.org; maz@kernel.org; jean-
> > > philippe@linaro.org; lpieralisi@kernel.org; peter.maydell@linaro.org;
> > > richard.henderson@linaro.org; imammedo@redhat.com; andrew.jones@linux.dev;
> > > david@redhat.com; philmd@linaro.org; eric.auger@redhat.com;
> > > oliver.upton@linux.dev; pbonzini@redhat.com; mst@redhat.com;
> > > will@kernel.org; gshan@redhat.com; rafael@kernel.org;
> > > alex.bennee@linaro.org; linux@armlinux.org.uk;
> > > darren@os.amperecomputing.com; ilkka@os.amperecomputing.com;
> > > vishnu@os.amperecomputing.com; karl.heubaum@oracle.com;
> > > miguel.luis@oracle.com; salil.mehta@opnsrc.net; zhukeqian
> > > <zhukeqian1@huawei.com>; wangxiongfeng (C) <wangxiongfeng2@huawei.com>;
> > > wangyanan (Y) <wangyanan55@huawei.com>; jiakernel2@gmail.com;
> > > maobibo@loongson.cn; lixianglai@loongson.cn; Linuxarm <linuxarm@huawei.com>
> > > Subject: Re: [PATCH V2 01/10] accel/kvm: Extract common KVM vCPU
> > > {creation,parking} code
> > >
> > > On Sat, 30 Sep 2023 01:19:24 +0100
> > > Salil Mehta <salil.mehta@huawei.com> wrote:
> > >
> > > > KVM vCPU creation is done once during the initialization of the VM when Qemu
> > > > threads are spawned. This is common to all the architectures.
> > > >
> > > > Hot-unplug of vCPU results in destruction of the vCPU objects in QOM but
> > > > the KVM vCPU objects in the Host KVM are not destroyed and their representative
> > > > KVM vCPU objects/context in Qemu are parked.
> > > >
> > > > Refactor common logic so that some APIs could be reused by vCPU Hotplug code.
> > > >
> > > > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> > >
> > > Hi Salil,
> > >
> > > A few trivial things inline, plus a question about why
> > > cpu->cpu_index can now be used but kvm_arch_vcpu_id(cpu);
> > > was previously needed.
> >
> > Good point. I used the API because it was returning
> > 'unsigned long' and it was being used across the archs.
> > I thought maybe the size of the index could vary across
> > archs. For example, for PowerPC above API returns vcpu_id
> > which presumably could have different data type than
> > an 'integer'.
> >
> > But after Alex's comment, I was made to believe that this
> > assumption might not be correct and CPU index is an
> > 'integer' across archs and perhaps semantics of above
> > API is not correct.
> >
> > But perhaps original code was functionally correct?
> 
> I wasn't concerned with the type, but rather that the
> value comes from other places than cpu->cpu_index
> on some architectures.

Sure, I meant there is a reason why the type was chosen as 'unsigned long'
and not an 'integer'. Perhaps the value can exceed the 'integer' size
limits because of the way the CPU index is being created on certain archs?

If we try to put a value from a larger container 'unsigned long' into a
smaller container 'integer', things can go wrong.


[...]

> > >
> > > > +    vcpu->kvm_fd = cpu->kvm_fd;
> > > > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> > > > +}
> > > > +
> > > > +int kvm_create_vcpu(CPUState *cpu)
> > > > +{
> > > > +    int vcpu_id = cpu->cpu_index;
> > >
> > > See below. I'm not sure why it's safe not to use kvm_arch_vcpu_id()
> > > Seems a few architectures have less than trivial implementations of
> > > that function currently.
> >
> > I doubt this as well. Other architectures like PowerPC are returning
> > a different type?
> >
> It wasn't the type that bothered me, but rather that the source of
> the data isn't always cpu->cpu_index so I have no idea if the values
> are consistent.

Got it.

I meant the 'unsigned long' return type of kvm_arch_vcpu_id(). On some
architectures, the required container size for their vcpu-id could
exceed an 'integer'. Sorry for not making it clear.

Thanks
Salil.
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index ff1578bb32..b8c36ba50a 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -80,7 +80,7 @@ 
 #endif
 
 struct KVMParkedVcpu {
-    unsigned long vcpu_id;
+    int vcpu_id;
     int kvm_fd;
     QLIST_ENTRY(KVMParkedVcpu) node;
 };
@@ -137,6 +137,7 @@  static QemuMutex kml_slots_lock;
 #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
 
 static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
+static int kvm_get_vcpu(KVMState *s, int vcpu_id);
 
 static inline void kvm_resample_fd_remove(int gsi)
 {
@@ -320,11 +321,49 @@  err:
     return ret;
 }
 
+void kvm_park_vcpu(CPUState *cpu)
+{
+    int vcpu_id = cpu->cpu_index;
+    struct KVMParkedVcpu *vcpu;
+
+    vcpu = g_malloc0(sizeof(*vcpu));
+    vcpu->vcpu_id = vcpu_id;
+    vcpu->kvm_fd = cpu->kvm_fd;
+    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+}
+
+int kvm_create_vcpu(CPUState *cpu)
+{
+    int vcpu_id = cpu->cpu_index;
+    KVMState *s = kvm_state;
+    int kvm_fd;
+
+    DPRINTF("kvm_create_vcpu\n");
+
+    /* check if the KVM vCPU already exists but is parked */
+    kvm_fd = kvm_get_vcpu(s, vcpu_id);
+    if (kvm_fd < 0) {
+        /* vCPU not parked: create a new KVM vCPU */
+        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
+        if (kvm_fd < 0) {
+            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %d", vcpu_id);
+            return kvm_fd;
+        }
+    }
+
+    cpu->vcpu_dirty = true;
+    cpu->kvm_fd = kvm_fd;
+    cpu->kvm_state = s;
+    cpu->dirty_pages = 0;
+    cpu->throttle_us_per_full = 0;
+
+    return 0;
+}
+
 static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
     long mmap_size;
-    struct KVMParkedVcpu *vcpu = NULL;
     int ret = 0;
 
     DPRINTF("kvm_destroy_vcpu\n");
@@ -353,10 +392,7 @@  static int do_kvm_destroy_vcpu(CPUState *cpu)
         }
     }
 
-    vcpu = g_malloc0(sizeof(*vcpu));
-    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
-    vcpu->kvm_fd = cpu->kvm_fd;
-    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+    kvm_park_vcpu(cpu);
 err:
     return ret;
 }
@@ -369,7 +405,7 @@  void kvm_destroy_vcpu(CPUState *cpu)
     }
 }
 
-static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
+static int kvm_get_vcpu(KVMState *s, int vcpu_id)
 {
     struct KVMParkedVcpu *cpu;
 
@@ -384,7 +420,7 @@  static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
         }
     }
 
-    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
+    return -1;
 }
 
 int kvm_init_vcpu(CPUState *cpu, Error **errp)
@@ -395,19 +431,14 @@  int kvm_init_vcpu(CPUState *cpu, Error **errp)
 
     trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
-    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
+    ret = kvm_create_vcpu(cpu);
     if (ret < 0) {
-        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
+        error_setg_errno(errp, -ret,
+                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
                          kvm_arch_vcpu_id(cpu));
         goto err;
     }
 
-    cpu->kvm_fd = ret;
-    cpu->kvm_state = s;
-    cpu->vcpu_dirty = true;
-    cpu->dirty_pages = 0;
-    cpu->throttle_us_per_full = 0;
-
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
         ret = mmap_size;
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index ee9025f8e9..785f3ed083 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -464,6 +464,20 @@  void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
 
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
                                        hwaddr *phys_addr);
+/**
+ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
+ * @cpu:  QOM CPUState object for which KVM vCPU has to be created/fetched.
+ *
+ * @returns: 0 when success, errno (<0) when failed.
+ */
+int kvm_create_vcpu(CPUState *cpu);
+/**
+ * kvm_park_vcpu - Parks the KVM vCPU of a QOM CPUState object
+ * @cpu:  QOM CPUState object whose KVM vCPU id and fd have to be parked.
+ *
+ * @returns: none; the vCPU id and kvm_fd are added to the parked vCPU list.
+ */
+void kvm_park_vcpu(CPUState *cpu);
 
 #endif /* NEED_CPU_H */