diff mbox series

[V8,1/8] accel/kvm: Extract common KVM vCPU {creation, parking} code

Message ID 20240312020000.12992-2-salil.mehta@huawei.com (mailing list archive)
State New, archived
Headers show
Series Add architecture agnostic code to support vCPU Hotplug | expand

Commit Message

Salil Mehta March 12, 2024, 1:59 a.m. UTC
KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
is spawned. This is common to all the architectures as of now.

Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
support vCPU removal. Therefore, its representative KVM vCPU object/context in
Qemu is parked.

Refactor architecture common logic so that some APIs could be reused by vCPU
Hotplug code of some architectures like ARM, Loongson etc. Update new/old APIs
with trace events instead of DPRINTF. No functional change is intended here.

Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Tested-by: Xianglai Li <lixianglai@loongson.cn>
Tested-by: Miguel Luis <miguel.luis@oracle.com>
Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
---
 accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++----------
 accel/kvm/trace-events |  5 +++-
 include/sysemu/kvm.h   | 16 +++++++++++
 3 files changed, 69 insertions(+), 16 deletions(-)

Comments

Harsh Prateek Bora March 22, 2024, 8:15 a.m. UTC | #1
+ Vaibhav, Shiva

Hi Salil,

I came across your patch while trying to solve a related problem on 
spapr. One query below ..

On 3/12/24 07:29, Salil Mehta via wrote:
> KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
> is spawned. This is common to all the architectures as of now.
> 
> Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
> corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
> support vCPU removal. Therefore, its representative KVM vCPU object/context in
> Qemu is parked.
> 
> Refactor architecture common logic so that some APIs could be reused by vCPU
> Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs
> with trace events instead of DPRINTF. No functional change is intended here.
> 
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> Reviewed-by: Gavin Shan <gshan@redhat.com>
> Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Tested-by: Xianglai Li <lixianglai@loongson.cn>
> Tested-by: Miguel Luis <miguel.luis@oracle.com>
> Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
> ---
>   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++----------
>   accel/kvm/trace-events |  5 +++-
>   include/sysemu/kvm.h   | 16 +++++++++++
>   3 files changed, 69 insertions(+), 16 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index a8cecd040e..3bc3207bda 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>   
>   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>   
>   static inline void kvm_resample_fd_remove(int gsi)
>   {
> @@ -314,14 +315,53 @@ err:
>       return ret;
>   }
>   
> +void kvm_park_vcpu(CPUState *cpu)
> +{
> +    struct KVMParkedVcpu *vcpu;
> +
> +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
> +
> +    vcpu = g_malloc0(sizeof(*vcpu));
> +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> +    vcpu->kvm_fd = cpu->kvm_fd;
> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +}
> +
> +int kvm_create_vcpu(CPUState *cpu)
> +{
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
> +    KVMState *s = kvm_state;
> +    int kvm_fd;
> +
> +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
> +
> +    /* check if the KVM vCPU already exist but is parked */
> +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
> +    if (kvm_fd < 0) {
> +        /* vCPU not parked: create a new KVM vCPU */
> +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
> +        if (kvm_fd < 0) {
> +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
> +            return kvm_fd;
> +        }
> +    }
> +
> +    cpu->kvm_fd = kvm_fd;
> +    cpu->kvm_state = s;
> +    cpu->vcpu_dirty = true;
> +    cpu->dirty_pages = 0;
> +    cpu->throttle_us_per_full = 0;
> +
> +    return 0;
> +}
> +
>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>   {
>       KVMState *s = kvm_state;
>       long mmap_size;
> -    struct KVMParkedVcpu *vcpu = NULL;
>       int ret = 0;
>   
> -    trace_kvm_destroy_vcpu();
> +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>   
>       ret = kvm_arch_destroy_vcpu(cpu);
>       if (ret < 0) {
> @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>           }
>       }
>   
> -    vcpu = g_malloc0(sizeof(*vcpu));
> -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> -    vcpu->kvm_fd = cpu->kvm_fd;
> -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +    kvm_park_vcpu(cpu);
>   err:
>       return ret;
>   }
> @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>           if (cpu->vcpu_id == vcpu_id) {
>               int kvm_fd;
>   
> +            trace_kvm_get_vcpu(vcpu_id);
> +
>               QLIST_REMOVE(cpu, node);
>               kvm_fd = cpu->kvm_fd;
>               g_free(cpu);
> @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>           }
>       }
>   
> -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
> +    return -ENOENT;
>   }
>   
>   int kvm_init_vcpu(CPUState *cpu, Error **errp)
> @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>   
>       trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>   
> -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
> +    ret = kvm_create_vcpu(cpu);
>       if (ret < 0) {
> -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
> +        error_setg_errno(errp, -ret,
> +                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>                            kvm_arch_vcpu_id(cpu));

If a vCPU hotplug fails because the kvm_create_vcpu ioctl fails, the
current behaviour is to bring down the guest, as errp is
&error_fatal. Any thoughts on how we can ensure that a failure of the
kvm_create_vcpu ioctl for hotplugged CPUs (only) doesn't bring down the
guest, but instead fails gracefully (by reporting an error to the user on the monitor)?

regards,
Harsh
>           goto err;
>       }
>   
> -    cpu->kvm_fd = ret;
> -    cpu->kvm_state = s;
> -    cpu->vcpu_dirty = true;
> -    cpu->dirty_pages = 0;
> -    cpu->throttle_us_per_full = 0;
> -
>       mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>       if (mmap_size < 0) {
>           ret = mmap_size;
> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
> index a25902597b..5558cff0dc 100644
> --- a/accel/kvm/trace-events
> +++ b/accel/kvm/trace-events
> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
>   kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
>   kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
>   kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu"
> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>   kvm_irqchip_commit_routes(void) ""
>   kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
>   kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>   kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
>   kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>   kvm_dirty_ring_flush(int finished) "%d"
> -kvm_destroy_vcpu(void) ""
>   kvm_failed_get_vcpu_mmap_size(void) ""
>   kvm_cpu_exec(void) ""
>   kvm_interrupt_exit_request(void) ""
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index fad9a7e8ff..2ed928aa71 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
>   int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
>                                          hwaddr *phys_addr);
>   
> +/**
> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
> + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
> + *
> + * @returns: 0 when success, errno (<0) when failed.
> + */
> +int kvm_create_vcpu(CPUState *cpu);
> +
> +/**
> + * kvm_park_vcpu - Park QEMU KVM vCPU context
> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
> + *
> + * @returns: none
> + */
> +void kvm_park_vcpu(CPUState *cpu);
> +
>   #endif /* NEED_CPU_H */
>   
>   void kvm_cpu_synchronize_state(CPUState *cpu);
Vishnu Pajjuri April 4, 2024, 1:59 p.m. UTC | #2
Hi Salil,

On 12-03-2024 07:29, Salil Mehta wrote:
> KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
> is spawned. This is common to all the architectures as of now.
>
> Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
> corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
> support vCPU removal. Therefore, its representative KVM vCPU object/context in
> Qemu is parked.
>
> Refactor architecture common logic so that some APIs could be reused by vCPU
> Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs
> with trace events instead of DPRINTF. No functional change is intended here.
>
> Signed-off-by: Salil Mehta<salil.mehta@huawei.com>
> Reviewed-by: Gavin Shan<gshan@redhat.com>
> Tested-by: Vishnu Pajjuri<vishnu@os.amperecomputing.com>
> Reviewed-by: Jonathan Cameron<Jonathan.Cameron@huawei.com>
> Tested-by: Xianglai Li<lixianglai@loongson.cn>
> Tested-by: Miguel Luis<miguel.luis@oracle.com>
> Reviewed-by: Shaoqin Huang<shahuang@redhat.com>
> ---
>   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++----------
>   accel/kvm/trace-events |  5 +++-
>   include/sysemu/kvm.h   | 16 +++++++++++
>   3 files changed, 69 insertions(+), 16 deletions(-)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index a8cecd040e..3bc3207bda 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>   
>   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>   
>   static inline void kvm_resample_fd_remove(int gsi)
>   {
> @@ -314,14 +315,53 @@ err:
>       return ret;
>   }
>   
> +void kvm_park_vcpu(CPUState *cpu)
> +{
> +    struct KVMParkedVcpu *vcpu;
> +
> +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
It would be good to add kvm_fd to the trace.
It would be useful for cross-verifying kvm_get_vcpu()'s kvm_fd against the parked vCPU's.
> +
> +    vcpu = g_malloc0(sizeof(*vcpu));
> +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> +    vcpu->kvm_fd = cpu->kvm_fd;
> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +}
> +
> +int kvm_create_vcpu(CPUState *cpu)
> +{
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
> +    KVMState *s = kvm_state;
> +    int kvm_fd;
> +
> +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
vcpu_id can be used instead of kvm_arch_vcpu_id(cpu).
> +
> +    /* check if the KVM vCPU already exist but is parked */
> +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
> +    if (kvm_fd < 0) {
> +        /* vCPU not parked: create a new KVM vCPU */
> +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
> +        if (kvm_fd < 0) {
> +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
> +            return kvm_fd;
> +        }
> +    }
> +
> +    cpu->kvm_fd = kvm_fd;
> +    cpu->kvm_state = s;
> +    cpu->vcpu_dirty = true;
> +    cpu->dirty_pages = 0;
> +    cpu->throttle_us_per_full = 0;
> +
> +    return 0;
> +}
> +
>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>   {
>       KVMState *s = kvm_state;
>       long mmap_size;
> -    struct KVMParkedVcpu *vcpu = NULL;
>       int ret = 0;
>   
> -    trace_kvm_destroy_vcpu();
> +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>   
>       ret = kvm_arch_destroy_vcpu(cpu);
>       if (ret < 0) {
> @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>           }
>       }
>   
> -    vcpu = g_malloc0(sizeof(*vcpu));
> -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> -    vcpu->kvm_fd = cpu->kvm_fd;
> -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +    kvm_park_vcpu(cpu);
>   err:
>       return ret;
>   }
> @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>           if (cpu->vcpu_id == vcpu_id) {
>               int kvm_fd;
>   
> +            trace_kvm_get_vcpu(vcpu_id);
It would be good to add kvm_fd to the trace.
It would be useful for cross-verifying kvm_get_vcpu()'s kvm_fd against the parked vCPU's.
> +
>               QLIST_REMOVE(cpu, node);
>               kvm_fd = cpu->kvm_fd;
>               g_free(cpu);
> @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>           }
>       }
>   
> -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
> +    return -ENOENT;
>   }
>   
>   int kvm_init_vcpu(CPUState *cpu, Error **errp)
> @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>   
>       trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>   
> -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
> +    ret = kvm_create_vcpu(cpu);
>       if (ret < 0) {
> -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
> +        error_setg_errno(errp, -ret,
> +                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>                            kvm_arch_vcpu_id(cpu));
>           goto err;
>       }
>   
> -    cpu->kvm_fd = ret;
> -    cpu->kvm_state = s;
> -    cpu->vcpu_dirty = true;
> -    cpu->dirty_pages = 0;
> -    cpu->throttle_us_per_full = 0;
> -
>       mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>       if (mmap_size < 0) {
>           ret = mmap_size;
> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
> index a25902597b..5558cff0dc 100644
> --- a/accel/kvm/trace-events
> +++ b/accel/kvm/trace-events
> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
>   kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
>   kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
>   kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu"
> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>   kvm_irqchip_commit_routes(void) ""
>   kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
>   kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>   kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
>   kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>   kvm_dirty_ring_flush(int finished) "%d"
> -kvm_destroy_vcpu(void) ""
>   kvm_failed_get_vcpu_mmap_size(void) ""
>   kvm_cpu_exec(void) ""
>   kvm_interrupt_exit_request(void) ""
> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
> index fad9a7e8ff..2ed928aa71 100644
> --- a/include/sysemu/kvm.h
> +++ b/include/sysemu/kvm.h
> @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
>   int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
>                                          hwaddr *phys_addr);
>   
> +/**
> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
> + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
> + *
> + * @returns: 0 when success, errno (<0) when failed.
> + */
> +int kvm_create_vcpu(CPUState *cpu);
> +
> +/**
> + * kvm_park_vcpu - Park QEMU KVM vCPU context
> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
> + *
> + * @returns: none
> + */
> +void kvm_park_vcpu(CPUState *cpu);
> +
>   #endif /* NEED_CPU_H */
>   
>   void kvm_cpu_synchronize_state(CPUState *cpu);

Otherwise, looks good to me. Feel free to add
Reviewed-by: "Vishnu Pajjuri" <vishnu@os.amperecomputing.com>

_Thanks_,

-Vishnu
Harsh Prateek Bora April 23, 2024, 6:44 a.m. UTC | #3
+ Nick

Hi Salil,
I have posted a patch [1] for ppc which is based on this refactoring patch.
I see there were some comments from Vishnu on this patch.
Are we expecting any further updates on this patch before merge?

Thanks
Harsh

[1] 
https://lore.kernel.org/qemu-devel/a0f9b2fc-4c8a-4c37-bc36-26bbaa627fec@linux.ibm.com/T/#u

On 3/22/24 13:45, Harsh Prateek Bora wrote:
> + Vaibhav, Shiva
> 
> Hi Salil,
> 
> I came across your patch while trying to solve a related problem on 
> spapr. One query below ..
> 
> On 3/12/24 07:29, Salil Mehta via wrote:
>> KVM vCPU creation is done once during the vCPU realization when Qemu 
>> vCPU thread
>> is spawned. This is common to all the architectures as of now.
>>
>> Hot-unplug of vCPU results in destruction of the vCPU object in QOM 
>> but the
>> corresponding KVM vCPU object in the Host KVM is not destroyed as KVM 
>> doesn't
>> support vCPU removal. Therefore, its representative KVM vCPU 
>> object/context in
>> Qemu is parked.
>>
>> Refactor architecture common logic so that some APIs could be reused 
>> by vCPU
>> Hotplug code of some architectures likes ARM, Loongson etc. Update 
>> new/old APIs
>> with trace events instead of DPRINTF. No functional change is intended 
>> here.
>>
>> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>> Reviewed-by: Gavin Shan <gshan@redhat.com>
>> Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Tested-by: Xianglai Li <lixianglai@loongson.cn>
>> Tested-by: Miguel Luis <miguel.luis@oracle.com>
>> Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
>> ---
>>   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++----------
>>   accel/kvm/trace-events |  5 +++-
>>   include/sysemu/kvm.h   | 16 +++++++++++
>>   3 files changed, 69 insertions(+), 16 deletions(-)
>>
>> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>> index a8cecd040e..3bc3207bda 100644
>> --- a/accel/kvm/kvm-all.c
>> +++ b/accel/kvm/kvm-all.c
>> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>>   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>>   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>>   static inline void kvm_resample_fd_remove(int gsi)
>>   {
>> @@ -314,14 +315,53 @@ err:
>>       return ret;
>>   }
>> +void kvm_park_vcpu(CPUState *cpu)
>> +{
>> +    struct KVMParkedVcpu *vcpu;
>> +
>> +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>> +
>> +    vcpu = g_malloc0(sizeof(*vcpu));
>> +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>> +    vcpu->kvm_fd = cpu->kvm_fd;
>> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>> +}
>> +
>> +int kvm_create_vcpu(CPUState *cpu)
>> +{
>> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>> +    KVMState *s = kvm_state;
>> +    int kvm_fd;
>> +
>> +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>> +
>> +    /* check if the KVM vCPU already exist but is parked */
>> +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>> +    if (kvm_fd < 0) {
>> +        /* vCPU not parked: create a new KVM vCPU */
>> +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>> +        if (kvm_fd < 0) {
>> +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", 
>> vcpu_id);
>> +            return kvm_fd;
>> +        }
>> +    }
>> +
>> +    cpu->kvm_fd = kvm_fd;
>> +    cpu->kvm_state = s;
>> +    cpu->vcpu_dirty = true;
>> +    cpu->dirty_pages = 0;
>> +    cpu->throttle_us_per_full = 0;
>> +
>> +    return 0;
>> +}
>> +
>>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>>   {
>>       KVMState *s = kvm_state;
>>       long mmap_size;
>> -    struct KVMParkedVcpu *vcpu = NULL;
>>       int ret = 0;
>> -    trace_kvm_destroy_vcpu();
>> +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>       ret = kvm_arch_destroy_vcpu(cpu);
>>       if (ret < 0) {
>> @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>>           }
>>       }
>> -    vcpu = g_malloc0(sizeof(*vcpu));
>> -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>> -    vcpu->kvm_fd = cpu->kvm_fd;
>> -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>> +    kvm_park_vcpu(cpu);
>>   err:
>>       return ret;
>>   }
>> @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long 
>> vcpu_id)
>>           if (cpu->vcpu_id == vcpu_id) {
>>               int kvm_fd;
>> +            trace_kvm_get_vcpu(vcpu_id);
>> +
>>               QLIST_REMOVE(cpu, node);
>>               kvm_fd = cpu->kvm_fd;
>>               g_free(cpu);
>> @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long 
>> vcpu_id)
>>           }
>>       }
>> -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>> +    return -ENOENT;
>>   }
>>   int kvm_init_vcpu(CPUState *cpu, Error **errp)
>> @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>>       trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>> -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
>> +    ret = kvm_create_vcpu(cpu);
>>       if (ret < 0) {
>> -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu 
>> failed (%lu)",
>> +        error_setg_errno(errp, -ret,
>> +                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>>                            kvm_arch_vcpu_id(cpu));
> 
> If a vcpu hotplug fails due to failure with kvm_create_vcpu ioctl,
> current behaviour would be to bring down the guest as errp is
> &error_fatal. Any thoughts on how do we ensure that a failure with
> kvm_create_vcpu ioctl for hotplugged cpus (only) doesnt bring down the
> guest and fail gracefully (by reporting error to user on monitor?)?
> 
> regards,
> Harsh
>>           goto err;
>>       }
>> -    cpu->kvm_fd = ret;
>> -    cpu->kvm_state = s;
>> -    cpu->vcpu_dirty = true;
>> -    cpu->dirty_pages = 0;
>> -    cpu->throttle_us_per_full = 0;
>> -
>>       mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>>       if (mmap_size < 0) {
>>           ret = mmap_size;
>> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
>> index a25902597b..5558cff0dc 100644
>> --- a/accel/kvm/trace-events
>> +++ b/accel/kvm/trace-events
>> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd 
>> %d, type 0x%x, arg %p"
>>   kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to 
>> retrieve ONEREG %" PRIu64 " from KVM: %s"
>>   kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to 
>> set ONEREG %" PRIu64 " to KVM: %s"
>>   kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d 
>> id: %lu"
>> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d 
>> id: %lu"
>> +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu"
>> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d 
>> id: %lu"
>> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d 
>> id: %lu"
>>   kvm_irqchip_commit_routes(void) ""
>>   kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s 
>> vector %d virq %d"
>>   kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
>> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>>   kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" 
>> pages (took %"PRIi64" us)"
>>   kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>>   kvm_dirty_ring_flush(int finished) "%d"
>> -kvm_destroy_vcpu(void) ""
>>   kvm_failed_get_vcpu_mmap_size(void) ""
>>   kvm_cpu_exec(void) ""
>>   kvm_interrupt_exit_request(void) ""
>> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
>> index fad9a7e8ff..2ed928aa71 100644
>> --- a/include/sysemu/kvm.h
>> +++ b/include/sysemu/kvm.h
>> @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned 
>> int sigmask_len);
>>   int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
>>                                          hwaddr *phys_addr);
>> +/**
>> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
>> + * @cpu: QOM CPUState object for which KVM vCPU has to be 
>> fetched/created.
>> + *
>> + * @returns: 0 when success, errno (<0) when failed.
>> + */
>> +int kvm_create_vcpu(CPUState *cpu);
>> +
>> +/**
>> + * kvm_park_vcpu - Park QEMU KVM vCPU context
>> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to 
>> be parked.
>> + *
>> + * @returns: none
>> + */
>> +void kvm_park_vcpu(CPUState *cpu);
>> +
>>   #endif /* NEED_CPU_H */
>>   void kvm_cpu_synchronize_state(CPUState *cpu);
Philippe Mathieu-Daudé May 3, 2024, 9:40 a.m. UTC | #4
Hi Salil,

On 12/3/24 02:59, Salil Mehta wrote:
> KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
> is spawned. This is common to all the architectures as of now.
> 
> Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
> corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
> support vCPU removal. Therefore, its representative KVM vCPU object/context in
> Qemu is parked.
> 
> Refactor architecture common logic so that some APIs could be reused by vCPU
> Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs
> with trace events instead of DPRINTF. No functional change is intended here.
> 
> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
> Reviewed-by: Gavin Shan <gshan@redhat.com>
> Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Tested-by: Xianglai Li <lixianglai@loongson.cn>
> Tested-by: Miguel Luis <miguel.luis@oracle.com>
> Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
> ---
>   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++----------
>   accel/kvm/trace-events |  5 +++-
>   include/sysemu/kvm.h   | 16 +++++++++++
>   3 files changed, 69 insertions(+), 16 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index a8cecd040e..3bc3207bda 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>   
>   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>   
>   static inline void kvm_resample_fd_remove(int gsi)
>   {
> @@ -314,14 +315,53 @@ err:
>       return ret;
>   }
>   
> +void kvm_park_vcpu(CPUState *cpu)
> +{
> +    struct KVMParkedVcpu *vcpu;
> +
> +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
> +
> +    vcpu = g_malloc0(sizeof(*vcpu));
> +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
> +    vcpu->kvm_fd = cpu->kvm_fd;
> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
> +}
> +
> +int kvm_create_vcpu(CPUState *cpu)
> +{
> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
> +    KVMState *s = kvm_state;
> +    int kvm_fd;
> +
> +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
> +
> +    /* check if the KVM vCPU already exist but is parked */
> +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
> +    if (kvm_fd < 0) {
> +        /* vCPU not parked: create a new KVM vCPU */
> +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
> +        if (kvm_fd < 0) {
> +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
> +            return kvm_fd;
> +        }
> +    }
> +
> +    cpu->kvm_fd = kvm_fd;
> +    cpu->kvm_state = s;
> +    cpu->vcpu_dirty = true;
> +    cpu->dirty_pages = 0;
> +    cpu->throttle_us_per_full = 0;
> +
> +    return 0;
> +}

This seems generic enough to be implemented for all accelerators.

See AccelOpsClass in include/sysemu/accel-ops.h.

That said, can be done later on top.
Salil Mehta May 3, 2024, 3:57 p.m. UTC | #5
Hi Philippe,

>  From: Philippe Mathieu-Daudé <philmd@linaro.org>
>  Sent: Friday, May 3, 2024 10:40 AM
>  Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU
>  {creation,parking} code
>  
>  Hi Salil,
>  
>  On 12/3/24 02:59, Salil Mehta wrote:
>  > KVM vCPU creation is done once during the vCPU realization when Qemu
>  > vCPU thread is spawned. This is common to all the architectures as of now.
>  >
>  > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
>  > but the corresponding KVM vCPU object in the Host KVM is not destroyed
>  > as KVM doesn't support vCPU removal. Therefore, its representative KVM
>  > vCPU object/context in Qemu is parked.
>  >
>  > Refactor architecture common logic so that some APIs could be reused
>  > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
>  > Update new/old APIs with trace events instead of DPRINTF. No functional
>  change is intended here.
>  >
>  > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  > Reviewed-by: Gavin Shan <gshan@redhat.com>
>  > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>  > Tested-by: Xianglai Li <lixianglai@loongson.cn>
>  > Tested-by: Miguel Luis <miguel.luis@oracle.com>
>  > Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
>  > ---
>  >   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++------
>  ----
>  >   accel/kvm/trace-events |  5 +++-
>  >   include/sysemu/kvm.h   | 16 +++++++++++
>  >   3 files changed, 69 insertions(+), 16 deletions(-)
>  >
>  > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
>  > a8cecd040e..3bc3207bda 100644
>  > --- a/accel/kvm/kvm-all.c
>  > +++ b/accel/kvm/kvm-all.c
>  > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>  >   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>  >
>  >   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>  > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>  >
>  >   static inline void kvm_resample_fd_remove(int gsi)
>  >   {
>  > @@ -314,14 +315,53 @@ err:
>  >       return ret;
>  >   }
>  >
>  > +void kvm_park_vcpu(CPUState *cpu)
>  > +{
>  > +    struct KVMParkedVcpu *vcpu;
>  > +
>  > +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  > +
>  > +    vcpu = g_malloc0(sizeof(*vcpu));
>  > +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  > +    vcpu->kvm_fd = cpu->kvm_fd;
>  > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); }
>  > +
>  > +int kvm_create_vcpu(CPUState *cpu)
>  > +{
>  > +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>  > +    KVMState *s = kvm_state;
>  > +    int kvm_fd;
>  > +
>  > +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  > +
>  > +    /* check if the KVM vCPU already exist but is parked */
>  > +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>  > +    if (kvm_fd < 0) {
>  > +        /* vCPU not parked: create a new KVM vCPU */
>  > +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>  > +        if (kvm_fd < 0) {
>  > +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu",  vcpu_id);
>  > +            return kvm_fd;
>  > +        }
>  > +    }
>  > +
>  > +    cpu->kvm_fd = kvm_fd;
>  > +    cpu->kvm_state = s;
>  > +    cpu->vcpu_dirty = true;
>  > +    cpu->dirty_pages = 0;
>  > +    cpu->throttle_us_per_full = 0;
>  > +
>  > +    return 0;
>  > +}
>  
>  This seems generic enough to be implemented for all accelerators.
>  
>  See AccelOpsClass in include/sysemu/accel-ops.h.
>  
>  That said, can be done later on top.

Let me understand correctly. Are you suggesting to implement above even for
HVF, TCG, QTEST etc?

Thanks
Salil.
Salil Mehta May 3, 2024, 4:23 p.m. UTC | #6
Hi Vishnu,

>  From: Vishnu Pajjuri <vishnu@amperemail.onmicrosoft.com> 
>  Sent: Thursday, April 4, 2024 3:00 PM
>  Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code
>  
>  Hi Salil,
>>  On 12-03-2024 07:29, Salil Mehta wrote:
>>  KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
>>  is spawned. This is common to all the architectures as of now.
>>  
>>  Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
>>  corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
>>  support vCPU removal. Therefore, its representative KVM vCPU object/context in
>>  Qemu is parked.
>>  
>>  Refactor architecture common logic so that some APIs could be reused by vCPU
>>  Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs
>>  with trace events instead of DPRINTF. No functional change is intended here.
>>  
>>  Signed-off-by: Salil Mehta mailto:salil.mehta@huawei.com
>>  Reviewed-by: Gavin Shan mailto:gshan@redhat.com
>>  Tested-by: Vishnu Pajjuri mailto:vishnu@os.amperecomputing.com
>>  Reviewed-by: Jonathan Cameron mailto:Jonathan.Cameron@huawei.com
>>  Tested-by: Xianglai Li mailto:lixianglai@loongson.cn
>>  Tested-by: Miguel Luis mailto:miguel.luis@oracle.com
>>  Reviewed-by: Shaoqin Huang mailto:shahuang@redhat.com
>>  ---
>>   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++----------
>>   accel/kvm/trace-events |  5 +++-
>>   include/sysemu/kvm.h   | 16 +++++++++++
>>   3 files changed, 69 insertions(+), 16 deletions(-)
>>  
>>  diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>>  index a8cecd040e..3bc3207bda 100644
>>  --- a/accel/kvm/kvm-all.c
>>  +++ b/accel/kvm/kvm-all.c
>>  @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>>   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>>   
>>   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>>  +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>>   
>>   static inline void kvm_resample_fd_remove(int gsi)
>>   {
>>  @@ -314,14 +315,53 @@ err:
>>  return ret;
>>   }
>>   
>>  +void kvm_park_vcpu(CPUState *cpu)
>>  +{
>>  +    struct KVMParkedVcpu *vcpu;
>>  +
>>  +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  It's good if we add kvm_fd to trace.
>  It will be useful to cross verify kvm_get_vcpu()'s kvm_fd with parked vcpu.


Agreed. But this is currently called in the context of creating and destroying
a vCPU, where a trace already exists with the info you are seeking. Having
a trace here might duplicate the info and end up increasing the noise.

Let me know if you think otherwise or have something else to add.

Thanks

 
>>  +
>>  +    vcpu = g_malloc0(sizeof(*vcpu));
>>  +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>>  +    vcpu->kvm_fd = cpu->kvm_fd;
>>  +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>>  +}
>>  +
>>  +int kvm_create_vcpu(CPUState *cpu)
>>  +{
>>  +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>>  +    KVMState *s = kvm_state;
>>  +    int kvm_fd;
>>  +
>>  +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  vcpu_id can be used instead of kvm_arch_vcpu_id(cpu).


KVM arch vCPU ID ensures that the ID being traced is meaningful for that
architecture. The way the CPU ID gets calculated on different architectures
could be different. Hence, its value might be quite different.

  
>>  +
>>  +    /* check if the KVM vCPU already exist but is parked */
>>  +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>>  +    if (kvm_fd < 0) {
>>  +>   /* vCPU not parked: create a new KVM vCPU */
>>  +>   kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>>  +>   if (kvm_fd < 0) {
>>  +>       error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
>>  +>       return kvm_fd;
>>  +>   }
>>  +    }
>>  +
>>  +    cpu->kvm_fd = kvm_fd;
>>  +    cpu->kvm_state = s;
>>  +    cpu->vcpu_dirty = true;
>>  +    cpu->dirty_pages = 0;
>>  +    cpu->throttle_us_per_full = 0;
>>  +
>>  +    return 0;
>>  +}
>>  +
>>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>>   {
>>       KVMState *s = kvm_state;
>>       long mmap_size;
>>  -    struct KVMParkedVcpu *vcpu = NULL;
>>       int ret = 0;
>>   
>>  -    trace_kvm_destroy_vcpu();
>>  +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>   
>>       ret = kvm_arch_destroy_vcpu(cpu);
>>       if (ret < 0) {
>>  @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>>  >    }
>>       }
>>   
>>  -    vcpu = g_malloc0(sizeof(*vcpu));
>>  -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>>  -    vcpu->kvm_fd = cpu->kvm_fd;
>>  -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>>  +    kvm_park_vcpu(cpu);
>>   err:
>>       return ret;
>>   }
>>  @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>>  >    if (cpu->vcpu_id == vcpu_id) {
>>  >        int kvm_fd;
>>   
>>  +>       trace_kvm_get_vcpu(vcpu_id);
>  It's good if we add kvm_fd to trace.
>  It will be useful to cross verify kvm_get_vcpu's kvm_fd with parked vcpu.


I can, but I'm wondering why you've raised this? Perhaps I'm not aware of the
interface you are using to configure the VMs and how traces across different
VMs get reflected. Please help me understand.


>>  +
>>  >        QLIST_REMOVE(cpu, node);
>>  >        kvm_fd = cpu->kvm_fd;
>>  >        g_free(cpu);
>>  @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>>  >    }
>>       }
>>   
>>  -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>>  +    return -ENOENT;
>>   }
>>   
>>   int kvm_init_vcpu(CPUState *cpu, Error **errp)
>>  @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>>   
>>       trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>   
>>  -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
>>  +    ret = kvm_create_vcpu(cpu);
>>       if (ret < 0) {
>>  -   error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
>>  +   error_setg_errno(errp, -ret,
>>  +          "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>             kvm_arch_vcpu_id(cpu));
>>      goto err;
>>       }
>>   
>>  -    cpu->kvm_fd = ret;
>>  -    cpu->kvm_state = s;
>>  -    cpu->vcpu_dirty = true;
>>  -    cpu->dirty_pages = 0;
>>  -    cpu->throttle_us_per_full = 0;
>>  -
>>       mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>>       if (mmap_size < 0) {
>>      ret = mmap_size;
>>  diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
>>  index a25902597b..5558cff0dc 100644
>>  --- a/accel/kvm/trace-events
>>  +++ b/accel/kvm/trace-events
>>  @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
>>   kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
>>   kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
>>   kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>>  +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>>  +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu"
>>  +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>>  +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>>   kvm_irqchip_commit_routes(void) ""
>>   kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
>>   kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
>>  @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>>   kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
>>   kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>>   kvm_dirty_ring_flush(int finished) "%d"
>>  -kvm_destroy_vcpu(void) ""
>>   kvm_failed_get_vcpu_mmap_size(void) ""
>>   kvm_cpu_exec(void) ""
>>   kvm_interrupt_exit_request(void) ""
>>  diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
>>  index fad9a7e8ff..2ed928aa71 100644
>>  --- a/include/sysemu/kvm.h
>>  +++ b/include/sysemu/kvm.h
>>  @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
>>   int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
>>  >  >  >  >  >       hwaddr *phys_addr);
>>   
>>  +/**
>>  + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
>>  + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
>>  + *
>>  + * @returns: 0 when success, errno (<0) when failed.
>>  + */
>>  +int kvm_create_vcpu(CPUState *cpu);
>>  +
>>  +/**
>>  + * kvm_park_vcpu - Park QEMU KVM vCPU context
>>  + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
>>  + *
>>  + * @returns: none
>>  + */
>>  +void kvm_park_vcpu(CPUState *cpu);
>>  +
>>   #endif /* NEED_CPU_H */
>>   
>>   void kvm_cpu_synchronize_state(CPUState *cpu);
>  Otherwise, Looks good to me.  Feel free to add
>  Reviewed-by: "Vishnu Pajjuri" mailto:vishnu@os.amperecomputing.com
>  Thanks,

Thanks.
Salil



>  -Vishnu
Philippe Mathieu-Daudé May 3, 2024, 6:22 p.m. UTC | #7
On 3/5/24 17:57, Salil Mehta wrote:
> Hi Philippe,
> 
>>   From: Philippe Mathieu-Daudé <philmd@linaro.org>
>>   Sent: Friday, May 3, 2024 10:40 AM
>>   Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU
>>   {creation,parking} code
>>   
>>   Hi Salil,
>>   
>>   On 12/3/24 02:59, Salil Mehta wrote:
>>   > KVM vCPU creation is done once during the vCPU realization when Qemu
>>   > vCPU thread is spawned. This is common to all the architectures as of now.
>>   >
>>   > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
>>   > but the corresponding KVM vCPU object in the Host KVM is not destroyed
>>   > as KVM doesn't support vCPU removal. Therefore, its representative KVM
>>   > vCPU object/context in Qemu is parked.
>>   >
>>   > Refactor architecture common logic so that some APIs could be reused
>>   > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
>>   > Update new/old APIs with trace events instead of DPRINTF. No functional
>>   change is intended here.
>>   >
>>   > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>>   > Reviewed-by: Gavin Shan <gshan@redhat.com>
>>   > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>>   > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>   > Tested-by: Xianglai Li <lixianglai@loongson.cn>
>>   > Tested-by: Miguel Luis <miguel.luis@oracle.com>
>>   > Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
>>   > ---
>>   >   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++------
>>   ----
>>   >   accel/kvm/trace-events |  5 +++-
>>   >   include/sysemu/kvm.h   | 16 +++++++++++
>>   >   3 files changed, 69 insertions(+), 16 deletions(-)
>>   >
>>   > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
>>   > a8cecd040e..3bc3207bda 100644
>>   > --- a/accel/kvm/kvm-all.c
>>   > +++ b/accel/kvm/kvm-all.c
>>   > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>>   >   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>>   >
>>   >   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>>   > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>>   >
>>   >   static inline void kvm_resample_fd_remove(int gsi)
>>   >   {
>>   > @@ -314,14 +315,53 @@ err:
>>   >       return ret;
>>   >   }
>>   >
>>   > +void kvm_park_vcpu(CPUState *cpu)
>>   > +{
>>   > +    struct KVMParkedVcpu *vcpu;
>>   > +
>>   > +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>   > +
>>   > +    vcpu = g_malloc0(sizeof(*vcpu));
>>   > +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>>   > +    vcpu->kvm_fd = cpu->kvm_fd;
>>   > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); }
>>   > +
>>   > +int kvm_create_vcpu(CPUState *cpu)
>>   > +{
>>   > +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>>   > +    KVMState *s = kvm_state;
>>   > +    int kvm_fd;
>>   > +
>>   > +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>   > +
>>   > +    /* check if the KVM vCPU already exist but is parked */
>>   > +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>>   > +    if (kvm_fd < 0) {
>>   > +        /* vCPU not parked: create a new KVM vCPU */
>>   > +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>>   > +        if (kvm_fd < 0) {
>>   > +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu",  vcpu_id);
>>   > +            return kvm_fd;
>>   > +        }
>>   > +    }
>>   > +
>>   > +    cpu->kvm_fd = kvm_fd;
>>   > +    cpu->kvm_state = s;
>>   > +    cpu->vcpu_dirty = true;
>>   > +    cpu->dirty_pages = 0;
>>   > +    cpu->throttle_us_per_full = 0;
>>   > +
>>   > +    return 0;
>>   > +}
>>   
>>   This seems generic enough to be implemented for all accelerators.
>>   
>>   See AccelOpsClass in include/sysemu/accel-ops.h.
>>   
>>   That said, can be done later on top.
> 
> Let me understand correctly. Are you suggesting to implement above even for
> HVF, TCG, QTEST etc?

Not for you to implement the other non-KVM accelerators, but since
you are introducing this, now is a good time to think about a generic
interface.

So far AccelOpsClass::[un]park_vcpu() handlers make sense to me.

> Thanks
> Salil.
> 
> 
> 
>
Salil Mehta May 3, 2024, 6:43 p.m. UTC | #8
Hi Harsh,

Sorry for the delay in my reply. I've been off the grid for some time so I missed
this earlier mail. Please find my reply to your query below.

Thanks

>  From: Harsh Prateek Bora <harshpb@linux.ibm.com>
>  Sent: Friday, March 22, 2024 8:15 AM
>  
>  + Vaibhav, Shiva
>  
>  Hi Salil,
>  
>  I came across your patch while trying to solve a related problem on spapr.
>  One query below ..
>  
>  On 3/12/24 07:29, Salil Mehta via wrote:
>  > KVM vCPU creation is done once during the vCPU realization when Qemu
>  > vCPU thread is spawned. This is common to all the architectures as of now.
>  >
>  > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
>  > but the corresponding KVM vCPU object in the Host KVM is not destroyed
>  > as KVM doesn't support vCPU removal. Therefore, its representative KVM
>  > vCPU object/context in Qemu is parked.
>  >
>  > Refactor architecture common logic so that some APIs could be reused
>  > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
>  > Update new/old APIs with trace events instead of DPRINTF. No functional
>  change is intended here.
>  >
>  > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  > Reviewed-by: Gavin Shan <gshan@redhat.com>
>  > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>  > Tested-by: Xianglai Li <lixianglai@loongson.cn>
>  > Tested-by: Miguel Luis <miguel.luis@oracle.com>
>  > Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
>  > ---
>  >   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++------
>  ----
>  >   accel/kvm/trace-events |  5 +++-
>  >   include/sysemu/kvm.h   | 16 +++++++++++
>  >   3 files changed, 69 insertions(+), 16 deletions(-)
>  >
>  > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
>  > a8cecd040e..3bc3207bda 100644
>  > --- a/accel/kvm/kvm-all.c
>  > +++ b/accel/kvm/kvm-all.c
>  > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>  >   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>  >
>  >   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>  > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>  >
>  >   static inline void kvm_resample_fd_remove(int gsi)
>  >   {
>  > @@ -314,14 +315,53 @@ err:
>  >       return ret;
>  >   }
>  >
>  > +void kvm_park_vcpu(CPUState *cpu)
>  > +{
>  > +    struct KVMParkedVcpu *vcpu;
>  > +
>  > +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  > +
>  > +    vcpu = g_malloc0(sizeof(*vcpu));
>  > +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  > +    vcpu->kvm_fd = cpu->kvm_fd;
>  > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); }
>  > +
>  > +int kvm_create_vcpu(CPUState *cpu)
>  > +{
>  > +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>  > +    KVMState *s = kvm_state;
>  > +    int kvm_fd;
>  > +
>  > +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  > +
>  > +    /* check if the KVM vCPU already exist but is parked */
>  > +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>  > +    if (kvm_fd < 0) {
>  > +        /* vCPU not parked: create a new KVM vCPU */
>  > +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>  > +        if (kvm_fd < 0) {
>  > +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu",
>  vcpu_id);
>  > +            return kvm_fd;
>  > +        }
>  > +    }
>  > +
>  > +    cpu->kvm_fd = kvm_fd;
>  > +    cpu->kvm_state = s;
>  > +    cpu->vcpu_dirty = true;
>  > +    cpu->dirty_pages = 0;
>  > +    cpu->throttle_us_per_full = 0;
>  > +
>  > +    return 0;
>  > +}
>  > +
>  >   static int do_kvm_destroy_vcpu(CPUState *cpu)
>  >   {
>  >       KVMState *s = kvm_state;
>  >       long mmap_size;
>  > -    struct KVMParkedVcpu *vcpu = NULL;
>  >       int ret = 0;
>  >
>  > -    trace_kvm_destroy_vcpu();
>  > +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >
>  >       ret = kvm_arch_destroy_vcpu(cpu);
>  >       if (ret < 0) {
>  > @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>  >           }
>  >       }
>  >
>  > -    vcpu = g_malloc0(sizeof(*vcpu));
>  > -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  > -    vcpu->kvm_fd = cpu->kvm_fd;
>  > -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>  > +    kvm_park_vcpu(cpu);
>  >   err:
>  >       return ret;
>  >   }
>  > @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned
>  long vcpu_id)
>  >           if (cpu->vcpu_id == vcpu_id) {
>  >               int kvm_fd;
>  >
>  > +            trace_kvm_get_vcpu(vcpu_id);
>  > +
>  >               QLIST_REMOVE(cpu, node);
>  >               kvm_fd = cpu->kvm_fd;
>  >               g_free(cpu);
>  > @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned
>  long vcpu_id)
>  >           }
>  >       }
>  >
>  > -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>  > +    return -ENOENT;
>  >   }
>  >
>  >   int kvm_init_vcpu(CPUState *cpu, Error **errp) @@ -389,19 +428,14 @@
>  > int kvm_init_vcpu(CPUState *cpu, Error **errp)
>  >
>  >       trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >
>  > -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
>  > +    ret = kvm_create_vcpu(cpu);
>  >       if (ret < 0) {
>  > -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed
>  (%lu)",
>  > +        error_setg_errno(errp, -ret,
>  > +                         "kvm_init_vcpu: kvm_create_vcpu failed
>  > + (%lu)",
>  >                            kvm_arch_vcpu_id(cpu));
>  
>  If a vcpu hotplug fails due to failure with kvm_create_vcpu ioctl, current
>  behaviour would be to bring down the guest as errp is &error_fatal. Any
>  thoughts on how do we ensure that a failure with kvm_create_vcpu ioctl for
>  hotplugged cpus (only) doesnt bring down the guest and fail gracefully (by
>  reporting error to user on monitor?)?

On ARM, we are by design pre-creating all the vCPUs in KVM during the
Qemu/KVM Init. This is to satisfy the constraints posed by the ARM architecture,
as we are not allowed to meddle with any initialization at the KVM level or Guest
kernel level after the system has booted. The constraints mainly come from the
GIC and related per-CPU features, which can only be initialized once during init
in KVM; their presence is then made known to the Guest kernel only
once, during enumeration of the CPUs and related GIC CPU interfaces. These
cannot be changed later either. Hence, if all of the KVM vCPUs have been created
successfully during init, then hot(un)plugging operations later won't have
fatal initialization errors at KVM, as all operations get handled at the QOM
level only for the hot(un)plugged vCPUs.

I feel that if there is a failure to create a KVM vCPU at Qemu KVM Init time then
there is something severely wrong either with the inputs or the system.
Hence, to keep the handling simple, I was in favor of aborting the initialization.


But all of above is ARM arch specific. Do you have anything specific in mind
why you need graceful handling at the init time?

Thanks
Salil.

>  
>  regards,
>  Harsh
>  >           goto err;
>  >       }
>  >
Salil Mehta May 3, 2024, 6:56 p.m. UTC | #9
Hello,

I replied to your other thread just now. Sorry for catching up on everything late.

Thanks

>  From: Harsh Prateek Bora <harshpb@linux.ibm.com>
>  Sent: Tuesday, April 23, 2024 7:44 AM
>  
>  + Nick
>  
>  Hi Salil,
>  I have posted a patch [1] for ppc which based on this refactoring patch.
>  I see there were some comments from Vishnu on this patch.
>  Are we expecting any further updates on this patch before merge?


Yes, few of them and I'm working towards it. I've received most of the reviews
and SOBs last year itself. There are few minor comments to be addressed before
I can float V9 version of this patch-set.

I'm planning to push that for review in 2 weeks of time along with  RFC V3 of
the architecture specific code.


Thanks
Salil.


>  
>  Thanks
>  Harsh
>  
>  [1]
>  https://lore.kernel.org/qemu-devel/a0f9b2fc-4c8a-4c37-bc36-
>  26bbaa627fec@linux.ibm.com/T/#u
>  
>  On 3/22/24 13:45, Harsh Prateek Bora wrote:
>  > + Vaibhav, Shiva
>  >
>  > Hi Salil,
>  >
>  > I came across your patch while trying to solve a related problem on
>  > spapr. One query below ..
>  >
>  > On 3/12/24 07:29, Salil Mehta via wrote:
>  >> KVM vCPU creation is done once during the vCPU realization when
>  Qemu
>  >> vCPU thread is spawned. This is common to all the architectures as of
>  >> now.
>  >>
>  >> Hot-unplug of vCPU results in destruction of the vCPU object in QOM
>  >> but the corresponding KVM vCPU object in the Host KVM is not
>  >> destroyed as KVM doesn't support vCPU removal. Therefore, its
>  >> representative KVM vCPU object/context in Qemu is parked.
>  >>
>  >> Refactor architecture common logic so that some APIs could be reused
>  >> by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
>  >> Update new/old APIs with trace events instead of DPRINTF. No
>  >> functional change is intended here.
>  >>
>  >> Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  >> Reviewed-by: Gavin Shan <gshan@redhat.com>
>  >> Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  >> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>  >> Tested-by: Xianglai Li <lixianglai@loongson.cn>
>  >> Tested-by: Miguel Luis <miguel.luis@oracle.com>
>  >> Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
>  >> ---
>  >>   accel/kvm/kvm-all.c    | 64
>  >> ++++++++++++++++++++++++++++++++----------
>  >>   accel/kvm/trace-events |  5 +++-
>  >>   include/sysemu/kvm.h   | 16 +++++++++++
>  >>   3 files changed, 69 insertions(+), 16 deletions(-)
>  >>
>  >> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
>  >> a8cecd040e..3bc3207bda 100644
>  >> --- a/accel/kvm/kvm-all.c
>  >> +++ b/accel/kvm/kvm-all.c
>  >> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>  >>   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>  >>   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>  >> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>  >>   static inline void kvm_resample_fd_remove(int gsi)
>  >>   {
>  >> @@ -314,14 +315,53 @@ err:
>  >>       return ret;
>  >>   }
>  >> +void kvm_park_vcpu(CPUState *cpu)
>  >> +{
>  >> +    struct KVMParkedVcpu *vcpu;
>  >> +
>  >> +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >> +
>  >> +    vcpu = g_malloc0(sizeof(*vcpu));
>  >> +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  >> +    vcpu->kvm_fd = cpu->kvm_fd;
>  >> +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>  }
>  >> +
>  >> +int kvm_create_vcpu(CPUState *cpu)
>  >> +{
>  >> +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>  >> +    KVMState *s = kvm_state;
>  >> +    int kvm_fd;
>  >> +
>  >> +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >> +
>  >> +    /* check if the KVM vCPU already exist but is parked */
>  >> +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>  >> +    if (kvm_fd < 0) {
>  >> +        /* vCPU not parked: create a new KVM vCPU */
>  >> +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>  >> +        if (kvm_fd < 0) {
>  >> +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU
>  >> +%lu",
>  >> vcpu_id);
>  >> +            return kvm_fd;
>  >> +        }
>  >> +    }
>  >> +
>  >> +    cpu->kvm_fd = kvm_fd;
>  >> +    cpu->kvm_state = s;
>  >> +    cpu->vcpu_dirty = true;
>  >> +    cpu->dirty_pages = 0;
>  >> +    cpu->throttle_us_per_full = 0;
>  >> +
>  >> +    return 0;
>  >> +}
>  >> +
>  >>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>  >>   {
>  >>       KVMState *s = kvm_state;
>  >>       long mmap_size;
>  >> -    struct KVMParkedVcpu *vcpu = NULL;
>  >>       int ret = 0;
>  >> -    trace_kvm_destroy_vcpu();
>  >> +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >>       ret = kvm_arch_destroy_vcpu(cpu);
>  >>       if (ret < 0) {
>  >> @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>  >>           }
>  >>       }
>  >> -    vcpu = g_malloc0(sizeof(*vcpu));
>  >> -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  >> -    vcpu->kvm_fd = cpu->kvm_fd;
>  >> -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>  >> +    kvm_park_vcpu(cpu);
>  >>   err:
>  >>       return ret;
>  >>   }
>  >> @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned
>  >> long
>  >> vcpu_id)
>  >>           if (cpu->vcpu_id == vcpu_id) {
>  >>               int kvm_fd;
>  >> +            trace_kvm_get_vcpu(vcpu_id);
>  >> +
>  >>               QLIST_REMOVE(cpu, node);
>  >>               kvm_fd = cpu->kvm_fd;
>  >>               g_free(cpu);
>  >> @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned
>  >> long
>  >> vcpu_id)
>  >>           }
>  >>       }
>  >> -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>  >> +    return -ENOENT;
>  >>   }
>  >>   int kvm_init_vcpu(CPUState *cpu, Error **errp) @@ -389,19 +428,14
>  >> @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>  >>       trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >> -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
>  >> +    ret = kvm_create_vcpu(cpu);
>  >>       if (ret < 0) {
>  >> -        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu
>  >> failed (%lu)",
>  >> +        error_setg_errno(errp, -ret,
>  >> +                         "kvm_init_vcpu: kvm_create_vcpu failed
>  >> +(%lu)",
>  >>                            kvm_arch_vcpu_id(cpu));
>  >
>  > If a vcpu hotplug fails due to failure with kvm_create_vcpu ioctl,
>  > current behaviour would be to bring down the guest as errp is
>  > &error_fatal. Any thoughts on how do we ensure that a failure with
>  > kvm_create_vcpu ioctl for hotplugged cpus (only) doesnt bring down the
>  > guest and fail gracefully (by reporting error to user on monitor?)?
>  >
>  > regards,
>  > Harsh
>  >>           goto err;
>  >>       }
>  >> -    cpu->kvm_fd = ret;
>  >> -    cpu->kvm_state = s;
>  >> -    cpu->vcpu_dirty = true;
>  >> -    cpu->dirty_pages = 0;
>  >> -    cpu->throttle_us_per_full = 0;
>  >> -
>  >>       mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>  >>       if (mmap_size < 0) {
>  >>           ret = mmap_size;
>  >> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index
>  >> a25902597b..5558cff0dc 100644
>  >> --- a/accel/kvm/trace-events
>  >> +++ b/accel/kvm/trace-events
>  >> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev
>  >> fd %d, type 0x%x, arg %p"
>  >>   kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable
>  >> to retrieve ONEREG %" PRIu64 " from KVM: %s"
>  >>   kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable
>  >> to set ONEREG %" PRIu64 " to KVM: %s"
>  >>   kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d
>  >> id: %lu"
>  >> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index:
>  %d
>  >> id: %lu"
>  >> +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu"
>  >> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index:
>  >> +%d
>  >> id: %lu"
>  >> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d
>  >> id: %lu"
>  >>   kvm_irqchip_commit_routes(void) ""
>  >>   kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s
>  >> vector %d virq %d"
>  >>   kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
>  >> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>  >>   kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64"
>  >> pages (took %"PRIi64" us)"
>  >>   kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>  >>   kvm_dirty_ring_flush(int finished) "%d"
>  >> -kvm_destroy_vcpu(void) ""
>  >>   kvm_failed_get_vcpu_mmap_size(void) ""
>  >>   kvm_cpu_exec(void) ""
>  >>   kvm_interrupt_exit_request(void) ""
>  >> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index
>  >> fad9a7e8ff..2ed928aa71 100644
>  >> --- a/include/sysemu/kvm.h
>  >> +++ b/include/sysemu/kvm.h
>  >> @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s,
>  unsigned
>  >> int sigmask_len);
>  >>   int kvm_physical_memory_addr_from_host(KVMState *s, void
>  *ram_addr,
>  >>                                          hwaddr *phys_addr);
>  >> +/**
>  >> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
>  >> + * @cpu: QOM CPUState object for which KVM vCPU has to be
>  >> fetched/created.
>  >> + *
>  >> + * @returns: 0 when success, errno (<0) when failed.
>  >> + */
>  >> +int kvm_create_vcpu(CPUState *cpu);
>  >> +
>  >> +/**
>  >> + * kvm_park_vcpu - Park QEMU KVM vCPU context
>  >> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context
>  has to
>  >> be parked.
>  >> + *
>  >> + * @returns: none
>  >> + */
>  >> +void kvm_park_vcpu(CPUState *cpu);
>  >> +
>  >>   #endif /* NEED_CPU_H */
>  >>   void kvm_cpu_synchronize_state(CPUState *cpu);
Vishnu Pajjuri May 7, 2024, 12:39 p.m. UTC | #10
Hi Salil,

On 03-05-2024 21:53, Salil Mehta wrote:
> [EXTERNAL EMAIL NOTICE: This email originated from an external sender. Please be mindful of safe email handling and proprietary information protection practices.]
>
>
> Hi Vishnu,
>
>>   From: Vishnu Pajjuri<vishnu@amperemail.onmicrosoft.com>
>>   Sent: Thursday, April 4, 2024 3:00 PM
>>   Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code
>>
>>   Hi Salil,
>>>   On 12-03-2024 07:29, Salil Mehta wrote:
>>>   KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
>>>   is spawned. This is common to all the architectures as of now.
>>>
>>>   Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
>>>   corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
>>>   support vCPU removal. Therefore, its representative KVM vCPU object/context in
>>>   Qemu is parked.
>>>
>>>   Refactor architecture common logic so that some APIs could be reused by vCPU
>>>   Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs
>>>   with trace events instead of DPRINTF. No functional change is intended here.
>>>
>>>   Signed-off-by: Salil Mehtamailto:salil.mehta@huawei.com
>>>   Reviewed-by: Gavin Shanmailto:gshan@redhat.com
>>>   Tested-by: Vishnu Pajjurimailto:vishnu@os.amperecomputing.com
>>>   Reviewed-by: Jonathan Cameronmailto:Jonathan.Cameron@huawei.com
>>>   Tested-by: Xianglai Limailto:lixianglai@loongson.cn
>>>   Tested-by: Miguel Luismailto:miguel.luis@oracle.com
>>>   Reviewed-by: Shaoqin Huangmailto:shahuang@redhat.com
>>>   ---
>>>    accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++----------
>>>    accel/kvm/trace-events |  5 +++-
>>>    include/sysemu/kvm.h   | 16 +++++++++++
>>>    3 files changed, 69 insertions(+), 16 deletions(-)
>>>
>>>   diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>>>   index a8cecd040e..3bc3207bda 100644
>>>   --- a/accel/kvm/kvm-all.c
>>>   +++ b/accel/kvm/kvm-all.c
>>>   @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>>>    #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>>>
>>>    static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>>>   +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>>>
>>>    static inline void kvm_resample_fd_remove(int gsi)
>>>    {
>>>   @@ -314,14 +315,53 @@ err:
>>>   return ret;
>>>    }
>>>
>>>   +void kvm_park_vcpu(CPUState *cpu)
>>>   +{
>>>   +    struct KVMParkedVcpu *vcpu;
>>>   +
>>>   +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>   It's good if we add kvm_fd to trace.
>>   It will be useful to cross verify kvm_get_vcpu()'s kvm_fd with parked vcpu.
>
> Agreed. But this is currently called in context to create and destroy vCPU
> where the trace already exists with the info you are seeking. Having
> trace here might duplicate the info and end up increasing the noise.
>
> Let me know if you think otherwise or have something else to add.

This is only to provide additional information for tracing.

The intention here is to trace the vcpu_id<-->kvm_fd mapping both while parking
and while fetching a vCPU. That way we can easily compare what was parked
(kvm_park_vcpu()) against what was fetched (kvm_get_vcpu()) using this pair of
values.

>
> Thanks
>
>
>>>   +
>>>   +    vcpu = g_malloc0(sizeof(*vcpu));
>>>   +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>>>   +    vcpu->kvm_fd = cpu->kvm_fd;
>>>   +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>>>   +}
>>>   +
>>>   +int kvm_create_vcpu(CPUState *cpu)
>>>   +{
>>>   +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>>>   +    KVMState *s = kvm_state;
>>>   +    int kvm_fd;
>>>   +
>>>   +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>   vcpu_id can be used instead of kvm_arch_vcpu_id(cpu).
>
> The KVM arch vCPU ID ensures that the ID being traced is meaningful for that
> architecture. The way the CPU ID gets calculated on different architectures
> could be different. Hence, its value might be quite different.

vcpu_id is already calculated just above the trace call.

I don't think the vcpu_id value will differ by the time of tracing.

>
>>>   +
>>>   +    /* check if the KVM vCPU already exist but is parked */
>>>   +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>>>   +    if (kvm_fd < 0) {
>>>   +>   /* vCPU not parked: create a new KVM vCPU */
>>>   +>   kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>>>   +>   if (kvm_fd < 0) {
>>>   +>       error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
>>>   +>       return kvm_fd;
>>>   +>   }
>>>   +    }
>>>   +
>>>   +    cpu->kvm_fd = kvm_fd;
>>>   +    cpu->kvm_state = s;
>>>   +    cpu->vcpu_dirty = true;
>>>   +    cpu->dirty_pages = 0;
>>>   +    cpu->throttle_us_per_full = 0;
>>>   +
>>>   +    return 0;
>>>   +}
>>>   +
>>>    static int do_kvm_destroy_vcpu(CPUState *cpu)
>>>    {
>>>        KVMState *s = kvm_state;
>>>        long mmap_size;
>>>   -    struct KVMParkedVcpu *vcpu = NULL;
>>>        int ret = 0;
>>>
>>>   -    trace_kvm_destroy_vcpu();
>>>   +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>>
>>>        ret = kvm_arch_destroy_vcpu(cpu);
>>>        if (ret < 0) {
>>>   @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>>>   >    }
>>>        }
>>>
>>>   -    vcpu = g_malloc0(sizeof(*vcpu));
>>>   -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>>>   -    vcpu->kvm_fd = cpu->kvm_fd;
>>>   -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>>>   +    kvm_park_vcpu(cpu);
>>>    err:
>>>        return ret;
>>>    }
>>>   @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>>>   >    if (cpu->vcpu_id == vcpu_id) {
>>>   >        int kvm_fd;
>>>
>>>   +>       trace_kvm_get_vcpu(vcpu_id);
>>   It's good if we add kvm_fd to trace.
>>   It will be useful to cross verify kvm_get_vcpu's kvm_fd with parked vcpu.
>
> I can but I'm wondering why you've raised this? Perhaps, I'm not aware of the
> interface you are using to configure the VMs and how traces across different
> VMs get reflected. Please help in my understanding.

This is only to provide additional information; it is not specific to any
interface used to configure VMs.

_Regards_,

-Vishnu

>
>>>   +
>>>   >        QLIST_REMOVE(cpu, node);
>>>   >        kvm_fd = cpu->kvm_fd;
>>>   >        g_free(cpu);
>>>   @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>>>   >    }
>>>        }
>>>
>>>   -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>>>   +    return -ENOENT;
>>>    }
>>>
>>>    int kvm_init_vcpu(CPUState *cpu, Error **errp)
>>>   @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>>>
>>>        trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>>
>>>   -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
>>>   +    ret = kvm_create_vcpu(cpu);
>>>        if (ret < 0) {
>>>   -   error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
>>>   +   error_setg_errno(errp, -ret,
>>>   +          "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>>              kvm_arch_vcpu_id(cpu));
>>>       goto err;
>>>        }
>>>
>>>   -    cpu->kvm_fd = ret;
>>>   -    cpu->kvm_state = s;
>>>   -    cpu->vcpu_dirty = true;
>>>   -    cpu->dirty_pages = 0;
>>>   -    cpu->throttle_us_per_full = 0;
>>>   -
>>>        mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>>>        if (mmap_size < 0) {
>>>       ret = mmap_size;
>>>   diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
>>>   index a25902597b..5558cff0dc 100644
>>>   --- a/accel/kvm/trace-events
>>>   +++ b/accel/kvm/trace-events
>>>   @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
>>>    kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
>>>    kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
>>>    kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>>>   +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>>>   +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu"
>>>   +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>>>   +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>>>    kvm_irqchip_commit_routes(void) ""
>>>    kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
>>>    kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
>>>   @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>>>    kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
>>>    kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>>>    kvm_dirty_ring_flush(int finished) "%d"
>>>   -kvm_destroy_vcpu(void) ""
>>>    kvm_failed_get_vcpu_mmap_size(void) ""
>>>    kvm_cpu_exec(void) ""
>>>    kvm_interrupt_exit_request(void) ""
>>>   diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
>>>   index fad9a7e8ff..2ed928aa71 100644
>>>   --- a/include/sysemu/kvm.h
>>>   +++ b/include/sysemu/kvm.h
>>>   @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
>>>    int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
>>>   >  >  >  >  >       hwaddr *phys_addr);
>>>
>>>   +/**
>>>   + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
>>>   + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
>>>   + *
>>>   + * @returns: 0 when success, errno (<0) when failed.
>>>   + */
>>>   +int kvm_create_vcpu(CPUState *cpu);
>>>   +
>>>   +/**
>>>   + * kvm_park_vcpu - Park QEMU KVM vCPU context
>>>   + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
>>>   + *
>>>   + * @returns: none
>>>   + */
>>>   +void kvm_park_vcpu(CPUState *cpu);
>>>   +
>>>    #endif /* NEED_CPU_H */
>>>
>>>    void kvm_cpu_synchronize_state(CPUState *cpu);
>>   Otherwise, Looks good to me.  Feel free to add
>>   Reviewed-by: "Vishnu Pajjuri"mailto:vishnu@os.amperecomputing.com
>>   Thanks,
> Thanks.
> Salil
>
>
>
>>   -Vishnu
Salil Mehta May 7, 2024, 12:51 p.m. UTC | #11
Hi Vishnu,

On Tue, May 7, 2024 at 12:39 PM Vishnu Pajjuri <
vishnu@amperemail.onmicrosoft.com> wrote:

> Hi Salil,
> On 03-05-2024 21:53, Salil Mehta wrote:
>
> [EXTERNAL EMAIL NOTICE: This email originated from an external sender. Please be mindful of safe email handling and proprietary information protection practices.]
>
>
> Hi Vishnu,
>
>
>  From: Vishnu Pajjuri <vishnu@amperemail.onmicrosoft.com> <vishnu@amperemail.onmicrosoft.com>
>  Sent: Thursday, April 4, 2024 3:00 PM
>  Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code
>
>  Hi Salil,
>
>  On 12-03-2024 07:29, Salil Mehta wrote:
>  KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread
>  is spawned. This is common to all the architectures as of now.
>
>  Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the
>  corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't
>  support vCPU removal. Therefore, its representative KVM vCPU object/context in
>  Qemu is parked.
>
>  Refactor architecture common logic so that some APIs could be reused by vCPU
>  Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs
>  with trace events instead of DPRINTF. No functional change is intended here.
>
>  Signed-off-by: Salil Mehta mailto:salil.mehta@huawei.com <salil.mehta@huawei.com>
>  Reviewed-by: Gavin Shan mailto:gshan@redhat.com <gshan@redhat.com>
>  Tested-by: Vishnu Pajjuri mailto:vishnu@os.amperecomputing.com <vishnu@os.amperecomputing.com>
>  Reviewed-by: Jonathan Cameron mailto:Jonathan.Cameron@huawei.com <Jonathan.Cameron@huawei.com>
>  Tested-by: Xianglai Li mailto:lixianglai@loongson.cn <lixianglai@loongson.cn>
>  Tested-by: Miguel Luis mailto:miguel.luis@oracle.com <miguel.luis@oracle.com>
>  Reviewed-by: Shaoqin Huang mailto:shahuang@redhat.com <shahuang@redhat.com>
>  ---
>   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++----------
>   accel/kvm/trace-events |  5 +++-
>   include/sysemu/kvm.h   | 16 +++++++++++
>   3 files changed, 69 insertions(+), 16 deletions(-)
>
>  diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>  index a8cecd040e..3bc3207bda 100644
>  --- a/accel/kvm/kvm-all.c
>  +++ b/accel/kvm/kvm-all.c
>  @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>
>   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>  +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>
>   static inline void kvm_resample_fd_remove(int gsi)
>   {
>  @@ -314,14 +315,53 @@ err:
>  return ret;
>   }
>
>  +void kvm_park_vcpu(CPUState *cpu)
>  +{
>  +    struct KVMParkedVcpu *vcpu;
>  +
>  +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>
>  It's good if we add kvm_fd to trace.
>  It will be useful to cross verify kvm_get_vcpu()'s kvm_fd with parked vcpu.
>
> Agreed. But this is currently called in context to create and destroy vCPU
> where the trace already exists with the info you are seeking. Having
> trace here might duplicate the info and end up increasing the noise.
>
> Let me know if you think otherwise or have something else to add.
>
> This is only to provide additional information for tracing.
>
> The intention here is to trace mapping of vcpu_id<-->kvm_fd while parking
>
> and fetching vcpu. This way we can easily trace what is parked
> (kvm_park_vcpu()) vs fetched (kvm_get_vcpu())
>
> using pair of information.
>

Ok, No problem. I will.


> Thanks
>
>
>
>  +
>  +    vcpu = g_malloc0(sizeof(*vcpu));
>  +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  +    vcpu->kvm_fd = cpu->kvm_fd;
>  +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>  +}
>  +
>  +int kvm_create_vcpu(CPUState *cpu)
>  +{
>  +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>  +    KVMState *s = kvm_state;
>  +    int kvm_fd;
>  +
>  +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>
>  vcpu_id can be used instead of kvm_arch_vcpu_id(cpu).
>
> The KVM arch vCPU ID ensures that the ID being traced is meaningful for that
> architecture. The way the CPU ID gets calculated on different architectures
> could be different. Hence, its value might be quite different.
>
> vcpu_id is already being calculated just above trace call.
>
> I don't think the vcpu_id value will differ by the time of tracing.
>

sure.


>  +
>  +    /* check if the KVM vCPU already exist but is parked */
>  +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>  +    if (kvm_fd < 0) {
>  +>   /* vCPU not parked: create a new KVM vCPU */
>  +>   kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>  +>   if (kvm_fd < 0) {
>  +>       error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
>  +>       return kvm_fd;
>  +>   }
>  +    }
>  +
>  +    cpu->kvm_fd = kvm_fd;
>  +    cpu->kvm_state = s;
>  +    cpu->vcpu_dirty = true;
>  +    cpu->dirty_pages = 0;
>  +    cpu->throttle_us_per_full = 0;
>  +
>  +    return 0;
>  +}
>  +
>   static int do_kvm_destroy_vcpu(CPUState *cpu)
>   {
>       KVMState *s = kvm_state;
>       long mmap_size;
>  -    struct KVMParkedVcpu *vcpu = NULL;
>       int ret = 0;
>
>  -    trace_kvm_destroy_vcpu();
>  +    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>
>       ret = kvm_arch_destroy_vcpu(cpu);
>       if (ret < 0) {
>  @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
>  >    }
>       }
>
>  -    vcpu = g_malloc0(sizeof(*vcpu));
>  -    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  -    vcpu->kvm_fd = cpu->kvm_fd;
>  -    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
>  +    kvm_park_vcpu(cpu);
>   err:
>       return ret;
>   }
>  @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>  >    if (cpu->vcpu_id == vcpu_id) {
>  >        int kvm_fd;
>
>  +>       trace_kvm_get_vcpu(vcpu_id);
>
>  It's good if we add kvm_fd to trace.
>  It will be useful to cross verify kvm_get_vcpu's kvm_fd with parked vcpu.
>
> I can but I'm wondering why you've raised this? Perhaps, I'm not aware of the
> interface you are using to configure the VMs and how traces across different
> VMs get reflected. Please help in my understanding.
>
> This is to provide additional information only not specific to any
> interface to configure VMs.
>


Ok. sure.


Thanks
Salil.



> *Regards*,
>
> -Vishnu
>
>  +
>  >        QLIST_REMOVE(cpu, node);
>  >        kvm_fd = cpu->kvm_fd;
>  >        g_free(cpu);
>  @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
>  >    }
>       }
>
>  -    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
>  +    return -ENOENT;
>   }
>
>   int kvm_init_vcpu(CPUState *cpu, Error **errp)
>  @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>
>       trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>
>  -    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
>  +    ret = kvm_create_vcpu(cpu);
>       if (ret < 0) {
>  -   error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
>  +   error_setg_errno(errp, -ret,
>  +          "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>
>             kvm_arch_vcpu_id(cpu));
>
>      goto err;
>       }
>
>  -    cpu->kvm_fd = ret;
>  -    cpu->kvm_state = s;
>  -    cpu->vcpu_dirty = true;
>  -    cpu->dirty_pages = 0;
>  -    cpu->throttle_us_per_full = 0;
>  -
>       mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
>       if (mmap_size < 0) {
>      ret = mmap_size;
>  diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
>  index a25902597b..5558cff0dc 100644
>  --- a/accel/kvm/trace-events
>  +++ b/accel/kvm/trace-events
>  @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
>   kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
>   kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
>   kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>  +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>  +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu"
>  +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>  +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
>   kvm_irqchip_commit_routes(void) ""
>   kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
>   kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
>  @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s"
>   kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
>   kvm_dirty_ring_reaper_kick(const char *reason) "%s"
>   kvm_dirty_ring_flush(int finished) "%d"
>  -kvm_destroy_vcpu(void) ""
>   kvm_failed_get_vcpu_mmap_size(void) ""
>   kvm_cpu_exec(void) ""
>   kvm_interrupt_exit_request(void) ""
>  diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
>  index fad9a7e8ff..2ed928aa71 100644
>  --- a/include/sysemu/kvm.h
>  +++ b/include/sysemu/kvm.h
>  @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
>   int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
>  >  >  >  >  >       hwaddr *phys_addr);
>
>  +/**
>  + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
>  + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
>  + *
>  + * @returns: 0 when success, errno (<0) when failed.
>  + */
>  +int kvm_create_vcpu(CPUState *cpu);
>  +
>  +/**
>  + * kvm_park_vcpu - Park QEMU KVM vCPU context
>  + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
>  + *
>  + * @returns: none
>  + */
>  +void kvm_park_vcpu(CPUState *cpu);
>  +
>   #endif /* NEED_CPU_H */
>
>   void kvm_cpu_synchronize_state(CPUState *cpu);
>
>  Otherwise, Looks good to me.  Feel free to add
>  Reviewed-by: "Vishnu Pajjuri" mailto:vishnu@os.amperecomputing.com <vishnu@os.amperecomputing.com>
>  Thanks,
>
> Thanks.
> Salil
>
>
>
>
>  -Vishnu
>
>
Salil Mehta May 8, 2024, 10:46 a.m. UTC | #12
Hi Philippe,

Sorry, I missed this mail earlier.

>  From: Philippe Mathieu-Daudé <philmd@linaro.org>
>  Sent: Friday, May 3, 2024 7:23 PM
>  To: Salil Mehta <salil.mehta@huawei.com>; qemu-devel@nongnu.org;
>  qemu-arm@nongnu.org
>  
>  On 3/5/24 17:57, Salil Mehta wrote:
>  > Hi Philippe,
>  >
>  >>   From: Philippe Mathieu-Daudé <philmd@linaro.org>
>  >>   Sent: Friday, May 3, 2024 10:40 AM
>  >>   Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU
>  >>   {creation,parking} code
>  >>
>  >>   Hi Salil,
>  >>
>  >>   On 12/3/24 02:59, Salil Mehta wrote:
>  >>   > KVM vCPU creation is done once during the vCPU realization when Qemu
>  >>   > vCPU thread is spawned. This is common to all the architectures as of now.
>  >>   >
>  >>   > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
>  >>   > but the corresponding KVM vCPU object in the Host KVM is not destroyed
>  >>   > as KVM doesn't support vCPU removal. Therefore, its representative KVM
>  >>   > vCPU object/context in Qemu is parked.
>  >>   >
>  >>   > Refactor architecture common logic so that some APIs could be reused
>  >>   > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
>  >>   > Update new/old APIs with trace events instead of DPRINTF. No functional
>  >>   change is intended here.
>  >>   >
>  >>   > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>  >>   > Reviewed-by: Gavin Shan <gshan@redhat.com>
>  >>   > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>  >>   > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>  >>   > Tested-by: Xianglai Li <lixianglai@loongson.cn>
>  >>   > Tested-by: Miguel Luis <miguel.luis@oracle.com>
>  >>   > Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
>  >>   > ---
>  >>   >   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++-
>  -----
>  >>   ----
>  >>   >   accel/kvm/trace-events |  5 +++-
>  >>   >   include/sysemu/kvm.h   | 16 +++++++++++
>  >>   >   3 files changed, 69 insertions(+), 16 deletions(-)
>  >>   >
>  >>   > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
>  >>   > a8cecd040e..3bc3207bda 100644
>  >>   > --- a/accel/kvm/kvm-all.c
>  >>   > +++ b/accel/kvm/kvm-all.c
>  >>   > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>  >>   >   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>  >>   >
>  >>   >   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>  >>   > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>  >>   >
>  >>   >   static inline void kvm_resample_fd_remove(int gsi)
>  >>   >   {
>  >>   > @@ -314,14 +315,53 @@ err:
>  >>   >       return ret;
>  >>   >   }
>  >>   >
>  >>   > +void kvm_park_vcpu(CPUState *cpu)
>  >>   > +{
>  >>   > +    struct KVMParkedVcpu *vcpu;
>  >>   > +
>  >>   > +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >>   > +
>  >>   > +    vcpu = g_malloc0(sizeof(*vcpu));
>  >>   > +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>  >>   > +    vcpu->kvm_fd = cpu->kvm_fd;
>  >>   > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); }
>  >>   > +
>  >>   > +int kvm_create_vcpu(CPUState *cpu)
>  >>   > +{
>  >>   > +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>  >>   > +    KVMState *s = kvm_state;
>  >>   > +    int kvm_fd;
>  >>   > +
>  >>   > +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>  >>   > +
>  >>   > +    /* check if the KVM vCPU already exist but is parked */
>  >>   > +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>  >>   > +    if (kvm_fd < 0) {
>  >>   > +        /* vCPU not parked: create a new KVM vCPU */
>  >>   > +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>  >>   > +        if (kvm_fd < 0) {
>  >>   > +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
>  >>   > +            return kvm_fd;
>  >>   > +        }
>  >>   > +    }
>  >>   > +
>  >>   > +    cpu->kvm_fd = kvm_fd;
>  >>   > +    cpu->kvm_state = s;
>  >>   > +    cpu->vcpu_dirty = true;
>  >>   > +    cpu->dirty_pages = 0;
>  >>   > +    cpu->throttle_us_per_full = 0;
>  >>   > +
>  >>   > +    return 0;
>  >>   > +}
>  >>
>  >>   This seems generic enough to be implemented for all accelerators.
>  >>
>  >>   See AccelOpsClass in include/sysemu/accel-ops.h.
>  >>
>  >>   That said, can be done later on top.
>  >
>  > Let me understand correctly. Are you suggesting to implement above
>  > even for HVF, TCG, QTEST etc?
>  
>  Not for you to implement the other non-KVM accelerators, but since you
>  are introducing this, now is a good time to think about a generic interface.
>  
>  So far AccelOpsClass::[un]park_vcpu() handlers make sense to me.

Sure, but what is the advantage of defining these 'supporting' functions
as part of the AccelOpsClass? Each of these functions will in any case need
to be defined individually for the different accelerators. Or are we
planning to extract some common accelerator functions into a separate file
and use them across all the accelerators?

I'm surely missing some key point here.

Thanks
Salil.
Philippe Mathieu-Daudé May 10, 2024, 2:43 p.m. UTC | #13
On 8/5/24 12:46, Salil Mehta wrote:
> Hi Philippe,
> 
> Sorry, I missed this mail earlier.
> 
>>   From: Philippe Mathieu-Daudé <philmd@linaro.org>
>>   Sent: Friday, May 3, 2024 7:23 PM
>>   To: Salil Mehta <salil.mehta@huawei.com>; qemu-devel@nongnu.org;
>>   qemu-arm@nongnu.org
>>   
>>   On 3/5/24 17:57, Salil Mehta wrote:
>>   > Hi Philippe,
>>   >
>>   >>   From: Philippe Mathieu-Daudé <philmd@linaro.org>
>>   >>   Sent: Friday, May 3, 2024 10:40 AM
>>   >>   Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU
>>   >>   {creation,parking} code
>>   >>
>>   >>   Hi Salil,
>>   >>
>>   >>   On 12/3/24 02:59, Salil Mehta wrote:
>>   >>   > KVM vCPU creation is done once during the vCPU realization when Qemu
>>   >>   > vCPU thread is spawned. This is common to all the architectures as of now.
>>   >>   >
>>   >>   > Hot-unplug of vCPU results in destruction of the vCPU object in QOM
>>   >>   > but the corresponding KVM vCPU object in the Host KVM is not destroyed
>>   >>   > as KVM doesn't support vCPU removal. Therefore, its representative KVM
>>   >>   > vCPU object/context in Qemu is parked.
>>   >>   >
>>   >>   > Refactor architecture common logic so that some APIs could be reused
>>   >>   > by vCPU Hotplug code of some architectures likes ARM, Loongson etc.
>>   >>   > Update new/old APIs with trace events instead of DPRINTF. No functional
>>   >>   change is intended here.
>>   >>   >
>>   >>   > Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
>>   >>   > Reviewed-by: Gavin Shan <gshan@redhat.com>
>>   >>   > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com>
>>   >>   > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>>   >>   > Tested-by: Xianglai Li <lixianglai@loongson.cn>
>>   >>   > Tested-by: Miguel Luis <miguel.luis@oracle.com>
>>   >>   > Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
>>   >>   > ---
>>   >>   >   accel/kvm/kvm-all.c    | 64 ++++++++++++++++++++++++++++++++-
>>   -----
>>   >>   ----
>>   >>   >   accel/kvm/trace-events |  5 +++-
>>   >>   >   include/sysemu/kvm.h   | 16 +++++++++++
>>   >>   >   3 files changed, 69 insertions(+), 16 deletions(-)
>>   >>   >
>>   >>   > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index
>>   >>   > a8cecd040e..3bc3207bda 100644
>>   >>   > --- a/accel/kvm/kvm-all.c
>>   >>   > +++ b/accel/kvm/kvm-all.c
>>   >>   > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock;
>>   >>   >   #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
>>   >>   >
>>   >>   >   static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
>>   >>   > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
>>   >>   >
>>   >>   >   static inline void kvm_resample_fd_remove(int gsi)
>>   >>   >   {
>>   >>   > @@ -314,14 +315,53 @@ err:
>>   >>   >       return ret;
>>   >>   >   }
>>   >>   >
>>   >>   > +void kvm_park_vcpu(CPUState *cpu)
>>   >>   > +{
>>   >>   > +    struct KVMParkedVcpu *vcpu;
>>   >>   > +
>>   >>   > +    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>   >>   > +
>>   >>   > +    vcpu = g_malloc0(sizeof(*vcpu));
>>   >>   > +    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
>>   >>   > +    vcpu->kvm_fd = cpu->kvm_fd;
>>   >>   > +    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); }
>>   >>   > +
>>   >>   > +int kvm_create_vcpu(CPUState *cpu)
>>   >>   > +{
>>   >>   > +    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
>>   >>   > +    KVMState *s = kvm_state;
>>   >>   > +    int kvm_fd;
>>   >>   > +
>>   >>   > +    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>   >>   > +
>>   >>   > +    /* check if the KVM vCPU already exist but is parked */
>>   >>   > +    kvm_fd = kvm_get_vcpu(s, vcpu_id);
>>   >>   > +    if (kvm_fd < 0) {
>>   >>   > +        /* vCPU not parked: create a new KVM vCPU */
>>   >>   > +        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
>>   >>   > +        if (kvm_fd < 0) {
>>   >>   > +            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
>>   >>   > +            return kvm_fd;
>>   >>   > +        }
>>   >>   > +    }
>>   >>   > +
>>   >>   > +    cpu->kvm_fd = kvm_fd;
>>   >>   > +    cpu->kvm_state = s;
>>   >>   > +    cpu->vcpu_dirty = true;
>>   >>   > +    cpu->dirty_pages = 0;
>>   >>   > +    cpu->throttle_us_per_full = 0;
>>   >>   > +
>>   >>   > +    return 0;
>>   >>   > +}
>>   >>
>>   >>   This seems generic enough to be implemented for all accelerators.
>>   >>
>>   >>   See AccelOpsClass in include/sysemu/accel-ops.h.
>>   >>
>>   >>   That said, can be done later on top.
>>   >
>>   > Let me understand correctly. Are you suggesting to implement above
>>   > even for HVF, TCG, QTEST etc?
>>   
>>   Not for you to implement the other non-KVM accelerators, but since you
>>   are introducing this, now is a good time to think about a generic interface.
>>   
>>   So far AccelOpsClass::[un]park_vcpu() handlers make sense to me.
> 
> Sure, but what is the advantage of defining these 'supporting' functions
> as part of the AccelOpsClass? Each of these functions in any case will need
> to be defined individually for different Accelerators or unless we are
> planning to extract some common accelerator functions in a separate file
> and use them across all the accelerators?

kvm_arm_create_host_vcpu() [*] seems generic. Maybe we could do the
same with HVF at least.

[*] 
https://lore.kernel.org/qemu-devel/20230926100436.28284-7-salil.mehta@huawei.com/
> 
> I'm surely missing some key point here.

I started https://etherpad.opendev.org/p/QEMU_vCPU_life to
document the vCPU spaghetti code. In that big picture the "park"
method makes sense to me, but we can discuss that later. Again,
this is certainly not a blocker for your work, I'm just trying to see
the whole picture.

Regards,

Phil.
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index a8cecd040e..3bc3207bda 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -126,6 +126,7 @@  static QemuMutex kml_slots_lock;
 #define kvm_slots_unlock()  qemu_mutex_unlock(&kml_slots_lock)
 
 static void kvm_slot_init_dirty_bitmap(KVMSlot *mem);
+static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id);
 
 static inline void kvm_resample_fd_remove(int gsi)
 {
@@ -314,14 +315,53 @@  err:
     return ret;
 }
 
+void kvm_park_vcpu(CPUState *cpu)
+{
+    struct KVMParkedVcpu *vcpu;
+
+    trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+    vcpu = g_malloc0(sizeof(*vcpu));
+    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
+    vcpu->kvm_fd = cpu->kvm_fd;
+    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+}
+
+int kvm_create_vcpu(CPUState *cpu)
+{
+    unsigned long vcpu_id = kvm_arch_vcpu_id(cpu);
+    KVMState *s = kvm_state;
+    int kvm_fd;
+
+    trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+    /* check if the KVM vCPU already exist but is parked */
+    kvm_fd = kvm_get_vcpu(s, vcpu_id);
+    if (kvm_fd < 0) {
+        /* vCPU not parked: create a new KVM vCPU */
+        kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
+        if (kvm_fd < 0) {
+            error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id);
+            return kvm_fd;
+        }
+    }
+
+    cpu->kvm_fd = kvm_fd;
+    cpu->kvm_state = s;
+    cpu->vcpu_dirty = true;
+    cpu->dirty_pages = 0;
+    cpu->throttle_us_per_full = 0;
+
+    return 0;
+}
+
 static int do_kvm_destroy_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
     long mmap_size;
-    struct KVMParkedVcpu *vcpu = NULL;
     int ret = 0;
 
-    trace_kvm_destroy_vcpu();
+    trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
     ret = kvm_arch_destroy_vcpu(cpu);
     if (ret < 0) {
@@ -347,10 +387,7 @@  static int do_kvm_destroy_vcpu(CPUState *cpu)
         }
     }
 
-    vcpu = g_malloc0(sizeof(*vcpu));
-    vcpu->vcpu_id = kvm_arch_vcpu_id(cpu);
-    vcpu->kvm_fd = cpu->kvm_fd;
-    QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node);
+    kvm_park_vcpu(cpu);
 err:
     return ret;
 }
@@ -371,6 +408,8 @@  static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
         if (cpu->vcpu_id == vcpu_id) {
             int kvm_fd;
 
+            trace_kvm_get_vcpu(vcpu_id);
+
             QLIST_REMOVE(cpu, node);
             kvm_fd = cpu->kvm_fd;
             g_free(cpu);
@@ -378,7 +417,7 @@  static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id)
         }
     }
 
-    return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id);
+    return -ENOENT;
 }
 
 int kvm_init_vcpu(CPUState *cpu, Error **errp)
@@ -389,19 +428,14 @@  int kvm_init_vcpu(CPUState *cpu, Error **errp)
 
     trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
 
-    ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu));
+    ret = kvm_create_vcpu(cpu);
     if (ret < 0) {
-        error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)",
+        error_setg_errno(errp, -ret,
+                         "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
                          kvm_arch_vcpu_id(cpu));
         goto err;
     }
 
-    cpu->kvm_fd = ret;
-    cpu->kvm_state = s;
-    cpu->vcpu_dirty = true;
-    cpu->dirty_pages = 0;
-    cpu->throttle_us_per_full = 0;
-
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
         ret = mmap_size;
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index a25902597b..5558cff0dc 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -9,6 +9,10 @@  kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
 kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
 kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
 kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu"
+kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
+kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
 kvm_irqchip_commit_routes(void) ""
 kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
 kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
@@ -25,7 +29,6 @@  kvm_dirty_ring_reaper(const char *s) "%s"
 kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
 kvm_dirty_ring_reaper_kick(const char *reason) "%s"
 kvm_dirty_ring_flush(int finished) "%d"
-kvm_destroy_vcpu(void) ""
 kvm_failed_get_vcpu_mmap_size(void) ""
 kvm_cpu_exec(void) ""
 kvm_interrupt_exit_request(void) ""
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index fad9a7e8ff..2ed928aa71 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -435,6 +435,22 @@  void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len);
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
                                        hwaddr *phys_addr);
 
+/**
+ * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU
+ * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created.
+ *
+ * @returns: 0 on success, negative errno (<0) on failure.
+ */
+int kvm_create_vcpu(CPUState *cpu);
+
+/**
+ * kvm_park_vcpu - Park QEMU KVM vCPU context
+ * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked.
+ *
+ * @returns: none
+ */
+void kvm_park_vcpu(CPUState *cpu);
+
 #endif /* NEED_CPU_H */
 
 void kvm_cpu_synchronize_state(CPUState *cpu);