Message ID | 20240312020000.12992-2-salil.mehta@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add architecture agnostic code to support vCPU Hotplug | expand |
+ Vaibhav, Shiva Hi Salil, I came across your patch while trying to solve a related problem on spapr. One query below .. On 3/12/24 07:29, Salil Mehta via wrote: > KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread > is spawned. This is common to all the architectures as of now. > > Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the > corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't > support vCPU removal. Therefore, its representative KVM vCPU object/context in > Qemu is parked. > > Refactor architecture common logic so that some APIs could be reused by vCPU > Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs > with trace events instead of DPRINTF. No functional change is intended here. > > Signed-off-by: Salil Mehta <salil.mehta@huawei.com> > Reviewed-by: Gavin Shan <gshan@redhat.com> > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > Tested-by: Xianglai Li <lixianglai@loongson.cn> > Tested-by: Miguel Luis <miguel.luis@oracle.com> > Reviewed-by: Shaoqin Huang <shahuang@redhat.com> > --- > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++---------- > accel/kvm/trace-events | 5 +++- > include/sysemu/kvm.h | 16 +++++++++++ > 3 files changed, 69 insertions(+), 16 deletions(-) > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index a8cecd040e..3bc3207bda 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) > > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); > > static inline void kvm_resample_fd_remove(int gsi) > { > @@ -314,14 +315,53 @@ err: > return ret; > } > > +void kvm_park_vcpu(CPUState *cpu) > +{ > + struct KVMParkedVcpu *vcpu; > + > + 
trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > + > + vcpu = g_malloc0(sizeof(*vcpu)); > + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > + vcpu->kvm_fd = cpu->kvm_fd; > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > +} > + > +int kvm_create_vcpu(CPUState *cpu) > +{ > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); > + KVMState *s = kvm_state; > + int kvm_fd; > + > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > + > + /* check if the KVM vCPU already exist but is parked */ > + kvm_fd = kvm_get_vcpu(s, vcpu_id); > + if (kvm_fd < 0) { > + /* vCPU not parked: create a new KVM vCPU */ > + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); > + if (kvm_fd < 0) { > + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); > + return kvm_fd; > + } > + } > + > + cpu->kvm_fd = kvm_fd; > + cpu->kvm_state = s; > + cpu->vcpu_dirty = true; > + cpu->dirty_pages = 0; > + cpu->throttle_us_per_full = 0; > + > + return 0; > +} > + > static int do_kvm_destroy_vcpu(CPUState *cpu) > { > KVMState *s = kvm_state; > long mmap_size; > - struct KVMParkedVcpu *vcpu = NULL; > int ret = 0; > > - trace_kvm_destroy_vcpu(); > + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > ret = kvm_arch_destroy_vcpu(cpu); > if (ret < 0) { > @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) > } > } > > - vcpu = g_malloc0(sizeof(*vcpu)); > - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > - vcpu->kvm_fd = cpu->kvm_fd; > - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > + kvm_park_vcpu(cpu); > err: > return ret; > } > @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) > if (cpu->vcpu_id == vcpu_id) { > int kvm_fd; > > + trace_kvm_get_vcpu(vcpu_id); > + > QLIST_REMOVE(cpu, node); > kvm_fd = cpu->kvm_fd; > g_free(cpu); > @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) > } > } > > - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); > + return 
-ENOENT; > } > > int kvm_init_vcpu(CPUState *cpu, Error **errp) > @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) > > trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); > + ret = kvm_create_vcpu(cpu); > if (ret < 0) { > - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", > + error_setg_errno(errp, -ret, > + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", > kvm_arch_vcpu_id(cpu)); If a vcpu hotplug fails due to failure with kvm_create_vcpu ioctl, current behaviour would be to bring down the guest as errp is &error_fatal. Any thoughts on how do we ensure that a failure with kvm_create_vcpu ioctl for hotplugged cpus (only) doesnt bring down the guest and fail gracefully (by reporting error to user on monitor?)? regards, Harsh > goto err; > } > > - cpu->kvm_fd = ret; > - cpu->kvm_state = s; > - cpu->vcpu_dirty = true; > - cpu->dirty_pages = 0; > - cpu->throttle_us_per_full = 0; > - > mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); > if (mmap_size < 0) { > ret = mmap_size; > diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events > index a25902597b..5558cff0dc 100644 > --- a/accel/kvm/trace-events > +++ b/accel/kvm/trace-events > @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" > kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" > kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" > kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu" > +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > kvm_irqchip_commit_routes(void) "" > 
kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" > kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" > @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" > kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" > kvm_dirty_ring_reaper_kick(const char *reason) "%s" > kvm_dirty_ring_flush(int finished) "%d" > -kvm_destroy_vcpu(void) "" > kvm_failed_get_vcpu_mmap_size(void) "" > kvm_cpu_exec(void) "" > kvm_interrupt_exit_request(void) "" > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h > index fad9a7e8ff..2ed928aa71 100644 > --- a/include/sysemu/kvm.h > +++ b/include/sysemu/kvm.h > @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); > int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, > hwaddr *phys_addr); > > +/** > + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU > + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. > + * > + * @returns: 0 when success, errno (<0) when failed. > + */ > +int kvm_create_vcpu(CPUState *cpu); > + > +/** > + * kvm_park_vcpu - Park QEMU KVM vCPU context > + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. > + * > + * @returns: none > + */ > +void kvm_park_vcpu(CPUState *cpu); > + > #endif /* NEED_CPU_H */ > > void kvm_cpu_synchronize_state(CPUState *cpu);
Hi Salil, On 12-03-2024 07:29, Salil Mehta wrote: > KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread > is spawned. This is common to all the architectures as of now. > > Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the > corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't > support vCPU removal. Therefore, its representative KVM vCPU object/context in > Qemu is parked. > > Refactor architecture common logic so that some APIs could be reused by vCPU > Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs > with trace events instead of DPRINTF. No functional change is intended here. > > Signed-off-by: Salil Mehta<salil.mehta@huawei.com> > Reviewed-by: Gavin Shan<gshan@redhat.com> > Tested-by: Vishnu Pajjuri<vishnu@os.amperecomputing.com> > Reviewed-by: Jonathan Cameron<Jonathan.Cameron@huawei.com> > Tested-by: Xianglai Li<lixianglai@loongson.cn> > Tested-by: Miguel Luis<miguel.luis@oracle.com> > Reviewed-by: Shaoqin Huang<shahuang@redhat.com> > --- > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++---------- > accel/kvm/trace-events | 5 +++- > include/sysemu/kvm.h | 16 +++++++++++ > 3 files changed, 69 insertions(+), 16 deletions(-) > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index a8cecd040e..3bc3207bda 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) > > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); > > static inline void kvm_resample_fd_remove(int gsi) > { > @@ -314,14 +315,53 @@ err: > return ret; > } > > +void kvm_park_vcpu(CPUState *cpu) > +{ > + struct KVMParkedVcpu *vcpu; > + > + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); It's good if we add kvm_fd to trace. 
It will be useful to cross verify kvm_get_vcpu()'s kvm_fd with parked vcpu. > + > + vcpu = g_malloc0(sizeof(*vcpu)); > + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > + vcpu->kvm_fd = cpu->kvm_fd; > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > +} > + > +int kvm_create_vcpu(CPUState *cpu) > +{ > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); > + KVMState *s = kvm_state; > + int kvm_fd; > + > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); vcpu_id can be used instead of kvm_arch_vcpu_id(cpu). > + > + /* check if the KVM vCPU already exist but is parked */ > + kvm_fd = kvm_get_vcpu(s, vcpu_id); > + if (kvm_fd < 0) { > + /* vCPU not parked: create a new KVM vCPU */ > + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); > + if (kvm_fd < 0) { > + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); > + return kvm_fd; > + } > + } > + > + cpu->kvm_fd = kvm_fd; > + cpu->kvm_state = s; > + cpu->vcpu_dirty = true; > + cpu->dirty_pages = 0; > + cpu->throttle_us_per_full = 0; > + > + return 0; > +} > + > static int do_kvm_destroy_vcpu(CPUState *cpu) > { > KVMState *s = kvm_state; > long mmap_size; > - struct KVMParkedVcpu *vcpu = NULL; > int ret = 0; > > - trace_kvm_destroy_vcpu(); > + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > ret = kvm_arch_destroy_vcpu(cpu); > if (ret < 0) { > @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) > } > } > > - vcpu = g_malloc0(sizeof(*vcpu)); > - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > - vcpu->kvm_fd = cpu->kvm_fd; > - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > + kvm_park_vcpu(cpu); > err: > return ret; > } > @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) > if (cpu->vcpu_id == vcpu_id) { > int kvm_fd; > > + trace_kvm_get_vcpu(vcpu_id); It's good if we add kvm_fd to trace. It will be useful to cross verify kvm_get_vcpu's kvm_fd with parked vcpu. 
> + > QLIST_REMOVE(cpu, node); > kvm_fd = cpu->kvm_fd; > g_free(cpu); > @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) > } > } > > - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); > + return -ENOENT; > } > > int kvm_init_vcpu(CPUState *cpu, Error **errp) > @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) > > trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); > + ret = kvm_create_vcpu(cpu); > if (ret < 0) { > - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", > + error_setg_errno(errp, -ret, > + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", > kvm_arch_vcpu_id(cpu)); > goto err; > } > > - cpu->kvm_fd = ret; > - cpu->kvm_state = s; > - cpu->vcpu_dirty = true; > - cpu->dirty_pages = 0; > - cpu->throttle_us_per_full = 0; > - > mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); > if (mmap_size < 0) { > ret = mmap_size; > diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events > index a25902597b..5558cff0dc 100644 > --- a/accel/kvm/trace-events > +++ b/accel/kvm/trace-events > @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" > kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" > kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" > kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu" > +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > kvm_irqchip_commit_routes(void) "" > kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" > kvm_irqchip_update_msi_route(int virq) 
"Updating MSI route virq=%d" > @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" > kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" > kvm_dirty_ring_reaper_kick(const char *reason) "%s" > kvm_dirty_ring_flush(int finished) "%d" > -kvm_destroy_vcpu(void) "" > kvm_failed_get_vcpu_mmap_size(void) "" > kvm_cpu_exec(void) "" > kvm_interrupt_exit_request(void) "" > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h > index fad9a7e8ff..2ed928aa71 100644 > --- a/include/sysemu/kvm.h > +++ b/include/sysemu/kvm.h > @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); > int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, > hwaddr *phys_addr); > > +/** > + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU > + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. > + * > + * @returns: 0 when success, errno (<0) when failed. > + */ > +int kvm_create_vcpu(CPUState *cpu); > + > +/** > + * kvm_park_vcpu - Park QEMU KVM vCPU context > + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. > + * > + * @returns: none > + */ > +void kvm_park_vcpu(CPUState *cpu); > + > #endif /* NEED_CPU_H */ > > void kvm_cpu_synchronize_state(CPUState *cpu); Otherwise, Looks good to me. Feel free to add Reviewed-by: "Vishnu Pajjuri" <vishnu@os.amperecomputing.com> _Thanks_, -Vishnu
+ Nick Hi Salil, I have posted a patch [1] for ppc which is based on this refactoring patch. I see there were some comments from Vishnu on this patch. Are we expecting any further updates on this patch before merge? Thanks Harsh [1] https://lore.kernel.org/qemu-devel/a0f9b2fc-4c8a-4c37-bc36-26bbaa627fec@linux.ibm.com/T/#u On 3/22/24 13:45, Harsh Prateek Bora wrote: > + Vaibhav, Shiva > > Hi Salil, > > I came across your patch while trying to solve a related problem on > spapr. One query below .. > > On 3/12/24 07:29, Salil Mehta via wrote: >> KVM vCPU creation is done once during the vCPU realization when Qemu >> vCPU thread >> is spawned. This is common to all the architectures as of now. >> >> Hot-unplug of vCPU results in destruction of the vCPU object in QOM >> but the >> corresponding KVM vCPU object in the Host KVM is not destroyed as KVM >> doesn't >> support vCPU removal. Therefore, its representative KVM vCPU >> object/context in >> Qemu is parked. >> >> Refactor architecture common logic so that some APIs could be reused >> by vCPU >> Hotplug code of some architectures likes ARM, Loongson etc. Update >> new/old APIs >> with trace events instead of DPRINTF. No functional change is intended >> here.
>> >> Signed-off-by: Salil Mehta <salil.mehta@huawei.com> >> Reviewed-by: Gavin Shan <gshan@redhat.com> >> Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> >> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> >> Tested-by: Xianglai Li <lixianglai@loongson.cn> >> Tested-by: Miguel Luis <miguel.luis@oracle.com> >> Reviewed-by: Shaoqin Huang <shahuang@redhat.com> >> --- >> accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++---------- >> accel/kvm/trace-events | 5 +++- >> include/sysemu/kvm.h | 16 +++++++++++ >> 3 files changed, 69 insertions(+), 16 deletions(-) >> >> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c >> index a8cecd040e..3bc3207bda 100644 >> --- a/accel/kvm/kvm-all.c >> +++ b/accel/kvm/kvm-all.c >> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; >> #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) >> static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); >> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); >> static inline void kvm_resample_fd_remove(int gsi) >> { >> @@ -314,14 +315,53 @@ err: >> return ret; >> } >> +void kvm_park_vcpu(CPUState *cpu) >> +{ >> + struct KVMParkedVcpu *vcpu; >> + >> + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> + >> + vcpu = g_malloc0(sizeof(*vcpu)); >> + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); >> + vcpu->kvm_fd = cpu->kvm_fd; >> + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); >> +} >> + >> +int kvm_create_vcpu(CPUState *cpu) >> +{ >> + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); >> + KVMState *s = kvm_state; >> + int kvm_fd; >> + >> + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> + >> + /* check if the KVM vCPU already exist but is parked */ >> + kvm_fd = kvm_get_vcpu(s, vcpu_id); >> + if (kvm_fd < 0) { >> + /* vCPU not parked: create a new KVM vCPU */ >> + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); >> + if (kvm_fd < 0) { >> + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", >> vcpu_id); >> + 
return kvm_fd; >> + } >> + } >> + >> + cpu->kvm_fd = kvm_fd; >> + cpu->kvm_state = s; >> + cpu->vcpu_dirty = true; >> + cpu->dirty_pages = 0; >> + cpu->throttle_us_per_full = 0; >> + >> + return 0; >> +} >> + >> static int do_kvm_destroy_vcpu(CPUState *cpu) >> { >> KVMState *s = kvm_state; >> long mmap_size; >> - struct KVMParkedVcpu *vcpu = NULL; >> int ret = 0; >> - trace_kvm_destroy_vcpu(); >> + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> ret = kvm_arch_destroy_vcpu(cpu); >> if (ret < 0) { >> @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) >> } >> } >> - vcpu = g_malloc0(sizeof(*vcpu)); >> - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); >> - vcpu->kvm_fd = cpu->kvm_fd; >> - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); >> + kvm_park_vcpu(cpu); >> err: >> return ret; >> } >> @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long >> vcpu_id) >> if (cpu->vcpu_id == vcpu_id) { >> int kvm_fd; >> + trace_kvm_get_vcpu(vcpu_id); >> + >> QLIST_REMOVE(cpu, node); >> kvm_fd = cpu->kvm_fd; >> g_free(cpu); >> @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long >> vcpu_id) >> } >> } >> - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); >> + return -ENOENT; >> } >> int kvm_init_vcpu(CPUState *cpu, Error **errp) >> @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) >> trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); >> + ret = kvm_create_vcpu(cpu); >> if (ret < 0) { >> - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu >> failed (%lu)", >> + error_setg_errno(errp, -ret, >> + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", >> kvm_arch_vcpu_id(cpu)); > > If a vcpu hotplug fails due to failure with kvm_create_vcpu ioctl, > current behaviour would be to bring down the guest as errp is > &error_fatal. 
Any thoughts on how do we ensure that a failure with > kvm_create_vcpu ioctl for hotplugged cpus (only) doesnt bring down the > guest and fail gracefully (by reporting error to user on monitor?)? > > regards, > Harsh >> goto err; >> } >> - cpu->kvm_fd = ret; >> - cpu->kvm_state = s; >> - cpu->vcpu_dirty = true; >> - cpu->dirty_pages = 0; >> - cpu->throttle_us_per_full = 0; >> - >> mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); >> if (mmap_size < 0) { >> ret = mmap_size; >> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events >> index a25902597b..5558cff0dc 100644 >> --- a/accel/kvm/trace-events >> +++ b/accel/kvm/trace-events >> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd >> %d, type 0x%x, arg %p" >> kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to >> retrieve ONEREG %" PRIu64 " from KVM: %s" >> kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to >> set ONEREG %" PRIu64 " to KVM: %s" >> kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d >> id: %lu" >> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d >> id: %lu" >> +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu" >> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d >> id: %lu" >> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d >> id: %lu" >> kvm_irqchip_commit_routes(void) "" >> kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s >> vector %d virq %d" >> kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" >> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" >> kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" >> pages (took %"PRIi64" us)" >> kvm_dirty_ring_reaper_kick(const char *reason) "%s" >> kvm_dirty_ring_flush(int finished) "%d" >> -kvm_destroy_vcpu(void) "" >> kvm_failed_get_vcpu_mmap_size(void) "" >> kvm_cpu_exec(void) "" >> kvm_interrupt_exit_request(void) "" >> diff --git a/include/sysemu/kvm.h 
b/include/sysemu/kvm.h >> index fad9a7e8ff..2ed928aa71 100644 >> --- a/include/sysemu/kvm.h >> +++ b/include/sysemu/kvm.h >> @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned >> int sigmask_len); >> int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, >> hwaddr *phys_addr); >> +/** >> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU >> + * @cpu: QOM CPUState object for which KVM vCPU has to be >> fetched/created. >> + * >> + * @returns: 0 when success, errno (<0) when failed. >> + */ >> +int kvm_create_vcpu(CPUState *cpu); >> + >> +/** >> + * kvm_park_vcpu - Park QEMU KVM vCPU context >> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to >> be parked. >> + * >> + * @returns: none >> + */ >> +void kvm_park_vcpu(CPUState *cpu); >> + >> #endif /* NEED_CPU_H */ >> void kvm_cpu_synchronize_state(CPUState *cpu);
Hi Salil, On 12/3/24 02:59, Salil Mehta wrote: > KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread > is spawned. This is common to all the architectures as of now. > > Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the > corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't > support vCPU removal. Therefore, its representative KVM vCPU object/context in > Qemu is parked. > > Refactor architecture common logic so that some APIs could be reused by vCPU > Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs > with trace events instead of DPRINTF. No functional change is intended here. > > Signed-off-by: Salil Mehta <salil.mehta@huawei.com> > Reviewed-by: Gavin Shan <gshan@redhat.com> > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > Tested-by: Xianglai Li <lixianglai@loongson.cn> > Tested-by: Miguel Luis <miguel.luis@oracle.com> > Reviewed-by: Shaoqin Huang <shahuang@redhat.com> > --- > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++---------- > accel/kvm/trace-events | 5 +++- > include/sysemu/kvm.h | 16 +++++++++++ > 3 files changed, 69 insertions(+), 16 deletions(-) > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index a8cecd040e..3bc3207bda 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) > > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); > > static inline void kvm_resample_fd_remove(int gsi) > { > @@ -314,14 +315,53 @@ err: > return ret; > } > > +void kvm_park_vcpu(CPUState *cpu) > +{ > + struct KVMParkedVcpu *vcpu; > + > + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > + > + vcpu = g_malloc0(sizeof(*vcpu)); > + vcpu->vcpu_id = 
kvm_arch_vcpu_id(cpu); > + vcpu->kvm_fd = cpu->kvm_fd; > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > +} > + > +int kvm_create_vcpu(CPUState *cpu) > +{ > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); > + KVMState *s = kvm_state; > + int kvm_fd; > + > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > + > + /* check if the KVM vCPU already exist but is parked */ > + kvm_fd = kvm_get_vcpu(s, vcpu_id); > + if (kvm_fd < 0) { > + /* vCPU not parked: create a new KVM vCPU */ > + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); > + if (kvm_fd < 0) { > + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); > + return kvm_fd; > + } > + } > + > + cpu->kvm_fd = kvm_fd; > + cpu->kvm_state = s; > + cpu->vcpu_dirty = true; > + cpu->dirty_pages = 0; > + cpu->throttle_us_per_full = 0; > + > + return 0; > +} This seems generic enough to be implemented for all accelerators. See AccelOpsClass in include/sysemu/accel-ops.h. That said, can be done later on top.
Hi Philippe, > From: Philippe Mathieu-Daudé <philmd@linaro.org> > Sent: Friday, May 3, 2024 10:40 AM > Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU > {creation,parking} code > > Hi Salil, > > On 12/3/24 02:59, Salil Mehta wrote: > > KVM vCPU creation is done once during the vCPU realization when Qemu > > vCPU thread is spawned. This is common to all the architectures as of now. > > > > Hot-unplug of vCPU results in destruction of the vCPU object in QOM > > but the corresponding KVM vCPU object in the Host KVM is not destroyed > > as KVM doesn't support vCPU removal. Therefore, its representative KVM > > vCPU object/context in Qemu is parked. > > > > Refactor architecture common logic so that some APIs could be reused > > by vCPU Hotplug code of some architectures likes ARM, Loongson etc. > > Update new/old APIs with trace events instead of DPRINTF. No functional > change is intended here. > > > > Signed-off-by: Salil Mehta <salil.mehta@huawei.com> > > Reviewed-by: Gavin Shan <gshan@redhat.com> > > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > Tested-by: Xianglai Li <lixianglai@loongson.cn> > > Tested-by: Miguel Luis <miguel.luis@oracle.com> > > Reviewed-by: Shaoqin Huang <shahuang@redhat.com> > > --- > > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++------ > ---- > > accel/kvm/trace-events | 5 +++- > > include/sysemu/kvm.h | 16 +++++++++++ > > 3 files changed, 69 insertions(+), 16 deletions(-) > > > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index > > a8cecd040e..3bc3207bda 100644 > > --- a/accel/kvm/kvm-all.c > > +++ b/accel/kvm/kvm-all.c > > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; > > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) > > > > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); > > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); > > > > static inline void kvm_resample_fd_remove(int gsi) 
> > { > > @@ -314,14 +315,53 @@ err: > > return ret; > > } > > > > +void kvm_park_vcpu(CPUState *cpu) > > +{ > > + struct KVMParkedVcpu *vcpu; > > + > > + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > + > > + vcpu = g_malloc0(sizeof(*vcpu)); > > + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > > + vcpu->kvm_fd = cpu->kvm_fd; > > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); } > > + > > +int kvm_create_vcpu(CPUState *cpu) > > +{ > > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); > > + KVMState *s = kvm_state; > > + int kvm_fd; > > + > > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > + > > + /* check if the KVM vCPU already exist but is parked */ > > + kvm_fd = kvm_get_vcpu(s, vcpu_id); > > + if (kvm_fd < 0) { > > + /* vCPU not parked: create a new KVM vCPU */ > > + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); > > + if (kvm_fd < 0) { > > + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); > > + return kvm_fd; > > + } > > + } > > + > > + cpu->kvm_fd = kvm_fd; > > + cpu->kvm_state = s; > > + cpu->vcpu_dirty = true; > > + cpu->dirty_pages = 0; > > + cpu->throttle_us_per_full = 0; > > + > > + return 0; > > +} > > This seems generic enough to be implemented for all accelerators. > > See AccelOpsClass in include/sysemu/accel-ops.h. > > That said, can be done later on top. Let me understand correctly. Are you suggesting to implement above even for HVF, TCG, QTEST etc? Thanks Salil.
Hi Vishnu, > From: Vishnu Pajjuri <vishnu@amperemail.onmicrosoft.com> > Sent: Thursday, April 4, 2024 3:00 PM > Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code > > Hi Salil, >> On 12-03-2024 07:29, Salil Mehta wrote: >> KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread >> is spawned. This is common to all the architectures as of now. >> >> Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the >> corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't >> support vCPU removal. Therefore, its representative KVM vCPU object/context in >> Qemu is parked. >> >> Refactor architecture common logic so that some APIs could be reused by vCPU >> Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs >> with trace events instead of DPRINTF. No functional change is intended here. >> >> Signed-off-by: Salil Mehta mailto:salil.mehta@huawei.com >> Reviewed-by: Gavin Shan mailto:gshan@redhat.com >> Tested-by: Vishnu Pajjuri mailto:vishnu@os.amperecomputing.com >> Reviewed-by: Jonathan Cameron mailto:Jonathan.Cameron@huawei.com >> Tested-by: Xianglai Li mailto:lixianglai@loongson.cn >> Tested-by: Miguel Luis mailto:miguel.luis@oracle.com >> Reviewed-by: Shaoqin Huang mailto:shahuang@redhat.com >> --- >> accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++---------- >> accel/kvm/trace-events | 5 +++- >> include/sysemu/kvm.h | 16 +++++++++++ >> 3 files changed, 69 insertions(+), 16 deletions(-) >> >> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c >> index a8cecd040e..3bc3207bda 100644 >> --- a/accel/kvm/kvm-all.c >> +++ b/accel/kvm/kvm-all.c >> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; >> #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) >> >> static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); >> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); >> >> static inline void kvm_resample_fd_remove(int 
gsi) >> { >> @@ -314,14 +315,53 @@ err: >> return ret; >> } >> >> +void kvm_park_vcpu(CPUState *cpu) >> +{ >> + struct KVMParkedVcpu *vcpu; >> + >> + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > It's good if we add kvm_fd to trace. > It will be useful to cross verify kvm_get_vcpu()'s kvm_fd with parked vcpu. Agreed. But this is currently called in the context of creating and destroying a vCPU, where the trace already exists with the info you are seeking. Having trace here might duplicate the info and end up increasing the noise. Let me know if you think otherwise or have something else to add. Thanks >> + >> + vcpu = g_malloc0(sizeof(*vcpu)); >> + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); >> + vcpu->kvm_fd = cpu->kvm_fd; >> + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); >> +} >> + >> +int kvm_create_vcpu(CPUState *cpu) >> +{ >> + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); >> + KVMState *s = kvm_state; >> + int kvm_fd; >> + >> + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > vcpu_id can be used instead of kvm_arch_vcpu_id(cpu). KVM arch VCPU Id ensures that ID being traced is meaningful for that architecture. The way the CPU ID gets calculated on different architectures could be different. Hence, its value might be quite different.
>> + >> + /* check if the KVM vCPU already exist but is parked */ >> + kvm_fd = kvm_get_vcpu(s, vcpu_id); >> + if (kvm_fd < 0) { >> +> /* vCPU not parked: create a new KVM vCPU */ >> +> kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); >> +> if (kvm_fd < 0) { >> +> error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); >> +> return kvm_fd; >> +> } >> + } >> + >> + cpu->kvm_fd = kvm_fd; >> + cpu->kvm_state = s; >> + cpu->vcpu_dirty = true; >> + cpu->dirty_pages = 0; >> + cpu->throttle_us_per_full = 0; >> + >> + return 0; >> +} >> + >> static int do_kvm_destroy_vcpu(CPUState *cpu) >> { >> KVMState *s = kvm_state; >> long mmap_size; >> - struct KVMParkedVcpu *vcpu = NULL; >> int ret = 0; >> >> - trace_kvm_destroy_vcpu(); >> + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> >> ret = kvm_arch_destroy_vcpu(cpu); >> if (ret < 0) { >> @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) >> > } >> } >> >> - vcpu = g_malloc0(sizeof(*vcpu)); >> - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); >> - vcpu->kvm_fd = cpu->kvm_fd; >> - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); >> + kvm_park_vcpu(cpu); >> err: >> return ret; >> } >> @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) >> > if (cpu->vcpu_id == vcpu_id) { >> > int kvm_fd; >> >> +> trace_kvm_get_vcpu(vcpu_id); > It's good if we add kvm_fd to trace. > It will be useful to cross verify kvm_get_vcpu's kvm_fd with parked vcpu. I can but I'm wondering why you've raised this? Perhaps, I'm not aware of the interface you are using to configure the VMs and how traces across diferent VMs get reflected. Please help in my understanding. 
>> + >> > QLIST_REMOVE(cpu, node); >> > kvm_fd = cpu->kvm_fd; >> > g_free(cpu); >> @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) >> > } >> } >> >> - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); >> + return -ENOENT; >> } >> >> int kvm_init_vcpu(CPUState *cpu, Error **errp) >> @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) >> >> trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> >> - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); >> + ret = kvm_create_vcpu(cpu); >> if (ret < 0) { >> - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", >> + error_setg_errno(errp, -ret, >> + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", > kvm_arch_vcpu_id(cpu)); >> goto err; >> } >> >> - cpu->kvm_fd = ret; >> - cpu->kvm_state = s; >> - cpu->vcpu_dirty = true; >> - cpu->dirty_pages = 0; >> - cpu->throttle_us_per_full = 0; >> - >> mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); >> if (mmap_size < 0) { >> ret = mmap_size; >> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events >> index a25902597b..5558cff0dc 100644 >> --- a/accel/kvm/trace-events >> +++ b/accel/kvm/trace-events >> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" >> kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" >> kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" >> kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" >> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" >> +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu" >> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" >> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" >> kvm_irqchip_commit_routes(void) "" >> kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector 
%d virq %d" >> kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" >> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" >> kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" >> kvm_dirty_ring_reaper_kick(const char *reason) "%s" >> kvm_dirty_ring_flush(int finished) "%d" >> -kvm_destroy_vcpu(void) "" >> kvm_failed_get_vcpu_mmap_size(void) "" >> kvm_cpu_exec(void) "" >> kvm_interrupt_exit_request(void) "" >> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h >> index fad9a7e8ff..2ed928aa71 100644 >> --- a/include/sysemu/kvm.h >> +++ b/include/sysemu/kvm.h >> @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); >> int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, >> > > > > > hwaddr *phys_addr); >> >> +/** >> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU >> + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. >> + * >> + * @returns: 0 when success, errno (<0) when failed. >> + */ >> +int kvm_create_vcpu(CPUState *cpu); >> + >> +/** >> + * kvm_park_vcpu - Park QEMU KVM vCPU context >> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. >> + * >> + * @returns: none >> + */ >> +void kvm_park_vcpu(CPUState *cpu); >> + >> #endif /* NEED_CPU_H */ >> >> void kvm_cpu_synchronize_state(CPUState *cpu); > Otherwise, Looks good to me. Feel free to add > Reviewed-by: "Vishnu Pajjuri" mailto:vishnu@os.amperecomputing.com > Thanks, Thanks. Salil > -Vishnu
On 3/5/24 17:57, Salil Mehta wrote: > Hi Philippe, > >> From: Philippe Mathieu-Daudé <philmd@linaro.org> >> Sent: Friday, May 3, 2024 10:40 AM >> Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU >> {creation,parking} code >> >> Hi Salil, >> >> On 12/3/24 02:59, Salil Mehta wrote: >> > KVM vCPU creation is done once during the vCPU realization when Qemu >> > vCPU thread is spawned. This is common to all the architectures as of now. >> > >> > Hot-unplug of vCPU results in destruction of the vCPU object in QOM >> > but the corresponding KVM vCPU object in the Host KVM is not destroyed >> > as KVM doesn't support vCPU removal. Therefore, its representative KVM >> > vCPU object/context in Qemu is parked. >> > >> > Refactor architecture common logic so that some APIs could be reused >> > by vCPU Hotplug code of some architectures likes ARM, Loongson etc. >> > Update new/old APIs with trace events instead of DPRINTF. No functional >> change is intended here. >> > >> > Signed-off-by: Salil Mehta <salil.mehta@huawei.com> >> > Reviewed-by: Gavin Shan <gshan@redhat.com> >> > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> >> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> >> > Tested-by: Xianglai Li <lixianglai@loongson.cn> >> > Tested-by: Miguel Luis <miguel.luis@oracle.com> >> > Reviewed-by: Shaoqin Huang <shahuang@redhat.com> >> > --- >> > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++------ >> ---- >> > accel/kvm/trace-events | 5 +++- >> > include/sysemu/kvm.h | 16 +++++++++++ >> > 3 files changed, 69 insertions(+), 16 deletions(-) >> > >> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index >> > a8cecd040e..3bc3207bda 100644 >> > --- a/accel/kvm/kvm-all.c >> > +++ b/accel/kvm/kvm-all.c >> > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; >> > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) >> > >> > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); >> > +static int kvm_get_vcpu(KVMState *s, 
unsigned long vcpu_id); >> > >> > static inline void kvm_resample_fd_remove(int gsi) >> > { >> > @@ -314,14 +315,53 @@ err: >> > return ret; >> > } >> > >> > +void kvm_park_vcpu(CPUState *cpu) >> > +{ >> > + struct KVMParkedVcpu *vcpu; >> > + >> > + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> > + >> > + vcpu = g_malloc0(sizeof(*vcpu)); >> > + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); >> > + vcpu->kvm_fd = cpu->kvm_fd; >> > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); } >> > + >> > +int kvm_create_vcpu(CPUState *cpu) >> > +{ >> > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); >> > + KVMState *s = kvm_state; >> > + int kvm_fd; >> > + >> > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> > + >> > + /* check if the KVM vCPU already exist but is parked */ >> > + kvm_fd = kvm_get_vcpu(s, vcpu_id); >> > + if (kvm_fd < 0) { >> > + /* vCPU not parked: create a new KVM vCPU */ >> > + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); >> > + if (kvm_fd < 0) { >> > + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); >> > + return kvm_fd; >> > + } >> > + } >> > + >> > + cpu->kvm_fd = kvm_fd; >> > + cpu->kvm_state = s; >> > + cpu->vcpu_dirty = true; >> > + cpu->dirty_pages = 0; >> > + cpu->throttle_us_per_full = 0; >> > + >> > + return 0; >> > +} >> >> This seems generic enough to be implemented for all accelerators. >> >> See AccelOpsClass in include/sysemu/accel-ops.h. >> >> That said, can be done later on top. > > Let me understand correctly. Are you suggesting to implement above even for > HVF, TCG, QTEST etc? Not for you to implement the other non-KVM accelerators, but since you are introducing this, now is a good time to think about a generic interface. So far AccelOpsClass::[un]park_vcpu() handlers make sense to me. > Thanks > Salil. > > > >
Hi Harsh, Sorry for the delay in my reply. I've been off the grid for some time so I missed this earlier mail. Please find my reply to your query below. Thanks > From: Harsh Prateek Bora <harshpb@linux.ibm.com> > Sent: Friday, March 22, 2024 8:15 AM > > + Vaibhav, Shiva > > Hi Salil, > > I came across your patch while trying to solve a related problem on spapr. > One query below .. > > On 3/12/24 07:29, Salil Mehta via wrote: > > KVM vCPU creation is done once during the vCPU realization when Qemu > > vCPU thread is spawned. This is common to all the architectures as of now. > > > > Hot-unplug of vCPU results in destruction of the vCPU object in QOM > > but the corresponding KVM vCPU object in the Host KVM is not destroyed > > as KVM doesn't support vCPU removal. Therefore, its representative KVM > > vCPU object/context in Qemu is parked. > > > > Refactor architecture common logic so that some APIs could be reused > > by vCPU Hotplug code of some architectures likes ARM, Loongson etc. > > Update new/old APIs with trace events instead of DPRINTF. No functional > change is intended here. 
> > > > Signed-off-by: Salil Mehta <salil.mehta@huawei.com> > > Reviewed-by: Gavin Shan <gshan@redhat.com> > > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > Tested-by: Xianglai Li <lixianglai@loongson.cn> > > Tested-by: Miguel Luis <miguel.luis@oracle.com> > > Reviewed-by: Shaoqin Huang <shahuang@redhat.com> > > --- > > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++------ > ---- > > accel/kvm/trace-events | 5 +++- > > include/sysemu/kvm.h | 16 +++++++++++ > > 3 files changed, 69 insertions(+), 16 deletions(-) > > > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index > > a8cecd040e..3bc3207bda 100644 > > --- a/accel/kvm/kvm-all.c > > +++ b/accel/kvm/kvm-all.c > > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; > > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) > > > > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); > > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); > > > > static inline void kvm_resample_fd_remove(int gsi) > > { > > @@ -314,14 +315,53 @@ err: > > return ret; > > } > > > > +void kvm_park_vcpu(CPUState *cpu) > > +{ > > + struct KVMParkedVcpu *vcpu; > > + > > + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > + > > + vcpu = g_malloc0(sizeof(*vcpu)); > > + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > > + vcpu->kvm_fd = cpu->kvm_fd; > > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); } > > + > > +int kvm_create_vcpu(CPUState *cpu) > > +{ > > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); > > + KVMState *s = kvm_state; > > + int kvm_fd; > > + > > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > + > > + /* check if the KVM vCPU already exist but is parked */ > > + kvm_fd = kvm_get_vcpu(s, vcpu_id); > > + if (kvm_fd < 0) { > > + /* vCPU not parked: create a new KVM vCPU */ > > + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); > > + if (kvm_fd < 0) { > > + 
error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", > vcpu_id); > > + return kvm_fd; > > + } > > + } > > + > > + cpu->kvm_fd = kvm_fd; > > + cpu->kvm_state = s; > > + cpu->vcpu_dirty = true; > > + cpu->dirty_pages = 0; > > + cpu->throttle_us_per_full = 0; > > + > > + return 0; > > +} > > + > > static int do_kvm_destroy_vcpu(CPUState *cpu) > > { > > KVMState *s = kvm_state; > > long mmap_size; > > - struct KVMParkedVcpu *vcpu = NULL; > > int ret = 0; > > > > - trace_kvm_destroy_vcpu(); > > + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > > > ret = kvm_arch_destroy_vcpu(cpu); > > if (ret < 0) { > > @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) > > } > > } > > > > - vcpu = g_malloc0(sizeof(*vcpu)); > > - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > > - vcpu->kvm_fd = cpu->kvm_fd; > > - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > > + kvm_park_vcpu(cpu); > > err: > > return ret; > > } > > @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned > long vcpu_id) > > if (cpu->vcpu_id == vcpu_id) { > > int kvm_fd; > > > > + trace_kvm_get_vcpu(vcpu_id); > > + > > QLIST_REMOVE(cpu, node); > > kvm_fd = cpu->kvm_fd; > > g_free(cpu); > > @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned > long vcpu_id) > > } > > } > > > > - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); > > + return -ENOENT; > > } > > > > int kvm_init_vcpu(CPUState *cpu, Error **errp) @@ -389,19 +428,14 @@ > > int kvm_init_vcpu(CPUState *cpu, Error **errp) > > > > trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > > > - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); > > + ret = kvm_create_vcpu(cpu); > > if (ret < 0) { > > - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed > (%lu)", > > + error_setg_errno(errp, -ret, > > + "kvm_init_vcpu: kvm_create_vcpu failed > > + (%lu)", > > kvm_arch_vcpu_id(cpu)); > > If a vcpu hotplug fails due to failure with kvm_create_vcpu ioctl, current > 
behaviour would be to bring down the guest as errp is &error_fatal. Any > thoughts on how do we ensure that a failure with kvm_create_vcpu ioctl for > hotplugged cpus (only) doesnt bring down the guest and fail gracefully (by > reporting error to user on monitor?)? In the ARM, we are by design pre-creating all the vCPUs in the KVM during the Qemu/KVM Init. This is to satisfy the constraints posed by ARM architecture as we are not allowed to meddle with any initialization at KVM level or Guest kernel level after system has booted. The constraints are mainly coming from GIC and related per-CPU features which can only be initialized once during init in the KVM and then their presence is made known to the Guest kernel only once during enumeration of the CPUs and related GIC CPU interfaces. The latter cannot be changed either. Hence, if all of the KVM vCPUs have been created successfully during init then hot(un)plugging operations later won't have fatal initialization errors at the KVM as all operations get handled at QOM level only for the hot(un)plugged vCPUs. I feel if there is a failure to create KVM vCPU at Qemu KVM Init time then there is something severely wrong either with the inputs or the system. Hence, to keep the handling simple I was in favor of aborting the initialization. But all of the above is ARM arch specific. Do you have anything specific in mind why you need graceful handling at the init time? Thanks Salil. > > regards, > Harsh > > goto err; > > } > >
Hello, Just replied to your other thread just now. Sorry for catching up on everything late. Thanks > From: Harsh Prateek Bora <harshpb@linux.ibm.com> > Sent: Tuesday, April 23, 2024 7:44 AM > > + Nick > > Hi Salil, > I have posted a patch [1] for ppc which based on this refactoring patch. > I see there were some comments from Vishnu on this patch. > Are we expecting any further updates on this patch before merge? Yes, a few of them and I'm working towards it. I've received most of the reviews and SOBs last year itself. There are a few minor comments to be addressed before I can float the V9 version of this patch-set. I'm planning to push that for review in two weeks' time along with RFC V3 of the architecture specific code. Thanks Salil. > > Thanks > Harsh > > [1] > https://lore.kernel.org/qemu-devel/a0f9b2fc-4c8a-4c37-bc36- > 26bbaa627fec@linux.ibm.com/T/#u > > On 3/22/24 13:45, Harsh Prateek Bora wrote: > > + Vaibhav, Shiva > > > > Hi Salil, > > > > I came across your patch while trying to solve a related problem on > > spapr. One query below .. > > > > On 3/12/24 07:29, Salil Mehta via wrote: > >> KVM vCPU creation is done once during the vCPU realization when > Qemu > >> vCPU thread is spawned. This is common to all the architectures as of > >> now. > >> > >> Hot-unplug of vCPU results in destruction of the vCPU object in QOM > >> but the corresponding KVM vCPU object in the Host KVM is not > >> destroyed as KVM doesn't support vCPU removal. Therefore, its > >> representative KVM vCPU object/context in Qemu is parked. > >> > >> Refactor architecture common logic so that some APIs could be reused > >> by vCPU Hotplug code of some architectures likes ARM, Loongson etc. > >> Update new/old APIs with trace events instead of DPRINTF. No > >> functional change is intended here. 
> >> > >> Signed-off-by: Salil Mehta <salil.mehta@huawei.com> > >> Reviewed-by: Gavin Shan <gshan@redhat.com> > >> Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> > >> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > >> Tested-by: Xianglai Li <lixianglai@loongson.cn> > >> Tested-by: Miguel Luis <miguel.luis@oracle.com> > >> Reviewed-by: Shaoqin Huang <shahuang@redhat.com> > >> --- > >> accel/kvm/kvm-all.c | 64 > >> ++++++++++++++++++++++++++++++++---------- > >> accel/kvm/trace-events | 5 +++- > >> include/sysemu/kvm.h | 16 +++++++++++ > >> 3 files changed, 69 insertions(+), 16 deletions(-) > >> > >> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index > >> a8cecd040e..3bc3207bda 100644 > >> --- a/accel/kvm/kvm-all.c > >> +++ b/accel/kvm/kvm-all.c > >> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; > >> #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) > >> static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); > >> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); > >> static inline void kvm_resample_fd_remove(int gsi) > >> { > >> @@ -314,14 +315,53 @@ err: > >> return ret; > >> } > >> +void kvm_park_vcpu(CPUState *cpu) > >> +{ > >> + struct KVMParkedVcpu *vcpu; > >> + > >> + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > >> + > >> + vcpu = g_malloc0(sizeof(*vcpu)); > >> + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > >> + vcpu->kvm_fd = cpu->kvm_fd; > >> + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > } > >> + > >> +int kvm_create_vcpu(CPUState *cpu) > >> +{ > >> + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); > >> + KVMState *s = kvm_state; > >> + int kvm_fd; > >> + > >> + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > >> + > >> + /* check if the KVM vCPU already exist but is parked */ > >> + kvm_fd = kvm_get_vcpu(s, vcpu_id); > >> + if (kvm_fd < 0) { > >> + /* vCPU not parked: create a new KVM vCPU */ > >> + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, 
vcpu_id); > >> + if (kvm_fd < 0) { > >> + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU > >> +%lu", > >> vcpu_id); > >> + return kvm_fd; > >> + } > >> + } > >> + > >> + cpu->kvm_fd = kvm_fd; > >> + cpu->kvm_state = s; > >> + cpu->vcpu_dirty = true; > >> + cpu->dirty_pages = 0; > >> + cpu->throttle_us_per_full = 0; > >> + > >> + return 0; > >> +} > >> + > >> static int do_kvm_destroy_vcpu(CPUState *cpu) > >> { > >> KVMState *s = kvm_state; > >> long mmap_size; > >> - struct KVMParkedVcpu *vcpu = NULL; > >> int ret = 0; > >> - trace_kvm_destroy_vcpu(); > >> + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > >> ret = kvm_arch_destroy_vcpu(cpu); > >> if (ret < 0) { > >> @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) > >> } > >> } > >> - vcpu = g_malloc0(sizeof(*vcpu)); > >> - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > >> - vcpu->kvm_fd = cpu->kvm_fd; > >> - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > >> + kvm_park_vcpu(cpu); > >> err: > >> return ret; > >> } > >> @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned > >> long > >> vcpu_id) > >> if (cpu->vcpu_id == vcpu_id) { > >> int kvm_fd; > >> + trace_kvm_get_vcpu(vcpu_id); > >> + > >> QLIST_REMOVE(cpu, node); > >> kvm_fd = cpu->kvm_fd; > >> g_free(cpu); > >> @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned > >> long > >> vcpu_id) > >> } > >> } > >> - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); > >> + return -ENOENT; > >> } > >> int kvm_init_vcpu(CPUState *cpu, Error **errp) @@ -389,19 +428,14 > >> @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) > >> trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > >> - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); > >> + ret = kvm_create_vcpu(cpu); > >> if (ret < 0) { > >> - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu > >> failed (%lu)", > >> + error_setg_errno(errp, -ret, > >> + "kvm_init_vcpu: kvm_create_vcpu failed > >> +(%lu)", > >> 
kvm_arch_vcpu_id(cpu)); > > > > If a vcpu hotplug fails due to failure with kvm_create_vcpu ioctl, > > current behaviour would be to bring down the guest as errp is > > &error_fatal. Any thoughts on how do we ensure that a failure with > > kvm_create_vcpu ioctl for hotplugged cpus (only) doesnt bring down the > > guest and fail gracefully (by reporting error to user on monitor?)? > > > > regards, > > Harsh > >> goto err; > >> } > >> - cpu->kvm_fd = ret; > >> - cpu->kvm_state = s; > >> - cpu->vcpu_dirty = true; > >> - cpu->dirty_pages = 0; > >> - cpu->throttle_us_per_full = 0; > >> - > >> mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); > >> if (mmap_size < 0) { > >> ret = mmap_size; > >> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index > >> a25902597b..5558cff0dc 100644 > >> --- a/accel/kvm/trace-events > >> +++ b/accel/kvm/trace-events > >> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev > >> fd %d, type 0x%x, arg %p" > >> kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable > >> to retrieve ONEREG %" PRIu64 " from KVM: %s" > >> kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable > >> to set ONEREG %" PRIu64 " to KVM: %s" > >> kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d > >> id: %lu" > >> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: > %d > >> id: %lu" > >> +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu" > >> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: > >> +%d > >> id: %lu" > >> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d > >> id: %lu" > >> kvm_irqchip_commit_routes(void) "" > >> kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s > >> vector %d virq %d" > >> kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" > >> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" > >> kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" > >> pages (took %"PRIi64" 
us)" > >> kvm_dirty_ring_reaper_kick(const char *reason) "%s" > >> kvm_dirty_ring_flush(int finished) "%d" > >> -kvm_destroy_vcpu(void) "" > >> kvm_failed_get_vcpu_mmap_size(void) "" > >> kvm_cpu_exec(void) "" > >> kvm_interrupt_exit_request(void) "" > >> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index > >> fad9a7e8ff..2ed928aa71 100644 > >> --- a/include/sysemu/kvm.h > >> +++ b/include/sysemu/kvm.h > >> @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, > unsigned > >> int sigmask_len); > >> int kvm_physical_memory_addr_from_host(KVMState *s, void > *ram_addr, > >> hwaddr *phys_addr); > >> +/** > >> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU > >> + * @cpu: QOM CPUState object for which KVM vCPU has to be > >> fetched/created. > >> + * > >> + * @returns: 0 when success, errno (<0) when failed. > >> + */ > >> +int kvm_create_vcpu(CPUState *cpu); > >> + > >> +/** > >> + * kvm_park_vcpu - Park QEMU KVM vCPU context > >> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context > has to > >> be parked. > >> + * > >> + * @returns: none > >> + */ > >> +void kvm_park_vcpu(CPUState *cpu); > >> + > >> #endif /* NEED_CPU_H */ > >> void kvm_cpu_synchronize_state(CPUState *cpu);
Hi Salil, On 03-05-2024 21:53, Salil Mehta wrote: > [EXTERNAL EMAIL NOTICE: This email originated from an external sender. Please be mindful of safe email handling and proprietary information protection practices.] > > > Hi Vishnu, > >> From: Vishnu Pajjuri<vishnu@amperemail.onmicrosoft.com> >> Sent: Thursday, April 4, 2024 3:00 PM >> Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code >> >> Hi Salil, >>> On 12-03-2024 07:29, Salil Mehta wrote: >>> KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread >>> is spawned. This is common to all the architectures as of now. >>> >>> Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the >>> corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't >>> support vCPU removal. Therefore, its representative KVM vCPU object/context in >>> Qemu is parked. >>> >>> Refactor architecture common logic so that some APIs could be reused by vCPU >>> Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs >>> with trace events instead of DPRINTF. No functional change is intended here. 
>>> >>> Signed-off-by: Salil Mehtamailto:salil.mehta@huawei.com >>> Reviewed-by: Gavin Shanmailto:gshan@redhat.com >>> Tested-by: Vishnu Pajjurimailto:vishnu@os.amperecomputing.com >>> Reviewed-by: Jonathan Cameronmailto:Jonathan.Cameron@huawei.com >>> Tested-by: Xianglai Limailto:lixianglai@loongson.cn >>> Tested-by: Miguel Luismailto:miguel.luis@oracle.com >>> Reviewed-by: Shaoqin Huangmailto:shahuang@redhat.com >>> --- >>> accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++---------- >>> accel/kvm/trace-events | 5 +++- >>> include/sysemu/kvm.h | 16 +++++++++++ >>> 3 files changed, 69 insertions(+), 16 deletions(-) >>> >>> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c >>> index a8cecd040e..3bc3207bda 100644 >>> --- a/accel/kvm/kvm-all.c >>> +++ b/accel/kvm/kvm-all.c >>> @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; >>> #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) >>> >>> static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); >>> +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); >>> >>> static inline void kvm_resample_fd_remove(int gsi) >>> { >>> @@ -314,14 +315,53 @@ err: >>> return ret; >>> } >>> >>> +void kvm_park_vcpu(CPUState *cpu) >>> +{ >>> + struct KVMParkedVcpu *vcpu; >>> + >>> + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> It's good if we add kvm_fd to trace. >> It will be useful to cross verify kvm_get_vcpu()'s kvm_fd with parked vcpu. > > Agreed. But this is currently called in context to create and destroy vCPU > where the trace already exists with the info you are seeking. Having > trace here might duplicate the info and end up increasing the noise. > > Let me know if you think otherwise or have something else to add. This is to provide additional information to the racing only. The intention here is to trace mapping of vcpu_id<-->kvm_fd while parking and fetching vcpu. 
This way we can easily trace what is parked (kvm_park_vcpu()) vs fetched (kvm_get_vcpu()) using pair of information. > > Thanks > > >>> + >>> + vcpu = g_malloc0(sizeof(*vcpu)); >>> + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); >>> + vcpu->kvm_fd = cpu->kvm_fd; >>> + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); >>> +} >>> + >>> +int kvm_create_vcpu(CPUState *cpu) >>> +{ >>> + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); >>> + KVMState *s = kvm_state; >>> + int kvm_fd; >>> + >>> + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> vcpu_id can be used instead of kvm_arch_vcpu_id(cpu). > > KVM arch VCPU Id ensures that ID being traced is meaningful for that > architecture. The way CPU ID gets calculated in on different architectures > could be different. Hence, its value might be quite different. vcpu_id is already being calculated just above trace call. I don't think vcpu_id value gets differ by the time of tracing. > >>> + >>> + /* check if the KVM vCPU already exist but is parked */ >>> + kvm_fd = kvm_get_vcpu(s, vcpu_id); >>> + if (kvm_fd < 0) { >>> +> /* vCPU not parked: create a new KVM vCPU */ >>> +> kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); >>> +> if (kvm_fd < 0) { >>> +> error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); >>> +> return kvm_fd; >>> +> } >>> + } >>> + >>> + cpu->kvm_fd = kvm_fd; >>> + cpu->kvm_state = s; >>> + cpu->vcpu_dirty = true; >>> + cpu->dirty_pages = 0; >>> + cpu->throttle_us_per_full = 0; >>> + >>> + return 0; >>> +} >>> + >>> static int do_kvm_destroy_vcpu(CPUState *cpu) >>> { >>> KVMState *s = kvm_state; >>> long mmap_size; >>> - struct KVMParkedVcpu *vcpu = NULL; >>> int ret = 0; >>> >>> - trace_kvm_destroy_vcpu(); >>> + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >>> >>> ret = kvm_arch_destroy_vcpu(cpu); >>> if (ret < 0) { >>> @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) >>> > } >>> } >>> >>> - vcpu = g_malloc0(sizeof(*vcpu)); >>> - 
vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); >>> - vcpu->kvm_fd = cpu->kvm_fd; >>> - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); >>> + kvm_park_vcpu(cpu); >>> err: >>> return ret; >>> } >>> @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) >>> > if (cpu->vcpu_id == vcpu_id) { >>> > int kvm_fd; >>> >>> +> trace_kvm_get_vcpu(vcpu_id); >> It's good if we add kvm_fd to trace. >> It will be useful to cross verify kvm_get_vcpu's kvm_fd with parked vcpu. > > I can but I'm wondering why you've raised this? Perhaps, I'm not aware of the > interface you are using to configure the VMs and how traces across diferent > VMs get reflected. Please help in my understanding. This is to provide additional information only not specific to any interface to configure VMs. _Regards_, -Vishnu > >>> + >>> > QLIST_REMOVE(cpu, node); >>> > kvm_fd = cpu->kvm_fd; >>> > g_free(cpu); >>> @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) >>> > } >>> } >>> >>> - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); >>> + return -ENOENT; >>> } >>> >>> int kvm_init_vcpu(CPUState *cpu, Error **errp) >>> @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) >>> >>> trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >>> >>> - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); >>> + ret = kvm_create_vcpu(cpu); >>> if (ret < 0) { >>> - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", >>> + error_setg_errno(errp, -ret, >>> + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", >> kvm_arch_vcpu_id(cpu)); >>> goto err; >>> } >>> >>> - cpu->kvm_fd = ret; >>> - cpu->kvm_state = s; >>> - cpu->vcpu_dirty = true; >>> - cpu->dirty_pages = 0; >>> - cpu->throttle_us_per_full = 0; >>> - >>> mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); >>> if (mmap_size < 0) { >>> ret = mmap_size; >>> diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events >>> index a25902597b..5558cff0dc 100644 >>> --- 
a/accel/kvm/trace-events >>> +++ b/accel/kvm/trace-events >>> @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" >>> kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" >>> kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" >>> kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" >>> +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" >>> +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu" >>> +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" >>> +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" >>> kvm_irqchip_commit_routes(void) "" >>> kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" >>> kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" >>> @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" >>> kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" >>> kvm_dirty_ring_reaper_kick(const char *reason) "%s" >>> kvm_dirty_ring_flush(int finished) "%d" >>> -kvm_destroy_vcpu(void) "" >>> kvm_failed_get_vcpu_mmap_size(void) "" >>> kvm_cpu_exec(void) "" >>> kvm_interrupt_exit_request(void) "" >>> diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h >>> index fad9a7e8ff..2ed928aa71 100644 >>> --- a/include/sysemu/kvm.h >>> +++ b/include/sysemu/kvm.h >>> @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); >>> int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, >>> > > > > > hwaddr *phys_addr); >>> >>> +/** >>> + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU >>> + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. >>> + * >>> + * @returns: 0 when success, errno (<0) when failed. 
>>> + */ >>> +int kvm_create_vcpu(CPUState *cpu); >>> + >>> +/** >>> + * kvm_park_vcpu - Park QEMU KVM vCPU context >>> + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. >>> + * >>> + * @returns: none >>> + */ >>> +void kvm_park_vcpu(CPUState *cpu); >>> + >>> #endif /* NEED_CPU_H */ >>> >>> void kvm_cpu_synchronize_state(CPUState *cpu); >> Otherwise, Looks good to me. Feel free to add >> Reviewed-by: "Vishnu Pajjuri"mailto:vishnu@os.amperecomputing.com >> Thanks, > Thanks. > Salil > > > >> -Vishnu
Hi Vishnu, On Tue, May 7, 2024 at 12:39 PM Vishnu Pajjuri < vishnu@amperemail.onmicrosoft.com> wrote: > Hi Salil, > On 03-05-2024 21:53, Salil Mehta wrote: > > [EXTERNAL EMAIL NOTICE: This email originated from an external sender. Please be mindful of safe email handling and proprietary information protection practices.] > > > Hi Vishnu, > > > From: Vishnu Pajjuri <vishnu@amperemail.onmicrosoft.com> <vishnu@amperemail.onmicrosoft.com> > Sent: Thursday, April 4, 2024 3:00 PM > Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU {creation,parking} code > > Hi Salil, > > On 12-03-2024 07:29, Salil Mehta wrote: > KVM vCPU creation is done once during the vCPU realization when Qemu vCPU thread > is spawned. This is common to all the architectures as of now. > > Hot-unplug of vCPU results in destruction of the vCPU object in QOM but the > corresponding KVM vCPU object in the Host KVM is not destroyed as KVM doesn't > support vCPU removal. Therefore, its representative KVM vCPU object/context in > Qemu is parked. > > Refactor architecture common logic so that some APIs could be reused by vCPU > Hotplug code of some architectures likes ARM, Loongson etc. Update new/old APIs > with trace events instead of DPRINTF. No functional change is intended here. 
> > Signed-off-by: Salil Mehta mailto:salil.mehta@huawei.com <salil.mehta@huawei.com> > Reviewed-by: Gavin Shan mailto:gshan@redhat.com <gshan@redhat.com> > Tested-by: Vishnu Pajjuri mailto:vishnu@os.amperecomputing.com <vishnu@os.amperecomputing.com> > Reviewed-by: Jonathan Cameron mailto:Jonathan.Cameron@huawei.com <Jonathan.Cameron@huawei.com> > Tested-by: Xianglai Li mailto:lixianglai@loongson.cn <lixianglai@loongson.cn> > Tested-by: Miguel Luis mailto:miguel.luis@oracle.com <miguel.luis@oracle.com> > Reviewed-by: Shaoqin Huang mailto:shahuang@redhat.com <shahuang@redhat.com> > --- > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++---------- > accel/kvm/trace-events | 5 +++- > include/sysemu/kvm.h | 16 +++++++++++ > 3 files changed, 69 insertions(+), 16 deletions(-) > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index a8cecd040e..3bc3207bda 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) > > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); > > static inline void kvm_resample_fd_remove(int gsi) > { > @@ -314,14 +315,53 @@ err: > return ret; > } > > +void kvm_park_vcpu(CPUState *cpu) > +{ > + struct KVMParkedVcpu *vcpu; > + > + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > It's good if we add kvm_fd to trace. > It will be useful to cross verify kvm_get_vcpu()'s kvm_fd with parked vcpu. > > Agreed. But this is currently called in context to create and destroy vCPU > where the trace already exists with the info you are seeking. Having > trace here might duplicate the info and end up increasing the noise. > > Let me know if you think otherwise or have something else to add. > > This is to provide additional information for tracing only. 
> > The intention here is to trace mapping of vcpu_id<-->kvm_fd while parking > > and fetching vcpu. This way we can easily trace what is parked > (kvm_park_vcpu()) vs fetched (kvm_get_vcpu()) > > using this pair of information. > Ok, No problem. I will. > Thanks > > > > + > + vcpu = g_malloc0(sizeof(*vcpu)); > + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > + vcpu->kvm_fd = cpu->kvm_fd; > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > +} > + > +int kvm_create_vcpu(CPUState *cpu) > +{ > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); > + KVMState *s = kvm_state; > + int kvm_fd; > + > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > vcpu_id can be used instead of kvm_arch_vcpu_id(cpu). > > KVM arch VCPU Id ensures that ID being traced is meaningful for that > architecture. The way CPU ID gets calculated on different architectures > could be different. Hence, its value might be quite different. > > vcpu_id is already being calculated just above trace call. > > I don't think vcpu_id value will differ by the time of tracing. > sure. 
> + > + /* check if the KVM vCPU already exist but is parked */ > + kvm_fd = kvm_get_vcpu(s, vcpu_id); > + if (kvm_fd < 0) { > +> /* vCPU not parked: create a new KVM vCPU */ > +> kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); > +> if (kvm_fd < 0) { > +> error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); > +> return kvm_fd; > +> } > + } > + > + cpu->kvm_fd = kvm_fd; > + cpu->kvm_state = s; > + cpu->vcpu_dirty = true; > + cpu->dirty_pages = 0; > + cpu->throttle_us_per_full = 0; > + > + return 0; > +} > + > static int do_kvm_destroy_vcpu(CPUState *cpu) > { > KVMState *s = kvm_state; > long mmap_size; > - struct KVMParkedVcpu *vcpu = NULL; > int ret = 0; > > - trace_kvm_destroy_vcpu(); > + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > ret = kvm_arch_destroy_vcpu(cpu); > if (ret < 0) { > @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) > > } > } > > - vcpu = g_malloc0(sizeof(*vcpu)); > - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > - vcpu->kvm_fd = cpu->kvm_fd; > - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); > + kvm_park_vcpu(cpu); > err: > return ret; > } > @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) > > if (cpu->vcpu_id == vcpu_id) { > > int kvm_fd; > > +> trace_kvm_get_vcpu(vcpu_id); > > It's good if we add kvm_fd to trace. > It will be useful to cross verify kvm_get_vcpu's kvm_fd with parked vcpu. > > I can but I'm wondering why you've raised this? Perhaps, I'm not aware of the > interface you are using to configure the VMs and how traces across diferent > VMs get reflected. Please help in my understanding. > > This is to provide additional information only not specific to any > interface to configure VMs. > Ok. sure. Thanks Salil. 
> *Regards*, > > -Vishnu > > + > > QLIST_REMOVE(cpu, node); > > kvm_fd = cpu->kvm_fd; > > g_free(cpu); > @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) > > } > } > > - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); > + return -ENOENT; > } > > int kvm_init_vcpu(CPUState *cpu, Error **errp) > @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) > > trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > > - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); > + ret = kvm_create_vcpu(cpu); > if (ret < 0) { > - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", > + error_setg_errno(errp, -ret, > + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", > > kvm_arch_vcpu_id(cpu)); > > goto err; > } > > - cpu->kvm_fd = ret; > - cpu->kvm_state = s; > - cpu->vcpu_dirty = true; > - cpu->dirty_pages = 0; > - cpu->throttle_us_per_full = 0; > - > mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); > if (mmap_size < 0) { > ret = mmap_size; > diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events > index a25902597b..5558cff0dc 100644 > --- a/accel/kvm/trace-events > +++ b/accel/kvm/trace-events > @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" > kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" > kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" > kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu" > +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" > kvm_irqchip_commit_routes(void) "" > kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d" > 
kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" > @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" > kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" > kvm_dirty_ring_reaper_kick(const char *reason) "%s" > kvm_dirty_ring_flush(int finished) "%d" > -kvm_destroy_vcpu(void) "" > kvm_failed_get_vcpu_mmap_size(void) "" > kvm_cpu_exec(void) "" > kvm_interrupt_exit_request(void) "" > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h > index fad9a7e8ff..2ed928aa71 100644 > --- a/include/sysemu/kvm.h > +++ b/include/sysemu/kvm.h > @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); > int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, > > > > > > hwaddr *phys_addr); > > +/** > + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU > + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. > + * > + * @returns: 0 when success, errno (<0) when failed. > + */ > +int kvm_create_vcpu(CPUState *cpu); > + > +/** > + * kvm_park_vcpu - Park QEMU KVM vCPU context > + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. > + * > + * @returns: none > + */ > +void kvm_park_vcpu(CPUState *cpu); > + > #endif /* NEED_CPU_H */ > > void kvm_cpu_synchronize_state(CPUState *cpu); > > Otherwise, Looks good to me. Feel free to add > Reviewed-by: "Vishnu Pajjuri" mailto:vishnu@os.amperecomputing.com <vishnu@os.amperecomputing.com> > Thanks, > > Thanks. > Salil > > > > > -Vishnu > >
Hi Philippe, Sorry, I missed this mail earlier. > From: Philippe Mathieu-Daudé <philmd@linaro.org> > Sent: Friday, May 3, 2024 7:23 PM > To: Salil Mehta <salil.mehta@huawei.com>; qemu-devel@nongnu.org; > qemu-arm@nongnu.org > > On 3/5/24 17:57, Salil Mehta wrote: > > Hi Philippe, > > > >> From: Philippe Mathieu-Daudé <philmd@linaro.org> > >> Sent: Friday, May 3, 2024 10:40 AM > >> Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU > >> {creation,parking} code > >> > >> Hi Salil, > >> > >> On 12/3/24 02:59, Salil Mehta wrote: > >> > KVM vCPU creation is done once during the vCPU realization when Qemu > >> > vCPU thread is spawned. This is common to all the architectures as of now. > >> > > >> > Hot-unplug of vCPU results in destruction of the vCPU object in QOM > >> > but the corresponding KVM vCPU object in the Host KVM is not destroyed > >> > as KVM doesn't support vCPU removal. Therefore, its representative KVM > >> > vCPU object/context in Qemu is parked. > >> > > >> > Refactor architecture common logic so that some APIs could be reused > >> > by vCPU Hotplug code of some architectures likes ARM, Loongson etc. > >> > Update new/old APIs with trace events instead of DPRINTF. No functional > >> change is intended here. 
> >> > > >> > Signed-off-by: Salil Mehta <salil.mehta@huawei.com> > >> > Reviewed-by: Gavin Shan <gshan@redhat.com> > >> > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> > >> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > >> > Tested-by: Xianglai Li <lixianglai@loongson.cn> > >> > Tested-by: Miguel Luis <miguel.luis@oracle.com> > >> > Reviewed-by: Shaoqin Huang <shahuang@redhat.com> > >> > --- > >> > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++- > ----- > >> ---- > >> > accel/kvm/trace-events | 5 +++- > >> > include/sysemu/kvm.h | 16 +++++++++++ > >> > 3 files changed, 69 insertions(+), 16 deletions(-) > >> > > >> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index > >> > a8cecd040e..3bc3207bda 100644 > >> > --- a/accel/kvm/kvm-all.c > >> > +++ b/accel/kvm/kvm-all.c > >> > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; > >> > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) > >> > > >> > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); > >> > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); > >> > > >> > static inline void kvm_resample_fd_remove(int gsi) > >> > { > >> > @@ -314,14 +315,53 @@ err: > >> > return ret; > >> > } > >> > > >> > +void kvm_park_vcpu(CPUState *cpu) > >> > +{ > >> > + struct KVMParkedVcpu *vcpu; > >> > + > >> > + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > >> > + > >> > + vcpu = g_malloc0(sizeof(*vcpu)); > >> > + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); > >> > + vcpu->kvm_fd = cpu->kvm_fd; > >> > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); } > >> > + > >> > +int kvm_create_vcpu(CPUState *cpu) > >> > +{ > >> > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); > >> > + KVMState *s = kvm_state; > >> > + int kvm_fd; > >> > + > >> > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); > >> > + > >> > + /* check if the KVM vCPU already exist but is parked */ > >> > + kvm_fd = kvm_get_vcpu(s, vcpu_id); > >> > + if 
(kvm_fd < 0) { > >> > + /* vCPU not parked: create a new KVM vCPU */ > >> > + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); > >> > + if (kvm_fd < 0) { > >> > + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); > >> > + return kvm_fd; > >> > + } > >> > + } > >> > + > >> > + cpu->kvm_fd = kvm_fd; > >> > + cpu->kvm_state = s; > >> > + cpu->vcpu_dirty = true; > >> > + cpu->dirty_pages = 0; > >> > + cpu->throttle_us_per_full = 0; > >> > + > >> > + return 0; > >> > +} > >> > >> This seems generic enough to be implemented for all accelerators. > >> > >> See AccelOpsClass in include/sysemu/accel-ops.h. > >> > >> That said, can be done later on top. > > > > Let me understand correctly. Are you suggesting to implement above > > even for HVF, TCG, QTEST etc? > > Not for you to implement the other non-KVM accelerators, but since you > are introducing this, now is a good time to think about a generic interface. > > So far AccelOpsClass::[un]park_vcpu() handlers make sense to me. Sure, but what is the advantage of defining these 'supporting' functions as part of the AccelOpsClass? Each of these functions in any case will need to be defined individually for different Accelerators or unless we are planning to extract some common accelerator functions in a separate file and use them across all the accelerators? I'm surely missing some key point here. Thanks Salil.
On 8/5/24 12:46, Salil Mehta wrote: > Hi Phillipe, > > Sorry, I missed this mail earlier. > >> From: Philippe Mathieu-Daudé <philmd@linaro.org> >> Sent: Friday, May 3, 2024 7:23 PM >> To: Salil Mehta <salil.mehta@huawei.com>; qemu-devel@nongnu.org; >> qemu-arm@nongnu.org >> >> On 3/5/24 17:57, Salil Mehta wrote: >> > Hi Philippe, >> > >> >> From: Philippe Mathieu-Daudé <philmd@linaro.org> >> >> Sent: Friday, May 3, 2024 10:40 AM >> >> Subject: Re: [PATCH V8 1/8] accel/kvm: Extract common KVM vCPU >> >> {creation,parking} code >> >> >> >> Hi Salil, >> >> >> >> On 12/3/24 02:59, Salil Mehta wrote: >> >> > KVM vCPU creation is done once during the vCPU realization when Qemu >> >> > vCPU thread is spawned. This is common to all the architectures as of now. >> >> > >> >> > Hot-unplug of vCPU results in destruction of the vCPU object in QOM >> >> > but the corresponding KVM vCPU object in the Host KVM is not destroyed >> >> > as KVM doesn't support vCPU removal. Therefore, its representative KVM >> >> > vCPU object/context in Qemu is parked. >> >> > >> >> > Refactor architecture common logic so that some APIs could be reused >> >> > by vCPU Hotplug code of some architectures likes ARM, Loongson etc. >> >> > Update new/old APIs with trace events instead of DPRINTF. No functional >> >> change is intended here. 
>> >> > >> >> > Signed-off-by: Salil Mehta <salil.mehta@huawei.com> >> >> > Reviewed-by: Gavin Shan <gshan@redhat.com> >> >> > Tested-by: Vishnu Pajjuri <vishnu@os.amperecomputing.com> >> >> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> >> >> > Tested-by: Xianglai Li <lixianglai@loongson.cn> >> >> > Tested-by: Miguel Luis <miguel.luis@oracle.com> >> >> > Reviewed-by: Shaoqin Huang <shahuang@redhat.com> >> >> > --- >> >> > accel/kvm/kvm-all.c | 64 ++++++++++++++++++++++++++++++++- >> ----- >> >> ---- >> >> > accel/kvm/trace-events | 5 +++- >> >> > include/sysemu/kvm.h | 16 +++++++++++ >> >> > 3 files changed, 69 insertions(+), 16 deletions(-) >> >> > >> >> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index >> >> > a8cecd040e..3bc3207bda 100644 >> >> > --- a/accel/kvm/kvm-all.c >> >> > +++ b/accel/kvm/kvm-all.c >> >> > @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; >> >> > #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) >> >> > >> >> > static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); >> >> > +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); >> >> > >> >> > static inline void kvm_resample_fd_remove(int gsi) >> >> > { >> >> > @@ -314,14 +315,53 @@ err: >> >> > return ret; >> >> > } >> >> > >> >> > +void kvm_park_vcpu(CPUState *cpu) >> >> > +{ >> >> > + struct KVMParkedVcpu *vcpu; >> >> > + >> >> > + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> >> > + >> >> > + vcpu = g_malloc0(sizeof(*vcpu)); >> >> > + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); >> >> > + vcpu->kvm_fd = cpu->kvm_fd; >> >> > + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); } >> >> > + >> >> > +int kvm_create_vcpu(CPUState *cpu) >> >> > +{ >> >> > + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); >> >> > + KVMState *s = kvm_state; >> >> > + int kvm_fd; >> >> > + >> >> > + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); >> >> > + >> >> > + /* check if the KVM vCPU already exist but is parked */ >> >> > 
+ kvm_fd = kvm_get_vcpu(s, vcpu_id); >> >> > + if (kvm_fd < 0) { >> >> > + /* vCPU not parked: create a new KVM vCPU */ >> >> > + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); >> >> > + if (kvm_fd < 0) { >> >> > + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); >> >> > + return kvm_fd; >> >> > + } >> >> > + } >> >> > + >> >> > + cpu->kvm_fd = kvm_fd; >> >> > + cpu->kvm_state = s; >> >> > + cpu->vcpu_dirty = true; >> >> > + cpu->dirty_pages = 0; >> >> > + cpu->throttle_us_per_full = 0; >> >> > + >> >> > + return 0; >> >> > +} >> >> >> >> This seems generic enough to be implemented for all accelerators. >> >> >> >> See AccelOpsClass in include/sysemu/accel-ops.h. >> >> >> >> That said, can be done later on top. >> > >> > Let me understand correctly. Are you suggesting to implement above >> > even for HVF, TCG, QTEST etc? >> >> Not for you to implement the other non-KVM accelerators, but since you >> are introducing this, now is a good time to think about a generic interface. >> >> So far AccelOpsClass::[un]park_vcpu() handlers make sense to me. > > Sure, but what is the advantage of defining these 'supporting' functions > as part of the AccelOpsClass? Each of these functions in any case will need > to be defined individually for different Accelerators or unless we are > planning to extract some common accelerator functions in a separate file > and use them across all the accelerators? kvm_arm_create_host_vcpu() [*] seems generic. Maybe we could do the same with HVF at least. [*] https://lore.kernel.org/qemu-devel/20230926100436.28284-7-salil.mehta@huawei.com/ > > I'm surely missing some key point here. I started https://etherpad.opendev.org/p/QEMU_vCPU_life to document the vCPU spaghetti code. In that big picture the "park" method makes sense to me, but we can discuss that later. Again, certainly not a blocker for your work, I'm just trying to see the whole view. Regards, Phil.
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index a8cecd040e..3bc3207bda 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -126,6 +126,7 @@ static QemuMutex kml_slots_lock; #define kvm_slots_unlock() qemu_mutex_unlock(&kml_slots_lock) static void kvm_slot_init_dirty_bitmap(KVMSlot *mem); +static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id); static inline void kvm_resample_fd_remove(int gsi) { @@ -314,14 +315,53 @@ err: return ret; } +void kvm_park_vcpu(CPUState *cpu) +{ + struct KVMParkedVcpu *vcpu; + + trace_kvm_park_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + + vcpu = g_malloc0(sizeof(*vcpu)); + vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); + vcpu->kvm_fd = cpu->kvm_fd; + QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); +} + +int kvm_create_vcpu(CPUState *cpu) +{ + unsigned long vcpu_id = kvm_arch_vcpu_id(cpu); + KVMState *s = kvm_state; + int kvm_fd; + + trace_kvm_create_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); + + /* check if the KVM vCPU already exist but is parked */ + kvm_fd = kvm_get_vcpu(s, vcpu_id); + if (kvm_fd < 0) { + /* vCPU not parked: create a new KVM vCPU */ + kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id); + if (kvm_fd < 0) { + error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu", vcpu_id); + return kvm_fd; + } + } + + cpu->kvm_fd = kvm_fd; + cpu->kvm_state = s; + cpu->vcpu_dirty = true; + cpu->dirty_pages = 0; + cpu->throttle_us_per_full = 0; + + return 0; +} + static int do_kvm_destroy_vcpu(CPUState *cpu) { KVMState *s = kvm_state; long mmap_size; - struct KVMParkedVcpu *vcpu = NULL; int ret = 0; - trace_kvm_destroy_vcpu(); + trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); ret = kvm_arch_destroy_vcpu(cpu); if (ret < 0) { @@ -347,10 +387,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu) } } - vcpu = g_malloc0(sizeof(*vcpu)); - vcpu->vcpu_id = kvm_arch_vcpu_id(cpu); - vcpu->kvm_fd = cpu->kvm_fd; - QLIST_INSERT_HEAD(&kvm_state->kvm_parked_vcpus, vcpu, node); + kvm_park_vcpu(cpu); 
err: return ret; } @@ -371,6 +408,8 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) if (cpu->vcpu_id == vcpu_id) { int kvm_fd; + trace_kvm_get_vcpu(vcpu_id); + QLIST_REMOVE(cpu, node); kvm_fd = cpu->kvm_fd; g_free(cpu); @@ -378,7 +417,7 @@ static int kvm_get_vcpu(KVMState *s, unsigned long vcpu_id) } } - return kvm_vm_ioctl(s, KVM_CREATE_VCPU, (void *)vcpu_id); + return -ENOENT; } int kvm_init_vcpu(CPUState *cpu, Error **errp) @@ -389,19 +428,14 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp) trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu)); - ret = kvm_get_vcpu(s, kvm_arch_vcpu_id(cpu)); + ret = kvm_create_vcpu(cpu); if (ret < 0) { - error_setg_errno(errp, -ret, "kvm_init_vcpu: kvm_get_vcpu failed (%lu)", + error_setg_errno(errp, -ret, + "kvm_init_vcpu: kvm_create_vcpu failed (%lu)", kvm_arch_vcpu_id(cpu)); goto err; } - cpu->kvm_fd = ret; - cpu->kvm_state = s; - cpu->vcpu_dirty = true; - cpu->dirty_pages = 0; - cpu->throttle_us_per_full = 0; - mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { ret = mmap_size; diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events index a25902597b..5558cff0dc 100644 --- a/accel/kvm/trace-events +++ b/accel/kvm/trace-events @@ -9,6 +9,10 @@ kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" kvm_init_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_create_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_get_vcpu(unsigned long arch_cpu_id) "id: %lu" +kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" +kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu" kvm_irqchip_commit_routes(void) "" kvm_irqchip_add_msi_route(char *name, int vector, int virq) 
"dev %s vector %d virq %d" kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d" @@ -25,7 +29,6 @@ kvm_dirty_ring_reaper(const char *s) "%s" kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" kvm_dirty_ring_reaper_kick(const char *reason) "%s" kvm_dirty_ring_flush(int finished) "%d" -kvm_destroy_vcpu(void) "" kvm_failed_get_vcpu_mmap_size(void) "" kvm_cpu_exec(void) "" kvm_interrupt_exit_request(void) "" diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index fad9a7e8ff..2ed928aa71 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -435,6 +435,22 @@ void kvm_set_sigmask_len(KVMState *s, unsigned int sigmask_len); int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr, hwaddr *phys_addr); +/** + * kvm_create_vcpu - Gets a parked KVM vCPU or creates a KVM vCPU + * @cpu: QOM CPUState object for which KVM vCPU has to be fetched/created. + * + * @returns: 0 when success, errno (<0) when failed. + */ +int kvm_create_vcpu(CPUState *cpu); + +/** + * kvm_park_vcpu - Park QEMU KVM vCPU context + * @cpu: QOM CPUState object for which QEMU KVM vCPU context has to be parked. + * + * @returns: none + */ +void kvm_park_vcpu(CPUState *cpu); + #endif /* NEED_CPU_H */ void kvm_cpu_synchronize_state(CPUState *cpu);