Message ID | 20220216102545.1808018-9-npiggin@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ppc: nested KVM HV for spapr virtual hypervisor | expand |
On 2/16/22 11:25, Nicholas Piggin wrote: > Introduce virtual hypervisor methods that can support a "Nested KVM HV" > implementation using the bare metal 2-level radix MMU, and using HV > exceptions to return from H_ENTER_NESTED (rather than cause interrupts). > > HV exceptions can now be raised in the TCG spapr machine when running a > nested KVM HV guest. The main ones are the lev==1 syscall, the hdecr, > hdsi and hisi, hv fu, and hv emu, and h_virt external interrupts. > > HV exceptions are intercepted in the exception handler code and instead > of causing interrupts in the guest and switching the machine to HV mode, > they go to the vhyp where it may exit the H_ENTER_NESTED hcall with the > interrupt vector number as return value as required by the hcall API. > > Address translation is provided by the 2-level page table walker that is > implemented for the bare metal radix MMU. The partition scope page table > is pointed to the L1's partition scope by the get_pate vhc method. > > Reviewed-by: Fabiano Rosas <farosas@linux.ibm.com> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Reviewed-by: Cédric Le Goater <clg@kaod.org> Thanks, C. 
> --- > hw/ppc/pegasos2.c | 6 ++++ > hw/ppc/spapr.c | 6 ++++ > target/ppc/cpu.h | 7 +++++ > target/ppc/excp_helper.c | 64 +++++++++++++++++++++++++++++++++------- > target/ppc/mmu-radix64.c | 11 +++++-- > 5 files changed, 81 insertions(+), 13 deletions(-) > > diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c > index 298e6b93e2..d45008ac71 100644 > --- a/hw/ppc/pegasos2.c > +++ b/hw/ppc/pegasos2.c > @@ -449,6 +449,11 @@ static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm, > } > } > > +static bool pegasos2_cpu_in_nested(PowerPCCPU *cpu) > +{ > + return false; > +} > + > static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) > { > Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp); > @@ -504,6 +509,7 @@ static void pegasos2_machine_class_init(ObjectClass *oc, void *data) > mc->default_ram_id = "pegasos2.ram"; > mc->default_ram_size = 512 * MiB; > > + vhc->cpu_in_nested = pegasos2_cpu_in_nested; > vhc->hypercall = pegasos2_hypercall; > vhc->cpu_exec_enter = vhyp_nop; > vhc->cpu_exec_exit = vhyp_nop; > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 2c95a09d25..6fab70767f 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -4470,6 +4470,11 @@ PowerPCCPU *spapr_find_cpu(int vcpu_id) > return NULL; > } > > +static bool spapr_cpu_in_nested(PowerPCCPU *cpu) > +{ > + return false; > +} > + > static void spapr_cpu_exec_enter(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) > { > SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); > @@ -4578,6 +4583,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) > fwc->get_dev_path = spapr_get_fw_dev_path; > nc->nmi_monitor_handler = spapr_nmi; > smc->phb_placement = spapr_phb_placement; > + vhc->cpu_in_nested = spapr_cpu_in_nested; > vhc->hypercall = emulate_spapr_hypercall; > vhc->hpt_mask = spapr_hpt_mask; > vhc->map_hptes = spapr_map_hptes; > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h > index c79ae74f10..2baa750729 100644 > --- a/target/ppc/cpu.h > +++ 
b/target/ppc/cpu.h > @@ -1311,6 +1311,8 @@ PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc); > #ifndef CONFIG_USER_ONLY > struct PPCVirtualHypervisorClass { > InterfaceClass parent; > + bool (*cpu_in_nested)(PowerPCCPU *cpu); > + void (*deliver_hv_excp)(PowerPCCPU *cpu, int excp); > void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu); > hwaddr (*hpt_mask)(PPCVirtualHypervisor *vhyp); > const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp, > @@ -1330,6 +1332,11 @@ struct PPCVirtualHypervisorClass { > #define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor" > DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, > PPC_VIRTUAL_HYPERVISOR, TYPE_PPC_VIRTUAL_HYPERVISOR) > + > +static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) > +{ > + return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); > +} > #endif /* CONFIG_USER_ONLY */ > > void ppc_cpu_dump_state(CPUState *cpu, FILE *f, int flags); > diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c > index 778eb4f3b0..a78d06d648 100644 > --- a/target/ppc/excp_helper.c > +++ b/target/ppc/excp_helper.c > @@ -1279,6 +1279,18 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp) > powerpc_set_excp_state(cpu, vector, new_msr); > } > > +/* > + * When running a nested HV guest under vhyp, external interrupts are > + * delivered as HVIRT. 
> + */ > +static bool books_vhyp_promotes_external_to_hvirt(PowerPCCPU *cpu) > +{ > + if (cpu->vhyp) { > + return vhyp_cpu_in_nested(cpu); > + } > + return false; > +} > + > #ifdef TARGET_PPC64 > /* > * When running under vhyp, hcalls are always intercepted and sent to the > @@ -1287,7 +1299,21 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp) > static bool books_vhyp_handles_hcall(PowerPCCPU *cpu) > { > if (cpu->vhyp) { > - return true; > + return !vhyp_cpu_in_nested(cpu); > + } > + return false; > +} > + > +/* > + * When running a nested KVM HV guest under vhyp, HV exceptions are not > + * delivered to the guest (because there is no concept of HV support), but > + * rather they are sent tothe vhyp to exit from the L2 back to the L1 and > + * return from the H_ENTER_NESTED hypercall. > + */ > +static bool books_vhyp_handles_hv_excp(PowerPCCPU *cpu) > +{ > + if (cpu->vhyp) { > + return vhyp_cpu_in_nested(cpu); > } > return false; > } > @@ -1540,12 +1566,6 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) > break; > } > > - /* Sanity check */ > - if (!(env->msr_mask & MSR_HVB) && srr0 == SPR_HSRR0) { > - cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with " > - "no HV support\n", excp); > - } > - > /* > * Sort out endianness of interrupt, this differs depending on the > * CPU, the HV mode, etc... 
> @@ -1564,10 +1584,26 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) > env->spr[srr1] = msr; > } > > - /* This can update new_msr and vector if AIL applies */ > - ppc_excp_apply_ail(cpu, excp, msr, &new_msr, &vector); > + if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) { > + PPCVirtualHypervisorClass *vhc = > + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); > + /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */ > + vhc->deliver_hv_excp(cpu, excp); > > - powerpc_set_excp_state(cpu, vector, new_msr); > + powerpc_reset_excp_state(cpu); > + > + } else { > + /* Sanity check */ > + if (!(env->msr_mask & MSR_HVB) && srr0 == SPR_HSRR0) { > + cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with " > + "no HV support\n", excp); > + } > + > + /* This can update new_msr and vector if AIL applies */ > + ppc_excp_apply_ail(cpu, excp, msr, &new_msr, &vector); > + > + powerpc_set_excp_state(cpu, vector, new_msr); > + } > } > #else > static inline void powerpc_excp_books(PowerPCCPU *cpu, int excp) > @@ -1687,7 +1723,11 @@ static void ppc_hw_interrupt(CPUPPCState *env) > /* HEIC blocks delivery to the hypervisor */ > if ((async_deliver && !(heic && msr_hv && !msr_pr)) || > (env->has_hv_mode && msr_hv == 0 && !lpes0)) { > - powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); > + if (books_vhyp_promotes_external_to_hvirt(cpu)) { > + powerpc_excp(cpu, POWERPC_EXCP_HVIRT); > + } else { > + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); > + } > return; > } > } > @@ -1797,6 +1837,8 @@ void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong vector) > msr |= (1ULL << MSR_LE); > } > > + /* Anything for nested required here? MSR[HV] bit? 
*/ > + > powerpc_set_excp_state(cpu, vector, msr); > } > > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c > index 3b6d75a292..b951b310c1 100644 > --- a/target/ppc/mmu-radix64.c > +++ b/target/ppc/mmu-radix64.c > @@ -355,12 +355,19 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, > } > > /* > - * The spapr vhc has a flat partition scope provided by qemu memory. > + * The spapr vhc has a flat partition scope provided by qemu memory when > + * not nested. > + * > + * When running a nested guest, the addressing is 2-level radix on top of the > + * vhc memory, so it works practically identically to the bare metal 2-level > + * radix. So that code is selected directly. A cleaner and more flexible nested > + * hypervisor implementation would allow the vhc to provide a ->nested_xlate() > + * function but that is not required for the moment. > */ > static bool vhyp_flat_addressing(PowerPCCPU *cpu) > { > if (cpu->vhyp) { > - return true; > + return !vhyp_cpu_in_nested(cpu); > } > return false; > }
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 298e6b93e2..d45008ac71 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -449,6 +449,11 @@ static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm, } } +static bool pegasos2_cpu_in_nested(PowerPCCPU *cpu) +{ + return false; +} + static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) { Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp); @@ -504,6 +509,7 @@ static void pegasos2_machine_class_init(ObjectClass *oc, void *data) mc->default_ram_id = "pegasos2.ram"; mc->default_ram_size = 512 * MiB; + vhc->cpu_in_nested = pegasos2_cpu_in_nested; vhc->hypercall = pegasos2_hypercall; vhc->cpu_exec_enter = vhyp_nop; vhc->cpu_exec_exit = vhyp_nop; diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 2c95a09d25..6fab70767f 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4470,6 +4470,11 @@ PowerPCCPU *spapr_find_cpu(int vcpu_id) return NULL; } +static bool spapr_cpu_in_nested(PowerPCCPU *cpu) +{ + return false; +} + static void spapr_cpu_exec_enter(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) { SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); @@ -4578,6 +4583,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data) fwc->get_dev_path = spapr_get_fw_dev_path; nc->nmi_monitor_handler = spapr_nmi; smc->phb_placement = spapr_phb_placement; + vhc->cpu_in_nested = spapr_cpu_in_nested; vhc->hypercall = emulate_spapr_hypercall; vhc->hpt_mask = spapr_hpt_mask; vhc->map_hptes = spapr_map_hptes; diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index c79ae74f10..2baa750729 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1311,6 +1311,8 @@ PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc); #ifndef CONFIG_USER_ONLY struct PPCVirtualHypervisorClass { InterfaceClass parent; + bool (*cpu_in_nested)(PowerPCCPU *cpu); + void (*deliver_hv_excp)(PowerPCCPU *cpu, int excp); void (*hypercall)(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu); hwaddr 
(*hpt_mask)(PPCVirtualHypervisor *vhyp); const ppc_hash_pte64_t *(*map_hptes)(PPCVirtualHypervisor *vhyp, @@ -1330,6 +1332,11 @@ struct PPCVirtualHypervisorClass { #define TYPE_PPC_VIRTUAL_HYPERVISOR "ppc-virtual-hypervisor" DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, PPC_VIRTUAL_HYPERVISOR, TYPE_PPC_VIRTUAL_HYPERVISOR) + +static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) +{ + return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); +} #endif /* CONFIG_USER_ONLY */ void ppc_cpu_dump_state(CPUState *cpu, FILE *f, int flags); diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 778eb4f3b0..a78d06d648 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -1279,6 +1279,18 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp) powerpc_set_excp_state(cpu, vector, new_msr); } +/* + * When running a nested HV guest under vhyp, external interrupts are + * delivered as HVIRT. + */ +static bool books_vhyp_promotes_external_to_hvirt(PowerPCCPU *cpu) +{ + if (cpu->vhyp) { + return vhyp_cpu_in_nested(cpu); + } + return false; +} + #ifdef TARGET_PPC64 /* * When running under vhyp, hcalls are always intercepted and sent to the @@ -1287,7 +1299,21 @@ static void powerpc_excp_booke(PowerPCCPU *cpu, int excp) static bool books_vhyp_handles_hcall(PowerPCCPU *cpu) { if (cpu->vhyp) { - return true; + return !vhyp_cpu_in_nested(cpu); + } + return false; +} + +/* + * When running a nested KVM HV guest under vhyp, HV exceptions are not + * delivered to the guest (because there is no concept of HV support), but + * rather they are sent tothe vhyp to exit from the L2 back to the L1 and + * return from the H_ENTER_NESTED hypercall. 
+ */ +static bool books_vhyp_handles_hv_excp(PowerPCCPU *cpu) +{ + if (cpu->vhyp) { + return vhyp_cpu_in_nested(cpu); } return false; } @@ -1540,12 +1566,6 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) break; } - /* Sanity check */ - if (!(env->msr_mask & MSR_HVB) && srr0 == SPR_HSRR0) { - cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with " - "no HV support\n", excp); - } - /* * Sort out endianness of interrupt, this differs depending on the * CPU, the HV mode, etc... @@ -1564,10 +1584,26 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) env->spr[srr1] = msr; } - /* This can update new_msr and vector if AIL applies */ - ppc_excp_apply_ail(cpu, excp, msr, &new_msr, &vector); + if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) { + PPCVirtualHypervisorClass *vhc = + PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); + /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */ + vhc->deliver_hv_excp(cpu, excp); - powerpc_set_excp_state(cpu, vector, new_msr); + powerpc_reset_excp_state(cpu); + + } else { + /* Sanity check */ + if (!(env->msr_mask & MSR_HVB) && srr0 == SPR_HSRR0) { + cpu_abort(cs, "Trying to deliver HV exception (HSRR) %d with " + "no HV support\n", excp); + } + + /* This can update new_msr and vector if AIL applies */ + ppc_excp_apply_ail(cpu, excp, msr, &new_msr, &vector); + + powerpc_set_excp_state(cpu, vector, new_msr); + } } #else static inline void powerpc_excp_books(PowerPCCPU *cpu, int excp) @@ -1687,7 +1723,11 @@ static void ppc_hw_interrupt(CPUPPCState *env) /* HEIC blocks delivery to the hypervisor */ if ((async_deliver && !(heic && msr_hv && !msr_pr)) || (env->has_hv_mode && msr_hv == 0 && !lpes0)) { - powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); + if (books_vhyp_promotes_external_to_hvirt(cpu)) { + powerpc_excp(cpu, POWERPC_EXCP_HVIRT); + } else { + powerpc_excp(cpu, POWERPC_EXCP_EXTERNAL); + } return; } } @@ -1797,6 +1837,8 @@ void ppc_cpu_do_fwnmi_machine_check(CPUState *cs, target_ulong 
vector) msr |= (1ULL << MSR_LE); } + /* Anything for nested required here? MSR[HV] bit? */ + powerpc_set_excp_state(cpu, vector, msr); } diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 3b6d75a292..b951b310c1 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -355,12 +355,19 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, } /* - * The spapr vhc has a flat partition scope provided by qemu memory. + * The spapr vhc has a flat partition scope provided by qemu memory when + * not nested. + * + * When running a nested guest, the addressing is 2-level radix on top of the + * vhc memory, so it works practically identically to the bare metal 2-level + * radix. So that code is selected directly. A cleaner and more flexible nested + * hypervisor implementation would allow the vhc to provide a ->nested_xlate() + * function but that is not required for the moment. */ static bool vhyp_flat_addressing(PowerPCCPU *cpu) { if (cpu->vhyp) { - return true; + return !vhyp_cpu_in_nested(cpu); } return false; }