Message ID | 20200724083533.281700-1-lvivier@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | pseries: fix kvmppc_set_fwnmi() | expand |
On 7/24/20 10:35 AM, Laurent Vivier wrote: > QEMU issues the ioctl(KVM_CAP_PPC_FWNMI) on the first vCPU. > > If the first vCPU is currently running, the vCPU mutex is held > and the ioctl() cannot be done and waits until the mutex is released. > This never happens and the VM is stuck. > > To avoid this deadlock, issue the ioctl on the same vCPU doing the > RTAS call. > > The problem can be reproduced by booting a guest with several vCPUs > (the probability to have the problem is (n - 1) / n, n = # of CPUs), > and then by triggering a kernel crash with "echo c >/proc/sysrq-trigger". > > On the reboot, the kernel hangs after: > > ... > [ 0.000000] ----------------------------------------------------- > [ 0.000000] ppc64_pft_size = 0x0 > [ 0.000000] phys_mem_size = 0x48000000 > [ 0.000000] dcache_bsize = 0x80 > [ 0.000000] icache_bsize = 0x80 > [ 0.000000] cpu_features = 0x0001c06f8f4f91a7 > [ 0.000000] possible = 0x0003fbffcf5fb1a7 > [ 0.000000] always = 0x00000003800081a1 > [ 0.000000] cpu_user_features = 0xdc0065c2 0xaee00000 > [ 0.000000] mmu_features = 0x3c006041 > [ 0.000000] firmware_features = 0x00000085455a445f > [ 0.000000] physical_start = 0x8000000 > [ 0.000000] ----------------------------------------------------- > [ 0.000000] numa: NODE_DATA [mem 0x47f33c80-0x47f3ffff] > > Fixes: ec010c00665b ("ppc/spapr: KVM FWNMI should not be enabled until guest requests it") > Cc: npiggin@gmail.com > Signed-off-by: Laurent Vivier <lvivier@redhat.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> > --- > hw/ppc/spapr_rtas.c | 2 +- > target/ppc/kvm.c | 3 +-- > target/ppc/kvm_ppc.h | 4 ++-- > 3 files changed, 4 insertions(+), 5 deletions(-) > > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c > index bcac0d00e7b6..513c7a84351b 100644 > --- a/hw/ppc/spapr_rtas.c > +++ b/hw/ppc/spapr_rtas.c > @@ -438,7 +438,7 @@ static void rtas_ibm_nmi_register(PowerPCCPU *cpu, > } > > if (kvm_enabled()) { > - if (kvmppc_set_fwnmi() < 0) { > + if (kvmppc_set_fwnmi(cpu) < 
0) { > rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); > return; > } > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c > index 2692f76130aa..d85ba8ffe00b 100644 > --- a/target/ppc/kvm.c > +++ b/target/ppc/kvm.c > @@ -2071,9 +2071,8 @@ bool kvmppc_get_fwnmi(void) > return cap_fwnmi; > } > > -int kvmppc_set_fwnmi(void) > +int kvmppc_set_fwnmi(PowerPCCPU *cpu) > { > - PowerPCCPU *cpu = POWERPC_CPU(first_cpu); > CPUState *cs = CPU(cpu); > > return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0); > diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h > index 701c0c262be2..72e05f1cd2fc 100644 > --- a/target/ppc/kvm_ppc.h > +++ b/target/ppc/kvm_ppc.h > @@ -28,7 +28,7 @@ void kvmppc_set_papr(PowerPCCPU *cpu); > int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr); > void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy); > bool kvmppc_get_fwnmi(void); > -int kvmppc_set_fwnmi(void); > +int kvmppc_set_fwnmi(PowerPCCPU *cpu); > int kvmppc_smt_threads(void); > void kvmppc_error_append_smt_possible_hint(Error *const *errp); > int kvmppc_set_smt_threads(int smt); > @@ -169,7 +169,7 @@ static inline bool kvmppc_get_fwnmi(void) > return false; > } > > -static inline int kvmppc_set_fwnmi(void) > +static inline int kvmppc_set_fwnmi(PowerPCCPU *cpu) > { > return -1; > } >
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index bcac0d00e7b6..513c7a84351b 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -438,7 +438,7 @@ static void rtas_ibm_nmi_register(PowerPCCPU *cpu, } if (kvm_enabled()) { - if (kvmppc_set_fwnmi() < 0) { + if (kvmppc_set_fwnmi(cpu) < 0) { rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); return; } diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 2692f76130aa..d85ba8ffe00b 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2071,9 +2071,8 @@ bool kvmppc_get_fwnmi(void) return cap_fwnmi; } -int kvmppc_set_fwnmi(void) +int kvmppc_set_fwnmi(PowerPCCPU *cpu) { - PowerPCCPU *cpu = POWERPC_CPU(first_cpu); CPUState *cs = CPU(cpu); return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0); diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h index 701c0c262be2..72e05f1cd2fc 100644 --- a/target/ppc/kvm_ppc.h +++ b/target/ppc/kvm_ppc.h @@ -28,7 +28,7 @@ void kvmppc_set_papr(PowerPCCPU *cpu); int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr); void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy); bool kvmppc_get_fwnmi(void); -int kvmppc_set_fwnmi(void); +int kvmppc_set_fwnmi(PowerPCCPU *cpu); int kvmppc_smt_threads(void); void kvmppc_error_append_smt_possible_hint(Error *const *errp); int kvmppc_set_smt_threads(int smt); @@ -169,7 +169,7 @@ static inline bool kvmppc_get_fwnmi(void) return false; } -static inline int kvmppc_set_fwnmi(void) +static inline int kvmppc_set_fwnmi(PowerPCCPU *cpu) { return -1; }
QEMU issues the ioctl(KVM_CAP_PPC_FWNMI) on the first vCPU. If the first vCPU is currently running, the vCPU mutex is held, so the ioctl() cannot proceed and blocks until the mutex is released. The mutex is never released, and the VM is stuck. To avoid this deadlock, issue the ioctl on the same vCPU that is doing the RTAS call. The problem can be reproduced by booting a guest with several vCPUs (the probability of hitting the problem is (n - 1) / n, where n = # of CPUs), and then triggering a kernel crash with "echo c >/proc/sysrq-trigger". On the reboot, the kernel hangs after: ... [ 0.000000] ----------------------------------------------------- [ 0.000000] ppc64_pft_size = 0x0 [ 0.000000] phys_mem_size = 0x48000000 [ 0.000000] dcache_bsize = 0x80 [ 0.000000] icache_bsize = 0x80 [ 0.000000] cpu_features = 0x0001c06f8f4f91a7 [ 0.000000] possible = 0x0003fbffcf5fb1a7 [ 0.000000] always = 0x00000003800081a1 [ 0.000000] cpu_user_features = 0xdc0065c2 0xaee00000 [ 0.000000] mmu_features = 0x3c006041 [ 0.000000] firmware_features = 0x00000085455a445f [ 0.000000] physical_start = 0x8000000 [ 0.000000] ----------------------------------------------------- [ 0.000000] numa: NODE_DATA [mem 0x47f33c80-0x47f3ffff] Fixes: ec010c00665b ("ppc/spapr: KVM FWNMI should not be enabled until guest requests it") Cc: npiggin@gmail.com Signed-off-by: Laurent Vivier <lvivier@redhat.com> --- hw/ppc/spapr_rtas.c | 2 +- target/ppc/kvm.c | 3 +-- target/ppc/kvm_ppc.h | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-)