diff mbox series

pseries: fix kvmppc_set_fwnmi()

Message ID 20200724083533.281700-1-lvivier@redhat.com (mailing list archive)
State New, archived
Headers show
Series pseries: fix kvmppc_set_fwnmi() | expand

Commit Message

Laurent Vivier July 24, 2020, 8:35 a.m. UTC
QEMU issues the ioctl(KVM_CAP_PPC_FWNMI) on the first vCPU.

If the first vCPU is currently running, the vCPU mutex is held
and the ioctl() cannot be done and waits until the mutex is released.
This never happens and the VM is stuck.

To avoid this deadlock, issue the ioctl on the same vCPU doing the
RTAS call.

The problem can be reproduced by booting a guest with several vCPUs
(the probability to have the problem is (n - 1) / n,  n = # of CPUs),
and then by triggering a kernel crash with "echo c >/proc/sysrq-trigger".

On the reboot, the kernel hangs after:

...
[    0.000000] -----------------------------------------------------
[    0.000000] ppc64_pft_size    = 0x0
[    0.000000] phys_mem_size     = 0x48000000
[    0.000000] dcache_bsize      = 0x80
[    0.000000] icache_bsize      = 0x80
[    0.000000] cpu_features      = 0x0001c06f8f4f91a7
[    0.000000]   possible        = 0x0003fbffcf5fb1a7
[    0.000000]   always          = 0x00000003800081a1
[    0.000000] cpu_user_features = 0xdc0065c2 0xaee00000
[    0.000000] mmu_features      = 0x3c006041
[    0.000000] firmware_features = 0x00000085455a445f
[    0.000000] physical_start    = 0x8000000
[    0.000000] -----------------------------------------------------
[    0.000000] numa:   NODE_DATA [mem 0x47f33c80-0x47f3ffff]

Fixes: ec010c00665b ("ppc/spapr: KVM FWNMI should not be enabled until guest requests it")
Cc: npiggin@gmail.com
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
---
 hw/ppc/spapr_rtas.c  | 2 +-
 target/ppc/kvm.c     | 3 +--
 target/ppc/kvm_ppc.h | 4 ++--
 3 files changed, 4 insertions(+), 5 deletions(-)

Comments

Philippe Mathieu-Daudé July 24, 2020, 9:12 a.m. UTC | #1
On 7/24/20 10:35 AM, Laurent Vivier wrote:
> QEMU issues the ioctl(KVM_CAP_PPC_FWNMI) on the first vCPU.
> 
> If the first vCPU is currently running, the vCPU mutex is held
> and the ioctl() cannot be done and waits until the mutex is released.
> This never happens and the VM is stuck.
> 
> To avoid this deadlock, issue the ioctl on the same vCPU doing the
> RTAS call.
> 
> The problem can be reproduced by booting a guest with several vCPUs
> (the probability to have the problem is (n - 1) / n,  n = # of CPUs),
> and then by triggering a kernel crash with "echo c >/proc/sysrq-trigger".
> 
> On the reboot, the kernel hangs after:
> 
> ...
> [    0.000000] -----------------------------------------------------
> [    0.000000] ppc64_pft_size    = 0x0
> [    0.000000] phys_mem_size     = 0x48000000
> [    0.000000] dcache_bsize      = 0x80
> [    0.000000] icache_bsize      = 0x80
> [    0.000000] cpu_features      = 0x0001c06f8f4f91a7
> [    0.000000]   possible        = 0x0003fbffcf5fb1a7
> [    0.000000]   always          = 0x00000003800081a1
> [    0.000000] cpu_user_features = 0xdc0065c2 0xaee00000
> [    0.000000] mmu_features      = 0x3c006041
> [    0.000000] firmware_features = 0x00000085455a445f
> [    0.000000] physical_start    = 0x8000000
> [    0.000000] -----------------------------------------------------
> [    0.000000] numa:   NODE_DATA [mem 0x47f33c80-0x47f3ffff]
> 
> Fixes: ec010c00665b ("ppc/spapr: KVM FWNMI should not be enabled until guest requests it")
> Cc: npiggin@gmail.com
> Signed-off-by: Laurent Vivier <lvivier@redhat.com>

Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>

> ---
>  hw/ppc/spapr_rtas.c  | 2 +-
>  target/ppc/kvm.c     | 3 +--
>  target/ppc/kvm_ppc.h | 4 ++--
>  3 files changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index bcac0d00e7b6..513c7a84351b 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -438,7 +438,7 @@ static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>      }
>  
>      if (kvm_enabled()) {
> -        if (kvmppc_set_fwnmi() < 0) {
> +        if (kvmppc_set_fwnmi(cpu) < 0) {
>              rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>              return;
>          }
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 2692f76130aa..d85ba8ffe00b 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -2071,9 +2071,8 @@ bool kvmppc_get_fwnmi(void)
>      return cap_fwnmi;
>  }
>  
> -int kvmppc_set_fwnmi(void)
> +int kvmppc_set_fwnmi(PowerPCCPU *cpu)
>  {
> -    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
>      CPUState *cs = CPU(cpu);
>  
>      return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index 701c0c262be2..72e05f1cd2fc 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -28,7 +28,7 @@ void kvmppc_set_papr(PowerPCCPU *cpu);
>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
>  bool kvmppc_get_fwnmi(void);
> -int kvmppc_set_fwnmi(void);
> +int kvmppc_set_fwnmi(PowerPCCPU *cpu);
>  int kvmppc_smt_threads(void);
>  void kvmppc_error_append_smt_possible_hint(Error *const *errp);
>  int kvmppc_set_smt_threads(int smt);
> @@ -169,7 +169,7 @@ static inline bool kvmppc_get_fwnmi(void)
>      return false;
>  }
>  
> -static inline int kvmppc_set_fwnmi(void)
> +static inline int kvmppc_set_fwnmi(PowerPCCPU *cpu)
>  {
>      return -1;
>  }
>
diff mbox series

Patch

diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index bcac0d00e7b6..513c7a84351b 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -438,7 +438,7 @@  static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
     }
 
     if (kvm_enabled()) {
-        if (kvmppc_set_fwnmi() < 0) {
+        if (kvmppc_set_fwnmi(cpu) < 0) {
             rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
             return;
         }
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 2692f76130aa..d85ba8ffe00b 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2071,9 +2071,8 @@  bool kvmppc_get_fwnmi(void)
     return cap_fwnmi;
 }
 
-int kvmppc_set_fwnmi(void)
+int kvmppc_set_fwnmi(PowerPCCPU *cpu)
 {
-    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
     CPUState *cs = CPU(cpu);
 
     return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 701c0c262be2..72e05f1cd2fc 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -28,7 +28,7 @@  void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
 bool kvmppc_get_fwnmi(void);
-int kvmppc_set_fwnmi(void);
+int kvmppc_set_fwnmi(PowerPCCPU *cpu);
 int kvmppc_smt_threads(void);
 void kvmppc_error_append_smt_possible_hint(Error *const *errp);
 int kvmppc_set_smt_threads(int smt);
@@ -169,7 +169,7 @@  static inline bool kvmppc_get_fwnmi(void)
     return false;
 }
 
-static inline int kvmppc_set_fwnmi(void)
+static inline int kvmppc_set_fwnmi(PowerPCCPU *cpu)
 {
     return -1;
 }