Message ID | 20200317050215.159334-5-npiggin@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | FWNMI follow up patches | expand |
On Tue, 17 Mar 2020 15:02:14 +1000 Nicholas Piggin <npiggin@gmail.com> wrote: > Try to be tolerant of errors if the machine check had been recovered > by the host. > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com> > --- Same comment as previous patch on multi-line error strings and warn_report() in the !recovered case. > hw/ppc/spapr_events.c | 25 ++++++++++++++++++------- > 1 file changed, 18 insertions(+), 7 deletions(-) > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index d35151eeb0..3f524cb0ca 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) > /* get rtas addr from fdt */ > rtas_addr = spapr_get_rtas_addr(); > if (!rtas_addr) { > - warn_report("FWNMI: Unable to deliver machine check to guest: " > - "rtas_addr not found."); > - qemu_system_guest_panicked(NULL); > + if (!recovered) { > + warn_report("FWNMI: Unable to deliver machine check to guest: " > + "rtas_addr not found."); > + qemu_system_guest_panicked(NULL); > + } else { > + warn_report("FWNMI: Unable to deliver machine check to guest: " > + "rtas_addr not found. Machine check recovered."); > + } > g_free(ext_elog); > return; > } > > + spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; > + I don't understand this change. 
> stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, > env->gpr[3]); > cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + > @@ -850,9 +857,14 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) > * that CPU called "ibm,nmi-interlock") > */ > if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) { > - warn_report("FWNMI: Unable to deliver machine check to guest: " > - "nested machine check."); > - qemu_system_guest_panicked(NULL); > + if (!recovered) { > + warn_report("FWNMI: Unable to deliver machine check to guest: " > + "nested machine check."); > + qemu_system_guest_panicked(NULL); > + } else { > + warn_report("FWNMI: Unable to deliver machine check to guest: " > + "nested machine check. Machine check recovered."); > + } > return; > } > qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond); > @@ -880,7 +892,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) > warn_report("Received a fwnmi while migration was in progress"); > } > > - spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; > spapr_mce_dispatch_elog(cpu, recovered); > } >
Greg Kurz's message of March 18, 2020 2:57 am:
> On Tue, 17 Mar 2020 15:02:14 +1000
> Nicholas Piggin <npiggin@gmail.com> wrote:
>
>> Try to be tolerant of errors if the machine check had been recovered
>> by the host.
>>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>
> Same comment as previous patch on multi-line error strings and
> warn_report() in the !recovered case.
>
>> hw/ppc/spapr_events.c | 25 ++++++++++++++++++-------
>> 1 file changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>> index d35151eeb0..3f524cb0ca 100644
>> --- a/hw/ppc/spapr_events.c
>> +++ b/hw/ppc/spapr_events.c
>> @@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>>      /* get rtas addr from fdt */
>>      rtas_addr = spapr_get_rtas_addr();
>>      if (!rtas_addr) {
>> -        warn_report("FWNMI: Unable to deliver machine check to guest: "
>> -                    "rtas_addr not found.");
>> -        qemu_system_guest_panicked(NULL);
>> +        if (!recovered) {
>> +            warn_report("FWNMI: Unable to deliver machine check to guest: "
>> +                        "rtas_addr not found.");
>> +            qemu_system_guest_panicked(NULL);
>> +        } else {
>> +            warn_report("FWNMI: Unable to deliver machine check to guest: "
>> +                        "rtas_addr not found. Machine check recovered.");
>> +        }
>>          g_free(ext_elog);
>>          return;
>>      }
>>
>> +    spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
>> +
>
> I don't understand this change.

If we bail out without delivering the interrupt, we can't take the
interlock; otherwise the guest can never release it.

Thanks,
Nick
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index d35151eeb0..3f524cb0ca 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) /* get rtas addr from fdt */ rtas_addr = spapr_get_rtas_addr(); if (!rtas_addr) { - warn_report("FWNMI: Unable to deliver machine check to guest: " - "rtas_addr not found."); - qemu_system_guest_panicked(NULL); + if (!recovered) { + warn_report("FWNMI: Unable to deliver machine check to guest: " + "rtas_addr not found."); + qemu_system_guest_panicked(NULL); + } else { + warn_report("FWNMI: Unable to deliver machine check to guest: " + "rtas_addr not found. Machine check recovered."); + } g_free(ext_elog); return; } + spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, env->gpr[3]); cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + @@ -850,9 +857,14 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) * that CPU called "ibm,nmi-interlock") */ if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) { - warn_report("FWNMI: Unable to deliver machine check to guest: " - "nested machine check."); - qemu_system_guest_panicked(NULL); + if (!recovered) { + warn_report("FWNMI: Unable to deliver machine check to guest: " + "nested machine check."); + qemu_system_guest_panicked(NULL); + } else { + warn_report("FWNMI: Unable to deliver machine check to guest: " + "nested machine check. Machine check recovered."); + } return; } qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond); @@ -880,7 +892,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) warn_report("Received a fwnmi while migration was in progress"); } - spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; spapr_mce_dispatch_elog(cpu, recovered); }
Try to be tolerant of errors if the machine check had been recovered
by the host.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 hw/ppc/spapr_events.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)