diff mbox

[RFC] spapr: ignore interrupts during reset state

Message ID 20170608063608.17855-1-nikunj@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Nikunj A. Dadhania June 8, 2017, 6:36 a.m. UTC
Rebooting a SMP TCG guest is broken for both single/multi threaded TCG.

When reset happens, all the CPUs are in halted state. First CPU is brought out
of reset and secondary CPUs would be initialized by the guest kernel using a
rtas call start-cpu.

However, in case of TCG, decrementer interrupts keep on coming and waking the
secondary CPUs up.

These secondary CPUs would see the decrementer interrupt pending, which makes
cpu::has_work() to bring them out of wait loop and start executing
tcg_exec_cpu().

The problem with this is all the CPUs wake up and start booting SLOF image,
causing the following exception(4 CPUs TCG VM):

[   81.440850] reboot: Restarting system

SLOF
S
SLOF
SLOFLOF[0[0m **********************************************************************
QEMU Starting
 Build Date = Mar  3 2017 13:29:19
 FW Version = git-66d250ef0fd06bb8
[0m **********************************************************************
QEMU Starting
 Build Date = Mar  3 2017 13:29:19
 FW Version = git-66d250ef0fd06bb8
[0m *************************************m**********[?25l **********************************************************************
QEMU Starting
 Build Date = Mar  3 2017 13:29:19
 FW Version = git-66d250ef0fd06bb8
***********************
QEMU Starting
 Build Date = Mar  3 2017 13:29:19
 FW Version = git-66d250ef0fd06bb8
ERROR: Flatten device tree not available!

 exception 300
SRR0 = 00000000000060e4  SRR1 = 800000008000000000000000
SPRG2 = 0000000000400000  SPRG3 = 0000000000004bd8
ERROR: Flatten device tree not available!

 exception 300
SRR0 = 00000000000060e4  SRR1 = 800000008000000000000000
SPRG2 = 0000000000400000  SPRG3 = 0000000000004bd8

Reported-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
---

Note: Similar changes would be required for powernv as well.
Haven't got time to test it there.

---
 hw/ppc/spapr.c              | 1 +
 hw/ppc/spapr_cpu_core.c     | 1 +
 hw/ppc/spapr_rtas.c         | 1 +
 target/ppc/cpu.h            | 7 +++++++
 target/ppc/translate_init.c | 9 +++++++++
 5 files changed, 19 insertions(+)

Comments

David Gibson June 9, 2017, 2:01 a.m. UTC | #1
On Thu, Jun 08, 2017 at 12:06:08PM +0530, Nikunj A Dadhania wrote:
> Rebooting a SMP TCG guest is broken for both single/multi threaded TCG.

Ouch.  When exactly did this happen?  I know that smp boot used to
work under TCG, albeit very slowly.

> When reset happens, all the CPUs are in halted state. First CPU is brought out
> of reset and secondary CPUs would be initialized by the guest kernel using a
> rtas call start-cpu.
> 
> However, in case of TCG, decrementer interrupts keep on coming and waking the
> secondary CPUs up.

Ok.. how is that happening given that the secondary CPUs should have
MSR[EE] == 0?

> These secondary CPUs would see the decrementer interrupt pending, which makes
> cpu::has_work() to bring them out of wait loop and start executing
> tcg_exec_cpu().
> 
> The problem with this is all the CPUs wake up and start booting SLOF image,
> causing the following exception(4 CPUs TCG VM):

[snip]
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index d10808d..eb88bcb 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1013,6 +1013,13 @@ struct CPUPPCState {
>      int access_type; /* when a memory exception occurs, the access
>                          type is stored here */
>  
> +    /* CPU in reset, shouldn't process any interrupts.
> +     *
> +     * Decrementer interrupts in TCG can still wake the CPU up. Make sure that
> +     * when this variable is set, cpu_has_work_* should return false.
> +     */
> +    int in_reset;

So I'd really rather not add another flag to the cpu structure,
especially since we'd then need to migrate it as well.

I'm pretty sure there should be a way to inhibit the unwanted
interrupts using existing mechanisms.

> +
>      CPU_COMMON
>  
>      /* MMU context - only relevant for full system emulation */
> diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
> index 56a0ab2..64f4348 100644
> --- a/target/ppc/translate_init.c
> +++ b/target/ppc/translate_init.c
> @@ -8561,6 +8561,9 @@ static bool cpu_has_work_POWER7(CPUState *cs)
>      CPUPPCState *env = &cpu->env;
>  
>      if (cs->halted) {
> +        if (env->in_reset) {
> +            return false;
> +        }
>          if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>              return false;
>          }
> @@ -8718,6 +8721,9 @@ static bool cpu_has_work_POWER8(CPUState *cs)
>      CPUPPCState *env = &cpu->env;
>  
>      if (cs->halted) {
> +        if (env->in_reset) {
> +            return false;
> +        }
>          if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>              return false;
>          }
> @@ -8899,6 +8905,9 @@ static bool cpu_has_work_POWER9(CPUState *cs)
>      CPUPPCState *env = &cpu->env;
>  
>      if (cs->halted) {
> +        if (env->in_reset) {
> +            return false;
> +        }
>          if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>              return false;
>          }
Nikunj A. Dadhania June 9, 2017, 5:02 a.m. UTC | #2
David Gibson <david@gibson.dropbear.id.au> writes:

> On Thu, Jun 08, 2017 at 12:06:08PM +0530, Nikunj A Dadhania wrote:
>> Rebooting a SMP TCG guest is broken for both single/multi threaded TCG.
>
> Ouch.  When exactly did this happen?

Broken since long

> I know that smp boot used to work under TCG, albeit very slowly.

SMP boot works, its the reboot issued from the guest doesn't boot and
crashes in SLOF.

>> When reset happens, all the CPUs are in halted state. First CPU is brought out
>> of reset and secondary CPUs would be initialized by the guest kernel using a
>> rtas call start-cpu.
>> 
>> However, in case of TCG, decrementer interrupts keep on coming and waking the
>> secondary CPUs up.
>
> Ok.. how is that happening given that the secondary CPUs should have
> MSR[EE] == 0?

Basically, the CPU is in halted condition and has_work() does not check
for MSR_EE in that case. But I am not sure if checking MSR_EE is
sufficient, as the CPU does go to halted state (idle) while running as
well.

static bool cpu_has_work_POWER8(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;

    if (cs->halted) {
       [ SNIP ]
       /* Does not check for msr_ee */
    } else {
        return msr_ee && (cs->interrupt_request & CPU_INTERRUPT_HARD);
    }
}

>
>> These secondary CPUs would see the decrementer interrupt pending, which makes
>> cpu::has_work() to bring them out of wait loop and start executing
>> tcg_exec_cpu().
>> 
>> The problem with this is all the CPUs wake up and start booting SLOF image,
>> causing the following exception(4 CPUs TCG VM):
>
> [snip]
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index d10808d..eb88bcb 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -1013,6 +1013,13 @@ struct CPUPPCState {
>>      int access_type; /* when a memory exception occurs, the access
>>                          type is stored here */
>>  
>> +    /* CPU in reset, shouldn't process any interrupts.
>> +     *
>> +     * Decrementer interrupts in TCG can still wake the CPU up. Make sure that
>> +     * when this variable is set, cpu_has_work_* should return false.
>> +     */
>> +    int in_reset;
>
> So I'd really rather not add another flag to the cpu structure,
> especially since we'd then need to migrate it as well.

I agree, Bharata and I did discuss about the migrate case. This patch
was to highlight the exact issue.

> I'm pretty sure there should be a way to inhibit the unwanted
> interrupts using existing mechanisms.

One of the thing that I had observed was msr had just MSR_SF bit set
during the reset case, we can test for that maybe.

The below works as well:

+        if ((env->msr & ~(1ull << MSR_SF)) == 0) {
+            return false;
+        }

>> +
>>      CPU_COMMON
>>  
>>      /* MMU context - only relevant for full system emulation */
>> diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
>> index 56a0ab2..64f4348 100644
>> --- a/target/ppc/translate_init.c
>> +++ b/target/ppc/translate_init.c
>> @@ -8561,6 +8561,9 @@ static bool cpu_has_work_POWER7(CPUState *cs)
>>      CPUPPCState *env = &cpu->env;
>>  
>>      if (cs->halted) {
>> +        if (env->in_reset) {
>> +            return false;
>> +        }
>>          if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>>              return false;
>>          }
>> @@ -8718,6 +8721,9 @@ static bool cpu_has_work_POWER8(CPUState *cs)
>>      CPUPPCState *env = &cpu->env;
>>  
>>      if (cs->halted) {
>> +        if (env->in_reset) {
>> +            return false;
>> +        }
>>          if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>>              return false;
>>          }
>> @@ -8899,6 +8905,9 @@ static bool cpu_has_work_POWER9(CPUState *cs)
>>      CPUPPCState *env = &cpu->env;
>>  
>>      if (cs->halted) {
>> +        if (env->in_reset) {
>> +            return false;
>> +        }
>>          if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
>>              return false;
>>          }

Regards
Nikunj
David Gibson June 9, 2017, 10:27 a.m. UTC | #3
On Fri, Jun 09, 2017 at 10:32:25AM +0530, Nikunj A Dadhania wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Thu, Jun 08, 2017 at 12:06:08PM +0530, Nikunj A Dadhania wrote:
> >> Rebooting a SMP TCG guest is broken for both single/multi threaded TCG.
> >
> > Ouch.  When exactly did this happen?
> 
> Broken since long
> 
> > I know that smp boot used to work under TCG, albeit very slowly.
> 
> SMP boot works, its the reboot issued from the guest doesn't boot and
> crashes in SLOF.

Oh, sorry, I misunderstood.

> 
> >> When reset happens, all the CPUs are in halted state. First CPU is brought out
> >> of reset and secondary CPUs would be initialized by the guest kernel using a
> >> rtas call start-cpu.
> >> 
> >> However, in case of TCG, decrementer interrupts keep on coming and waking the
> >> secondary CPUs up.
> >
> > Ok.. how is that happening given that the secondary CPUs should have
> > MSR[EE] == 0?
> 
> Basically, the CPU is in halted condition and has_work() does not check
> for MSR_EE in that case. But I am not sure if checking MSR_EE is
> sufficient, as the CPU does go to halted state (idle) while running as
> well.

Ok, but we definitely should be able to fix this without new
variables.  If we can quiesce the secondary CPUs for the first boot,
we should be able to duplicate that for subsequent boots.
diff mbox

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 01dda9e..fba2ef5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1370,6 +1370,7 @@  static void ppc_spapr_reset(void)
     first_ppc_cpu->env.gpr[3] = fdt_addr;
     first_ppc_cpu->env.gpr[5] = 0;
     first_cpu->halted = 0;
+    first_ppc_cpu->env.in_reset = 0;
     first_ppc_cpu->env.nip = SPAPR_ENTRY_POINT;
 
     spapr->cas_reboot = false;
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index 029a141..c100213 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -33,6 +33,7 @@  static void spapr_cpu_reset(void *opaque)
      * reset code and the rest are explicitly started up by the guest
      * using an RTAS call */
     cs->halted = 1;
+    env->in_reset = 1;
 
     env->spr[SPR_HIOR] = 0;
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 94a2799..eaf0afb 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -177,6 +177,7 @@  static void rtas_start_cpu(PowerPCCPU *cpu_, sPAPRMachineState *spapr,
         env->nip = start;
         env->gpr[3] = r3;
         cs->halted = 0;
+        env->in_reset = 0;
         spapr_cpu_set_endianness(cpu);
         spapr_cpu_update_tb_offset(cpu);
 
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index d10808d..eb88bcb 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1013,6 +1013,13 @@  struct CPUPPCState {
     int access_type; /* when a memory exception occurs, the access
                         type is stored here */
 
+    /* CPU in reset, shouldn't process any interrupts.
+     *
+     * Decrementer interrupts in TCG can still wake the CPU up. Make sure that
+     * when this variable is set, cpu_has_work_* should return false.
+     */
+    int in_reset;
+
     CPU_COMMON
 
     /* MMU context - only relevant for full system emulation */
diff --git a/target/ppc/translate_init.c b/target/ppc/translate_init.c
index 56a0ab2..64f4348 100644
--- a/target/ppc/translate_init.c
+++ b/target/ppc/translate_init.c
@@ -8561,6 +8561,9 @@  static bool cpu_has_work_POWER7(CPUState *cs)
     CPUPPCState *env = &cpu->env;
 
     if (cs->halted) {
+        if (env->in_reset) {
+            return false;
+        }
         if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
             return false;
         }
@@ -8718,6 +8721,9 @@  static bool cpu_has_work_POWER8(CPUState *cs)
     CPUPPCState *env = &cpu->env;
 
     if (cs->halted) {
+        if (env->in_reset) {
+            return false;
+        }
         if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
             return false;
         }
@@ -8899,6 +8905,9 @@  static bool cpu_has_work_POWER9(CPUState *cs)
     CPUPPCState *env = &cpu->env;
 
     if (cs->halted) {
+        if (env->in_reset) {
+            return false;
+        }
         if (!(cs->interrupt_request & CPU_INTERRUPT_HARD)) {
             return false;
         }