Message ID | 1370887961-31569-1-git-send-email-swarren@wwwdotorg.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Stephen, On Mon, Jun 10, 2013 at 07:12:41PM +0100, Stephen Warren wrote: > From: Stephen Warren <swarren@nvidia.com> > > machine_shutdown() is a hook for kexec. Add a comment saying so, since > it isn't obvious from the function name. I'd go as far as saying that some of this commit log could be included in the comment too, since this comes up time and time again and it's never clear! Anyway, a few comments inline. > diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig > index 42d6ea2..d7b3d2e 100644 > --- a/arch/arm/Kconfig > +++ b/arch/arm/Kconfig > @@ -2028,7 +2028,7 @@ config XIP_PHYS_ADDR > > config KEXEC > bool "Kexec system call (EXPERIMENTAL)" > - depends on (!SMP || HOTPLUG_CPU) > + depends on (!SMP || (HOTPLUG_CPU && PM_SLEEP_SMP)) PM_SLEEP_SMP selects HOTPLUG_CPU. > help > kexec is a system call that implements the ability to shutdown your > current kernel, and to start another kernel. It is like a reboot > diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c > index 282de48..dbe1692 100644 > --- a/arch/arm/kernel/process.c > +++ b/arch/arm/kernel/process.c > @@ -97,13 +97,14 @@ void soft_restart(unsigned long addr) > { > u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); > > + BUG_ON(num_online_cpus() > 1); I think this is overkill, and we could actually scream and try to return an error here (we've not yet switched stack, and the upper layers of kexec look like they can do something with an error code). > /* Disable interrupts first */ > local_irq_disable(); > local_fiq_disable(); > > - /* Disable the L2 if we're the last man standing. */ > - if (num_online_cpus() == 1) > - outer_disable(); > + /* Disable the L2 */ > + outer_disable(); > > /* Change to the new stack and continue with the reset. */ > call_with_stack(__soft_restart, (void *)addr, (void *)stack); > @@ -184,30 +185,41 @@ int __init reboot_setup(char *str) > > __setup("reboot=", reboot_setup); > > +/* For kexec */ > void machine_shutdown(void) > { > -#ifdef CONFIG_SMP > - smp_send_stop(); > +#ifdef CONFIG_PM_SLEEP_SMP > + disable_nonboot_cpus(); > #endif You can lose the #ifdef here. > + > + BUG_ON(num_online_cpus() > 1); Maybe redefine machine_shutdown if !kexec and lose this BUG? > void machine_halt(void) > { > - machine_shutdown(); > +#ifdef CONFIG_SMP > + smp_send_stop(); > +#endif Don't need the #ifdef. > + > local_irq_disable(); > while (1); > } > > void machine_power_off(void) > { > - machine_shutdown(); > +#ifdef CONFIG_SMP > + smp_send_stop(); > +#endif > + > if (pm_power_off) > pm_power_off(); > } > > void machine_restart(char *cmd) > { > - machine_shutdown(); > +#ifdef CONFIG_SMP > + smp_send_stop(); > +#endif Similarly for these two (power_off and restart). Will
On 06/11/2013 11:23 AM, Will Deacon wrote: > Hi Stephen, > > On Mon, Jun 10, 2013 at 07:12:41PM +0100, Stephen Warren wrote: >> From: Stephen Warren <swarren@nvidia.com> >> >> machine_shutdown() is a hook for kexec. Add a comment saying so, since >> it isn't obvious from the function name. > > I'd go as far as saying that some of this commit log could be included in > the comment too, since this comes up time and time again and it's never > clear! OK, I'll add some expanded comments to each of the functions. >> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig >> index 42d6ea2..d7b3d2e 100644 >> --- a/arch/arm/Kconfig >> +++ b/arch/arm/Kconfig >> @@ -2028,7 +2028,7 @@ config XIP_PHYS_ADDR >> >> config KEXEC >> bool "Kexec system call (EXPERIMENTAL)" >> - depends on (!SMP || HOTPLUG_CPU) >> + depends on (!SMP || (HOTPLUG_CPU && PM_SLEEP_SMP)) > > PM_SLEEP_SMP selects HOTPLUG_CPU. Ah, that makes more sense; it explains how code under /just/ #ifdef PM_SLEEP_SMP can interact with hotplug-related code. I guess I just checked HOTPLUG for a select/depends and not the other way around. >> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c >> @@ -97,13 +97,14 @@ void soft_restart(unsigned long addr) >> { >> u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); >> >> + BUG_ON(num_online_cpus() > 1); > > I think this is overkill, and we could actually scream and try to return an > error here (we've not yet switched stack, and the upper layers of kexec look > like they can do something with an error code). Hmmm. The function returns void, although I suppose I could look into changing that too? >> +/* For kexec */ >> void machine_shutdown(void) >> { >> -#ifdef CONFIG_SMP >> - smp_send_stop(); >> +#ifdef CONFIG_PM_SLEEP_SMP >> + disable_nonboot_cpus(); >> #endif > > You can lose the #ifdef here. The implementation of disable_nonboot_cpus() is #ifdef CONFIG_PM_SLEEP_SMP, so I think I need that to avoid build errors. >> + BUG_ON(num_online_cpus() > 1); > > Maybe redefine machine_shutdown if !kexec and lose this BUG? IIUC, machine_shutdown() is only used for kexec now, so I don't think an alternative implementation is required in the !kexec case? >> void machine_halt(void) >> { >> - machine_shutdown(); >> +#ifdef CONFIG_SMP >> + smp_send_stop(); >> +#endif > > Don't need the #ifdef. Oh right; I hadn't noticed the inline in smp.h.
On Tue, Jun 11, 2013 at 07:08:40PM +0100, Stephen Warren wrote: > On 06/11/2013 11:23 AM, Will Deacon wrote: > > On Mon, Jun 10, 2013 at 07:12:41PM +0100, Stephen Warren wrote: > >> From: Stephen Warren <swarren@nvidia.com> > >> > >> machine_shutdown() is a hook for kexec. Add a comment saying so, since > >> it isn't obvious from the function name. > > > > I'd go as far as saying that some of this commit log could be included in > > the comment too, since this comes up time and time again and it's never > > clear! > > OK, I'll add some expanded comments to each of the functions. Great, cheers. > >> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c > > >> @@ -97,13 +97,14 @@ void soft_restart(unsigned long addr) > >> { > >> u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); > >> > >> + BUG_ON(num_online_cpus() > 1); > > > > I think this is overkill, and we could actually scream and try to return an > > error here (we've not yet switched stack, and the upper layers of kexec look > > like they can do something with an error code). > > Hmmm. The function returns void, although I suppose I could look into > changing that too? Yes. Usually this is a path of no return, but if we detect an error before we've changed stack then I think we should at least try to propogate the error. > >> +/* For kexec */ > >> void machine_shutdown(void) > >> { > >> -#ifdef CONFIG_SMP > >> - smp_send_stop(); > >> +#ifdef CONFIG_PM_SLEEP_SMP > >> + disable_nonboot_cpus(); > >> #endif > > > > You can lose the #ifdef here. > > The implementation of disable_nonboot_cpus() is #ifdef > CONFIG_PM_SLEEP_SMP, so I think I need that to avoid build errors. Hmm, my include/linux/cpu.h has a dummy definition in an #else block, which simply returns 0. What kernel are you using? > >> + BUG_ON(num_online_cpus() > 1); > > > > Maybe redefine machine_shutdown if !kexec and lose this BUG? > > IIUC, machine_shutdown() is only used for kexec now, so I don't think an > alternative implementation is required in the !kexec case? Yes, you're right. In fact, since we don't support KEXEC_JUMP on ARM, we could dispense with machine_shutdown altogether (just make it do nothing) and move the code into machine_kexec, which has a much better name. What do you think? Will
On 06/11/2013 12:29 PM, Will Deacon wrote: > On Tue, Jun 11, 2013 at 07:08:40PM +0100, Stephen Warren wrote: >> On 06/11/2013 11:23 AM, Will Deacon wrote: >>> On Mon, Jun 10, 2013 at 07:12:41PM +0100, Stephen Warren wrote: >>>> From: Stephen Warren <swarren@nvidia.com> >>>> >>>> machine_shutdown() is a hook for kexec. Add a comment saying so, since >>>> it isn't obvious from the function name. >>>> +/* For kexec */ >>>> void machine_shutdown(void) >>>> { >>>> -#ifdef CONFIG_SMP >>>> - smp_send_stop(); >>>> +#ifdef CONFIG_PM_SLEEP_SMP >>>> + disable_nonboot_cpus(); >>>> #endif >>> >>> You can lose the #ifdef here. >> >> The implementation of disable_nonboot_cpus() is #ifdef >> CONFIG_PM_SLEEP_SMP, so I think I need that to avoid build errors. > > Hmm, my include/linux/cpu.h has a dummy definition in an #else block, which > simply returns 0. What kernel are you using? Ah right. I keep forgetting about those static inlines:-( >>>> + BUG_ON(num_online_cpus() > 1); >>> >>> Maybe redefine machine_shutdown if !kexec and lose this BUG? >> >> IIUC, machine_shutdown() is only used for kexec now, so I don't think an >> alternative implementation is required in the !kexec case? > > Yes, you're right. In fact, since we don't support KEXEC_JUMP on ARM, we > could dispense with machine_shutdown altogether (just make it do nothing) > and move the code into machine_kexec, which has a much better name. > > What do you think? That seems fine to me. Or, if I'm changing the prototype of machine_shutdown(), I could just rename it to e.g. machine_kexec_shutdown() while I'm at it? Actually, I was wondering if ARM's implementation wasn't already KEXEC_JUMP; after all, it uses soft_restart(), which simply jumps to the new kernel rather than doing any kind of HW reset, right? I must admit though, the Kconfig for KEXEC_JUMP doesn't really help me understand what the difference is supposed to be.
On 06/11/2013 11:23 AM, Will Deacon wrote: > Hi Stephen, > > On Mon, Jun 10, 2013 at 07:12:41PM +0100, Stephen Warren wrote: >> From: Stephen Warren <swarren@nvidia.com> >> >> machine_shutdown() is a hook for kexec. Add a comment saying so, since >> it isn't obvious from the function name. >> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c >> index 282de48..dbe1692 100644 >> --- a/arch/arm/kernel/process.c >> +++ b/arch/arm/kernel/process.c >> @@ -97,13 +97,14 @@ void soft_restart(unsigned long addr) >> { >> u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); >> >> + BUG_ON(num_online_cpus() > 1); > > I think this is overkill, and we could actually scream and try to return an > error here (we've not yet switched stack, and the upper layers of kexec look > like they can do something with an error code). Oh, that's the BUG_ON I added to soft_restart() not the one I added into machine_shutdown(). I added this one to make sure nobody was using soft_restart() on an SMP machine; Russell had asked to validate that all SMP systems provided a HW restart implementation. I assume your comments re: aborting the kexec by returning an error code apply more to machine_shutdown() which happens a bunch earlier.
Hi Stephen, On Tue, Jun 11, 2013 at 07:38:17PM +0100, Stephen Warren wrote: > On 06/11/2013 12:29 PM, Will Deacon wrote: > > Yes, you're right. In fact, since we don't support KEXEC_JUMP on ARM, we > > could dispense with machine_shutdown altogether (just make it do nothing) > > and move the code into machine_kexec, which has a much better name. > > > > What do you think? > > That seems fine to me. Or, if I'm changing the prototype of > machine_shutdown(), I could just rename it to e.g. > machine_kexec_shutdown() while I'm at it? Probably best to leave it alone as it has a declaration in linux/reboot.h and is referenced directly by the core kexec code. Moving all this into machine_kexec is a better move. > Actually, I was wondering if ARM's implementation wasn't already > KEXEC_JUMP; after all, it uses soft_restart(), which simply jumps to the > new kernel rather than doing any kind of HW reset, right? I must admit > though, the Kconfig for KEXEC_JUMP doesn't really help me understand > what the difference is supposed to be. I think KEXEC_JUMP keeps both of the kernels `alive' at the same time, so you can `jump' between them -- that's certainly not what we do on ARM. Will
On Tue, Jun 11, 2013 at 07:41:06PM +0100, Stephen Warren wrote: > On 06/11/2013 11:23 AM, Will Deacon wrote: > > Hi Stephen, > > > > On Mon, Jun 10, 2013 at 07:12:41PM +0100, Stephen Warren wrote: > >> From: Stephen Warren <swarren@nvidia.com> > >> > >> machine_shutdown() is a hook for kexec. Add a comment saying so, since > >> it isn't obvious from the function name. > > >> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c > >> index 282de48..dbe1692 100644 > >> --- a/arch/arm/kernel/process.c > >> +++ b/arch/arm/kernel/process.c > >> @@ -97,13 +97,14 @@ void soft_restart(unsigned long addr) > >> { > >> u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); > >> > >> + BUG_ON(num_online_cpus() > 1); > > > > I think this is overkill, and we could actually scream and try to return an > > error here (we've not yet switched stack, and the upper layers of kexec look > > like they can do something with an error code). > > Oh, that's the BUG_ON I added to soft_restart() not the one I added into > machine_shutdown(). > > I added this one to make sure nobody was using soft_restart() on an SMP > machine; Russell had asked to validate that all SMP systems provided a > HW restart implementation. > > I assume your comments re: aborting the kexec by returning an error code > apply more to machine_shutdown() which happens a bunch earlier. No, I did mean soft_restart! For kexec, this hangs off machine_kexec, which seems to propogate errors correctly. For invocations via the machine descriptor (i.e. arm_pm_restart), we'll print out `reboot failed' and go into a while(1) loop. Will
On Tue, Jun 11, 2013 at 07:29:27PM +0100, Will Deacon wrote: > Yes. Usually this is a path of no return, but if we detect an error before > we've changed stack then I think we should at least try to propogate the > error. Except, if you make soft_restart able to return an error, then you really need machine_restart() to do the same, but that's a cross-arch function. And what do you do when you hit a soft-watchdog event and it calls machine_restart(), which then decides to fail? Or a panic event and machine_restart() fails?
On 06/12/2013 12:09 PM, Russell King - ARM Linux wrote: > On Tue, Jun 11, 2013 at 07:29:27PM +0100, Will Deacon wrote: >> Yes. Usually this is a path of no return, but if we detect an error before >> we've changed stack then I think we should at least try to propogate the >> error. > > Except, if you make soft_restart able to return an error, then you really > need machine_restart() to do the same, but that's a cross-arch function. I was tempted to just printk and make them return. It looks like kexec will just return back out of its call path without issue. I suppose adjusting the function signature of machine_kexec() to propagate an error code back would be useful, and probably wouldn't be too much churn. > And what do you do when you hit a soft-watchdog event and it calls > machine_restart(), which then decides to fail? Or a panic event and > machine_restart() fails? Well, if you're using soft_restart with SMP, those case already don't work, right?
On 06/12/2013 11:01 AM, Will Deacon wrote: > On Tue, Jun 11, 2013 at 07:41:06PM +0100, Stephen Warren wrote: >> On 06/11/2013 11:23 AM, Will Deacon wrote: >>> Hi Stephen, >>> >>> On Mon, Jun 10, 2013 at 07:12:41PM +0100, Stephen Warren wrote: >>>> From: Stephen Warren <swarren@nvidia.com> >>>> >>>> machine_shutdown() is a hook for kexec. Add a comment saying so, since >>>> it isn't obvious from the function name. >> >>>> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c >>>> index 282de48..dbe1692 100644 >>>> --- a/arch/arm/kernel/process.c >>>> +++ b/arch/arm/kernel/process.c >>>> @@ -97,13 +97,14 @@ void soft_restart(unsigned long addr) >>>> { >>>> u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); >>>> >>>> + BUG_ON(num_online_cpus() > 1); >>> >>> I think this is overkill, and we could actually scream and try to return an >>> error here (we've not yet switched stack, and the upper layers of kexec look >>> like they can do something with an error code). >> >> Oh, that's the BUG_ON I added to soft_restart() not the one I added into >> machine_shutdown(). >> >> I added this one to make sure nobody was using soft_restart() on an SMP >> machine; Russell had asked to validate that all SMP systems provided a >> HW restart implementation. >> >> I assume your comments re: aborting the kexec by returning an error code >> apply more to machine_shutdown() which happens a bunch earlier. > > No, I did mean soft_restart! For kexec, this hangs off machine_kexec, which > seems to propogate errors correctly. I've confirmed that simply returning from soft_restart() and hence machine_kexec() works fine. The only issue in doing so without modifying machine_kexec() to return an error-code is that kernel_kexec() doesn't set variable "error" (which it later returns) and it defaults to 0, so you see the following in user-space: > ./keroot@localhost:~# ./kexec.sh > mount: / is busy > [ 23.065901] Starting new kernel > [ 23.073486] Bye! > [ 23.080596] soft_restart() called with multiple CPUs online > kexec failed: Success > root@localhost:~# So, at least kernel_kexec() needs fixing to return an error in this case, and possibly to propagate one returned by machine_kexec() I suppose. > For invocations via the machine > descriptor (i.e. arm_pm_restart), we'll print out `reboot failed' and go > into a while(1) loop. So, there's one problem... > void machine_restart(char *cmd) > { > smp_send_stop(); > > arm_pm_restart(reboot_mode, cmd); Inside smp_send_stop(), all CPUs get set to offline, even though they aren't really, they're just pinned. So, if I make: > void soft_restart(unsigned long addr) > { > u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); > > if (num_online_cpus() > 1) { > pr_err("soft_restart() called with multiple CPUs online\n"); > return; > } Then the check never fires, so the message printed by the balance of machine_restart() doesn't print. Maybe I shouldn't try to solve the soft_restart()-used-as-.restart-on-an-SMP-machine issue in this patch: I should remove the if() I quoted above from soft_restart(), and put it directly into machine_kexec(). That wouldn't change any of the machine_restart() behaviour that we have today, but would correctly error-check the kexec case. Does that sound reasonable?
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 42d6ea2..d7b3d2e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2028,7 +2028,7 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" - depends on (!SMP || HOTPLUG_CPU) + depends on (!SMP || (HOTPLUG_CPU && PM_SLEEP_SMP)) help kexec is a system call that implements the ability to shutdown your current kernel, and to start another kernel. It is like a reboot diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 282de48..dbe1692 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -97,13 +97,14 @@ void soft_restart(unsigned long addr) { u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); + BUG_ON(num_online_cpus() > 1); + /* Disable interrupts first */ local_irq_disable(); local_fiq_disable(); - /* Disable the L2 if we're the last man standing. */ - if (num_online_cpus() == 1) - outer_disable(); + /* Disable the L2 */ + outer_disable(); /* Change to the new stack and continue with the reset. */ call_with_stack(__soft_restart, (void *)addr, (void *)stack); @@ -184,30 +185,41 @@ int __init reboot_setup(char *str) __setup("reboot=", reboot_setup); +/* For kexec */ void machine_shutdown(void) { -#ifdef CONFIG_SMP - smp_send_stop(); +#ifdef CONFIG_PM_SLEEP_SMP + disable_nonboot_cpus(); #endif + + BUG_ON(num_online_cpus() > 1); } void machine_halt(void) { - machine_shutdown(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + local_irq_disable(); while (1); } void machine_power_off(void) { - machine_shutdown(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + if (pm_power_off) pm_power_off(); } void machine_restart(char *cmd) { - machine_shutdown(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif arm_pm_restart(reboot_mode, cmd); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 550d63c..5919eb4 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -651,17 +651,6 @@ void smp_send_reschedule(int cpu) smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } -#ifdef CONFIG_HOTPLUG_CPU -static void smp_kill_cpus(cpumask_t *mask) -{ - unsigned int cpu; - for_each_cpu(cpu, mask) - platform_cpu_kill(cpu); -} -#else -static void smp_kill_cpus(cpumask_t *mask) { } -#endif - void smp_send_stop(void) { unsigned long timeout; @@ -679,8 +668,6 @@ void smp_send_stop(void) if (num_online_cpus() > 1) pr_warning("SMP: failed to stop secondary CPUs\n"); - - smp_kill_cpus(&mask); } /*