diff mbox

kvmclock: set scheduler clock stable

Message ID 20150423171242.5264bcb5@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Luiz Capitulino April 23, 2015, 9:12 p.m. UTC
If you try to enable NOHZ_FULL on a guest today, you'll get
the following error when the guest tries to deactivate the
scheduler tick:

 WARNING: CPU: 3 PID: 2182 at kernel/time/tick-sched.c:192 can_stop_full_tick+0xb9/0x290()
 NO_HZ FULL will not work with unstable sched clock
 CPU: 3 PID: 2182 Comm: kworker/3:1 Not tainted 4.0.0-10545-gb9bb6fb #204
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 Workqueue: events flush_to_ldisc
  ffffffff8162a0c7 ffff88011f583e88 ffffffff814e6ba0 0000000000000002
  ffff88011f583ed8 ffff88011f583ec8 ffffffff8104d095 ffff88011f583eb8
  0000000000000000 0000000000000003 0000000000000001 0000000000000001
 Call Trace:
  <IRQ>  [<ffffffff814e6ba0>] dump_stack+0x4f/0x7b
  [<ffffffff8104d095>] warn_slowpath_common+0x85/0xc0
  [<ffffffff8104d146>] warn_slowpath_fmt+0x46/0x50
  [<ffffffff810bd2a9>] can_stop_full_tick+0xb9/0x290
  [<ffffffff810bd9ed>] tick_nohz_irq_exit+0x8d/0xb0
  [<ffffffff810511c5>] irq_exit+0xc5/0x130
  [<ffffffff814f180a>] smp_apic_timer_interrupt+0x4a/0x60
  [<ffffffff814eff5e>] apic_timer_interrupt+0x6e/0x80
  <EOI>  [<ffffffff814ee5d1>] ? _raw_spin_unlock_irqrestore+0x31/0x60
  [<ffffffff8108bbc8>] __wake_up+0x48/0x60
  [<ffffffff8134836c>] n_tty_receive_buf_common+0x49c/0xba0
  [<ffffffff8134a6bf>] ? tty_ldisc_ref+0x1f/0x70
  [<ffffffff81348a84>] n_tty_receive_buf2+0x14/0x20
  [<ffffffff8134b390>] flush_to_ldisc+0xe0/0x120
  [<ffffffff81064d05>] process_one_work+0x1d5/0x540
  [<ffffffff81064c81>] ? process_one_work+0x151/0x540
  [<ffffffff81065191>] worker_thread+0x121/0x470
  [<ffffffff81065070>] ? process_one_work+0x540/0x540
  [<ffffffff8106b4df>] kthread+0xef/0x110
  [<ffffffff8106b3f0>] ? __kthread_parkme+0xa0/0xa0
  [<ffffffff814ef4f2>] ret_from_fork+0x42/0x70
  [<ffffffff8106b3f0>] ? __kthread_parkme+0xa0/0xa0
 ---[ end trace 06e3507544a38866 ]---

However, it turns out that kvmclock does provide a stable
sched_clock callback. So, let the scheduler know this, which
in turn makes NOHZ_FULL work in the guest.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
---

PS: The original author of this patch is Marcelo. I did most of the
testing and backported it to an older real-time kernel tree. It works
like a charm.

 arch/x86/kernel/kvmclock.c | 3 +++
 1 file changed, 3 insertions(+)

Comments

Marcelo Tosatti April 29, 2015, 1:47 a.m. UTC | #1
On Thu, Apr 23, 2015 at 05:12:42PM -0400, Luiz Capitulino wrote:
> If you try to enable NOHZ_FULL on a guest today, you'll get
> the following error when the guest tries to deactivate the
> scheduler tick:
> 
>  WARNING: CPU: 3 PID: 2182 at kernel/time/tick-sched.c:192 can_stop_full_tick+0xb9/0x290()
>  NO_HZ FULL will not work with unstable sched clock
>  CPU: 3 PID: 2182 Comm: kworker/3:1 Not tainted 4.0.0-10545-gb9bb6fb #204
>  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
>  Workqueue: events flush_to_ldisc
>   ffffffff8162a0c7 ffff88011f583e88 ffffffff814e6ba0 0000000000000002
>   ffff88011f583ed8 ffff88011f583ec8 ffffffff8104d095 ffff88011f583eb8
>   0000000000000000 0000000000000003 0000000000000001 0000000000000001
>  Call Trace:
>   <IRQ>  [<ffffffff814e6ba0>] dump_stack+0x4f/0x7b
>   [<ffffffff8104d095>] warn_slowpath_common+0x85/0xc0
>   [<ffffffff8104d146>] warn_slowpath_fmt+0x46/0x50
>   [<ffffffff810bd2a9>] can_stop_full_tick+0xb9/0x290
>   [<ffffffff810bd9ed>] tick_nohz_irq_exit+0x8d/0xb0
>   [<ffffffff810511c5>] irq_exit+0xc5/0x130
>   [<ffffffff814f180a>] smp_apic_timer_interrupt+0x4a/0x60
>   [<ffffffff814eff5e>] apic_timer_interrupt+0x6e/0x80
>   <EOI>  [<ffffffff814ee5d1>] ? _raw_spin_unlock_irqrestore+0x31/0x60
>   [<ffffffff8108bbc8>] __wake_up+0x48/0x60
>   [<ffffffff8134836c>] n_tty_receive_buf_common+0x49c/0xba0
>   [<ffffffff8134a6bf>] ? tty_ldisc_ref+0x1f/0x70
>   [<ffffffff81348a84>] n_tty_receive_buf2+0x14/0x20
>   [<ffffffff8134b390>] flush_to_ldisc+0xe0/0x120
>   [<ffffffff81064d05>] process_one_work+0x1d5/0x540
>   [<ffffffff81064c81>] ? process_one_work+0x151/0x540
>   [<ffffffff81065191>] worker_thread+0x121/0x470
>   [<ffffffff81065070>] ? process_one_work+0x540/0x540
>   [<ffffffff8106b4df>] kthread+0xef/0x110
>   [<ffffffff8106b3f0>] ? __kthread_parkme+0xa0/0xa0
>   [<ffffffff814ef4f2>] ret_from_fork+0x42/0x70
>   [<ffffffff8106b3f0>] ? __kthread_parkme+0xa0/0xa0
>  ---[ end trace 06e3507544a38866 ]---
> 
> However, it turns out that kvmclock does provide a stable
> sched_clock callback. So, let the scheduler know this which
> in turn makes NOHZ_FULL work in the guest.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
> ---
> 
> PS: Original author of this patch is Marcelo. I did most of the
> testing and backported it to an older real-time kernel tree. Works
> like a charm.
> 
>  arch/x86/kernel/kvmclock.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index 42caaef..4e03921 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -24,6 +24,7 @@
>  #include <linux/percpu.h>
>  #include <linux/hardirq.h>
>  #include <linux/memblock.h>
> +#include <linux/sched.h>
>  
>  #include <asm/x86_init.h>
>  #include <asm/reboot.h>
> @@ -265,6 +266,8 @@ void __init kvmclock_init(void)
>  
>  	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
>  		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
> +
> +	set_sched_clock_stable();
>  }
>  
>  int __init kvm_setup_vsyscall_timeinfo(void)
> -- 
> 1.9.3
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Ping?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini April 29, 2015, 8:15 a.m. UTC | #2
On 29/04/2015 03:47, Marcelo Tosatti wrote:
> On Thu, Apr 23, 2015 at 05:12:42PM -0400, Luiz Capitulino wrote:
>> If you try to enable NOHZ_FULL on a guest today, you'll get
>> the following error when the guest tries to deactivate the
>> scheduler tick:
>>
>>  WARNING: CPU: 3 PID: 2182 at kernel/time/tick-sched.c:192 can_stop_full_tick+0xb9/0x290()
>>  NO_HZ FULL will not work with unstable sched clock
>>  CPU: 3 PID: 2182 Comm: kworker/3:1 Not tainted 4.0.0-10545-gb9bb6fb #204
>>  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
>>  Workqueue: events flush_to_ldisc
>>   ffffffff8162a0c7 ffff88011f583e88 ffffffff814e6ba0 0000000000000002
>>   ffff88011f583ed8 ffff88011f583ec8 ffffffff8104d095 ffff88011f583eb8
>>   0000000000000000 0000000000000003 0000000000000001 0000000000000001
>>  Call Trace:
>>   <IRQ>  [<ffffffff814e6ba0>] dump_stack+0x4f/0x7b
>>   [<ffffffff8104d095>] warn_slowpath_common+0x85/0xc0
>>   [<ffffffff8104d146>] warn_slowpath_fmt+0x46/0x50
>>   [<ffffffff810bd2a9>] can_stop_full_tick+0xb9/0x290
>>   [<ffffffff810bd9ed>] tick_nohz_irq_exit+0x8d/0xb0
>>   [<ffffffff810511c5>] irq_exit+0xc5/0x130
>>   [<ffffffff814f180a>] smp_apic_timer_interrupt+0x4a/0x60
>>   [<ffffffff814eff5e>] apic_timer_interrupt+0x6e/0x80
>>   <EOI>  [<ffffffff814ee5d1>] ? _raw_spin_unlock_irqrestore+0x31/0x60
>>   [<ffffffff8108bbc8>] __wake_up+0x48/0x60
>>   [<ffffffff8134836c>] n_tty_receive_buf_common+0x49c/0xba0
>>   [<ffffffff8134a6bf>] ? tty_ldisc_ref+0x1f/0x70
>>   [<ffffffff81348a84>] n_tty_receive_buf2+0x14/0x20
>>   [<ffffffff8134b390>] flush_to_ldisc+0xe0/0x120
>>   [<ffffffff81064d05>] process_one_work+0x1d5/0x540
>>   [<ffffffff81064c81>] ? process_one_work+0x151/0x540
>>   [<ffffffff81065191>] worker_thread+0x121/0x470
>>   [<ffffffff81065070>] ? process_one_work+0x540/0x540
>>   [<ffffffff8106b4df>] kthread+0xef/0x110
>>   [<ffffffff8106b3f0>] ? __kthread_parkme+0xa0/0xa0
>>   [<ffffffff814ef4f2>] ret_from_fork+0x42/0x70
>>   [<ffffffff8106b3f0>] ? __kthread_parkme+0xa0/0xa0
>>  ---[ end trace 06e3507544a38866 ]---
>>
>> However, it turns out that kvmclock does provide a stable
>> sched_clock callback. So, let the scheduler know this which
>> in turn makes NOHZ_FULL work in the guest.
>>
>> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
>> Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
>> ---
>>
>> PS: Original author of this patch is Marcelo. I did most of the
>> testing and backported it to an older real-time kernel tree. Works
>> like a charm.
>>
>>  arch/x86/kernel/kvmclock.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
>> index 42caaef..4e03921 100644
>> --- a/arch/x86/kernel/kvmclock.c
>> +++ b/arch/x86/kernel/kvmclock.c
>> @@ -24,6 +24,7 @@
>>  #include <linux/percpu.h>
>>  #include <linux/hardirq.h>
>>  #include <linux/memblock.h>
>> +#include <linux/sched.h>
>>  
>>  #include <asm/x86_init.h>
>>  #include <asm/reboot.h>
>> @@ -265,6 +266,8 @@ void __init kvmclock_init(void)
>>  
>>  	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
>>  		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
>> +
>> +	set_sched_clock_stable();
>>  }
>>  
>>  int __init kvm_setup_vsyscall_timeinfo(void)
>> -- 
>> 1.9.3
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> Ping?

6 days, half of them during the merge window, is not such a terrible
roundtrip.  Anyhow, I've applied the patch and will probably push to
kvm/queue today.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 42caaef..4e03921 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -24,6 +24,7 @@ 
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/memblock.h>
+#include <linux/sched.h>
 
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
@@ -265,6 +266,8 @@  void __init kvmclock_init(void)
 
 	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
 		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+
+	set_sched_clock_stable();
 }
 
 int __init kvm_setup_vsyscall_timeinfo(void)