Message ID: 1498130534-26568-3-git-send-email-root@ip-172-31-39-62.us-west-2.compute.internal (mailing list archive)
State: New, archived
On 22/06/2017 13:22, root wrote:
> ==============================================================
>
> +poll_grow: (X86 only)
> +
> +This parameter is multiplied in the grow_poll_ns() to increase the poll time.
> +By default, the values is 2.
> +
> +==============================================================
> +poll_shrink: (X86 only)
> +
> +This parameter is divided in the shrink_poll_ns() to reduce the poll time.
> +By default, the values is 2.

Even before starting the debate on whether this is a good idea or a bad
idea, KVM reduces the polling value to the minimum (10 us) by default
when polling fails.  Also, it shouldn't be bound to
CONFIG_HYPERVISOR_GUEST, since there's nothing specific to virtual
machines here.

Regarding the good/bad idea part, KVM's polling is made much more
acceptable by single_task_running().  At least you need to integrate it
with paravirtualization.  If the VM is scheduled out, you shrink the
polling period.  There is already vcpu_is_preempted for this, it is used
by mutexes.

Paolo
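[Illustrative sketch, not the actual kvm_vcpu_block() code: the point of
single_task_running() in KVM's host-side halt polling is that the loop
bails out as soon as anything else is runnable on this CPU, so polling
only burns cycles that would otherwise be idle. The function below and
its surrounding names are hypothetical; single_task_running() and
kvm_arch_vcpu_runnable() are real kernel symbols.]

    #include <linux/kvm_host.h>
    #include <linux/ktime.h>
    #include <linux/sched/stat.h>

    static bool halt_poll(struct kvm_vcpu *vcpu, ktime_t deadline)
    {
    	while (ktime_before(ktime_get(), deadline)) {
    		/* Another task wants this CPU: stop polling, go schedule. */
    		if (!single_task_running())
    			return false;
    		/* An event (interrupt, timer, ...) arrived: poll succeeded. */
    		if (kvm_arch_vcpu_runnable(vcpu))
    			return true;
    		cpu_relax();
    	}
    	return false;	/* poll failed; the caller shrinks the poll window */
    }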
On Thu, 22 Jun 2017, root wrote:
> @@ -962,6 +962,7 @@ __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
>  	 * interrupt lock, which is the WrongThing (tm) to do.
>  	 */
>  	entering_ack_irq();
> +	check_poll();

No way, that we sprinkle this function into every interrupt hotpath. There
are enough genuine ways to do that w/o touching a gazillion of files.

>  #ifdef CONFIG_HYPERVISOR_GUEST
> +static unsigned int grow_poll_ns(unsigned int old, unsigned int grow,
> +				 unsigned int max)
> +{
> +	unsigned int val;
> +
> +	/* 10us as base poll duration */
> +	if (old == 0 && grow)
> +		return 10000;
> +
> +	val = old * grow;
> +	if (val > max)
> +		val = max;
> +
> +	return val;
> +}
> +
> +static unsigned int shrink_poll_ns(unsigned int old, unsigned int shrink)
> +{
> +	if (shrink == 0)
> +		return 0;
> +
> +	return old / shrink;
> +}
> +
> +void check_poll(void)
> +{
> +	unsigned int val, poll_duration;
> +	unsigned long begin_ns, now_ns;
> +
> +	if (!poll_threshold_ns)
> +		return;

If at all then this needs to be a static key based decision.

> +
> +	begin_ns = this_cpu_read(poll_begin_ns);
> +	/* Not from halt state */
> +	if (!begin_ns)
> +		return;

If you integrate this stuff into the proper place, then the whole mess goes
away. We really do not need another facility to track idle state. We have
enough already, really.

Thanks,

	tglx
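[Minimal sketch of the static-key approach Thomas asks for — the key name
and the sysctl-update hook are hypothetical, not from the posted series.
A static key compiles the "is polling enabled?" check down to a patched
NOP on the hot path until the sysctl turns it on.]

    #include <linux/jump_label.h>

    static DEFINE_STATIC_KEY_FALSE(poll_threshold_enabled);

    void check_poll(void)
    {
    	/* Patched to a NOP until the key is switched on. */
    	if (!static_branch_unlikely(&poll_threshold_enabled))
    		return;
    	/* ... existing poll bookkeeping ... */
    }

    /* Called from the sysctl handler when poll_threshold_ns changes. */
    static void update_poll_key(unsigned long threshold_ns)
    {
    	if (threshold_ns)
    		static_branch_enable(&poll_threshold_enabled);
    	else
    		static_branch_disable(&poll_threshold_enabled);
    }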
Hi Yang,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.12-rc6]
[cannot apply to tip/x86/core next-20170622]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/root/x86-idle-add-halt-poll-for-halt-idle/20170623-061318
config: i386-randconfig-x016-06222129 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386

Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings

All warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/preempt.h:5:0,
                    from include/linux/preempt.h:80,
                    from include/linux/spinlock.h:50,
                    from include/linux/mmzone.h:7,
                    from include/linux/gfp.h:5,
                    from include/linux/mm.h:9,
                    from arch/x86/kernel/process.c:5:
   arch/x86/kernel/process.c: In function 'check_poll':
>> arch/x86/include/asm/percpu.h:109:3: warning: 'val' may be used uninitialized in this function [-Wmaybe-uninitialized]
      asm(op "l %1,"__percpu_arg(0) \
      ^~~
   arch/x86/kernel/process.c:351:15: note: 'val' was declared here
     unsigned int val, poll_duration;
                  ^~~
--
   In file included from arch/x86/include/asm/preempt.h:5:0,
                    from include/linux/preempt.h:80,
                    from include/linux/spinlock.h:50,
                    from include/linux/mmzone.h:7,
                    from include/linux/gfp.h:5,
                    from include/linux/mm.h:9,
                    from arch/x86//kernel/process.c:5:
   arch/x86//kernel/process.c: In function 'check_poll':
>> arch/x86/include/asm/percpu.h:109:3: warning: 'val' may be used uninitialized in this function [-Wmaybe-uninitialized]
      asm(op "l %1,"__percpu_arg(0) \
      ^~~
   arch/x86//kernel/process.c:351:15: note: 'val' was declared here
     unsigned int val, poll_duration;
                  ^~~

vim +/val +109 arch/x86/include/asm/percpu.h

0f5e4816 arch/x86/include/asm/percpu.h Tejun Heo      2009-10-29   93	pto_T__ pto_tmp__;				\
0f5e4816 arch/x86/include/asm/percpu.h Tejun Heo      2009-10-29   94	pto_tmp__ = (val);				\
23b764d0 arch/x86/include/asm/percpu.h Andi Kleen     2010-06-10   95	(void)pto_tmp__;				\
bc9e3be2 include/asm-x86/percpu.h      Joe Perches    2008-03-23   96	}						\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30   97	switch (sizeof(var)) {				\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30   98	case 1:						\
87b26406 arch/x86/include/asm/percpu.h Brian Gerst    2009-01-19   99		asm(op "b %1,"__percpu_arg(0)		\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30  100		    : "+m" (var)			\
0f5e4816 arch/x86/include/asm/percpu.h Tejun Heo      2009-10-29  101		    : "qi" ((pto_T__)(val)));		\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30  102		break;					\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30  103	case 2:						\
87b26406 arch/x86/include/asm/percpu.h Brian Gerst    2009-01-19  104		asm(op "w %1,"__percpu_arg(0)		\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30  105		    : "+m" (var)			\
0f5e4816 arch/x86/include/asm/percpu.h Tejun Heo      2009-10-29  106		    : "ri" ((pto_T__)(val)));		\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30  107		break;					\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30  108	case 4:						\
87b26406 arch/x86/include/asm/percpu.h Brian Gerst    2009-01-19 @109		asm(op "l %1,"__percpu_arg(0)		\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30  110		    : "+m" (var)			\
0f5e4816 arch/x86/include/asm/percpu.h Tejun Heo      2009-10-29  111		    : "ri" ((pto_T__)(val)));		\
3334052a include/asm-x86/percpu.h      travis@sgi.com 2008-01-30  112		break;					\
9939ddaf arch/x86/include/asm/percpu.h Tejun Heo      2009-01-13  113	case 8:						\
87b26406 arch/x86/include/asm/percpu.h Brian Gerst    2009-01-19  114		asm(op "q %1,"__percpu_arg(0)		\
9939ddaf arch/x86/include/asm/percpu.h Tejun Heo      2009-01-13  115		    : "+m" (var)			\
0f5e4816 arch/x86/include/asm/percpu.h Tejun Heo      2009-10-29  116		    : "re" ((pto_T__)(val)));		\
9939ddaf arch/x86/include/asm/percpu.h Tejun Heo      2009-01-13  117		break;					\

:::::: The code at line 109 was first introduced by commit
:::::: 87b264065880fa696c121dad8498a60524e0f6de x86-64: Use absolute displacements for per-cpu accesses.

:::::: TO: Brian Gerst <brgerst@gmail.com>
:::::: CC: Tejun Heo <tj@kernel.org>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
On 2017/6/22 19:51, Paolo Bonzini wrote:
> On 22/06/2017 13:22, root wrote:
>> [...]
>
> Even before starting the debate on whether this is a good idea or a bad
> idea, KVM reduces the polling value to the minimum (10 us) by default

I noticed it. It looks like the logic inside KVM is more reasonable. I
will do more testing to compare the two.

> when polling fails.  Also, it shouldn't be bound to
> CONFIG_HYPERVISOR_GUEST, since there's nothing specific to virtual
> machines here.

Yes. The original idea was to use CONFIG_HYPERVISOR_GUEST because this
mechanism is only helpful inside a VM. But as Thomas mentioned in the
other thread, it is wrong to use it, since most distribution kernels set
it to yes and it would still affect bare metal. I will integrate it with
the paravirtualization part as you suggest below.

> Regarding the good/bad idea part, KVM's polling is made much more
> acceptable by single_task_running().  At least you need to integrate it
> with paravirtualization.  If the VM is scheduled out, you shrink the
> polling period.  There is already vcpu_is_preempted for this, it is used
> by mutexes.

I considered single_task_running() before. But there is no such
paravirtual interface currently, and I am not sure whether introducing
one would leak information from the host, so I didn't do it. Do you mean
vcpu_is_preempted can do the same thing? I checked the code and it seems
it only tells whether the VCPU is scheduled out or not, which cannot
satisfy the need.

> Paolo
On 2017/6/22 22:32, Thomas Gleixner wrote:
> On Thu, 22 Jun 2017, root wrote:
>> [...]
>>  	entering_ack_irq();
>> +	check_poll();
>
> No way, that we sprinkle this function into every interrupt hotpath. There
> are enough genuine ways to do that w/o touching a gazillion of files.

I will find a more correct place to call this function.

>> [...]
>> +	if (!poll_threshold_ns)
>> +		return;
>
> If at all then this needs to be a static key based decision.

Sure, will do it.

>> +	begin_ns = this_cpu_read(poll_begin_ns);
>> +	/* Not from halt state */
>> +	if (!begin_ns)
>> +		return;
>
> If you integrate this stuff into the proper place, then the whole mess goes
> away. We really do not need another facility to track idle state. We have
> enough already, really.

Agree. I will check the current code to find a more proper way to do the
check.

> Thanks,
>
> 	tglx
On 2017/6/23 11:58, Yang Zhang wrote:
> On 2017/6/22 19:51, Paolo Bonzini wrote:
>> [...]
>> Regarding the good/bad idea part, KVM's polling is made much more
>> acceptable by single_task_running().  At least you need to integrate it
>> with paravirtualization.  If the VM is scheduled out, you shrink the
>> polling period.  There is already vcpu_is_preempted for this, it is used
>> by mutexes.
>
> I considered single_task_running() before. But there is no such
> paravirtual interface currently, and I am not sure whether introducing
> one would leak information from the host, so I didn't do it. Do you mean
> vcpu_is_preempted can do the same thing? I checked the code and it seems
> it only tells whether the VCPU is scheduled out or not, which cannot
> satisfy the need.

Hi Paolo

Can you help to answer my confusion? I have double checked the code, but
still do not get your point. Do you think it is necessary to introduce a
paravirtual interface to expose single_task_running() to the guest?
On 27/06/2017 13:22, Yang Zhang wrote:
>>> Regarding the good/bad idea part, KVM's polling is made much more
>>> acceptable by single_task_running().  At least you need to integrate it
>>> with paravirtualization.  If the VM is scheduled out, you shrink the
>>> polling period.  There is already vcpu_is_preempted for this, it is used
>>> by mutexes.
>>
>> [...] Do you mean vcpu_is_preempted can do the same thing? I checked the
>> code and it seems it only tells whether the VCPU is scheduled out or not,
>> which cannot satisfy the need.
>
> Can you help to answer my confusion? I have double checked the code, but
> still do not get your point. Do you think it is necessary to introduce a
> paravirtual interface to expose single_task_running() to the guest?

I think vcpu_is_preempted is a good enough replacement.

Paolo
2017-06-27 20:07 GMT+08:00 Paolo Bonzini <pbonzini@redhat.com>:
> On 27/06/2017 13:22, Yang Zhang wrote:
>> [...]
>> Can you help to answer my confusion? I have double checked the code, but
>> still do not get your point. Do you think it is necessary to introduce a
>> paravirtual interface to expose single_task_running() to the guest?
>
> I think vcpu_is_preempted is a good enough replacement.

For example, vcpu->arch.st.steal.preempted is 0 when the vCPU is
scheduled in and at vmentry; then several tasks are enqueued on the same
pCPU and wait on the cfs red-black tree. The guest should avoid polling
in this scenario; however, vcpu_is_preempted returns false and the guest
decides to poll.

Regards,
Wanpeng Li
On 27/06/2017 14:23, Wanpeng Li wrote:
>> [...]
> For example, vcpu->arch.st.steal.preempted is 0 when the vCPU is
> scheduled in and at vmentry; then several tasks are enqueued on the same
> pCPU and wait on the cfs red-black tree. The guest should avoid polling
> in this scenario; however, vcpu_is_preempted returns false and the guest
> decides to poll.

... which is not necessarily _wrong_.  It's just a different heuristic.
In the end, the guest could run with "idle=poll" even, and there's little
the host scheduler can do about it, except treating it as a CPU bound
task.

Paolo
2017-06-27 14:28+0200, Paolo Bonzini:
> On 27/06/2017 14:23, Wanpeng Li wrote:
>> [...]
>
> ... which is not necessarily _wrong_.  It's just a different heuristic.

Right, it's just harder to use than the host's single_task_running() -- the
VCPU calling vcpu_is_preempted() is never preempted, so we have to look
at other VCPUs that are not halted, but still preempted.

If we see some ratio of preempted VCPUs (> 0?), then we stop polling and
yield to the host.  Working under the assumption that there is work for
this PCPU if other VCPUs have stuff to do.  The downside is that it
misses information about the host's topology, so it would be hard to
make it work well.
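[Rough sketch of the heuristic described above — purely illustrative, not
from any posted patch. It also glosses over the "not halted" condition,
which the guest cannot easily check, and which is part of why the
heuristic is hard to get right. vcpu_is_preempted() is the real paravirt
helper from <linux/sched.h>.]

    #include <linux/smp.h>
    #include <linux/cpumask.h>
    #include <linux/sched.h>

    /*
     * Stop polling if any other online VCPU is currently preempted by
     * the host, assuming that means the PCPU has other work to do.
     */
    static bool other_vcpu_preempted(void)
    {
    	int cpu, me = smp_processor_id();

    	for_each_online_cpu(cpu) {
    		if (cpu != me && vcpu_is_preempted(cpu))
    			return true;
    	}
    	return false;
    }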
On 27/06/2017 15:40, Radim Krčmář wrote:
>> ... which is not necessarily _wrong_.  It's just a different heuristic.
> Right, it's just harder to use than the host's single_task_running() -- the
> VCPU calling vcpu_is_preempted() is never preempted, so we have to look
> at other VCPUs that are not halted, but still preempted.
>
> If we see some ratio of preempted VCPUs (> 0?), then we stop polling and
> yield to the host.  [...]

I would just use vcpu_is_preempted on the current CPU.  From guest POV
this option is really a "f*** everyone else" setting just like
idle=poll, only a little more polite.

If we've been preempted and we were polling, there are two cases.  If an
interrupt was queued while the guest was preempted, the poll will be
treated as successful anyway.  If it hasn't, let others run---but really
that's not because the guest wants to be polite, it's to avoid that the
scheduler penalizes it excessively.

So until it's preempted, I think it's okay if the guest doesn't care
about others.  You wouldn't use this option anyway in overcommitted
situations.

(I'm still not very convinced about the idea).

Paolo
2017-06-27 15:56+0200, Paolo Bonzini:
> On 27/06/2017 15:40, Radim Krčmář wrote:
>> [...]
>
> I would just use vcpu_is_preempted on the current CPU.  From guest POV
> this option is really a "f*** everyone else" setting just like
> idle=poll, only a little more polite.

vcpu_is_preempted() on the current cpu cannot return true, AFAIK.

> If we've been preempted and we were polling, there are two cases.  If an
> interrupt was queued while the guest was preempted, the poll will be
> treated as successful anyway.

I think the poll should be treated as invalid if the window has expired
while the VCPU was preempted -- the guest can't tell whether the
interrupt arrived still within the poll window (unless we added paravirt
for that), so it shouldn't be wasting time waiting for it.

> If it hasn't, let others run---but really
> that's not because the guest wants to be polite, it's to avoid that the
> scheduler penalizes it excessively.

This sounds like a VM entry just to do an immediate VM exit, so paravirt
seems better here as well ... (the guest telling the host about its
window -- which could also be used to rule it out as a target in the
pause loop random kick.)

> So until it's preempted, I think it's okay if the guest doesn't care
> about others.  You wouldn't use this option anyway in overcommitted
> situations.
>
> (I'm still not very convinced about the idea).

Me neither.  (The same mechanism is applicable to bare-metal, but was
never used there, so I would rather bring the guest behavior closer to
bare-metal.)
On 27/06/2017 16:22, Radim Krčmář wrote:
> vcpu_is_preempted() on current cpu cannot return true, AFAIK.
Of course. I must have been thinking of an older version of the
vcpu_is_preempted patch (at some point the guest was the one that set
preempted to 0).
Paolo
On 2017/6/27 22:22, Radim Krčmář wrote:
> 2017-06-27 15:56+0200, Paolo Bonzini:
>> [...]
>> (I'm still not very convinced about the idea).
>
> Me neither.  (The same mechanism is applicable to bare-metal, but was
> never used there, so I would rather bring the guest behavior closer to
> bare-metal.)

The background is that we (Alibaba Cloud) get more and more complaints
from our customers, in both KVM and Xen, compared to bare metal. After
investigation, the root cause is known to us: the big cost of
message-passing workloads (David showed it at KVM Forum 2015).

A typical message workload looks like this:

   vcpu 0                            vcpu 1
   1. send ipi                       2. doing hlt
   3. go into idle                   4. receive ipi and wake up from hlt
   5. write APIC timer twice         6. write APIC timer twice to
      to stop sched timer               reprogram sched timer
   7. doing hlt                      8. handle task and send ipi to
                                        vcpu 0
   9. same as 4.                     10. same as 3.

One transaction introduces about 12 vmexits (2 hlt and 10 msr writes).
The cost of such vmexits degrades performance severely. The Linux kernel
already provides idle=poll to mitigate the trend, but it only eliminates
the IPI and hlt vmexits; it does nothing about starting/stopping the
sched timer. A compromise would be to turn off the NOHZ kernel, but that
is not the default config for new distributions. The same goes for
halt-poll in KVM: it only removes the cost of scheduling in/out on the
host and cannot help such workloads much.

The purpose of this patch is to improve the current idle=poll mechanism
to use dynamic polling, and to poll before touching the sched timer. It
should not be a virtualization-specific feature, but bare metal seems to
have a low cost for accessing the MSR, so I want to enable it only in a
VM. Though the idea below the patch may not fit all conditions
perfectly, it looks no worse than what we have now. How about we keep
the current implementation and I integrate the patch with the
paravirtualization part as Paolo suggested? We can continue to discuss
it and I will keep refining it if anyone has better suggestions.
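[Worked count, piecing the numbers together from this message and the
exchange with Radim below: 2 hlt exits, plus 2 IPI-related MSR writes
(one ICR write per IPI) and 4 APIC-timer MSR writes gives 6 MSR writes
in the common case; in the worst case each timer reprogram takes two
writes on each side, adding 4 more for 10 MSR writes, hence the ~12
vmexits per transaction quoted above.]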
On Mon, 3 Jul 2017, Yang Zhang wrote:
> The background is that we (Alibaba Cloud) get more and more complaints
> from our customers, in both KVM and Xen, compared to bare metal. [...]
>
> One transaction introduces about 12 vmexits (2 hlt and 10 msr writes).
> The cost of such vmexits degrades performance severely. The Linux kernel
> already provides idle=poll to mitigate the trend, but it only eliminates
> the IPI and hlt vmexits; it does nothing about starting/stopping the
> sched timer. A compromise would be to turn off the NOHZ kernel, but that
> is not the default config for new distributions.

You still can turn it off on the kernel command line via nohz=off.

Thanks,

	tglx
On 2017/7/3 18:06, Thomas Gleixner wrote:
> On Mon, 3 Jul 2017, Yang Zhang wrote:
>> [...]
>
> You still can turn it off on the kernel command line via nohz=off.

You are right. Senior users will turn it off manually, but that only
solves the sched timer; they still have the IPI/hlt problem. Another
point is that we release the distribution image to customers without any
extra configuration, to avoid a mismatch between VM and bare metal.
Changing such a configuration needs a reboot, but some customers'
business cannot be interrupted after they start their service (like
online gaming). It would be better if we could provide a sysctl
interface to allow run-time modification. By the way, idle=poll seems
too heavy to use.
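[Sketch of the run-time knob being asked for here. The posted series'
kernel/sysctl.c hunk is not shown in this thread, so the table below is
an assumption, built on the existing proc_douintvec handler.]

    #include <linux/sysctl.h>

    extern unsigned int poll_grow, poll_shrink;

    /* Hypothetical wiring of the poll_grow/poll_shrink sysctls. */
    static struct ctl_table poll_ctl_table[] = {
    	{
    		.procname	= "poll_grow",
    		.data		= &poll_grow,
    		.maxlen		= sizeof(unsigned int),
    		.mode		= 0644,
    		.proc_handler	= proc_douintvec,
    	},
    	{
    		.procname	= "poll_shrink",
    		.data		= &poll_shrink,
    		.maxlen		= sizeof(unsigned int),
    		.mode		= 0644,
    		.proc_handler	= proc_douintvec,
    	},
    	{ }
    };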
2017-07-03 17:28+0800, Yang Zhang:
> The background is that we (Alibaba Cloud) get more and more complaints
> from our customers, in both KVM and Xen, compared to bare metal. [...]
>
> A typical message workload looks like this:
>
>    vcpu 0                            vcpu 1
>    1. send ipi                       2. doing hlt
>    3. go into idle                   4. receive ipi and wake up from hlt
>    5. write APIC timer twice         6. write APIC timer twice to
>       to stop sched timer               reprogram sched timer

One write is enough to disable/re-enable the APIC timer -- why does
Linux use two?

>    7. doing hlt                      8. handle task and send ipi to
>                                         vcpu 0
>    9. same as 4.                     10. same as 3.
>
> One transaction introduces about 12 vmexits (2 hlt and 10 msr writes).
> The cost of such vmexits degrades performance severely.

Yeah, sounds like too much ... I understood that there are

  IPI from 1 to 2
  4 * APIC timer
  IPI from 2 to 1

which adds to 6 MSR writes -- what are the other 4?

> The Linux kernel already provides idle=poll to mitigate the trend, but
> it only eliminates the IPI and hlt vmexits; it does nothing about
> starting/stopping the sched timer. A compromise would be to turn off
> the NOHZ kernel, but that is not the default config for new
> distributions. The same goes for halt-poll in KVM: it only removes the
> cost of scheduling in/out on the host and cannot help such workloads
> much.
>
> The purpose of this patch is to improve the current idle=poll mechanism

Please aim to allow MWAIT instead of idle=poll -- MWAIT doesn't slow
down the sibling hyperthread.  MWAIT solves the IPI problem, but doesn't
get rid of the timer one.

> to use dynamic polling, and to poll before touching the sched timer. It
> should not be a virtualization-specific feature, but bare metal seems
> to have a low cost for accessing the MSR, so I want to enable it only
> in a VM. Though the idea below the patch may not fit all conditions
> perfectly, it looks no worse than what we have now.

It adds code to hot-paths (interrupt handlers) while trying to optimize
an idle-path, which is suspicious.

> How about we keep the current implementation and I integrate the patch
> with the paravirtualization part as Paolo suggested? We can continue to
> discuss it and I will keep refining it if anyone has better suggestions.

I think there is a nicer solution to avoid the expensive timer rewrite:
Linux uses one-shot APIC timers and getting the timer interrupt is about
as expensive as programming the timer, so the guest can keep the timer
armed, but not re-arm it after the expiration if the CPU is idle.

This should also mitigate the problem with short idle periods, but the
optimized window is anywhere between 0 to 1ms.

Do you see disadvantages of this combined with MWAIT?

Thanks.
On Tue, 4 Jul 2017, Radim Krčmář wrote:
> I think there is a nicer solution to avoid the expensive timer rewrite:
> Linux uses one-shot APIC timers and getting the timer interrupt is about
> as expensive as programming the timer, so the guest can keep the timer
> armed, but not re-arm it after the expiration if the CPU is idle.

So much for the theory. The NOHZ logic has to reprogram the timer when the
next expiry is farther away than the next tick. Otherwise you wake up on
every idle entry once for nothing, which defeats the whole purpose of NOHZ
to not do that.

Thanks,

	tglx
2017-07-03 17:28 GMT+08:00 Yang Zhang <yang.zhang.wz@gmail.com>:
> On 2017/6/27 22:22, Radim Krčmář wrote:
>> [...]
>
> The background is that we (Alibaba Cloud) get more and more complaints
> from our customers, in both KVM and Xen, compared to bare metal. [...]
>
> A typical message workload looks like this:
>
>    vcpu 0                            vcpu 1
>    1. send ipi                       2. doing hlt
>    3. go into idle                   4. receive ipi and wake up from hlt
>    5. write APIC timer twice         6. write APIC timer twice to
>       to stop sched timer               reprogram sched timer

I didn't find where these two scenarios program the APIC timer twice
each instead of once each; could you point out the code?

Regards,
Wanpeng Li

>    7. doing hlt                      8. handle task and send ipi to
>                                         vcpu 0
>    [...]
On 2017/7/4 22:13, Radim Krčmář wrote:
> 2017-07-03 17:28+0800, Yang Zhang:
>> [...]
>>    5. write APIC timer twice         6. write APIC timer twice to
>>       to stop sched timer               reprogram sched timer
>
> One write is enough to disable/re-enable the APIC timer -- why does
> Linux use two?

One is to remove the timer and another one is to reprogram the timer.
Normally, there is only one write to remove the timer, but in some cases
it will reprogram it.

>> [...]
>> One transaction introduces about 12 vmexits (2 hlt and 10 msr writes).
>> The cost of such vmexits degrades performance severely.
>
> Yeah, sounds like too much ... I understood that there are
>
>   IPI from 1 to 2
>   4 * APIC timer
>   IPI from 2 to 1
>
> which adds to 6 MSR writes -- what are the other 4?

In the worst case, each timer will touch the APIC timer twice, so that
adds another 4 msr writes. But this is not always true.

>> [...]
>
> Please aim to allow MWAIT instead of idle=poll -- MWAIT doesn't slow
> down the sibling hyperthread.  MWAIT solves the IPI problem, but doesn't
> get rid of the timer one.

Yes, I can try it. But MWAIT will not yield the CPU; it only helps the
sibling hyperthread, as you mentioned.

>> [...]
>
> It adds code to hot-paths (interrupt handlers) while trying to optimize
> an idle-path, which is suspicious.
>
>> How about we keep the current implementation [...]
>
> I think there is a nicer solution to avoid the expensive timer rewrite:
> Linux uses one-shot APIC timers and getting the timer interrupt is about
> as expensive as programming the timer, so the guest can keep the timer
> armed, but not re-arm it after the expiration if the CPU is idle.
>
> This should also mitigate the problem with short idle periods, but the
> optimized window is anywhere between 0 to 1ms.
>
> Do you see disadvantages of this combined with MWAIT?
>
> Thanks.
On 13.07.17 13:49, Yang Zhang wrote:
> On 2017/7/4 22:13, Radim Krčmář wrote:
>> [...]
>> Please aim to allow MWAIT instead of idle=poll -- MWAIT doesn't slow
>> down the sibling hyperthread.  MWAIT solves the IPI problem, but
>> doesn't get rid of the timer one.
>
> Yes, I can try it. But MWAIT will not yield the CPU; it only helps the
> sibling hyperthread, as you mentioned.

If you implement proper MWAIT emulation that conditionally gets en- or
disabled depending on the same halt poll dynamics that we already have
for in-host HLT handling, it will also yield the CPU.

As for the timer - are you sure the problem is really the overhead of
the timer configuration, not the latency that it takes to actually fire
the guest timer?

One major problem I see is that we configure the host hrtimer to fire at
the point in time when the guest wants to see a timer event. But in a
virtual environment, the point in time when we have to start switching
to the VM really should be a bit *before* the guest wants to be woken
up, as it takes quite some time to switch back into the VM context.


Alex
On 2017/7/14 17:37, Alexander Graf wrote:
> On 13.07.17 13:49, Yang Zhang wrote:
>> [...]
>
> If you implement proper MWAIT emulation that conditionally gets en- or
> disabled depending on the same halt poll dynamics that we already have
> for in-host HLT handling, it will also yield the CPU.

It is hard to do. If we do not intercept the MWAIT instruction, there is
no chance to wake up the CPU unless an interrupt arrives or a store hits
the address armed by MONITOR, which is the same as idle=polling.

> As for the timer - are you sure the problem is really the overhead of
> the timer configuration, not the latency that it takes to actually fire
> the guest timer?

No, the main cost is introduced by vmexits, including IPIs, timer
programming, and HLT. David detailed it at KVM Forum; you can search for
"Message Passing Workloads in KVM" on Google and the first link gives
the whole analysis of the problem.

> One major problem I see is that we configure the host hrtimer to fire at
> the point in time when the guest wants to see a timer event. But in a
> virtual environment, the point in time when we have to start switching
> to the VM really should be a bit *before* the guest wants to be woken
> up, as it takes quite some time to switch back into the VM context.
>
>
> Alex
On 17.07.17 11:26, Yang Zhang wrote:
> On 2017/7/14 17:37, Alexander Graf wrote:
>> [...]
>> If you implement proper MWAIT emulation that conditionally gets en- or
>> disabled depending on the same halt poll dynamics that we already have
>> for in-host HLT handling, it will also yield the CPU.
>
> It is hard to do. If we do not intercept the MWAIT instruction, there is
> no chance to wake up the CPU unless an interrupt arrives or a store hits
> the address armed by MONITOR, which is the same as idle=polling.

Yes, but you can reconfigure the VMCS/VMCB to trap on MWAIT or not trap
on it. That's something that idle=polling does not give you at all - a
guest vcpu will always use 100% CPU.

The only really tricky part is how to limit the effect of MONITOR on
nested page table maintenance. But if we just set the MONITOR cache size
to 4k, well behaved guests should ideally always give us the one same
page for wakeup - which we can then leave marked as trapping.

>> As for the timer - are you sure the problem is really the overhead of
>> the timer configuration, not the latency that it takes to actually fire
>> the guest timer?
>
> No, the main cost is introduced by vmexits, including IPIs, timer
> programming, and HLT. David detailed it at KVM Forum; you can search for
> "Message Passing Workloads in KVM" on Google and the first link gives
> the whole analysis of the problem.

During time critical message passing you want to keep both vCPUs inside
the guest, yes. That again is something that guest exposed MWAIT would
buy you.

The problem is that overcommitting CPU is very expensive with anything
that does not set the guests idle at all. And not everyone can afford to
throw more CPUs at problems :).


Alex
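[For concreteness, the run-time flip Alexander describes might look like
this on VMX — a sketch only. The helper is hypothetical; vmcs_read32()/
vmcs_write32() and CPU_BASED_MWAIT_EXITING are real definitions from
arch/x86/kvm/vmx.c and arch/x86/include/asm/vmx.h, and the write must
happen with the vCPU's VMCS loaded.]

    /*
     * Toggle the MWAIT intercept for the currently loaded VMCS, e.g.
     * based on the same grow/shrink dynamics as host halt polling.
     */
    static void vmx_set_mwait_intercept(bool trap)
    {
    	u32 exec = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);

    	if (trap)
    		exec |= CPU_BASED_MWAIT_EXITING;
    	else
    		exec &= ~CPU_BASED_MWAIT_EXITING;

    	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec);
    }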
On 2017/7/17 17:54, Alexander Graf wrote:
> On 17.07.17 11:26, Yang Zhang wrote:
>> [...]
>
> Yes, but you can reconfigure the VMCS/VMCB to trap on MWAIT or not trap
> on it. That's something that idle=polling does not give you at all - a
> guest vcpu will always use 100% CPU.

There are two things we need to figure out:
1. How and when to reconfigure the VMCS? Currently, all the knowledge is
in the guest; we don't know when to reconfigure it. Also, we cannot
prevent the guest from using MWAIT in other places if it sees the
feature.
2. If the guest executes MWAIT without a trap, since there is no way to
set a timeout for it, that would be a waste of CPU too.

> The only really tricky part is how to limit the effect of MONITOR on
> nested page table maintenance. But if we just set the MONITOR cache size
> to 4k, well behaved guests should ideally always give us the one same
> page for wakeup - which we can then leave marked as trapping.
>
>> [...]
>
> During time critical message passing you want to keep both vCPUs inside
> the guest, yes. That again is something that guest exposed MWAIT would
> buy you.

I think MWAIT only helps the sibling hyper-threading case. But in real
clouds, hyper-threading is not always turned on, e.g. in most products
of Azure and some products of Alibaba Cloud. So it shouldn't be a big
problem.

> The problem is that overcommitting CPU is very expensive with anything
> that does not set the guests idle at all. And not everyone can afford to
> throw more CPUs at problems :).

Agree, that's the reason why we chose dynamic halt polling. But on the
other side, the cloud vendor has the knowledge to control whether to
turn it on or not. The only problem is that there is no such way for us
to do that currently.

>
> Alex
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 4e71bfe..76043b4 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -63,6 +63,8 @@ show up in /proc/sys/kernel: - perf_event_max_stack - perf_event_max_contexts_per_stack - pid_max +- poll_grow [ X86 only ] +- poll_shrink [ X86 only ] - poll_threshold_ns [ X86 only ] - powersave-nap [ PPC only ] - printk @@ -703,6 +705,18 @@ kernel tries to allocate a number starting from this one. ============================================================== +poll_grow: (X86 only) + +This parameter is multiplied in the grow_poll_ns() to increase the poll time. +By default, the values is 2. + +============================================================== +poll_shrink: (X86 only) + +This parameter is divided in the shrink_poll_ns() to reduce the poll time. +By default, the values is 2. + +============================================================== poll_threshold_ns: (X86 only) This parameter used to control the max wait time to poll before going diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3cada99..cf952ed 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -931,4 +931,10 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves) void stop_this_cpu(void *dummy); void df_debug(struct pt_regs *regs, long error_code); +#ifdef CONFIG_HYPERVISOR_GUEST +extern void check_poll(void); +#else +static inline void check_poll(void) {} +#endif + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 2d75faf..37b16b6 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -962,6 +962,7 @@ __visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) * interrupt lock, which is the WrongThing (tm) to do. */ entering_ack_irq(); + check_poll(); local_apic_timer_interrupt(); exiting_irq(); @@ -981,6 +982,7 @@ __visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs) * interrupt lock, which is the WrongThing (tm) to do. 
 	 */
 	entering_ack_irq();
+	check_poll();
 	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
 	local_apic_timer_interrupt();
 	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
@@ -1863,6 +1865,7 @@ static void __smp_spurious_interrupt(u8 vector)
 __visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
+	check_poll();
 	__smp_spurious_interrupt(~regs->orig_ax);
 	exiting_irq();
 }
@@ -1872,6 +1875,7 @@ __visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs)
 	u8 vector = ~regs->orig_ax;
 
 	entering_irq();
+	check_poll();
 	trace_spurious_apic_entry(vector);
 	__smp_spurious_interrupt(vector);
 	trace_spurious_apic_exit(vector);
@@ -1921,6 +1925,7 @@ static void __smp_error_interrupt(struct pt_regs *regs)
 __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
+	check_poll();
 	__smp_error_interrupt(regs);
 	exiting_irq();
 }
@@ -1928,6 +1933,7 @@ __visible void __irq_entry smp_error_interrupt(struct pt_regs *regs)
 __visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
+	check_poll();
 	trace_error_apic_entry(ERROR_APIC_VECTOR);
 	__smp_error_interrupt(regs);
 	trace_error_apic_exit(ERROR_APIC_VECTOR);
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index f3557a1..77fc6ed 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -564,6 +564,7 @@ asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
 	unsigned vector, me;
 
 	entering_ack_irq();
+	check_poll();
 	/* Prevent vectors vanishing under us */
 	raw_spin_lock(&vector_lock);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 6e4a047..7f984d6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -819,6 +819,7 @@ static inline void __smp_deferred_error_interrupt(void)
 asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
 {
 	entering_irq();
+	check_poll();
 	__smp_deferred_error_interrupt();
 	exiting_ack_irq();
 }
@@ -826,6 +827,7 @@ asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void)
 asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
 {
 	entering_irq();
+	check_poll();
 	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
 	__smp_deferred_error_interrupt();
 	trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index d7cc190..d420b42 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -400,6 +400,7 @@ static inline void __smp_thermal_interrupt(void)
 smp_thermal_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
+	check_poll();
 	__smp_thermal_interrupt();
 	exiting_ack_irq();
 }
@@ -408,6 +409,7 @@ static inline void __smp_thermal_interrupt(void)
 smp_trace_thermal_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
+	check_poll();
 	trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
 	__smp_thermal_interrupt();
 	trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index bb0e75ee..77858ba 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -26,6 +26,7 @@ static inline void __smp_threshold_interrupt(void)
 asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
 {
 	entering_irq();
+	check_poll();
 	__smp_threshold_interrupt();
 	exiting_ack_irq();
 }
@@ -33,6 +34,7 @@ asmlinkage __visible void __irq_entry smp_threshold_interrupt(void)
 asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void)
 {
 	entering_irq();
+	check_poll();
 	trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
 	__smp_threshold_interrupt();
 	trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index f34fe74..65ff260 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -230,6 +230,7 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
 
 	entering_irq();
 
+	check_poll();
 	/* entering_irq() tells RCU that we're not quiescent.  Check it. */
 	RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
 
@@ -269,6 +270,7 @@ __visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	entering_ack_irq();
+	check_poll();
 	__smp_x86_platform_ipi();
 	exiting_irq();
 	set_irq_regs(old_regs);
@@ -295,6 +297,7 @@ __visible void smp_kvm_posted_intr_ipi(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	entering_ack_irq();
+	check_poll();
 	inc_irq_stat(kvm_posted_intr_ipis);
 	exiting_irq();
 	set_irq_regs(old_regs);
@@ -308,6 +311,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	entering_ack_irq();
+	check_poll();
 	inc_irq_stat(kvm_posted_intr_wakeup_ipis);
 	kvm_posted_intr_wakeup_handler();
 	exiting_irq();
@@ -320,6 +324,7 @@ __visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	entering_ack_irq();
+	check_poll();
 	trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
 	__smp_x86_platform_ipi();
 	trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index 2754878..2c4b6cd 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -20,6 +20,7 @@ static inline void __smp_irq_work_interrupt(void)
 __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
+	check_poll();
 	__smp_irq_work_interrupt();
 	exiting_irq();
 }
@@ -27,6 +28,7 @@ __visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs)
 __visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
+	check_poll();
 	trace_irq_work_entry(IRQ_WORK_VECTOR);
 	__smp_irq_work_interrupt();
 	trace_irq_work_exit(IRQ_WORK_VECTOR);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6361783..e5238a8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -41,6 +41,10 @@
 
 #ifdef CONFIG_HYPERVISOR_GUEST
 unsigned long poll_threshold_ns;
+unsigned int poll_shrink = 2;
+unsigned int poll_grow = 2;
+DEFINE_PER_CPU(unsigned long, poll_begin_ns);
+DEFINE_PER_CPU(unsigned long, poll_ns);
 #endif
 
 /*
@@ -318,6 +322,58 @@ static inline void play_dead(void)
 #endif
 
 #ifdef CONFIG_HYPERVISOR_GUEST
+static unsigned int grow_poll_ns(unsigned int old, unsigned int grow,
+				 unsigned int max)
+{
+	unsigned int val;
+
+	/* 10us as base poll duration */
+	if (old == 0 && grow)
+		return 10000;
+
+	val = old * grow;
+	if (val > max)
+		val = max;
+
+	return val;
+}
+
+static unsigned int shrink_poll_ns(unsigned int old, unsigned int shrink)
+{
+	if (shrink == 0)
+		return 0;
+
+	return old / shrink;
+}
+
+void check_poll(void)
+{
+	unsigned int val, poll_duration;
+	unsigned long begin_ns, now_ns;
+
+	if (!poll_threshold_ns)
+		return;
+
+	begin_ns = this_cpu_read(poll_begin_ns);
+	/* Not from halt state */
+	if (!begin_ns)
+		return;
+
+	now_ns = ktime_to_ns(ktime_get());
+	poll_duration = this_cpu_read(poll_ns);
+	/* Keep the current window if neither branch below applies */
+	val = poll_duration;
+
+	if (poll_duration && now_ns - begin_ns > poll_threshold_ns)
+		val = shrink_poll_ns(poll_duration, poll_shrink);
+	else if (poll_duration < poll_threshold_ns &&
+		 now_ns - begin_ns < poll_threshold_ns)
+		val = grow_poll_ns(poll_duration, poll_grow, poll_threshold_ns);
+
+	this_cpu_write(poll_ns, val);
+	this_cpu_write(poll_begin_ns, 0);
+}
+
 void arch_cpu_idle_poll(void)
 {
 	ktime_t start, cur, stop;
@@ -359,6 +414,10 @@ void arch_cpu_idle(void)
 void __cpuidle default_idle(void)
 {
 	trace_cpu_idle_rcuidle(1, smp_processor_id());
+#ifdef CONFIG_HYPERVISOR_GUEST
+	if (poll_threshold_ns)
+		this_cpu_write(poll_begin_ns, ktime_to_ns(ktime_get()));
+#endif
 	safe_halt();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index d798c0d..81a3961 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -265,6 +265,7 @@ static inline void __smp_reschedule_interrupt(void)
 __visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
+	check_poll();
 	__smp_reschedule_interrupt();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
@@ -280,6 +281,7 @@ __visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs)
 	 * to nest.
 	 */
 	ipi_entering_ack_irq();
+	check_poll();
 	trace_reschedule_entry(RESCHEDULE_VECTOR);
 	__smp_reschedule_interrupt();
 	trace_reschedule_exit(RESCHEDULE_VECTOR);
@@ -298,6 +300,7 @@ static inline void __smp_call_function_interrupt(void)
 __visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
+	check_poll();
 	__smp_call_function_interrupt();
 	exiting_irq();
 }
@@ -306,6 +309,7 @@ __visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs)
 smp_trace_call_function_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
+	check_poll();
 	trace_call_function_entry(CALL_FUNCTION_VECTOR);
 	__smp_call_function_interrupt();
 	trace_call_function_exit(CALL_FUNCTION_VECTOR);
@@ -322,6 +326,7 @@ static inline void __smp_call_function_single_interrupt(void)
 smp_call_function_single_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
+	check_poll();
 	__smp_call_function_single_interrupt();
 	exiting_irq();
 }
@@ -330,6 +335,7 @@ static inline void __smp_call_function_single_interrupt(void)
 smp_trace_call_function_single_interrupt(struct pt_regs *regs)
 {
 	ipi_entering_ack_irq();
+	check_poll();
 	trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
 	__smp_call_function_single_interrupt();
 	trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 04cf774..e901b26 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -462,6 +462,8 @@ extern __scanf(2, 0)
 extern bool crash_kexec_post_notifiers;
 #ifdef CONFIG_HYPERVISOR_GUEST
 extern unsigned long poll_threshold_ns;
+extern unsigned int poll_shrink;
+extern unsigned int poll_grow;
 #endif
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9174d57..82776eb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1211,6 +1211,20 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write,
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "poll_grow",
+		.data		= &poll_grow,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "poll_shrink",
+		.data		= &poll_shrink,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif
 	{ }
 };
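For completeness, a sketch of how the knobs added above could be driven from user space once the patch is applied. The /proc paths follow the sysctl names registered in kernel/sysctl.c above and the poll_threshold_ns entry documented in kernel.txt; treat them as assumptions, since the final names were still under review:

#include <stdio.h>

/* Write a single value to a sysctl file, reporting failures. */
static int set_knob(const char *path, const char *value)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(value, f);
	return fclose(f);
}

int main(void)
{
	/* Allow up to 200us of polling, growing/shrinking by a factor of 2. */
	set_knob("/proc/sys/kernel/poll_threshold_ns", "200000");
	set_knob("/proc/sys/kernel/poll_grow", "2");
	set_knob("/proc/sys/kernel/poll_shrink", "2");
	/* Writing 0 to poll_threshold_ns disables the mechanism entirely. */
	return 0;
}

Since check_poll() returns immediately when poll_threshold_ns is 0 and default_idle() then never records poll_begin_ns, a cloud vendor can leave the feature compiled in but switched off by default.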