Message ID | 1389791227-24097-1-git-send-email-pgaikwad@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Jan 15, 2014 at 7:07 AM, Prashant Gaikwad <pgaikwad@nvidia.com> wrote: > Now arch timer is registerd using generic sched timer, delay > timer registration is the only part remaining in arch ports. > Move this part to drivers clocksource and remove arch timer > from arch ports. > > Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com> > --- > arch/arm/include/asm/arch_timer.h | 1 - > arch/arm/kernel/Makefile | 1 - > arch/arm/kernel/arch_timer.c | 44 ---------------------------------- > arch/arm64/include/asm/arch_timer.h | 5 ---- > arch/arm64/include/asm/delay.h | 32 ++++++++++++++++++++++++ > arch/arm64/include/asm/timex.h | 5 +-- > arch/arm64/kernel/time.c | 9 ------- > arch/arm64/lib/delay.c | 26 ++++++++++++++++++++ > drivers/clocksource/arm_arch_timer.c | 12 ++++++++- > 9 files changed, 71 insertions(+), 64 deletions(-) > delete mode 100644 arch/arm/kernel/arch_timer.c > create mode 100644 arch/arm64/include/asm/delay.h > > diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h > index 0704e0c..61ad692 100644 > --- a/arch/arm/include/asm/arch_timer.h > +++ b/arch/arm/include/asm/arch_timer.h > @@ -10,7 +10,6 @@ > #include <clocksource/arm_arch_timer.h> > > #ifdef CONFIG_ARM_ARCH_TIMER > -int arch_timer_arch_init(void); > > /* > * These register accessors are marked inline so the compiler can > diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile > index a30fc9b..6b51cf9 100644 > --- a/arch/arm/kernel/Makefile > +++ b/arch/arm/kernel/Makefile > @@ -45,7 +45,6 @@ obj-$(CONFIG_SMP) += smp_tlb.o > endif > obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o > obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o > -obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o > obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o > obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o > obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o > diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c > deleted file mode 100644 > index 1791f12..0000000 > --- a/arch/arm/kernel/arch_timer.c > +++ /dev/null > @@ -1,44 +0,0 @@ > -/* > - * linux/arch/arm/kernel/arch_timer.c > - * > - * Copyright (C) 2011 ARM Ltd. > - * All Rights Reserved > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of the GNU General Public License version 2 as > - * published by the Free Software Foundation. > - */ > -#include <linux/init.h> > -#include <linux/types.h> > -#include <linux/errno.h> > - > -#include <asm/delay.h> > - > -#include <clocksource/arm_arch_timer.h> > - > -static unsigned long arch_timer_read_counter_long(void) > -{ > - return arch_timer_read_counter(); > -} > - > -static struct delay_timer arch_delay_timer; > - > -static void __init arch_timer_delay_timer_register(void) > -{ > - /* Use the architected timer for the delay loop. */ > - arch_delay_timer.read_current_timer = arch_timer_read_counter_long; > - arch_delay_timer.freq = arch_timer_get_rate(); > - register_current_timer_delay(&arch_delay_timer); > -} > - > -int __init arch_timer_arch_init(void) > -{ > - u32 arch_timer_rate = arch_timer_get_rate(); > - > - if (arch_timer_rate == 0) > - return -ENXIO; > - > - arch_timer_delay_timer_register(); > - > - return 0; > -} > diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h > index 9400596..48e06bd 100644 > --- a/arch/arm64/include/asm/arch_timer.h > +++ b/arch/arm64/include/asm/arch_timer.h > @@ -145,9 +145,4 @@ static inline u64 arch_counter_get_cntvct(void) > return cval; > } > > -static inline int arch_timer_arch_init(void) > -{ > - return 0; > -} > - > #endif > diff --git a/arch/arm64/include/asm/delay.h b/arch/arm64/include/asm/delay.h > new file mode 100644 > index 0000000..ea90d99 > --- /dev/null > +++ b/arch/arm64/include/asm/delay.h > @@ -0,0 +1,32 @@ > + > +/* > + * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > + > +#ifndef __ASM_ARM64_DELAY_H > +#define __ASM_ARM64_DELAY_H > + > +#include <asm-generic/delay.h> > + > +struct delay_timer { > + unsigned long (*read_current_timer)(void); > + unsigned long freq; > +}; > + > +/* Delay-loop timer registration. */ > +#define ARCH_HAS_READ_CURRENT_TIMER > +extern void register_current_timer_delay(const struct delay_timer *timer); Can't all but the define be moved to a common location? > + > +#endif /* defined(_ARM64_DELAY_H) */ > diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h > index 81a076e..ca4bdfb 100644 > --- a/arch/arm64/include/asm/timex.h > +++ b/arch/arm64/include/asm/timex.h > @@ -16,13 +16,12 @@ > #ifndef __ASM_TIMEX_H > #define __ASM_TIMEX_H > > -#include <asm/arch_timer.h> > - > /* > * Use the current timer as a cycle counter since this is what we use for > * the delay loop. > */ > -#define get_cycles() arch_counter_get_cntvct() > +typedef unsigned long cycles_t; > +#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; }) Could be in a common location conditional on ARCH_HAS_READ_CURRENT_TIMER. > #include <asm-generic/timex.h> > > diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c > index 29c39d5..213d1a3 100644 > --- a/arch/arm64/kernel/time.c > +++ b/arch/arm64/kernel/time.c > @@ -63,14 +63,5 @@ EXPORT_SYMBOL(profile_pc); > > void __init time_init(void) > { > - u32 arch_timer_rate; > - > clocksource_of_init(); > - > - arch_timer_rate = arch_timer_get_rate(); > - if (!arch_timer_rate) > - panic("Unable to initialise architected timer.\n"); > - > - /* Calibrate the delay loop directly */ > - lpj_fine = arch_timer_rate / HZ; > } > diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c > index dad4ec9..cde0a28 100644 > --- a/arch/arm64/lib/delay.c > +++ b/arch/arm64/lib/delay.c > @@ -24,6 +24,19 @@ > #include <linux/module.h> > #include <linux/timex.h> > > +static const struct delay_timer *delay_timer; > +static bool delay_calibrated; > + > +int read_current_timer(unsigned long *timer_val) > +{ > + if (!delay_timer) > + return -ENXIO; > + > + *timer_val = delay_timer->read_current_timer(); > + return 0; > +} > +EXPORT_SYMBOL(read_current_timer); > + This too could be common. > void __delay(unsigned long cycles) > { > cycles_t start = get_cycles(); > @@ -53,3 +66,16 @@ void __ndelay(unsigned long nsecs) > __const_udelay(nsecs * 0x5UL); /* 2**32 / 1000000000 (rounded up) */ > } > EXPORT_SYMBOL(__ndelay); > + > +void register_current_timer_delay(const struct delay_timer *timer) > +{ > + if (!delay_calibrated) { > + pr_info("Switching to timer-based delay loop\n"); > + delay_timer = timer; > + lpj_fine = timer->freq / HZ; > + > + delay_calibrated = true; > + } else { > + pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); > + } > +} > diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c > index 57e823c..8ee9918 100644 > --- a/drivers/clocksource/arm_arch_timer.c > +++ b/drivers/clocksource/arm_arch_timer.c > @@ -422,6 +422,16 @@ struct timecounter *arch_timer_get_timecounter(void) > return &timecounter; > } > > +static struct delay_timer arch_delay_timer; > + > +static void __init arch_delay_timer_register(void) > +{ > + /* Use the architected timer for the delay loop. */ > + arch_delay_timer.read_current_timer = arch_timer_read_counter(); > + arch_delay_timer.freq = arch_timer_rate; > + register_current_timer_delay(&arch_delay_timer); > +} > + > static void __init arch_counter_register(unsigned type) > { > u64 start_count; > @@ -630,7 +640,7 @@ static void __init arch_timer_common_init(void) > > arch_timer_banner(arch_timers_present); > arch_counter_register(arch_timers_present); > - arch_timer_arch_init(); > + arch_delay_timer_register(); > } > > static void __init arch_timer_init(struct device_node *np) > -- > 1.7.4.1 > > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Hello, On Wed, Jan 15, 2014 at 01:07:07PM +0000, Prashant Gaikwad wrote: > Now arch timer is registerd using generic sched timer, delay > timer registration is the only part remaining in arch ports. > Move this part to drivers clocksource and remove arch timer > from arch ports. What's the advantage in doing this? I'd have thought consolidation, but... > Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com> > --- > arch/arm/include/asm/arch_timer.h | 1 - > arch/arm/kernel/Makefile | 1 - > arch/arm/kernel/arch_timer.c | 44 ---------------------------------- > arch/arm64/include/asm/arch_timer.h | 5 ---- > arch/arm64/include/asm/delay.h | 32 ++++++++++++++++++++++++ > arch/arm64/include/asm/timex.h | 5 +-- > arch/arm64/kernel/time.c | 9 ------- > arch/arm64/lib/delay.c | 26 ++++++++++++++++++++ > drivers/clocksource/arm_arch_timer.c | 12 ++++++++- > 9 files changed, 71 insertions(+), 64 deletions(-) ... that's a positive diffstat! I also think that delaying the delay loop initialisation for arm64 could be problematic, since we don't have anything to fall back on (like the busy-loop on ARM) in case of early *delay calls. What happens if I call udelay on arm64 before the counter has registered? Will
On Wednesday 15 January 2014 09:11 PM, Rob Herring wrote: > On Wed, Jan 15, 2014 at 7:07 AM, Prashant Gaikwad <pgaikwad@nvidia.com> wrote: >> Now arch timer is registerd using generic sched timer, delay >> timer registration is the only part remaining in arch ports. >> Move this part to drivers clocksource and remove arch timer >> from arch ports. >> >> Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com> >> --- >> arch/arm/include/asm/arch_timer.h | 1 - >> arch/arm/kernel/Makefile | 1 - >> arch/arm/kernel/arch_timer.c | 44 ---------------------------------- >> arch/arm64/include/asm/arch_timer.h | 5 ---- >> arch/arm64/include/asm/delay.h | 32 ++++++++++++++++++++++++ >> arch/arm64/include/asm/timex.h | 5 +-- >> arch/arm64/kernel/time.c | 9 ------- >> arch/arm64/lib/delay.c | 26 ++++++++++++++++++++ >> drivers/clocksource/arm_arch_timer.c | 12 ++++++++- >> 9 files changed, 71 insertions(+), 64 deletions(-) >> delete mode 100644 arch/arm/kernel/arch_timer.c >> create mode 100644 arch/arm64/include/asm/delay.h Thanks for the comments Rob!! I was planning to do that in later patch. Is that fine? >> diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h >> index 0704e0c..61ad692 100644 >> --- a/arch/arm/include/asm/arch_timer.h >> +++ b/arch/arm/include/asm/arch_timer.h >> @@ -10,7 +10,6 @@ >> #include <clocksource/arm_arch_timer.h> >> >> #ifdef CONFIG_ARM_ARCH_TIMER >> -int arch_timer_arch_init(void); >> >> /* >> * These register accessors are marked inline so the compiler can >> diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile >> index a30fc9b..6b51cf9 100644 >> --- a/arch/arm/kernel/Makefile >> +++ b/arch/arm/kernel/Makefile >> @@ -45,7 +45,6 @@ obj-$(CONFIG_SMP) += smp_tlb.o >> endif >> obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o >> obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o >> -obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o >> obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o >> obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o >> obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o >> diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c >> deleted file mode 100644 >> index 1791f12..0000000 >> --- a/arch/arm/kernel/arch_timer.c >> +++ /dev/null >> @@ -1,44 +0,0 @@ >> -/* >> - * linux/arch/arm/kernel/arch_timer.c >> - * >> - * Copyright (C) 2011 ARM Ltd. >> - * All Rights Reserved >> - * >> - * This program is free software; you can redistribute it and/or modify >> - * it under the terms of the GNU General Public License version 2 as >> - * published by the Free Software Foundation. >> - */ >> -#include <linux/init.h> >> -#include <linux/types.h> >> -#include <linux/errno.h> >> - >> -#include <asm/delay.h> >> - >> -#include <clocksource/arm_arch_timer.h> >> - >> -static unsigned long arch_timer_read_counter_long(void) >> -{ >> - return arch_timer_read_counter(); >> -} >> - >> -static struct delay_timer arch_delay_timer; >> - >> -static void __init arch_timer_delay_timer_register(void) >> -{ >> - /* Use the architected timer for the delay loop. */ >> - arch_delay_timer.read_current_timer = arch_timer_read_counter_long; >> - arch_delay_timer.freq = arch_timer_get_rate(); >> - register_current_timer_delay(&arch_delay_timer); >> -} >> - >> -int __init arch_timer_arch_init(void) >> -{ >> - u32 arch_timer_rate = arch_timer_get_rate(); >> - >> - if (arch_timer_rate == 0) >> - return -ENXIO; >> - >> - arch_timer_delay_timer_register(); >> - >> - return 0; >> -} >> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h >> index 9400596..48e06bd 100644 >> --- a/arch/arm64/include/asm/arch_timer.h >> +++ b/arch/arm64/include/asm/arch_timer.h >> @@ -145,9 +145,4 @@ static inline u64 arch_counter_get_cntvct(void) >> return cval; >> } >> >> -static inline int arch_timer_arch_init(void) >> -{ >> - return 0; >> -} >> - >> #endif >> diff --git a/arch/arm64/include/asm/delay.h b/arch/arm64/include/asm/delay.h >> new file mode 100644 >> index 0000000..ea90d99 >> --- /dev/null >> +++ b/arch/arm64/include/asm/delay.h >> @@ -0,0 +1,32 @@ >> + >> +/* >> + * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. >> + * >> + * This program is free software; you can redistribute it and/or modify it >> + * under the terms and conditions of the GNU General Public License, >> + * version 2, as published by the Free Software Foundation. >> + * >> + * This program is distributed in the hope it will be useful, but WITHOUT >> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or >> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for >> + * more details. >> + * >> + * You should have received a copy of the GNU General Public License >> + * along with this program. If not, see <http://www.gnu.org/licenses/>. >> + */ >> + >> +#ifndef __ASM_ARM64_DELAY_H >> +#define __ASM_ARM64_DELAY_H >> + >> +#include <asm-generic/delay.h> >> + >> +struct delay_timer { >> + unsigned long (*read_current_timer)(void); >> + unsigned long freq; >> +}; >> + >> +/* Delay-loop timer registration. */ >> +#define ARCH_HAS_READ_CURRENT_TIMER >> +extern void register_current_timer_delay(const struct delay_timer *timer); > Can't all but the define be moved to a common location? > >> + >> +#endif /* defined(_ARM64_DELAY_H) */ >> diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h >> index 81a076e..ca4bdfb 100644 >> --- a/arch/arm64/include/asm/timex.h >> +++ b/arch/arm64/include/asm/timex.h >> @@ -16,13 +16,12 @@ >> #ifndef __ASM_TIMEX_H >> #define __ASM_TIMEX_H >> >> -#include <asm/arch_timer.h> >> - >> /* >> * Use the current timer as a cycle counter since this is what we use for >> * the delay loop. >> */ >> -#define get_cycles() arch_counter_get_cntvct() >> +typedef unsigned long cycles_t; >> +#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; }) > Could be in a common location conditional on ARCH_HAS_READ_CURRENT_TIMER. > >> #include <asm-generic/timex.h> >> >> diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c >> index 29c39d5..213d1a3 100644 >> --- a/arch/arm64/kernel/time.c >> +++ b/arch/arm64/kernel/time.c >> @@ -63,14 +63,5 @@ EXPORT_SYMBOL(profile_pc); >> >> void __init time_init(void) >> { >> - u32 arch_timer_rate; >> - >> clocksource_of_init(); >> - >> - arch_timer_rate = arch_timer_get_rate(); >> - if (!arch_timer_rate) >> - panic("Unable to initialise architected timer.\n"); >> - >> - /* Calibrate the delay loop directly */ >> - lpj_fine = arch_timer_rate / HZ; >> } >> diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c >> index dad4ec9..cde0a28 100644 >> --- a/arch/arm64/lib/delay.c >> +++ b/arch/arm64/lib/delay.c >> @@ -24,6 +24,19 @@ >> #include <linux/module.h> >> #include <linux/timex.h> >> >> +static const struct delay_timer *delay_timer; >> +static bool delay_calibrated; >> + >> +int read_current_timer(unsigned long *timer_val) >> +{ >> + if (!delay_timer) >> + return -ENXIO; >> + >> + *timer_val = delay_timer->read_current_timer(); >> + return 0; >> +} >> +EXPORT_SYMBOL(read_current_timer); >> + > This too could be common. > >> void __delay(unsigned long cycles) >> { >> cycles_t start = get_cycles(); >> @@ -53,3 +66,16 @@ void __ndelay(unsigned long nsecs) >> __const_udelay(nsecs * 0x5UL); /* 2**32 / 1000000000 (rounded up) */ >> } >> EXPORT_SYMBOL(__ndelay); >> + >> +void register_current_timer_delay(const struct delay_timer *timer) >> +{ >> + if (!delay_calibrated) { >> + pr_info("Switching to timer-based delay loop\n"); >> + delay_timer = timer; >> + lpj_fine = timer->freq / HZ; >> + >> + delay_calibrated = true; >> + } else { >> + pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); >> + } >> +} >> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c >> index 57e823c..8ee9918 100644 >> --- a/drivers/clocksource/arm_arch_timer.c >> +++ b/drivers/clocksource/arm_arch_timer.c >> @@ -422,6 +422,16 @@ struct timecounter *arch_timer_get_timecounter(void) >> return &timecounter; >> } >> >> +static struct delay_timer arch_delay_timer; >> + >> +static void __init arch_delay_timer_register(void) >> +{ >> + /* Use the architected timer for the delay loop. */ >> + arch_delay_timer.read_current_timer = arch_timer_read_counter(); >> + arch_delay_timer.freq = arch_timer_rate; >> + register_current_timer_delay(&arch_delay_timer); >> +} >> + >> static void __init arch_counter_register(unsigned type) >> { >> u64 start_count; >> @@ -630,7 +640,7 @@ static void __init arch_timer_common_init(void) >> >> arch_timer_banner(arch_timers_present); >> arch_counter_register(arch_timers_present); >> - arch_timer_arch_init(); >> + arch_delay_timer_register(); >> } >> >> static void __init arch_timer_init(struct device_node *np) >> -- >> 1.7.4.1 >> >> >> _______________________________________________ >> linux-arm-kernel mailing list >> linux-arm-kernel@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
On Wednesday 15 January 2014 09:15 PM, Will Deacon wrote: > Hello, > > On Wed, Jan 15, 2014 at 01:07:07PM +0000, Prashant Gaikwad wrote: >> Now arch timer is registerd using generic sched timer, delay >> timer registration is the only part remaining in arch ports. >> Move this part to drivers clocksource and remove arch timer >> from arch ports. > What's the advantage in doing this? I'd have thought consolidation, but... Primary objective of doing this to use other timers on SoC instead of arch timers. It is observed that on ARM implementations of ARMv8 architecture context of per CPU logic of arch timer is not preserved when CPU is powered down and reset is held during powerdown. This makes arch timers unusable as wake events for some CPU idle states where CPU core is powered down. So we have two options: 1. Use system timers as broadcast timers and save/restore arch timers context actross these CPU idle states. 2. Disable arch timer and use system timers. This patch will help to implement 2nd option. >> Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com> >> --- >> arch/arm/include/asm/arch_timer.h | 1 - >> arch/arm/kernel/Makefile | 1 - >> arch/arm/kernel/arch_timer.c | 44 ---------------------------------- >> arch/arm64/include/asm/arch_timer.h | 5 ---- >> arch/arm64/include/asm/delay.h | 32 ++++++++++++++++++++++++ >> arch/arm64/include/asm/timex.h | 5 +-- >> arch/arm64/kernel/time.c | 9 ------- >> arch/arm64/lib/delay.c | 26 ++++++++++++++++++++ >> drivers/clocksource/arm_arch_timer.c | 12 ++++++++- >> 9 files changed, 71 insertions(+), 64 deletions(-) > ... that's a positive diffstat! I also think that delaying the delay loop > initialisation for arm64 could be problematic, since we don't have anything > to fall back on (like the busy-loop on ARM) in case of early *delay calls. > > What happens if I call udelay on arm64 before the counter has registered? I will fix this in next patch. Do you have plan to add busy-loop for arm64? Or I will have to initialize read counter function early. > Will
On Thu, Jan 16, 2014 at 05:19:04AM +0000, Prashant Gaikwad wrote: > On Wednesday 15 January 2014 09:15 PM, Will Deacon wrote: > > On Wed, Jan 15, 2014 at 01:07:07PM +0000, Prashant Gaikwad wrote: > >> Now arch timer is registerd using generic sched timer, delay > >> timer registration is the only part remaining in arch ports. > >> Move this part to drivers clocksource and remove arch timer > >> from arch ports. > > What's the advantage in doing this? I'd have thought consolidation, but... > > Primary objective of doing this to use other timers on SoC instead of > arch timers. It is observed that on ARM implementations of ARMv8 > architecture context of per CPU logic of arch timer is not preserved > when CPU is powered down and reset is held during powerdown. > > This makes arch timers unusable as wake events for some CPU idle states > where CPU core is powered down. > > So we have two options: > 1. Use system timers as broadcast timers and save/restore arch timers > context actross these CPU idle states. > 2. Disable arch timer and use system timers. Why can't you use the C3STOP feature so that the arch-timer isn't used when you go idle? There are several good reasons to use the architected timers when you can: - KVM requires them, as they have hardware support for virtualisation - The VDSO can make use of them to implement efficient gtod in userspace - They are per-cpu, so no need to broadcasting in software - They can generate events and allow userspace to implement efficient polling loops > > What happens if I call udelay on arm64 before the counter has registered? > > I will fix this in next patch. Do you have plan to add busy-loop for > arm64? Or I will have to initialize read counter function early. Well, I assume the virtual counter (cntvct) still ticks on your system? If so, I would suggest having that as the default delay implementation and allowing it to be over-ridden later on. The part I don't understand is why being `unusable as wake events' affects the delay loop. All we need for that is the counter, the timers are irrelevant. Will
Will Deacon <will.deacon@arm.com> writes: > Why can't you use the C3STOP feature so that the arch-timer isn't used when > you go idle? That would mean falling back to broadcast timer, right? That's not necessarily on the local CPU so wakeups would often wake two CPUs. Does anyone have patches for using a CPU local timer as a fallback for C3STOP timers? --Antti
On 01/17/2014 10:07 AM, Antti Miettinen wrote: > Will Deacon <will.deacon@arm.com> writes: >> Why can't you use the C3STOP feature so that the arch-timer isn't used when >> you go idle? > > That would mean falling back to broadcast timer, right? That's not > necessarily on the local CPU so wakeups would often wake two CPUs. You can prevent that if the hardware supports it with the CLOCK_EVT_DYNIRQ flag on the broadcast timer. > Does > anyone have patches for using a CPU local timer as a fallback for > C3STOP timers?
On Friday 17 January 2014 02:42 PM, Daniel Lezcano wrote: > On 01/17/2014 10:07 AM, Antti Miettinen wrote: >> Will Deacon <will.deacon@arm.com> writes: >>> Why can't you use the C3STOP feature so that the arch-timer isn't used when >>> you go idle? >> That would mean falling back to broadcast timer, right? That's not >> necessarily on the local CPU so wakeups would often wake two CPUs. > You can prevent that if the hardware supports it with the > CLOCK_EVT_DYNIRQ flag on the broadcast timer. Instead of falling back on broadcast timer, is it possible to fall back on other per-CPU timer which is preserved across idle state? >> Does >> anyone have patches for using a CPU local timer as a fallback for >> C3STOP timers? > >
On 01/17/2014 11:11 AM, Prashant Gaikwad wrote: > On Friday 17 January 2014 02:42 PM, Daniel Lezcano wrote: >> On 01/17/2014 10:07 AM, Antti Miettinen wrote: >>> Will Deacon <will.deacon@arm.com> writes: >>>> Why can't you use the C3STOP feature so that the arch-timer isn't >>>> used when >>>> you go idle? >>> That would mean falling back to broadcast timer, right? That's not >>> necessarily on the local CPU so wakeups would often wake two CPUs. >> You can prevent that if the hardware supports it with the >> CLOCK_EVT_DYNIRQ flag on the broadcast timer. > > Instead of falling back on broadcast timer, is it possible to fall back > on other per-CPU timer which is preserved across idle state? Is it what you are looking for ? http://lwn.net/Articles/580568/ >>> Does >>> anyone have patches for using a CPU local timer as a fallback for >>> C3STOP timers? >> >> >
Daniel Lezcano <daniel.lezcano@linaro.org> writes: > On 01/17/2014 10:07 AM, Antti Miettinen wrote: >> Will Deacon <will.deacon@arm.com> writes: >>> Why can't you use the C3STOP feature so that the arch-timer isn't used when >>> you go idle? >> >> That would mean falling back to broadcast timer, right? That's not >> necessarily on the local CPU so wakeups would often wake two CPUs. > > You can prevent that if the hardware supports it with the > CLOCK_EVT_DYNIRQ flag on the broadcast timer. CLOCK_EVT_FEAT_DYNIRQ seems to be the flag. Cool, thanks. --Antti
On Friday 17 January 2014 03:45 PM, Daniel Lezcano wrote: > On 01/17/2014 11:11 AM, Prashant Gaikwad wrote: >> On Friday 17 January 2014 02:42 PM, Daniel Lezcano wrote: >>> On 01/17/2014 10:07 AM, Antti Miettinen wrote: >>>> Will Deacon <will.deacon@arm.com> writes: >>>>> Why can't you use the C3STOP feature so that the arch-timer isn't >>>>> used when >>>>> you go idle? >>>> That would mean falling back to broadcast timer, right? That's not >>>> necessarily on the local CPU so wakeups would often wake two CPUs. >>> You can prevent that if the hardware supports it with the >>> CLOCK_EVT_DYNIRQ flag on the broadcast timer. >> Instead of falling back on broadcast timer, is it possible to fall back >> on other per-CPU timer which is preserved across idle state? > Is it what you are looking for ? > > http://lwn.net/Articles/580568/ > If I understand correctly these patches enables us to use per-CPU timers as broadcast timers. I do not want to use broadcast timer. If I have 2 per-CPU timers T1 and T2, T1 is not preserved across idle state and T2 is preserved. And I want to use T1 as scheduler timer. Can I do following for idle state? Idle entry: clockevents_shutdown(T1); clockevents_set_mode(T2, ONESHOT); clockevents_program_event(T2, next_event); Idle exit: clockevents_shutdown(T2); clockevents_set_mode(T1, ONESHOT); >>>> Does >>>> anyone have patches for using a CPU local timer as a fallback for >>>> C3STOP timers? >>> >
On 01/17/2014 12:37 PM, Prashant Gaikwad wrote: > On Friday 17 January 2014 03:45 PM, Daniel Lezcano wrote: >> On 01/17/2014 11:11 AM, Prashant Gaikwad wrote: >>> On Friday 17 January 2014 02:42 PM, Daniel Lezcano wrote: >>>> On 01/17/2014 10:07 AM, Antti Miettinen wrote: >>>>> Will Deacon <will.deacon@arm.com> writes: >>>>>> Why can't you use the C3STOP feature so that the arch-timer isn't >>>>>> used when >>>>>> you go idle? >>>>> That would mean falling back to broadcast timer, right? That's not >>>>> necessarily on the local CPU so wakeups would often wake two CPUs. >>>> You can prevent that if the hardware supports it with the >>>> CLOCK_EVT_DYNIRQ flag on the broadcast timer. >>> Instead of falling back on broadcast timer, is it possible to fall back >>> on other per-CPU timer which is preserved across idle state? >> Is it what you are looking for ? >> >> http://lwn.net/Articles/580568/ >> > > If I understand correctly these patches enables us to use per-CPU timers > as broadcast timers. I do not want to use broadcast timer. Why ? > If I have 2 per-CPU timers T1 and T2, T1 is not preserved across idle > state and T2 is preserved. And I want to use T1 as scheduler timer. > Can I do following for idle state? > > Idle entry: > clockevents_shutdown(T1); > clockevents_set_mode(T2, ONESHOT); > clockevents_program_event(T2, next_event); > > Idle exit: > clockevents_shutdown(T2); > clockevents_set_mode(T1, ONESHOT); >>>>> Does >>>>> anyone have patches for using a CPU local timer as a fallback for >>>>> C3STOP timers? >>>> >> >
On Friday 17 January 2014 05:38 PM, Daniel Lezcano wrote: > On 01/17/2014 12:37 PM, Prashant Gaikwad wrote: >> On Friday 17 January 2014 03:45 PM, Daniel Lezcano wrote: >>> On 01/17/2014 11:11 AM, Prashant Gaikwad wrote: >>>> On Friday 17 January 2014 02:42 PM, Daniel Lezcano wrote: >>>>> On 01/17/2014 10:07 AM, Antti Miettinen wrote: >>>>>> Will Deacon <will.deacon@arm.com> writes: >>>>>>> Why can't you use the C3STOP feature so that the arch-timer isn't >>>>>>> used when >>>>>>> you go idle? >>>>>> That would mean falling back to broadcast timer, right? That's not >>>>>> necessarily on the local CPU so wakeups would often wake two CPUs. >>>>> You can prevent that if the hardware supports it with the >>>>> CLOCK_EVT_DYNIRQ flag on the broadcast timer. >>>> Instead of falling back on broadcast timer, is it possible to fall back >>>> on other per-CPU timer which is preserved across idle state? >>> Is it what you are looking for ? >>> >>> http://lwn.net/Articles/580568/ >>> >> If I understand correctly these patches enables us to use per-CPU timers >> as broadcast timers. I do not want to use broadcast timer. > Why ? > For some idle states it may be required to change the timer when entering idle state to adjust the exit latency. It can be done for broadcast timer too but following scenario will not work 1. CPU1 enters in idle state: Broadcast timer next event is in 2ms, CPU latency is 50us. So we change the broadcast timer to send event after (2ms - 50us). 2. After 1ms CPU2 enters in idle state: Next event is 5ms. Broadcast timer is already programmed to < (5ms -50us) so we do nothing. 3. CPU1 exits from idle state because of timer interrupt 4. Broadcast event handler: - Timer event is handled and CPU1 is switched back to local timer. - Next CPU is CPU2 and next event for it is 4ms. So brodcast timer is programmed to 4ms. We can not change brodcast timer here to adjust delay caused by CPU exit latency. CPU idle governors does help to solve the latency issue. I was thinking this from sub-states perspective which are not exposed to CPU idle governor. Solution for this could be to expose those states to CPU idle governor but just wanted to know if we can use timers this way? Another requirement: We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, C2, C3 respectively. Rating of T2 is better than T3. If I register T2 and T3 both as broadcast timers then T3 will not be used. But ... - T2 is not preserved in C3 idle state. - T3 resolution is very poor (ms) and can not be used as wake event for C2. Possible solution, register only T3 as broadcast device and use T2 as per-CPU fallback timer. >> If I have 2 per-CPU timers T1 and T2, T1 is not preserved across idle >> state and T2 is preserved. And I want to use T1 as scheduler timer. >> Can I do following for idle state? >> >> Idle entry: >> clockevents_shutdown(T1); >> clockevents_set_mode(T2, ONESHOT); >> clockevents_program_event(T2, next_event); >> >> Idle exit: >> clockevents_shutdown(T2); >> clockevents_set_mode(T1, ONESHOT); > > > >>>>>> Does >>>>>> anyone have patches for using a CPU local timer as a fallback for >>>>>> C3STOP timers? >
On 01/17/14 05:40, Prashant Gaikwad wrote: > > Another requirement: > > We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, > C2, C3 respectively. > > Rating of T2 is better than T3. If I register T2 and T3 both as > broadcast timers then T3 will not be used. But ... > - T2 is not preserved in C3 idle state. > - T3 resolution is very poor (ms) and can not be used as wake > event for C2. > > Possible solution, register only T3 as broadcast device and use T2 as > per-CPU fallback timer. We have the same situation on MSM. I've been thinking about proposing we allow multiple broadcast timers to exist in the system and then have the clockevents_notify() caller indicate which C state is being entered. The broadcast timers would need to indicate which C state they don't work in though.
Prashant Gaikwad <pgaikwad@nvidia.com> writes: > For some idle states it may be required to change the timer when > entering idle state to adjust the exit latency. Would it work to set up a separate software timer to account for the exit latency? --Antti
On 01/17/2014 02:40 PM, Prashant Gaikwad wrote: > On Friday 17 January 2014 05:38 PM, Daniel Lezcano wrote: >> On 01/17/2014 12:37 PM, Prashant Gaikwad wrote: >>> On Friday 17 January 2014 03:45 PM, Daniel Lezcano wrote: >>>> On 01/17/2014 11:11 AM, Prashant Gaikwad wrote: >>>>> On Friday 17 January 2014 02:42 PM, Daniel Lezcano wrote: >>>>>> On 01/17/2014 10:07 AM, Antti Miettinen wrote: >>>>>>> Will Deacon <will.deacon@arm.com> writes: >>>>>>>> Why can't you use the C3STOP feature so that the arch-timer isn't >>>>>>>> used when >>>>>>>> you go idle? >>>>>>> That would mean falling back to broadcast timer, right? That's not >>>>>>> necessarily on the local CPU so wakeups would often wake two CPUs. >>>>>> You can prevent that if the hardware supports it with the >>>>>> CLOCK_EVT_DYNIRQ flag on the broadcast timer. >>>>> Instead of falling back on broadcast timer, is it possible to fall >>>>> back >>>>> on other per-CPU timer which is preserved across idle state? >>>> Is it what you are looking for ? >>>> >>>> http://lwn.net/Articles/580568/ >>>> >>> If I understand correctly these patches enables us to use per-CPU timers >>> as broadcast timers. I do not want to use broadcast timer. >> Why ? >> > > For some idle states it may be required to change the timer when > entering idle state to adjust the exit latency. > > It can be done for broadcast timer too but following scenario will not work > > 1. CPU1 enters in idle state: > Broadcast timer next event is in 2ms, CPU latency is 50us. So > we change the broadcast timer to send event after (2ms - 50us). > > 2. After 1ms CPU2 enters in idle state: > Next event is 5ms. Broadcast timer is already programmed to < > (5ms -50us) so we do nothing. > > 3. CPU1 exits from idle state because of timer interrupt > > 4. Broadcast event handler: > - Timer event is handled and CPU1 is switched back to local timer. > - Next CPU is CPU2 and next event for it is 4ms. So brodcast timer > is programmed to 4ms. > > We can not change brodcast timer here to adjust delay caused by CPU exit > latency. Thanks for the detailed explanation. IIUC, this not only related to your hardware only but with how is implemented the broadcast timer, no ? I think there is a similar need with the scheduler when it needs to know what is the idlest cpu. One thing the scheduler wants to know is the wakeup latency in order to choose the cpu in the shallowest state. > CPU idle governors does help to solve the latency issue. I was thinking > this from sub-states perspective which are not exposed to CPU idle > governor. Could you elaborate what you mean by these sub-states ? Is it related to the cpuidle backend drivers choosing an intermediate state different from the one the governor choose ? > Solution for this could be to expose those states to CPU idle governor > but just wanted to know if we can use timers this way IMO, that should be studied in a larger scope including the scheduler. > Another requirement: > > We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, > C2, C3 respectively. > > Rating of T2 is better than T3. If I register T2 and T3 both as > broadcast timers then T3 will not be used. But ... > - T2 is not preserved in C3 idle state. > - T3 resolution is very poor (ms) and can not be used as wake event > for C2. > > Possible solution, register only T3 as broadcast device and use T2 as > per-CPU fallback timer. > >>> If I have 2 per-CPU timers T1 and T2, T1 is not preserved across idle >>> state and T2 is preserved. And I want to use T1 as scheduler timer. >>> Can I do following for idle state? >>> >>> Idle entry: >>> clockevents_shutdown(T1); >>> clockevents_set_mode(T2, ONESHOT); >>> clockevents_program_event(T2, next_event); >>> >>> Idle exit: >>> clockevents_shutdown(T2); >>> clockevents_set_mode(T1, ONESHOT); See answer to Stephen.
On 01/17/2014 07:36 PM, Stephen Boyd wrote: > On 01/17/14 05:40, Prashant Gaikwad wrote: >> >> Another requirement: >> >> We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, >> C2, C3 respectively. >> >> Rating of T2 is better than T3. If I register T2 and T3 both as >> broadcast timers then T3 will not be used. But ... >> - T2 is not preserved in C3 idle state. >> - T3 resolution is very poor (ms) and can not be used as wake >> event for C2. >> >> Possible solution, register only T3 as broadcast device and use T2 as >> per-CPU fallback timer. > > We have the same situation on MSM. I've been thinking about proposing we > allow multiple broadcast timers to exist in the system and then have the > clockevents_notify() caller indicate which C state is being entered. The > broadcast timers would need to indicate which C state they don't work in > though. IMO, there are different solutions: 1. extend the C3STOP to C1STOP, C2STOP, etc ... and pass the idle state to the time framework where these flags are checked against. I don't like this approach but it is feasible. 2. use the generic power domain. When the power domain is shutdown via the cpuidle backend driver, it switches the timer.
On Mon, Jan 20, 2014 at 02:42:10PM +0000, Daniel Lezcano wrote: > On 01/17/2014 07:36 PM, Stephen Boyd wrote: > > On 01/17/14 05:40, Prashant Gaikwad wrote: > >> > >> Another requirement: > >> > >> We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, > >> C2, C3 respectively. > >> > >> Rating of T2 is better than T3. If I register T2 and T3 both as > >> broadcast timers then T3 will not be used. But ... > >> - T2 is not preserved in C3 idle state. > >> - T3 resolution is very poor (ms) and can not be used as wake > >> event for C2. > >> > >> Possible solution, register only T3 as broadcast device and use T2 as > >> per-CPU fallback timer. > > > > We have the same situation on MSM. I've been thinking about proposing we > > allow multiple broadcast timers to exist in the system and then have the > > clockevents_notify() caller indicate which C state is being entered. The > > broadcast timers would need to indicate which C state they don't work in > > though. > > IMO, there are different solutions: > > 1. extend the C3STOP to C1STOP, C2STOP, etc ... and pass the idle state > to the time framework where these flags are checked against. I don't > like this approach but it is feasible. > > 2. use the generic power domain. When the power domain is shutdown via > the cpuidle backend driver, it switches the timer. IMO, 2 is the way forward. It is the only solution that links resources to the reason they need maintainance (ie power management). I am writing v2 of C-state proposal where power domains are explicitly associated with devices (eg arch timers), and 2 fits well with this approach. Lorenzo
On 01/20/2014 03:56 PM, Lorenzo Pieralisi wrote: > On Mon, Jan 20, 2014 at 02:42:10PM +0000, Daniel Lezcano wrote: >> On 01/17/2014 07:36 PM, Stephen Boyd wrote: >>> On 01/17/14 05:40, Prashant Gaikwad wrote: >>>> >>>> Another requirement: >>>> >>>> We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, >>>> C2, C3 respectively. >>>> >>>> Rating of T2 is better than T3. If I register T2 and T3 both as >>>> broadcast timers then T3 will not be used. But ... >>>> - T2 is not preserved in C3 idle state. >>>> - T3 resolution is very poor (ms) and can not be used as wake >>>> event for C2. >>>> >>>> Possible solution, register only T3 as broadcast device and use T2 as >>>> per-CPU fallback timer. >>> >>> We have the same situation on MSM. I've been thinking about proposing we >>> allow multiple broadcast timers to exist in the system and then have the >>> clockevents_notify() caller indicate which C state is being entered. The >>> broadcast timers would need to indicate which C state they don't work in >>> though. >> >> IMO, there are different solutions: >> >> 1. extend the C3STOP to C1STOP, C2STOP, etc ... and pass the idle state >> to the time framework where these flags are checked against. I don't >> like this approach but it is feasible. >> >> 2. use the generic power domain. When the power domain is shutdown via >> the cpuidle backend driver, it switches the timer. > > IMO, 2 is the way forward. It is the only solution that links resources to > the reason they need maintainance (ie power management). I am writing > v2 of C-state proposal where power domains are explicitly associated with > devices (eg arch timers), and 2 fits well with this approach. Thanks Lorenzo for your feedback. -- Daniel
On Monday 20 January 2014 08:11 PM, Daniel Lezcano wrote: > On 01/17/2014 02:40 PM, Prashant Gaikwad wrote: >> On Friday 17 January 2014 05:38 PM, Daniel Lezcano wrote: >>> On 01/17/2014 12:37 PM, Prashant Gaikwad wrote: >>>> On Friday 17 January 2014 03:45 PM, Daniel Lezcano wrote: >>>>> On 01/17/2014 11:11 AM, Prashant Gaikwad wrote: >>>>>> On Friday 17 January 2014 02:42 PM, Daniel Lezcano wrote: >>>>>>> On 01/17/2014 10:07 AM, Antti Miettinen wrote: >>>>>>>> Will Deacon <will.deacon@arm.com> writes: >>>>>>>>> Why can't you use the C3STOP feature so that the arch-timer isn't >>>>>>>>> used when >>>>>>>>> you go idle? >>>>>>>> That would mean falling back to broadcast timer, right? That's not >>>>>>>> necessarily on the local CPU so wakeups would often wake two CPUs. >>>>>>> You can prevent that if the hardware supports it with the >>>>>>> CLOCK_EVT_DYNIRQ flag on the broadcast timer. >>>>>> Instead of falling back on broadcast timer, is it possible to fall >>>>>> back >>>>>> on other per-CPU timer which is preserved across idle state? >>>>> Is it what you are looking for ? >>>>> >>>>> http://lwn.net/Articles/580568/ >>>>> >>>> If I understand correctly these patches enables us to use per-CPU timers >>>> as broadcast timers. I do not want to use broadcast timer. >>> Why ? >>> >> For some idle states it may be required to change the timer when >> entering idle state to adjust the exit latency. >> >> It can be done for broadcast timer too but following scenario will not work >> >> 1. CPU1 enters in idle state: >> Broadcast timer next event is in 2ms, CPU latency is 50us. So >> we change the broadcast timer to send event after (2ms - 50us). >> >> 2. After 1ms CPU2 enters in idle state: >> Next event is 5ms. Broadcast timer is already programmed to < >> (5ms -50us) so we do nothing. >> >> 3. CPU1 exits from idle state because of timer interrupt >> >> 4. Broadcast event handler: >> - Timer event is handled and CPU1 is switched back to local timer. >> - Next CPU is CPU2 and next event for it is 4ms. So brodcast timer >> is programmed to 4ms. >> >> We can not change brodcast timer here to adjust delay caused by CPU exit >> latency. > Thanks for the detailed explanation. IIUC, this not only related to your > hardware only but with how is implemented the broadcast timer, no ? Yes. > I think there is a similar need with the scheduler when it needs to know > what is the idlest cpu. One thing the scheduler wants to know is the > wakeup latency in order to choose the cpu in the shallowest state. > >> CPU idle governors does help to solve the latency issue. I was thinking >> this from sub-states perspective which are not exposed to CPU idle >> governor. > Could you elaborate what you mean by these sub-states ? Is it related to > the cpuidle backend drivers choosing an intermediate state different > from the one the governor choose ? Yes. >> Solution for this could be to expose those states to CPU idle governor >> but just wanted to know if we can use timers this way > IMO, that should be studied in a larger scope including the scheduler. > Is this about the per-core timer switching as proposed below ... Idle entry: clockevents_shutdown(T1); clockevents_set_mode(T2, ONESHOT); clockevents_program_event(T2, next_event - latency); Idle exit: clockevents_shutdown(T2); clockevents_set_mode(T1, ONESHOT); ... or about overall approach for this requirement? >> Another requirement: >> >> We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, >> C2, C3 respectively. >> >> Rating of T2 is better than T3. If I register T2 and T3 both as >> broadcast timers then T3 will not be used. But ... >> - T2 is not preserved in C3 idle state. >> - T3 resolution is very poor (ms) and can not be used as wake event >> for C2. >> >> Possible solution, register only T3 as broadcast device and use T2 as >> per-CPU fallback timer. >> >>>> If I have 2 per-CPU timers T1 and T2, T1 is not preserved across idle >>>> state and T2 is preserved. And I want to use T1 as scheduler timer. >>>> Can I do following for idle state? >>>> >>>> Idle entry: >>>> clockevents_shutdown(T1); >>>> clockevents_set_mode(T2, ONESHOT); >>>> clockevents_program_event(T2, next_event); >>>> >>>> Idle exit: >>>> clockevents_shutdown(T2); >>>> clockevents_set_mode(T1, ONESHOT); > See answer to Stephen. > >
On Monday 20 January 2014 08:12 PM, Daniel Lezcano wrote: > On 01/17/2014 07:36 PM, Stephen Boyd wrote: >> On 01/17/14 05:40, Prashant Gaikwad wrote: >>> Another requirement: >>> >>> We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, >>> C2, C3 respectively. >>> >>> Rating of T2 is better than T3. If I register T2 and T3 both as >>> broadcast timers then T3 will not be used. But ... >>> - T2 is not preserved in C3 idle state. >>> - T3 resolution is very poor (ms) and can not be used as wake >>> event for C2. >>> >>> Possible solution, register only T3 as broadcast device and use T2 as >>> per-CPU fallback timer. >> We have the same situation on MSM. I've been thinking about proposing we >> allow multiple broadcast timers to exist in the system and then have the >> clockevents_notify() caller indicate which C state is being entered. The >> broadcast timers would need to indicate which C state they don't work in >> though. > IMO, there are different solutions: > > 1. extend the C3STOP to C1STOP, C2STOP, etc ... and pass the idle state > to the time framework where these flags are checked against. I don't > like this approach but it is feasible. > > 2. use the generic power domain. When the power domain is shutdown via > the cpuidle backend driver, it switches the timer. I am aware of a way to attach idle state to GenPD where we enable an idle state when that power domain is turned off but not the other way where domain is shutdown via CPU idle driver. How do we do it? Even though we shutdown power domain via cpuidle driver this still has to happen from CPU idle state, is that correct assumption? and we switch the timer here. So we still need a way to switch timer from CPU idle state. Hence the question remains is how to switch timers from idle state? > >
On 01/21/2014 09:10 AM, Prashant Gaikwad wrote: > On Monday 20 January 2014 08:11 PM, Daniel Lezcano wrote: >> On 01/17/2014 02:40 PM, Prashant Gaikwad wrote: >>> On Friday 17 January 2014 05:38 PM, Daniel Lezcano wrote: >>>> On 01/17/2014 12:37 PM, Prashant Gaikwad wrote: >>>>> On Friday 17 January 2014 03:45 PM, Daniel Lezcano wrote: >>>>>> On 01/17/2014 11:11 AM, Prashant Gaikwad wrote: >>>>>>> On Friday 17 January 2014 02:42 PM, Daniel Lezcano wrote: >>>>>>>> On 01/17/2014 10:07 AM, Antti Miettinen wrote: >>>>>>>>> Will Deacon <will.deacon@arm.com> writes: >>>>>>>>>> Why can't you use the C3STOP feature so that the arch-timer isn't >>>>>>>>>> used when >>>>>>>>>> you go idle? >>>>>>>>> That would mean falling back to broadcast timer, right? That's not >>>>>>>>> necessarily on the local CPU so wakeups would often wake two CPUs. >>>>>>>> You can prevent that if the hardware supports it with the >>>>>>>> CLOCK_EVT_DYNIRQ flag on the broadcast timer. >>>>>>> Instead of falling back on broadcast timer, is it possible to fall >>>>>>> back >>>>>>> on other per-CPU timer which is preserved across idle state? >>>>>> Is it what you are looking for ? >>>>>> >>>>>> http://lwn.net/Articles/580568/ >>>>>> >>>>> If I understand correctly these patches enables us to use per-CPU >>>>> timers >>>>> as broadcast timers. I do not want to use broadcast timer. >>>> Why ? >>>> >>> For some idle states it may be required to change the timer when >>> entering idle state to adjust the exit latency. >>> >>> It can be done for broadcast timer too but following scenario will >>> not work >>> >>> 1. CPU1 enters in idle state: >>> Broadcast timer next event is in 2ms, CPU latency is 50us. So >>> we change the broadcast timer to send event after (2ms - 50us). >>> >>> 2. After 1ms CPU2 enters in idle state: >>> Next event is 5ms. Broadcast timer is already programmed to < >>> (5ms -50us) so we do nothing. >>> >>> 3. CPU1 exits from idle state because of timer interrupt >>> >>> 4. Broadcast event handler: >>> - Timer event is handled and CPU1 is switched back to local timer. >>> - Next CPU is CPU2 and next event for it is 4ms. So brodcast timer >>> is programmed to 4ms. >>> >>> We can not change brodcast timer here to adjust delay caused by CPU exit >>> latency. >> Thanks for the detailed explanation. IIUC, this not only related to your >> hardware only but with how is implemented the broadcast timer, no ? > > Yes. Hmm, interesting. > >> I think there is a similar need with the scheduler when it needs to know >> what is the idlest cpu. One thing the scheduler wants to know is the >> wakeup latency in order to choose the cpu in the shallowest state. >> >>> CPU idle governors does help to solve the latency issue. I was thinking >>> this from sub-states perspective which are not exposed to CPU idle >>> governor. >> Could you elaborate what you mean by these sub-states ? Is it related to >> the cpuidle backend drivers choosing an intermediate state different >> from the one the governor choose ? > > Yes. Ok. Is there any tool to measure how the timers are far from the expected expiration time ? It would be interesting to do some measurements on this with and without cpuidle. That would help to check the future improvements. >>> Solution for this could be to expose those states to CPU idle governor >>> but just wanted to know if we can use timers this way >> IMO, that should be studied in a larger scope including the scheduler. >> > > Is this about the per-core timer switching as proposed below ... > > Idle entry: > clockevents_shutdown(T1); > clockevents_set_mode(T2, ONESHOT); > clockevents_program_event(T2, next_event - latency); > > Idle exit: > clockevents_shutdown(T2); > clockevents_set_mode(T1, ONESHOT); > > ... or about overall approach for this requirement? It is about the overall approach.
On 01/21/2014 09:20 AM, Prashant Gaikwad wrote: > On Monday 20 January 2014 08:12 PM, Daniel Lezcano wrote: >> On 01/17/2014 07:36 PM, Stephen Boyd wrote: >>> On 01/17/14 05:40, Prashant Gaikwad wrote: >>>> Another requirement: >>>> >>>> We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, >>>> C2, C3 respectively. >>>> >>>> Rating of T2 is better than T3. If I register T2 and T3 both as >>>> broadcast timers then T3 will not be used. But ... >>>> - T2 is not preserved in C3 idle state. >>>> - T3 resolution is very poor (ms) and can not be used as wake >>>> event for C2. >>>> >>>> Possible solution, register only T3 as broadcast device and use T2 as >>>> per-CPU fallback timer. >>> We have the same situation on MSM. I've been thinking about proposing we >>> allow multiple broadcast timers to exist in the system and then have the >>> clockevents_notify() caller indicate which C state is being entered. The >>> broadcast timers would need to indicate which C state they don't work in >>> though. >> IMO, there are different solutions: >> >> 1. extend the C3STOP to C1STOP, C2STOP, etc ... and pass the idle state >> to the time framework where these flags are checked against. I don't >> like this approach but it is feasible. >> >> 2. use the generic power domain. When the power domain is shutdown via >> the cpuidle backend driver, it switches the timer. > > I am aware of a way to attach idle state to GenPD where we enable an > idle state when that power domain is turned off but not the other way > where domain is shutdown via CPU idle driver. How do we do it? > > Even though we shutdown power domain via cpuidle driver this still has > to happen from CPU idle state, is that correct assumption? and we switch > the timer here. So we still need a way to switch timer from CPU idle > state. Hence the question remains is how to switch timers from idle state? You can effectively attach a power domain to a cpuidle state but that wasn't the point. What I meant is to create a generic power domain which maps the power domain of the idle state. When the power domain is shutdown, the callback of the genpd will switch to the timer. I can't give too much details because I am not used to this code but maybe it is a good solution for your specific case.
On Tuesday 21 January 2014 02:10 PM, Daniel Lezcano wrote: > On 01/21/2014 09:20 AM, Prashant Gaikwad wrote: >> On Monday 20 January 2014 08:12 PM, Daniel Lezcano wrote: >>> On 01/17/2014 07:36 PM, Stephen Boyd wrote: >>>> On 01/17/14 05:40, Prashant Gaikwad wrote: >>>>> Another requirement: >>>>> >>>>> We have 3 timers T1, T2, T3 used as wake events for 3 idle states C1, >>>>> C2, C3 respectively. >>>>> >>>>> Rating of T2 is better than T3. If I register T2 and T3 both as >>>>> broadcast timers then T3 will not be used. But ... >>>>> - T2 is not preserved in C3 idle state. >>>>> - T3 resolution is very poor (ms) and can not be used as wake >>>>> event for C2. >>>>> >>>>> Possible solution, register only T3 as broadcast device and use T2 as >>>>> per-CPU fallback timer. >>>> We have the same situation on MSM. I've been thinking about proposing we >>>> allow multiple broadcast timers to exist in the system and then have the >>>> clockevents_notify() caller indicate which C state is being entered. The >>>> broadcast timers would need to indicate which C state they don't work in >>>> though. >>> IMO, there are different solutions: >>> >>> 1. extend the C3STOP to C1STOP, C2STOP, etc ... and pass the idle state >>> to the time framework where these flags are checked against. I don't >>> like this approach but it is feasible. >>> >>> 2. use the generic power domain. When the power domain is shutdown via >>> the cpuidle backend driver, it switches the timer. >> I am aware of a way to attach idle state to GenPD where we enable an >> idle state when that power domain is turned off but not the other way >> where domain is shutdown via CPU idle driver. How do we do it? >> >> Even though we shutdown power domain via cpuidle driver this still has >> to happen from CPU idle state, is that correct assumption? and we switch >> the timer here. So we still need a way to switch timer from CPU idle >> state. Hence the question remains is how to switch timers from idle state? > You can effectively attach a power domain to a cpuidle state but that > wasn't the point. > > What I meant is to create a generic power domain which maps the power > domain of the idle state. When the power domain is shutdown, the > callback of the genpd will switch to the timer. > > I can't give too much details because I am not used to this code but > maybe it is a good solution for your specific case. > Somehow this is not mapping to my use case. We are using generic power domains with CPU idle states.
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h index 0704e0c..61ad692 100644 --- a/arch/arm/include/asm/arch_timer.h +++ b/arch/arm/include/asm/arch_timer.h @@ -10,7 +10,6 @@ #include <clocksource/arm_arch_timer.h> #ifdef CONFIG_ARM_ARCH_TIMER -int arch_timer_arch_init(void); /* * These register accessors are marked inline so the compiler can diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index a30fc9b..6b51cf9 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_SMP) += smp_tlb.o endif obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o -obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c deleted file mode 100644 index 1791f12..0000000 --- a/arch/arm/kernel/arch_timer.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * linux/arch/arm/kernel/arch_timer.c - * - * Copyright (C) 2011 ARM Ltd. - * All Rights Reserved - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/init.h> -#include <linux/types.h> -#include <linux/errno.h> - -#include <asm/delay.h> - -#include <clocksource/arm_arch_timer.h> - -static unsigned long arch_timer_read_counter_long(void) -{ - return arch_timer_read_counter(); -} - -static struct delay_timer arch_delay_timer; - -static void __init arch_timer_delay_timer_register(void) -{ - /* Use the architected timer for the delay loop. */ - arch_delay_timer.read_current_timer = arch_timer_read_counter_long; - arch_delay_timer.freq = arch_timer_get_rate(); - register_current_timer_delay(&arch_delay_timer); -} - -int __init arch_timer_arch_init(void) -{ - u32 arch_timer_rate = arch_timer_get_rate(); - - if (arch_timer_rate == 0) - return -ENXIO; - - arch_timer_delay_timer_register(); - - return 0; -} diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 9400596..48e06bd 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -145,9 +145,4 @@ static inline u64 arch_counter_get_cntvct(void) return cval; } -static inline int arch_timer_arch_init(void) -{ - return 0; -} - #endif diff --git a/arch/arm64/include/asm/delay.h b/arch/arm64/include/asm/delay.h new file mode 100644 index 0000000..ea90d99 --- /dev/null +++ b/arch/arm64/include/asm/delay.h @@ -0,0 +1,32 @@ + +/* + * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_ARM64_DELAY_H +#define __ASM_ARM64_DELAY_H + +#include <asm-generic/delay.h> + +struct delay_timer { + unsigned long (*read_current_timer)(void); + unsigned long freq; +}; + +/* Delay-loop timer registration. */ +#define ARCH_HAS_READ_CURRENT_TIMER +extern void register_current_timer_delay(const struct delay_timer *timer); + +#endif /* defined(_ARM64_DELAY_H) */ diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h index 81a076e..ca4bdfb 100644 --- a/arch/arm64/include/asm/timex.h +++ b/arch/arm64/include/asm/timex.h @@ -16,13 +16,12 @@ #ifndef __ASM_TIMEX_H #define __ASM_TIMEX_H -#include <asm/arch_timer.h> - /* * Use the current timer as a cycle counter since this is what we use for * the delay loop. */ -#define get_cycles() arch_counter_get_cntvct() +typedef unsigned long cycles_t; +#define get_cycles() ({ cycles_t c; read_current_timer(&c) ? 0 : c; }) #include <asm-generic/timex.h> diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 29c39d5..213d1a3 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -63,14 +63,5 @@ EXPORT_SYMBOL(profile_pc); void __init time_init(void) { - u32 arch_timer_rate; - clocksource_of_init(); - - arch_timer_rate = arch_timer_get_rate(); - if (!arch_timer_rate) - panic("Unable to initialise architected timer.\n"); - - /* Calibrate the delay loop directly */ - lpj_fine = arch_timer_rate / HZ; } diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c index dad4ec9..cde0a28 100644 --- a/arch/arm64/lib/delay.c +++ b/arch/arm64/lib/delay.c @@ -24,6 +24,19 @@ #include <linux/module.h> #include <linux/timex.h> +static const struct delay_timer *delay_timer; +static bool delay_calibrated; + +int read_current_timer(unsigned long *timer_val) +{ + if (!delay_timer) + return -ENXIO; + + *timer_val = delay_timer->read_current_timer(); + return 0; +} +EXPORT_SYMBOL(read_current_timer); + void __delay(unsigned long cycles) { cycles_t start = get_cycles(); @@ -53,3 +66,16 @@ void __ndelay(unsigned long nsecs) __const_udelay(nsecs * 0x5UL); /* 2**32 / 1000000000 (rounded up) */ } EXPORT_SYMBOL(__ndelay); + +void register_current_timer_delay(const struct delay_timer *timer) +{ + if (!delay_calibrated) { + pr_info("Switching to timer-based delay loop\n"); + delay_timer = timer; + lpj_fine = timer->freq / HZ; + + delay_calibrated = true; + } else { + pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); + } +} diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 57e823c..8ee9918 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -422,6 +422,16 @@ struct timecounter *arch_timer_get_timecounter(void) return &timecounter; } +static struct delay_timer arch_delay_timer; + +static void __init arch_delay_timer_register(void) +{ + /* Use the architected timer for the delay loop. */ + arch_delay_timer.read_current_timer = arch_timer_read_counter(); + arch_delay_timer.freq = arch_timer_rate; + register_current_timer_delay(&arch_delay_timer); +} + static void __init arch_counter_register(unsigned type) { u64 start_count; @@ -630,7 +640,7 @@ static void __init arch_timer_common_init(void) arch_timer_banner(arch_timers_present); arch_counter_register(arch_timers_present); - arch_timer_arch_init(); + arch_delay_timer_register(); } static void __init arch_timer_init(struct device_node *np)
Now arch timer is registerd using generic sched timer, delay timer registration is the only part remaining in arch ports. Move this part to drivers clocksource and remove arch timer from arch ports. Signed-off-by: Prashant Gaikwad <pgaikwad@nvidia.com> --- arch/arm/include/asm/arch_timer.h | 1 - arch/arm/kernel/Makefile | 1 - arch/arm/kernel/arch_timer.c | 44 ---------------------------------- arch/arm64/include/asm/arch_timer.h | 5 ---- arch/arm64/include/asm/delay.h | 32 ++++++++++++++++++++++++ arch/arm64/include/asm/timex.h | 5 +-- arch/arm64/kernel/time.c | 9 ------- arch/arm64/lib/delay.c | 26 ++++++++++++++++++++ drivers/clocksource/arm_arch_timer.c | 12 ++++++++- 9 files changed, 71 insertions(+), 64 deletions(-) delete mode 100644 arch/arm/kernel/arch_timer.c create mode 100644 arch/arm64/include/asm/delay.h