Message ID | 20191203041440.6275-4-andrew@aj.id.au (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Expose GT CNTFRQ as a CPU property to support AST2600 | expand |
On 12/3/19 5:14 AM, Andrew Jeffery wrote: > The ASPEED AST2600 clocks the generic timer at the rate of HPLL. On > recent firmwares this is at 1125MHz, which is considerably quicker than > the assumed 62.5MHz of the current generic timer implementation. The > delta between the value as read from CNTFRQ and the true rate of the > underlying QEMUTimer leads to sticky behaviour in AST2600 guests. > > Add a feature-gated property exposing CNTFRQ for ARM CPUs providing the > generic timer. This allows platforms to configure CNTFRQ (and the > associated QEMUTimer) to the appropriate frequency prior to starting the > guest. > > As the platform can now determine the rate of CNTFRQ we're exposed to > limitations of QEMUTimer that didn't previously materialise: In the > course of emulation we need to arbitrarily and accurately convert > between guest ticks and time, but we're constrained by QEMUTimer's use > of an integer scaling factor. The effect is QEMUTimer cannot exactly > capture the period of frequencies that do not cleanly divide > NANOSECONDS_PER_SECOND for scaling ticks to time. As such, provide an > equally inaccurate scaling factor for scaling time to ticks so at least > a self-consistent inverse relationship holds. 
> > Signed-off-by: Andrew Jeffery <andrew@aj.id.au> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/cpu.c | 43 +++++++++++++++++++++++++++++++++---------- > target/arm/cpu.h | 18 ++++++++++++++++++ > target/arm/helper.c | 9 ++++++++- > 3 files changed, 59 insertions(+), 11 deletions(-) > > diff --git a/target/arm/cpu.c b/target/arm/cpu.c > index 5698a74061bb..f186019a77fd 100644 > --- a/target/arm/cpu.c > +++ b/target/arm/cpu.c > @@ -974,10 +974,12 @@ static void arm_cpu_initfn(Object *obj) > if (tcg_enabled()) { > cpu->psci_version = 2; /* TCG implements PSCI 0.2 */ > } > - > - cpu->gt_cntfrq = NANOSECONDS_PER_SECOND / GTIMER_SCALE; > } > > +static Property arm_cpu_gt_cntfrq_property = > + DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq, > + NANOSECONDS_PER_SECOND / GTIMER_SCALE); > + > static Property arm_cpu_reset_cbar_property = > DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0); > > @@ -1174,6 +1176,11 @@ void arm_cpu_post_init(Object *obj) > > qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property, > &error_abort); > + > + if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) { > + qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property, > + &error_abort); > + } > } > > static void arm_cpu_finalizefn(Object *obj) > @@ -1253,14 +1260,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) > } > } > > - cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, > - arm_gt_ptimer_cb, cpu); > - cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, > - arm_gt_vtimer_cb, cpu); > - cpu->gt_timer[GTIMER_HYP] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, > - arm_gt_htimer_cb, cpu); > - cpu->gt_timer[GTIMER_SEC] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, > - arm_gt_stimer_cb, cpu); > + > + { > + uint64_t scale; Apparently you have to use this odd indent due to the '#ifndef CONFIG_USER_ONLY'. Well, acceptable. 
> + > + if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { > + if (!cpu->gt_cntfrq) { > + error_setg(errp, "Invalid CNTFRQ: %"PRId64"Hz", > + cpu->gt_cntfrq); > + return; > + } > + scale = gt_cntfrq_period_ns(cpu); > + } else { > + scale = GTIMER_SCALE; > + } > + > + cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale, > + arm_gt_ptimer_cb, cpu); > + cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale, > + arm_gt_vtimer_cb, cpu); > + cpu->gt_timer[GTIMER_HYP] = timer_new(QEMU_CLOCK_VIRTUAL, scale, > + arm_gt_htimer_cb, cpu); > + cpu->gt_timer[GTIMER_SEC] = timer_new(QEMU_CLOCK_VIRTUAL, scale, > + arm_gt_stimer_cb, cpu); > + } > #endif > > cpu_exec_realizefn(cs, &local_err); > diff --git a/target/arm/cpu.h b/target/arm/cpu.h > index 666c03871fdf..0bcd13dcac81 100644 > --- a/target/arm/cpu.h > +++ b/target/arm/cpu.h > @@ -939,6 +939,24 @@ struct ARMCPU { > > static inline unsigned int gt_cntfrq_period_ns(ARMCPU *cpu) > { > + /* > + * The exact approach to calculating guest ticks is: > + * > + * muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), cpu->gt_cntfrq, > + * NANOSECONDS_PER_SECOND); > + * > + * We don't do that. Rather we intentionally use integer division > + * truncation below and in the caller for the conversion of host monotonic > + * time to guest ticks to provide the exact inverse for the semantics of > + * the QEMUTimer scale factor. QEMUTimer's scale facter is an integer, so > + * it loses precision when representing frequencies where > + * `(NANOSECONDS_PER_SECOND % cpu->gt_cntfrq) > 0` holds. Failing to > + * provide an exact inverse leads to scheduling timers with negative > + * periods, which in turn leads to sticky behaviour in the guest. > + * > + * Finally, CNTFRQ is effectively capped at 1GHz to ensure our scale factor > + * cannot become zero. > + */ This comment belongs to the previous patch. I'd rather see this function + big comment in target/arm/cpu.c. 
With comment moved (and if possible function uninlined): Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> > /* XXX: Could include qemu/timer.h to get NANOSECONDS_PER_SECOND? */ > const unsigned int ns_per_s = 1000 * 1000 * 1000; > return ns_per_s > cpu->gt_cntfrq ? ns_per_s / cpu->gt_cntfrq : 1; > diff --git a/target/arm/helper.c b/target/arm/helper.c > index 2622a9a8d02f..da960d17040b 100644 > --- a/target/arm/helper.c > +++ b/target/arm/helper.c > @@ -2683,6 +2683,13 @@ void arm_gt_stimer_cb(void *opaque) > gt_recalc_timer(cpu, GTIMER_SEC); > } > > +static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque) > +{ > + ARMCPU *cpu = env_archcpu(env); > + > + cpu->env.cp15.c14_cntfrq = cpu->gt_cntfrq; > +} > + > static const ARMCPRegInfo generic_timer_cp_reginfo[] = { > /* Note that CNTFRQ is purely reads-as-written for the benefit > * of software; writing it doesn't actually change the timer frequency. > @@ -2697,7 +2704,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { > .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0, > .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access, > .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq), > - .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE, > + .resetfn = arm_gt_cntfrq_reset, > }, > /* overall control: mostly access permissions */ > { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH, >
On Tue, 3 Dec 2019, at 16:49, Philippe Mathieu-Daudé wrote: > On 12/3/19 5:14 AM, Andrew Jeffery wrote: > > The ASPEED AST2600 clocks the generic timer at the rate of HPLL. On > > recent firmwares this is at 1125MHz, which is considerably quicker than > > the assumed 62.5MHz of the current generic timer implementation. The > > delta between the value as read from CNTFRQ and the true rate of the > > underlying QEMUTimer leads to sticky behaviour in AST2600 guests. > > > > Add a feature-gated property exposing CNTFRQ for ARM CPUs providing the > > generic timer. This allows platforms to configure CNTFRQ (and the > > associated QEMUTimer) to the appropriate frequency prior to starting the > > guest. > > > > As the platform can now determine the rate of CNTFRQ we're exposed to > > limitations of QEMUTimer that didn't previously materialise: In the > > course of emulation we need to arbitrarily and accurately convert > > between guest ticks and time, but we're constrained by QEMUTimer's use > > of an integer scaling factor. The effect is QEMUTimer cannot exactly > > capture the period of frequencies that do not cleanly divide > > NANOSECONDS_PER_SECOND for scaling ticks to time. As such, provide an > > equally inaccurate scaling factor for scaling time to ticks so at least > > a self-consistent inverse relationship holds. 
> > > > Signed-off-by: Andrew Jeffery <andrew@aj.id.au> > > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > > --- > > target/arm/cpu.c | 43 +++++++++++++++++++++++++++++++++---------- > > target/arm/cpu.h | 18 ++++++++++++++++++ > > target/arm/helper.c | 9 ++++++++- > > 3 files changed, 59 insertions(+), 11 deletions(-) > > > > diff --git a/target/arm/cpu.c b/target/arm/cpu.c > > index 5698a74061bb..f186019a77fd 100644 > > --- a/target/arm/cpu.c > > +++ b/target/arm/cpu.c > > @@ -974,10 +974,12 @@ static void arm_cpu_initfn(Object *obj) > > if (tcg_enabled()) { > > cpu->psci_version = 2; /* TCG implements PSCI 0.2 */ > > } > > - > > - cpu->gt_cntfrq = NANOSECONDS_PER_SECOND / GTIMER_SCALE; > > } > > > > +static Property arm_cpu_gt_cntfrq_property = > > + DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq, > > + NANOSECONDS_PER_SECOND / GTIMER_SCALE); > > + > > static Property arm_cpu_reset_cbar_property = > > DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0); > > > > @@ -1174,6 +1176,11 @@ void arm_cpu_post_init(Object *obj) > > > > qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property, > > &error_abort); > > + > > + if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) { > > + qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property, > > + &error_abort); > > + } > > } > > > > static void arm_cpu_finalizefn(Object *obj) > > @@ -1253,14 +1260,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) > > } > > } > > > > - cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, > > - arm_gt_ptimer_cb, cpu); > > - cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, > > - arm_gt_vtimer_cb, cpu); > > - cpu->gt_timer[GTIMER_HYP] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, > > - arm_gt_htimer_cb, cpu); > > - cpu->gt_timer[GTIMER_SEC] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, > > - arm_gt_stimer_cb, cpu); > > + > > + { > > + uint64_t scale; > > Apparently you have to use this odd 
indent due to the '#ifndef > CONFIG_USER_ONLY'. Well, acceptable. It's the indent associated with the block scope for the scale variable to limit its lifetime to where I needed it. > > > + > > + if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { > > + if (!cpu->gt_cntfrq) { > > + error_setg(errp, "Invalid CNTFRQ: %"PRId64"Hz", > > + cpu->gt_cntfrq); > > + return; > > + } > > + scale = gt_cntfrq_period_ns(cpu); > > + } else { > > + scale = GTIMER_SCALE; > > + } > > + > > + cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale, > > + arm_gt_ptimer_cb, cpu); > > + cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale, > > + arm_gt_vtimer_cb, cpu); > > + cpu->gt_timer[GTIMER_HYP] = timer_new(QEMU_CLOCK_VIRTUAL, scale, > > + arm_gt_htimer_cb, cpu); > > + cpu->gt_timer[GTIMER_SEC] = timer_new(QEMU_CLOCK_VIRTUAL, scale, > > + arm_gt_stimer_cb, cpu); > > + } > > #endif > > > > cpu_exec_realizefn(cs, &local_err); > > diff --git a/target/arm/cpu.h b/target/arm/cpu.h > > index 666c03871fdf..0bcd13dcac81 100644 > > --- a/target/arm/cpu.h > > +++ b/target/arm/cpu.h > > @@ -939,6 +939,24 @@ struct ARMCPU { > > > > static inline unsigned int gt_cntfrq_period_ns(ARMCPU *cpu) > > { > > + /* > > + * The exact approach to calculating guest ticks is: > > + * > > + * muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), cpu->gt_cntfrq, > > + * NANOSECONDS_PER_SECOND); > > + * > > + * We don't do that. Rather we intentionally use integer division > > + * truncation below and in the caller for the conversion of host monotonic > > + * time to guest ticks to provide the exact inverse for the semantics of > > + * the QEMUTimer scale factor. QEMUTimer's scale facter is an integer, so > > + * it loses precision when representing frequencies where > > + * `(NANOSECONDS_PER_SECOND % cpu->gt_cntfrq) > 0` holds. Failing to > > + * provide an exact inverse leads to scheduling timers with negative > > + * periods, which in turn leads to sticky behaviour in the guest. 
> > + * > > + * Finally, CNTFRQ is effectively capped at 1GHz to ensure our scale factor > > + * cannot become zero. > > + */ > > This comment belongs to the previous patch. Sort of, but also sort of not. We don't expose the limitation until this patch, because NANOSECONDS_PER_SECOND is an integer multiple of GTIMER_SCALE and gt_cntfrq is fixed at NANOSECONDS_PER_SECOND / GTIMER_SCALE until this patch adds the property that allows it to be configured to arbitrary values. So I added the comment in this patch rather than in the previous one, which adds the code. Andrew
diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 5698a74061bb..f186019a77fd 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -974,10 +974,12 @@ static void arm_cpu_initfn(Object *obj) if (tcg_enabled()) { cpu->psci_version = 2; /* TCG implements PSCI 0.2 */ } - - cpu->gt_cntfrq = NANOSECONDS_PER_SECOND / GTIMER_SCALE; } +static Property arm_cpu_gt_cntfrq_property = + DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq, + NANOSECONDS_PER_SECOND / GTIMER_SCALE); + static Property arm_cpu_reset_cbar_property = DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0); @@ -1174,6 +1176,11 @@ void arm_cpu_post_init(Object *obj) qdev_property_add_static(DEVICE(obj), &arm_cpu_cfgend_property, &error_abort); + + if (arm_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER)) { + qdev_property_add_static(DEVICE(cpu), &arm_cpu_gt_cntfrq_property, + &error_abort); + } } static void arm_cpu_finalizefn(Object *obj) @@ -1253,14 +1260,30 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } } - cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, - arm_gt_ptimer_cb, cpu); - cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, - arm_gt_vtimer_cb, cpu); - cpu->gt_timer[GTIMER_HYP] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, - arm_gt_htimer_cb, cpu); - cpu->gt_timer[GTIMER_SEC] = timer_new(QEMU_CLOCK_VIRTUAL, GTIMER_SCALE, - arm_gt_stimer_cb, cpu); + + { + uint64_t scale; + + if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { + if (!cpu->gt_cntfrq) { + error_setg(errp, "Invalid CNTFRQ: %"PRId64"Hz", + cpu->gt_cntfrq); + return; + } + scale = gt_cntfrq_period_ns(cpu); + } else { + scale = GTIMER_SCALE; + } + + cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale, + arm_gt_ptimer_cb, cpu); + cpu->gt_timer[GTIMER_VIRT] = timer_new(QEMU_CLOCK_VIRTUAL, scale, + arm_gt_vtimer_cb, cpu); + cpu->gt_timer[GTIMER_HYP] = timer_new(QEMU_CLOCK_VIRTUAL, scale, + arm_gt_htimer_cb, cpu); + cpu->gt_timer[GTIMER_SEC] = 
timer_new(QEMU_CLOCK_VIRTUAL, scale, + arm_gt_stimer_cb, cpu); + } #endif cpu_exec_realizefn(cs, &local_err); diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 666c03871fdf..0bcd13dcac81 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -939,6 +939,24 @@ struct ARMCPU { static inline unsigned int gt_cntfrq_period_ns(ARMCPU *cpu) { + /* + * The exact approach to calculating guest ticks is: + * + * muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), cpu->gt_cntfrq, + * NANOSECONDS_PER_SECOND); + * + * We don't do that. Rather we intentionally use integer division + * truncation below and in the caller for the conversion of host monotonic + * time to guest ticks to provide the exact inverse for the semantics of + * the QEMUTimer scale factor. QEMUTimer's scale facter is an integer, so + * it loses precision when representing frequencies where + * `(NANOSECONDS_PER_SECOND % cpu->gt_cntfrq) > 0` holds. Failing to + * provide an exact inverse leads to scheduling timers with negative + * periods, which in turn leads to sticky behaviour in the guest. + * + * Finally, CNTFRQ is effectively capped at 1GHz to ensure our scale factor + * cannot become zero. + */ /* XXX: Could include qemu/timer.h to get NANOSECONDS_PER_SECOND? */ const unsigned int ns_per_s = 1000 * 1000 * 1000; return ns_per_s > cpu->gt_cntfrq ? ns_per_s / cpu->gt_cntfrq : 1; diff --git a/target/arm/helper.c b/target/arm/helper.c index 2622a9a8d02f..da960d17040b 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -2683,6 +2683,13 @@ void arm_gt_stimer_cb(void *opaque) gt_recalc_timer(cpu, GTIMER_SEC); } +static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque) +{ + ARMCPU *cpu = env_archcpu(env); + + cpu->env.cp15.c14_cntfrq = cpu->gt_cntfrq; +} + static const ARMCPRegInfo generic_timer_cp_reginfo[] = { /* Note that CNTFRQ is purely reads-as-written for the benefit * of software; writing it doesn't actually change the timer frequency. 
@@ -2697,7 +2704,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0, .access = PL1_RW | PL0_R, .accessfn = gt_cntfrq_access, .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq), - .resetvalue = (1000 * 1000 * 1000) / GTIMER_SCALE, + .resetfn = arm_gt_cntfrq_reset, }, /* overall control: mostly access permissions */ { .name = "CNTKCTL", .state = ARM_CP_STATE_BOTH,