diff mbox

[1/3] xen/arm: introduce xen_read_wallclock

Message ID 1446743385-12848-1-git-send-email-stefano.stabellini@eu.citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefano Stabellini Nov. 5, 2015, 5:09 p.m. UTC
Read the wallclock from the shared info page at boot time.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
---
 arch/arm/Kconfig         |    1 +
 arch/arm/xen/enlighten.c |   31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

Comments

Arnd Bergmann Nov. 5, 2015, 6:53 p.m. UTC | #1
On Thursday 05 November 2015 17:09:43 Stefano Stabellini wrote:
> Read the wallclock from the shared info page at boot time.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

Please use the appropriate timespec64 based functions here,
we are in the process of converting all callers of struct timespec.

> ---
> +static void xen_read_wallclock(struct timespec *ts)
> +{
> +	u32 version;
> +	u64 delta;
> +	struct timespec now;
> +	struct shared_info *s = HYPERVISOR_shared_info;
> +	struct pvclock_wall_clock *wall_clock = &(s->wc);

pvclock_wall_clock has a 'u32 sec', so that suffers from
a potential overflow. We should try to avoid introducing that
on ARM, and instead have a 64-bit seconds based version,
or alternatively a 64-bit nanoseconds version (with no
extra seconds) that is also sufficient.

>  	struct vcpu_register_vcpu_info info;
> @@ -218,6 +246,7 @@ static int __init xen_guest_init(void)
>  	struct shared_info *shared_info_page = NULL;
>  	struct resource res;
>  	phys_addr_t grant_frames;
> +	struct timespec ts;
>  
>  	if (!xen_domain())
>  		return 0;
> @@ -291,6 +320,8 @@ static int __init xen_guest_init(void)
>  
>  	pv_time_ops.steal_clock = xen_stolen_accounting;
>  	static_key_slow_inc(&paravirt_steal_enabled);
> +	xen_read_wallclock(&ts);
> +	do_settimeofday(&ts);
>  

Once you can get a 64-bit time, call do_settimeofday64()
here.

	Arnd
Mark Rutland Nov. 6, 2015, 12:26 p.m. UTC | #2
> +	delta = arch_timer_read_counter();	/* time since system boot */
> +	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;

The arch counter value is not a number of nanoseconds (unless CNTFRQ
reads as 1000000), so this doesn't look right; the units don't match.

Thanks,
Mark.
Stefano Stabellini Nov. 6, 2015, 2:34 p.m. UTC | #3
On Fri, 6 Nov 2015, Mark Rutland wrote:
> > +	delta = arch_timer_read_counter();	/* time since system boot */
> > +	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
> 
> The arch counter value is not a number of nanoseconds (unless CNTFRQ
> reads as 1000000), so this doesn't look right; the units don't match.

That was a bad mistake. Thanks for catching it!
Stefano Stabellini Nov. 6, 2015, 3:09 p.m. UTC | #4
On Thu, 5 Nov 2015, Arnd Bergmann wrote:
> On Thursday 05 November 2015 17:09:43 Stefano Stabellini wrote:
> > Read the wallclock from the shared info page at boot time.
> > 
> > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> 
> Please use the appropriate timespec64 based functions here,
> we are in the process of converting all callers of struct timespec.

OK, I can do that


> > ---
> > +static void xen_read_wallclock(struct timespec *ts)
> > +{
> > +	u32 version;
> > +	u64 delta;
> > +	struct timespec now;
> > +	struct shared_info *s = HYPERVISOR_shared_info;
> > +	struct pvclock_wall_clock *wall_clock = &(s->wc);
> 
> pvclock_wall_clock has a 'u32 sec', so that suffers from
> a potential overflow. We should try to avoid introducing that
> on ARM, and instead have a 64-bit seconds based version,
> or alternatively a 64-bit nanoseconds version (with no
> extra seconds) that is also sufficient.

Unfortunately this a preexisting ABI which is common with other existing
architectures (see arch/x86/kernel/pvclock.c:pvclock_read_wallclock). I
cannot just change it.


> >  	struct vcpu_register_vcpu_info info;
> > @@ -218,6 +246,7 @@ static int __init xen_guest_init(void)
> >  	struct shared_info *shared_info_page = NULL;
> >  	struct resource res;
> >  	phys_addr_t grant_frames;
> > +	struct timespec ts;
> >  
> >  	if (!xen_domain())
> >  		return 0;
> > @@ -291,6 +320,8 @@ static int __init xen_guest_init(void)
> >  
> >  	pv_time_ops.steal_clock = xen_stolen_accounting;
> >  	static_key_slow_inc(&paravirt_steal_enabled);
> > +	xen_read_wallclock(&ts);
> > +	do_settimeofday(&ts);
> >  
> 
> Once you can get a 64-bit time, call do_settimeofday64()
> here.

OK
Arnd Bergmann Nov. 6, 2015, 3:56 p.m. UTC | #5
On Friday 06 November 2015 15:09:53 Stefano Stabellini wrote:
> > > ---
> > > +static void xen_read_wallclock(struct timespec *ts)
> > > +{
> > > +   u32 version;
> > > +   u64 delta;
> > > +   struct timespec now;
> > > +   struct shared_info *s = HYPERVISOR_shared_info;
> > > +   struct pvclock_wall_clock *wall_clock = &(s->wc);
> > 
> > pvclock_wall_clock has a 'u32 sec', so that suffers from
> > a potential overflow. We should try to avoid introducing that
> > on ARM, and instead have a 64-bit seconds based version,
> > or alternatively a 64-bit nanoseconds version (with no
> > extra seconds) that is also sufficient.
> 
> Unfortunately this a preexisting ABI which is common with other existing
> architectures (see arch/x86/kernel/pvclock.c:pvclock_read_wallclock). I
> cannot just change it.

Maybe we need a "struct pvclock_wall_clock_v2"?

In theory, 'u32 sec' takes us all the way until 2106, which is significantly
longer away than the signed overflow in 2038, but it would still be nice
to not introduce this limitation on ARM in the first place.

I'm not quite sure about how the split between pvclock_wall_clock and
the delta works. Normally I'd expect that pvclock_wall_clock is the wallclock
time as it was at boot, while delta is the number of expired nanoseconds
since boot. However it is unclear why the latter has a longer range
(539 years starting at boot, rather than 126 years starting in 1970).

	Arnd
Stefano Stabellini Nov. 9, 2015, 1:53 p.m. UTC | #6
On Fri, 6 Nov 2015, Arnd Bergmann wrote:
> > > > ---
> > > > +static void xen_read_wallclock(struct timespec *ts)
> > > > +{
> > > > +   u32 version;
> > > > +   u64 delta;
> > > > +   struct timespec now;
> > > > +   struct shared_info *s = HYPERVISOR_shared_info;
> > > > +   struct pvclock_wall_clock *wall_clock = &(s->wc);
> > > 
> > > pvclock_wall_clock has a 'u32 sec', so that suffers from
> > > a potential overflow. We should try to avoid introducing that
> > > on ARM, and instead have a 64-bit seconds based version,
> > > or alternatively a 64-bit nanoseconds version (with no
> > > extra seconds) that is also sufficient.
> > 
> > Unfortunately this a preexisting ABI which is common with other existing
> > architectures (see arch/x86/kernel/pvclock.c:pvclock_read_wallclock). I
> > cannot just change it.
> 
> Maybe we need a "struct pvclock_wall_clock_v2"?
> 
> In theory, 'u32 sec' takes us all the way until 2106, which is significantly
> longer away than the signed overflow in 2038, but it would still be nice
> to not introduce this limitation on ARM in the first place.
> 
> I'm not quite sure about how the split between pvclock_wall_clock and
> the delta works. Normally I'd expect that pvclock_wall_clock is the wallclock
> time as it was at boot, while delta is the number of expired nanoseconds
> since boot. However it is unclear why the latter has a longer range
> (539 years starting at boot, rather than 126 years starting in 1970).

Actually we already have a sec overflow field in struct shared_info on
the hypervisor side, called wc_sec_hi. I just need to make use of it in
Linux. See:

http://xenbits.xen.org/gitweb/?p=xen.git;a=blob_plain;f=xen/include/public/xen.h;hb=HEAD

Thanks for raising my attention to the problem,

Stefano
Arnd Bergmann Nov. 9, 2015, 4:16 p.m. UTC | #7
On Monday 09 November 2015 13:53:30 Stefano Stabellini wrote:
> On Fri, 6 Nov 2015, Arnd Bergmann wrote:
> > I'm not quite sure about how the split between pvclock_wall_clock and
> > the delta works. Normally I'd expect that pvclock_wall_clock is the wallclock
> > time as it was at boot, while delta is the number of expired nanoseconds
> > since boot. However it is unclear why the latter has a longer range
> > (539 years starting at boot, rather than 126 years starting in 1970).
> 
> Actually we already have a sec overflow field in struct shared_info on
> the hypervisor side, called wc_sec_hi. I just need to make use of it in
> Linux. See:
> 
> http://xenbits.xen.org/gitweb/?p=xen.git;a=blob_plain;f=xen/include/public/xen.h;hb=HEAD
> 
> Thanks for raising my attention to the problem,

Sounds good for Xen on ARM. This same interface is also used on
KVM, right? Does the extension also work there?

	Arnd
Stefano Stabellini Nov. 9, 2015, 5:14 p.m. UTC | #8
On Mon, 9 Nov 2015, Arnd Bergmann wrote:
> On Monday 09 November 2015 13:53:30 Stefano Stabellini wrote:
> > On Fri, 6 Nov 2015, Arnd Bergmann wrote:
> > > I'm not quite sure about how the split between pvclock_wall_clock and
> > > the delta works. Normally I'd expect that pvclock_wall_clock is the wallclock
> > > time as it was at boot, while delta is the number of expired nanoseconds
> > > since boot. However it is unclear why the latter has a longer range
> > > (539 years starting at boot, rather than 126 years starting in 1970).
> > 
> > Actually we already have a sec overflow field in struct shared_info on
> > the hypervisor side, called wc_sec_hi. I just need to make use of it in
> > Linux. See:
> > 
> > http://xenbits.xen.org/gitweb/?p=xen.git;a=blob_plain;f=xen/include/public/xen.h;hb=HEAD
> > 
> > Thanks for raising my attention to the problem,
> 
> Sounds good for Xen on ARM. This same interface is also used on
> KVM, right? Does the extension also work there?

It doesn't look like it (see arch/x86/kvm/x86.c:kvm_write_wall_clock).
Arnd Bergmann Nov. 9, 2015, 8:42 p.m. UTC | #9
On Monday 09 November 2015 17:14:24 Stefano Stabellini wrote:
> On Mon, 9 Nov 2015, Arnd Bergmann wrote:
> > On Monday 09 November 2015 13:53:30 Stefano Stabellini wrote:
> > > On Fri, 6 Nov 2015, Arnd Bergmann wrote:
> > > > I'm not quite sure about how the split between pvclock_wall_clock and
> > > > the delta works. Normally I'd expect that pvclock_wall_clock is the wallclock
> > > > time as it was at boot, while delta is the number of expired nanoseconds
> > > > since boot. However it is unclear why the latter has a longer range
> > > > (539 years starting at boot, rather than 126 years starting in 1970).
> > > 
> > > Actually we already have a sec overflow field in struct shared_info on
> > > the hypervisor side, called wc_sec_hi. I just need to make use of it in
> > > Linux. See:
> > > 
> > > http://xenbits.xen.org/gitweb/?p=xen.git;a=blob_plain;f=xen/include/public/xen.h;hb=HEAD
> > > 
> > > Thanks for raising my attention to the problem,
> > 
> > Sounds good for Xen on ARM. This same interface is also used on
> > KVM, right? Does the extension also work there?
> 
> It doesn't look like it (see arch/x86/kvm/x86.c:kvm_write_wall_clock).

(adding Christoffer and Marc)

That kvm_set_msr_common() function appears to be very x86 specific though,
and I'm not sure what the ARM equivalent would be.

How does KVM get/set the system time today on ARM and ARM64? Is this
going to be done through a PSCI call based on pvclock_wall_clock in
the future?

	Arnd
diff mbox

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 60be104..a9de420 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1852,6 +1852,7 @@  config XEN
 	depends on CPU_V7 && !CPU_V6
 	depends on !GENERIC_ATOMIC64
 	depends on MMU
+	depends on HAVE_ARM_ARCH_TIMER
 	select ARCH_DMA_ADDR_T_64BIT
 	select ARM_PSCI
 	select SWIOTLB_XEN
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 15621b1..f07383d 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -28,6 +28,8 @@ 
 #include <linux/cpufreq.h>
 #include <linux/cpu.h>
 #include <linux/console.h>
+#include <linux/timekeeping.h>
+#include <clocksource/arm_arch_timer.h>
 
 #include <linux/mm.h>
 
@@ -95,6 +97,32 @@  static unsigned long long xen_stolen_accounting(int cpu)
 	return state.time[RUNSTATE_runnable] + state.time[RUNSTATE_offline];
 }
 
+static void xen_read_wallclock(struct timespec *ts)
+{
+	u32 version;
+	u64 delta;
+	struct timespec now;
+	struct shared_info *s = HYPERVISOR_shared_info;
+	struct pvclock_wall_clock *wall_clock = &(s->wc);
+
+	/* get wallclock at system boot */
+	do {
+		version = wall_clock->version;
+		rmb();		/* fetch version before time */
+		now.tv_sec  = wall_clock->sec;
+		now.tv_nsec = wall_clock->nsec;
+		rmb();		/* fetch time before checking version */
+	} while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+	delta = arch_timer_read_counter();	/* time since system boot */
+	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+
+	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
+	now.tv_sec = delta;
+
+	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+}
+
 static void xen_percpu_init(void)
 {
 	struct vcpu_register_vcpu_info info;
@@ -218,6 +246,7 @@  static int __init xen_guest_init(void)
 	struct shared_info *shared_info_page = NULL;
 	struct resource res;
 	phys_addr_t grant_frames;
+	struct timespec ts;
 
 	if (!xen_domain())
 		return 0;
@@ -291,6 +320,8 @@  static int __init xen_guest_init(void)
 
 	pv_time_ops.steal_clock = xen_stolen_accounting;
 	static_key_slow_inc(&paravirt_steal_enabled);
+	xen_read_wallclock(&ts);
+	do_settimeofday(&ts);
 
 	return 0;
 }