diff mbox

[2/2] add sysctl for kvm wallclock sync

Message ID 1251805848-17451-3-git-send-email-glommer@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Glauber Costa Sept. 1, 2009, 11:50 a.m. UTC
This patch introduces a new sysctl called kvm_sync_wallclock.

It controls the behaviour of the worker that updates guest wallclock time.
The worker will fire in periods specified by its value, if it is greater than zero,
and not fire at all otherwise.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 arch/x86/include/asm/kvm_para.h |    6 ++++++
 arch/x86/kernel/kvmclock.c      |   17 ++++++++++++++++-
 kernel/sysctl.c                 |   13 +++++++++++++
 3 files changed, 35 insertions(+), 1 deletions(-)

Comments

Chris Lalancette Sept. 2, 2009, 6:54 a.m. UTC | #1
Glauber Costa wrote:
> diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> index fc409e9..3a4e1bd 100644
> --- a/arch/x86/kernel/kvmclock.c
> +++ b/arch/x86/kernel/kvmclock.c
> @@ -27,7 +27,7 @@
>  #define KVM_SCALE 22
>  
>  static int kvmclock = 1;
> -static unsigned int kvm_wall_update_interval = 5;
> +unsigned int kvm_wall_update_interval = 5;

I think the overall idea is very interesting, but I also think that it should be
disabled by default.  Because of the problems with time in virtualization,
people are already conditioned to run ntpd inside their guests, and this
kvmclock change will "fight" with ntpd.  Also, the command "# date 09091323" (or
whatever) ceases to work like it does on bare-metal, so I think it has to be an
opt-in feature.
Glauber Costa Sept. 2, 2009, 11:31 a.m. UTC | #2
On Wed, Sep 02, 2009 at 08:54:37AM +0200, Chris Lalancette wrote:
> Glauber Costa wrote:
> > diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
> > index fc409e9..3a4e1bd 100644
> > --- a/arch/x86/kernel/kvmclock.c
> > +++ b/arch/x86/kernel/kvmclock.c
> > @@ -27,7 +27,7 @@
> >  #define KVM_SCALE 22
> >  
> >  static int kvmclock = 1;
> > -static unsigned int kvm_wall_update_interval = 5;
> > +unsigned int kvm_wall_update_interval = 5;
> 
> I think the overall idea is very interesting, but I also think that it should be
> disabled by default.  Because of the problems with time in virtualization,
> people are already conditioned to run ntpd inside their guests, and this
> kvmclock change will "fight" with ntpd.  Also, the command "# date 09091323" (or
> whatever) ceases to work like it does on bare-metal, so I think it has to be an
> opt-in feature.
I don't disagree.

Actually, I thought about that myself a few hours after I sent the patch. 
Avi, do you have a word on that ?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Sept. 2, 2009, 11:40 a.m. UTC | #3
On 09/02/2009 02:31 PM, Glauber Costa wrote:
>> I think the overall idea is very interesting, but I also think that it should be
>> disabled by default.  Because of the problems with time in virtualization,
>> people are already conditioned to run ntpd inside their guests, and this
>> kvmclock change will "fight" with ntpd.  Also, the command "# date 09091323" (or
>> whatever) ceases to work like it does on bare-metal, so I think it has to be an
>> opt-in feature.
>>      
> I don't disagree.
>
> Actually, I thought about that myself a few hours after I sent the patch.
> Avi, do you have a word on that ?
>    

Chris' arguments are compelling IMO.
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index b8a3305..3a3f38f 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -47,8 +47,14 @@  struct kvm_mmu_op_release_pt {
 
 #ifdef __KERNEL__
 #include <asm/processor.h>
+#include <linux/sysctl.h>
 
 extern void kvmclock_init(void);
+extern unsigned int kvm_wall_update_interval;
+extern int kvm_sync_wall_handler(struct ctl_table *table, int write,
+                          struct file *filp, void __user *buffer,
+                          size_t *lenp, loff_t *ppos);
+
 
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index fc409e9..3a4e1bd 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -27,7 +27,7 @@ 
 #define KVM_SCALE 22
 
 static int kvmclock = 1;
-static unsigned int kvm_wall_update_interval = 5;
+unsigned int kvm_wall_update_interval = 5;
 
 static int parse_no_kvmclock(char *arg)
 {
@@ -91,6 +91,21 @@  static __init int init_updates(void)
  */
 late_initcall(init_updates);
 
+int kvm_sync_wall_handler(struct ctl_table *table, int write,
+	                  struct file *filp, void __user *buffer,
+			  size_t *lenp, loff_t *ppos)
+{
+	int ret  = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+	if (ret || !write)
+		return ret;
+
+	cancel_delayed_work_sync(&kvm_sync_wall_work);
+
+	schedule_next_update();
+	return 0;
+}
+
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 98e0232..b787c81 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -51,6 +51,7 @@ 
 #include <linux/ftrace.h>
 #include <linux/slow-work.h>
 #include <linux/perf_counter.h>
+#include <linux/kvm_para.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
@@ -989,6 +990,18 @@  static struct ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_KVM_CLOCK
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "kvm_sync_wallclock",
+		.data		= &kvm_wall_update_interval,
+		.maxlen		= sizeof(kvm_wall_update_interval),
+		.mode		= 0644,
+		.proc_handler	= &kvm_sync_wall_handler,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+#endif
 
 /*
  * NOTE: do not add new entries to this table unless you have read